mirror of
https://github.com/fhswf/aki_prj23_transparenzregister.git
synced 2025-04-22 22:12:53 +02:00
build: Dockerize apps/fetch_news.py as ingestor
This commit is contained in:
parent
170056bf58
commit
5dcf8ecf55
14
Dockerfile
14
Dockerfile
@ -24,9 +24,23 @@ FROM base as ingest
|
|||||||
|
|
||||||
LABEL PART="DATA_INGESTOR"
|
LABEL PART="DATA_INGESTOR"
|
||||||
|
|
||||||
|
### Install Chrome ###
|
||||||
|
# Update the package lists
|
||||||
|
RUN apt-get update
|
||||||
|
|
||||||
|
# Install wget and unzip
|
||||||
|
RUN apt-get install -y wget unzip
|
||||||
|
|
||||||
|
# Install Google Chrome
|
||||||
|
RUN wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
|
||||||
|
RUN dpkg -i google-chrome-stable_current_amd64.deb; apt-get -fy install
|
||||||
|
|
||||||
RUN pip install --find-links=dist aki-prj23-transparenzregister[ingest] --no-cache-dir && \
|
RUN pip install --find-links=dist aki-prj23-transparenzregister[ingest] --no-cache-dir && \
|
||||||
rm dist/ -R
|
rm dist/ -R
|
||||||
|
|
||||||
|
ENTRYPOINT ["fetch-news-schedule", "ENV"]
|
||||||
|
CMD ["--level", "DEBUG"]
|
||||||
|
|
||||||
FROM base as data-transformation
|
FROM base as data-transformation
|
||||||
|
|
||||||
LABEL PART="DATA-TRANSFORMATION"
|
LABEL PART="DATA-TRANSFORMATION"
|
||||||
|
@ -56,12 +56,12 @@ the following layout:
|
|||||||
```
|
```
|
||||||
PYTHON_POSTGRES_USERNAME=postgres
|
PYTHON_POSTGRES_USERNAME=postgres
|
||||||
PYTHON_POSTGRES_PASSWORD=postgres
|
PYTHON_POSTGRES_PASSWORD=postgres
|
||||||
PYTHON_POSTGRES_HOST=localhost
|
PYTHON_POSTGRES_HOST=postgres
|
||||||
PYTHON_POSTGRES_DATABASE=postgres
|
PYTHON_POSTGRES_DATABASE=postgres
|
||||||
PYTHON_POSTGRES_PORT=5432
|
PYTHON_POSTGRES_PORT=5432
|
||||||
|
|
||||||
PYTHON_MONGO_USERNAME=username
|
PYTHON_MONGO_USERNAME=username
|
||||||
PYTHON_MONGO_HOST=localhost
|
PYTHON_MONGO_HOST=mongodb
|
||||||
PYTHON_MONGO_PASSWORD=password
|
PYTHON_MONGO_PASSWORD=password
|
||||||
PYTHON_MONGO_PORT=27017
|
PYTHON_MONGO_PORT=27017
|
||||||
PYTHON_MONGO_DATABASE=transparenzregister
|
PYTHON_MONGO_DATABASE=transparenzregister
|
||||||
|
@ -13,7 +13,7 @@ services:
|
|||||||
PYTHON_MONGO_PORT: ${PYTHON_MONGO_PORT:-27017}
|
PYTHON_MONGO_PORT: ${PYTHON_MONGO_PORT:-27017}
|
||||||
PYTHON_MONGO_DATABASE: ${PYTHON_MONGO_DATABASE:-transparenzregister}
|
PYTHON_MONGO_DATABASE: ${PYTHON_MONGO_DATABASE:-transparenzregister}
|
||||||
deploy:
|
deploy:
|
||||||
replicas: 0
|
replicas: 1
|
||||||
restart: on-failure:3
|
restart: on-failure:3
|
||||||
|
|
||||||
mongodb:
|
mongodb:
|
||||||
|
@ -49,7 +49,7 @@ def fetch_news_cli() -> None: # pragma: no cover
|
|||||||
|
|
||||||
while True:
|
while True:
|
||||||
run_pending()
|
run_pending()
|
||||||
time.sleep(30)
|
time.sleep(1)
|
||||||
|
|
||||||
|
|
||||||
def schedule(config_provider: ConfigProvider) -> int:
|
def schedule(config_provider: ConfigProvider) -> int:
|
||||||
|
@ -64,9 +64,14 @@ class HandelsblattRSS(BaseNewsExtractor):
|
|||||||
"safebrowsing.enabled": True,
|
"safebrowsing.enabled": True,
|
||||||
}
|
}
|
||||||
options.add_argument("--headless=new")
|
options.add_argument("--headless=new")
|
||||||
|
options.add_argument("--disable-gpu")
|
||||||
options.add_experimental_option("prefs", preferences)
|
options.add_experimental_option("prefs", preferences)
|
||||||
options.add_experimental_option("excludeSwitches", ["enable-logging"])
|
options.add_experimental_option("excludeSwitches", ["enable-logging"])
|
||||||
|
|
||||||
|
# Arguments required for running Chrome in Docker
|
||||||
|
options.add_argument("--no-sandbox")
|
||||||
|
options.add_argument("--disable-dev-shm-usage")
|
||||||
|
|
||||||
driver = webdriver.Chrome(options=options)
|
driver = webdriver.Chrome(options=options)
|
||||||
driver.get(url)
|
driver.get(url)
|
||||||
content = driver.page_source
|
content = driver.page_source
|
||||||
|
Loading…
x
Reference in New Issue
Block a user