build: Dockerize apps/fetch_news.py as ingestor

This commit is contained in:
TrisNol 2023-11-11 13:19:23 +01:00
parent 170056bf58
commit 5dcf8ecf55
5 changed files with 23 additions and 4 deletions

View File

@@ -24,9 +24,23 @@ FROM base as ingest
LABEL PART="DATA_INGESTOR" LABEL PART="DATA_INGESTOR"
### Install Chrome ###
# Update the package lists
RUN apt-get update
# Install wget and unzip
RUN apt-get install -y wget unzip
# Install Google Chrome
RUN wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
RUN dpkg -i google-chrome-stable_current_amd64.deb; apt-get -fy install
RUN pip install --find-links=dist aki-prj23-transparenzregister[ingest] --no-cache-dir && \ RUN pip install --find-links=dist aki-prj23-transparenzregister[ingest] --no-cache-dir && \
rm dist/ -R rm dist/ -R
ENTRYPOINT ["fetch-news-schedule", "ENV"]
CMD ["--level", "DEBUG"]
FROM base as data-transformation FROM base as data-transformation
LABEL PART="DATA-TRANSFORMATION" LABEL PART="DATA-TRANSFORMATION"

View File

@@ -56,12 +56,12 @@ the following layout:
``` ```
PYTHON_POSTGRES_USERNAME=postgres PYTHON_POSTGRES_USERNAME=postgres
PYTHON_POSTGRES_PASSWORD=postgres PYTHON_POSTGRES_PASSWORD=postgres
PYTHON_POSTGRES_HOST=localhost PYTHON_POSTGRES_HOST=postgres
PYTHON_POSTGRES_DATABASE=postgres PYTHON_POSTGRES_DATABASE=postgres
PYTHON_POSTGRES_PORT=5432 PYTHON_POSTGRES_PORT=5432
PYTHON_MONGO_USERNAME=username PYTHON_MONGO_USERNAME=username
PYTHON_MONGO_HOST=localhost PYTHON_MONGO_HOST=mongodb
PYTHON_MONGO_PASSWORD=password PYTHON_MONGO_PASSWORD=password
PYTHON_MONGO_PORT=27017 PYTHON_MONGO_PORT=27017
PYTHON_MONGO_DATABASE=transparenzregister PYTHON_MONGO_DATABASE=transparenzregister

View File

@@ -13,7 +13,7 @@ services:
PYTHON_MONGO_PORT: ${PYTHON_MONGO_PORT:-27017} PYTHON_MONGO_PORT: ${PYTHON_MONGO_PORT:-27017}
PYTHON_MONGO_DATABASE: ${PYTHON_MONGO_DATABASE:-transparenzregister} PYTHON_MONGO_DATABASE: ${PYTHON_MONGO_DATABASE:-transparenzregister}
deploy: deploy:
replicas: 0 replicas: 1
restart: on-failure:3 restart: on-failure:3
mongodb: mongodb:

View File

@@ -49,7 +49,7 @@ def fetch_news_cli() -> None: # pragma: no cover
while True: while True:
run_pending() run_pending()
time.sleep(30) time.sleep(1)
def schedule(config_provider: ConfigProvider) -> int: def schedule(config_provider: ConfigProvider) -> int:

View File

@@ -64,9 +64,14 @@ class HandelsblattRSS(BaseNewsExtractor):
"safebrowsing.enabled": True, "safebrowsing.enabled": True,
} }
options.add_argument("--headless=new") options.add_argument("--headless=new")
options.add_argument("--disable-gpu")
options.add_experimental_option("prefs", preferences) options.add_experimental_option("prefs", preferences)
options.add_experimental_option("excludeSwitches", ["enable-logging"]) options.add_experimental_option("excludeSwitches", ["enable-logging"])
# Arguments required for running Chrome in Docker
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")
driver = webdriver.Chrome(options=options) driver = webdriver.Chrome(options=options)
driver.get(url) driver.get(url)
content = driver.page_source content = driver.page_source