mirror of
https://github.com/fhswf/aki_prj23_transparenzregister.git
synced 2025-04-22 22:12:53 +02:00
build: Dockerize apps/fetch_news.py as ingestor
This commit is contained in:
parent
170056bf58
commit
5dcf8ecf55
14
Dockerfile
14
Dockerfile
@ -24,9 +24,23 @@ FROM base as ingest
|
||||
|
||||
LABEL PART="DATA_INGESTOR"
|
||||
|
||||
### Install Chrome ###
|
||||
# Update the package lists
|
||||
RUN apt-get update
|
||||
|
||||
# Install wget and unzip
|
||||
RUN apt-get install -y wget unzip
|
||||
|
||||
# Install Google Chrome
|
||||
RUN wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
|
||||
RUN dpkg -i google-chrome-stable_current_amd64.deb; apt-get -fy install
|
||||
|
||||
RUN pip install --find-links=dist aki-prj23-transparenzregister[ingest] --no-cache-dir && \
|
||||
rm dist/ -R
|
||||
|
||||
ENTRYPOINT ["fetch-news-schedule", "ENV"]
|
||||
CMD ["--level", "DEBUG"]
|
||||
|
||||
FROM base as data-transformation
|
||||
|
||||
LABEL PART="DATA-TRANSFORMATION"
|
||||
|
@ -56,12 +56,12 @@ the following layout:
|
||||
```
|
||||
PYTHON_POSTGRES_USERNAME=postgres
|
||||
PYTHON_POSTGRES_PASSWORD=postgres
|
||||
PYTHON_POSTGRES_HOST=localhost
|
||||
PYTHON_POSTGRES_HOST=postgres
|
||||
PYTHON_POSTGRES_DATABASE=postgres
|
||||
PYTHON_POSTGRES_PORT=5432
|
||||
|
||||
PYTHON_MONGO_USERNAME=username
|
||||
PYTHON_MONGO_HOST=localhost
|
||||
PYTHON_MONGO_HOST=mongodb
|
||||
PYTHON_MONGO_PASSWORD=password
|
||||
PYTHON_MONGO_PORT=27017
|
||||
PYTHON_MONGO_DATABASE=transparenzregister
|
||||
|
@ -13,7 +13,7 @@ services:
|
||||
PYTHON_MONGO_PORT: ${PYTHON_MONGO_PORT:-27017}
|
||||
PYTHON_MONGO_DATABASE: ${PYTHON_MONGO_DATABASE:-transparenzregister}
|
||||
deploy:
|
||||
replicas: 0
|
||||
replicas: 1
|
||||
restart: on-failure:3
|
||||
|
||||
mongodb:
|
||||
|
@ -49,7 +49,7 @@ def fetch_news_cli() -> None: # pragma: no cover
|
||||
|
||||
while True:
|
||||
run_pending()
|
||||
time.sleep(30)
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
def schedule(config_provider: ConfigProvider) -> int:
|
||||
|
@ -64,9 +64,14 @@ class HandelsblattRSS(BaseNewsExtractor):
|
||||
"safebrowsing.enabled": True,
|
||||
}
|
||||
options.add_argument("--headless=new")
|
||||
options.add_argument("--disable-gpu")
|
||||
options.add_experimental_option("prefs", preferences)
|
||||
options.add_experimental_option("excludeSwitches", ["enable-logging"])
|
||||
|
||||
# Arguments required for running Chrome in Docker
|
||||
options.add_argument("--no-sandbox")
|
||||
options.add_argument("--disable-dev-shm-usage")
|
||||
|
||||
driver = webdriver.Chrome(options=options)
|
||||
driver.get(url)
|
||||
content = driver.page_source
|
||||
|
Loading…
x
Reference in New Issue
Block a user