Synced repo using 'sync_with_huggingface' Github Action
Browse files
- Dockerfile +2 -0
- scrape_chroma.py +2 -1
Dockerfile
CHANGED

@@ -18,6 +18,8 @@ RUN pip install --no-cache-dir -r requirements.txt
 18     COPY . .
 19
 20     RUN chmod +x /app/entrypoint.sh
 21 +   RUN mkdir -p /app/data && chmod -R 777 /app/data
 22 +
 23
 24     EXPOSE 8501
 25
scrape_chroma.py
CHANGED

@@ -43,7 +43,8 @@ def scrape_and_store():
 43     print(f"Fetched {len(all_titles_sources)} unique titles.")
 44
 45     # Save to CSV
 46 -   save_titles_to_csv(all_titles_sources, filename="pib_titles.csv")
 46 +   save_titles_to_csv(all_titles_sources, filename="data/pib_titles.csv")
 47 +
 48
 49     # Prepare for ChromaDB
 50     documents = [title for title, source in all_titles_sources]