Spaces:
Sleeping
Sleeping
Update rss_processor.py
Browse files - rss_processor.py +2 -3
rss_processor.py
CHANGED
|
@@ -195,12 +195,11 @@ def process_and_store_articles(articles):
|
|
| 195 |
doc = Document(page_content=clean_text(article["description"]), metadata=metadata)
|
| 196 |
docs_to_add.append(doc)
|
| 197 |
ids_to_add.append(doc_id)
|
| 198 |
-
existing_ids.add(doc_id)
|
| 199 |
|
| 200 |
if docs_to_add:
|
| 201 |
try:
|
| 202 |
vector_db.add_documents(documents=docs_to_add, ids=ids_to_add)
|
| 203 |
-
vector_db.persist()
|
| 204 |
logger.info(f"Added {len(docs_to_add)} new articles to DB. Total in DB: {vector_db._collection.count()}")
|
| 205 |
except Exception as e:
|
| 206 |
logger.error(f"Error storing articles: {e}")
|
|
@@ -233,7 +232,7 @@ def upload_to_hf_hub():
|
|
| 233 |
repo_id=REPO_ID,
|
| 234 |
repo_type="dataset",
|
| 235 |
token=HF_API_TOKEN,
|
| 236 |
-
commit_message="Update RSS news database"
|
| 237 |
)
|
| 238 |
logger.info(f"Database folder '{LOCAL_DB_DIR}' uploaded to: {REPO_ID}")
|
| 239 |
except Exception as e:
|
|
|
|
| 195 |
doc = Document(page_content=clean_text(article["description"]), metadata=metadata)
|
| 196 |
docs_to_add.append(doc)
|
| 197 |
ids_to_add.append(doc_id)
|
|
|
|
| 198 |
|
| 199 |
if docs_to_add:
|
| 200 |
try:
|
| 201 |
vector_db.add_documents(documents=docs_to_add, ids=ids_to_add)
|
| 202 |
+
vector_db._client.persist()
|
| 203 |
logger.info(f"Added {len(docs_to_add)} new articles to DB. Total in DB: {vector_db._collection.count()}")
|
| 204 |
except Exception as e:
|
| 205 |
logger.error(f"Error storing articles: {e}")
|
|
|
|
| 232 |
repo_id=REPO_ID,
|
| 233 |
repo_type="dataset",
|
| 234 |
token=HF_API_TOKEN,
|
| 235 |
+
commit_message=f"Update RSS news database {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
|
| 236 |
)
|
| 237 |
logger.info(f"Database folder '{LOCAL_DB_DIR}' uploaded to: {REPO_ID}")
|
| 238 |
except Exception as e:
|