Spaces:
Running
Running
Asish Karthikeya Gogineni committed on
Commit ·
733ecfe
1
Parent(s): c06be9c
fix: Add detailed logging and error handling for source ingestion
Browse files
- Better error messages for each stage of ingestion
- Creates extraction directory proactively
- Logs handler type, paths, and document counts
- Helps debug issues on Hugging Face
code_chatbot/universal_ingestor.py
CHANGED
|
@@ -431,17 +431,43 @@ def process_source(source: str, extract_to: str) -> Tuple[list, str]:
|
|
| 431 |
Returns:
|
| 432 |
Tuple of (documents, local_path)
|
| 433 |
"""
|
| 434 |
-
|
|
|
|
| 435 |
|
| 436 |
-
|
| 437 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 438 |
|
| 439 |
documents = []
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 445 |
|
| 446 |
return documents, ingestor.local_path
|
| 447 |
|
|
|
|
| 431 |
Returns:
|
| 432 |
Tuple of (documents, local_path)
|
| 433 |
"""
|
| 434 |
+
logger.info(f"Processing source: {source}")
|
| 435 |
+
logger.info(f"Extract destination: {extract_to}")
|
| 436 |
|
| 437 |
+
# Ensure the extraction directory exists
|
| 438 |
+
try:
|
| 439 |
+
os.makedirs(extract_to, exist_ok=True)
|
| 440 |
+
logger.info(f"Created/verified extract directory: {extract_to}")
|
| 441 |
+
except Exception as e:
|
| 442 |
+
logger.error(f"Failed to create extract directory {extract_to}: {e}")
|
| 443 |
+
raise ValueError(f"Cannot create extraction directory: {e}")
|
| 444 |
+
|
| 445 |
+
try:
|
| 446 |
+
ingestor = UniversalIngestor(source, local_dir=extract_to)
|
| 447 |
+
logger.info(f"Ingestor created with handler: {type(ingestor.delegate).__name__}")
|
| 448 |
+
except Exception as e:
|
| 449 |
+
logger.error(f"Failed to create ingestor: {e}")
|
| 450 |
+
raise ValueError(f"Cannot process source '{source}': {e}")
|
| 451 |
+
|
| 452 |
+
try:
|
| 453 |
+
if not ingestor.download():
|
| 454 |
+
raise ValueError(f"Failed to download/prepare source: {source}")
|
| 455 |
+
logger.info(f"Download complete. Local path: {ingestor.local_path}")
|
| 456 |
+
except Exception as e:
|
| 457 |
+
logger.error(f"Download failed: {e}")
|
| 458 |
+
raise ValueError(f"Failed to download/prepare source: {source} - {e}")
|
| 459 |
|
| 460 |
documents = []
|
| 461 |
+
try:
|
| 462 |
+
for content, metadata in ingestor.walk(get_content=True):
|
| 463 |
+
documents.append(Document(
|
| 464 |
+
page_content=content,
|
| 465 |
+
metadata=metadata
|
| 466 |
+
))
|
| 467 |
+
logger.info(f"Ingested {len(documents)} documents")
|
| 468 |
+
except Exception as e:
|
| 469 |
+
logger.error(f"Failed to walk documents: {e}")
|
| 470 |
+
raise ValueError(f"Failed to process files: {e}")
|
| 471 |
|
| 472 |
return documents, ingestor.local_path
|
| 473 |
|