Asish Karthikeya Gogineni committed on
Commit
733ecfe
·
1 Parent(s): c06be9c

fix: Add detailed logging and error handling for source ingestion

Browse files

- Better error messages for each stage of ingestion
- Creates extraction directory proactively
- Logs handler type, paths, and document counts
- Helps debug issues on Hugging Face

Files changed (1) hide show
  1. code_chatbot/universal_ingestor.py +34 -8
code_chatbot/universal_ingestor.py CHANGED
@@ -431,17 +431,43 @@ def process_source(source: str, extract_to: str) -> Tuple[list, str]:
431
  Returns:
432
  Tuple of (documents, local_path)
433
  """
434
- ingestor = UniversalIngestor(source, local_dir=extract_to)
 
435
 
436
- if not ingestor.download():
437
- raise ValueError(f"Failed to download/prepare source: {source}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
438
 
439
  documents = []
440
- for content, metadata in ingestor.walk(get_content=True):
441
- documents.append(Document(
442
- page_content=content,
443
- metadata=metadata
444
- ))
 
 
 
 
 
445
 
446
  return documents, ingestor.local_path
447
 
 
431
  Returns:
432
  Tuple of (documents, local_path)
433
  """
434
+ logger.info(f"Processing source: {source}")
435
+ logger.info(f"Extract destination: {extract_to}")
436
 
437
+ # Ensure the extraction directory exists
438
+ try:
439
+ os.makedirs(extract_to, exist_ok=True)
440
+ logger.info(f"Created/verified extract directory: {extract_to}")
441
+ except Exception as e:
442
+ logger.error(f"Failed to create extract directory {extract_to}: {e}")
443
+ raise ValueError(f"Cannot create extraction directory: {e}")
444
+
445
+ try:
446
+ ingestor = UniversalIngestor(source, local_dir=extract_to)
447
+ logger.info(f"Ingestor created with handler: {type(ingestor.delegate).__name__}")
448
+ except Exception as e:
449
+ logger.error(f"Failed to create ingestor: {e}")
450
+ raise ValueError(f"Cannot process source '{source}': {e}")
451
+
452
+ try:
453
+ if not ingestor.download():
454
+ raise ValueError(f"Failed to download/prepare source: {source}")
455
+ logger.info(f"Download complete. Local path: {ingestor.local_path}")
456
+ except Exception as e:
457
+ logger.error(f"Download failed: {e}")
458
+ raise ValueError(f"Failed to download/prepare source: {source} - {e}")
459
 
460
  documents = []
461
+ try:
462
+ for content, metadata in ingestor.walk(get_content=True):
463
+ documents.append(Document(
464
+ page_content=content,
465
+ metadata=metadata
466
+ ))
467
+ logger.info(f"Ingested {len(documents)} documents")
468
+ except Exception as e:
469
+ logger.error(f"Failed to walk documents: {e}")
470
+ raise ValueError(f"Failed to process files: {e}")
471
 
472
  return documents, ingestor.local_path
473