""" Example script showing how to use the document ingestion system programmatically. """ from ingestion import DocumentIngestion def main(): # Initialize the ingestion system ingestion = DocumentIngestion(embedding_model="all-MiniLM-L6-v2") # Example 1: Process PDFs pdf_paths = [ # Add your PDF file paths here # "path/to/document1.pdf", # "path/to/document2.pdf", ] # Example 2: Process URLs urls = [ # Add URLs here # "https://en.wikipedia.org/wiki/Artificial_intelligence", # "https://huggingface.co/docs/transformers", ] # Process documents if pdf_paths or urls: print("Processing documents...") documents = ingestion.process_documents(pdf_paths=pdf_paths, urls=urls) print(f"Processed {len(documents)} document chunks") # Build vector store ingestion.build_vector_store() # Save vector store ingestion.save("data/vector_store") # Example search query = "What is artificial intelligence?" results = ingestion.search(query, k=3) print(f"\nSearch results for: '{query}'") print("-" * 50) for i, result in enumerate(results, 1): print(f"\nResult {i}:") print(f"Source: {result['metadata']['source']}") print(f"Score: {result['score']:.4f}") print(f"Text: {result['text'][:200]}...") else: print("Please add PDF paths or URLs to the script to test ingestion.") if __name__ == "__main__": main()