Spaces:
Sleeping
Sleeping
| from doc_anal import DocumentIntelligenceService | |
| from clean import process_content | |
| from chunking import split_content_into_batches | |
| from embedding_gen import embed_docling_chunks_azure | |
| from qdrant_setup import batch_upsert_docling_chunks_to_qdrant | |
| from docling_chunker_fixed import split_to_docling_chunks | |
# Shared module-level service instance; process_doc() calls its analyze() method.
# NOTE(review): constructed at import time — presumably reads credentials/config
# in its constructor; confirm against doc_anal.DocumentIntelligenceService.
doc = DocumentIntelligenceService()
def process_doc(url: str) -> None:
    """Run the document pipeline for *url*: analyze, chunk, embed, upsert.

    Steps, in order:
      1. ``doc.analyze(source=url)`` is called and its duration is printed.
      2. The text under ``analysis["analyzeResult"]["content"]`` is passed to
         ``split_to_docling_chunks``.
      3. The chunks are passed to ``embed_docling_chunks_azure``.
      4. The ``"chunks_data"`` entry of the embedding result is passed to
         ``batch_upsert_docling_chunks_to_qdrant``.

    Args:
        url: Source location handed to ``doc.analyze`` as ``source``.
            NOTE(review): presumably an http(s) URL to a document; confirm
            against DocumentIntelligenceService.analyze.

    Returns:
        None. Progress is reported via ``print``.

    Raises:
        KeyError: If the analysis result lacks ``"analyzeResult"``/``"content"``
            or the embedding result lacks ``"chunks_data"`` (assuming both are
            plain mappings — TODO confirm).
        Any exception raised by the analysis, chunking, embedding or upsert
        helpers propagates unchanged.
    """
    import time

    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by wall-clock adjustments (NTP sync, DST, manual changes) mid-request.
    started = time.perf_counter()
    analysis = doc.analyze(source=url)
    elapsed = time.perf_counter() - started
    print("doc processing time", elapsed)

    content = analysis["analyzeResult"]["content"]
    chunks = split_to_docling_chunks(content)
    embedded = embed_docling_chunks_azure(docling_chunks=chunks)
    batch_upsert_docling_chunks_to_qdrant(embedded["chunks_data"])
    print("processing complete")