Spaces:
Sleeping
Sleeping
File size: 768 Bytes
from doc_anal import DocumentIntelligenceService
from clean import process_content
from chunking import split_content_into_batches
from embedding_gen import embed_docling_chunks_azure
from qdrant_setup import batch_upsert_docling_chunks_to_qdrant
from docling_chunker_fixed import split_to_docling_chunks
# Shared service instance, created once at import time; process_doc calls
# its analyze() method.
doc=DocumentIntelligenceService()
def process_doc(url: str) -> None:
    """Analyze the document at *url*, chunk it, embed the chunks and upsert them.

    The steps run in order: ``doc.analyze`` -> ``split_to_docling_chunks`` ->
    ``embed_docling_chunks_azure`` -> ``batch_upsert_docling_chunks_to_qdrant``.
    The analysis duration and a completion message are reported via ``print``.

    Args:
        url: Forwarded to ``doc.analyze`` as its ``source`` argument.

    Raises:
        KeyError: Presumably, when the analysis result has no
            ``["analyzeResult"]["content"]`` entry or the embedding result has
            no ``"chunks_data"`` entry (assumes both are plain mappings --
            TODO confirm against the helper implementations).
    """
    import time

    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way a time.time() difference can.
    started = time.perf_counter()
    analysis = doc.analyze(source=url)
    elapsed = time.perf_counter() - started
    print("doc processing time", elapsed)

    content = analysis["analyzeResult"]["content"]
    docling_chunks = split_to_docling_chunks(content)
    embedded = embed_docling_chunks_azure(docling_chunks=docling_chunks)
    batch_upsert_docling_chunks_to_qdrant(embedded['chunks_data'])
    print("processing complete")
|