Image_generation / process_req.py
manasdhir's picture
minor changes
5d1cbd9
raw
history blame contribute delete
768 Bytes
from doc_anal import DocumentIntelligenceService
from clean import process_content
from chunking import split_content_into_batches
from embedding_gen import embed_docling_chunks_azure
from qdrant_setup import batch_upsert_docling_chunks_to_qdrant
from docling_chunker_fixed import split_to_docling_chunks
# Module-level service instance, constructed once when this module is imported
# and reused by process_doc() for every call to doc.analyze(...).
doc=DocumentIntelligenceService()
def process_doc(url: str) -> None:
    """Run the document at *url* through the analyze/chunk/embed/upsert pipeline.

    Steps, in order:
      1. ``doc.analyze(source=url)`` is called and its duration is printed.
      2. The text at ``analysis["analyzeResult"]["content"]`` is split with
         ``split_to_docling_chunks``.
      3. The chunks are passed to ``embed_docling_chunks_azure``.
      4. The ``'chunks_data'`` entry of the embedding result is handed to
         ``batch_upsert_docling_chunks_to_qdrant``.

    Args:
        url: Passed unchanged as the ``source`` argument of ``doc.analyze``.

    Returns:
        None. Progress is reported via ``print`` only.

    No exceptions are caught here; anything raised by the helpers, or by the
    result lookups above, propagates to the caller.
    """
    import time

    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # (or go negative) if the system wall clock is adjusted mid-call.
    started = time.perf_counter()
    analysis = doc.analyze(source=url)
    elapsed = time.perf_counter() - started
    print("doc processing time", elapsed)

    content = analysis["analyzeResult"]["content"]
    batches = split_to_docling_chunks(content)
    emb_cont = embed_docling_chunks_azure(docling_chunks=batches)
    batch_upsert_docling_chunks_to_qdrant(emb_cont['chunks_data'])
    print("processing complete")