File size: 768 Bytes
5d07afe
 
 
5d1cbd9
 
 
5d07afe
 
 
 
 
 
 
5d1cbd9
5d07afe
5d1cbd9
 
 
5d07afe
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
from doc_anal import DocumentIntelligenceService
from clean import process_content
from chunking import split_content_into_batches
from embedding_gen import embed_docling_chunks_azure
from qdrant_setup import batch_upsert_docling_chunks_to_qdrant
from docling_chunker_fixed import split_to_docling_chunks
# Shared service instance, constructed once when this module is imported and
# reused by process_doc() for every document it analyzes.
doc=DocumentIntelligenceService()

def process_doc(url: str) -> None:
    """Run the ingestion pipeline for the document located at *url*.

    Steps, in order:
      1. Analyze the document with the module-level ``doc`` service and
         print how long that call took.
      2. Read the text from ``analysis["analyzeResult"]["content"]``.
      3. Split the text with ``split_to_docling_chunks``.
      4. Embed the chunks with ``embed_docling_chunks_azure``.
      5. Pass the ``'chunks_data'`` entry of the embedding result to
         ``batch_upsert_docling_chunks_to_qdrant``.

    Args:
        url: Document source forwarded as ``source=`` to ``doc.analyze``.

    Returns:
        None. Progress is reported via ``print``.

    Raises:
        KeyError: If the analysis result lacks ``"analyzeResult"`` /
            ``"content"``, or the embedding result lacks ``"chunks_data"``
            (assuming both are plain dicts -- TODO confirm).
        Any exception raised by the underlying service/helper calls is
        propagated unchanged.
    """
    import time

    # perf_counter() is monotonic and high-resolution, so the reported
    # duration cannot be skewed by system clock adjustments the way a
    # time.time() difference can.
    started = time.perf_counter()
    analysis = doc.analyze(source=url)
    elapsed = time.perf_counter() - started
    print("doc processing time", elapsed)

    content = analysis["analyzeResult"]["content"]
    chunks = split_to_docling_chunks(content)
    embedded = embed_docling_chunks_azure(docling_chunks=chunks)
    batch_upsert_docling_chunks_to_qdrant(embedded["chunks_data"])
    print("processing complete")