from doc_anal import DocumentIntelligenceService
from clean import process_content
from chunking import split_content_into_batches
from embedding_gen import embed_docling_chunks_azure
from qdrant_setup import batch_upsert_docling_chunks_to_qdrant
from docling_chunker_fixed import split_to_docling_chunks
# Module-level service instance, constructed once when this module is imported
# and used by process_doc() below for every analyze call.
doc=DocumentIntelligenceService()

def process_doc(url:str) -> None:
    """Run the ingestion pipeline for a single document source.

    Steps, in order:
      1. Analyze ``url`` with the module-level ``doc`` service and print how
         long that call took.
      2. Read the text at ``analysis["analyzeResult"]["content"]``.
      3. Split it with ``split_to_docling_chunks``.
      4. Embed the chunks with ``embed_docling_chunks_azure``.
      5. Hand ``chunks_data`` from the embedding result to
         ``batch_upsert_docling_chunks_to_qdrant``.

    Args:
        url: Passed unchanged as ``source`` to ``doc.analyze``.

    Returns:
        None. Progress is reported via ``print``.

    Raises:
        Nothing is caught here: a missing ``analyzeResult``/``content``/
        ``chunks_data`` key, or any exception raised by the helper calls,
        propagates to the caller.
    """
    import time

    # perf_counter() is monotonic, so the reported duration cannot go negative
    # or jump if the system wall clock is adjusted while the analysis runs.
    started = time.perf_counter()
    analysis = doc.analyze(source=url)
    elapsed = time.perf_counter() - started
    print("doc processing time", elapsed)

    content = analysis["analyzeResult"]["content"]
    chunks = split_to_docling_chunks(content)
    embedded = embed_docling_chunks_azure(docling_chunks=chunks)
    batch_upsert_docling_chunks_to_qdrant(embedded['chunks_data'])
    print("processing complete")