File size: 4,730 Bytes
471f9ee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
import os
import gradio as gr
from dotenv import load_dotenv
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.retrievers import BM25Retriever
from langchain.retrievers import EnsembleRetriever
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Qdrant
from langchain_docling import DoclingLoader
from langchain_docling.loader import ExportType
from transformers import AutoTokenizer

# ========== Load API KEYS ==========
# Read secrets from a local .env file into the process environment.
load_dotenv()
# Hugging Face Inference token; None if HF_TOKEN is unset (endpoint calls will fail).
huggingfacehub_api_token = os.getenv("HF_TOKEN")
# Qdrant Cloud API key used by Hybrid_search when (re)creating the collection.
Qdrant_api_key = os.getenv("QDRANT_API_KEY")

# ========== LLM ==========
# Chat wrapper over a hosted endpoint; "together" routes the request to the
# Together AI provider via the Hugging Face Inference Providers service.
llm = ChatHuggingFace(
    llm=HuggingFaceEndpoint(
        repo_id="Qwen/Qwen3-235B-A22B-Instruct-2507",
        provider="together",
        huggingfacehub_api_token=huggingfacehub_api_token,
        task="conversational"
    )
)

# Embedding model name is shared between the tokenizer (chunk sizing in
# prepare_data) and HuggingFaceEmbeddings (dense retrieval) so chunk sizes
# are measured in the same tokens the embedder consumes.
MODEL_NAME = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)


# Module-level cache holding the retriever built for the last uploaded PDF
# under the key "retriever"; shared by upload_pdf and qa_interface.
retriever_cache = {}

# ========== Prepare Data ==========
def prepare_data(filepath):
    """Load a document with Docling and split it into embedder-sized chunks.

    Args:
        filepath: Path to the document (e.g. a PDF) to ingest.

    Returns:
        List of LangChain ``Document`` chunks of ~300 tokens with a 20-token
        overlap. Sizes are measured with the embedding model's own tokenizer
        so every chunk fits the embedder's context window.
    """
    docs = DoclingLoader(file_path=filepath, export_type=ExportType.MARKDOWN).load()
    from langchain.text_splitter import CharacterTextSplitter
    text_splitter = CharacterTextSplitter.from_huggingface_tokenizer(
        tokenizer, chunk_size=300, chunk_overlap=20
    )
    # split_documents chunks every loaded document and carries each one's
    # metadata through (the previous version only chunked docs[0], silently
    # dropping any additional documents the loader produced).
    return text_splitter.split_documents(docs)

# ========== Hybrid Search ==========
def Hybrid_search(normal_chunks):
    """Build an ensemble retriever over the given chunks.

    Combines sparse BM25 keyword matching (weight 0.4) with dense vector
    similarity from a Qdrant collection (weight 0.6).

    Args:
        normal_chunks: List of LangChain ``Document`` chunks to index.

    Returns:
        An ``EnsembleRetriever`` ready for ``invoke``/retrieval calls.
    """
    embedding_llm = HuggingFaceEmbeddings(model_name=MODEL_NAME)

    # NOTE(review): cluster URL and collection name are hard-coded; consider
    # moving them to environment variables alongside QDRANT_API_KEY.
    # force_recreate wipes the collection on every upload, which matches the
    # single-document session model of this app.
    qdrant_store = Qdrant.from_documents(
        documents=normal_chunks,
        embedding=embedding_llm,
        url="https://3464a78e-425b-4e6b-bc10-5b0333dc9ad1.us-east4-0.gcp.cloud.qdrant.io:6333",
        api_key=Qdrant_api_key,
        collection_name="my_collection",
        force_recreate=True
    )

    # Bug fix: score_threshold in search_kwargs is silently ignored by
    # VectorStoreRetriever unless search_type="similarity_score_threshold"
    # is set explicitly.
    dense_retriever = qdrant_store.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"k": 8, "score_threshold": 0.25}
    )
    bm25_retriever = BM25Retriever.from_documents(normal_chunks)
    bm25_retriever.k = 8

    hybrid_retriever = EnsembleRetriever(
        retrievers=[bm25_retriever, dense_retriever],
        weights=[0.4, 0.6]
    )
    return hybrid_retriever

# ========== Call Model ==========
# ========== Call Model ==========
def call_model(question, retriever):
    """Answer a question with RAG: retrieve context, then prompt the LLM.

    Args:
        question: The user's natural-language question.
        retriever: Any LangChain retriever (here the hybrid ensemble).

    Returns:
        The model's answer as a string (stuff-chain output).
    """
    qna_template = """

    You are an assistant for question-answering tasks. 

    Use the following pieces of retrieved context to answer the question.

    If you don't know the answer, just say that you don't know. 

    Use three sentences maximum and keep the answer concise.

    Question: {question}

    Context: {context}

    Answer:

    """
    # PromptTemplate is already imported at module level; the redundant
    # function-local re-import was removed.
    qna_prompt = PromptTemplate(
        template=qna_template,
        input_variables=['context', 'question']
    )

    # "Stuff" chain: all retrieved documents are concatenated into {context}.
    stuff_chain = create_stuff_documents_chain(llm, prompt=qna_prompt)
    # invoke() replaces the deprecated get_relevant_documents() API.
    retrieved_docs = retriever.invoke(question)

    answer = stuff_chain.invoke(
        {
            "context": retrieved_docs,
            "question": question
        }
    )
    return answer

# ========== Gradio App ==========
def upload_pdf(file_path, progress=gr.Progress()):
    """Ingest an uploaded PDF and cache a hybrid retriever for it.

    Args:
        file_path: Filesystem path handed over by the gr.File component
            (None when the user clicks Upload without choosing a file).
        progress: Gradio progress tracker (injected by Gradio at call time;
            the default here is Gradio's documented dependency-injection idiom,
            not a shared mutable default).

    Returns:
        A status message string shown in the Upload Status textbox.
    """
    # Guard: without this, a click with no file selected crashes inside
    # DoclingLoader with a cryptic traceback instead of a clear message.
    if not file_path:
        return "❌ Please select a PDF file first."
    progress(0, desc="Preparing data...")
    chunks = prepare_data(file_path)
    progress(0.5, desc="Building retrievers...")
    retriever_cache["retriever"] = Hybrid_search(chunks)
    progress(1.0, desc="Done βœ…")
    return "βœ… PDF uploaded successfully! Now ask your questions."

def qa_interface(question):
    """Answer a question against the most recently uploaded PDF, if any."""
    retriever = retriever_cache.get("retriever")
    if retriever is None:
        return "❌ Please upload a PDF first."
    return call_model(question, retriever)

# ========== UI layout & event wiring ==========
# Component creation order inside the Blocks context defines the on-screen
# layout, so statements here must stay in this order.
with gr.Blocks() as demo:
    gr.Markdown("## πŸ“š PDF Q&A with Hybrid Search + LLM")
    
    with gr.Row():
        file_input = gr.File(label="Upload PDF", type="filepath")
        upload_output = gr.Textbox(label="Upload Status")

    # Upload flow: file path -> upload_pdf -> status text.
    upload_btn = gr.Button("Upload PDF")
    upload_btn.click(
        fn=upload_pdf,
        inputs=[file_input],
        outputs=[upload_output]
    )
    
    question_input = gr.Textbox(label="Ask a question")
    output = gr.Markdown()
    submit_btn = gr.Button("Get Answer")

    # Q&A flow: question text -> qa_interface -> rendered markdown answer.
    submit_btn.click(
        fn=qa_interface,
        inputs=[question_input],
        outputs=output
    )

# share=True exposes the app via a temporary public gradio.live URL.
demo.launch(share=True)