PDF-Rag-System / app.py
Mohamed2210's picture
Upload 3 files
471f9ee verified
import os
import gradio as gr
from dotenv import load_dotenv
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.retrievers import BM25Retriever
from langchain.retrievers import EnsembleRetriever
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Qdrant
from langchain_docling import DoclingLoader
from langchain_docling.loader import ExportType
from transformers import AutoTokenizer
# ========== Load API KEYS ==========
load_dotenv()
huggingfacehub_api_token = os.getenv("HF_TOKEN")
Qdrant_api_key = os.getenv("QDRANT_API_KEY")
# ========== LLM ==========
llm = ChatHuggingFace(
llm=HuggingFaceEndpoint(
repo_id="Qwen/Qwen3-235B-A22B-Instruct-2507",
provider="together",
huggingfacehub_api_token=huggingfacehub_api_token,
task="conversational"
)
)
MODEL_NAME = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
retriever_cache = {}
# ========== Prepare Data ==========
def prepare_data(filepath):
loader = DoclingLoader(file_path=filepath, export_type=ExportType.MARKDOWN).load()
from langchain.text_splitter import CharacterTextSplitter
text_splitter = CharacterTextSplitter.from_huggingface_tokenizer(
tokenizer, chunk_size=300, chunk_overlap=20
)
normal_chunks = text_splitter.create_documents(
[loader[0].model_dump()['page_content']],
metadatas=[loader[0].model_dump()['metadata']]
)
return normal_chunks
# ========== Hybrid Search ==========
def Hybrid_search(normal_chunks):
embedding_llm = HuggingFaceEmbeddings(model_name=MODEL_NAME)
qdrant_store = Qdrant.from_documents(
documents=normal_chunks,
embedding=embedding_llm,
url="https://3464a78e-425b-4e6b-bc10-5b0333dc9ad1.us-east4-0.gcp.cloud.qdrant.io:6333",
api_key=Qdrant_api_key,
collection_name="my_collection",
force_recreate=True
)
dense_retriever = qdrant_store.as_retriever(
search_kwargs={"k": 8, "score_threshold": 0.25}
)
bm25_retriever = BM25Retriever.from_documents(normal_chunks)
bm25_retriever.k = 8
hybrid_retriever = EnsembleRetriever(
retrievers=[bm25_retriever, dense_retriever],
weights=[0.4, 0.6]
)
return hybrid_retriever
# ========== Call Model ==========
def call_model(question, retriever):
qna_template = """
You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the question.
If you don't know the answer, just say that you don't know.
Use three sentences maximum and keep the answer concise.
Question: {question}
Context: {context}
Answer:
"""
from langchain.prompts import PromptTemplate
qna_prompt = PromptTemplate(
template=qna_template,
input_variables=['context', 'question']
)
stuff_chain = create_stuff_documents_chain(llm, prompt=qna_prompt)
retrieved_docs = retriever.get_relevant_documents(question)
answer = stuff_chain.invoke(
{
"context": retrieved_docs,
"question": question
}
)
return answer
# ========== Gradio App ==========
def upload_pdf(file_path, progress=gr.Progress()):
progress(0, desc="Preparing data...")
chunks = prepare_data(file_path)
progress(0.5, desc="Building retrievers...")
retriever_cache["retriever"] = Hybrid_search(chunks)
progress(1.0, desc="Done βœ…")
return "βœ… PDF uploaded successfully! Now ask your questions."
def qa_interface(question):
if "retriever" not in retriever_cache:
return "❌ Please upload a PDF first."
return call_model(question, retriever_cache["retriever"])
with gr.Blocks() as demo:
gr.Markdown("## πŸ“š PDF Q&A with Hybrid Search + LLM")
with gr.Row():
file_input = gr.File(label="Upload PDF", type="filepath")
upload_output = gr.Textbox(label="Upload Status")
upload_btn = gr.Button("Upload PDF")
upload_btn.click(
fn=upload_pdf,
inputs=[file_input],
outputs=[upload_output]
)
question_input = gr.Textbox(label="Ask a question")
output = gr.Markdown()
submit_btn = gr.Button("Get Answer")
submit_btn.click(
fn=qa_interface,
inputs=[question_input],
outputs=output
)
demo.launch(share=True)