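# Source: app.py (4.43 kB) from a Hugging Face repository, uploaded by pkraman06.
# Commit deac4cd (verified): "Update app.py".
# (File-viewer controls captured with the page: Raw, History, Blame, Contribute, Delete.)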
import os
import gradio as gr
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Imported Chroma from the dedicated langchain_chroma package
from langchain_chroma import Chroma
from langchain_huggingface import (
HuggingFaceEmbeddings,
HuggingFaceEndpoint
)
# =====================================================
# HUGGING FACE TOKEN
# =====================================================
# API token read from the HF_TOKEN environment variable; the value is None
# when that variable is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")
# =====================================================
# EMBEDDING MODEL
# =====================================================
# Embedding model handed to the Chroma vector store. It is loaded on the CPU
# and configured to return normalized embeddings.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"device": "cpu"},
    encode_kwargs={"normalize_embeddings": True},
)
# =====================================================
# LLM MODEL
# =====================================================
# Text-generation endpoint used to write the answers. It authenticates with
# HF_TOKEN, samples at temperature 0.5 and generates at most 512 new tokens.
llm = HuggingFaceEndpoint(
    repo_id="NousResearch/Llama-2-7b-chat-hf",
    task="text-generation",
    huggingfacehub_api_token=HF_TOKEN,
    temperature=0.5,
    max_new_tokens=512,
)
# =====================================================
# VECTOR DATABASE
# =====================================================
# Module-level handle to the Chroma vector store. It stays None until
# process_pdf() has indexed a PDF; ask_question() refuses to answer while
# it is still None.
db = None
# =====================================================
# PDF PROCESSING FUNCTION
# =====================================================
def process_pdf(pdf_file):
    """Load a PDF, split it into chunks and index them in a fresh Chroma store.

    Args:
        pdf_file: The uploaded file as delivered by the Gradio File component.
            Either a path string or an object exposing the path as ``.name``
            is accepted. ``None`` means nothing was uploaded.

    Returns:
        A status message for the UI. Errors are reported as text instead of
        being raised so they show up in the status box.

    Side effects:
        On success the module-level ``db`` is replaced by the new vector
        store and the previous store's collection is deleted. On failure
        ``db`` is left untouched, so an earlier PDF stays queryable.
    """
    global db
    if pdf_file is None:
        return "Please upload a PDF file."

    # Function-scope import: only this function needs it.
    import uuid

    try:
        # Accept both a plain path and a file-like wrapper with a .name path.
        pdf_path = getattr(pdf_file, "name", pdf_file)
        documents = PyPDFLoader(pdf_path).load()

        splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
        )
        docs = splitter.split_documents(documents)
        if not docs:
            # E.g. a scanned PDF without a text layer: nothing to index.
            return "Error processing PDF: no extractable text was found in the file."

        # In-memory Chroma stores created in one process share the same
        # backing client, so reusing the default collection name would append
        # this PDF's chunks to those of earlier uploads. A unique collection
        # name per upload keeps every index isolated.
        new_db = Chroma.from_documents(
            documents=docs,
            embedding=embedding_model,
            collection_name=f"pdf-{uuid.uuid4().hex}",
        )
    except Exception as e:
        return f"Error processing PDF: {str(e)}"

    # Swap only after the new index was built successfully.
    previous_db, db = db, new_db
    if previous_db is not None:
        try:
            previous_db.delete_collection()
        except Exception:
            # Best-effort cleanup: a failure here must not hide the fact that
            # the new PDF was indexed correctly.
            pass
    return "PDF processed and indexed in ChromaDB successfully!"
# =====================================================
# QUESTION ANSWERING FUNCTION
# =====================================================
def ask_question(question, k=3):
    """Answer a question from the chunks of the currently indexed PDF.

    Args:
        question: The user's question. ``None``, non-string and
            whitespace-only values are treated as "no question entered".
        k: Number of most similar chunks to retrieve as context
            (defaults to 3, the value that was previously hard-coded).

    Returns:
        The model's answer, or a human-readable message when no PDF has been
        processed, no question was entered, nothing was retrieved, or an
        error occurred. Errors are returned as text instead of being raised
        so they show up in the answer box.
    """
    global db
    if db is None:
        return "Please upload and process a PDF first."
    # Validate before calling .strip(): a None/non-string value would
    # otherwise raise outside the try block below.
    if not isinstance(question, str) or not question.strip():
        return "Please enter a question."

    try:
        # Retrieve the k chunks most similar to the question.
        docs = db.similarity_search(question, k=k)
        if not docs:
            # Nothing to ground an answer on; skip the LLM call and return
            # the same sentence the prompt asks the model to use.
            return "I could not find the answer in the PDF."

        context = "\n\n".join(doc.page_content for doc in docs)

        # Built line by line so the prompt text does not depend on how this
        # function body is indented.
        prompt = (
            "You are a helpful PDF question answering assistant.\n"
            "Answer the question ONLY from the provided context.\n"
            "If the answer is not in the context, say:\n"
            '"I could not find the answer in the PDF."\n'
            "Context:\n"
            f"{context}\n"
            "Question:\n"
            f"{question}\n"
            "Answer:"
        )
        response = llm.invoke(prompt)
        # Completions often start with stray whitespace after "Answer:".
        return response.strip() if isinstance(response, str) else response
    except Exception as e:
        return f"Error generating answer: {str(e)}"
# =====================================================
# GRADIO UI
# =====================================================
# Components are declared top to bottom in display order; the two button
# handlers are attached once every component exists.
with gr.Blocks() as demo:
    gr.Markdown("# PDF Question Answering Bot (Powered by ChromaDB)")

    # Upload and indexing controls.
    pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
    process_btn = gr.Button("Process PDF")
    process_output = gr.Textbox(label="PDF Status")

    # Question and answer controls.
    question_input = gr.Textbox(label="Ask a Question")
    ask_btn = gr.Button("Get Answer")
    answer_output = gr.Textbox(label="Answer", lines=10)

    # "Process PDF" indexes the upload and reports the status.
    process_btn.click(fn=process_pdf, inputs=pdf_input, outputs=process_output)
    # "Get Answer" sends the question to the QA function and shows the reply.
    ask_btn.click(fn=ask_question, inputs=question_input, outputs=answer_output)
# =====================================================
# LAUNCH APP
# =====================================================
# Start the Gradio server only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    demo.launch()