# app.py — AskYourDocs: upload a PDF, then ask questions about it or request a summary.
# Hugging Face Space by avinashkumarkashyap (revision b5b30ae, verified).
import os
import gradio as gr
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import HuggingFaceHub
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
# -- Set Hugging Face API token from secret --
# os.getenv returns None when the secret is missing, and assigning None into
# os.environ raises an opaque "TypeError: str expected" — fail fast with a
# clear message instead.
_hf_token = os.getenv("HF_API_TOKEN")
if not _hf_token:
    raise RuntimeError(
        "HF_API_TOKEN is not set. Add it as a Space secret so the "
        "Hugging Face Hub LLM can authenticate."
    )
os.environ["HUGGINGFACEHUB_API_TOKEN"] = _hf_token
# -- Shared LLM and Embedder (created once at import time, reused by every request) --
embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
llm = HuggingFaceHub(
    repo_id="google/flan-t5-large",
    model_kwargs={"temperature": 0.3, "max_length": 512}
)
# -- Internal state: chunks of the most recently indexed PDF, used by the summarizer --
global_chunks = []
def build_qa_chain(pdf_file):
    """Load a PDF, split it into chunks, index them in FAISS, and return a
    RetrievalQA chain bound to the shared LLM.

    Args:
        pdf_file: either an uploaded-file object exposing ``.name`` (classic
            Gradio behavior) or a plain filesystem path string (newer Gradio
            versions hand back the temp-file path directly).

    Returns:
        A ``RetrievalQA`` chain whose retriever is backed by the new index.

    Side effects:
        Stores the split chunks in ``global_chunks`` for ``summarize_document``.
    """
    global global_chunks
    # Newer Gradio versions pass the upload as a str path instead of a
    # file-like object with .name — accept both, backward-compatibly.
    path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
    loader = PyPDFLoader(path)
    docs = loader.load()
    chunks = RecursiveCharacterTextSplitter(
        chunk_size=500, chunk_overlap=50
    ).split_documents(docs)
    global_chunks = chunks  # store for summarizer
    db = FAISS.from_documents(chunks, embedder)
    return RetrievalQA.from_chain_type(llm=llm, retriever=db.as_retriever())
def summarize_document():
    """Summarize the most recently indexed PDF as concise bullet points.

    Returns:
        The LLM-generated summary, or a warning string when no document
        has been indexed yet.
    """
    if not global_chunks:
        return "⚠️ Please upload and index a document first."
    # Stitch every chunk back together into one text body.
    merged_text = " ".join(piece.page_content for piece in global_chunks)
    bullet_prompt = PromptTemplate(
        input_variables=["doc"],
        template="Please summarize the following document in a few concise bullet points:\n\n{doc}"
    )
    # Trim to the first 3000 characters so the input fits the model's window.
    return LLMChain(llm=llm, prompt=bullet_prompt).run(merged_text[:3000])
# --- Gradio UI ---
with gr.Blocks() as demo:
    gr.Markdown("# AskYourDocs 📄🤖\nUpload a PDF, then ask questions or get a summary.")
    pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
    build_btn = gr.Button("Index Document")
    summarize_btn = gr.Button("Summarize Document")
    status = gr.Markdown("")
    qa_state = gr.State()  # holds the per-session RetrievalQA chain between events
    question = gr.Textbox(label="Ask a question")
    answer = gr.Textbox(label="Answer", interactive=False)
    summary_output = gr.Textbox(label="Summary", lines=10, interactive=False)

    # Step 1: Build vector index
    def _index(pdf):
        """Index the uploaded PDF; return (status message, QA chain or None)."""
        if pdf is None:
            return "⚠️ Please upload a PDF first.", None
        chain = build_qa_chain(pdf)
        return "✅ Document indexed!", chain

    build_btn.click(_index, inputs=pdf_input, outputs=[status, qa_state])

    # Step 2: Ask questions
    def _ask(q, chain):
        """Answer a question against the indexed document."""
        if chain is None:
            return "⚠️ Upload and index a PDF first."
        # The textbox value can be None before the user types anything —
        # guard before calling .strip() to avoid an AttributeError.
        if not q or not q.strip():
            return "⚠️ Ask something..."
        return chain.run(q)

    question.submit(_ask, inputs=[question, qa_state], outputs=answer)

    # Step 3: Summarize document
    summarize_btn.click(fn=summarize_document, outputs=summary_output)

demo.launch()