"""AskYourDocs: upload a PDF, then ask questions about it or summarize it.

Pipeline: PyPDFLoader -> RecursiveCharacterTextSplitter -> FAISS vector index
(MiniLM embeddings) -> RetrievalQA over a HuggingFace Hub flan-t5-large LLM.
A Gradio Blocks UI drives indexing, Q&A, and summarization.
"""

import os

import gradio as gr
from langchain.chains import LLMChain, RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFaceHub
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

# -- Set Hugging Face API token from secret --
# BUG FIX: os.getenv() returns None when HF_API_TOKEN is missing, and
# assigning None into os.environ raises a confusing TypeError. Fail fast
# with an actionable message instead.
_hf_token = os.getenv("HF_API_TOKEN")
if not _hf_token:
    raise RuntimeError(
        "HF_API_TOKEN environment variable is not set. "
        "Add it as a secret before launching the app."
    )
os.environ["HUGGINGFACEHUB_API_TOKEN"] = _hf_token

# -- Shared LLM and Embedder (built once; reused by every request) --
embedder = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)
llm = HuggingFaceHub(
    repo_id="google/flan-t5-large",
    model_kwargs={"temperature": 0.3, "max_length": 512},
)

# -- Internal state --
# NOTE(review): module-level state is shared by ALL Gradio sessions; in a
# multi-user deployment the last-indexed document wins. Kept as-is for
# backward compatibility; consider gr.State if per-session isolation matters.
global_chunks = []


def build_qa_chain(pdf_file):
    """Load *pdf_file*, split it into chunks, index them in FAISS, and
    return a RetrievalQA chain over that index.

    Also stashes the chunks in ``global_chunks`` so the summarizer can
    reuse them without re-parsing the PDF.
    """
    global global_chunks
    loader = PyPDFLoader(pdf_file.name)
    docs = loader.load()
    chunks = RecursiveCharacterTextSplitter(
        chunk_size=500, chunk_overlap=50
    ).split_documents(docs)
    global_chunks = chunks  # store for summarizer
    db = FAISS.from_documents(chunks, embedder)
    return RetrievalQA.from_chain_type(llm=llm, retriever=db.as_retriever())


def summarize_document():
    """Summarize the most recently indexed document as bullet points.

    Returns a warning string if no document has been indexed yet.
    """
    if not global_chunks:
        return "⚠️ Please upload and index a document first."
    # Merge all text from chunks
    full_text = " ".join(chunk.page_content for chunk in global_chunks)
    # Use a prompt template for summarization
    prompt = PromptTemplate(
        input_variables=["doc"],
        template=(
            "Please summarize the following document in a few concise "
            "bullet points:\n\n{doc}"
        ),
    )
    chain = LLMChain(llm=llm, prompt=prompt)
    # Trim to fit the model's input window (flan-t5 context is limited).
    summary = chain.run(full_text[:3000])
    return summary


# --- Gradio UI ---
with gr.Blocks() as demo:
    gr.Markdown(
        "# AskYourDocs 📄🤖\nUpload a PDF, then ask questions or get a summary."
    )
    pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
    build_btn = gr.Button("Index Document")
    summarize_btn = gr.Button("Summarize Document")
    status = gr.Markdown("")
    qa_state = gr.State()  # holds the per-session RetrievalQA chain
    question = gr.Textbox(label="Ask a question")
    answer = gr.Textbox(label="Answer", interactive=False)
    summary_output = gr.Textbox(label="Summary", lines=10, interactive=False)

    # Step 1: Build vector index
    def _index(pdf):
        """Validate the upload, build the QA chain, and report status."""
        if pdf is None:
            return "⚠️ Please upload a PDF first.", None
        chain = build_qa_chain(pdf)
        return "✅ Document indexed!", chain

    build_btn.click(_index, inputs=pdf_input, outputs=[status, qa_state])

    # Step 2: Ask questions
    def _ask(q, chain):
        """Run the user's question through the indexed QA chain."""
        if chain is None:
            return "⚠️ Upload and index a PDF first."
        if not q.strip():
            return "⚠️ Ask something..."
        return chain.run(q)

    question.submit(_ask, inputs=[question, qa_state], outputs=answer)

    # Step 3: Summarize document
    summarize_btn.click(fn=summarize_document, outputs=summary_output)

demo.launch()