# Hugging Face Space: AskYourDocs (page status at capture time: Runtime error)
| import os | |
| import gradio as gr | |
| from langchain.document_loaders import PyPDFLoader | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.embeddings import HuggingFaceEmbeddings | |
| from langchain.vectorstores import FAISS | |
| from langchain.llms import HuggingFaceHub | |
| from langchain.chains import RetrievalQA | |
| from langchain.prompts import PromptTemplate | |
| from langchain.chains import LLMChain | |
# -- Set Hugging Face API token from secret --
# Fail fast with a clear message: assigning None into os.environ raises an
# opaque "TypeError: str expected, not NoneType" when the secret is missing.
_hf_token = os.getenv("HF_API_TOKEN")
if _hf_token is None:
    raise RuntimeError(
        "HF_API_TOKEN secret is not set. Add it in the Space settings "
        "so the Hugging Face Hub LLM can authenticate."
    )
os.environ["HUGGINGFACEHUB_API_TOKEN"] = _hf_token

# -- Shared LLM and embedder, created once and reused by every request --
embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
llm = HuggingFaceHub(
    repo_id="google/flan-t5-large",
    model_kwargs={"temperature": 0.3, "max_length": 512},
)

# -- Internal state: chunks of the most recently indexed PDF (used by the summarizer) --
global_chunks = []
def build_qa_chain(pdf_file):
    """Load a PDF, split it into chunks, embed them, and return a RetrievalQA chain.

    Side effect: stores the chunks in ``global_chunks`` so the summarizer
    can reuse them without re-parsing the PDF.

    Args:
        pdf_file: The uploaded file — either a filepath string or an object
            with a ``.name`` attribute (Gradio's File component has returned
            both across versions).

    Returns:
        A ``RetrievalQA`` chain backed by a FAISS index over the document.
    """
    global global_chunks
    # Newer Gradio versions pass a filepath string; older ones pass a
    # tempfile-like object whose .name is the path. Accept both.
    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
    docs = PyPDFLoader(pdf_path).load()
    chunks = RecursiveCharacterTextSplitter(
        chunk_size=500, chunk_overlap=50
    ).split_documents(docs)
    global_chunks = chunks  # store for summarizer
    db = FAISS.from_documents(chunks, embedder)
    return RetrievalQA.from_chain_type(llm=llm, retriever=db.as_retriever())
def summarize_document():
    """Summarize the most recently indexed PDF as concise bullet points.

    Reads the chunks cached in ``global_chunks`` by ``build_qa_chain``;
    returns a warning string if no document has been indexed yet.
    """
    if not global_chunks:
        return "⚠️ Please upload and index a document first."

    # Concatenate the text of every chunk into a single document string.
    merged_text = " ".join(chunk.page_content for chunk in global_chunks)

    summary_prompt = PromptTemplate(
        input_variables=["doc"],
        template="Please summarize the following document in a few concise bullet points:\n\n{doc}",
    )
    summarizer = LLMChain(llm=llm, prompt=summary_prompt)
    # Trim to fit model input
    return summarizer.run(merged_text[:3000])
# --- Gradio UI ---
with gr.Blocks() as demo:
    gr.Markdown("# AskYourDocs 📄🤖\nUpload a PDF, then ask questions or get a summary.")

    pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
    build_btn = gr.Button("Index Document")
    summarize_btn = gr.Button("Summarize Document")
    status = gr.Markdown("")
    qa_state = gr.State()  # holds the RetrievalQA chain between events
    question = gr.Textbox(label="Ask a question")
    answer = gr.Textbox(label="Answer", interactive=False)
    summary_output = gr.Textbox(label="Summary", lines=10, interactive=False)

    # Step 1: Build vector index
    def _index(pdf):
        """Index the uploaded PDF; returns (status message, QA chain or None)."""
        if pdf is None:
            return "⚠️ Please upload a PDF first.", None
        chain = build_qa_chain(pdf)
        return "✅ Document indexed!", chain

    build_btn.click(_index, inputs=pdf_input, outputs=[status, qa_state])

    # Step 2: Ask questions
    def _ask(q, chain):
        """Answer a question against the indexed document."""
        if chain is None:
            return "⚠️ Upload and index a PDF first."
        # Guard against None as well as whitespace-only input: a cleared
        # textbox can be delivered as None, and None.strip() would crash.
        if not q or not q.strip():
            return "⚠️ Ask something..."
        return chain.run(q)

    question.submit(_ask, inputs=[question, qa_state], outputs=answer)

    # Step 3: Summarize document
    summarize_btn.click(fn=summarize_document, outputs=summary_output)

demo.launch()