Spaces:

himanshukumar378
/

Mutliple_chat_pdf

Sleeping

App Files Community

himanshukumar378 committed on Aug 19, 2025

Commit

c630cd2

verified ·

1 Parent(s): 60db15e

Update app.py

Browse files

Files changed (1) hide show

app.py +92 -107

app.py CHANGED Viewed

@@ -1,135 +1,120 @@
 import gradio as gr
-import os
 from PyPDF2 import PdfReader
-from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.vectorstores import FAISS
-from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
-from langchain.prompts import PromptTemplate
-from transformers import pipeline
-# -----------------------------
-# 🔹 Model Fallback Setup
-# -----------------------------
-HF_MODELS = [
-    "google/flan-t5-small",
-    "google/flan-t5-base",
-    "google/flan-t5-large",
-    "google/flan-ul2"
-]
 def load_llm():
-    """Try loading Hugging Face models in order, return first successful LLM."""
-    for model_name in HF_MODELS:
         try:
-            print(f"🔄 Trying to load model: {model_name}")
             pipe = pipeline(
                 "text2text-generation",
-                model=model_name,
-                tokenizer=model_name,
-                max_new_tokens=512,
             )
-            return HuggingFacePipeline(pipeline=pipe), model_name
         except Exception as e:
-            print(f"⚠️ Failed to load {model_name}: {e}")
-    raise RuntimeError("❌ Could not load any Hugging Face model.")
-# Load at startup
-llm, active_model = load_llm()
-print(f"✅ Using model: {active_model}")
-# -----------------------------
-# 🔹 PDF Processing
-# -----------------------------
-def process_pdf(pdf_paths):
-    """Extract text from PDFs, chunk it, and return FAISS vector DB."""
     text = ""
-    for pdf_path in pdf_paths:
-        try:
-            reader = PdfReader(pdf_path)
-            for page in reader.pages:
-                page_text = page.extract_text()
-                if page_text:
-                    text += page_text + "\n"
-        except Exception as e:
-            print(f"⚠️ Error reading {pdf_path}: {e}")
     if not text.strip():
-        return None
     # Split text into chunks
-    text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=1000, chunk_overlap=200
-    )
-    chunks = text_splitter.split_text(text)
-    # Convert chunks into vector DB
     embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
-    db = FAISS.from_texts(chunks, embedding=embeddings)
     return db
-# -----------------------------
-# 🔹 Question Answering
-# -----------------------------
-def ask_question(pdf_paths, question):
-    if not pdf_paths:
-        return "⚠️ Please upload at least one PDF."
-    if not question or not question.strip():
-        return "⚠️ Please enter a question."
-    db = process_pdf(pdf_paths)
-    if db is None:
-        return "⚠️ Couldn't extract any text from the PDFs."
-    retriever = db.as_retriever(search_kwargs={"k": 3})
-    docs = retriever.get_relevant_documents(question)
-    context = "\n".join(getattr(d, "page_content", str(d)) for d in docs)
-    prompt = PromptTemplate(
-        input_variables=["context", "question"],
-        template=(
-            "Answer the question using ONLY the context below. "
-            "If the answer isn't in the context, say you don't know.\n\n"
-            "Context:\n{context}\n\nQuestion: {question}\nAnswer:"
-        ),
-    )
-    final_prompt = prompt.format(context=context, question=question)
-    # Try multiple models for answering
-    for model_name in HF_MODELS:
-        try:
-            pipe = pipeline(
-                "text2text-generation",
-                model=model_name,
-                tokenizer=model_name,
-                max_new_tokens=512,
-            )
-            llm = HuggingFacePipeline(pipeline=pipe)
-            result = llm.invoke(final_prompt)
-            return str(getattr(result, "content", result)) + f"\n\n✅ Answered using {model_name}"
-        except Exception as e:
-            print(f"⚠️ Model {model_name} failed: {e}")
-            continue
-    return "❌ All models failed to generate an answer."
-# -----------------------------
-# 🔹 Gradio UI
-# -----------------------------
 with gr.Blocks() as demo:
-    gr.Markdown("## 📘 PDF Question Answering with Hugging Face (Fallback Models)")
     with gr.Row():
-        pdf_input = gr.File(label="Upload PDFs", file_types=[".pdf"], file_types_metadata=None, type="filepath", file_count="multiple")
         question_input = gr.Textbox(label="Ask a Question")
-    answer_output = gr.Textbox(label="Answer", lines=10)
-    ask_btn = gr.Button("Get Answer")
-    ask_btn.click(fn=ask_question, inputs=[pdf_input, question_input], outputs=answer_output)
-# -----------------------------
-# 🔹 Launch App
-# -----------------------------
-if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860)

 import gradio as gr
 from PyPDF2 import PdfReader
+# LangChain components
+from langchain.text_splitter import CharacterTextSplitter
 from langchain_community.vectorstores import FAISS
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain_core.prompts import PromptTemplate
+from langchain_community.llms import HuggingFacePipeline
+# Hugging Face Transformers
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
# ---------------- Load LLM with fallback ----------------
# Candidate checkpoints, tried in order; the first one that loads is used.
DEFAULT_MODEL_IDS = (
    "google/flan-t5-small",   # lightweight, safe
    "google/flan-t5-base",    # more powerful
    "google/flan-t5-large",   # stronger, but bigger
    "google/flan-t5-xl",      # may fail in free tier, but used if available
)


def load_llm(model_ids=None):
    """Return a LangChain LLM wrapping the first checkpoint that loads.

    Args:
        model_ids: Optional iterable of Hugging Face model ids to try, in
            order. When omitted, ``DEFAULT_MODEL_IDS`` is used.

    Returns:
        A ``HuggingFacePipeline`` around a ``text2text-generation`` pipeline
        built from the first model id that loads successfully.

    Raises:
        RuntimeError: If none of the candidates could be loaded. The error is
            chained to the last underlying failure (if there was one) so the
            real cause is not lost.
    """
    candidates = DEFAULT_MODEL_IDS if model_ids is None else tuple(model_ids)
    last_error = None

    for model_id in candidates:
        # The broad catch is deliberate: any failure while loading one
        # candidate (download, memory, config, ...) should just move on to
        # the next candidate instead of aborting start-up.
        try:
            tokenizer = AutoTokenizer.from_pretrained(model_id)
            model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
            pipe = pipeline(
                "text2text-generation",
                model=model,
                tokenizer=tokenizer,
                max_length=512,
            )
            print(f"✅ Loaded model: {model_id}")
            return HuggingFacePipeline(pipeline=pipe)
        except Exception as e:
            print(f"⚠️ Failed to load {model_id}: {e}")
            last_error = e

    raise RuntimeError(
        "❌ No model could be loaded. Please check Hugging Face space resources."
    ) from last_error
# Load the model once at import time; ask_question() reuses this module-level
# instance for every request.
llm = load_llm()
# ---------------- Process PDF ----------------
_EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
_embeddings = None  # created on first use, then shared by later calls


def _get_embeddings():
    """Return the shared embedding model, instantiating it on first use."""
    global _embeddings
    if _embeddings is None:
        _embeddings = HuggingFaceEmbeddings(model_name=_EMBEDDING_MODEL_NAME)
    return _embeddings


def process_pdf(pdf_files):
    """Extract text from the uploaded PDFs and index it in a FAISS store.

    Args:
        pdf_files: One upload or a list/tuple of uploads. Each item may be a
            path string or an object exposing the path as ``.name``.

    Returns:
        A FAISS vector store built from the text chunks, or ``None`` when
        nothing was uploaded or no text could be extracted.

    Raises:
        Exception: Whatever ``PdfReader`` raises for an unreadable file is
            propagated unchanged to the caller.
    """
    if not pdf_files:
        return None
    # A single (non-list) upload is treated as a one-element batch.
    if not isinstance(pdf_files, (list, tuple)):
        pdf_files = [pdf_files]

    page_texts = []
    for pdf in pdf_files:
        # Accept both plain paths and file-like wrappers carrying `.name`.
        path = getattr(pdf, "name", pdf)
        reader = PdfReader(path)
        for page in reader.pages:
            extracted = page.extract_text()
            if extracted:
                page_texts.append(extracted)

    # Each page is terminated by a newline, as before.
    text = "".join(f"{page_text}\n" for page_text in page_texts)
    if not text.strip():
        return None  # return None if empty

    # Split text into chunks. Pages are joined with single newlines, so split
    # on "\n"; the splitter's default separator ("\n\n") would rarely match
    # and could leave chunks far larger than chunk_size.
    splitter = CharacterTextSplitter(
        separator="\n", chunk_size=1000, chunk_overlap=200
    )
    texts = splitter.split_text(text)

    # Embeddings & vector store
    db = FAISS.from_texts(texts, _get_embeddings())
    return db
# ---------------- Ask Questions ----------------
def ask_question(pdf_files, question):
    """Answer a question from the uploaded PDFs; always returns display text.

    Args:
        pdf_files: Uploaded PDF(s) as delivered by the file input.
        question: The user's question.

    Returns:
        The model's answer, or a user-facing warning string when input is
        missing, no text could be extracted, no answer was produced, or an
        error occurred. Errors are reported as text rather than raised so the
        UI always has something to show.
    """
    # Validate input up front so the user gets a clear message instead of a
    # raw exception text from deeper in the pipeline.
    if not pdf_files:
        return "⚠️ Please upload at least one PDF."
    if not question or not question.strip():
        return "⚠️ Please enter a question."

    try:
        db = process_pdf(pdf_files)
        if db is None:
            return "⚠️ No text found in the uploaded PDF(s)."

        # Top-3 similarity search directly on the store; avoids the
        # deprecated retriever.get_relevant_documents() call.
        docs = db.similarity_search(question, k=3)

        # Combine retrieved context
        context = "\n".join(
            getattr(doc, "page_content", str(doc)) for doc in docs
        )

        # Prompt template
        prompt = PromptTemplate(
            input_variables=["context", "question"],
            template="Answer the question using the following context:\n{context}\n\nQuestion: {question}\nAnswer:"
        )
        final_prompt = prompt.format(context=context, question=question)

        response = llm.invoke(final_prompt)
        return response if response else "⚠️ No answer generated. Try another question."
    except Exception as e:
        # UI boundary: surface the failure as text instead of crashing the app.
        return f"⚠️ Error while generating answer: {str(e)}"
# ---------------- Gradio UI ----------------
# Layout: a multi-file PDF upload, a question box, an answer box and a submit
# button wired to ask_question().
with gr.Blocks() as demo:
    gr.Markdown("## 📚 Multiple PDF Chatbot (with Hugging Face fallback models)")

    with gr.Row():
        pdf_input = gr.File(
            label="Upload PDF(s)",
            file_types=[".pdf"],
            # `file_count` is the parameter that enables multi-file upload.
            file_count="multiple",
            # Hand the callback file paths.
            type="filepath",
        )

    with gr.Row():
        question_input = gr.Textbox(label="Ask a Question")

    with gr.Row():
        output = gr.Textbox(label="Answer")

    submit = gr.Button("Submit")
    submit.click(fn=ask_question, inputs=[pdf_input, question_input], outputs=output)

# Only start the server when run as a script, so importing this module does
# not launch the app as a side effect.
if __name__ == "__main__":
    demo.launch()