Update app.py
app.py CHANGED
@@ -8,40 +8,27 @@ from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_core.prompts import PromptTemplate

 # Hugging Face Transformers
-from transformers import
+from transformers import pipeline


-# ---------------- Load LLM
+# ---------------- Load LLM ----------------
 def load_llm():
-
-
-
-
-
-
-
-
-
-
-
-
-
-        pipe = pipeline(
-            "text2text-generation",
-            model=model,
-            tokenizer=tokenizer,
-            max_length=512
-        )
-        print(f"✅ Successfully loaded model: {model_id}")
-        return pipe
-    except Exception as e:
-        print(f"⚠️ Failed to load {model_id}: {e}")
-        continue
-
-    raise RuntimeError("❌ No model could be loaded.")
+    try:
+        # Use a model that's good at instruction following
+        pipe = pipeline(
+            "text2text-generation",
+            model="google/flan-t5-base",
+            max_length=512,
+            temperature=0.1  # Lower temperature for more focused answers
+        )
+        print("✅ Successfully loaded model: google/flan-t5-base")
+        return pipe
+    except Exception as e:
+        print(f"⚠️ Failed to load model: {e}")
+        return None


-
+llm = load_llm()


 # ---------------- Process PDF ----------------
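Note on the load_llm() change above: pipeline("text2text-generation", ...) returns a callable object, and calling it yields a list of dicts whose "generated_text" key holds the model output, which is exactly what ask_question() indexes further down. A minimal sketch, assuming the transformers package is installed and the google/flan-t5-base weights can be downloaded:

from transformers import pipeline

# Same construction as load_llm() above, minus the app-specific error handling.
pipe = pipeline("text2text-generation", model="google/flan-t5-base")

# A text2text-generation pipeline returns a list with one dict per generated
# sequence; the answer text lives under the "generated_text" key.
result = pipe("Question: What colour is a ripe banana?\nAnswer:")
print(result[0]["generated_text"])  # a short string such as "yellow" (output may vary)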
@@ -58,7 +45,7 @@ def process_pdf(pdf_files):
         return None

     # Split text into chunks
-    splitter = CharacterTextSplitter(chunk_size=
+    splitter = CharacterTextSplitter(chunk_size=800, chunk_overlap=100)
     texts = splitter.split_text(text)

     # Embeddings & vector store
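Note on the chunking change above: for CharacterTextSplitter both sizes are measured in characters, and the 100-character overlap is meant to let neighbouring chunks share a little context so a sentence cut at a boundary can still be retrieved. A minimal sketch of the new settings, assuming the splitter is importable via the classic langchain.text_splitter path (app.py's own import is outside this diff):

from langchain.text_splitter import CharacterTextSplitter

# Stand-in for text extracted from a PDF: paragraphs separated by blank
# lines, which is the default separator CharacterTextSplitter splits on.
text = "\n\n".join("Paragraph %d. " % i + "Lorem ipsum dolor sit amet. " * 10 for i in range(20))

splitter = CharacterTextSplitter(chunk_size=800, chunk_overlap=100)
chunks = splitter.split_text(text)
print(len(chunks), max(len(c) for c in chunks))  # several chunks, none longer than ~800 characters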
@@ -73,58 +60,99 @@ def ask_question(pdf_files, question):
     try:
         if not pdf_files:
             return "⚠️ Please upload at least one PDF file."
+
+        if not llm:
+            return "⚠️ Language model failed to load. Please try again later."

         db = process_pdf(pdf_files)
         if not db:
             return "⚠️ No text found in the uploaded PDF(s)."

-        retriever = db.as_retriever(search_kwargs={"k":
+        retriever = db.as_retriever(search_kwargs={"k": 4})
         docs = retriever.get_relevant_documents(question)

         # Combine retrieved context
         context = "\n".join([doc.page_content for doc in docs])
+
+        # Clean up context to remove excessive whitespace
+        context = " ".join(context.split())

-        #
-
+        # Better prompt template that forces the model to answer
+        prompt = f"""Based on the following information, answer the question clearly and concisely.

-
+Information:
+{context}

 Question: {question}

 Answer:"""

-        #
-        result =
+        # Generate response
+        result = llm(
+            prompt,
+            max_length=300,
+            num_return_sequences=1,
+            do_sample=False,
+            temperature=0.1
+        )
+
         response = result[0]['generated_text'].strip()
-
-
+
+        # Clean up the response
+        if response.startswith("Answer:"):
+            response = response.replace("Answer:", "").strip()
+
+        # If response is empty or just repeats the prompt, provide fallback
+        if not response or len(response) < 10:
+            return "I couldn't find a clear answer to your question in the provided documents. Please try rephrasing your question or check if the relevant information is in the uploaded PDFs."
+
+        return response

     except Exception as e:
-        return f"⚠️ Error
+        return f"⚠️ Error: {str(e)}"


 # ---------------- Gradio UI ----------------
 with gr.Blocks() as demo:
-    gr.Markdown("## 📚
+    gr.Markdown("## 📚 PDF Question Answering System")
     gr.Markdown("Upload PDF files and ask questions about their content.")
-
+
     with gr.Row():
-
-
-
-
-
-
+        with gr.Column():
+            pdf_input = gr.File(
+                label="Upload PDF Files",
+                file_types=[".pdf"],
+                file_count="multiple"
+            )
+        with gr.Column():
+            question_input = gr.Textbox(
+                label="Your Question",
+                placeholder="What would you like to know about the document?",
+                lines=2
+            )
+            submit_btn = gr.Button("Ask Question", variant="primary")
+
     with gr.Row():
-
-        label="
-
+        output = gr.Textbox(
+            label="Answer",
+            lines=4,
+            interactive=False
         )
-
-
-
-
-
-
+
+    # Examples
+    gr.Examples(
+        examples=[
+            ["What is the main topic of this document?"],
+            ["Can you summarize the key points?"],
+            ["What are the main findings or conclusions?"],
+            ["Who are the authors and what are their credentials?"]
+        ],
+        inputs=question_input,
+        label="Example Questions"
+    )
+
+    # Handle both button click and enter key
+    submit_btn.click(ask_question, inputs=[pdf_input, question_input], outputs=output)
+    question_input.submit(ask_question, inputs=[pdf_input, question_input], outputs=output)

 demo.launch()
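Note on the updated ask_question() flow: because the retrieval, prompting and generation now live in one plain function, it can be exercised outside the Gradio UI when debugging. A minimal sketch, assuming app.py exposes ask_question as in this diff and using the hypothetical file name sample.pdf:

from app import ask_question

# ask_question expects the same inputs the Gradio widgets provide: a list of
# uploaded PDF files and a question string. Depending on how process_pdf reads
# its argument, plain file paths may need to be wrapped differently.
answer = ask_question(["sample.pdf"], "What is the main topic of this document?")
print(answer)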
|