himanshukumar378 committed on
Commit
cb6ff7e
·
verified ·
1 Parent(s): 4f755fe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -56
app.py CHANGED
@@ -8,40 +8,27 @@ from langchain_community.embeddings import HuggingFaceEmbeddings
8
  from langchain_core.prompts import PromptTemplate
9
 
10
  # Hugging Face Transformers
11
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
12
 
13
 
14
- # ---------------- Load LLM with fallback ----------------
15
  def load_llm():
16
- model_ids = [
17
- "google/flan-t5-base",
18
- "google/flan-t5-small",
19
- "google/flan-t5-large"
20
- ]
21
-
22
- for model_id in model_ids:
23
- try:
24
- print(f"Attempting to load model: {model_id}")
25
- tokenizer = AutoTokenizer.from_pretrained(model_id)
26
- model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
27
-
28
- # Create pipeline directly without LangChain wrapper
29
- pipe = pipeline(
30
- "text2text-generation",
31
- model=model,
32
- tokenizer=tokenizer,
33
- max_length=512
34
- )
35
- print(f"✅ Successfully loaded model: {model_id}")
36
- return pipe
37
- except Exception as e:
38
- print(f"⚠️ Failed to load {model_id}: {e}")
39
- continue
40
-
41
- raise RuntimeError("❌ No model could be loaded.")
42
 
43
 
44
- llm_pipeline = load_llm()
45
 
46
 
47
  # ---------------- Process PDF ----------------
@@ -58,7 +45,7 @@ def process_pdf(pdf_files):
58
  return None
59
 
60
  # Split text into chunks
61
- splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
62
  texts = splitter.split_text(text)
63
 
64
  # Embeddings & vector store
@@ -73,58 +60,99 @@ def ask_question(pdf_files, question):
73
  try:
74
  if not pdf_files:
75
  return "⚠️ Please upload at least one PDF file."
 
 
 
76
 
77
  db = process_pdf(pdf_files)
78
  if not db:
79
  return "⚠️ No text found in the uploaded PDF(s)."
80
 
81
- retriever = db.as_retriever(search_kwargs={"k": 3})
82
  docs = retriever.get_relevant_documents(question)
83
 
84
  # Combine retrieved context
85
  context = "\n".join([doc.page_content for doc in docs])
 
 
 
86
 
87
- # Prompt template
88
- prompt_template = f"""Answer the question based on the following context:
89
 
90
- Context: {context}
 
91
 
92
  Question: {question}
93
 
94
  Answer:"""
95
 
96
- # Use the pipeline directly
97
- result = llm_pipeline(prompt_template, max_length=512, do_sample=False)
 
 
 
 
 
 
 
98
  response = result[0]['generated_text'].strip()
99
-
100
- return response if response else "⚠️ No answer generated. Try another question."
 
 
 
 
 
 
 
 
101
 
102
  except Exception as e:
103
- return f"⚠️ Error while generating answer: {str(e)}"
104
 
105
 
106
  # ---------------- Gradio UI ----------------
107
  with gr.Blocks() as demo:
108
- gr.Markdown("## 📚 Multiple PDF Chatbot")
109
  gr.Markdown("Upload PDF files and ask questions about their content.")
110
-
111
  with gr.Row():
112
- pdf_input = gr.File(
113
- label="Upload PDF(s)",
114
- file_types=[".pdf"],
115
- file_count="multiple"
116
- )
117
-
 
 
 
 
 
 
 
 
118
  with gr.Row():
119
- question_input = gr.Textbox(
120
- label="Ask a Question",
121
- placeholder="Type your question here..."
 
122
  )
123
-
124
- with gr.Row():
125
- output = gr.Textbox(label="Answer", lines=5)
126
-
127
- submit_btn = gr.Button("Ask Question", variant="primary")
128
- submit_btn.click(fn=ask_question, inputs=[pdf_input, question_input], outputs=output)
 
 
 
 
 
 
 
 
 
 
129
 
130
  demo.launch()
 
8
  from langchain_core.prompts import PromptTemplate
9
 
10
  # Hugging Face Transformers
11
+ from transformers import pipeline
12
 
13
 
14
# ---------------- Load LLM ----------------
def load_llm():
    """Load the text2text-generation pipeline used to answer questions.

    Returns:
        A transformers ``pipeline("text2text-generation")`` instance, or
        ``None`` when model loading fails (e.g. no network / missing
        weights) so the caller can degrade gracefully.
    """
    try:
        # flan-t5-base is instruction-tuned, so it follows the QA prompt well.
        # NOTE: the original also passed temperature=0.1 here, but temperature
        # has no effect unless sampling is enabled (do_sample=True), and recent
        # transformers versions warn about it — so it is omitted.
        pipe = pipeline(
            "text2text-generation",
            model="google/flan-t5-base",
            max_length=512,
        )
        print(" Successfully loaded model: google/flan-t5-base")
        return pipe
    except Exception as e:
        # Never crash at import time: ask_question() checks for None and
        # reports the failure to the user instead.
        print(f"⚠️ Failed to load model: {e}")
        return None


# Loaded once at module import so every request reuses the same pipeline.
llm = load_llm()
32
 
33
 
34
  # ---------------- Process PDF ----------------
 
45
  return None
46
 
47
  # Split text into chunks
48
+ splitter = CharacterTextSplitter(chunk_size=800, chunk_overlap=100)
49
  texts = splitter.split_text(text)
50
 
51
  # Embeddings & vector store
 
def ask_question(pdf_files, question):
    """Answer ``question`` from the uploaded ``pdf_files`` via retrieval + LLM.

    Args:
        pdf_files: list of uploaded PDF file objects (may be empty or None).
        question: the user's natural-language question.

    Returns:
        The generated answer string, or a user-facing "⚠️ ..." message
        describing why no answer could be produced. Never raises: all
        errors are converted to a message for the UI.
    """
    try:
        # Guard clauses: validate inputs before doing any heavy work.
        if not pdf_files:
            return "⚠️ Please upload at least one PDF file."

        if not llm:
            return "⚠️ Language model failed to load. Please try again later."

        db = process_pdf(pdf_files)
        if not db:
            return "⚠️ No text found in the uploaded PDF(s)."

        # Retrieve the 4 chunks most similar to the question.
        retriever = db.as_retriever(search_kwargs={"k": 4})
        docs = retriever.get_relevant_documents(question)

        # Combine retrieved context, then collapse all runs of whitespace
        # so the prompt stays compact.
        context = "\n".join([doc.page_content for doc in docs])
        context = " ".join(context.split())

        # Prompt that steers the model to answer from the context only.
        prompt = f"""Based on the following information, answer the question clearly and concisely.

Information:
{context}

Question: {question}

Answer:"""

        # Greedy decoding for reproducible answers. The original also passed
        # temperature=0.1, which is ignored when do_sample=False (and warns
        # on recent transformers), so it is omitted here.
        result = llm(
            prompt,
            max_length=300,
            num_return_sequences=1,
            do_sample=False,
        )

        response = result[0]['generated_text'].strip()

        # BUG FIX: the original used response.replace("Answer:", ""), which
        # strips EVERY occurrence of "Answer:" — including legitimate ones
        # inside the answer text. Only the leading prefix should be removed.
        if response.startswith("Answer:"):
            response = response[len("Answer:"):].strip()

        # Treat empty or near-empty generations as "no answer found".
        if not response or len(response) < 10:
            return "I couldn't find a clear answer to your question in the provided documents. Please try rephrasing your question or check if the relevant information is in the uploaded PDFs."

        return response

    except Exception as e:
        # Last-resort guard so the UI always shows something useful.
        return f"⚠️ Error: {str(e)}"
113
 
114
 
115
# ---------------- Gradio UI ----------------
# Declarative Blocks layout: uploads and question side by side, answer below,
# plus clickable example questions.
with gr.Blocks() as demo:
    gr.Markdown("## 📚 PDF Question Answering System")
    gr.Markdown("Upload PDF files and ask questions about their content.")

    with gr.Row():
        with gr.Column():
            pdf_input = gr.File(
                label="Upload PDF Files",
                file_types=[".pdf"],
                file_count="multiple",
            )
        with gr.Column():
            question_input = gr.Textbox(
                label="Your Question",
                placeholder="What would you like to know about the document?",
                lines=2,
            )
            submit_btn = gr.Button("Ask Question", variant="primary")

    with gr.Row():
        output = gr.Textbox(
            label="Answer",
            lines=4,
            interactive=False,
        )

    # Canned questions the user can click instead of typing.
    gr.Examples(
        examples=[
            ["What is the main topic of this document?"],
            ["Can you summarize the key points?"],
            ["What are the main findings or conclusions?"],
            ["Who are the authors and what are their credentials?"]
        ],
        inputs=question_input,
        label="Example Questions"
    )

    # Both the button and pressing Enter in the textbox trigger the same
    # handler with the same inputs/outputs.
    qa_inputs = [pdf_input, question_input]
    submit_btn.click(ask_question, inputs=qa_inputs, outputs=output)
    question_input.submit(ask_question, inputs=qa_inputs, outputs=output)


demo.launch()