SamarthPujari committed on
Commit
37316fa
·
verified ·
1 Parent(s): 4f86fde

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -14
app.py CHANGED
@@ -80,11 +80,9 @@ qa_pipeline = pipeline("text2text-generation", model="google/flan-t5-base")
80
  def document_qna_tool(pdf_path: str, question: str) -> str:
81
  """
82
  A tool that answers natural language questions about a given PDF document.
83
-
84
  Args:
85
  pdf_path (str): Path to the local PDF file.
86
  question (str): Question about the content of the PDF.
87
-
88
  Returns:
89
  str: Answer to the question based on the content.
90
  """
@@ -116,24 +114,17 @@ def document_qna_tool(pdf_path: str, question: str) -> str:
116
  return "[ERROR] No readable text in the PDF."
117
 
118
  print(f"[DEBUG] Extracted {len(text_chunks)} text chunks.")
119
- print(f"[DEBUG] First text chunk preview:\n{text_chunks[0][:300]}...")
120
 
 
121
  embeddings = embedding_model.encode(text_chunks, convert_to_tensor=True)
122
  question_embedding = embedding_model.encode(question, convert_to_tensor=True)
123
 
124
  print("[DEBUG] Performing semantic search...")
125
  scores = util.pytorch_cos_sim(question_embedding, embeddings)[0]
126
-
127
- print(f"[DEBUG] Similarity scores: {scores}")
128
-
129
- if scores.shape[0] == 0:
130
- return "[ERROR] No semantic matches found in PDF text."
131
-
132
  best_match_idx = scores.argmax().item()
133
  best_context = text_chunks[best_match_idx]
134
 
135
- print(f"[DEBUG] Best context preview:\n{best_context[:300]}...")
136
-
137
  prompt = f"Context: {best_context}\nQuestion: {question}"
138
  print("[DEBUG] Calling QA model...")
139
  answer = qa_pipeline(prompt, max_new_tokens=100)[0]['generated_text']
@@ -177,8 +168,5 @@ agent = CodeAgent(
177
  description=None,
178
  prompt_templates=prompt_templates
179
  )
180
- print("[DEBUG] Registered Tools:")
181
- for t in agent.tools:
182
- print(f" - {getattr(t, 'name', str(t))}")
183
 
184
  GradioUI(agent).launch()
 
80
  def document_qna_tool(pdf_path: str, question: str) -> str:
81
  """
82
  A tool that answers natural language questions about a given PDF document.
 
83
  Args:
84
  pdf_path (str): Path to the local PDF file.
85
  question (str): Question about the content of the PDF.
 
86
  Returns:
87
  str: Answer to the question based on the content.
88
  """
 
114
  return "[ERROR] No readable text in the PDF."
115
 
116
  print(f"[DEBUG] Extracted {len(text_chunks)} text chunks.")
 
117
 
118
+ embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
119
  embeddings = embedding_model.encode(text_chunks, convert_to_tensor=True)
120
  question_embedding = embedding_model.encode(question, convert_to_tensor=True)
121
 
122
  print("[DEBUG] Performing semantic search...")
123
  scores = util.pytorch_cos_sim(question_embedding, embeddings)[0]
 
 
 
 
 
 
124
  best_match_idx = scores.argmax().item()
125
  best_context = text_chunks[best_match_idx]
126
 
127
+ qa_pipeline = pipeline("text2text-generation", model="google/flan-t5-base")
 
128
  prompt = f"Context: {best_context}\nQuestion: {question}"
129
  print("[DEBUG] Calling QA model...")
130
  answer = qa_pipeline(prompt, max_new_tokens=100)[0]['generated_text']
 
168
  description=None,
169
  prompt_templates=prompt_templates
170
  )
 
 
 
171
 
172
  GradioUI(agent).launch()