First_agent_template

Sleeping

App Files Community

SamarthPujari committed on Jun 14, 2025

Commit

5f65018

verified ·

1 Parent(s): e58d6bc

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -15

app.py CHANGED Viewed

@@ -82,44 +82,60 @@ qa_pipeline = pipeline("text2text-generation", model="google/flan-t5-base")
 @tool
 def document_qna_tool(pdf_path: str, question: str) -> str:
-    """
-    A tool for answering questions based on the content of a PDF document.
-    Args:
-        pdf_path (str): Path to the local PDF file.
-        question (str): A natural language question to ask about the PDF content.
-    Returns:
-        str: Answer to the question based on the PDF's content.
-    """
     try:
         if not os.path.exists(pdf_path):
-            return f"Error: File not found at {pdf_path}"
-        # Step 1: Extract text from PDF
         doc = fitz.open(pdf_path)
         text_chunks = []
         for page in doc:
             text = page.get_text()
             if text.strip():
                 text_chunks.append(text)
         doc.close()
         if not text_chunks:
-            return "No text found in the PDF."
-        # Step 2: Semantic search
         embeddings = embedding_model.encode(text_chunks, convert_to_tensor=True)
         question_embedding = embedding_model.encode(question, convert_to_tensor=True)
         scores = util.pytorch_cos_sim(question_embedding, embeddings)[0]
-        best_match_idx = scores.argmax()
         best_context = text_chunks[best_match_idx]
-        # Step 3: Answer question
         prompt = f"Context: {best_context}\nQuestion: {question}"
         answer = qa_pipeline(prompt, max_new_tokens=100)[0]['generated_text']
         return f"Answer: {answer.strip()}"
     except Exception as e:
-        return f"Error processing document QnA: {type(e).__name__}: {str(e)}\n{traceback.format_exc()}"
 # -------------------- Other Components --------------------
 final_answer = FinalAnswerTool()

 @tool
 def document_qna_tool(pdf_path: str, question: str) -> str:
+    import os, fitz, traceback
+    from sentence_transformers import SentenceTransformer, util
+    from transformers import pipeline
     try:
+        # Step 0: Log input
+        print(f"[DEBUG] Received pdf_path: {pdf_path}")
+        print(f"[DEBUG] Received question: {question}")
+        # Step 1: Check file exists
         if not os.path.exists(pdf_path):
+            return f"[ERROR] File does not exist at {pdf_path}"
+        # Step 2: Try opening PDF
+        print("[DEBUG] Opening PDF...")
         doc = fitz.open(pdf_path)
+        # Step 3: Extract text
+        print("[DEBUG] Extracting text...")
         text_chunks = []
         for page in doc:
             text = page.get_text()
             if text.strip():
                 text_chunks.append(text)
         doc.close()
+        print(f"[DEBUG] Extracted {len(text_chunks)} chunks of text")
         if not text_chunks:
+            return "[ERROR] No text found in the PDF."
+        # Step 4: Load model
+        print("[DEBUG] Loading embedding model...")
+        embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+        print("[DEBUG] Encoding text...")
         embeddings = embedding_model.encode(text_chunks, convert_to_tensor=True)
         question_embedding = embedding_model.encode(question, convert_to_tensor=True)
+        # Step 5: Semantic search
+        print("[DEBUG] Performing semantic search...")
         scores = util.pytorch_cos_sim(question_embedding, embeddings)[0]
+        best_match_idx = scores.argmax().item()
         best_context = text_chunks[best_match_idx]
+        print(f"[DEBUG] Found best context index: {best_match_idx}")
+        # Step 6: Answer question
+        print("[DEBUG] Loading QA model...")
+        qa_pipeline = pipeline("text2text-generation", model="google/flan-t5-base")
         prompt = f"Context: {best_context}\nQuestion: {question}"
+        print(f"[DEBUG] Prompting model...")
         answer = qa_pipeline(prompt, max_new_tokens=100)[0]['generated_text']
         return f"Answer: {answer.strip()}"
     except Exception as e:
+        return f"[EXCEPTION] {type(e).__name__}: {str(e)}\n{traceback.format_exc()}"
 # -------------------- Other Components --------------------
 final_answer = FinalAnswerTool()