Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -80,11 +80,9 @@ qa_pipeline = pipeline("text2text-generation", model="google/flan-t5-base")
|
|
| 80 |
def document_qna_tool(pdf_path: str, question: str) -> str:
|
| 81 |
"""
|
| 82 |
A tool that answers natural language questions about a given PDF document.
|
| 83 |
-
|
| 84 |
Args:
|
| 85 |
pdf_path (str): Path to the local PDF file.
|
| 86 |
question (str): Question about the content of the PDF.
|
| 87 |
-
|
| 88 |
Returns:
|
| 89 |
str: Answer to the question based on the content.
|
| 90 |
"""
|
|
@@ -116,24 +114,17 @@ def document_qna_tool(pdf_path: str, question: str) -> str:
|
|
| 116 |
return "[ERROR] No readable text in the PDF."
|
| 117 |
|
| 118 |
print(f"[DEBUG] Extracted {len(text_chunks)} text chunks.")
|
| 119 |
-
print(f"[DEBUG] First text chunk preview:\n{text_chunks[0][:300]}...")
|
| 120 |
|
|
|
|
| 121 |
embeddings = embedding_model.encode(text_chunks, convert_to_tensor=True)
|
| 122 |
question_embedding = embedding_model.encode(question, convert_to_tensor=True)
|
| 123 |
|
| 124 |
print("[DEBUG] Performing semantic search...")
|
| 125 |
scores = util.pytorch_cos_sim(question_embedding, embeddings)[0]
|
| 126 |
-
|
| 127 |
-
print(f"[DEBUG] Similarity scores: {scores}")
|
| 128 |
-
|
| 129 |
-
if scores.shape[0] == 0:
|
| 130 |
-
return "[ERROR] No semantic matches found in PDF text."
|
| 131 |
-
|
| 132 |
best_match_idx = scores.argmax().item()
|
| 133 |
best_context = text_chunks[best_match_idx]
|
| 134 |
|
| 135 |
-
|
| 136 |
-
|
| 137 |
prompt = f"Context: {best_context}\nQuestion: {question}"
|
| 138 |
print("[DEBUG] Calling QA model...")
|
| 139 |
answer = qa_pipeline(prompt, max_new_tokens=100)[0]['generated_text']
|
|
@@ -177,8 +168,5 @@ agent = CodeAgent(
|
|
| 177 |
description=None,
|
| 178 |
prompt_templates=prompt_templates
|
| 179 |
)
|
| 180 |
-
print("[DEBUG] Registered Tools:")
|
| 181 |
-
for t in agent.tools:
|
| 182 |
-
print(f" - {getattr(t, 'name', str(t))}")
|
| 183 |
|
| 184 |
GradioUI(agent).launch()
|
|
|
|
| 80 |
def document_qna_tool(pdf_path: str, question: str) -> str:
|
| 81 |
"""
|
| 82 |
A tool that answers natural language questions about a given PDF document.
|
|
|
|
| 83 |
Args:
|
| 84 |
pdf_path (str): Path to the local PDF file.
|
| 85 |
question (str): Question about the content of the PDF.
|
|
|
|
| 86 |
Returns:
|
| 87 |
str: Answer to the question based on the content.
|
| 88 |
"""
|
|
|
|
| 114 |
return "[ERROR] No readable text in the PDF."
|
| 115 |
|
| 116 |
print(f"[DEBUG] Extracted {len(text_chunks)} text chunks.")
|
|
|
|
| 117 |
|
| 118 |
+
embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
|
| 119 |
embeddings = embedding_model.encode(text_chunks, convert_to_tensor=True)
|
| 120 |
question_embedding = embedding_model.encode(question, convert_to_tensor=True)
|
| 121 |
|
| 122 |
print("[DEBUG] Performing semantic search...")
|
| 123 |
scores = util.pytorch_cos_sim(question_embedding, embeddings)[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
best_match_idx = scores.argmax().item()
|
| 125 |
best_context = text_chunks[best_match_idx]
|
| 126 |
|
| 127 |
+
qa_pipeline = pipeline("text2text-generation", model="google/flan-t5-base")
|
|
|
|
| 128 |
prompt = f"Context: {best_context}\nQuestion: {question}"
|
| 129 |
print("[DEBUG] Calling QA model...")
|
| 130 |
answer = qa_pipeline(prompt, max_new_tokens=100)[0]['generated_text']
|
|
|
|
| 168 |
description=None,
|
| 169 |
prompt_templates=prompt_templates
|
| 170 |
)
|
|
|
|
|
|
|
|
|
|
| 171 |
|
| 172 |
GradioUI(agent).launch()
|