Spaces:
Sleeping
Sleeping
version7
Browse files
app.py
CHANGED
|
@@ -16,21 +16,13 @@ def extract_text_from_pdf(pdf_file):
|
|
| 16 |
|
| 17 |
pdf_text = ""
|
| 18 |
try:
|
| 19 |
-
#
|
| 20 |
-
with
|
| 21 |
-
temp_pdf.write(pdf_file)
|
| 22 |
-
temp_path = temp_pdf.name
|
| 23 |
-
|
| 24 |
-
# Open the temporary file with PyPDF2
|
| 25 |
-
with open(temp_path, 'rb') as f:
|
| 26 |
pdf_reader = PyPDF2.PdfReader(f)
|
| 27 |
for page_num in range(len(pdf_reader.pages)):
|
| 28 |
page = pdf_reader.pages[page_num]
|
| 29 |
pdf_text += page.extract_text() + "\n"
|
| 30 |
|
| 31 |
-
# Clean up the temporary file
|
| 32 |
-
os.unlink(temp_path)
|
| 33 |
-
|
| 34 |
except Exception as e:
|
| 35 |
return f"Error processing PDF: {str(e)}"
|
| 36 |
|
|
@@ -122,22 +114,6 @@ def query_qa_system(question, model, index, text_chunks, similarity_threshold=0.
|
|
| 122 |
'found_answer': False
|
| 123 |
}
|
| 124 |
|
| 125 |
-
def ask_question(question, model, index, text_chunks):
|
| 126 |
-
"""
|
| 127 |
-
User-friendly interface for asking questions
|
| 128 |
-
"""
|
| 129 |
-
result = query_qa_system(question, model, index, text_chunks)
|
| 130 |
-
print("\nQ:", question)
|
| 131 |
-
print("-" * 50)
|
| 132 |
-
if result['found_answer']:
|
| 133 |
-
print(f"Found matching section (confidence: {result['confidence']:.2f}):")
|
| 134 |
-
print(f"\n{result['full_text']}\n")
|
| 135 |
-
return result
|
| 136 |
-
else:
|
| 137 |
-
print(result['full_text'])
|
| 138 |
-
print(f"Best match confidence: {result['confidence']:.2f}")
|
| 139 |
-
return result
|
| 140 |
-
|
| 141 |
# Global variables to store model, index, and text chunks
|
| 142 |
global_model = None
|
| 143 |
global_index = None
|
|
|
|
| 16 |
|
| 17 |
pdf_text = ""
|
| 18 |
try:
|
| 19 |
+
# In Hugging Face Spaces, pdf_file is an uploaded-file object; its .name attribute is the path on disk
|
| 20 |
+
with open(pdf_file.name, 'rb') as f:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
pdf_reader = PyPDF2.PdfReader(f)
|
| 22 |
for page_num in range(len(pdf_reader.pages)):
|
| 23 |
page = pdf_reader.pages[page_num]
|
| 24 |
pdf_text += page.extract_text() + "\n"
|
| 25 |
|
|
|
|
|
|
|
|
|
|
| 26 |
except Exception as e:
|
| 27 |
return f"Error processing PDF: {str(e)}"
|
| 28 |
|
|
|
|
| 114 |
'found_answer': False
|
| 115 |
}
|
| 116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
# Global variables to store model, index, and text chunks
|
| 118 |
global_model = None
|
| 119 |
global_index = None
|