Spaces:
Sleeping
Sleeping
Version5
Browse files
app.py
CHANGED
|
@@ -3,11 +3,24 @@ from sentence_transformers import SentenceTransformer
|
|
| 3 |
import faiss
|
| 4 |
import re
|
| 5 |
import gradio as gr
|
|
|
|
|
|
|
| 6 |
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
def preprocess_text(text):
|
| 13 |
"""
|
|
@@ -110,8 +123,45 @@ def ask_question(question, model, index, text_chunks):
|
|
| 110 |
print(result['full_text'])
|
| 111 |
print(f"Best match confidence: {result['confidence']:.2f}")
|
| 112 |
return result
|
| 113 |
-
|
| 114 |
-
model, index,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
|
| 116 |
# Custom CSS for professional styling
|
| 117 |
custom_css = """
|
|
@@ -262,7 +312,7 @@ with gr.Blocks(title="Interview Q&A Assistant", css=custom_css) as demo:
|
|
| 262 |
</div>
|
| 263 |
""")
|
| 264 |
|
| 265 |
-
# Set up events
|
| 266 |
upload_button.click(upload_file, inputs=pdf_upload, outputs=status_text)
|
| 267 |
submit_button.click(answer_question, inputs=question_input, outputs=answer_output)
|
| 268 |
|
|
|
|
| 3 |
import faiss
|
| 4 |
import re
|
| 5 |
import gradio as gr
|
| 6 |
+
import PyPDF2
|
| 7 |
+
import io
|
| 8 |
|
| 9 |
+
def extract_text_from_pdf(pdf_file):
    """
    Extract text from a PDF file

    Args:
        pdf_file: Raw PDF content as a bytes-like object (wrapped in an
            in-memory stream), or any other source handed straight to
            ``PyPDF2.PdfReader`` such as a file path or an open binary
            stream. ``None`` means nothing was uploaded.

    Returns:
        The text of every page, each followed by a newline, or the string
        "Please upload a PDF file." when ``pdf_file`` is ``None``.
    """
    if pdf_file is None:
        return "Please upload a PDF file."

    # Only raw byte buffers need wrapping; anything else is passed through
    # unchanged so the reader can open paths or streams itself.
    if isinstance(pdf_file, (bytes, bytearray, memoryview)):
        source = io.BytesIO(pdf_file)
    else:
        source = pdf_file
    pdf_reader = PyPDF2.PdfReader(source)

    # Fall back to "" if a page yields no text, so one empty page cannot
    # abort the whole extraction; join once instead of repeated +=.
    return "".join(
        (page.extract_text() or "") + "\n" for page in pdf_reader.pages
    )
|
| 24 |
|
| 25 |
def preprocess_text(text):
|
| 26 |
"""
|
|
|
|
| 123 |
print(result['full_text'])
|
| 124 |
print(f"Best match confidence: {result['confidence']:.2f}")
|
| 125 |
return result
|
| 126 |
+
|
| 127 |
+
# Module-level state holding the model, index, and text chunks.
# All three start out as None.
global_model = global_index = global_text_chunks = None
|
| 131 |
+
|
| 132 |
+
def upload_file(file):
    """
    Process an uploaded PDF and store the resulting QA state globally.

    Returns a status message: a success notice, the error text if
    processing raised, or a prompt to upload when ``file`` is ``None``.
    """
    global global_model, global_index, global_text_chunks

    # Nothing uploaded: report it without touching the stored state.
    if file is None:
        return "❌ Please upload a PDF file."

    try:
        # Pull the text out of the PDF, then build the QA system from it.
        document_text = extract_text_from_pdf(file)
        qa_parts = create_qa_system(document_text)
        global_model, global_index, global_text_chunks = qa_parts
    except Exception as err:
        # Any failure is reported to the caller as a status string.
        return f"❌ Error processing document: {err!s}"

    return "✅ Document processed successfully! You can now ask questions."
|
| 147 |
+
|
| 148 |
+
def answer_question(question):
    """
    Answer a question using the QA state stored in the module globals.

    Args:
        question: The question text. ``None``, an empty string, or a
            whitespace-only string is treated as "no question entered".

    Returns:
        A prompt to upload a document if the model, index, or text chunks
        are still ``None``; a prompt to enter a question if the question
        is empty; otherwise a response string built from the
        ``found_answer``, ``confidence`` and ``full_text`` entries of the
        ``query_qa_system`` result.
    """
    # The globals are only read here, so no ``global`` declaration is needed.
    if global_model is None or global_index is None or global_text_chunks is None:
        return "Please upload and process a document first."

    # Treat a missing (None) question like an empty one instead of letting
    # ``.strip()`` raise AttributeError.
    if not question or not question.strip():
        return "Please enter a question."

    result = query_qa_system(question, global_model, global_index, global_text_chunks)

    if result['found_answer']:
        return f"Found matching section (confidence: {result['confidence']:.2f}):\n\n{result['full_text']}"
    return f"{result['full_text']}\nBest match confidence: {result['confidence']:.2f}"
|
| 165 |
|
| 166 |
# Custom CSS for professional styling
|
| 167 |
custom_css = """
|
|
|
|
| 312 |
</div>
|
| 313 |
""")
|
| 314 |
|
| 315 |
+
# Set up events
|
| 316 |
upload_button.click(upload_file, inputs=pdf_upload, outputs=status_text)
|
| 317 |
submit_button.click(answer_question, inputs=question_input, outputs=answer_output)
|
| 318 |
|