Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -19,28 +19,31 @@ index = None
|
|
| 19 |
# PDF Text Extraction & Processing
|
| 20 |
def process_pdf(file):
|
| 21 |
global chunks, index
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
# Ask a Question
|
| 46 |
def ask_question(query):
|
|
@@ -51,6 +54,7 @@ def ask_question(query):
|
|
| 51 |
distances, indices = index.search(np.array(query_embedding), k=3)
|
| 52 |
context = "\n".join([chunks[i] for i in indices[0]])
|
| 53 |
|
|
|
|
| 54 |
client = Groq(api_key=GROQ_API_KEY)
|
| 55 |
prompt = f"Answer the question based on the following context:\n\n{context}\n\nQuestion: {query}"
|
| 56 |
|
|
@@ -71,9 +75,12 @@ answer_output = gr.Textbox(label="π Answer")
|
|
| 71 |
pdf_processor = gr.Interface(fn=process_pdf, inputs=file_input, outputs="text")
|
| 72 |
pdf_qa = gr.Interface(fn=ask_question, inputs=question_input, outputs=answer_output)
|
| 73 |
|
|
|
|
| 74 |
app = gr.TabbedInterface(
|
| 75 |
[pdf_processor, pdf_qa],
|
| 76 |
tab_names=["Upload PDF", "Ask a Question"]
|
| 77 |
)
|
| 78 |
|
| 79 |
-
app
|
|
|
|
|
|
|
|
|
| 19 |
# PDF Text Extraction & Processing
|
| 20 |
def process_pdf(file):
    """Extract text from an uploaded PDF, chunk it, and build the search index.

    The module-level ``chunks`` and ``index`` globals are only replaced after
    every step has succeeded, and they are replaced together, so a failed
    upload never leaves the chunk list and the vector index out of sync.

    Args:
        file: The uploaded file. Either an object exposing the PDF path as
            ``file.name`` or a plain path; ``None`` when nothing was uploaded.

    Returns:
        A human-readable status message describing success or failure.
    """
    global chunks, index

    if file is None:
        return "❌ No file uploaded. Please upload a PDF first."

    try:
        # Accept both upload wrappers (path in ``.name``) and bare paths.
        pdf_path = getattr(file, "name", file)
        reader = PdfReader(pdf_path)
        # extract_text() may yield nothing for a page; treat that as empty.
        text = "\n".join(page.extract_text() or "" for page in reader.pages)

        if not text.strip():
            return "❌ No text found in the PDF. Please upload a different file."

        # Chunking: fixed-size word windows, each sharing `chunk_overlap`
        # words with the previous window.
        chunk_size = 300
        chunk_overlap = 50
        step = chunk_size - chunk_overlap
        words = text.split()
        # Stop once the remaining words are already covered by the previous
        # window's overlap; otherwise the last chunk would contain nothing new
        # and would only produce duplicate search hits.
        last_start = max(len(words) - chunk_overlap, 1)
        new_chunks = [
            " ".join(words[start:start + chunk_size])
            for start in range(0, last_start, step)
        ]

        # Embeddings + FAISS. The index is fed float32 vectors explicitly
        # rather than relying on the encoder's default dtype.
        embeddings = np.asarray(model.encode(new_chunks), dtype=np.float32)
        dimension = embeddings.shape[1]
        new_index = faiss.IndexFlatL2(dimension)
        new_index.add(embeddings)
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        return f"❌ Error processing the PDF: {str(e)}"

    # Publish the new state only after everything above succeeded.
    chunks, index = new_chunks, new_index
    return f"✅ Processed {len(chunks)} chunks from uploaded PDF. You can now ask questions."
|
| 47 |
|
| 48 |
# Ask a Question
|
| 49 |
def ask_question(query):
|
|
|
|
| 54 |
distances, indices = index.search(np.array(query_embedding), k=3)
|
| 55 |
context = "\n".join([chunks[i] for i in indices[0]])
|
| 56 |
|
| 57 |
+
# Use Groq API for question answering
|
| 58 |
client = Groq(api_key=GROQ_API_KEY)
|
| 59 |
prompt = f"Answer the question based on the following context:\n\n{context}\n\nQuestion: {query}"
|
| 60 |
|
|
|
|
| 75 |
pdf_processor = gr.Interface(fn=process_pdf, inputs=file_input, outputs="text")
|
| 76 |
pdf_qa = gr.Interface(fn=ask_question, inputs=question_input, outputs=answer_output)
|
| 77 |
|
| 78 |
+
# Create a tabbed interface with "Upload PDF" and "Ask a Question" tabs
|
| 79 |
app = gr.TabbedInterface(
|
| 80 |
[pdf_processor, pdf_qa],
|
| 81 |
tab_names=["Upload PDF", "Ask a Question"]
|
| 82 |
)
|
| 83 |
|
| 84 |
+
# Launch the app
|
| 85 |
+
if __name__ == "__main__":
|
| 86 |
+
app.launch()
|