Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,28 +1,39 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
-
import
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
chunks = []
|
| 6 |
faiss_index = None
|
| 7 |
chunk_texts = []
|
| 8 |
groq_api_key = ""
|
| 9 |
|
| 10 |
-
def upload_pdf(file, use_chapter_split):
|
| 11 |
-
global chunks, faiss_index, chunk_texts
|
| 12 |
-
with tempfile.NamedTemporaryFile(delete=False) as tmp:
|
| 13 |
-
tmp.write(file.read())
|
| 14 |
-
tmp_path = tmp.name
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
-
|
|
|
|
|
|
|
| 17 |
chunks = split_by_chapter(pages) if use_chapter_split else pages
|
| 18 |
faiss_index, chunk_texts, chunks = build_faiss_index(chunks)
|
| 19 |
return f"✅ Uploaded and indexed {len(chunks)} chunks."
|
|
|
|
| 20 |
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
pages
|
| 25 |
-
return pages
|
| 26 |
|
| 27 |
|
| 28 |
def set_api_key(api_key):
|
|
@@ -33,6 +44,9 @@ def set_api_key(api_key):
|
|
| 33 |
def process_query(query):
|
| 34 |
if not groq_api_key:
|
| 35 |
return "❌ Please provide a valid Groq API key."
|
|
|
|
|
|
|
|
|
|
| 36 |
results = retrieve_text(query, faiss_index, chunk_texts, chunks)
|
| 37 |
output = generate_notes_questions(results[0]['text'], groq_api_key)
|
| 38 |
return f"π **{results[0].get('title', 'Page')}** (pages: {results[0].get('pages')})\n\n{output}"
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
import os
|
| 3 |
+
|
| 4 |
+
from utils import (
|
| 5 |
+
extract_pdf_by_page, split_by_chapter,
|
| 6 |
+
build_faiss_index, retrieve_text, generate_notes_questions
|
| 7 |
+
)
|
| 8 |
|
| 9 |
chunks = []
|
| 10 |
faiss_index = None
|
| 11 |
chunk_texts = []
|
| 12 |
groq_api_key = ""
|
| 13 |
|
| 14 |
+
#def upload_pdf(file, use_chapter_split):
|
| 15 |
+
# global chunks, faiss_index, chunk_texts
|
| 16 |
+
# with tempfile.NamedTemporaryFile(delete=False) as tmp:
|
| 17 |
+
# tmp.write(file.read())
|
| 18 |
+
# tmp_path = tmp.name
|
| 19 |
+
|
| 20 |
+
# pages = extract_pdf_by_page(tmp_path)
|
| 21 |
+
# chunks = split_by_chapter(pages) if use_chapter_split else pages
|
| 22 |
+
# faiss_index, chunk_texts, chunks = build_faiss_index(chunks)
|
| 23 |
+
# return f"✅ Uploaded and indexed {len(chunks)} chunks."
|
| 24 |
|
| 25 |
+
def upload_pdf(file_path, use_chapter_split):
|
| 26 |
+
global chunks, faiss_index, chunk_texts
|
| 27 |
+
pages = extract_pdf_by_page(file_path)
|
| 28 |
chunks = split_by_chapter(pages) if use_chapter_split else pages
|
| 29 |
faiss_index, chunk_texts, chunks = build_faiss_index(chunks)
|
| 30 |
return f"✅ Uploaded and indexed {len(chunks)} chunks."
|
| 31 |
+
|
| 32 |
|
| 33 |
+
#def extract_text_from_pdf(file_path):
|
| 34 |
+
# doc = fitz.open(file_path) # file_path is already a string
|
| 35 |
+
# pages = [page.get_text() for page in doc]
|
| 36 |
+
# return pages
|
|
|
|
| 37 |
|
| 38 |
|
| 39 |
def set_api_key(api_key):
|
|
|
|
| 44 |
def process_query(query):
|
| 45 |
if not groq_api_key:
|
| 46 |
return "❌ Please provide a valid Groq API key."
|
| 47 |
+
if not faiss_index:
|
| 48 |
+
return "❌ Please upload and process a PDF first."
|
| 49 |
+
|
| 50 |
results = retrieve_text(query, faiss_index, chunk_texts, chunks)
|
| 51 |
output = generate_notes_questions(results[0]['text'], groq_api_key)
|
| 52 |
return f"π **{results[0].get('title', 'Page')}** (pages: {results[0].get('pages')})\n\n{output}"
|