asaeed23 committed on
Commit
d5ba13f
·
verified ·
1 Parent(s): dedc8ff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -13
app.py CHANGED
@@ -1,28 +1,39 @@
1
  import gradio as gr
2
- import tempfile
3
- from utils import extract_pdf_by_page, split_by_chapter, build_faiss_index, retrieve_text, generate_notes_questions
 
 
 
 
4
 
5
  chunks = []
6
  faiss_index = None
7
  chunk_texts = []
8
  groq_api_key = ""
9
 
10
- def upload_pdf(file, use_chapter_split):
11
- global chunks, faiss_index, chunk_texts
12
- with tempfile.NamedTemporaryFile(delete=False) as tmp:
13
- tmp.write(file.read())
14
- tmp_path = tmp.name
 
 
 
 
 
15
 
16
- pages = extract_pdf_by_page(tmp_path)
 
 
17
  chunks = split_by_chapter(pages) if use_chapter_split else pages
18
  faiss_index, chunk_texts, chunks = build_faiss_index(chunks)
19
  return f"βœ… Uploaded and indexed {len(chunks)} chunks."
 
20
 
21
-
22
- def extract_text_from_pdf(file_path):
23
- doc = fitz.open(file_path) # file_path is already a string
24
- pages = [page.get_text() for page in doc]
25
- return pages
26
 
27
 
28
  def set_api_key(api_key):
@@ -33,6 +44,9 @@ def set_api_key(api_key):
33
  def process_query(query):
34
  if not groq_api_key:
35
  return "❌ Please provide a valid Groq API key."
 
 
 
36
  results = retrieve_text(query, faiss_index, chunk_texts, chunks)
37
  output = generate_notes_questions(results[0]['text'], groq_api_key)
38
  return f"πŸ“˜ **{results[0].get('title', 'Page')}** (pages: {results[0].get('pages')})\n\n{output}"
 
1
  import gradio as gr
2
+ import os
3
+
4
+ from utils import (
5
+ extract_pdf_by_page, split_by_chapter,
6
+ build_faiss_index, retrieve_text, generate_notes_questions
7
+ )
8
 
9
  chunks = []
10
  faiss_index = None
11
  chunk_texts = []
12
  groq_api_key = ""
13
 
14
+ #def upload_pdf(file, use_chapter_split):
15
+ # global chunks, faiss_index, chunk_texts
16
+ # with tempfile.NamedTemporaryFile(delete=False) as tmp:
17
+ # tmp.write(file.read())
18
+ # tmp_path = tmp.name
19
+
20
+ # pages = extract_pdf_by_page(tmp_path)
21
+ # chunks = split_by_chapter(pages) if use_chapter_split else pages
22
+ # faiss_index, chunk_texts, chunks = build_faiss_index(chunks)
23
+ # return f"✅ Uploaded and indexed {len(chunks)} chunks."
24
 
25
def upload_pdf(file_path, use_chapter_split):
    """Extract the uploaded PDF, optionally split it by chapter, and index it.

    On success the module-level ``chunks``, ``faiss_index`` and
    ``chunk_texts`` are replaced with the values returned by
    ``build_faiss_index``.

    Args:
        file_path: Path of the uploaded PDF, passed straight to
            ``extract_pdf_by_page``. A falsy value (``None`` or an empty
            string) is treated as "nothing uploaded".
        use_chapter_split: When truthy, the extracted pages are grouped with
            ``split_by_chapter`` before indexing; otherwise the pages are
            indexed as extracted.

    Returns:
        A status message string for display.
    """
    global chunks, faiss_index, chunk_texts

    # Without this guard a missing upload is forwarded to the extractor.
    if not file_path:
        return "❌ Please upload a PDF file first."

    pages = extract_pdf_by_page(file_path)
    pending_chunks = split_by_chapter(pages) if use_chapter_split else pages

    # Build into locals and publish all three globals in one statement, so a
    # failure while indexing cannot leave `chunks` pointing at the new
    # document while `faiss_index` / `chunk_texts` still describe the old one.
    new_index, new_texts, new_chunks = build_faiss_index(pending_chunks)
    faiss_index, chunk_texts, chunks = new_index, new_texts, new_chunks

    return f"✅ Uploaded and indexed {len(chunks)} chunks."
31
+
32
 
33
+ #def extract_text_from_pdf(file_path):
34
+ # doc = fitz.open(file_path) # file_path is already a string
35
+ # pages = [page.get_text() for page in doc]
36
+ # return pages
 
37
 
38
 
39
  def set_api_key(api_key):
 
44
def process_query(query):
    """Answer a query from the indexed PDF with generated notes and questions.

    Reads the module-level ``groq_api_key``, ``faiss_index``, ``chunk_texts``
    and ``chunks``. The first entry returned by ``retrieve_text`` is passed
    to ``generate_notes_questions``.

    Args:
        query: The user's query, forwarded unchanged to ``retrieve_text``.

    Returns:
        A Markdown string with a header (title and pages of the top result)
        followed by the generated output, or an error message when the API
        key is missing, no PDF has been indexed, or nothing was retrieved.
    """
    if not groq_api_key:
        return "❌ Please provide a valid Groq API key."
    # The index starts out as None; compare by identity instead of relying on
    # the truthiness of whatever object build_faiss_index returns.
    if faiss_index is None:
        return "❌ Please upload and process a PDF first."

    results = retrieve_text(query, faiss_index, chunk_texts, chunks)
    # Indexing results[0] on an empty retrieval would raise IndexError.
    if not results:
        return "❌ No matching content found for this query."

    top_hit = results[0]
    output = generate_notes_questions(top_hit['text'], groq_api_key)
    return f"📘 **{top_hit.get('title', 'Page')}** (pages: {top_hit.get('pages')})\n\n{output}"