File size: 2,740 Bytes
e812a8d
d5ba13f
 
 
 
 
 
e812a8d
 
 
 
 
 
d5ba13f
 
 
 
 
 
 
 
 
 
e812a8d
d5ba13f
 
 
e812a8d
 
 
d5ba13f
e812a8d
d5ba13f
 
 
 
dedc8ff
 
e812a8d
 
 
 
 
 
 
 
d5ba13f
 
 
e812a8d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import gradio as gr
import os

from utils import (
    extract_pdf_by_page, split_by_chapter,
    build_faiss_index, retrieve_text, generate_notes_questions
)

# Shared, process-wide state written by the upload/API-key handlers and read
# by the query handler.
groq_api_key = ""              # empty string means "no key supplied yet"
faiss_index = None             # None until a PDF has been indexed
chunks, chunk_texts = [], []   # two independent lists, filled on upload

#def upload_pdf(file, use_chapter_split):
#    global chunks, faiss_index, chunk_texts
#    with tempfile.NamedTemporaryFile(delete=False) as tmp:
#        tmp.write(file.read())
#        tmp_path = tmp.name

#    pages = extract_pdf_by_page(tmp_path)
#    chunks = split_by_chapter(pages) if use_chapter_split else pages
#    faiss_index, chunk_texts, chunks = build_faiss_index(chunks)
#    return f"✅ Uploaded and indexed {len(chunks)} chunks."

def upload_pdf(file_path, use_chapter_split):
    """Extract, chunk, and index an uploaded PDF for later retrieval.

    Args:
        file_path: Value delivered by the ``gr.File`` input. May be a path
            (``str`` / ``os.PathLike``), an upload object that exposes its
            on-disk path as ``.name``, or ``None`` when no file was chosen.
        use_chapter_split: When truthy, the extracted pages are regrouped
            with ``split_by_chapter``; otherwise the pages themselves are
            used as chunks.

    Returns:
        A status string for the "Upload Status" textbox.
    """
    global chunks, faiss_index, chunk_texts

    # Pressing "Process PDF" without selecting a file hands us None; report
    # that to the user instead of failing inside the PDF extractor.
    if file_path is None:
        return "❌ Please upload a PDF file first."

    # Normalise tempfile-like upload objects to the path they wrap.
    if isinstance(file_path, (str, os.PathLike)):
        pdf_path = file_path
    else:
        pdf_path = getattr(file_path, "name", file_path)

    pages = extract_pdf_by_page(pdf_path)
    candidate_chunks = split_by_chapter(pages) if use_chapter_split else pages

    # Build everything in locals first so a failure while indexing leaves the
    # previously indexed document (if any) fully intact.
    new_index, new_texts, new_chunks = build_faiss_index(candidate_chunks)
    faiss_index, chunk_texts, chunks = new_index, new_texts, new_chunks

    return f"✅ Uploaded and indexed {len(chunks)} chunks."
    

#def extract_text_from_pdf(file_path):
#    doc = fitz.open(file_path)  # file_path is already a string
#    pages = [page.get_text() for page in doc]
#    return pages

    
def set_api_key(api_key):
    """Store the Groq API key used by subsequent query requests.

    Args:
        api_key: Text from the API-key textbox. ``None`` is treated like an
            empty string, and surrounding whitespace is removed.

    Returns:
        A status string: a success message when a non-empty key was stored,
        otherwise a prompt to enter one.
    """
    global groq_api_key

    # Strip copy/paste whitespace. An empty value still overwrites (clears)
    # the stored key, as before, but is no longer reported as a success.
    groq_api_key = (api_key or "").strip()
    if not groq_api_key:
        return "❌ Please enter a non-empty Groq API key."
    return "✅ API key set!"

def process_query(query):
    """Generate notes and questions for the indexed section matching *query*.

    Reads the module-level ``groq_api_key``, ``faiss_index``, ``chunk_texts``
    and ``chunks``; it does not modify them.

    Args:
        query: Chapter title or keyword typed by the user.

    Returns:
        Markdown text: either an error line explaining what is missing, or a
        header naming the matched section followed by the generated output.
    """
    if not groq_api_key:
        return "❌ Please provide a valid Groq API key."
    # Compare against None explicitly rather than relying on the truthiness
    # of whatever index object build_faiss_index returned.
    if faiss_index is None:
        return "❌ Please upload and process a PDF first."
    if not query or not query.strip():
        return "❌ Please enter a chapter title or keyword."

    results = retrieve_text(query, faiss_index, chunk_texts, chunks)
    # Guard the [0] lookup below: an empty result would raise IndexError.
    if results is None or len(results) == 0:
        return "❌ No matching section found for this query."

    # Only the first result is used (presumably the best match; confirm
    # against retrieve_text's ordering).
    top = results[0]
    output = generate_notes_questions(top['text'], groq_api_key)
    return f"📘 **{top.get('title', 'Page')}** (pages: {top.get('pages')})\n\n{output}"

# Build the single-page UI: upload/index a PDF, set the Groq API key, then
# query the indexed document for generated notes and questions.
with gr.Blocks(title="RAG PDF Notes & Questions Generator") as demo:
    gr.Markdown("# 📚 PDF Chapter Notes & Question Generator using Groq LLM")

    # --- Step 1: upload and index a PDF ---
    with gr.Row():
        pdf = gr.File(label="Upload PDF", file_types=[".pdf"])
        use_chapter = gr.Checkbox(label="Split by Chapter", value=True)
        upload_btn = gr.Button("Process PDF")

    output_text = gr.Textbox(label="Upload Status")
    upload_btn.click(fn=upload_pdf, inputs=[pdf, use_chapter], outputs=output_text)

    # --- Step 2: provide the Groq API key (masked input) ---
    api_key_input = gr.Textbox(label="🔐 Groq API Key", type="password")
    api_btn = gr.Button("Set API Key")
    api_output = gr.Textbox(label="API Key Status")
    api_btn.click(fn=set_api_key, inputs=[api_key_input], outputs=api_output)

    # --- Step 3: query the indexed document; result is rendered as Markdown ---
    query_input = gr.Textbox(label="Enter chapter title or page keyword")
    query_btn = gr.Button("Generate Notes & Questions")
    result_output = gr.Markdown()
    query_btn.click(fn=process_query, inputs=[query_input], outputs=[result_output])

# Start the web server as soon as the script is executed.
demo.launch()