Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import os | |
| from utils import ( | |
| extract_pdf_by_page, split_by_chapter, | |
| build_faiss_index, retrieve_text, generate_notes_questions | |
| ) | |
# Module-level state shared by the Gradio callbacks defined in this file.
# upload_pdf() fills the first three from build_faiss_index(); set_api_key()
# stores the key; process_query() reads all four.
# NOTE(review): being module globals, these are presumably shared by every
# visitor of the app -- confirm whether per-session state is wanted.
chunks: list = []         # chunk records as returned by build_faiss_index()
faiss_index = None        # None until a PDF has been indexed
chunk_texts: list = []    # texts returned alongside the index
groq_api_key: str = ""    # empty string means "no key provided yet"
# Earlier version of upload_pdf, kept for reference: it copied the uploaded
# file object into a temporary file before indexing. It relies on `tempfile`,
# which is not among the imports at the top of this file.
# def upload_pdf(file, use_chapter_split):
#     global chunks, faiss_index, chunk_texts
#     with tempfile.NamedTemporaryFile(delete=False) as tmp:
#         tmp.write(file.read())
#         tmp_path = tmp.name
#     pages = extract_pdf_by_page(tmp_path)
#     chunks = split_by_chapter(pages) if use_chapter_split else pages
#     faiss_index, chunk_texts, chunks = build_faiss_index(chunks)
#     return f"β Uploaded and indexed {len(chunks)} chunks."
def upload_pdf(file_path, use_chapter_split):
    """Extract, optionally chapter-split, and index an uploaded PDF.

    Args:
        file_path: Value of the ``gr.File`` input; passed unchanged to
            ``extract_pdf_by_page``. Falsy (``None`` / empty) when the
            button is clicked before a file has been uploaded.
        use_chapter_split: When truthy the extracted pages are regrouped
            with ``split_by_chapter``; otherwise the pages themselves are
            used as the chunks.

    Returns:
        A status string shown in the "Upload Status" textbox.
    """
    global chunks, faiss_index, chunk_texts

    # Nothing uploaded yet: report it instead of crashing inside the extractor.
    if not file_path:
        return "β Please upload and process a PDF first."

    pages = extract_pdf_by_page(file_path)
    pending = split_by_chapter(pages) if use_chapter_split else pages

    # Build into locals first so that a failure here leaves the previously
    # indexed document (index, texts and chunks) consistent and usable.
    new_index, new_texts, new_chunks = build_faiss_index(pending)
    faiss_index, chunk_texts, chunks = new_index, new_texts, new_chunks
    return f"β Uploaded and indexed {len(chunks)} chunks."
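# Unused alternative extractor, kept for reference. It relies on `fitz`,
# which is not among the imports at the top of this file.
# def extract_text_from_pdf(file_path):
#     doc = fitz.open(file_path)  # file_path is already a string
#     pages = [page.get_text() for page in doc]
#     return pages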
def set_api_key(api_key):
    """Store the Groq API key used by later queries.

    Surrounding whitespace (for example a trailing newline from a paste) is
    removed before the key is stored in the module-level ``groq_api_key``.

    Args:
        api_key: Key text from the password textbox; may be ``None`` or blank.

    Returns:
        A status string: the success message when a non-blank key was stored,
        otherwise the same "provide a valid key" message process_query uses.
    """
    global groq_api_key

    # A blank submission still overwrites the stored key (as before), but it
    # is no longer reported as a success.
    groq_api_key = (api_key or "").strip()
    if not groq_api_key:
        return "β Please provide a valid Groq API key."
    return "β API key set!"
def process_query(query):
    """Generate notes and questions for the chunk that best matches *query*.

    Args:
        query: Chapter title or keyword typed by the user; passed unchanged
            to ``retrieve_text``.

    Returns:
        Markdown text: a heading built from the top result's ``title`` and
        ``pages`` entries followed by the output of
        ``generate_notes_questions``, or a short message when the API key,
        the index, or a matching chunk is missing.
    """
    if not groq_api_key:
        return "β Please provide a valid Groq API key."
    # Compare against None explicitly: the truth value of the index object
    # returned by build_faiss_index is not something to rely on.
    if faiss_index is None:
        return "β Please upload and process a PDF first."

    results = retrieve_text(query, faiss_index, chunk_texts, chunks)
    # Guard the results[0] access below against an empty retrieval.
    if not results:
        return "No matching section was found for this query."

    top = results[0]
    output = generate_notes_questions(top['text'], groq_api_key)
    return f"π **{top.get('title', 'Page')}** (pages: {top.get('pages')})\n\n{output}"
# Gradio UI: three steps (index a PDF, store the API key, run a query), each
# wired to one of the callbacks defined in this file.
# NOTE(review): SOURCE lost its indentation; the File and Checkbox inputs are
# assumed to be the only children of the Row -- confirm against the live app.
with gr.Blocks(title="RAG PDF Notes & Questions Generator") as demo:
    gr.Markdown(value="# π PDF Chapter Notes & Question Generator using Groq LLM")

    # Step 1: upload a PDF and build the index.
    with gr.Row():
        pdf = gr.File(label="Upload PDF", file_types=[".pdf"])
        use_chapter = gr.Checkbox(label="Split by Chapter", value=True)
    upload_btn = gr.Button(value="Process PDF")
    output_text = gr.Textbox(label="Upload Status")
    upload_btn.click(upload_pdf, inputs=[pdf, use_chapter], outputs=output_text)

    # Step 2: provide the Groq API key.
    api_key_input = gr.Textbox(label="π Groq API Key", type="password")
    api_btn = gr.Button(value="Set API Key")
    api_output = gr.Textbox(label="API Key Status")
    api_btn.click(set_api_key, inputs=[api_key_input], outputs=api_output)

    # Step 3: query the indexed document.
    query_input = gr.Textbox(label="Enter chapter title or page keyword")
    query_btn = gr.Button(value="Generate Notes & Questions")
    result_output = gr.Markdown()
    query_btn.click(process_query, inputs=[query_input], outputs=[result_output])

# Start the app when the module is executed.
demo.launch()