Spaces:
Sleeping
Sleeping
import os

import faiss
import fitz  # PyMuPDF
import gradio as gr
import numpy as np
from groq import Groq
from sentence_transformers import SentenceTransformer

# Read the Groq API key from the environment rather than hard-coding it.
groq_api_key = os.getenv("GROQ_API_KEY")
client = Groq(api_key=groq_api_key)

# Sentence-embedding model used for both document chunks and user queries.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Module-level state: populated by handle_pdf(), read by answer_query().
stored_chunks = []
stored_index = None
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of the PDF at *pdf_path*.

    Args:
        pdf_path: Filesystem path to a PDF readable by PyMuPDF.

    Returns:
        A single string with each page's extracted text appended in order.
    """
    pages = []
    # Context manager guarantees the document handle is closed even if a
    # page raises (the original left it open until garbage collection).
    with fitz.open(pdf_path) as doc:
        for page in doc:
            pages.append(page.get_text())
    # join() avoids the quadratic cost of repeated string concatenation.
    return "".join(pages)
def handle_pdf(file_path):
    """Extract, chunk, embed, and index an uploaded PDF.

    Stores the word chunks and a FAISS index over their embeddings in the
    module-level ``stored_chunks`` / ``stored_index`` so answer_query() can
    retrieve context later.

    Args:
        file_path: Path to the uploaded PDF, or a Gradio file wrapper that
            exposes the path via a ``.name`` attribute.

    Returns:
        A human-readable status string for the Gradio status textbox.
    """
    global stored_chunks, stored_index
    try:
        # Depending on the Gradio version, the File component may hand us a
        # wrapper object rather than a plain path string.
        path = getattr(file_path, "name", file_path)
        if path is None:
            return "β Error during PDF processing: no file provided"

        text = extract_text_from_pdf(path)

        # Simple chunking: fixed windows of 500 words.
        words = text.split()
        if not words:
            # Guard: a scanned/image-only PDF would otherwise crash below
            # when encoding an empty chunk list.
            return "β Error during PDF processing: no extractable text found"
        chunks = [' '.join(words[i:i + 500]) for i in range(0, len(words), 500)]

        # Embed chunks and build an exact (brute-force) L2 FAISS index.
        embeddings = model.encode(chunks)
        index = faiss.IndexFlatL2(embeddings.shape[1])
        # FAISS requires contiguous float32 input; coerce explicitly instead
        # of relying on the encoder's default dtype.
        index.add(np.ascontiguousarray(embeddings, dtype=np.float32))

        # Publish to module state only after everything succeeded.
        stored_chunks = chunks
        stored_index = index
        return "β PDF successfully processed. Ready for questions."
    except Exception as e:
        return f"β Error during PDF processing: {str(e)}"
def answer_query(query):
    """Answer *query* using the top FAISS-retrieved PDF chunks as context.

    Args:
        query: The user's natural-language question.

    Returns:
        The LLM's answer string, or an error/status message.
    """
    if not stored_chunks or stored_index is None:
        return "β Please upload and process a PDF first."
    try:
        # FAISS expects a 2-D float32 matrix of query vectors.
        query_vec = np.ascontiguousarray(
            model.encode(query), dtype=np.float32
        ).reshape(1, -1)

        # BUG FIX: never request more neighbours than stored chunks. FAISS
        # pads missing results with -1, and stored_chunks[-1] would silently
        # wrap to the LAST chunk under Python's negative indexing,
        # duplicating context instead of failing.
        k = min(3, len(stored_chunks))
        D, I = stored_index.search(query_vec, k=k)
        top_chunks = [stored_chunks[i] for i in I[0] if i >= 0]

        context = "\n\n".join(top_chunks)
        prompt = f"""Answer the question based on the context below:\n\nContext:\n{context}\n\nQuestion: {query}\nAnswer:"""
        response = client.chat.completions.create(
            model="llama3-8b-8192",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.2,  # low temperature: favour grounded answers
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        return f"β Error during answering: {str(e)}"
# Gradio front-end: one section to ingest a PDF, one to ask questions of it.
with gr.Blocks() as demo:
    gr.Markdown("# π PDF Q&A using Groq + LLaMA3")

    # --- PDF upload and processing status ---
    with gr.Row():
        file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
        process_output = gr.Textbox(label="Processing Status")
    process_button = gr.Button("π₯ Process PDF")
    process_button.click(fn=handle_pdf, inputs=[file_input], outputs=[process_output])

    # --- Question / answer section ---
    gr.Markdown("## π¬ Ask a Question from the PDF")
    question_input = gr.Textbox(label="Your Question")
    ask_button = gr.Button("π€ Ask")
    answer_output = gr.Textbox(label="Answer", lines=5)
    ask_button.click(fn=answer_query, inputs=[question_input], outputs=[answer_output])

demo.launch()