File size: 2,758 Bytes
e4f5822
 
11798ad
5a07078
2fd0797
e4f5822
 
 
5a07078
e4f5822
 
 
2fd0797
e4f5822
 
 
 
 
2fd0797
00f823c
 
 
 
 
2fd0797
5a07078
97f8372
e4f5822
d9c5c29
5a07078
 
d9c5c29
5a07078
 
 
e4f5822
5a07078
d9c5c29
5a07078
 
11798ad
5a07078
11798ad
 
 
5a07078
11798ad
 
 
 
 
 
 
 
1fb2c6f
 
11798ad
 
 
 
 
 
 
 
 
 
 
 
5a07078
11798ad
5a07078
11798ad
5a07078
11798ad
 
 
5a07078
11798ad
 
5a07078
11798ad
5a07078
11798ad
 
 
 
5a07078
e4f5822
11798ad
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import gradio as gr
import os
import fitz  # PyMuPDF
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
from groq import Groq

# βœ… Load Groq API key securely
# NOTE(review): os.getenv returns None when GROQ_API_KEY is unset; the Groq
# client is still constructed and will fail only at request time — confirm
# this is the intended failure mode.
groq_api_key = os.getenv("GROQ_API_KEY")
client = Groq(api_key=groq_api_key)

# Load embedding model (downloaded on first run; loaded once at import time)
model = SentenceTransformer('all-MiniLM-L6-v2')

# Module-level RAG state: written by handle_pdf(), read by answer_query().
# stored_chunks holds the word-chunked text of the last processed PDF;
# stored_index is the FAISS index over those chunks' embeddings (None until
# a PDF has been processed).
stored_chunks = []
stored_index = None

def extract_text_from_pdf(pdf_path):
    """Return the concatenated plain text of every page in a PDF.

    Args:
        pdf_path: Filesystem path to the PDF file.

    Returns:
        A single string with all pages' text in document order.
    """
    # fitz.Document supports the context-manager protocol; the original code
    # never closed the handle, leaking it on every call (and on any exception
    # raised while reading pages). str.join also avoids quadratic `+=`.
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text() for page in doc)

def handle_pdf(file_path):
    """Extract, chunk, embed, and index a PDF for later question answering.

    Side effects: replaces the module-level ``stored_chunks`` and
    ``stored_index`` on success.

    Args:
        file_path: Path to the uploaded PDF.

    Returns:
        A status string for the UI (success or error message).
    """
    global stored_chunks, stored_index

    try:
        # Read text
        text = extract_text_from_pdf(file_path)

        # Simple chunking by 500 words
        words = text.split()
        # Guard: a scanned/empty PDF yields no words; without this check,
        # model.encode([]) and embeddings.shape[1] below would raise.
        if not words:
            return "❌ Error during PDF processing: no extractable text found in this PDF."

        chunks = [' '.join(words[i:i+500]) for i in range(0, len(words), 500)]

        # Embed and build FAISS index. FAISS only accepts contiguous
        # float32 matrices, so convert explicitly rather than relying on
        # the encoder's output dtype.
        embeddings = np.asarray(model.encode(chunks), dtype=np.float32)
        index = faiss.IndexFlatL2(embeddings.shape[1])
        index.add(embeddings)

        # Store for later use
        stored_chunks = chunks
        stored_index = index

        return "βœ… PDF successfully processed. Ready for questions."
    except Exception as e:
        return f"❌ Error during PDF processing: {str(e)}"

def answer_query(query):
    """Answer a question via retrieval-augmented generation over the stored PDF.

    Retrieves the top chunks nearest to the query embedding and asks the
    Groq-hosted LLaMA3 model to answer from that context.

    Args:
        query: The user's natural-language question.

    Returns:
        The model's answer, or an error/status message string.
    """
    if not stored_chunks or stored_index is None:
        return "❌ Please upload and process a PDF first."

    try:
        # FAISS requires float32 row vectors.
        query_vec = np.asarray(model.encode(query), dtype=np.float32).reshape(1, -1)
        # Never request more neighbours than there are indexed chunks:
        # FAISS pads missing results with index -1, which the original code
        # turned into stored_chunks[-1] — silently duplicating the last chunk.
        k = min(3, len(stored_chunks))
        D, I = stored_index.search(query_vec, k=k)
        top_chunks = [stored_chunks[i] for i in I[0] if i >= 0]

        context = "\n\n".join(top_chunks)
        prompt = f"""Answer the question based on the context below:\n\nContext:\n{context}\n\nQuestion: {query}\nAnswer:"""

        response = client.chat.completions.create(
            model="llama3-8b-8192",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.2
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        return f"❌ Error during answering: {str(e)}"

# 🧠 Gradio UI: one row for upload/processing, then a Q&A section below.
with gr.Blocks() as demo:
    gr.Markdown("# πŸ“„ PDF Q&A using Groq + LLaMA3")

    # Upload row: file picker, status readout, and the trigger button.
    with gr.Row():
        pdf_file = gr.File(label="Upload PDF", file_types=[".pdf"])
        status_box = gr.Textbox(label="Processing Status")
        process_btn = gr.Button("πŸ“₯ Process PDF")

    # Wire processing: selected file in, status message out.
    process_btn.click(fn=handle_pdf, inputs=[pdf_file], outputs=[status_box])

    # Q&A section: free-text question against the processed PDF.
    gr.Markdown("## πŸ’¬ Ask a Question from the PDF")
    question_box = gr.Textbox(label="Your Question")
    ask_btn = gr.Button("πŸ€– Ask")
    answer_box = gr.Textbox(label="Answer", lines=5)

    ask_btn.click(fn=answer_query, inputs=[question_box], outputs=[answer_box])

demo.launch()