File size: 2,835 Bytes
698a416
 
 
 
 
 
 
 
 
3758b92
698a416
 
 
 
 
 
 
 
 
 
 
2b6093a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
698a416
 
 
 
 
 
 
 
 
 
2b6093a
698a416
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2b6093a
698a416
 
 
 
 
2b6093a
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import os
import gradio as gr
import faiss
import numpy as np
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
from groq import Groq

# πŸ” Groq API Key (embed securely for private use only)
GROQ_API_KEY = "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"

# πŸ“¦ Load embedding model
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# πŸ“‚ Global storage for vector index and chunks
chunks = []
index = None

# πŸ“„ PDF Text Extraction & Processing
def process_pdf(file):
    """Extract text from an uploaded PDF, chunk it, and build a FAISS index.

    Stores the resulting chunks and index in the module-level ``chunks`` and
    ``index`` globals so that ``ask_question`` can retrieve against them.
    Returns a user-facing status string (success or error) for the Gradio UI.
    """
    global chunks, index
    try:
        reader = PdfReader(file.name)
        # extract_text() can return None for image-only pages; treat as "".
        text = "\n".join(page.extract_text() or "" for page in reader.pages)

        if not text.strip():
            return "❌ No text found in the PDF. Please upload a different file."

        # 📝 Sliding-window chunking: 300-word windows advancing 250 words,
        # i.e. a 50-word overlap between consecutive chunks.
        window, overlap = 300, 50
        words = text.split()
        step = window - overlap
        chunks = [" ".join(words[start:start + window]) for start in range(0, len(words), step)]

        # 📊 Embed every chunk and index the vectors with an exact L2 index.
        vectors = np.array(model.encode(chunks))
        index = faiss.IndexFlatL2(vectors.shape[1])
        index.add(vectors)

        return f"✅ Processed {len(chunks)} chunks from uploaded PDF. You can now ask questions."
    except Exception as e:
        return f"❌ Error processing the PDF: {str(e)}"

# ❓ Ask a Question
def ask_question(query):
    if not chunks or index is None:
        return "⚠️ Please upload and process a PDF first."

    query_embedding = model.encode([query])
    distances, indices = index.search(np.array(query_embedding), k=3)
    context = "\n".join([chunks[i] for i in indices[0]])

    # Use Groq API for question answering
    client = Groq(api_key=GROQ_API_KEY)
    prompt = f"Answer the question based on the following context:\n\n{context}\n\nQuestion: {query}"

    try:
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="llama3-70b-8192"
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"❌ Error from Groq API: {str(e)}"

# πŸŽ›οΈ Gradio Interface
file_input = gr.File(label="πŸ“„ Upload PDF")
question_input = gr.Textbox(label="❓ Ask a Question about the PDF")
answer_output = gr.Textbox(label="πŸ“˜ Answer")

pdf_processor = gr.Interface(fn=process_pdf, inputs=file_input, outputs="text")
pdf_qa = gr.Interface(fn=ask_question, inputs=question_input, outputs=answer_output)

# Create a tabbed interface with "Upload PDF" and "Ask a Question" tabs
app = gr.TabbedInterface(
    [pdf_processor, pdf_qa],
    tab_names=["Upload PDF", "Ask a Question"]
)

# Launch the app
if __name__ == "__main__":
    app.launch()