# Spaces: kaleempk / Grok_APP_PDF (Sleeping) - File size: 3,444 Bytes - 59a061e

import os
import gradio as gr
from pypdf import PdfReader
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import requests

# Groq credentials and model selection.
# The API key is read from the GROQ_API_KEY environment variable only; a
# secret must never be committed as a source-code fallback. Any key that was
# previously hardcoded here should be treated as leaked and revoked.
# When the variable is unset the key is an empty string, and the Groq API
# will reject requests with an authentication error.
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
GROQ_MODEL = "llama3-8b-8192"

# Sentence-embedding model created once at import time and shared by
# embed_document() (document chunks) and answer_question() (user queries),
# so both are encoded by the same model.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

def extract_text_from_pdf(file):
    """Return the text of every page of a PDF, joined with newlines.

    ``file`` is passed straight to ``PdfReader``. A page whose
    ``extract_text()`` result is falsy contributes an empty string, so the
    output always contains one newline-separated entry per page.
    """
    page_texts = []
    for page in PdfReader(file).pages:
        extracted = page.extract_text()
        page_texts.append(extracted if extracted else "")
    return "\n".join(page_texts)

def embed_document(text, chunk_size=500):
    """Split *text* into fixed-size chunks and build a FAISS index over them.

    Args:
        text: Full document text to index.
        chunk_size: Number of characters per chunk; must be positive. The
            last chunk may be shorter.

    Returns:
        A ``(chunks, index)`` tuple: the list of chunk strings and a
        ``faiss.IndexFlatL2`` holding one embedding per chunk, added in the
        same order as ``chunks``.

    Raises:
        ValueError: If ``chunk_size`` is not positive, or if ``text`` is
            empty (there would be nothing to embed, and the embedding
            dimension could not be determined).
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")
    if not text:
        raise ValueError("Cannot index an empty document: no text was provided.")

    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
    # FAISS expects a C-contiguous float32 matrix of shape (n_chunks, dim).
    embeddings = np.ascontiguousarray(
        embedding_model.encode(chunks), dtype="float32"
    )
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return chunks, index

def query_groq(prompt, timeout=60):
    """Send *prompt* to the Groq chat-completions API and return the reply.

    The function never raises for network or response-format problems;
    every failure is reported as a human-readable string starting with
    "❌" so it can be shown directly in the UI.

    Args:
        prompt: Text sent as the user message.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` can block indefinitely.

    Returns:
        The assistant message content on success, otherwise an error
        description.
    """
    url = "https://api.groq.com/openai/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json"
    }
    payload = {
        "model": GROQ_MODEL,
        "messages": [
            {
                "role": "system",
                "content": (
                    "You are a helpful and knowledgeable AI assistant. A user has uploaded a document. "
                    "Your task is to analyze the content of the document and provide accurate, clear, and concise answers to any questions "
                    "the user asks based on that document. If the answer is not found in the document, politely state that the information is not available in the provided file."
                )
            },
            {"role": "user", "content": prompt}
        ],
        "temperature": 0.3
    }

    # Connection errors and timeouts are raised by post() itself, so they
    # must be handled separately from response parsing.
    try:
        response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    except requests.RequestException as e:
        return f"❌ Request failed: {e}"

    try:
        data = response.json()
        if 'choices' in data:
            return data['choices'][0]['message']['content']
        if 'error' in data:
            return f"❌ API Error: {data['error']['message']}"
        return "❌ Unexpected API response:\n" + str(data)
    except (ValueError, KeyError, IndexError, TypeError) as e:
        # ValueError: body is not valid JSON; the others: JSON was valid but
        # did not have the expected structure.
        return f"❌ Failed to parse response: {e}\nRaw: {response.text}"

# Module-level retrieval state for the currently loaded document.
# handle_upload() rebinds both names; answer_question() reads them.
doc_index = None     # FAISS index over the chunk embeddings, or None if nothing is loaded
doc_chunks = list()  # chunk strings, in the order they were added to the index

def handle_upload(file):
    """Extract, chunk and index an uploaded PDF, replacing the current document.

    Args:
        file: The value of the Gradio file input. ``None`` when nothing has
            been uploaded; otherwise an object exposing the path as
            ``.name``, or (depending on how the component is configured) the
            path itself.

    Returns:
        A status message for the UI.
    """
    global doc_chunks, doc_index

    # Clicking "Process Document" with no file selected passes None.
    if file is None:
        return "⚠️ Please upload a PDF first."

    pdf_path = getattr(file, "name", file)
    text = extract_text_from_pdf(pdf_path)

    # A PDF without a text layer yields only the newline separators between
    # pages. Clear any previous document so later questions are not answered
    # from stale content.
    if not text.strip():
        doc_chunks, doc_index = [], None
        return "⚠️ No extractable text was found in this PDF."

    doc_chunks, doc_index = embed_document(text)
    return "✅ Document processed. You may now ask questions."

def answer_question(question):
    """Answer *question* using the most relevant chunks of the loaded document.

    Up to five chunks nearest to the question embedding are retrieved from
    the FAISS index, joined into a context block and sent to the Groq API.

    Args:
        question: The user's question text.

    Returns:
        The model's answer, or a warning string when the question is blank
        or no document has been processed yet.
    """
    if not question or not question.strip():
        return "⚠️ Please enter a question."
    if not doc_chunks or doc_index is None:
        return "⚠️ Please upload a document first."

    query_embedding = np.ascontiguousarray(
        embedding_model.encode([question]), dtype="float32"
    )
    # Never request more neighbours than there are chunks: FAISS pads missing
    # results with -1, which as a Python list index would silently select the
    # last chunk and duplicate it in the context.
    top_k = min(5, len(doc_chunks))
    _, neighbor_ids = doc_index.search(query_embedding, k=top_k)
    context = "\n\n".join(doc_chunks[i] for i in neighbor_ids[0] if i >= 0)
    prompt = f"The user asked: '{question}'\n\nUse the following document content to answer:\n{context}"
    return query_groq(prompt)

# Build the Gradio interface. Components are created inside the Blocks
# context in the order they appear below.
with gr.Blocks() as demo:
    gr.Markdown("## 📄 RAG App with Groq API (PDF-Based Q&A)")
    # Upload controls: the PDF picker and the button that triggers processing
    # share one row.
    with gr.Row():
        file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
        upload_btn = gr.Button("Process Document")
    # Read-only box that shows the message returned by handle_upload().
    upload_status = gr.Textbox(label="Status", interactive=False)

    # Question/answer widgets.
    question = gr.Textbox(label="Ask a question about the document")
    answer = gr.Textbox(label="Answer", lines=5)

    # Event wiring: clicking the button indexes the selected file; submitting
    # the question box runs retrieval + the Groq call and fills the answer box.
    upload_btn.click(fn=handle_upload, inputs=file_input, outputs=upload_status)
    question.submit(fn=answer_question, inputs=question, outputs=answer)

# Start the app. This runs at module level, so importing this file also
# launches the server.
demo.launch()