# Grok_APP_PDF / app.py
# PDF-based RAG Q&A app: upload a PDF, embed it locally, answer questions via the Groq API.
import os
import gradio as gr
from pypdf import PdfReader
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import requests
# Groq API configuration.
# SECURITY FIX: the API key must come from the environment only — never commit a
# literal key as a fallback (the previous hardcoded key is leaked and must be revoked).
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
GROQ_MODEL = "llama3-8b-8192"

# Local sentence-embedding model used for both document chunks and queries.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
def extract_text_from_pdf(file):
    """Return the concatenated text of every page in *file*, newline-separated.

    Pages where pypdf cannot extract text (``extract_text()`` returns None)
    contribute an empty string rather than breaking the join.
    """
    pages = PdfReader(file).pages
    page_texts = []
    for page in pages:
        page_texts.append(page.extract_text() or "")
    return "\n".join(page_texts)
def embed_document(text, chunk_size=500):
    """Split *text* into fixed-size character chunks and index their embeddings.

    Returns a ``(chunks, index)`` pair: the list of raw text chunks and a
    FAISS L2 flat index over their sentence-transformer embeddings.
    """
    chunks = []
    for start in range(0, len(text), chunk_size):
        chunks.append(text[start:start + chunk_size])
    vectors = embedding_model.encode(chunks)
    dim = vectors.shape[1]
    index = faiss.IndexFlatL2(dim)
    index.add(np.array(vectors))
    return chunks, index
def query_groq(prompt, timeout=60):
    """Send *prompt* to the Groq chat-completions API and return the reply text.

    Args:
        prompt: The user message (already containing any retrieved context).
        timeout: Seconds to wait for the HTTP response before giving up.
            FIX: the original call had no timeout and could hang the app forever.

    Returns:
        The assistant's reply, or a human-readable "❌ ..." error string on
        API or network failure (callers display the string either way).
    """
    url = "https://api.groq.com/openai/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json"
    }
    payload = {
        "model": GROQ_MODEL,
        "messages": [
            {
                "role": "system",
                "content": (
                    "You are a helpful and knowledgeable AI assistant. A user has uploaded a document. "
                    "Your task is to analyze the content of the document and provide accurate, clear, and concise answers to any questions "
                    "the user asks based on that document. If the answer is not found in the document, politely state that the information is not available in the provided file."
                )
            },
            {"role": "user", "content": prompt}
        ],
        "temperature": 0.3
    }
    try:
        response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    except requests.RequestException as e:
        # Network-level failure (DNS, connection refused, timeout, ...).
        return f"❌ Request failed: {e}"
    try:
        data = response.json()
        if 'choices' in data:
            return data['choices'][0]['message']['content']
        elif 'error' in data:
            return f"❌ API Error: {data['error']['message']}"
        else:
            return "❌ Unexpected API response:\n" + str(data)
    except Exception as e:
        # Non-JSON body (e.g. an HTML error page from a proxy).
        return f"❌ Failed to parse response: {e}\nRaw: {response.text}"
# Module-level RAG state, populated by handle_upload() and read by answer_question().
doc_chunks = []    # list[str]: raw text chunks of the most recently processed PDF
doc_index = None   # faiss index over embeddings of doc_chunks, or None before upload
def handle_upload(file):
    """Extract, chunk, and index the uploaded PDF into module-level state.

    Args:
        file: Gradio file object (``file.name`` is the temp path), or None
            when the user clicks the button without selecting a file.

    Returns:
        A status string for the UI.
    """
    global doc_chunks, doc_index
    # FIX: guard against no file — the original crashed with AttributeError
    # on ``None.name`` when the button was clicked before an upload.
    if file is None:
        return "⚠️ Please select a PDF file before processing."
    text = extract_text_from_pdf(file.name)
    if not text.strip():
        # Scanned/image-only PDFs yield no extractable text; indexing them is useless.
        return "⚠️ No extractable text found in this PDF."
    doc_chunks, doc_index = embed_document(text)
    return "βœ… Document processed. You may now ask questions."
def answer_question(question):
    """Retrieve the most relevant chunks for *question* and ask Groq to answer.

    Returns the model's answer, or a warning string if no document is loaded.
    """
    if not doc_chunks or doc_index is None:
        return "⚠️ Please upload a document first."
    query_embedding = embedding_model.encode([question])
    # FIX: clamp k to the number of indexed chunks — with fewer than 5 chunks
    # FAISS pads results with -1 ids, and doc_chunks[-1] would silently pull
    # in the wrong (last) chunk as context.
    k = min(5, len(doc_chunks))
    D, I = doc_index.search(np.array(query_embedding), k=k)
    context = "\n\n".join(doc_chunks[i] for i in I[0] if i >= 0)
    prompt = f"The user asked: '{question}'\n\nUse the following document content to answer:\n{context}"
    return query_groq(prompt)
# Gradio UI: one row for upload + processing, then a question box and answer box.
with gr.Blocks() as demo:
    gr.Markdown("## πŸ“„ RAG App with Groq API (PDF-Based Q&A)")

    with gr.Row():
        file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
        upload_btn = gr.Button("Process Document")

    upload_status = gr.Textbox(label="Status", interactive=False)
    question = gr.Textbox(label="Ask a question about the document")
    answer = gr.Textbox(label="Answer", lines=5)

    # Wire events: button processes the PDF; pressing Enter in the question
    # box runs retrieval + generation.
    upload_btn.click(
        fn=handle_upload,
        inputs=file_input,
        outputs=upload_status,
    )
    question.submit(
        fn=answer_question,
        inputs=question,
        outputs=answer,
    )

demo.launch()