# Grok_APP_PDF / app.py
# PDF-based RAG Q&A app: upload a PDF, embed it locally, answer questions via the Groq API.
import os
import gradio as gr
from pypdf import PdfReader
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import requests
# Groq API configuration.
# SECURITY FIX: the API key must come from the environment only — never commit a
# literal key as a fallback (the previous hardcoded key is leaked and must be revoked).
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
GROQ_MODEL = "llama3-8b-8192"

# Local sentence-embedding model used for both document chunks and queries.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
def extract_text_from_pdf(file):
    """Return the concatenated text of every page in *file*, newline-separated.

    Pages where pypdf cannot extract text (``extract_text()`` returns None)
    contribute an empty string rather than breaking the join.
    """
    pages = PdfReader(file).pages
    page_texts = []
    for page in pages:
        page_texts.append(page.extract_text() or "")
    return "\n".join(page_texts)
def embed_document(text, chunk_size=500):
    """Split *text* into fixed-size character chunks and index their embeddings.

    Returns a ``(chunks, index)`` pair: the list of raw text chunks and a
    FAISS L2 flat index over their sentence-transformer embeddings.
    """
    chunks = []
    for start in range(0, len(text), chunk_size):
        chunks.append(text[start:start + chunk_size])
    vectors = embedding_model.encode(chunks)
    dim = vectors.shape[1]
    index = faiss.IndexFlatL2(dim)
    index.add(np.array(vectors))
    return chunks, index
def query_groq(prompt, timeout=60):
    """Send *prompt* to the Groq chat-completions API and return the reply text.

    Args:
        prompt: The user message (already containing any retrieved context).
        timeout: Seconds to wait for the HTTP response before giving up.
            FIX: the original call had no timeout and could hang the app forever.

    Returns:
        The assistant's reply, or a human-readable "❌ ..." error string on
        API or network failure (callers display the string either way).
    """
    url = "https://api.groq.com/openai/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json"
    }
    payload = {
        "model": GROQ_MODEL,
        "messages": [
            {
                "role": "system",
                "content": (
                    "You are a helpful and knowledgeable AI assistant. A user has uploaded a document. "
                    "Your task is to analyze the content of the document and provide accurate, clear, and concise answers to any questions "
                    "the user asks based on that document. If the answer is not found in the document, politely state that the information is not available in the provided file."
                )
            },
            {"role": "user", "content": prompt}
        ],
        "temperature": 0.3
    }
    try:
        response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    except requests.RequestException as e:
        # Network-level failure (DNS, connection refused, timeout, ...).
        return f"❌ Request failed: {e}"
    try:
        data = response.json()
        if 'choices' in data:
            return data['choices'][0]['message']['content']
        elif 'error' in data:
            return f"❌ API Error: {data['error']['message']}"
        else:
            return "❌ Unexpected API response:\n" + str(data)
    except Exception as e:
        # Non-JSON body (e.g. an HTML error page from a proxy).
        return f"❌ Failed to parse response: {e}\nRaw: {response.text}"
# Module-level RAG state, populated by handle_upload() and read by answer_question().
doc_chunks = []    # list[str]: raw text chunks of the most recently processed PDF
doc_index = None   # faiss index over embeddings of doc_chunks, or None before upload
def handle_upload(file):
    """Extract, chunk, and index the uploaded PDF into module-level state.

    Args:
        file: Gradio file object (``file.name`` is the temp path), or None
            when the user clicks the button without selecting a file.

    Returns:
        A status string for the UI.
    """
    global doc_chunks, doc_index
    # FIX: guard against no file — the original crashed with AttributeError
    # on ``None.name`` when the button was clicked before an upload.
    if file is None:
        return "⚠️ Please select a PDF file before processing."
    text = extract_text_from_pdf(file.name)
    if not text.strip():
        # Scanned/image-only PDFs yield no extractable text; indexing them is useless.
        return "⚠️ No extractable text found in this PDF."
    doc_chunks, doc_index = embed_document(text)
    return "βœ… Document processed. You may now ask questions."
def answer_question(question):
    """Retrieve the most relevant chunks for *question* and ask Groq to answer.

    Returns the model's answer, or a warning string if no document is loaded.
    """
    if not doc_chunks or doc_index is None:
        return "⚠️ Please upload a document first."
    query_embedding = embedding_model.encode([question])
    # FIX: clamp k to the number of indexed chunks — with fewer than 5 chunks
    # FAISS pads results with -1 ids, and doc_chunks[-1] would silently pull
    # in the wrong (last) chunk as context.
    k = min(5, len(doc_chunks))
    D, I = doc_index.search(np.array(query_embedding), k=k)
    context = "\n\n".join(doc_chunks[i] for i in I[0] if i >= 0)
    prompt = f"The user asked: '{question}'\n\nUse the following document content to answer:\n{context}"
    return query_groq(prompt)
# Gradio UI: one row for upload + processing, then a question box and answer box.
with gr.Blocks() as demo:
    gr.Markdown("## πŸ“„ RAG App with Groq API (PDF-Based Q&A)")

    with gr.Row():
        file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
        upload_btn = gr.Button("Process Document")

    upload_status = gr.Textbox(label="Status", interactive=False)
    question = gr.Textbox(label="Ask a question about the document")
    answer = gr.Textbox(label="Answer", lines=5)

    # Wire events: button processes the PDF; pressing Enter in the question
    # box runs retrieval + generation.
    upload_btn.click(
        fn=handle_upload,
        inputs=file_input,
        outputs=upload_status,
    )
    question.submit(
        fn=answer_question,
        inputs=question,
        outputs=answer,
    )

demo.launch()