# Hugging Face Space: PDF-based RAG Q&A app (Groq API + FAISS + sentence-transformers)
| import os | |
| import gradio as gr | |
| from pypdf import PdfReader | |
| from sentence_transformers import SentenceTransformer | |
| import faiss | |
| import numpy as np | |
| import requests | |
# Groq API configuration.
# SECURITY: the key must come only from the environment. The original code
# committed a real key as the os.getenv fallback, which leaks the credential
# to anyone who can read this file — that key should be revoked.
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
GROQ_MODEL = "llama3-8b-8192"

# Sentence-embedding model, shared by document chunking and query encoding.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
def extract_text_from_pdf(file):
    """Return the text of every page in *file*, pages separated by newlines.

    Pages for which pypdf cannot extract text (extract_text() returns None)
    contribute an empty string rather than breaking the join.
    """
    pages_text = []
    for page in PdfReader(file).pages:
        pages_text.append(page.extract_text() or "")
    return "\n".join(pages_text)
def embed_document(text, chunk_size=500):
    """Split *text* into fixed-size chunks and build a FAISS index over them.

    Returns a (chunks, index) pair: the list of character chunks and an
    IndexFlatL2 holding one embedding per chunk, in the same order.
    """
    pieces = []
    for start in range(0, len(text), chunk_size):
        pieces.append(text[start:start + chunk_size])
    vectors = embedding_model.encode(pieces)
    store = faiss.IndexFlatL2(vectors.shape[1])
    store.add(np.array(vectors))
    return pieces, store
def query_groq(prompt, timeout=30):
    """Send *prompt* to the Groq chat-completions endpoint and return the reply.

    Parameters
    ----------
    prompt : str
        User message; a fixed system prompt frames it as document Q&A.
    timeout : float, optional
        Seconds to wait for the HTTP request (the original code had no
        timeout, so a stalled connection would hang the app forever).

    Returns
    -------
    str
        The assistant's answer, or a human-readable error string. This
        function never raises; all failures are reported as return values
        because the result is shown directly in the UI.
    """
    url = "https://api.groq.com/openai/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json"
    }
    payload = {
        "model": GROQ_MODEL,
        "messages": [
            {
                "role": "system",
                "content": (
                    "You are a helpful and knowledgeable AI assistant. A user has uploaded a document. "
                    "Your task is to analyze the content of the document and provide accurate, clear, and concise answers to any questions "
                    "the user asks based on that document. If the answer is not found in the document, politely state that the information is not available in the provided file."
                )
            },
            {"role": "user", "content": prompt}
        ],
        # Low temperature: answers should stick closely to the document.
        "temperature": 0.3
    }
    try:
        response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    except requests.RequestException as e:
        # Network-level failure (DNS, refused connection, timeout, ...).
        return f"❌ Request failed: {e}"
    try:
        data = response.json()
    except ValueError as e:
        # Body was not JSON — surface the raw text for debugging.
        return f"❌ Failed to parse response: {e}\nRaw: {response.text}"
    if 'choices' in data:
        return data['choices'][0]['message']['content']
    if 'error' in data:
        return f"❌ API Error: {data['error']['message']}"
    return "❌ Unexpected API response:\n" + str(data)
# Module-level retrieval state, populated by handle_upload() and read by
# answer_question(). Holding it in globals means the app serves one
# document at a time, shared across all users of this process.
doc_chunks = []   # list of fixed-size text chunks of the current document
doc_index = None  # FAISS index over the chunk embeddings; None until a PDF is processed
def handle_upload(file):
    """Extract text from the uploaded PDF and (re)build the retrieval index.

    Parameters
    ----------
    file
        Gradio file object with a ``.name`` path, or None when the button
        is clicked without selecting a file (the original code crashed
        with AttributeError in that case).

    Returns
    -------
    str
        Status message for the UI.
    """
    global doc_chunks, doc_index
    if file is None:
        return "⚠️ Please upload a PDF file first."
    text = extract_text_from_pdf(file.name)
    doc_chunks, doc_index = embed_document(text)
    return "✅ Document processed. You may now ask questions."
def answer_question(question):
    """Answer *question* from the uploaded document via retrieval + Groq.

    Embeds the question, retrieves the nearest chunks from the FAISS index,
    and asks the LLM to answer using that context. Returns a warning string
    if no document has been processed yet.
    """
    if not doc_chunks or doc_index is None:
        return "⚠️ Please upload a document first."
    query_embedding = embedding_model.encode([question])
    # Never request more neighbours than there are chunks: FAISS pads
    # out-of-range results with -1, and doc_chunks[-1] would then silently
    # duplicate the last chunk via Python's negative indexing.
    k = min(5, len(doc_chunks))
    D, I = doc_index.search(np.array(query_embedding), k=k)
    context = "\n\n".join(doc_chunks[i] for i in I[0] if i >= 0)
    prompt = f"The user asked: '{question}'\n\nUse the following document content to answer:\n{context}"
    return query_groq(prompt)
# ---- Gradio UI ---------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("## π RAG App with Groq API (PDF-Based Q&A)")

    # Upload row: file picker plus the button that triggers indexing.
    with gr.Row():
        pdf_file = gr.File(label="Upload PDF", file_types=[".pdf"])
        process_btn = gr.Button("Process Document")

    status_box = gr.Textbox(label="Status", interactive=False)
    question_box = gr.Textbox(label="Ask a question about the document")
    answer_box = gr.Textbox(label="Answer", lines=5)

    # Wiring: the button builds the index; pressing Enter in the question
    # box runs retrieval + LLM and fills the answer box.
    process_btn.click(fn=handle_upload, inputs=pdf_file, outputs=status_box)
    question_box.submit(fn=answer_question, inputs=question_box, outputs=answer_box)

demo.launch()