import os

import fitz  # PyMuPDF
import faiss
import numpy as np
import gradio as gr
from groq import Groq
from sentence_transformers import SentenceTransformer
# ✅ Load Groq API key from Hugging Face Secrets
client = Groq(api_key=os.environ["GROQ_API_KEY"])

# ✅ Sentence embedding model
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
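# Note: all-MiniLM-L6-v2 produces 384-dimensional embeddings, which is why the
# FAISS index below is built with faiss.IndexFlatL2(384).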
# === PDF → Text extraction ===
def extract_text_from_pdf(pdf_path):
    text = ""
    with fitz.open(pdf_path) as doc:
        for page in doc:
            text += page.get_text()
    return text
# === Chunking text ===
def chunk_text(text, chunk_size=500):
    sentences = text.split(". ")
    chunks, current = [], ""
    for sentence in sentences:
        if len(current) + len(sentence) < chunk_size:
            current += sentence + ". "
        else:
            chunks.append(current.strip())
            current = sentence + ". "
    if current:
        chunks.append(current.strip())
    return chunks
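# Note: chunking is greedy by sentence; whole sentences are packed into a chunk
# until adding another would push it past roughly `chunk_size` characters.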
# === Vector store (FAISS) ===
class VectorStore:
    def __init__(self):
        self.index = faiss.IndexFlatL2(384)
        self.chunks = []

    def add(self, embeddings, texts):
        self.index.add(np.array(embeddings))
        self.chunks.extend(texts)

    def search(self, query, top_k=5):
        vec = embedding_model.encode([query])
        _, I = self.index.search(np.array(vec), top_k)
        return [self.chunks[i] for i in I[0]]

vs = VectorStore()

system_prompt = "You are a study supervisor helping students understand their uploaded documents."
# === Ask LLaMA 3 using Groq ===
def ask_llama3(system_prompt, user_prompt):
    try:
        result = client.chat.completions.create(
            model="llama3-8b-8192",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
        )
        return result.choices[0].message.content
    except Exception as e:
        return f"❌ Groq API Error: {e}"
# === PDF upload handler ===
def upload_pdf(pdf_file):
    try:
        # gr.File may pass a filepath string or a tempfile-like object,
        # depending on the Gradio version; handle both.
        pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
        text = extract_text_from_pdf(pdf_path)
        chunks = chunk_text(text)
        embeddings = embedding_model.encode(chunks)
        vs.add(embeddings, chunks)
        return "✅ Document uploaded and processed!"
    except Exception as e:
        return f"❌ PDF Processing Error: {e}"
# === QA handler ===
def ask_question(question):
    if not vs.chunks:
        return "⚠️ Please upload and process a PDF document first."
    try:
        docs = vs.search(question)
        context = "\n".join(docs)
        prompt = f"Use the context below to answer the question.\n\nContext:\n{context}\n\nQuestion: {question}"
        return ask_llama3(system_prompt, prompt)
    except Exception as e:
        return f"❌ Question Answering Error: {e}"
# === Gradio UI ===
with gr.Blocks() as demo:
    gr.Markdown("## RAG PDF QA using LLaMA3 via Groq API")
    with gr.Row():
        pdf_file = gr.File(label="Upload PDF Document")
        upload_button = gr.Button("Process PDF")
    with gr.Row():
        question = gr.Textbox(label="Ask a question from the document")
        ask_button = gr.Button("Ask")
    answer = gr.Textbox(label="Answer", lines=6)

    upload_button.click(upload_pdf, inputs=pdf_file, outputs=answer)
    ask_button.click(ask_question, inputs=question, outputs=answer)

demo.launch()
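# Likely requirements.txt for this Space (package names inferred from the imports
# above; adjust or pin versions as needed):
#   gradio
#   groq
#   sentence-transformers
#   faiss-cpu
#   pymupdf
#   numpy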