File size: 3,393 Bytes
8107894
722f7a0
8107894
 
 
 
 
 
722f7a0
8107894
722f7a0
 
8107894
 
722f7a0
8107894
 
 
 
 
 
 
722f7a0
8107894
 
 
 
 
 
 
 
 
 
 
 
 
722f7a0
8107894
 
 
 
 
 
 
 
 
 
722f7a0
 
8107894
 
 
722f7a0
8107894
722f7a0
8107894
 
 
 
 
 
 
 
 
 
 
 
 
722f7a0
8107894
 
 
 
 
 
 
 
722f7a0
8107894
722f7a0
8107894
 
722f7a0
8107894
 
 
722f7a0
 
8107894
722f7a0
8107894
 
 
722f7a0
8107894
722f7a0
8107894
 
722f7a0
 
 
8107894
722f7a0
 
8107894
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import os
import fitz  # PyMuPDF
import faiss
import numpy as np
import gradio as gr
from groq import Groq
from sentence_transformers import SentenceTransformer

# βœ… Load Groq API key from Hugging Face Secrets
# NOTE: os.environ[...] raises KeyError at import time if GROQ_API_KEY is unset.
client = Groq(api_key=os.environ["GROQ_API_KEY"])

# βœ… Sentence embedding model
# all-MiniLM-L6-v2 emits 384-dim vectors — must match the FAISS index dimension below.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# === PDF β†’ Text extraction ===
def extract_text_from_pdf(pdf_path):
    """Return the concatenated plain text of every page of the PDF at *pdf_path*."""
    with fitz.open(pdf_path) as doc:
        page_texts = [page.get_text() for page in doc]
    return "".join(page_texts)

# === Chunking text ===
def chunk_text(text, chunk_size=500):
    """Split *text* into chunks of roughly ``chunk_size`` characters.

    Splitting happens on the sentence separator ". " so a chunk never cuts a
    sentence in half; a single sentence longer than ``chunk_size`` becomes its
    own oversized chunk.

    Args:
        text: Raw document text to split.
        chunk_size: Soft upper bound on a chunk's length, in characters.

    Returns:
        list[str]: Stripped, non-empty text chunks.
    """
    sentences = text.split(". ")
    chunks, current = [], ""
    for sentence in sentences:
        if len(current) + len(sentence) < chunk_size:
            current += sentence + ". "
        else:
            # Fix: guard the flush. The original appended unconditionally,
            # emitting an empty "" chunk whenever the very first sentence
            # already exceeded chunk_size (current was still "").
            if current.strip():
                chunks.append(current.strip())
            current = sentence + ". "
    if current.strip():
        chunks.append(current.strip())
    return chunks

# === Vector store (FAISS) ===
class VectorStore:
    """In-memory FAISS L2 index paired with the raw text of each stored chunk."""

    def __init__(self):
        # 384 = embedding dimension of all-MiniLM-L6-v2 (see embedding_model above).
        self.index = faiss.IndexFlatL2(384)
        self.chunks = []

    def add(self, embeddings, texts):
        """Index *embeddings* and remember the matching *texts* in parallel."""
        vectors = np.array(embeddings)
        self.index.add(vectors)
        self.chunks.extend(texts)

    def search(self, query, top_k=5):
        """Return the *top_k* stored chunks nearest to *query* (L2 distance)."""
        query_vec = np.array(embedding_model.encode([query]))
        _, indices = self.index.search(query_vec, top_k)
        return [self.chunks[idx] for idx in indices[0]]

# Module-level singletons shared by the Gradio handlers below.
vs = VectorStore()
system_prompt = "You are a study supervisor helping students understand their uploaded documents."

# === Ask LLaMA 3 using Groq ===
def ask_llama3(system_prompt, user_prompt):
    """Send a system/user prompt pair to LLaMA 3 via the Groq API.

    Returns the model's reply text; on any API failure the error is returned
    as a string (not raised) so the Gradio UI can display it directly.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    try:
        response = client.chat.completions.create(
            model="llama3-8b-8192",
            messages=messages,
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"❌ Groq API Error: {e}"

# === PDF upload handler ===
def upload_pdf(pdf_file):
    """Gradio handler: extract, chunk, embed, and index an uploaded PDF.

    Returns a status string (success or error) for display in the UI.
    """
    try:
        raw_text = extract_text_from_pdf(pdf_file.name)
        pieces = chunk_text(raw_text)
        vectors = embedding_model.encode(pieces)
        vs.add(vectors, pieces)
        return "βœ… Document uploaded and processed!"
    except Exception as e:
        return f"❌ PDF Processing Error: {e}"

# === QA handler ===
def ask_question(question):
    """Gradio handler: answer *question* using the indexed document chunks.

    Retrieves the nearest chunks, builds a context-stuffed prompt, and
    delegates to ask_llama3. Errors come back as display strings.
    """
    if not vs.chunks:
        return "⚠️ Please upload and process a PDF document first."
    try:
        retrieved = vs.search(question)
        context = "\n".join(retrieved)
        prompt = (
            f"Use the context below to answer the question.\n\n"
            f"Context:\n{context}\n\nQuestion: {question}"
        )
        return ask_llama3(system_prompt, prompt)
    except Exception as e:
        return f"❌ Question Answering Error: {e}"

# === Gradio UI ===
# Two-row layout: row 1 handles PDF upload/processing, row 2 handles Q&A.
with gr.Blocks() as demo:
    gr.Markdown("## πŸ“š RAG PDF QA using LLaMA3 via Groq API")
    with gr.Row():
        pdf_file = gr.File(label="Upload PDF Document")
        upload_button = gr.Button("Process PDF")
    with gr.Row():
        question = gr.Textbox(label="Ask a question from the document")
        ask_button = gr.Button("Ask")
        answer = gr.Textbox(label="Answer", lines=6)

    # NOTE(review): both handlers write to the same "Answer" textbox, so the
    # upload status message also appears there — presumably intentional; a
    # dedicated status box would be clearer. Confirm before changing.
    upload_button.click(upload_pdf, inputs=pdf_file, outputs=answer)
    ask_button.click(ask_question, inputs=question, outputs=answer)

demo.launch()