Dani786 committed on
Commit
8107894
·
verified ·
1 Parent(s): 7a5a5e8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +104 -0
app.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import fitz
3
+ import faiss
4
+ import numpy as np
5
+ import gradio as gr
6
+ from groq import Groq
7
+ from sentence_transformers import SentenceTransformer
8
+
9
# === Groq API key ===
# SECURITY: the key must never be committed to source control. Provide it via
# the GROQ_API_KEY environment variable (on Hugging Face Spaces: Settings ->
# Variables and secrets). The key that used to be hard-coded here was published
# with the commit and must be revoked and replaced in the Groq console.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    # Fail at startup with an actionable message instead of on the first request.
    raise RuntimeError(
        "GROQ_API_KEY is not set. Define it as an environment variable "
        "(or Space secret) before starting the app."
    )
client = Groq(api_key=GROQ_API_KEY)

# Sentence-embedding model shared by document indexing and query encoding.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
13
+
14
+ # === PDF → Text ===
15
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of the PDF at *pdf_path*, concatenated.

    The document is opened with PyMuPDF (``fitz``), pages are read in document
    order, and their text is joined with no separator between pages.
    """
    with fitz.open(pdf_path) as document:
        page_texts = [page.get_text() for page in document]
    return "".join(page_texts)
21
+
22
+ # === Chunking ===
23
def chunk_text(text, chunk_size=500):
    """Split *text* into chunks of whole sentences of roughly *chunk_size* chars.

    Sentences are delimited by ``". "``. They are packed greedily: a sentence
    is appended to the current chunk while the running length stays below
    *chunk_size*; otherwise the current chunk is closed and a new one starts.
    A single sentence longer than *chunk_size* is kept intact as its own chunk.

    Args:
        text: The text to split.
        chunk_size: Soft upper bound on the length of a chunk, in characters.

    Returns:
        A list of non-empty, whitespace-stripped chunks. Empty or
        whitespace-only input yields an empty list.
    """
    chunks, current = [], ""
    for sentence in text.split(". "):
        # Skip blank fragments (empty input, or text that ends with ". ");
        # they would otherwise turn into stray "." chunks.
        if not sentence.strip():
            continue
        # split() removed the period of every sentence except, possibly, the
        # last one; only restore it when it is actually missing.
        if sentence.rstrip().endswith("."):
            terminated = sentence
        else:
            terminated = sentence + "."
        # Close the running chunk only if there is one; this avoids emitting an
        # empty chunk when the very first sentence is already too long.
        if current and len(current) + len(sentence) >= chunk_size:
            chunks.append(current.strip())
            current = ""
        current += terminated + " "
    if current:
        chunks.append(current.strip())
    return chunks
35
+
36
+ # === Embedding + FAISS ===
37
class VectorStore:
    """In-memory FAISS index paired with the text chunks it was built from.

    Row ``i`` of the index corresponds to ``self.chunks[i]``; ``add`` keeps the
    two in step and ``search`` maps FAISS row ids back to chunk text.
    """

    def __init__(self, dim=384):
        """Create an empty exact (flat) L2 index.

        Args:
            dim: Dimensionality of the stored vectors. Defaults to 384, the
                size this store has always used (presumably the output size of
                the configured embedding model; confirm if the model changes).
        """
        self.index = faiss.IndexFlatL2(dim)
        self.chunks = []

    def add(self, embeddings, texts):
        """Append *embeddings* and their matching *texts* to the store.

        Args:
            embeddings: Array-like of shape ``(len(texts), dim)``.
            texts: The text chunks the embeddings were computed from.

        Raises:
            ValueError: If the number of vectors and texts differ, which would
                misalign index rows and chunks.
        """
        texts = list(texts)
        if not texts:
            # Nothing to index; an empty embedding array would not have the
            # 2-D shape FAISS expects.
            return
        # FAISS operates on float32 matrices.
        vectors = np.asarray(embeddings, dtype="float32")
        if len(vectors) != len(texts):
            raise ValueError(
                f"Got {len(vectors)} embeddings for {len(texts)} texts"
            )
        self.index.add(vectors)
        self.chunks.extend(texts)

    def search(self, query, top_k=5):
        """Return up to *top_k* stored chunks closest to *query*.

        Args:
            query: The query string; it is embedded with the module-level
                ``embedding_model``.
            top_k: Maximum number of chunks to return.

        Returns:
            A list of chunk strings in the order FAISS returned them. The list
            is shorter than *top_k* when fewer chunks are stored, and empty
            when the store is empty.
        """
        if not self.chunks or top_k <= 0:
            return []
        query_vec = np.asarray(embedding_model.encode([query]), dtype="float32")
        # Never ask for more neighbours than there are vectors: FAISS pads the
        # missing hits with -1, which Python would index as "last chunk".
        k = min(top_k, len(self.chunks))
        _, indices = self.index.search(query_vec, k)
        return [self.chunks[i] for i in indices[0] if i >= 0]
50
+
51
# Persona sent to the model as the system message for every question.
system_prompt = "You are a study supervisor helping students understand their documents."

# Single process-wide store; each processed PDF adds its chunks to it.
vs = VectorStore()
53
+
54
+ # === Groq LLaMA 3 Inference ===
55
def ask_llama3(system_prompt, user_prompt, model="llama3-8b-8192"):
    """Send one system + user message pair to Groq and return the reply text.

    Args:
        system_prompt: Content of the ``system`` message.
        user_prompt: Content of the ``user`` message.
        model: Groq model id to query. Defaults to the id this app has always
            used. NOTE(review): hosted model ids get retired over time; confirm
            the default is still served before relying on it.

    Returns:
        The content of the first completion choice, or a human-readable error
        string starting with "❌ Groq API Error:" if the request fails.
    """
    try:
        result = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
        )
        return result.choices[0].message.content
    except Exception as e:
        # UI boundary: surface the failure in the answer box instead of raising.
        return f"❌ Groq API Error: {e}"
67
+
68
+ # === Gradio Logic ===
69
def upload_pdf(pdf_file):
    """Extract, chunk, embed and index the uploaded PDF.

    Args:
        pdf_file: The value of the upload component: ``None`` when nothing was
            selected, otherwise a path string or an object whose ``name``
            attribute holds the path of the uploaded file.

    Returns:
        A status message for the UI: success, a warning for missing or
        text-less input, or an error description if processing fails.
    """
    if pdf_file is None:
        # Button pressed with no file selected.
        return "⚠️ Please upload a PDF first."
    # Accept both a plain path and a file-like wrapper exposing `.name`.
    pdf_path = getattr(pdf_file, "name", pdf_file)
    try:
        text = extract_text_from_pdf(pdf_path)
        if not text.strip():
            # E.g. scanned/image-only PDFs: there is nothing to index.
            return "⚠️ No extractable text found in this PDF."
        chunks = chunk_text(text)
        embeddings = embedding_model.encode(chunks)
        vs.add(embeddings, chunks)
        return "✅ Document uploaded and processed!"
    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising.
        return f"❌ Error in PDF processing: {e}"
78
+
79
def ask_question(question):
    """Answer *question* from the indexed document chunks.

    The chunks closest to the question are retrieved from the vector store,
    joined with newlines as context, and sent to the LLM with the question.

    Args:
        question: The user's question text.

    Returns:
        The model's answer, a warning when the question is blank or no
        document has been indexed yet, or an error description on failure.
    """
    if not question or not question.strip():
        # Do not spend an embedding and an LLM call on an empty prompt.
        return "⚠️ Please enter a question."
    if not vs.chunks:
        return "⚠️ Please upload a document first."
    try:
        docs = vs.search(question)
        context = "\n".join(docs)
        user_prompt = f"Use this context to answer the question:\n\n{context}\n\nQuestion: {question}"
        return ask_llama3(system_prompt, user_prompt)
    except Exception as e:
        # UI boundary: report the failure in the answer box instead of raising.
        return f"❌ Error during question answering: {e}"
89
+
90
+ # === Gradio UI ===
91
# Layout: an upload row, a question row, and one shared output box that shows
# both the processing status and the answers.
with gr.Blocks() as demo:
    gr.Markdown("## 📚 RAG PDF QA with LLaMA3 + Groq")
    with gr.Row():
        pdf_file = gr.File(label="Upload PDF")
        upload_button = gr.Button("Process PDF")
    with gr.Row():
        user_question = gr.Textbox(label="Ask your question here")
        submit_button = gr.Button("Ask")
    answer_box = gr.Textbox(label="Answer", lines=5)

    upload_button.click(upload_pdf, inputs=pdf_file, outputs=answer_box)
    submit_button.click(ask_question, inputs=user_question, outputs=answer_box)

# Start the server only when run as a script, so importing this module
# (tests, tooling) does not launch a web server as a side effect.
if __name__ == "__main__":
    demo.launch()