ilsa15 commited on
Commit
d8a0579
·
verified ·
1 Parent(s): 464c627

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -0
app.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import faiss
4
+ import numpy as np
5
+ import PyPDF2
6
+ from sentence_transformers import SentenceTransformer
7
+ import requests
8
+
9
+ # πŸ” Use environment variable for Groq API key
10
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
11
+ GROQ_MODEL = "llama3-8b-8192"
12
+
13
+ # 🧠 Embedding Model
14
+ embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
15
+
16
+ # πŸ—ƒοΈ Vector DB
17
+ dimension = 384
18
+ index = faiss.IndexFlatL2(dimension)
19
+ text_chunks = []
20
+
21
def extract_text_from_pdf(pdf_file):
    """Concatenate the extractable text of every page in *pdf_file*.

    Pages where PyPDF2 finds no text (``extract_text()`` returns None)
    contribute an empty string.
    """
    reader = PyPDF2.PdfReader(pdf_file)
    page_texts = (page.extract_text() or "" for page in reader.pages)
    return "".join(page_texts)
27
+
28
def chunk_text(text, chunk_size=500):
    """Split *text* into chunks of roughly *chunk_size* characters.

    Splitting happens on sentence boundaries ('. '), so chunks end on
    whole sentences; a single sentence longer than *chunk_size* becomes
    its own oversized chunk.

    Fixes vs. the original:
    - empty/whitespace-only input now returns [] (previously ['.']),
    - an over-long leading sentence no longer emits an empty chunk,
      which would otherwise be embedded as a garbage vector.
    """
    if not text.strip():
        return []
    sentences = text.split('. ')
    chunks, chunk = [], ""
    for sentence in sentences:
        if len(chunk) + len(sentence) < chunk_size:
            chunk += sentence + ". "
        else:
            if chunk:  # guard: don't record an empty chunk
                chunks.append(chunk.strip())
            chunk = sentence + ". "
    if chunk:
        chunks.append(chunk.strip())
    return chunks
40
+
41
def embed_and_store(chunks):
    """Embed *chunks* and (re)build the FAISS index over them.

    The index is reset first: previously ``index.add`` accumulated
    vectors across uploads while ``text_chunks`` was replaced, so after
    a second upload the retrieved indices pointed at the wrong (or
    out-of-range) chunks.
    """
    global text_chunks
    text_chunks = chunks
    index.reset()  # drop vectors from any previously processed document
    embeddings = embedding_model.encode(chunks)
    index.add(np.array(embeddings))
46
+
47
def retrieve_context(query, top_k=3):
    """Return the *top_k* chunks nearest to *query*, joined by newlines.

    When fewer than *top_k* vectors are indexed, FAISS pads the result
    with -1; those placeholders previously aliased ``text_chunks[-1]``.
    Invalid indices are now skipped.
    """
    query_vector = embedding_model.encode([query])
    distances, indices = index.search(np.array(query_vector), top_k)
    hits = [text_chunks[i] for i in indices[0] if 0 <= i < len(text_chunks)]
    return "\n".join(hits)
51
+
52
def format_prompt(context, question):
    """Build the two-message chat payload (system + user) for the LLM."""
    system_msg = (
        "You are a helpful research assistant who answers "
        "questions using only the uploaded document."
    )
    user_msg = (
        f"Document Context:\n{context}\n\n"
        f"Question: {question}\nAnswer:"
    )
    return [
        {"role": "system", "content": system_msg},
        {"role": "user", "content": user_msg},
    ]
57
+
58
def call_groq_api(messages):
    """Send *messages* to Groq's OpenAI-compatible chat endpoint.

    Returns the assistant message content.

    Fixes vs. the original:
    - a 60 s timeout so a stalled connection can't hang the UI forever,
    - ``raise_for_status()`` so HTTP errors raise requests.HTTPError
      instead of an opaque ``KeyError: 'choices'`` on the error body.
    """
    url = "https://api.groq.com/openai/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": GROQ_MODEL,
        "messages": messages,
        "temperature": 0.3,
    }
    response = requests.post(url, headers=headers, json=payload, timeout=60)
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"]
71
+
72
def upload_file(pdf):
    """Gradio handler: extract, chunk, and index the uploaded PDF."""
    raw_text = extract_text_from_pdf(pdf)
    embed_and_store(chunk_text(raw_text))
    return "βœ… Document processed. You may now ask questions."
77
+
78
def answer_question(question):
    """Gradio handler: answer *question* using only the indexed document."""
    if not text_chunks:  # nothing indexed yet
        return "❌ Please upload and process a document first."
    prompt = format_prompt(retrieve_context(question), question)
    return call_groq_api(prompt)
84
+
85
# ---- Gradio UI: upload panel on top, Q&A panel below ----
with gr.Blocks() as rag_ui:
    gr.Markdown("## πŸ“„ RAG Assistant with LLaMA3 (Groq)")

    with gr.Row():
        pdf_box = gr.File(label="Upload PDF")
        process_btn = gr.Button("Process Document")

    status_box = gr.Textbox(label="Status")
    process_btn.click(upload_file, inputs=pdf_box, outputs=status_box)

    gr.Markdown("### ❓ Ask a Question from the Uploaded PDF")
    query_box = gr.Textbox(label="Your Question")
    reply_box = gr.Textbox(label="Answer", lines=5)

    answer_btn = gr.Button("Get Answer")
    answer_btn.click(answer_question, inputs=query_box, outputs=reply_box)

rag_ui.launch()