FurqanIshaq committed on
Commit
7cd855e
·
verified ·
1 Parent(s): 82e34df

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -0
app.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# app.py
# Standard library first, then third-party packages (PEP 8 grouping).
import os

import numpy as np
import faiss
import streamlit as st
from groq import Groq
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
# Groq client for chat completions; the API key is read from the environment
# (GROQ_API_KEY) rather than being hard-coded.
client = Groq(api_key=os.getenv("GROQ_API_KEY"))

# Sentence-transformer used to embed both the document chunks and the queries.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Streamlit page chrome.
st.set_page_config(layout="wide", page_title="RAG App with Groq")
st.title("📄🧠 RAG-Based Question Answering App")
# --- UPLOAD PDF ---
uploaded_file = st.file_uploader("Upload a PDF", type="pdf")
if uploaded_file:
    # Pull the raw text out of every page. PdfReader's extract_text() can
    # return None for pages without an extractable text layer (e.g. scanned
    # images), which would raise TypeError on string concatenation — coalesce
    # to "". Using str.join also avoids quadratic `+=` accumulation.
    reader = PdfReader(uploaded_file)
    text = "".join(page.extract_text() or "" for page in reader.pages)
# --- CHUNKING ---
def chunk_text(text, chunk_size=500):
    """Split *text* into chunks of at most *chunk_size* whitespace-separated words.

    Returns a list of strings; an empty/whitespace-only input yields [].
    Hoisted to module level: the helper is pure, so defining it
    unconditionally (instead of inside the upload branch) is side-effect-free
    and keeps it reusable and testable.
    """
    words = text.split()
    return [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]
if uploaded_file:
    # --- VECTORIZE CHUNKS ---
    chunks = chunk_text(text)
    if chunks:
        # faiss requires float32 input; be explicit rather than relying on
        # the encoder's default dtype.
        embeddings = np.asarray(embedder.encode(chunks), dtype="float32")
        index = faiss.IndexFlatL2(embeddings.shape[1])
        index.add(embeddings)
        st.success("✅ Document uploaded and indexed!")
    else:
        # Previously this crashed with IndexError on `embeddings[0]` when the
        # PDF had no extractable text — fail with a clear message instead.
        st.error("No extractable text found in this PDF.")
# --- USER QUERY ---
user_query = st.text_input("Ask something about the document")
if user_query:
    # Embed the query and retrieve the 3 nearest chunks from the index.
    query_embedding = np.asarray(embedder.encode([user_query]), dtype="float32")
    _, I = index.search(query_embedding, k=3)
    # faiss pads the result with -1 when the index holds fewer than k vectors;
    # drop those instead of letting chunks[-1] silently duplicate a chunk.
    retrieved_chunks = "\n\n".join(chunks[i] for i in I[0] if i != -1)

    # --- GROQ COMPLETION ---
    # Ground the model on the retrieved context only.
    response = client.chat.completions.create(
        model="llama-3.1-8b-instant",
        messages=[
            {"role": "system", "content": "You are a helpful assistant who answers based on the provided context."},
            {"role": "user", "content": f"Context: {retrieved_chunks}\n\nQuestion: {user_query}"},
        ],
    )

    st.subheader("📢 Answer")
    st.write(response.choices[0].message.content)