AamerAkhter committed 22bec1e (verified) · 1 parent: 261053d

Create app.py

Files changed (1): app.py (+78, -0)
app.py ADDED
@@ -0,0 +1,78 @@
import os
import fitz  # PyMuPDF
import streamlit as st
from groq import Groq
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss

# Read the Groq API key from the environment
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
client = Groq(api_key=GROQ_API_KEY)

# Load embedding model
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# --- Functions ---

# Extract text from an uploaded PDF
def extract_text(file):
    text = ""
    with fitz.open(stream=file.read(), filetype="pdf") as doc:
        for page in doc:
            text += page.get_text()
    return text

# Split text into fixed-size word chunks
def chunk_text(text, chunk_size=500):
    words = text.split()
    return [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]

# Generate embeddings for each chunk
def embed_chunks(chunks):
    return embedding_model.encode(chunks)

# Create a FAISS index over the chunk embeddings
def create_faiss_index(embeddings):
    dim = embeddings.shape[1]
    index = faiss.IndexFlatL2(dim)
    index.add(embeddings)
    return index

# Search for the top-k most relevant chunks
def search_similar_chunks(query, index, chunks, k=3):
    k = min(k, len(chunks))  # don't request more neighbors than stored vectors
    query_embedding = embedding_model.encode([query])
    D, I = index.search(np.array(query_embedding), k)
    return [chunks[i] for i in I[0]]

# Query the LLM via Groq, grounding it in the retrieved context
def query_llm(context, question):
    messages = [
        {"role": "system", "content": "You are a helpful assistant that answers based on the provided context."},
        {"role": "user", "content": f"Context:\n{context}\n\nQuestion:\n{question}"}
    ]
    response = client.chat.completions.create(
        messages=messages,
        model="llama3-8b-8192"
    )
    return response.choices[0].message.content

# --- Streamlit Interface ---
st.title("📄 RAG App (PDF → Context → Answer via Groq)")
st.markdown("Upload a PDF document and ask questions about its content.")

uploaded_file = st.file_uploader("Upload PDF", type=["pdf"])

if uploaded_file:
    text = extract_text(uploaded_file)
    chunks = chunk_text(text)
    embeddings = embed_chunks(chunks)
    index = create_faiss_index(np.array(embeddings))

    question = st.text_input("Ask a question about the document:")
    if question:
        top_chunks = search_similar_chunks(question, index, chunks)
        context = "\n".join(top_chunks)
        answer = query_llm(context, question)
        st.markdown("### ✅ Answer:")
        st.write(answer)
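
To try the app locally, a minimal setup might look like the following. The package names are assumptions inferred from the imports above (fitz ships on PyPI as pymupdf, and faiss as faiss-cpu); replace the placeholder key with a real Groq API key:

    pip install streamlit pymupdf groq sentence-transformers faiss-cpu numpy
    export GROQ_API_KEY="your-key-here"   # read by os.getenv("GROQ_API_KEY") in app.py
    streamlit run app.py

One design note: since Streamlit reruns the whole script on each interaction, the PDF is re-extracted and re-embedded for every question; wrapping the embedding and index-building steps with Streamlit caching (e.g. st.cache_resource) is a common way to avoid that.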