ahmedumeraziz committed on
Commit
f37e775
·
verified ·
1 Parent(s): a3626c5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -0
app.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Standard library
import os

# Third-party
import faiss
import numpy as np
import PyPDF2
import requests
import streamlit as st
import tiktoken
from sentence_transformers import SentenceTransformer
8
+
9
# Load embedding model shared by document chunks and queries (all-MiniLM-L6-v2,
# 384-dim vectors — TODO confirm dim against the model card).
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# GROQ API configuration.
# SECURITY: never commit a real key to source control — read it from the
# environment. Falls back to the original placeholder so the app still starts
# (and fails loudly at request time) when the variable is unset.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "YOUR_GROQ_API_KEY")
GROQ_URL = "https://api.groq.com/openai/v1/chat/completions"
LLAMA3_MODEL = "llama3-8b-8192"
16
+
17
# Extract text from PDF
def load_pdf(pdf_file):
    """Return the extracted text of every page of *pdf_file* as one string.

    pdf_file: a path or binary file-like object accepted by PyPDF2.PdfReader.

    Fixes two defects in the original:
    - ``page.extract_text()`` returns ``None`` for pages with no extractable
      text, which made ``"".join(...)`` raise TypeError; ``or ""`` skips them.
    - Pages are joined with ``"\\n"`` so the last word of one page does not
      fuse with the first word of the next, which corrupted chunking.
    """
    reader = PyPDF2.PdfReader(pdf_file)
    return "\n".join(page.extract_text() or "" for page in reader.pages)
21
+
22
# Chunk text
def chunk_text(text, chunk_size=500):
    """Split *text* into chunks of at most *chunk_size* whitespace-separated words.

    Returns a list of strings; an empty/whitespace-only *text* yields [].
    """
    words = text.split()
    chunks = []
    for start in range(0, len(words), chunk_size):
        chunks.append(" ".join(words[start:start + chunk_size]))
    return chunks
26
+
27
# Generate embeddings
def get_embeddings(chunks):
    """Encode each text chunk into a dense vector with the module-level model."""
    vectors = embedding_model.encode(chunks)
    return vectors
30
+
31
# Create FAISS index
def create_faiss_index(embeddings):
    """Build a flat (exact) L2 FAISS index over *embeddings*.

    embeddings: 2-D array-like of shape (n_chunks, dim).
    """
    dim = embeddings.shape[1]
    index = faiss.IndexFlatL2(dim)
    index.add(np.array(embeddings))
    return index
36
+
37
# Search index
def search_index(index, query, chunks, top_k=3):
    """Return the *top_k* chunks nearest to *query* by L2 distance."""
    query_vector = embedding_model.encode([query])
    _, hit_ids = index.search(np.array(query_vector), top_k)
    return [chunks[hit] for hit in hit_ids[0]]
42
+
43
# Generate answer using GROQ
def generate_answer(prompt):
    """Send *prompt* to the GROQ chat-completions API and return the reply text.

    Raises requests.HTTPError for non-2xx responses (via raise_for_status) and
    requests.Timeout if the API does not answer within the timeout.
    """
    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json",
    }
    data = {
        "model": LLAMA3_MODEL,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
    }
    # Fix: requests.post has NO default timeout — a stalled API call would
    # hang the Streamlit worker forever. 60s comfortably covers generation.
    response = requests.post(GROQ_URL, headers=headers, json=data, timeout=60)
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"]
59
+
60
# Streamlit UI
def main():
    """Streamlit app: upload a PDF, index it with FAISS, answer questions via GROQ."""
    st.set_page_config("RAG App", layout="centered")
    st.title("📄 PDF QA App with LLaMA 3 & GROQ")

    uploaded_file = st.file_uploader("Upload a PDF", type="pdf")

    if uploaded_file and st.button("Process PDF"):
        with st.spinner("Processing..."):
            text = load_pdf(uploaded_file)
            chunks = chunk_text(text)
            embeddings = get_embeddings(chunks)
            index = create_faiss_index(embeddings)
            # Stored in session_state so the index survives Streamlit reruns
            # triggered by later widget interactions.
            st.session_state.chunks = chunks
            st.session_state.index = index
            st.success("✅ PDF processed and indexed.")

    if "index" in st.session_state:
        query = st.text_input("Ask a question about the PDF:")
        if st.button("Get Answer"):
            # Fix: the original embedded an empty query and still called the
            # LLM when the button was pressed with a blank input box.
            if not query.strip():
                st.warning("Please enter a question first.")
                return
            with st.spinner("Thinking..."):
                top_chunks = search_index(st.session_state.index, query, st.session_state.chunks)
                context = "\n\n".join(top_chunks)
                prompt = f"Use the following context to answer the question:\n\n{context}\n\nQuestion: {query}"
                answer = generate_answer(prompt)
                st.markdown("### 🧠 Answer:")
                st.write(answer)
87
+
88
# Script entry point: launch the Streamlit UI only when run directly,
# not when this module is imported.
if __name__ == "__main__":
    main()