ai-lover committed on
Commit
4a57fd0
·
verified ·
1 Parent(s): 70d8413

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -0
app.py CHANGED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+ import PyPDF2
4
+ from langchain_community.embeddings import HuggingFaceEmbeddings
5
+ import faiss
6
+ from groq import Groq
7
+ import numpy as np
8
+
9
# Load the Groq API key from the environment.
# SECURITY: the original committed a live API key directly into source.
# Secrets must never be hard-coded — the leaked key should be rotated and
# supplied via environment configuration instead.
API = os.environ.get("GROQ_API_KEY")
if not API:
    raise RuntimeError("GROQ_API_KEY environment variable is not set")

# Initialize Groq client
client = Groq(api_key=API)

# Initialize HuggingFace embedding model from langchain_community
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embedding_model = HuggingFaceEmbeddings(model_name=embedding_model_name)

# Determine the vector size dynamically by generating a sample embedding,
# so the FAISS index dimension always matches the chosen embedding model.
sample_embedding = embedding_model.embed_query("test")
dimension = len(sample_embedding)

# Initialize FAISS: a flat (exact, brute-force) L2 index.
index = faiss.IndexFlatL2(dimension)
25
+
26
+ # Streamlit front-end
27
+ st.title("RAG-based PDF Query Application")
28
+
29
+ # File uploader
30
+ uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
31
+ if uploaded_file is not None:
32
+ # Extract text from PDF
33
+ pdf_reader = PyPDF2.PdfReader(uploaded_file)
34
+ text = ""
35
+ for page in pdf_reader.pages:
36
+ text += page.extract_text()
37
+
38
+ st.write("PDF Uploaded Successfully!")
39
+
40
def create_chunks(text, chunk_size=500):
    """Split *text* into whitespace-delimited chunks of at most
    *chunk_size* words each; returns [] for empty/whitespace-only text."""
    words = text.split()
    chunks = []
    for start in range(0, len(words), chunk_size):
        chunks.append(" ".join(words[start:start + chunk_size]))
    return chunks
45
+
46
+ chunks = create_chunks(text)
47
+ st.write(f"Created {len(chunks)} chunks.")
48
+
49
+ # Generate embeddings
50
+ embeddings = [embedding_model.embed_query(chunk) for chunk in chunks]
51
+ embeddings = np.array(embeddings, dtype=np.float32) # Convert to float32
52
+ faiss.normalize_L2(embeddings)
53
+ index.add(embeddings)
54
+ st.write("Embeddings generated and stored in FAISS.")
55
+
56
+ # Query input
57
+ user_query = st.text_input("Enter your query:")
58
+ if user_query:
59
+ # Query embedding
60
+ query_embedding = embedding_model.embed_query(user_query)
61
+ query_embedding = np.array([query_embedding], dtype=np.float32) # Convert to float32
62
+ faiss.normalize_L2(query_embedding)
63
+
64
+ # Search for similar chunks
65
+ k = 3 # Number of nearest neighbors
66
+ distances, indices = index.search(query_embedding, k)
67
+ relevant_chunks = [chunks[i] for i in indices[0]]
68
+
69
+ # Pass to Groq API
70
+ prompt = "\n\n".join(relevant_chunks) + f"\n\nUser Query: {user_query}"
71
+ chat_completion = client.chat.completions.create(
72
+ messages=[
73
+ {"role": "user", "content": prompt}
74
+ ],
75
+ model="llama3-8b-8192"
76
+ )
77
+ response = chat_completion.choices[0].message.content
78
+
79
+ # Display response
80
+ st.write("### Response")
81
+ st.write(response)