zeeshan4801 committed on
Commit
cdb5969
·
verified ·
1 Parent(s): a8c7d8b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -0
app.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit RAG demo.

Upload a PDF, split its text into fixed-size chunks, embed the chunks with a
SentenceTransformer model, index them in FAISS, and answer a user question by
sending the best-matching chunk together with the question to a Groq-hosted
chat model.
"""

import os

import streamlit as st
import PyPDF2
from sentence_transformers import SentenceTransformer
import faiss
from groq import Groq

# Number of characters per text chunk.
CHUNK_SIZE = 500

# Streamlit App
st.title("RAG Application with Groq and FAISS")

# Initialize Groq client.  Fail with a readable message in the UI instead of
# an exception from the client constructor when the key is not configured.
api_key = os.environ.get("GROQ_API_KEY")
if not api_key:
    st.error("GROQ_API_KEY environment variable is not set.")
    st.stop()
client = Groq(api_key=api_key)

# Load embedding model
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

# Initialize FAISS Index
dimension = 384  # Dimension of embeddings
index = faiss.IndexFlatL2(dimension)

# PDF Upload
uploaded_file = st.file_uploader("Upload a PDF Document", type=["pdf"])
if uploaded_file:
    # Extract text from PDF.  A page without extractable text (e.g. a scanned
    # image) may yield None or an empty string, so fall back to "" before
    # joining.
    pdf_reader = PyPDF2.PdfReader(uploaded_file)
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    if not text.strip():
        # Nothing to embed: adding an empty batch to the index and searching
        # it would fail, so stop here with an explanation.
        st.warning("No extractable text was found in the uploaded PDF.")
    else:
        # Split text into chunks
        chunks = [
            text[start:start + CHUNK_SIZE]
            for start in range(0, len(text), CHUNK_SIZE)
        ]
        st.write(f"Document split into {len(chunks)} chunks.")

        # Generate embeddings and store in FAISS
        embeddings = embedding_model.encode(chunks)
        index.add(embeddings)
        st.success("Embeddings created and stored in FAISS.")

        # Query and Response
        user_query = st.text_input("Enter your query:")
        if user_query:
            query_embedding = embedding_model.encode([user_query])
            _, indices = index.search(query_embedding, k=1)
            retrieved_chunk = chunks[indices[0][0]]

            # Send both the retrieved context and the user's question; the
            # chunk alone gives the model nothing to answer.
            prompt = (
                "Answer the question using only the context below.\n\n"
                f"Context:\n{retrieved_chunk}\n\n"
                f"Question: {user_query}"
            )

            # Use Groq API for completion
            chat_completion = client.chat.completions.create(
                messages=[{"role": "user", "content": prompt}],
                model="llama3-8b-8192",
            )
            response = chat_completion.choices[0].message.content
            st.write("**Response:**")
            st.write(response)