Create app.py
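Adds a minimal RAG demo: the app extracts text from an uploaded PDF, embeds 500-character chunks with all-MiniLM-L6-v2, indexes them in a FAISS IndexFlatL2, and answers a query by passing the nearest chunk to Groq's llama3-8b-8192 model.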
app.py ADDED
@@ -0,0 +1,53 @@
+import os
+import streamlit as st
+import PyPDF2
+from sentence_transformers import SentenceTransformer
+import faiss
+from groq import Groq
+
+# Initialize Groq client
+client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
+
+# Load embedding model
+embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
+
+# Initialize FAISS index
+dimension = 384  # Dimension of all-MiniLM-L6-v2 embeddings
+index = faiss.IndexFlatL2(dimension)
+
+# Streamlit app
+st.title("RAG Application with Groq and FAISS")
+
+# PDF upload
+uploaded_file = st.file_uploader("Upload a PDF Document", type=["pdf"])
+if uploaded_file:
+    # Extract text from the PDF; extract_text() may return None for empty pages
+    pdf_reader = PyPDF2.PdfReader(uploaded_file)
+    text = ""
+    for page in pdf_reader.pages:
+        text += page.extract_text() or ""
+
+    # Split text into fixed-size 500-character chunks
+    chunks = [text[i:i+500] for i in range(0, len(text), 500)]
+    st.write(f"Document split into {len(chunks)} chunks.")
+
+    # Generate embeddings and store them in FAISS
+    embeddings = embedding_model.encode(chunks)
+    index.add(embeddings)
+    st.success("Embeddings created and stored in FAISS.")
+
+    # Query and response: retrieve the nearest chunk to the query embedding
+    user_query = st.text_input("Enter your query:")
+    if user_query:
+        query_embedding = embedding_model.encode([user_query])
+        _, indices = index.search(query_embedding, k=1)
+        retrieved_chunk = chunks[indices[0][0]]
+
+        # Ask Groq to answer the query, grounded in the retrieved chunk
+        chat_completion = client.chat.completions.create(
+            messages=[{"role": "user", "content": f"Context:\n{retrieved_chunk}\n\nQuestion: {user_query}"}],
+            model="llama3-8b-8192",
+        )
+        response = chat_completion.choices[0].message.content
+        st.write("**Response:**")
+        st.write(response)
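To run this Space locally (assuming the usual PyPI package names and the standard Streamlit CLI, neither of which is pinned in this commit): `pip install streamlit PyPDF2 sentence-transformers faiss-cpu groq`, export `GROQ_API_KEY`, then `streamlit run app.py`. Note that IndexFlatL2 performs exact brute-force search, which is fine at single-document scale.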