RizwanSajad commited on
Commit
cb681da
·
verified ·
1 Parent(s): 67b6dfd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -58
app.py CHANGED
@@ -3,95 +3,82 @@ import streamlit as st
3
  import numpy as np
4
  import faiss
5
  from groq import Groq
 
 
6
  from sentence_transformers import SentenceTransformer
7
- from PyPDF2 import PdfReader
8
 
9
  # Constants
10
  DRIVE_FILE_LINK = "https://drive.google.com/file/d/1kYGomSibXW-wCFptEMcWP12jOz1390OK/view?usp=drive_link"
11
  GROQ_MODEL = "llama-3.3-70b-versatile"
12
 
13
# Download the document
def download_document(file_link):
    """Authenticate against Google Drive and fetch the shared PDF locally.

    Parameters
    ----------
    file_link : str
        A Drive share link of the form ``.../file/d/<id>/view?...``.

    Returns
    -------
    str
        Path of the downloaded file (always ``"document.pdf"``).
    """
    # Imported lazily so the app can start without pydrive installed.
    from pydrive.auth import GoogleAuth
    from pydrive.drive import GoogleDrive

    st.info("Authenticating with Google Drive...")
    auth = GoogleAuth()
    auth.LocalWebserverAuth()  # opens a browser window for the OAuth flow
    drive = GoogleDrive(auth)

    # Pull the file id out of a ".../d/<id>/view..." share link.
    drive_id = file_link.split("/d/")[1].split("/view")[0]
    remote = drive.CreateFile({"id": drive_id})
    remote.GetContentFile("document.pdf")
    return "document.pdf"
27
 
28
# Chunk the text
def chunk_text(text, chunk_size=500, chunk_overlap=200):
    """Split *text* into overlapping fixed-size chunks.

    Parameters
    ----------
    text : str
        Document text to split.
    chunk_size : int
        Maximum number of characters per chunk.
    chunk_overlap : int
        Characters shared between consecutive chunks; must be smaller
        than ``chunk_size``.

    Returns
    -------
    list[str]
        Chunks in document order; empty list for empty input.

    Raises
    ------
    ValueError
        If ``chunk_overlap >= chunk_size``.  The original code then ran
        ``range`` with a zero step (crash) or a negative step (silently
        returned no chunks) — fail loudly instead.
    """
    if chunk_overlap >= chunk_size:
        raise ValueError("chunk_overlap must be smaller than chunk_size")
    step = chunk_size - chunk_overlap
    return [text[i:i + chunk_size] for i in range(0, len(text), step)]
 
 
 
 
 
32
 
33
# Create embeddings and store in FAISS
def create_vector_database(chunks):
    """Embed *chunks* in batches of 100 and index them in a FAISS L2 index.

    Parameters
    ----------
    chunks : list[str]
        Text chunks to embed.

    Returns
    -------
    faiss.IndexFlatL2
        Flat L2 index containing one vector per chunk.
    """
    st.info("Creating embeddings...")
    model = SentenceTransformer("all-MiniLM-L6-v2")

    # Encode in batches of 100 to bound peak memory, then stack into
    # one (n_chunks, dim) matrix.
    batch_size = 100
    batches = [
        model.encode(chunks[start:start + batch_size], convert_to_tensor=True).detach().numpy()
        for start in range(0, len(chunks), batch_size)
    ]
    matrix = np.vstack(batches)

    st.info("Initializing FAISS vector database...")
    index = faiss.IndexFlatL2(matrix.shape[1])
    index.add(matrix)
    return index
 
 
 
51
 
52
# Query the vector database
def query_vector_db(query, chunks, index, embedder):
    """Return the single chunk nearest to *query* in the FAISS index.

    Parameters
    ----------
    query : str
        User query text.
    chunks : list[str]
        The chunks the index was built from, in insertion order.
    index : faiss index
        Index whose ``search`` returns (distances, ids).
    embedder : SentenceTransformer
        Model used to embed the query.

    Returns
    -------
    str
        Best-matching chunk, or a fallback message when the index
        reports no match (id ``-1``).
    """
    vec = embedder.encode([query], convert_to_tensor=True).detach().numpy()
    _distances, ids = index.search(vec, k=1)
    best = ids[0][0]
    return chunks[best] if best != -1 else "No relevant content found."
59
 
60
- # Main Streamlit App
61
  def main():
62
  st.title("RAG-based Application with Groq")
63
 
64
- # Step 1: Load Document
65
- if st.button("Download and Load Document"):
66
- document_path = download_document(DRIVE_FILE_LINK)
67
- reader = PdfReader(document_path)
68
- text = "".join([page.extract_text() for page in reader.pages])
69
- chunks = chunk_text(text)
70
- st.success("Document loaded and chunked!")
71
- st.session_state["chunks"] = chunks
72
-
73
- # Step 2: Create Vector Database
74
- if st.button("Create Vector Database"):
75
- if "chunks" not in st.session_state:
76
- st.error("Please load the document first!")
77
- else:
78
- index = create_vector_database(st.session_state["chunks"])
79
- st.session_state["index"] = index
80
- st.success("Vector database created successfully!")
81
-
82
- # Step 3: Query
83
  user_input = st.text_input("Enter your query:")
84
  if user_input:
85
- if "index" not in st.session_state or "chunks" not in st.session_state:
86
- st.error("Please load the document and create the vector database first!")
87
- else:
88
- embedder = SentenceTransformer("all-MiniLM-L6-v2")
89
- context = query_vector_db(user_input, st.session_state["chunks"], st.session_state["index"], embedder)
90
- st.write("**Relevant Context:**", context)
91
 
92
- # Query Groq model
93
- client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
94
- st.info("Querying Groq model...")
95
  chat_completion = client.chat.completions.create(
96
  messages=[
97
  {"role": "user", "content": f"Based on this context: {context}, {user_input}"}
 
3
  import numpy as np
4
  import faiss
5
  from groq import Groq
6
+ from pydrive.auth import GoogleAuth
7
+ from pydrive.drive import GoogleDrive
8
  from sentence_transformers import SentenceTransformer
 
9
 
10
  # Constants
11
  DRIVE_FILE_LINK = "https://drive.google.com/file/d/1kYGomSibXW-wCFptEMcWP12jOz1390OK/view?usp=drive_link"
12
  GROQ_MODEL = "llama-3.3-70b-versatile"
13
 
14
# Authentication and setup for Google Drive
@st.cache_resource
def load_drive_content(file_link, output_path="document.pdf"):
    """Authenticate with Google Drive and download the linked file.

    Cached with ``st.cache_resource`` so the OAuth flow and download run
    only once per session for a given link.

    Parameters
    ----------
    file_link : str
        Drive share link of the form ``.../file/d/<id>/view?...``.
    output_path : str
        Where to write the downloaded file (default preserves the
        original hard-coded ``"document.pdf"``).

    Returns
    -------
    str
        Path of the downloaded file (``output_path``).

    Raises
    ------
    ValueError
        If the link does not contain a ``/d/<id>`` segment — the
        original code surfaced this as a cryptic ``IndexError``.
    """
    gauth = GoogleAuth()
    gauth.LocalWebserverAuth()  # opens a browser window for the OAuth flow
    drive = GoogleDrive(gauth)

    # Pull the file id out of a ".../d/<id>/view..." share link.
    try:
        file_id = file_link.split('/d/')[1].split('/view')[0]
    except IndexError:
        raise ValueError(f"Unrecognized Google Drive link: {file_link!r}") from None

    downloaded_file = drive.CreateFile({'id': file_id})
    downloaded_file.GetContentFile(output_path)
    return output_path
24
 
25
# Chunking and embedding creation
@st.cache_resource
def prepare_embeddings(document_path):
    """Read a PDF, chunk its text, embed the chunks, and build a FAISS index.

    Cached with ``st.cache_resource`` so the heavy embedding work runs
    once per session for a given document path.

    Parameters
    ----------
    document_path : str
        Path to the PDF downloaded by ``load_drive_content``.

    Returns
    -------
    tuple[list[str], faiss.IndexFlatL2]
        The text chunks and a flat L2 index with one vector per chunk.

    Raises
    ------
    ValueError
        If no text could be extracted from the PDF (the original code
        would instead fail later inside FAISS with an opaque error).
    """
    from PyPDF2 import PdfReader

    reader = PdfReader(document_path)
    # extract_text() returns None for pages without a text layer (e.g.
    # scanned images); the original `text += page.extract_text()` raised
    # a TypeError on such pages.
    text = "".join(page.extract_text() or "" for page in reader.pages)

    # Create chunks of 500 characters with a sliding window of 200
    chunk_size = 500
    chunk_overlap = 200
    step = chunk_size - chunk_overlap
    chunks = [text[i:i + chunk_size] for i in range(0, len(text), step)]

    if not chunks:
        raise ValueError("No extractable text found in the document.")

    # Embedding model
    embedder = SentenceTransformer("all-MiniLM-L6-v2")
    embeddings = embedder.encode(chunks, convert_to_tensor=True).detach().numpy()

    # Store in FAISS
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return chunks, index
49
 
50
# Groq setup
@st.cache_resource
def groq_client():
    """Build (and cache for the session) a Groq API client.

    Reads the API key from the ``GROQ_API_KEY`` environment variable;
    passes ``None`` through if it is unset, letting the Groq SDK report
    the missing credential.
    """
    api_key = os.environ.get("GROQ_API_KEY")
    return Groq(api_key=api_key)
54
 
55
# Retrieve and query vector DB
def query_vector_db(query, chunks, index, embedder):
    """Retrieve the top matching chunk for *query*, or a fallback message.

    Parameters
    ----------
    query : str
        User query text.
    chunks : list[str]
        Chunks the index was built from, in insertion order.
    index : faiss index
        Index whose ``search`` returns (distances, ids).
    embedder : SentenceTransformer
        Model used to embed the query.

    Returns
    -------
    str
        The nearest chunk, or ``"No relevant content found."`` when the
        index reports no match (id ``-1``).
    """
    emb = embedder.encode([query], convert_to_tensor=True).detach().numpy()
    scores, hits = index.search(emb, k=1)  # top-1 neighbor
    hit = hits[0][0]
    if hit == -1:  # FAISS signals "no result" with -1
        return "No relevant content found."
    return chunks[hit]
62
 
63
+ # Streamlit application
64
  def main():
65
  st.title("RAG-based Application with Groq")
66
 
67
+ # Load document and prepare FAISS
68
+ st.info("Loading document and preparing FAISS...")
69
+ document_path = load_drive_content(DRIVE_FILE_LINK)
70
+ chunks, index = prepare_embeddings(document_path)
71
+ embedder = SentenceTransformer("all-MiniLM-L6-v2")
72
+ client = groq_client()
73
+
74
+ # Interface
 
 
 
 
 
 
 
 
 
 
 
75
  user_input = st.text_input("Enter your query:")
76
  if user_input:
77
+ context = query_vector_db(user_input, chunks, index, embedder)
78
+ st.write("**Relevant Context:**", context)
 
 
 
 
79
 
80
+ # Query Groq model
81
+ with st.spinner("Querying Groq model..."):
 
82
  chat_completion = client.chat.completions.create(
83
  messages=[
84
  {"role": "user", "content": f"Based on this context: {context}, {user_input}"}