Spaces:

usmanayaz
/

electrical_load

Sleeping

App Files Files Community

usmanayaz committed on Jan 1, 2025

Commit

b33709f

verified ·

1 Parent(s): 1a0c0d0

Create app.py

Browse files

Files changed (1) hide show

app.py +73 -0

app.py ADDED Viewed

	@@ -0,0 +1,73 @@

+import os
+import streamlit as st
+from PyPDF2 import PdfReader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.vectorstores import FAISS
+from groq import Groq
+import requests
+# Helper function to download and load the PDF from Google Drive
+def load_pdf_from_drive(output_path="downloaded_document.pdf"):
+    drive_link = "https://drive.google.com/file/d/1SzVEuEdKi4dHeKgDrUbmoq1MShB-hyG4/view?usp=drive_link"
+    file_id = drive_link.split("/d/")[1].split("/")[0]
+    download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
+    response = requests.get(download_url)
+    with open(output_path, "wb") as f:
+        f.write(response.content)
+    return output_path
+# Helper function to parse the PDF
+def load_pdf_content(pdf_path):
+    reader = PdfReader(pdf_path)
+    text = ""
+    for page in reader.pages:
+        text += page.extract_text()
+    return text
+# Define the Streamlit app
+st.title("RAG-Based Application with Groq API")
+st.write("Processing a predefined PDF document from Google Drive to create a vector database and interact with it.")
+st.write("Downloading and processing the document...")
+# Download and load content from the PDF
+pdf_path = load_pdf_from_drive()
+document_text = load_pdf_content(pdf_path)
+# Split the text into manageable chunks
+text_splitter = RecursiveCharacterTextSplitter(
+    chunk_size=1000, chunk_overlap=200
+)
+text_chunks = text_splitter.split_text(document_text)
+st.write(f"Document split into {len(text_chunks)} chunks.")
+# Initialize embedding function
+embedding_function = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+# Create FAISS vector database
+faiss_index = FAISS.from_texts(text_chunks, embedding=embedding_function)
+st.write("Vector database created successfully.")
+# Save the FAISS index
+faiss_index.save_local("faiss_index")
+# Initialize Groq client for querying
+GROQ_API_KEY = "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"
+client = Groq(api_key=GROQ_API_KEY)
+# Chat interaction setup
+st.write("Ask a question related to the document:")
+user_query = st.text_input("Your question:")
+if user_query:
+    query_response = client.chat.completions.create(
+        messages=[
+            {"role": "user", "content": user_query}
+        ],
+        model="llama-3.3-70b-versatile",
+    )
+    st.write("Response:")
+    st.write(query_response.choices[0].message.content)