Spaces:

Talha812
/

RAG-Based-Chatbot

Sleeping

App Files Files Community

Talha812 committed on Jan 1, 2025

Commit

a580ec9

verified ·

1 Parent(s): 55c29b3

Create app.py

Browse files

Files changed (1) hide show

app.py +87 -0

app.py ADDED Viewed

	@@ -0,0 +1,87 @@

+# app.py
+import os
+import json
+import faiss
+import numpy as np
+import PyPDF2
+import requests
+import streamlit as st
+from groq import Groq
# Configuration constants

# Download link of the PDF that the app extracts text from.
PDF_URL = "https://drive.google.com/uc?export=download&id=1YWX-RYxgtcKO1QETnz1N3rboZUhRZwcH"

# Dimensionality of every vector stored in the FAISS index.
VECTOR_DIM = 768

# Maximum number of whitespace-separated words placed in one text chunk.
CHUNK_SIZE = 512
# Function to download and extract text from the PDF
def extract_text_from_pdf(url):
    """Download the PDF at ``url`` and return the text of all its pages.

    The response body is saved to ``document.pdf`` in the current working
    directory and then parsed with PyPDF2. The text of each page is joined
    with a newline.

    Args:
        url: Address the PDF is fetched from with an HTTP GET.

    Returns:
        The extracted text of every page, newline-separated. A page that
        yields no text contributes an empty string.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
    """
    # Bound the wait so a stalled server cannot hang the app indefinitely.
    response = requests.get(url, timeout=30)
    # Fail here rather than saving an error page and feeding it to the parser.
    response.raise_for_status()
    with open("document.pdf", "wb") as f:
        f.write(response.content)
    with open("document.pdf", "rb") as f:
        reader = PyPDF2.PdfReader(f)
        # Guard against pages for which extract_text() gives None (e.g. no
        # text layer); str.join would raise TypeError on a None item.
        text = "\n".join(page.extract_text() or "" for page in reader.pages)
    return text
# Function to split text into chunks
def create_chunks(text, chunk_size):
    """Split ``text`` into consecutive chunks of at most ``chunk_size`` words.

    Words are the whitespace-separated tokens of ``text``; inside a chunk
    they are re-joined with single spaces, so original spacing and line
    breaks are not preserved.

    Args:
        text: The string to split.
        chunk_size: Maximum number of words per chunk; must be at least 1.

    Returns:
        A list of chunk strings in document order. The last chunk may hold
        fewer than ``chunk_size`` words. Empty or whitespace-only ``text``
        gives an empty list.

    Raises:
        ValueError: If ``chunk_size`` is less than 1.
    """
    # A zero step makes range() raise an unrelated-looking error, and a
    # negative step would silently produce no chunks at all.
    if chunk_size < 1:
        raise ValueError(
            f"chunk_size must be a positive integer, got {chunk_size!r}"
        )
    words = text.split()
    return [
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]
# Function to create FAISS vector store
def create_faiss_index(chunks, vector_dim):
    """Build a flat L2 FAISS index containing one vector per chunk.

    The stored vectors are random placeholders: only the number of chunks
    is used, never their text, so the index carries no semantic information
    about the document until real embeddings are substituted.

    Args:
        chunks: Sequence of text chunks; only its length is read.
        vector_dim: Dimensionality of the index and of each vector.

    Returns:
        A tuple ``(index, embeddings)`` with the populated
        ``faiss.IndexFlatL2`` and the float32 array that was added to it.
    """
    num_vectors = len(chunks)
    # Replace with real embeddings
    placeholder = np.random.rand(num_vectors, vector_dim)
    embeddings = placeholder.astype('float32')

    index = faiss.IndexFlatL2(vector_dim)
    index.add(embeddings)
    return index, embeddings
# Initialize Groq API client
def get_groq_client():
    """Return a Groq client authenticated from the environment.

    The API key is read from the ``GROQ_API_KEY`` environment variable so
    that no credential lives in source control. Any key that was ever
    committed to the repository should be treated as compromised and
    rotated.

    Returns:
        A ``Groq`` client constructed with the configured API key.

    Raises:
        RuntimeError: If ``GROQ_API_KEY`` is unset or empty.
    """
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        raise RuntimeError(
            "GROQ_API_KEY environment variable is not set; "
            "configure it as a secret instead of hard-coding the key."
        )
    return Groq(api_key=api_key)
# Query Groq model
def query_model(client, question):
    """Send ``question`` as a single user message and return the reply text.

    Args:
        client: Object exposing ``chat.completions.create`` (a Groq client).
        question: Text placed in the ``content`` of the one user message.

    Returns:
        The ``message.content`` of the first choice in the completion.
    """
    user_message = {"role": "user", "content": question}
    response = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[user_message],
    )
    first_choice = response.choices[0]
    return first_choice.message.content
# Streamlit app
def main():
    """Render the four-step Streamlit UI: extract, chunk, index, query.

    Intermediate results are kept in ``st.session_state`` under the keys
    ``"text"``, ``"chunks"`` and ``"index"`` so they survive Streamlit's
    script reruns. Re-running an earlier step discards the results of the
    later steps that were derived from the old data.

    The question is sent to the model as-is; the FAISS index only gates
    whether a query may be issued and is not searched for context.
    """
    st.title("RAG-Based Application")

    # Step 1: Extract text from the document
    st.header("Step 1: Extract Text")
    if st.button("Extract Text from PDF"):
        try:
            text = extract_text_from_pdf(PDF_URL)
        except Exception as exc:  # UI boundary: report instead of crashing
            st.error(f"Could not extract text from the PDF: {exc}")
        else:
            st.session_state["text"] = text
            # Chunks and index built from an earlier extraction are stale.
            for stale_key in ("chunks", "index"):
                if stale_key in st.session_state:
                    del st.session_state[stale_key]
            st.success("Text extracted successfully!")

    # Step 2: Chunk the text
    st.header("Step 2: Create Chunks")
    if "text" in st.session_state and st.button("Create Chunks"):
        chunks = create_chunks(st.session_state["text"], CHUNK_SIZE)
        st.session_state["chunks"] = chunks
        # An index built from the previous chunks no longer matches.
        if "index" in st.session_state:
            del st.session_state["index"]
        st.success(f"Created {len(chunks)} chunks.")

    # Step 3: Create FAISS index
    st.header("Step 3: Create Vector Database")
    if "chunks" in st.session_state and st.button("Create Vector Database"):
        # Only the index is kept; the raw embedding array is not needed here.
        index, _ = create_faiss_index(st.session_state["chunks"], VECTOR_DIM)
        st.session_state["index"] = index
        st.success("FAISS vector database created.")

    # Step 4: Ask a question
    st.header("Step 4: Query the Model")
    question = st.text_input("Ask a question about the document:")
    if question:
        if "index" not in st.session_state:
            # Previously the question was silently ignored in this state.
            st.warning("Complete steps 1-3 before asking a question.")
        else:
            try:
                client = get_groq_client()
                answer = query_model(client, question)
            except Exception as exc:  # UI boundary: report instead of crashing
                st.error(f"Query failed: {exc}")
            else:
                st.write("Answer:", answer)


if __name__ == "__main__":
    main()