Spaces:

makhdoomnaeem
/

Questions_Answers

Sleeping

App Files Files Community

makhdoomnaeem committed on Jan 4, 2025

Commit

9a44baf

verified ·

1 Parent(s): f809e7b

Create app.py

Browse files

Files changed (1) hide show

app.py +113 -0

app.py ADDED Viewed

	@@ -0,0 +1,113 @@

+import os
+import streamlit as st
+import requests
+from PyPDF2 import PdfReader
+from langchain_community.vectorstores import FAISS
+from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from groq import Groq  # Import Groq client library
# Build the Groq API client used for chat completions
def initialize_groq_client(api_key):
    """Create a ``Groq`` client for the given API key.

    Any exception raised while constructing the client is reported to the
    page through ``st.error`` and is not re-raised.

    Args:
        api_key: Key passed to the ``Groq`` constructor.

    Returns:
        The ``Groq`` client, or ``None`` when construction failed.
    """
    try:
        client = Groq(api_key=api_key)
    except Exception as exc:
        st.error(f"Failed to initialize Groq client: {exc}")
        client = None
    return client
# Function to download the PDF from Google Drive
def download_pdf(drive_link, output_path="document.pdf", timeout=30):
    """Download a shared Google Drive PDF to a local file.

    Args:
        drive_link: Google Drive sharing URL. Both the ``.../d/<id>/...``
            form and the ``...?id=<id>`` form are accepted.
        output_path: Path the downloaded bytes are written to.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        ``output_path``, the location of the saved PDF.

    Raises:
        ValueError: If no file id can be found in ``drive_link``, or if the
            downloaded payload does not look like a PDF.
        requests.RequestException: On a network failure or an HTTP error
            status.
    """
    import re  # local import: keeps this function self-contained

    # Accept any ".../d/<id>/<anything>" link (view, edit, preview, ...) as
    # well as "?id=<id>" links, instead of assuming a "/view" suffix.
    match = re.search(r"/d/([A-Za-z0-9_-]+)|[?&]id=([A-Za-z0-9_-]+)", drive_link or "")
    if match is None:
        raise ValueError(f"Could not find a Google Drive file id in link: {drive_link!r}")
    file_id = match.group(1) or match.group(2)

    url = f"https://drive.google.com/uc?id={file_id}&export=download"
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    # Without this check a non-PDF response (e.g. an HTML page) would be
    # saved under a .pdf name and only fail later inside the PDF parser.
    if b"%PDF" not in response.content[:1024]:
        raise ValueError(
            "The downloaded content is not a PDF. "
            "Check that the file is shared publicly and is a PDF document."
        )

    with open(output_path, "wb") as f:
        f.write(response.content)
    return output_path
# Function to extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Return the text of every page in a PDF as one string.

    Pages are joined with a newline so the last word of one page is not
    fused with the first word of the next.

    Args:
        pdf_file: Value passed straight to ``PdfReader`` (the caller in this
            file passes a file path).

    Returns:
        The concatenated page text. Pages that yield no text contribute an
        empty string.
    """
    reader = PdfReader(pdf_file)
    # `or ""` guards against a page whose extraction yields nothing, so the
    # join never sees a non-string value.
    return "\n".join(page.extract_text() or "" for page in reader.pages)
# Function to create FAISS vector database
def create_vector_db(text, chunk_size=500, chunk_overlap=50, model_name="all-MiniLM-L6-v2"):
    """Split ``text`` into chunks, embed them and index them with FAISS.

    Args:
        text: Document text to index.
        chunk_size: Chunk size passed to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: Chunk overlap passed to
            ``RecursiveCharacterTextSplitter``.
        model_name: Hugging Face embedding model name.

    Returns:
        The vector store built by ``FAISS.from_texts``.

    Raises:
        ValueError: If ``text`` is empty or whitespace-only, or splitting
            produces no chunks (for example a scanned PDF with no text layer).
    """
    # Fail early with a clear message: building an index from zero chunks
    # otherwise surfaces as an obscure error deep inside the vector store.
    if not text or not text.strip():
        raise ValueError("No text to index: the document contains no extractable text.")

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    chunks = text_splitter.split_text(text)
    if not chunks:
        raise ValueError("No text to index: splitting the document produced no chunks.")

    # Use Hugging Face Embeddings
    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    return FAISS.from_texts(chunks, embeddings)
# Ask the Groq chat model a question, supplying retrieved text as context
def query_groq_api(client, query, context, model="llama-3.3-70b-versatile"):
    """Send ``query`` plus ``context`` to the chat-completions endpoint.

    Args:
        client: Object exposing ``chat.completions.create(...)``.
        query: The user's question.
        context: Text placed ahead of the question in the user message.
        model: Model identifier forwarded to the API.

    Returns:
        The content of the first returned choice. If anything in the request
        raises, a string starting with ``"Error querying Groq API: "`` is
        returned instead of raising.
    """
    try:
        system_message = {"role": "system", "content": "You are an intelligent assistant."}
        user_message = {"role": "user", "content": f"Context: {context}\nQuestion: {query}"}
        completion = client.chat.completions.create(
            messages=[system_message, user_message],
            model=model,
            stream=False,
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as exc:
        return f"Error querying Groq API: {exc}"
# Streamlit App
st.title("PDF Q&A with Groq API")

# Dynamic API Key Management
# The environment key is deliberately NOT used as the widget's initial value:
# a pre-filled value is delivered to the visitor's browser, which would expose
# the server-side secret. A blank field falls back to the environment
# variable on the server instead, as the label promises.
default_api_key = os.getenv("GROQ_API_KEY", "")
entered_api_key = st.text_input(
    "Enter Groq API Key (leave blank to use environment variable):",
    type="password",
    help="Provide your Groq API key. If left blank, the app will use the key from the environment variable."
)
api_key = (entered_api_key or "").strip() or default_api_key

# Bind groq_client on every run so the checks below can never hit a NameError
# when no key is available (e.g. the key is cleared after a PDF was indexed).
groq_client = None
if api_key:
    groq_client = initialize_groq_client(api_key)
    if groq_client:
        st.success("Groq client initialized successfully!")
    else:
        st.error("Failed to initialize Groq client. Please check the API key.")

# Persistent state to store vector database across Streamlit reruns
if "vector_db" not in st.session_state:
    st.session_state.vector_db = None

# Process a PDF referenced by a Google Drive link
pdf_link = st.text_input("Enter Google Drive link to PDF:")
upload_button = st.button("Process PDF")
if upload_button and not pdf_link:
    st.warning("Please enter a Google Drive link first.")
elif pdf_link and upload_button:
    if not api_key or not groq_client:
        st.error("Please provide a valid Groq API Key before proceeding.")
    else:
        # Top-level UI boundary: report any download/parse/index failure on
        # the page instead of letting the script crash with a traceback.
        try:
            st.info("Downloading and processing the PDF...")
            pdf_file = download_pdf(pdf_link)
            pdf_text = extract_text_from_pdf(pdf_file)
            if not pdf_text or not pdf_text.strip():
                st.error("No extractable text was found in the PDF.")
            else:
                st.success("PDF processed successfully!")
                # Create FAISS vector database
                st.info("Creating vector database...")
                st.session_state.vector_db = create_vector_db(pdf_text)
                st.success("Vector database created!")
        except Exception as exc:
            st.error(f"Could not process the PDF: {exc}")

# Query the document
if st.session_state.vector_db is not None and groq_client:
    user_query = st.text_input("Ask a question about the document:")
    if st.button("Submit Query"):
        if not user_query or not user_query.strip():
            # Skip the retrieval and API round trip for an empty question.
            st.warning("Please enter a question first.")
        else:
            with st.spinner("Processing your query..."):
                # Retrieve similar text chunks
                similar_docs = st.session_state.vector_db.similarity_search(user_query, k=3)
                context = " ".join(doc.page_content for doc in similar_docs)
                # Send query with context to Groq API
                response = query_groq_api(groq_client, user_query, context)
                st.write("**Answer:**", response)