Spaces:

makhdoomnaeem
/

Questions_Answers

Sleeping

App Files Files Community

makhdoomnaeem committed on Jan 5, 2025

Commit

2ca9eec

verified ·

1 Parent(s): 0632146

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -49

app.py CHANGED Viewed

@@ -7,49 +7,25 @@ from langchain.embeddings.huggingface import HuggingFaceEmbeddings
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from groq import Groq
-# Initialize Groq Client
-GROQ_API_KEY = "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"  # Replace with your Groq API key
-os.environ["GROQ_API_KEY"] = GROQ_API_KEY
-client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
-# Hardcoded Google Drive link (replace with your valid link)
 GOOGLE_DRIVE_LINK = "https://drive.google.com/file/d/1KCr8vXUGzuZhQZq-D9CadJEP0eLSSYN8/view?usp=sharing"
 # Function to download the PDF from Google Drive
 def download_pdf():
-    try:
-        if "/d/" in GOOGLE_DRIVE_LINK:
-            file_id = GOOGLE_DRIVE_LINK.split("/d/")[1].split("/view")[0]
-        elif "id=" in GOOGLE_DRIVE_LINK:
-            file_id = GOOGLE_DRIVE_LINK.split("id=")[1].split("&")[0]
-        else:
-            raise ValueError("Invalid Google Drive link format.")
-        file_id = "1KCr8vXUGzuZhQZq-D9CadJEP0eLSSYN8"
-        url = f"https://drive.google.com/uc?id={file_id}&export=download"
-        response = requests.get(url)
-        response.raise_for_status()  # Raise error for unsuccessful requests
-        with open("document.pdf", "wb") as f:
-            f.write(response.content)
-        return "document.pdf"
-    except Exception as e:
-        st.error(f"Failed to download PDF: {e}")
-        return None
 # Function to extract text from PDF
 def extract_text_from_pdf(pdf_file):
-    try:
-        reader = PdfReader(pdf_file)
-        text = ""
-        for page in reader.pages:
-            text += page.extract_text()
-        return text
-    except Exception as e:
-        st.error(f"Failed to extract text from PDF: {e}")
-        return None
 # Function to create FAISS vector database
 def create_vector_db(text):
@@ -62,6 +38,30 @@ def create_vector_db(text):
     vector_db = FAISS.from_texts(chunks, embeddings)
     return vector_db
 # Streamlit App
 st.title("PDF Q&A with Groq API")
@@ -73,20 +73,13 @@ if "vector_db" not in st.session_state:
 if st.button("Process PDF"):
     st.info("Downloading and processing the PDF...")
     pdf_file = download_pdf()
-    if pdf_file is not None:  # Check if the file was downloaded successfully
-        pdf_text = extract_text_from_pdf(pdf_file)
-        if pdf_text is not None:  # Check if text was extracted successfully
-            st.success("PDF processed successfully!")
-            # Create FAISS vector database
-            st.info("Creating vector database...")
-            st.session_state.vector_db = create_vector_db(pdf_text)
-            st.success("Vector database created!")
-        else:
-            st.error("Failed to process the PDF text.")
-    else:
-        st.error("PDF processing failed. Please check the Google Drive link.")
 # Query the document
 if st.session_state.vector_db:

 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from groq import Groq
+# Hardcoded Google Drive link
 GOOGLE_DRIVE_LINK = "https://drive.google.com/file/d/1KCr8vXUGzuZhQZq-D9CadJEP0eLSSYN8/view?usp=sharing"
 # Function to download the PDF from Google Drive
 def download_pdf():
     """Download the PDF referenced by GOOGLE_DRIVE_LINK to ``document.pdf``.

     The Drive file id is taken from either a ``.../d/<id>/...`` share link
     or an ``...?id=<id>&...`` link and fetched through the direct-download
     endpoint.

     Returns:
         The local path ``"document.pdf"`` the content was written to.

     Raises:
         ValueError: If no file id can be found in GOOGLE_DRIVE_LINK.
         requests.exceptions.RequestException: If the request fails, times
             out, or returns an HTTP error status.
     """
     if "/d/" in GOOGLE_DRIVE_LINK:
         tail = GOOGLE_DRIVE_LINK.split("/d/", 1)[1]
         file_id = tail.split("/", 1)[0].split("?", 1)[0]
     elif "id=" in GOOGLE_DRIVE_LINK:
         file_id = GOOGLE_DRIVE_LINK.split("id=", 1)[1].split("&", 1)[0]
     else:
         raise ValueError("Invalid Google Drive link format.")

     url = f"https://drive.google.com/uc?id={file_id}&export=download"
     # A timeout keeps the app from hanging forever on a stalled connection.
     response = requests.get(url, timeout=30)
     # Fail loudly instead of saving an HTML error page as "document.pdf".
     response.raise_for_status()
     with open("document.pdf", "wb") as f:
         f.write(response.content)
     return "document.pdf"
 # Function to extract text from PDF
 def extract_text_from_pdf(pdf_file):
     """Return the concatenated text of every page in *pdf_file*.

     Args:
         pdf_file: Path or file object accepted by ``PdfReader``.

     Returns:
         A single string with the text of all pages joined in page order
         (no separator is inserted between pages).
     """
     reader = PdfReader(pdf_file)
     # NOTE(review): some PDF reader versions may yield None/empty text for
     # image-only pages -- `or ""` keeps the join from raising TypeError.
     return "".join(page.extract_text() or "" for page in reader.pages)
 # Function to create FAISS vector database
 def create_vector_db(text):
     vector_db = FAISS.from_texts(chunks, embeddings)
     return vector_db
+# Function to query Groq API
+def query_groq_api(query, context, model="llama-3.3-70b-versatile"):
+    url = "https://api.groq.com/openai/v1/chat/completions"
+    GROQ_API_KEY = "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"
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {os.getenv('GROQ_API_KEY')}",
+    }
+    data = {
+        "model": model,
+        "messages": [
+            {"role": "system", "content": "You are an intelligent assistant."},
+            {"role": "user", "content": f"Context: {context}\nQuestion: {query}"}
+        ],
+    }
+    try:
+        response = requests.post(url, headers=headers, json=data)
+        response.raise_for_status()  # Raise an error for bad responses
+        result = response.json()
+        return result.get("choices", [{}])[0].get("message", {}).get("content", "No response.")
+    except requests.exceptions.RequestException as e:
+        return f"Error: {e}"
 # Streamlit App
 st.title("PDF Q&A with Groq API")
 # Download the PDF, extract its text and build the vector store on demand.
 if st.button("Process PDF"):
     st.info("Downloading and processing the PDF...")
     try:
         pdf_file = download_pdf()
         pdf_text = extract_text_from_pdf(pdf_file)
     except Exception as e:
         # Top-level UI boundary: show the failure instead of a raw traceback.
         st.error(f"PDF processing failed: {e}")
     else:
         if not pdf_text or not pdf_text.strip():
             # Nothing to index (e.g. a scanned, image-only PDF).
             st.error("No text could be extracted from the PDF.")
         else:
             st.success("PDF processed successfully!")
             # Create FAISS vector database
             st.info("Creating vector database...")
             st.session_state.vector_db = create_vector_db(pdf_text)
             st.success("Vector database created!")
 # Query the document
 if st.session_state.vector_db: