makhdoomnaeem committed on
Commit
2ca9eec
·
verified ·
1 Parent(s): 0632146

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -49
app.py CHANGED
@@ -7,49 +7,25 @@ from langchain.embeddings.huggingface import HuggingFaceEmbeddings
7
  from langchain.text_splitter import RecursiveCharacterTextSplitter
8
  from groq import Groq
9
 
10
-
11
# Initialize Groq Client
# The API key is read from the environment (for example a host/Space secret)
# instead of being committed to source control. Any key that was ever
# committed in plain text should be treated as leaked and revoked.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
client = Groq(api_key=GROQ_API_KEY)

# Hardcoded Google Drive link (replace with your valid link)
GOOGLE_DRIVE_LINK = "https://drive.google.com/file/d/1KCr8vXUGzuZhQZq-D9CadJEP0eLSSYN8/view?usp=sharing"
18
 
19
  # Function to download the PDF from Google Drive
20
def download_pdf():
    """Download the PDF referenced by ``GOOGLE_DRIVE_LINK`` to ``document.pdf``.

    Two share-link shapes are recognised: ``.../d/<file_id>/view...`` and
    ``...id=<file_id>&...``.

    Returns:
        The local path ``"document.pdf"`` on success, or ``None`` after the
        failure has been reported through ``st.error``.
    """
    try:
        if "/d/" in GOOGLE_DRIVE_LINK:
            file_id = GOOGLE_DRIVE_LINK.split("/d/")[1].split("/view")[0]
        elif "id=" in GOOGLE_DRIVE_LINK:
            file_id = GOOGLE_DRIVE_LINK.split("id=")[1].split("&")[0]
        else:
            raise ValueError("Invalid Google Drive link format.")

        # The id parsed above is used as-is, so editing GOOGLE_DRIVE_LINK
        # really changes which file gets downloaded.
        url = f"https://drive.google.com/uc?id={file_id}&export=download"

        # A timeout keeps the app from hanging forever on a stalled connection.
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Raise error for unsuccessful requests

        with open("document.pdf", "wb") as f:
            f.write(response.content)
        return "document.pdf"
    except Exception as e:
        # UI boundary: surface the problem to the user and signal failure.
        st.error(f"Failed to download PDF: {e}")
        return None
41
 
42
  # Function to extract text from PDF
43
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in ``pdf_file``.

    Args:
        pdf_file: Path or file object accepted by ``PdfReader``.

    Returns:
        The extracted text, or ``None`` after the failure has been reported
        through ``st.error``.
    """
    try:
        reader = PdfReader(pdf_file)
        # `or ""` guards against a page yielding None (presumably possible for
        # pages without a text layer on some reader versions - verify), and
        # join avoids repeated string re-allocation on large documents.
        return "".join(page.extract_text() or "" for page in reader.pages)
    except Exception as e:
        # UI boundary: surface the problem to the user and signal failure.
        st.error(f"Failed to extract text from PDF: {e}")
        return None
53
 
54
  # Function to create FAISS vector database
55
  def create_vector_db(text):
@@ -62,6 +38,30 @@ def create_vector_db(text):
62
  vector_db = FAISS.from_texts(chunks, embeddings)
63
  return vector_db
64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  # Streamlit App
66
  st.title("PDF Q&A with Groq API")
67
 
@@ -73,20 +73,13 @@ if "vector_db" not in st.session_state:
73
if st.button("Process PDF"):
    st.info("Downloading and processing the PDF...")
    pdf_file = download_pdf()

    # Text extraction is only attempted when the download produced a file.
    pdf_text = extract_text_from_pdf(pdf_file) if pdf_file is not None else None

    if pdf_file is None:
        st.error("PDF processing failed. Please check the Google Drive link.")
    elif pdf_text is None:
        st.error("Failed to process the PDF text.")
    else:
        st.success("PDF processed successfully!")

        # Build the FAISS index and keep it in the session for later queries.
        st.info("Creating vector database...")
        st.session_state.vector_db = create_vector_db(pdf_text)
        st.success("Vector database created!")
90
 
91
  # Query the document
92
  if st.session_state.vector_db:
 
7
  from langchain.text_splitter import RecursiveCharacterTextSplitter
8
  from groq import Groq
9
 
10
# Hardcoded Google Drive share link of the PDF to index; download_pdf()
# takes the file id from the path segment that follows "/d/".
GOOGLE_DRIVE_LINK = "https://drive.google.com/file/d/1KCr8vXUGzuZhQZq-D9CadJEP0eLSSYN8/view?usp=sharing"
12
 
13
  # Function to download the PDF from Google Drive
14
def download_pdf():
    """Download the PDF referenced by ``GOOGLE_DRIVE_LINK`` to ``document.pdf``.

    Returns:
        The local path ``"document.pdf"``.

    Raises:
        ValueError: If the link has no ``/d/<file_id>`` segment.
        requests.exceptions.RequestException: If the request fails, times out,
            or the server answers with an HTTP error status.
    """
    # The file id is the path segment immediately after "/d/".
    _, marker, tail = GOOGLE_DRIVE_LINK.partition("/d/")
    if not marker:
        raise ValueError("Invalid Google Drive link format.")
    file_id = tail.split("/", 1)[0].split("?", 1)[0]

    url = f"https://drive.google.com/uc?id={file_id}&export=download"
    # A timeout keeps the app from hanging forever on a stalled connection.
    response = requests.get(url, timeout=30)
    # Without this check an HTTP error page would be saved as "document.pdf".
    response.raise_for_status()

    with open("document.pdf", "wb") as f:
        f.write(response.content)
    return "document.pdf"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  # Function to extract text from PDF
23
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in ``pdf_file``.

    Args:
        pdf_file: Path or file object accepted by ``PdfReader``.

    Returns:
        The text of all pages joined in page order; pages that yield no text
        contribute an empty string.
    """
    reader = PdfReader(pdf_file)
    # `or ""` guards against a page yielding None (presumably possible for
    # pages without a text layer on some reader versions - verify), and join
    # avoids repeated string re-allocation on large documents.
    return "".join(page.extract_text() or "" for page in reader.pages)
 
 
 
 
29
 
30
  # Function to create FAISS vector database
31
  def create_vector_db(text):
 
38
  vector_db = FAISS.from_texts(chunks, embeddings)
39
  return vector_db
40
 
41
+ # Function to query Groq API
42
def query_groq_api(query, context, model="llama-3.3-70b-versatile"):
    """Ask a Groq chat model to answer ``query`` given ``context``.

    The API key is read from the ``GROQ_API_KEY`` environment variable; it is
    deliberately not embedded in the source.

    Args:
        query: The user's question.
        context: Text placed before the question in the user message.
        model: Model identifier sent in the request payload.

    Returns:
        The content of the first choice's message, ``"No response."`` when the
        reply carries no such content, or a string starting with ``"Error: "``
        when the key is missing, the request fails, or the body is not JSON.
    """
    url = "https://api.groq.com/openai/v1/chat/completions"

    api_key = os.getenv("GROQ_API_KEY")
    if not api_key:
        # Fail fast instead of sending "Bearer None" to the API.
        return "Error: GROQ_API_KEY environment variable is not set."

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    data = {
        "model": model,
        "messages": [
            {"role": "system", "content": "You are an intelligent assistant."},
            {"role": "user", "content": f"Context: {context}\nQuestion: {query}"},
        ],
    }

    try:
        # A timeout keeps the UI from hanging forever on a stalled connection.
        response = requests.post(url, headers=headers, json=data, timeout=60)
        response.raise_for_status()  # Raise an error for bad responses
        result = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a response body that is not valid JSON.
        return f"Error: {e}"

    # `or` also covers an explicit empty list / null, which a plain
    # .get(..., default) followed by [0] would not.
    choices = result.get("choices") or [{}]
    message = choices[0].get("message") or {}
    return message.get("content", "No response.")
64
+
65
  # Streamlit App
66
  st.title("PDF Q&A with Groq API")
67
 
 
73
if st.button("Process PDF"):
    st.info("Downloading and processing the PDF...")
    try:
        pdf_file = download_pdf()
        pdf_text = extract_text_from_pdf(pdf_file)
    except Exception as e:
        # UI boundary: a failed download or unreadable PDF is reported to the
        # user instead of escaping as an unhandled exception.
        st.error(f"PDF processing failed: {e}")
    else:
        st.success("PDF processed successfully!")

        # Create FAISS vector database
        st.info("Creating vector database...")
        st.session_state.vector_db = create_vector_db(pdf_text)
        st.success("Vector database created!")
 
 
 
 
 
 
 
 
 
83
 
84
  # Query the document
85
  if st.session_state.vector_db: