waqasbm committed
Commit 9ea18af · verified · 1 Parent(s): 55c68b6

Update app.py

Files changed (1)
  1. app.py +39 -21
app.py CHANGED
@@ -5,6 +5,7 @@ import time
 import tempfile
 import faiss
 import numpy as np
+import json
 from dotenv import load_dotenv
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.embeddings import HuggingFaceEmbeddings
@@ -12,7 +13,7 @@ from langchain.vectorstores import FAISS
 from langchain.docstore.document import Document
 from keybert import KeyBERT
 from textblob import TextBlob
-from groq import Groq
+from groq import Groq, RateLimitError
 
 # Load environment
 load_dotenv()
@@ -25,7 +26,7 @@ st.title("💬 Smart PDF ChatBot")
 st.markdown("""
 Upload one or more PDFs. Get summaries, insights, and interact with AI about the content using a persistent memory chat.
 """)
-
+
 uploaded_files = st.file_uploader("📁 Upload PDF files", type=["pdf"], accept_multiple_files=True)
 
 # Utilities
@@ -37,7 +38,7 @@ def extract_text_from_pdf(file):
     return text
 
 def split_text(text):
-    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+    splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=150)
     return splitter.split_text(text)
 
 def create_vector_store(chunks):
@@ -48,30 +49,47 @@ def create_vector_store(chunks):
 def summarize_chunks(chunks):
     chunk_summaries = []
     for i, chunk in enumerate(chunks):
+        while True:
+            try:
+                response = client.chat.completions.create(
+                    model=GROQ_MODEL,
+                    messages=[
+                        {"role": "system", "content": "You are an AI that summarizes documents."},
+                        {"role": "user", "content": f"Summarize this chunk:\n{chunk}"}
+                    ]
+                )
+                chunk_summaries.append(response.choices[0].message.content)
+                break
+            except RateLimitError as e:
+                error_data = json.loads(str(e).split(" - ", 1)[-1])
+                wait_time = float(error_data["error"]["message"].split("in ")[-1].split("s")[0])
+                st.warning(f"Rate limit hit while summarizing. Retrying in {wait_time:.2f} seconds...")
+                time.sleep(wait_time)
+            except Exception as e:
+                chunk_summaries.append(f"[Error summarizing chunk {i}]: {str(e)}")
+                break
+    return "\n".join(chunk_summaries)
+
+def ask_question(vectorstore, question):
+    docs = vectorstore.similarity_search(question, k=3)
+    context = "\n".join([d.page_content for d in docs])
+    while True:
         try:
             response = client.chat.completions.create(
                 model=GROQ_MODEL,
                 messages=[
-                    {"role": "system", "content": "You are an AI that summarizes documents."},
-                    {"role": "user", "content": f"Summarize this chunk:\n{chunk}"}
+                    {"role": "system", "content": "You answer questions based on document context."},
+                    {"role": "user", "content": f"Context:\n{context}\n\nQuestion:\n{question}"}
                 ]
             )
-            chunk_summaries.append(response.choices[0].message.content)
+            return response.choices[0].message.content
+        except RateLimitError as e:
+            error_data = json.loads(str(e).split(" - ", 1)[-1])
+            wait_time = float(error_data["error"]["message"].split("in ")[-1].split("s")[0])
+            st.warning(f"Rate limit hit. Retrying in {wait_time:.2f} seconds...")
+            time.sleep(wait_time)
         except Exception as e:
-            chunk_summaries.append(f"[Error summarizing chunk {i}]: {str(e)}")
-    return "\n".join(chunk_summaries)
-
-def ask_question(vectorstore, question):
-    docs = vectorstore.similarity_search(question, k=3)
-    context = "\n".join([d.page_content for d in docs])
-    response = client.chat.completions.create(
-        model=GROQ_MODEL,
-        messages=[
-            {"role": "system", "content": "You answer questions based on document context."},
-            {"role": "user", "content": f"Context:\n{context}\n\nQuestion:\n{question}"}
-        ]
-    )
-    return response.choices[0].message.content
+            return f"[Error answering question]: {str(e)}"
 
 def extract_keywords(text, top_n=10):
     kw_model = KeyBERT()
@@ -131,4 +149,4 @@ if uploaded_files:
         st.markdown(f"**You:** {q}")
         st.markdown(f"**AI:** {a}")
 else:
-    st.info("📥 Upload one or more PDF files to get started.")
+    st.info("📥 Upload one or more PDF files to get started.")
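
Both new retry loops recover the suggested wait time by string-parsing the RateLimitError. The standalone sketch below isolates that parsing so it can be sanity-checked without an API key; the sample payload is a hypothetical Groq 429 body (assumed format, not captured from a real response).

# Minimal sketch of the wait-time parsing added in summarize_chunks and
# ask_question, runnable on its own.
import json

def parse_wait_seconds(error_text: str) -> float:
    # Same steps as the diff: take the JSON after " - ", then read the
    # number between "in " and "s" out of the error message.
    error_data = json.loads(error_text.split(" - ", 1)[-1])
    return float(error_data["error"]["message"].split("in ")[-1].split("s")[0])

# Hypothetical 429 payload, modeled on Groq's rate-limit message format.
sample = (
    'Error code: 429 - {"error": {"message": '
    '"Rate limit reached. Please try again in 7.66s.", '
    '"type": "tokens", "code": "rate_limit_exceeded"}}'
)

print(parse_wait_seconds(sample))  # 7.66

Note that deriving the delay from the message text is fragile: any change to the provider's wording would make the float() call raise, and since that happens inside the RateLimitError handler it would escape the loop. A capped exponential backoff as a fallback when parsing fails would make the retry more robust.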