Lesterchia1 committed on
Commit
36698b9
·
verified ·
1 Parent(s): 6d744a1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +99 -6
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import os
2
  import re
3
  import uuid
 
4
  import tempfile
5
  import numpy as np
6
  import gradio as gr
@@ -14,13 +15,12 @@ from langchain_community.tools import DuckDuckGoSearchRun
14
  from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, SystemMessage
15
  from langgraph.graph import StateGraph, END
16
  from langchain_groq import ChatGroq
17
- #from langchain_huggingface.embeddings import HuggingFaceEmbeddings
18
  from langchain_community.embeddings import HuggingFaceEmbeddings
19
  from langchain_community.vectorstores import Chroma
20
- #from langchain_chroma import Chroma #cant work
21
  from langchain_text_splitters import RecursiveCharacterTextSplitter
22
  from langchain_core.documents import Document
23
 
 
24
  # --- 1. INITIALIZATION & CORE TOOLS ---
25
  groq_api_key = os.getenv("GROQ_API_KEY")
26
 
@@ -89,10 +89,13 @@ def extract_and_store_document(file_path: str):
89
  splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
90
  chunks = splitter.split_text(text)
91
  documents = [Document(page_content=chunk, metadata={"source": os.path.basename(file_path)}) for chunk in chunks]
 
 
92
  vectorstore.add_documents(documents)
93
- vectorstore.persist()
 
94
  return True
95
-
96
  except Exception as e:
97
  print(f"Error processing {file_path}: {e}")
98
  return False
@@ -114,11 +117,101 @@ def sensing_node(state: AgentState):
114
  decision = chat_model.invoke([HumanMessage(content=prompt)]).content.strip().upper()
115
  return {"context": context, "decision": "RAG" if "RAG" in decision else "WEB"}
116
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  def expansion_node(state: AgentState):
 
 
118
  if state["decision"] == "WEB":
119
  user_query = state["messages"][-1].content
120
- web_data = web_search_tool.run(user_query)
121
- return {"context": f"WEB INFO: {web_data}\nLOCAL: {state['context']}", "source": "Web + Local Documents"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  return {"source": "Local Documents Only"}
123
 
124
  def generation_node(state: AgentState):
 
1
  import os
2
  import re
3
  import uuid
4
+ import time # Add this
5
  import tempfile
6
  import numpy as np
7
  import gradio as gr
 
15
  from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, SystemMessage
16
  from langgraph.graph import StateGraph, END
17
  from langchain_groq import ChatGroq
 
18
  from langchain_community.embeddings import HuggingFaceEmbeddings
19
  from langchain_community.vectorstores import Chroma
 
20
  from langchain_text_splitters import RecursiveCharacterTextSplitter
21
  from langchain_core.documents import Document
22
 
23
+
24
  # --- 1. INITIALIZATION & CORE TOOLS ---
25
  groq_api_key = os.getenv("GROQ_API_KEY")
26
 
 
89
  splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
90
  chunks = splitter.split_text(text)
91
  documents = [Document(page_content=chunk, metadata={"source": os.path.basename(file_path)}) for chunk in chunks]
92
+
93
+ # Chroma auto-persists in version 0.4.x+
94
  vectorstore.add_documents(documents)
95
+ # No explicit vectorstore.persist() call: Chroma 0.4.x+ persists automatically
96
+
97
  return True
98
+
99
  except Exception as e:
100
  print(f"Error processing {file_path}: {e}")
101
  return False
 
117
  decision = chat_model.invoke([HumanMessage(content=prompt)]).content.strip().upper()
118
  return {"context": context, "decision": "RAG" if "RAG" in decision else "WEB"}
119
 
120
+
121
+ # Fallback search strategy: retry the web search with progressively simpler queries.
122
+ # Called by expansion_node so that a single failed search does not abort the WEB path.
123
+
124
def safe_web_search_with_fallback(
    query: str,
    *,
    cooldown_seconds: float = 5.0,
    max_chars: int = 2000,
) -> str:
    """Run a rate-limited web search, falling back to simpler queries.

    Up to three query variants are tried in order: the query as given, the
    text before the first "?", and the first ten whitespace-separated words.
    Variants that are blank or duplicate an earlier variant are skipped so
    the same search is never issued (and waited for) twice.

    Args:
        query: The user's search text.
        cooldown_seconds: Minimum number of seconds between two calls to the
            search tool, measured from the module-level
            ``last_web_search_time``.
        max_chars: Maximum length of the returned search result.

    Returns:
        The first result longer than 50 characters, truncated to
        ``max_chars``. If no variant yields such a result, a human-readable
        "unavailable" message is returned instead; this function does not
        raise on search failures.
    """
    global last_web_search_time

    # Nothing meaningful to search for; avoid hitting the search backend.
    if not query or not query.strip():
        return "Web search temporarily unavailable."

    variants = (
        query,                          # 1: direct search
        query.split("?")[0],            # 2: text before the first "?"
        " ".join(query.split()[:10]),   # 3: first ten words only
    )
    # dict.fromkeys de-duplicates while keeping the strategy order.
    candidates = list(dict.fromkeys(v for v in variants if v.strip()))

    final_error = None
    for candidate in candidates:
        # Respect the cooldown relative to the previous attempt.
        remaining = cooldown_seconds - (time.time() - last_web_search_time)
        if remaining > 0:
            time.sleep(remaining)

        try:
            result = web_search_tool.run(candidate)
        except Exception as exc:  # the search tool's failure types are not known here
            final_error = exc
            continue
        finally:
            # Record failed attempts too, so a rate-limited call is not
            # retried immediately.
            last_web_search_time = time.time()

        final_error = None
        if result and len(result) > 50:  # long enough to be a real result
            return result[:max_chars]

    # Only report the error text when the last attempt itself failed.
    if final_error is not None:
        return f"Web search unavailable. Error: {str(final_error)[:100]}"
    return "Web search temporarily unavailable."
156
+
157
+ # Module-level state and settings for web-search rate limiting
158
+ last_web_search_time = 0
159
+ WEB_SEARCH_COOLDOWN = 10 # 10 seconds between web searches
160
+
161
  def expansion_node(state: AgentState):
162
+ global last_web_search_time
163
+
164
  if state["decision"] == "WEB":
165
  user_query = state["messages"][-1].content
166
+ web_data = safe_web_search_with_fallback(user_query)
167
+
168
+ return {
169
+ "context": f"WEB INFO: {web_data}\nLOCAL: {state['context']}",
170
+ "source": "Web + Local Documents"
171
+ }
172
+
173
+ return {"source": "Local Documents Only"}
174
+
175
+
176
+ # Implement rate limiting
177
+ current_time = time.time()
178
+ time_since_last = current_time - last_web_search_time
179
+
180
+ # If we searched recently, wait or skip web search
181
+ if time_since_last < WEB_SEARCH_COOLDOWN:
182
+ # Option 1: Skip web search and use local docs only
183
+ # return {"context": state['context'], "source": "Local Documents Only (Rate limited)"}
184
+
185
+ # Option 2: Wait and then search (for demo)
186
+ wait_time = WEB_SEARCH_COOLDOWN - time_since_last
187
+ time.sleep(wait_time)
188
+
189
+ try:
190
+ web_data = web_search_tool.run(user_query)
191
+ last_web_search_time = time.time() # Update timestamp
192
+
193
+ # Truncate web data to avoid context overflow
194
+ if len(web_data) > 1500:
195
+ web_data = web_data[:1500] + "..."
196
+
197
+ return {
198
+ "context": f"WEB SEARCH RESULTS: {web_data}\nLOCAL DOCUMENTS: {state['context']}",
199
+ "source": "Web Search + Local Documents"
200
+ }
201
+ except Exception as e:
202
+ # If web search fails, use local docs with explanation
203
+ error_msg = str(e)
204
+ if "Ratelimit" in error_msg:
205
+ return {
206
+ "context": state['context'],
207
+ "source": "Local Documents Only (Search rate limit reached)"
208
+ }
209
+ else:
210
+ return {
211
+ "context": state['context'],
212
+ "source": f"Local Documents Only (Search error: {error_msg[:100]})"
213
+ }
214
+
215
  return {"source": "Local Documents Only"}
216
 
217
  def generation_node(state: AgentState):