Spaces:

Lesterchia1
/

Test_TutorAI_We

Build error

App Files Files Community

Lesterchia1 committed 1 day ago

Commit

36698b9

verified ·

1 Parent(s): 6d744a1

Update app.py

Browse files

Files changed (1) hide show

app.py +99 -6

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import os
 import re
 import uuid
 import tempfile
 import numpy as np
 import gradio as gr
@@ -14,13 +15,12 @@ from langchain_community.tools import DuckDuckGoSearchRun
 from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, SystemMessage
 from langgraph.graph import StateGraph, END
 from langchain_groq import ChatGroq
-#from langchain_huggingface.embeddings import HuggingFaceEmbeddings
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_community.vectorstores import Chroma
-#from langchain_chroma import Chroma    #cant work
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain_core.documents import Document
 # --- 1. INITIALIZATION & CORE TOOLS ---
 groq_api_key = os.getenv("GROQ_API_KEY")
@@ -89,10 +89,13 @@ def extract_and_store_document(file_path: str):
         splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
         chunks = splitter.split_text(text)
         documents = [Document(page_content=chunk, metadata={"source": os.path.basename(file_path)}) for chunk in chunks]
         vectorstore.add_documents(documents)
-        vectorstore.persist()
         return True
     except Exception as e:
         print(f"Error processing {file_path}: {e}")
         return False
@@ -114,11 +117,101 @@ def sensing_node(state: AgentState):
     decision = chat_model.invoke([HumanMessage(content=prompt)]).content.strip().upper()
     return {"context": context, "decision": "RAG" if "RAG" in decision else "WEB"}
 def expansion_node(state: AgentState):
     # Previous implementation (the "-" lines are removed by this commit):
     # on a "WEB" decision the search tool is called directly with the latest
     # message and its output is merged with the local context.
     if state["decision"] == "WEB":
         user_query = state["messages"][-1].content
-        web_data = web_search_tool.run(user_query)
-        return {"context": f"WEB INFO: {web_data}\nLOCAL: {state['context']}", "source": "Web + Local Documents"}
     return {"source": "Local Documents Only"}
 def generation_node(state: AgentState):

 import os
 import re
 import uuid
+import time  # Add this
 import tempfile
 import numpy as np
 import gradio as gr
 from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, SystemMessage
 from langgraph.graph import StateGraph, END
 from langchain_groq import ChatGroq
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_community.vectorstores import Chroma
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain_core.documents import Document
 # --- 1. INITIALIZATION & CORE TOOLS ---
 groq_api_key = os.getenv("GROQ_API_KEY")
         splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
         chunks = splitter.split_text(text)
         documents = [Document(page_content=chunk, metadata={"source": os.path.basename(file_path)}) for chunk in chunks]
+        # Chroma auto-persists in version 0.4.x+
         vectorstore.add_documents(documents)
+        # No explicit vectorstore.persist() call here: Chroma 0.4.x+ persists automatically
         return True
     except Exception as e:
         print(f"Error processing {file_path}: {e}")
         return False
     decision = chat_model.invoke([HumanMessage(content=prompt)]).content.strip().upper()
     return {"context": context, "decision": "RAG" if "RAG" in decision else "WEB"}
+# Fallback search strategy: retry the web search with progressively simpler
+# queries so that one failed or empty search does not abort the lookup.
+def safe_web_search_with_fallback(query: str):
+    """Web search with multiple fallback strategies"""
+    global last_web_search_time
+    strategies = [
+        # Strategy 1: Direct search
+        lambda: web_search_tool.run(query),
+        # Strategy 2: Search with simplified query
+        lambda: web_search_tool.run(query.split("?")[0] if "?" in query else query),
+        # Strategy 3: Search with keywords only
+        lambda: web_search_tool.run(' '.join(query.split()[:10]))
+    ]
+    for i, strategy in enumerate(strategies):
+        try:
+            # Rate limiting check
+            current_time = time.time()
+            if current_time - last_web_search_time < 5:  # 5 second cooldown
+                time.sleep(5 - (current_time - last_web_search_time))
+            result = strategy()
+            last_web_search_time = time.time()
+            if result and len(result) > 50:  # Valid result
+                return result[:2000]  # Truncate
+        except Exception as e:
+            if i == len(strategies) - 1:  # Last strategy failed
+                return f"Web search unavailable. Error: {str(e)[:100]}"
+            continue
+    return "Web search temporarily unavailable."
+# Module-level state for web-search rate limiting
+last_web_search_time = 0
+WEB_SEARCH_COOLDOWN = 10  # 10 seconds between web searches
 def expansion_node(state: AgentState):
+    global last_web_search_time
     if state["decision"] == "WEB":
         user_query = state["messages"][-1].content
+        web_data = safe_web_search_with_fallback(user_query)
+                return {
+            "context": f"WEB INFO: {web_data}\nLOCAL: {state['context']}",
+            "source": "Web + Local Documents"
+        }
+    return {"source": "Local Documents Only"}
+        # Implement rate limiting
+        current_time = time.time()
+        time_since_last = current_time - last_web_search_time
+        # If we searched recently, wait or skip web search
+        if time_since_last < WEB_SEARCH_COOLDOWN:
+            # Option 1: Skip web search and use local docs only
+            # return {"context": state['context'], "source": "Local Documents Only (Rate limited)"}
+            # Option 2: Wait and then search (for demo)
+            wait_time = WEB_SEARCH_COOLDOWN - time_since_last
+            time.sleep(wait_time)
+        try:
+            web_data = web_search_tool.run(user_query)
+            last_web_search_time = time.time()  # Update timestamp
+            # Truncate web data to avoid context overflow
+            if len(web_data) > 1500:
+                web_data = web_data[:1500] + "..."
+            return {
+                "context": f"WEB SEARCH RESULTS: {web_data}\nLOCAL DOCUMENTS: {state['context']}",
+                "source": "Web Search + Local Documents"
+            }
+        except Exception as e:
+            # If web search fails, use local docs with explanation
+            error_msg = str(e)
+            if "Ratelimit" in error_msg:
+                return {
+                    "context": state['context'],
+                    "source": "Local Documents Only (Search rate limit reached)"
+                }
+            else:
+                return {
+                    "context": state['context'],
+                    "source": f"Local Documents Only (Search error: {error_msg[:100]})"
+                }
     return {"source": "Local Documents Only"}
 def generation_node(state: AgentState):