Spaces:

manohargottam
/

agent

Sleeping

App Files Files Community

manohargottam commited on Jun 7, 2025

Commit

d14dccf

verified ·

1 Parent(s): 1551d5f

Upload agent.py

Browse files

Files changed (1) hide show

agent.py +185 -145

agent.py CHANGED Viewed

@@ -1,180 +1,156 @@
-"""LangGraph Agent"""
 import os
 import json
 from dotenv import load_dotenv
 from langgraph.graph import START, StateGraph, MessagesState
 from langgraph.prebuilt import tools_condition
 from langgraph.prebuilt import ToolNode
-from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain_groq import ChatGroq
-from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFaceEmbeddings
 from langchain_community.tools.tavily_search import TavilySearchResults
 from langchain_community.document_loaders import WikipediaLoader
 from langchain_community.document_loaders import ArxivLoader
-from langchain_community.vectorstores import SupabaseVectorStore
-from langchain_core.messages import SystemMessage, HumanMessage
 from langchain_core.tools import tool
-from langchain.tools.retriever import create_retriever_tool
 from supabase.client import Client, create_client
 load_dotenv()
-def safe_get_metadata(doc, key, default=""):
-    """Safely extract metadata from document, handling string and dict formats"""
-    try:
-        if isinstance(doc.metadata, str):
-            # Try to parse as JSON if it's a string
-            metadata = json.loads(doc.metadata)
-        elif isinstance(doc.metadata, dict):
-            metadata = doc.metadata
-        else:
-            return default
-        return metadata.get(key, default)
-    except (json.JSONDecodeError, AttributeError):
-        return default
 @tool
 def multiply(a: int, b: int) -> int:
-    """Multiply two numbers.
-    Args:
-        a: first int
-        b: second int
-    """
     return a * b
 @tool
 def add(a: int, b: int) -> int:
-    """Add two numbers.
-    Args:
-        a: first int
-        b: second int
-    """
     return a + b
 @tool
 def subtract(a: int, b: int) -> int:
-    """Subtract two numbers.
-    Args:
-        a: first int
-        b: second int
-    """
     return a - b
 @tool
 def divide(a: int, b: int) -> int:
-    """Divide two numbers.
-    Args:
-        a: first int
-        b: second int
-    """
     if b == 0:
         raise ValueError("Cannot divide by zero.")
     return a / b
 @tool
 def modulus(a: int, b: int) -> int:
-    """Get the modulus of two numbers.
-    Args:
-        a: first int
-        b: second int
-    """
     return a % b
 @tool
 def wiki_search(query: str) -> str:
-    """Search Wikipedia for a query and return maximum 2 results.
-    Args:
-        query: The search query."""
     try:
         search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
-        formatted_search_docs = "\n\n---\n\n".join(
-            [
-                f'<Document source="{safe_get_metadata(doc, "source")}" page="{safe_get_metadata(doc, "page")}"/>\n{doc.page_content}\n</Document>'
-                for doc in search_docs
-            ])
-        return {"wiki_results": formatted_search_docs}
     except Exception as e:
-        return {"wiki_results": f"Error searching Wikipedia: {str(e)}"}
 @tool
 def web_search(query: str) -> str:
-    """Search Tavily for a query and return maximum 3 results.
-    Args:
-        query: The search query."""
     try:
         search_tool = TavilySearchResults(max_results=3)
-        search_results = search_tool.invoke(query)
-        # Handle the case where search_results might be a list of dicts or Document objects
-        if isinstance(search_results, list):
-            formatted_search_docs = "\n\n---\n\n".join(
-                [
-                    f'<Document source="{result.get("url", "")}" />\n{result.get("content", "")}\n</Document>'
-                    if isinstance(result, dict) else
-                    f'<Document source="{safe_get_metadata(result, "source")}" page="{safe_get_metadata(result, "page")}"/>\n{result.page_content}\n</Document>'
-                    for result in search_results
-                ])
-        else:
-            formatted_search_docs = str(search_results)
-        return {"web_results": formatted_search_docs}
     except Exception as e:
-        return {"web_results": f"Error searching web: {str(e)}"}
 @tool
-def arvix_search(query: str) -> str:
-    """Search Arxiv for a query and return maximum 3 result.
-    Args:
-        query: The search query."""
     try:
         search_docs = ArxivLoader(query=query, load_max_docs=3).load()
-        formatted_search_docs = "\n\n---\n\n".join(
-            [
-                f'<Document source="{safe_get_metadata(doc, "source")}" page="{safe_get_metadata(doc, "page")}"/>\n{doc.page_content[:1000]}\n</Document>'
-                for doc in search_docs
-            ])
-        return {"arvix_results": formatted_search_docs}
     except Exception as e:
-        return {"arvix_results": f"Error searching Arxiv: {str(e)}"}
-# load the system prompt from the file
 try:
     with open("system_prompt.txt", "r", encoding="utf-8") as f:
         system_prompt = f.read()
 except FileNotFoundError:
     system_prompt = "You are a helpful AI assistant."
-# System message
 sys_msg = SystemMessage(content=system_prompt)
-# build a retriever
-embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2") #  dim=768
 supabase_url = "https://ajnakgegqblhwltzkzbz.supabase.co"
 supabase_key = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6ImFqbmFrZ2VncWJsaHdsdHpremJ6Iiwicm9sZSI6ImFub24iLCJpYXQiOjE3NDkyMDgxODgsImV4cCI6MjA2NDc4NDE4OH0.b9RPF-5otedg4yiaQu_uhOgYpXVXd9D_0oR-9cluUjo"
 try:
-    supabase: Client = create_client(supabase_url, supabase_key)
-    vector_store = SupabaseVectorStore(
-        client=supabase,
-        embedding= embeddings,
-        table_name="documents",
-        query_name="match_documents_langchain",
-    )
-    create_retriever_tool = create_retriever_tool(
-        retriever=vector_store.as_retriever(),
-        name="Question Search",
-        description="A tool to retrieve similar questions from a vector store.",
-    )
 except Exception as e:
-    print(f"Warning: Could not initialize vector store: {e}")
-    vector_store = None
 tools = [
     multiply,
@@ -184,59 +160,123 @@ tools = [
     modulus,
     wiki_search,
     web_search,
-    arvix_search,
 ]
-# Build graph function
 def build_graph(provider: str = "groq"):
-    """Build the graph"""
-    # Load environment variables from .env file
     if provider == "groq":
-        # Groq https://console.groq.com/docs/models
-        llm = ChatGroq(model="qwen-qwq-32b",api_key="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", temperature=0) # optional : qwen-qwq-32b gemma2-9b-it
     else:
-        raise ValueError("Invalid provider. Choose 'google', 'groq' or 'huggingface'.")
-    # Bind tools to LLM
-    llm_with_tools = llm.bind_tools(tools)
-    # Node
-    def assistant(state: MessagesState):
-        """Assistant node"""
-        return {"messages": [llm_with_tools.invoke(state["messages"])]}
-    from langchain_core.messages import AIMessage
     def retriever(state: MessagesState):
-        """Retriever node with error handling"""
         try:
-            if vector_store is None:
-                return {"messages": [AIMessage(content="Vector store not available.")]}
             query = state["messages"][-1].content
-            similar_docs = vector_store.similarity_search(query, k=1)
-            if not similar_docs:
-                return {"messages": [AIMessage(content="No similar documents found.")]}
-            similar_doc = similar_docs[0]
-            content = similar_doc.page_content
-            if "Final answer :" in content:
-                answer = content.split("Final answer :")[-1].strip()
             else:
-                answer = content.strip()
-            return {"messages": [AIMessage(content=answer)]}
         except Exception as e:
-            return {"messages": [AIMessage(content=f"Error in retriever: {str(e)}")]}
     builder = StateGraph(MessagesState)
     builder.add_node("retriever", retriever)
-    # Retriever ist Start und Endpunkt
     builder.set_entry_point("retriever")
     builder.set_finish_point("retriever")
-    # Compile graph
-    return builder.compile()

+"""LangGraph Agent - Complete bypass of problematic vector store"""
 import os
 import json
 from dotenv import load_dotenv
 from langgraph.graph import START, StateGraph, MessagesState
 from langgraph.prebuilt import tools_condition
 from langgraph.prebuilt import ToolNode
 from langchain_groq import ChatGroq
 from langchain_community.tools.tavily_search import TavilySearchResults
 from langchain_community.document_loaders import WikipediaLoader
 from langchain_community.document_loaders import ArxivLoader
+from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
 from langchain_core.tools import tool
 from supabase.client import Client, create_client
 load_dotenv()
 @tool
 def multiply(a: int, b: int) -> int:
+    """Multiply two numbers."""
     return a * b
 @tool
 def add(a: int, b: int) -> int:
+    """Add two numbers."""
     return a + b
 @tool
 def subtract(a: int, b: int) -> int:
+    """Subtract two numbers."""
     return a - b
 @tool
 def divide(a: int, b: int) -> int:
+    """Divide two numbers."""
     if b == 0:
         raise ValueError("Cannot divide by zero.")
     return a / b
 @tool
 def modulus(a: int, b: int) -> int:
+    """Get the modulus of two numbers."""
     return a % b
 @tool
 def wiki_search(query: str) -> str:
+    """Search Wikipedia for a query and return maximum 2 results."""
     try:
         search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
+        formatted_docs = []
+        for doc in search_docs:
+            source = "Wikipedia"
+            if hasattr(doc, 'metadata') and isinstance(doc.metadata, dict):
+                source = doc.metadata.get('source', 'Wikipedia')
+            formatted_docs.append(f"Source: {source}\n{doc.page_content[:1000]}...")
+        return "\n\n---\n\n".join(formatted_docs)
     except Exception as e:
+        return f"Error searching Wikipedia: {str(e)}"
 @tool
 def web_search(query: str) -> str:
+    """Search the web using Tavily."""
     try:
         search_tool = TavilySearchResults(max_results=3)
+        results = search_tool.invoke(query)
+        if isinstance(results, list):
+            formatted_results = []
+            for result in results:
+                if isinstance(result, dict):
+                    url = result.get('url', 'Unknown')
+                    content = result.get('content', '')[:1000]
+                    formatted_results.append(f"Source: {url}\n{content}...")
+            return "\n\n---\n\n".join(formatted_results)
+        return str(results)
     except Exception as e:
+        return f"Error searching web: {str(e)}"
 @tool
+def arxiv_search(query: str) -> str:
+    """Search Arxiv for academic papers."""
     try:
         search_docs = ArxivLoader(query=query, load_max_docs=3).load()
+        formatted_docs = []
+        for doc in search_docs:
+            source = "ArXiv"
+            if hasattr(doc, 'metadata') and isinstance(doc.metadata, dict):
+                source = doc.metadata.get('source', 'ArXiv')
+            formatted_docs.append(f"Source: {source}\n{doc.page_content[:1000]}...")
+        return "\n\n---\n\n".join(formatted_docs)
     except Exception as e:
+        return f"Error searching ArXiv: {str(e)}"
+# Raw Supabase search function that bypasses LangChain entirely
+def raw_supabase_search(query: str, supabase_client):
+    """Direct Supabase search without any LangChain components"""
+    try:
+        # Simple text-based search using Supabase's built-in functions
+        # This assumes you have a simple text search function in your database
+        result = supabase_client.table('documents').select('content').text_search('content', query).limit(1).execute()
+        if result.data:
+            return result.data[0]['content']
+        else:
+            # Fallback: get any document (for testing)
+            result = supabase_client.table('documents').select('content').limit(1).execute()
+            if result.data:
+                return result.data[0]['content']
+            return "No documents found in database"
+    except Exception as e:
+        return f"Database search error: {str(e)}"
+# Alternative: Use simple SQL query
+def simple_sql_search(query: str, supabase_client):
+    """Simple SQL-based search"""
+    try:
+        # Use a simple SQL query to avoid metadata issues
+        sql_query = f"""
+        SELECT content
+        FROM documents
+        WHERE content ILIKE '%{query}%'
+        LIMIT 1
+        """
+        result = supabase_client.rpc('execute_sql', {'query': sql_query}).execute()
+        if result.data:
+            return result.data[0]['content']
+        return "No matching documents found"
+    except Exception as e:
+        return f"SQL search error: {str(e)}"
+# Load system prompt
 try:
     with open("system_prompt.txt", "r", encoding="utf-8") as f:
         system_prompt = f.read()
 except FileNotFoundError:
     system_prompt = "You are a helpful AI assistant."
 sys_msg = SystemMessage(content=system_prompt)
+# Initialize Supabase without vector store
 supabase_url = "https://ajnakgegqblhwltzkzbz.supabase.co"
 supabase_key = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6ImFqbmFrZ2VncWJsaHdsdHpremJ6Iiwicm9sZSI6ImFub24iLCJpYXQiOjE3NDkyMDgxODgsImV4cCI6MjA2NDc4NDE4OH0.b9RPF-5otedg4yiaQu_uhOgYpXVXd9D_0oR-9cluUjo"
 try:
+    supabase_client = create_client(supabase_url, supabase_key)
 except Exception as e:
+    print(f"Warning: Could not initialize Supabase client: {e}")
+    supabase_client = None
 tools = [
     multiply,
     modulus,
     wiki_search,
     web_search,
+    arxiv_search,
 ]
 def build_graph(provider: str = "groq"):
+    """Build the graph without problematic vector store operations"""
     if provider == "groq":
+        llm = ChatGroq(
+            model="qwen-qwq-32b",
+            api_key="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",
+            temperature=0
+        )
     else:
+        raise ValueError("Invalid provider. Choose 'groq'.")
     def retriever(state: MessagesState):
+        """Simple retriever that avoids all Document/metadata validation"""
         try:
             query = state["messages"][-1].content
+            if supabase_client is None:
+                return {"messages": [AIMessage(content="Database not available. Try using the web search tools instead.")]}
+            # Try different approaches in order of preference
+            content = None
+            # Approach 1: Simple table query
+            try:
+                result = supabase_client.table('documents').select('content').limit(1).execute()
+                if result.data and len(result.data) > 0:
+                    content = result.data[0].get('content', '')
+            except Exception as e:
+                print(f"Table query failed: {e}")
+            # Approach 2: Raw supabase search
+            if not content:
+                content = raw_supabase_search(query, supabase_client)
+            # Process the content
+            if content and content.strip():
+                # Look for final answer pattern
+                if "Final answer :" in content:
+                    answer = content.split("Final answer :")[-1].strip()
+                else:
+                    # Take first 500 characters as answer
+                    answer = content.strip()[:500]
+                    if len(content) > 500:
+                        answer += "..."
+                return {"messages": [AIMessage(content=answer)]}
             else:
+                return {"messages": [AIMessage(content="No relevant information found. Please try using the search tools.")]}
         except Exception as e:
+            return {"messages": [AIMessage(content=f"Search unavailable: {str(e)}. Please try using the web search tools.")]}
+    # Build simple graph
     builder = StateGraph(MessagesState)
     builder.add_node("retriever", retriever)
     builder.set_entry_point("retriever")
     builder.set_finish_point("retriever")
+    return builder.compile()
+# Alternative: Build graph without retriever at all
+def build_assistant_graph(provider: str = "groq"):
+    """Build a graph with just assistant and tools (no problematic retriever)"""
+    if provider == "groq":
+        llm = ChatGroq(
+            model="qwen-qwq-32b",
+            api_key="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",
+            temperature=0
+        )
+    else:
+        raise ValueError("Invalid provider.")
+    llm_with_tools = llm.bind_tools(tools)
+    def assistant(state: MessagesState):
+        """Assistant node that can use tools"""
+        messages = [sys_msg] + state["messages"]
+        return {"messages": [llm_with_tools.invoke(messages)]}
+    builder = StateGraph(MessagesState)
+    builder.add_node("assistant", assistant)
+    builder.add_node("tools", ToolNode(tools))
+    builder.set_entry_point("assistant")
+    builder.add_conditional_edges("assistant", tools_condition)
+    builder.add_edge("tools", "assistant")
+    return builder.compile()
+# Test function
+def test_graph():
+    """Test the graph builds successfully"""
+    try:
+        print("Testing retriever-based graph...")
+        graph1 = build_graph()
+        print("✓ Retriever graph built successfully!")
+        return graph1
+    except Exception as e:
+        print(f"✗ Retriever graph failed: {e}")
+        print("Testing assistant-only graph...")
+        try:
+            graph2 = build_assistant_graph()
+            print("✓ Assistant graph built successfully!")
+            return graph2
+        except Exception as e2:
+            print(f"✗ Assistant graph also failed: {e2}")
+            return None
+if __name__ == "__main__":
+    question = "When was a picture of St. Thomas Aquinas first added to the Wikipedia page on the Principle of double effect?"
+    graph = build_graph(provider="groq")
+    messages = [HumanMessage(content=question)]
+    messages = graph.invoke({"messages": messages})
+    for m in messages["messages"]:
+        m.pretty_print()