Spaces:

Tingusto
/

Final-Assignment-Agent

Sleeping

App Files Files Community

Tingusto committed on May 14, 2025

Commit

2f65b93

1 Parent(s): 6eb1e80

Build agent & graph

Browse files

Files changed (1) hide show

agent.py +125 -160

agent.py CHANGED Viewed

@@ -1,27 +1,90 @@
 import os
-from typing import Dict, List, Optional
 from dotenv import load_dotenv
 from langchain_groq import ChatGroq
-from langchain_core.messages import SystemMessage, HumanMessage
 from langchain_community.document_loaders import WikipediaLoader
 from langchain_community.document_loaders import ArxivLoader
-import json
 import requests
 from bs4 import BeautifulSoup
 import urllib.parse
-import pandas as pd
-import re
 load_dotenv()
-class BasicAgent:
-    def __init__(self):
-        self.llm = ChatGroq(
-            model="meta-llama/llama-4-maverick-17b-128e-instruct",
-            temperature=0.1
-        )
-        self.system_prompt = """You are a highly accurate question-answering assistant. Your task is to provide precise, direct answers to questions.
 Key Rules:
 1. Answer Format:
@@ -55,152 +118,54 @@ Key Rules:
    - For scientific terms: Use the standard scientific notation
    - For geographical locations: Use official names without abbreviations
    - For audio/video questions: Focus on the specific detail requested"""
-        # Initialize tools
-        self.tools = [
-            self.wiki_search,
-            self.web_search,
-            self.arxiv_search
-        ]
-    def wiki_search(self, query: str) -> str:
-        """Search Wikipedia for information."""
-        try:
-            search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
-            return "\n".join([doc.page_content for doc in search_docs])
-        except Exception as e:
-            return f"Error searching Wikipedia: {str(e)}"
-    def web_search(self, query: str) -> str:
-        """Search the web using DuckDuckGo."""
-        try:
-            encoded_query = urllib.parse.quote(query)
-            url = f"https://html.duckduckgo.com/html/?q={encoded_query}"
-            headers = {
-                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
-            }
-            response = requests.get(url, headers=headers)
-            response.raise_for_status()
-            soup = BeautifulSoup(response.text, 'html.parser')
-            results = []
-            for result in soup.find_all('div', class_='result__body'):
-                title = result.find('h2', class_='result__title')
-                snippet = result.find('a', class_='result__snippet')
-                if title and snippet:
-                    results.append(f"Title: {title.get_text()}\nSnippet: {snippet.get_text()}")
-                if len(results) >= 3:
-                    break
-            return "\n\n".join(results) if results else "No results found"
-        except Exception as e:
-            return f"Error searching web: {str(e)}"
-    def arxiv_search(self, query: str) -> str:
-        """Search Arxiv for scientific papers."""
-        try:
-            search_docs = ArxivLoader(query=query, load_max_docs=2).load()
-            return "\n".join([doc.page_content[:1000] for doc in search_docs])
-        except Exception as e:
-            return f"Error searching Arxiv: {str(e)}"
-    def process_file(self, file_name: str, question: str) -> str:
-        """Process different types of files based on extension."""
-        try:
-            if not file_name:
-                return "No file provided"
-            file_ext = file_name.split('.')[-1].lower()
-            if file_ext == 'xlsx':
-                df = pd.read_excel(file_name)
-                return f"Excel file loaded with {len(df)} rows"
-            elif file_ext == 'mp3':
-                return "Audio file detected - requires speech processing"
-            elif file_ext == 'png':
-                return "Image file detected - requires image processing"
-            elif file_ext == 'py':
-                with open(file_name, 'r') as f:
-                    code = f.read()
-                return f"Python code loaded: {len(code)} characters"
-            else:
-                return f"Unsupported file type: {file_ext}"
-        except Exception as e:
-            return f"Error processing file: {str(e)}"
-    def __call__(self, question: str, file_name: str = None) -> str:
-        try:
-            if question.startswith('.'):
-                question = question[::-1]
-            file_info = ""
-            if file_name:
-                file_info = self.process_file(file_name, question)
-            analysis_prompt = f"""Analyze this question and determine its type and required format:
-            Question: {question}
-            File Info: {file_info}
-            Provide a JSON response with:
-            1. question_type: (number/text/list/date/name/multiple_choice/file_processing)
-            2. required_format: (specific format requirements)
-            3. key_terms: (important terms to search for)
-            4. file_processing_needed: (true/false)"""
-            analysis_messages = [
-                SystemMessage(content="You are a question analyzer. Provide a JSON response."),
-                HumanMessage(content=analysis_prompt)
-            ]
-            analysis = self.llm.invoke(analysis_messages)
-            try:
-                analysis_data = json.loads(analysis.content)
-            except:
-                analysis_data = {
-                    "question_type": "text",
-                    "required_format": "direct",
-                    "key_terms": question,
-                    "file_processing_needed": bool(file_name)
-                }
-            messages = [
-                SystemMessage(content=self.system_prompt),
-                HumanMessage(content=f"""Question Type: {analysis_data['question_type']}
-                Required Format: {analysis_data['required_format']}
-                Key Terms: {analysis_data['key_terms']}
-                File Processing: {analysis_data.get('file_processing_needed', False)}
-                Question: {question}""")
-            ]
-            response = self.llm.invoke(messages)
-            answer = response.content.strip()
-            if answer.lower().startswith("final answer:"):
-                answer = answer[len("final answer:"):].strip()
-            if analysis_data['question_type'] == 'number':
-                answer = ''.join(c for c in answer if c.isdigit() or c in '.-')
-            elif analysis_data['question_type'] == 'list':
-                answer = ','.join(item.strip() for item in answer.split(','))
-            elif analysis_data['question_type'] == 'country_code':
-                answer = answer[:3].upper()
-            elif analysis_data['question_type'] == 'chess_move':
-                answer = re.sub(r'[^a-h1-8x+=#]', '', answer)
-            return answer
-        except Exception as e:
-            print(f"Error in agent response: {e}")
-            return f"Error processing question: {str(e)}"

 import os
 from dotenv import load_dotenv
+from langgraph.graph import START, StateGraph, MessagesState
+from langgraph.prebuilt import tools_condition
+from langgraph.prebuilt import ToolNode
 from langchain_groq import ChatGroq
 from langchain_community.document_loaders import WikipediaLoader
 from langchain_community.document_loaders import ArxivLoader
+from langchain_core.messages import SystemMessage, HumanMessage
+from langchain_core.tools import tool
 import requests
 from bs4 import BeautifulSoup
 import urllib.parse
 load_dotenv()
+@tool
+def wiki_search(query: str) -> str:
+    """Search Wikipedia for information.
+    Args:
+        query: The search query."""
+    try:
+        search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
+        formatted_search_docs = "\n\n---\n\n".join(
+            [
+                f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
+                for doc in search_docs
+            ])
+        return {"wiki_results": formatted_search_docs}
+    except Exception as e:
+        return f"Error searching Wikipedia: {str(e)}"
+@tool
+def web_search(query: str) -> str:
+    """Search the web using DuckDuckGo.
+    Args:
+        query: The search query."""
+    try:
+        encoded_query = urllib.parse.quote(query)
+        url = f"https://html.duckduckgo.com/html/?q={encoded_query}"
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+        }
+        response = requests.get(url, headers=headers)
+        response.raise_for_status()
+        soup = BeautifulSoup(response.text, 'html.parser')
+        results = []
+        for result in soup.find_all('div', class_='result__body'):
+            title = result.find('h2', class_='result__title')
+            snippet = result.find('a', class_='result__snippet')
+            if title and snippet:
+                results.append(f"Title: {title.get_text()}\nSnippet: {snippet.get_text()}")
+            if len(results) >= 3:
+                break
+        return {"web_results": "\n\n".join(results) if results else "No results found"}
+    except Exception as e:
+        return f"Error searching web: {str(e)}"
+@tool
+def arxiv_search(query: str) -> str:
+    """Search Arxiv for scientific papers.
+    Args:
+        query: The search query."""
+    try:
+        search_docs = ArxivLoader(query=query, load_max_docs=2).load()
+        formatted_search_docs = "\n\n---\n\n".join(
+            [
+                f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
+                for doc in search_docs
+            ])
+        return {"arxiv_results": formatted_search_docs}
+    except Exception as e:
+        return f"Error searching Arxiv: {str(e)}"
+# System prompt
+system_prompt = """You are a highly accurate question-answering assistant. Your task is to provide precise, direct answers to questions.
 Key Rules:
 1. Answer Format:
    - For scientific terms: Use the standard scientific notation
    - For geographical locations: Use official names without abbreviations
    - For audio/video questions: Focus on the specific detail requested"""
+# System message
+sys_msg = SystemMessage(content=system_prompt)
+# Tools list
+tools = [
+    wiki_search,
+    web_search,
+    arxiv_search,
+]
+def build_graph():
+    """Build the graph"""
+    # Initialize Groq LLM
+    llm = ChatGroq(
+        model="meta-llama/llama-4-maverick-17b-128e-instruct",
+        temperature=0.1
+    )
+    # Bind tools to LLM
+    llm_with_tools = llm.bind_tools(tools)
+    # Node
+    def assistant(state: MessagesState):
+        """Assistant node"""
+        return {"messages": [llm_with_tools.invoke(state["messages"])]}
+    # Build graph
+    builder = StateGraph(MessagesState)
+    builder.add_node("assistant", assistant)
+    builder.add_node("tools", ToolNode(tools))
+    builder.add_edge(START, "assistant")
+    builder.add_conditional_edges(
+        "assistant",
+        tools_condition,
+    )
+    builder.add_edge("tools", "assistant")
+    # Compile graph
+    return builder.compile()
+# Test
+if __name__ == "__main__":
+    question = "When was a picture of St. Thomas Aquinas first added to the Wikipedia page on the Principle of double effect?"
+    # Build the graph
+    graph = build_graph()
+    # Run the graph
+    messages = [HumanMessage(content=question)]
+    messages = graph.invoke({"messages": messages})
+    for m in messages["messages"]:
+        m.pretty_print()