Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files- agent.py +164 -0
- system_prompt.txt +5 -0
agent.py
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from dotenv import load_dotenv
|
| 3 |
+
from typing import List, Dict, Any, Optional
|
| 4 |
+
import tempfile
|
| 5 |
+
import requests
|
| 6 |
+
from urllib.parse import urlparse
|
| 7 |
+
import pandas as pd
|
| 8 |
+
import numpy as np
|
| 9 |
+
|
| 10 |
+
from image_processing import *
|
| 11 |
+
|
| 12 |
+
"""Langraph"""
|
| 13 |
+
from langgraph.graph import START, StateGraph, MessagesState
|
| 14 |
+
from langchain_community.tools import DuckDuckGoSearchResults
|
| 15 |
+
from langchain_community.document_loaders import WikipediaLoader
|
| 16 |
+
from langchain_community.document_loaders import ArxivLoader
|
| 17 |
+
from langgraph.prebuilt import ToolNode, tools_condition
|
| 18 |
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
| 19 |
+
from langchain_groq import ChatGroq
|
| 20 |
+
from langchain_huggingface import (
|
| 21 |
+
ChatHuggingFace,
|
| 22 |
+
HuggingFaceEndpoint,
|
| 23 |
+
HuggingFaceEmbeddings,
|
| 24 |
+
)
|
| 25 |
+
from langchain_community.vectorstores import SupabaseVectorStore
|
| 26 |
+
from langchain_core.messages import SystemMessage, HumanMessage
|
| 27 |
+
from langchain_core.tools import tool
|
| 28 |
+
from langchain.tools.retriever import create_retriever_tool
|
| 29 |
+
from supabase.client import Client, create_client
|
| 30 |
+
from langchain_openai import ChatOpenAI
|
| 31 |
+
|
| 32 |
+
load_dotenv()
|
| 33 |
+
|
| 34 |
+
### =============== SEARCH TOOLS =============== ###
|
| 35 |
+
|
| 36 |
+
@tool
def web_search(query: str) -> Dict[str, str]:
    """Search DuckDuckGo for a query and return maximum 3 results.

    Args:
        query: The search query.

    Returns:
        A dict with a single ``"web_results"`` key. Its value is either the
        parsed results rendered as ``<Document>`` blocks joined by ``---``
        separators, or the string ``"No results found"``.
    """
    import re  # function-scope import keeps this block self-contained

    # ``invoke`` takes the tool input as its first positional argument;
    # passing it as ``query=...`` leaves that required argument unfilled.
    search_results = DuckDuckGoSearchResults(max_results=3).invoke(query)

    formatted_results = []

    if isinstance(search_results, str):
        # The result string is a run of "snippet: ..., title: ..., link: ..."
        # records, optionally wrapped in [...]. Matching whole records
        # (instead of splitting on ", ") keeps snippets that contain commas
        # intact and works whether or not the brackets are present.
        record_pattern = re.compile(
            r"snippet:\s*(?P<snippet>.*?),\s*title:\s*(?P<title>.*?),\s*"
            r"link:\s*(?P<link>\S+?)(?=\]|,\s|\s|$)",
            re.DOTALL,
        )
        for record in record_pattern.finditer(search_results):
            doc_content = record.group("snippet").strip()
            doc_source = record.group("link").strip()
            formatted_results.append(
                f'<Document source="{doc_source}" page=""/>\n{doc_content}\n</Document>'
            )

    if not formatted_results:
        return {"web_results": "No results found"}

    return {"web_results": "\n\n---\n\n".join(formatted_results)}
|
| 73 |
+
|
| 74 |
+
### =============== DOCUMENT PROCESSING TOOLS =============== ###
|
| 75 |
+
# File handling still requires external tools
|
| 76 |
+
|
| 77 |
+
@tool
def save_and_read_file(content: str, filename: Optional[str] = None) -> str:
    """
    Save content to a file in the system temp directory and return the path.

    Args:
        content (str): the content to save to the file
        filename (str, optional): the name of the file. If not provided, a random name file will be created.

    Returns:
        str: a message containing the path the content was written to.
    """
    temp_dir = tempfile.gettempdir()

    # The name comes from the model, so keep only its final path component:
    # "../x" or an absolute path must not place the file outside temp_dir.
    safe_name = os.path.basename(filename) if filename else ""

    if safe_name in ("", ".", ".."):
        # mkstemp returns an already-open descriptor; writing through it and
        # closing it via the context manager avoids leaking a file handle.
        fd, filepath = tempfile.mkstemp(dir=temp_dir)
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            f.write(content)
    else:
        filepath = os.path.join(temp_dir, safe_name)
        # Explicit UTF-8 so non-ASCII content does not depend on the locale.
        with open(filepath, "w", encoding="utf-8") as f:
            f.write(content)

    return f"File saved to {filepath}. You can read this file to process its contents."
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
# Read the agent's system prompt from disk once, at import time, and echo it.
with open("system_prompt.txt", mode="r", encoding="utf-8") as prompt_file:
    system_prompt = prompt_file.read()
print(system_prompt)

# The prompt text wrapped as a system message.
sys_msg = SystemMessage(content=system_prompt)

# Tools bound to the model and served by the graph's tool node.
tools = [web_search, save_and_read_file]
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
# Build graph function
|
| 113 |
+
def build_graph(provider: str = "openai"):
    """Build and compile the tool-calling agent graph.

    The graph has two nodes, ``assistant`` and ``tools``. It starts at
    ``assistant``, routes from there through ``tools_condition``, and
    returns from ``tools`` to ``assistant``.

    Args:
        provider: Chat model backend to use: ``"openai"``, ``"groq"`` or
            ``"huggingface"``.

    Returns:
        The compiled graph.

    Raises:
        ValueError: If ``provider`` is not one of the supported names.
    """
    if provider == "openai":
        llm = ChatOpenAI(model="gpt-4.1", temperature=0)
    elif provider == "groq":
        llm = ChatGroq(model="qwen-qwq-32b", temperature=0)
    elif provider == "huggingface":
        llm = ChatHuggingFace(
            llm=HuggingFaceEndpoint(
                repo_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
                task="text-generation",
                max_new_tokens=1024,
                do_sample=False,
                repetition_penalty=1.03,
                temperature=0,
            ),
            verbose=True,
        )
    else:
        raise ValueError("Invalid provider. Choose 'openai', 'groq', or 'huggingface'.")

    # Bind tools to LLM
    llm_with_tools = llm.bind_tools(tools)

    # Node
    def assistant(state: MessagesState):
        """Assistant node: call the tool-enabled model on the conversation."""
        history = list(state["messages"])
        # The module-level system prompt was loaded but never sent to the
        # model. Prepend it here, unless the caller already supplied its own
        # leading system message, so the prompt is not duplicated.
        if not history or not isinstance(history[0], SystemMessage):
            history = [sys_msg] + history
        return {"messages": [llm_with_tools.invoke(history)]}

    builder = StateGraph(MessagesState)
    builder.add_node("assistant", assistant)
    builder.add_node("tools", ToolNode(tools))
    builder.add_edge(START, "assistant")
    builder.add_conditional_edges(
        "assistant",
        tools_condition,
    )
    builder.add_edge("tools", "assistant")

    # Compile graph
    return builder.compile()
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
# test
|
| 158 |
+
if __name__ == "__main__":
    # Smoke test: send one sample question through the OpenAI-backed graph
    # and print every message of the resulting conversation.
    sample_question = "When was a picture of St. Thomas Aquinas first added to the Wikipedia page on the Principle of double effect?"
    agent_graph = build_graph(provider="openai")
    initial_state = {"messages": [HumanMessage(content=sample_question)]}
    final_state = agent_graph.invoke(initial_state)
    for message in final_state["messages"]:
        message.pretty_print()
|
system_prompt.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
You are a helpful assistant tasked with answering questions using a set of tools.
|
| 2 |
+
Now, I will ask you a question. Report your thoughts, and finish your answer with the following template:
|
| 3 |
+
FINAL ANSWER: [YOUR FINAL ANSWER].
|
| 4 |
+
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma-separated list of numbers and/or strings. If you are asked for a number, don't use commas to write your number, and don't use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles or abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma-separated list, apply the rules above to each element (number or string) and ensure there is exactly one space after each comma.
|
| 5 |
+
Your answer should start only with "FINAL ANSWER: ", followed by the answer.
|