layonsan commited on
Commit
f58978f
·
1 Parent(s): 81917a3

Draft code for agents

Browse files
Files changed (8) hide show
  1. .gitignore +203 -0
  2. .python-version +1 -0
  3. agent.py +208 -0
  4. app.py +10 -3
  5. main.py +6 -0
  6. pyproject.toml +24 -0
  7. utility.py +24 -0
  8. uv.lock +0 -0
.gitignore ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+ #poetry.toml
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
+ #pdm.lock
116
+ #pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # pixi
121
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
+ #pixi.lock
123
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
+ # in the .venv directory. It is recommended not to include this directory in version control.
125
+ .pixi
126
+
127
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
+ __pypackages__/
129
+
130
+ # Celery stuff
131
+ celerybeat-schedule
132
+ celerybeat.pid
133
+
134
+ # SageMath parsed files
135
+ *.sage.py
136
+
137
+ # Environments
138
+ .env
139
+ .envrc
140
+ .venv
141
+ env/
142
+ venv/
143
+ ENV/
144
+ env.bak/
145
+ venv.bak/
146
+
147
+ # Spyder project settings
148
+ .spyderproject
149
+ .spyproject
150
+
151
+ # Rope project settings
152
+ .ropeproject
153
+
154
+ # mkdocs documentation
155
+ /site
156
+
157
+ # mypy
158
+ .mypy_cache/
159
+ .dmypy.json
160
+ dmypy.json
161
+
162
+ # Pyre type checker
163
+ .pyre/
164
+
165
+ # pytype static type analyzer
166
+ .pytype/
167
+
168
+ # Cython debug symbols
169
+ cython_debug/
170
+
171
+ # PyCharm
172
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
173
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
174
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
175
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
176
+ #.idea/
177
+
178
+ # Abstra
179
+ # Abstra is an AI-powered process automation framework.
180
+ # Ignore directories containing user credentials, local state, and settings.
181
+ # Learn more at https://abstra.io/docs
182
+ .abstra/
183
+
184
+ # Visual Studio Code
185
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
186
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
187
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
188
+ # you could uncomment the following to ignore the entire vscode folder
189
+ # .vscode/
190
+
191
+ # Ruff stuff:
192
+ .ruff_cache/
193
+
194
+ # PyPI configuration file
195
+ .pypirc
196
+
197
+ # Marimo
198
+ marimo/_static/
199
+ marimo/_lsp/
200
+ __marimo__/
201
+
202
+ # Streamlit
203
+ .streamlit/secrets.toml
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.10
agent.py ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """LangGraph Agent"""
2
+ import os
3
+ from dotenv import load_dotenv
4
+
5
+
6
+ from langchain_core.tools import tool
7
+ from langchain_tavily import TavilySearch
8
+ from langchain_community.document_loaders import ArxivLoader, WikipediaLoader
9
+ from langchain_core.messages import AIMessage
10
+ from langgraph.graph import StateGraph, MessagesState
11
+ from langchain_google_genai import ChatGoogleGenerativeAI
12
+ from langchain_groq import ChatGroq
13
+ from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFaceEmbeddings
14
+ from langchain_community.vectorstores import SupabaseVectorStore
15
+ from langchain.tools.retriever import create_retriever_tool
16
+ from supabase.client import Client, create_client
17
+
18
+
@tool
def multiply(a: int, b: int) -> int:
    """Multiply two numbers.
    Args:
        a: first int
        b: second int
    """
    # Simple integer product; no overflow concerns in Python.
    product = a * b
    return product
27
+
@tool
def add(a: int, b: int) -> int:
    """Add two numbers.

    Args:
        a: first int
        b: second int
    """
    # Sum of the two operands.
    total = a + b
    return total
37
+
@tool
def subtract(a: int, b: int) -> int:
    """Subtract two numbers.

    Args:
        a: first int
        b: second int
    """
    # Difference a minus b.
    difference = a - b
    return difference
47
+
@tool
def divide(a: int, b: int) -> float:
    """Divide two numbers.

    Args:
        a: first int
        b: second int

    Raises:
        ValueError: if b is zero.
    """
    # Guard with a clearer error than the default ZeroDivisionError.
    if b == 0:
        raise ValueError("Cannot divide by zero.")
    # Fix: `a / b` is true division and yields a float, so the return
    # annotation is `float` (the draft annotated `-> int`).
    return a / b
59
+
@tool
def modulus(a: int, b: int) -> int:
    """Get the modulus of two numbers.

    Args:
        a: first int
        b: second int
    """
    # Remainder of a divided by b (Python sign follows the divisor).
    remainder = a % b
    return remainder
69
+
@tool
def web_search(query: str) -> str:
    """Search the web for a query.

    Args:
        query: The search query string.

    Returns:
        The search results as a string.
    """
    # TavilySearch returns a dict payload; the hits live under "results".
    raw_result = TavilySearch(max_results=3).invoke(query)
    search_results = raw_result.get("results", [])

    formatted_search_results = "\n\n---\n\n".join(
        f'<Document source="{res.get("url")}" page=""/>\n{res.get("content", "")}\n</Document>'
        for res in search_results
    )
    # Fix: return the string promised by the annotation and docstring;
    # the draft returned {"web_results": ...} despite `-> str`.
    return formatted_search_results
89
+
@tool
def arxiv_search(query: str) -> str:
    """Search Arxiv for a query and return a maximum of 3 results.

    Args:
        query: The search query.

    Returns:
        The matching papers formatted as a string of <Document> entries.
    """
    # Fix: ArxivLoader(...).load() already returns the list of Documents;
    # the draft called .load() a second time on that list (AttributeError).
    docs = ArxivLoader(query=query, load_max_docs=3).load()

    formatted_list = []
    for doc in docs:
        # Fix: Documents are not dicts — `"id" in doc` / `doc["id"]` raised,
        # and `source` was unbound when the key was missing. The arXiv
        # identifier lives in doc.metadata; fall back to empty string.
        # NOTE(review): metadata key name ("Entry ID" vs "entry_id") depends
        # on the installed langchain_community version — confirm.
        source = doc.metadata.get("Entry ID", doc.metadata.get("entry_id", ""))
        formatted_list.append(
            f'<Document Source="{source}" page="{doc.metadata.get("page", "")}"/>\n'
            f'{doc.page_content[:1000]}\n</Document>'
        )

    formatted_search_docs = "\n\n---\n\n".join(formatted_list)

    # Fix: return the string promised by `-> str` instead of a dict.
    return formatted_search_docs
111
+
@tool
def wiki_search(query: str) -> str:
    """Search Wikipedia for a query and return a maximum of 3 results.

    Args:
        query: The search query.

    Returns:
        The matching pages formatted as a string of <Document> entries.
    """
    loader = WikipediaLoader(query=query, load_max_docs=3)
    docs = loader.load()

    formatted_docs = "\n\n---\n\n".join(
        f'<Document Source="{doc.metadata.get("source", "")}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
        for doc in docs
    )

    # Fix: return the string promised by `-> str`; the draft returned
    # {"wiki_results": ...} despite the annotation.
    return formatted_docs
128
+
129
+ tools = [
130
+ multiply,
131
+ add,
132
+ subtract,
133
+ divide,
134
+ modulus,
135
+ wiki_search,
136
+ web_search,
137
+ arxiv_search,
138
+ ]
139
+
140
+ # Build retriever
141
+ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2") # dim=768
142
+ supabase: Client = create_client(
143
+ os.environ.get("SUPABASE_URL"),
144
+ os.environ.get("SUPABASE_SERVICE_KEY"))
145
+ vector_store = SupabaseVectorStore(
146
+ client=supabase,
147
+ embedding= embeddings,
148
+ table_name="documents",
149
+ query_name="match_documents_langchain",
150
+ )
151
+ create_retriever_tool = create_retriever_tool(
152
+ retriever=vector_store.as_retriever(),
153
+ name="Question Search",
154
+ description="A tool to retrieve similar questions from a vector store.",
155
+ )
156
+
157
+
158
# Build graph function
def build_graph(provider: str = "google"):
    """Build and compile the agent graph.

    Args:
        provider: Chat-model backend to use: "google" (Gemini), "groq",
            or "huggingface".

    Returns:
        A compiled LangGraph graph with a single "retriever" node that
        answers via similarity search against the Supabase vector store.

    Raises:
        ValueError: If `provider` is not one of the supported values.
    """
    # Load environment variables from .env file — the draft carried this
    # comment but never made the call; provider clients read their API
    # keys from the environment.
    load_dotenv()

    if provider == "google":
        # Google Gemini
        llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0)
    elif provider == "groq":
        # Groq https://console.groq.com/docs/models
        llm = ChatGroq(model="qwen-qwq-32b", temperature=0)  # optional : qwen-qwq-32b gemma2-9b-it
    elif provider == "huggingface":
        # TODO: Add huggingface endpoint
        llm = ChatHuggingFace(
            llm=HuggingFaceEndpoint(
                url="https://api-inference.huggingface.co/models/Meta-DeepLearning/llama-2-7b-chat-hf",
                temperature=0,
            ),
        )
    else:
        raise ValueError("Invalid provider. Choose 'google', 'groq' or 'huggingface'.")

    # Bind tools to LLM.
    # NOTE(review): llm_with_tools is never used by the graph below — the
    # draft graph only runs the retriever node. Kept so a future assistant
    # node can use it; confirm before deleting.
    llm_with_tools = llm.bind_tools(tools)

    def retriever(state: MessagesState):
        # Answer directly from the single most similar stored document.
        query = state["messages"][-1].content
        similar_doc = vector_store.similarity_search(query, k=1)[0]

        content = similar_doc.page_content
        # Stored documents may embed the answer after a "Final answer :"
        # marker; strip everything before it when present.
        if "Final answer :" in content:
            answer = content.split("Final answer :")[-1].strip()
        else:
            answer = content.strip()

        return {"messages": [AIMessage(content=answer)]}

    builder = StateGraph(MessagesState)
    builder.add_node("retriever", retriever)

    # Retriever is both the entry and the finish point of the graph.
    builder.set_entry_point("retriever")
    builder.set_finish_point("retriever")

    # Compile graph
    return builder.compile()
203
+
204
+
205
+ if __name__ == "__main__":
206
+ # Example usage
207
+ print("testing agent tools")
208
+ print(web_search("LangGraph Agent")) # Outputs search results as a string
app.py CHANGED
@@ -4,6 +4,10 @@ import requests
4
  import inspect
5
  import pandas as pd
6
 
 
 
 
 
7
  # (Keep Constants as is)
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
@@ -13,11 +17,14 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
13
  class BasicAgent:
14
  def __init__(self):
15
  print("BasicAgent initialized.")
 
 
16
  def __call__(self, question: str) -> str:
17
  print(f"Agent received question (first 50 chars): {question[:50]}...")
18
- fixed_answer = "This is a default answer."
19
- print(f"Agent returning fixed answer: {fixed_answer}")
20
- return fixed_answer
 
21
 
22
  def run_and_submit_all( profile: gr.OAuthProfile | None):
23
  """
 
4
  import inspect
5
  import pandas as pd
6
 
7
+ from langchain_core.messages import HumanMessage
8
+
9
+ from agent import build_graph
10
+
11
  # (Keep Constants as is)
12
  # --- Constants ---
13
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
class BasicAgent:
    """Thin wrapper that routes questions through the compiled agent graph."""

    def __init__(self):
        print("BasicAgent initialized.")
        # Build the LangGraph pipeline once, up front.
        self.graph = build_graph()

    def __call__(self, question: str) -> str:
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        # Wrap the question as a HumanMessage and run it through the graph;
        # the final message in the resulting state is the answer.
        final_state = self.graph.invoke(
            {"messages": [HumanMessage(content=question)]}
        )
        return final_state["messages"][-1].content
28
 
29
  def run_and_submit_all( profile: gr.OAuthProfile | None):
30
  """
main.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
def main():
    """Print the project greeting."""
    greeting = "Hello from final-assignment!"
    print(greeting)


if __name__ == "__main__":
    main()
pyproject.toml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "final-assignment"
3
+ version = "0.1.0"
4
+ description = "LangGraph-based agent for the Agents course final assignment"
5
+ readme = "README.md"
6
+ requires-python = ">=3.10"
7
+ dependencies = [
8
+ "arxiv>=2.2.0",
9
+ "gradio[oauth]>=5.36.2",
10
+ "ipykernel>=6.30.0",
11
+ "langchain>=0.3.27",
12
+ "langchain-community>=0.3.27",
13
+ "langchain-google-genai>=2.1.9",
14
+ "langchain-groq>=0.3.7",
15
+ "langchain-huggingface>=0.3.1",
16
+ "langchain-tavily>=0.2.11",
17
+ "langgraph>=0.6.2",
18
+ "pymupdf>=1.26.3",
19
+ "python-dotenv>=1.1.1",
20
+ "requests>=2.32.4",
21
+ "ruff>=0.12.1",
22
+ "supabase>=2.18.0",
23
+ "wikipedia>=1.4.0",
24
+ ]
utility.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+
def read_jsonl_file(file_path):
    """Read a .jsonl file into a list of dictionaries.

    Each non-empty line is parsed as one JSON object. Lines that fail to
    parse are reported and skipped; a missing file or any other error is
    reported and whatever was read so far (possibly an empty list) is
    returned.
    """
    records = []
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            for raw_line in handle:
                # Ignore blank / whitespace-only lines.
                text = raw_line.strip()
                if not text:
                    continue
                try:
                    records.append(json.loads(text))
                except json.JSONDecodeError as e:
                    print(f"Error decoding JSON on line: {text}. Error: {e}")
    except FileNotFoundError:
        print(f"Error: The file '{file_path}' was not found.")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
    return records
uv.lock ADDED
The diff for this file is too large to render. See raw diff