mathidot commited on
Commit
884eda5
·
1 Parent(s): 47afa16
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ ./knowledge_base
2
+ knowledge_base/raw/pdf/*.pdf
Gradio_UI.py CHANGED
@@ -259,38 +259,41 @@ class GradioUI:
259
  )
260
 
261
  def launch(self, **kwargs):
262
- import gradio as gr
263
-
264
- with gr.Blocks(fill_height=True) as demo:
265
- stored_messages = gr.State([])
266
- file_uploads_log = gr.State([])
267
- chatbot = gr.Chatbot(
268
- label="Agent",
269
- type="messages",
270
- avatar_images=(
271
- None,
272
- "https://huggingface.co/datasets/agents-course/course-images/resolve/main/en/communication/Alfred.png",
273
- ),
274
- resizeable=True,
275
- scale=1,
276
- )
277
- # If an upload folder is provided, enable the upload feature
278
- if self.file_upload_folder is not None:
279
- upload_file = gr.File(label="Upload a file")
280
- upload_status = gr.Textbox(label="Upload Status", interactive=False, visible=False)
281
- upload_file.change(
282
- self.upload_file,
283
- [upload_file, file_uploads_log],
284
- [upload_status, file_uploads_log],
285
  )
286
- text_input = gr.Textbox(lines=1, label="Chat Message")
287
- text_input.submit(
288
- self.log_user_message,
289
- [text_input, file_uploads_log],
290
- [stored_messages, text_input],
291
- ).then(self.interact_with_agent, [stored_messages, chatbot], [chatbot])
292
-
293
- demo.launch(debug=True, share=True, **kwargs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294
 
295
 
296
- __all__ = ["stream_to_gradio", "GradioUI"]
 
259
  )
260
 
261
  def launch(self, **kwargs):
262
+ import gradio as gr
263
+
264
+ with gr.Blocks(fill_height=True) as demo:
265
+ stored_messages = gr.State([])
266
+ file_uploads_log = gr.State([])
267
+
268
+ # 1. 适配 Gradio 5.x 的 Chatbot 组件定义
269
+ chatbot = gr.Chatbot(
270
+ label="Agent",
271
+ scale=1,
272
+ height=600,
 
 
 
 
 
 
 
 
 
 
 
 
273
  )
274
+
275
+ if self.file_upload_folder is not None:
276
+ upload_file = gr.File(label="Upload a file")
277
+ upload_status = gr.Textbox(label="Upload Status", interactive=False, visible=False)
278
+ upload_file.change(
279
+ self.upload_file,
280
+ [upload_file, file_uploads_log],
281
+ [upload_status, file_uploads_log],
282
+ )
283
+
284
+ text_input = gr.Textbox(lines=1, label="Chat Message")
285
+
286
+ text_input.submit(
287
+ self.log_user_message,
288
+ [text_input, file_uploads_log],
289
+ [stored_messages, text_input],
290
+ ).then(
291
+ self.interact_with_agent,
292
+ [stored_messages, chatbot],
293
+ [chatbot]
294
+ )
295
+
296
+ demo.launch(debug=True, share=True, **kwargs)
297
 
298
 
299
+ __all__ = ["stream_to_gradio", "GradioUI"]
README.md CHANGED
@@ -16,3 +16,4 @@ tags:
16
  ---
17
 
18
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
16
  ---
17
 
18
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
19
+ # OptionAgent
app.py CHANGED
@@ -1,9 +1,11 @@
1
- from smolagents import CodeAgent,DuckDuckGoSearchTool, HfApiModel,load_tool,tool
 
2
  import datetime
3
  import requests
4
  import pytz
5
  import yaml
6
  import json
 
7
  from tools.final_answer import FinalAnswerTool
8
  import yfinance as yf
9
  from Gradio_UI import GradioUI
@@ -100,35 +102,31 @@ def get_current_time_in_timezone(timezone: str) -> str:
100
  return f"Error fetching time for timezone '{timezone}': {str(e)}"
101
 
102
 
103
- final_answer = FinalAnswerTool()
104
-
105
- # If the agent does not answer, the model is overloaded, please use another model or the following Hugging Face Endpoint that also contains qwen2.5 coder:
106
- # model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
107
-
108
- model = HfApiModel(
109
- max_tokens=2096,
110
- temperature=0.5,
111
- model_id='Qwen/Qwen2.5-Coder-32B-Instruct',# it is possible that this model may be overloaded
112
- custom_role_conversions=None,
113
- )
114
-
115
- # Import tool from Hub
116
- image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)
117
-
118
- with open("prompts.yaml", 'r') as stream:
119
- prompt_templates = yaml.safe_load(stream)
120
 
121
- agent = CodeAgent(
122
- model=model,
123
- tools=[get_current_time_in_timezone, final_answer], ## add your tools here (don't remove final answer)
124
- max_steps=6,
125
- verbosity_level=1,
126
- grammar=None,
127
- planning_interval=None,
128
- name=None,
129
- description=None,
130
- prompt_templates=prompt_templates
131
- )
132
-
133
-
134
- GradioUI(agent).launch()
 
 
 
 
 
 
 
 
 
1
+ from smolagents import CodeAgent,DuckDuckGoSearchTool, HfApiModel,load_tool,tool, LiteLLMModel
2
+ import os
3
  import datetime
4
  import requests
5
  import pytz
6
  import yaml
7
  import json
8
+ from dotenv import load_dotenv
9
  from tools.final_answer import FinalAnswerTool
10
  import yfinance as yf
11
  from Gradio_UI import GradioUI
 
102
  return f"Error fetching time for timezone '{timezone}': {str(e)}"
103
 
104
 
105
if __name__ == "__main__":
    # Load variables from a local .env file into the process environment
    # before reading any of them.
    load_dotenv()
    gemini_api_key = os.getenv("GEMINI_API_KEY")

    final_answer = FinalAnswerTool()

    # Hand the key to the model explicitly instead of reading it and then
    # discarding it. When it is None, no key is passed explicitly.
    model = LiteLLMModel(
        model_id="gemini/gemini-2.5-flash",
        temperature=0.2,
        api_key=gemini_api_key,
    )

    # Explicit encoding so the prompt file is decoded the same way regardless
    # of the platform's default locale.
    with open("prompts.yaml", "r", encoding="utf-8") as stream:
        prompt_templates = yaml.safe_load(stream)

    agent = CodeAgent(
        model=model,
        tools=[query_market_asset, get_current_time_in_timezone, final_answer],
        max_steps=6,
        verbosity_level=1,
        grammar=None,
        planning_interval=None,
        name=None,
        description=None,
        prompt_templates=prompt_templates,
    )

    GradioUI(agent).launch()
homework.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Create a CodeAgent with DuckDuckGo search capability
2
+ from smolagents import CodeAgent, DuckDuckGoSearchTool, InferenceClientModel
3
+
4
# Search tool instance handed to the agent below.
search_tool = DuckDuckGoSearchTool()

# Model constructed with its default arguments; it is reused by the agents
# defined further down in this script.
model = InferenceClientModel()

# Code agent whose only tool is the search tool.
agent = CodeAgent(model=model, tools=[search_tool])
12
+
13
+ # ============================================
14
+
15
+
16
+ from smolagents import (
17
+ CodeAgent,
18
+ ToolCallingAgent,
19
+ InferenceClientModel,
20
+ WebSearchTool,
21
+ )
22
+ import re
23
+ import requests
24
+ from markdownify import markdownify
25
+ from requests.exceptions import RequestException
26
+ from smolagents import tool
27
+
28
@tool
def visit_webpage(url: str) -> str:
    """Visits a webpage at the given URL and returns its content as a markdown string.

    Args:
        url: The URL of the webpage to visit.

    Returns:
        The content of the webpage converted to Markdown, or an error message if the request fails.
    """
    try:
        # Bound the request so an unresponsive host cannot block the agent
        # indefinitely; a timeout surfaces as a RequestException below.
        response = requests.get(url, timeout=20)
        response.raise_for_status()  # Raise an exception for bad status codes

        # Convert the HTML content to Markdown
        markdown_content = markdownify(response.text).strip()

        # Collapse runs of three or more newlines into a single blank line
        markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)

        return markdown_content

    except RequestException as e:
        return f"Error fetching the webpage: {str(e)}"
    except Exception as e:
        # Errors are returned as text rather than raised, so the caller
        # always receives a string.
        return f"An unexpected error occurred: {str(e)}"
54
+
55
# Tool-calling agent with web search and page fetching; the name and
# description are what the managing agent below sees.
web_agent = ToolCallingAgent(
    tools=[DuckDuckGoSearchTool(), visit_webpage],
    model=model,
    max_steps=10,
    name="search",
    description="Runs web searches for you.",
)

# Code agent with no tools of its own; it manages web_agent.
manager_agent = CodeAgent(
    tools=[],
    model=model,
    managed_agents=[web_agent],
    additional_authorized_imports=["time", "numpy", "pandas"],
)

# NOTE(review): the original passed ``sandbox=EX2Sandbox()``, but no such name
# is defined or imported in this script, so it raised NameError. smolagents
# selects the E2B sandbox through ``executor_type``; confirm that the e2b
# dependency and API key are available where this runs.
agent = CodeAgent(
    tools=[],
    model=model,
    executor_type="e2b",
    additional_authorized_imports=["numpy"],
)
load_docs.py ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import hashlib
3
+ import os
4
+ from pathlib import Path
5
+ from typing import Iterable, List
6
+ from dotenv import load_dotenv
7
+ import chromadb
8
+ from chromadb.errors import NotFoundError
9
+ from pypdf import PdfReader
10
+
11
+ from llama_index.core import StorageContext, VectorStoreIndex
12
+ from llama_index.core.schema import Document, BaseNode
13
+ from llama_index.core.node_parser import SentenceSplitter
14
+ from llama_index.vector_stores.chroma import ChromaVectorStore
15
+
16
+
17
# Filesystem layout, anchored at the directory that contains this module.
BASE_DIR = Path(__file__).resolve().parent
KNOWLEDGE_BASE_DIR = BASE_DIR.joinpath("knowledge_base")
RAW_DIR = KNOWLEDGE_BASE_DIR.joinpath("raw")
CHROMA_DB_DIR = KNOWLEDGE_BASE_DIR.joinpath("chroma_db")
HF_CACHE_DIR = BASE_DIR.joinpath("hf_cache")

# Name of the Chroma collection that stores the indexed chunks.
COLLECTION_NAME = "options_knowledge"

# Embedding model and splitter settings used when building the index.
EMBED_MODEL_NAME = "BAAI/bge-small-en-v1.5"
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 150

# Metadata keys every chunk must carry before it is indexed.
REQUIRED_METADATA = [
    "source_file",
    "file_name",
    "file_type",
    "document_title",
    "file_hash",
    "chunk_id",
    "chunk_index",
]
37
+
38
+
39
def configure_model_cache() -> None:
    """Create the local model cache directory and set cache-related env defaults.

    Each variable is written with ``setdefault``, so a value already present
    in the environment is left untouched.
    """
    HF_CACHE_DIR.mkdir(parents=True, exist_ok=True)

    env_defaults = {
        "HF_HOME": str(HF_CACHE_DIR),
        "SENTENCE_TRANSFORMERS_HOME": str(HF_CACHE_DIR / "sentence_transformers"),
        "TOKENIZERS_PARALLELISM": "false",
    }
    for variable, fallback in env_defaults.items():
        os.environ.setdefault(variable, fallback)
44
+
45
+
46
def file_sha256(path: Path) -> str:
    """Return the hex SHA-256 digest of the file at *path*.

    The file is read in 1 MiB blocks, so it is never loaded into memory
    all at once.
    """
    block_size = 1024 * 1024
    hasher = hashlib.sha256()
    with path.open("rb") as handle:
        # read() returns b"" at end of file, which ends the loop.
        while chunk := handle.read(block_size):
            hasher.update(chunk)
    return hasher.hexdigest()
52
+
53
+
54
def load_md_file(path: Path) -> Document:
    """Read a UTF-8 Markdown file and wrap it in a single ``Document``.

    The metadata records the resolved source path, file name, file type
    (``"md"``), the stem as document title, and the file's SHA-256 hash.
    """
    body = path.read_text(encoding="utf-8")
    details = {
        "source_file": str(path.resolve()),
        "file_name": path.name,
        "file_type": "md",
        "document_title": path.stem,
        "file_hash": file_sha256(path),
    }
    return Document(text=body, metadata=details)
67
+
68
+
69
def load_pdf_file(path: Path) -> List[Document]:
    """Extract one ``Document`` per non-empty page of a PDF.

    Args:
        path: Location of the PDF file.

    Returns:
        Documents in page order. Pages whose extracted text is empty or
        whitespace-only are skipped. Each document carries the shared file
        metadata plus a 1-based ``page_number``.
    """
    reader = PdfReader(str(path))

    # These values are the same for every page, so compute them once. The
    # file was previously re-read and re-hashed for each page.
    shared_metadata = {
        "source_file": str(path.resolve()),
        "file_name": path.name,
        "file_type": "pdf",
        "document_title": path.stem,
        "file_hash": file_sha256(path),
    }

    documents: List[Document] = []
    for page_index, page in enumerate(reader.pages, start=1):
        text = page.extract_text() or ""
        if not text.strip():
            continue

        # Copy the shared metadata so documents never share one dict.
        documents.append(
            Document(
                text=text,
                metadata={**shared_metadata, "page_number": page_index},
            )
        )

    return documents
94
+
95
+
96
def iter_source_files(raw_dir: Path) -> Iterable[Path]:
    """Yield supported source files found anywhere under *raw_dir*, in sorted order.

    A path is yielded when it is a regular file whose suffix, compared
    case-insensitively, is ``.md``, ``.markdown`` or ``.pdf``.
    """
    accepted = (".md", ".markdown", ".pdf")
    candidates = sorted(raw_dir.rglob("*"))
    yield from (
        entry
        for entry in candidates
        if entry.is_file() and entry.suffix.lower() in accepted
    )
101
+
102
+
103
def load_docs(raw_dir: Path = RAW_DIR) -> List[Document]:
    """Load every supported Markdown and PDF file under *raw_dir*.

    Markdown files contribute one document each; PDFs contribute whatever
    ``load_pdf_file`` returns for them.

    Raises:
        ValueError: If no documents were produced at all.
    """
    markdown_suffixes = {".md", ".markdown"}
    collected: List[Document] = []

    for source_path in iter_source_files(raw_dir):
        extension = source_path.suffix.lower()
        if extension == ".pdf":
            collected.extend(load_pdf_file(source_path))
        elif extension in markdown_suffixes:
            collected.append(load_md_file(source_path))

    if collected:
        return collected
    raise ValueError(f"No supported documents found under {raw_dir}")
118
+
119
+
120
def add_chunk_metadata(nodes: List[BaseNode]) -> List[BaseNode]:
    """Assign ``chunk_id`` / ``chunk_index`` metadata and a matching node id.

    ``chunk_index`` counts chunks per ``source_file`` in input order. The id
    has the form ``<stem>-<first 12 hash chars>-p<page or "na">-c<index>``.
    Two byte-identical files with the same name in different directories
    would otherwise produce the same ids, so any repeated id gets a ``-d<n>``
    suffix to keep every id in the batch unique.

    Args:
        nodes: Nodes whose metadata contains ``source_file`` and
            ``file_hash`` (and optionally ``page_number``).

    Returns:
        The same list, with its nodes modified in place.

    Raises:
        KeyError: If a node lacks ``source_file`` or ``file_hash`` metadata.
    """
    per_file_counts: dict[str, int] = {}
    id_uses: dict[str, int] = {}

    for node in nodes:
        source_file = node.metadata["source_file"]
        chunk_index = per_file_counts.get(source_file, 0)
        per_file_counts[source_file] = chunk_index + 1

        file_hash = node.metadata["file_hash"][:12]
        page_number = node.metadata.get("page_number", "na")
        chunk_id = f"{Path(source_file).stem}-{file_hash}-p{page_number}-c{chunk_index}"

        # First use keeps the plain id; later uses of the same id are suffixed.
        repeats = id_uses.get(chunk_id, 0)
        id_uses[chunk_id] = repeats + 1
        if repeats:
            chunk_id = f"{chunk_id}-d{repeats}"

        node.metadata["chunk_id"] = chunk_id
        node.metadata["chunk_index"] = chunk_index
        node.id_ = chunk_id

    return nodes
137
+
138
+
139
def validate_nodes(nodes: List[BaseNode]) -> None:
    """Check that chunks exist and that each carries the required metadata.

    Raises:
        ValueError: If *nodes* is empty, if a node lacks any key listed in
            ``REQUIRED_METADATA``, or if a node whose ``file_type`` is
            ``"pdf"`` has no ``page_number``.
    """
    if not nodes:
        raise ValueError("No chunks were created from the source documents.")

    for node in nodes:
        meta = node.metadata

        missing = list(filter(lambda key: key not in meta, REQUIRED_METADATA))
        if missing:
            raise ValueError(f"Node {node.node_id} is missing metadata fields: {missing}")

        is_pdf = meta["file_type"] == "pdf"
        if is_pdf and "page_number" not in meta:
            raise ValueError(f"PDF node {node.node_id} is missing page_number metadata.")
150
+
151
+
152
def build_nodes(raw_dir: Path = RAW_DIR) -> List[BaseNode]:
    """Load the documents under *raw_dir*, split them into chunks and validate them.

    Returns:
        The chunk nodes after ``add_chunk_metadata`` has annotated them and
        ``validate_nodes`` has accepted them.
    """
    source_documents = load_docs(raw_dir)

    sentence_splitter = SentenceSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
    chunks = sentence_splitter.get_nodes_from_documents(source_documents)

    # add_chunk_metadata annotates the nodes in place.
    add_chunk_metadata(chunks)
    validate_nodes(chunks)
    return chunks
162
+
163
+
164
async def build_index(raw_dir: Path = RAW_DIR, rebuild: bool = False) -> VectorStoreIndex:
    """Return a vector index backed by the persistent Chroma collection.

    Args:
        raw_dir: Directory scanned for source documents when indexing is needed.
        rebuild: When true, the existing collection is dropped and re-indexed.

    Returns:
        A freshly built index when *rebuild* is set or the collection is
        empty; otherwise an index attached to the existing collection.
    """
    configure_model_cache()

    # Imported only after configure_model_cache() has set the cache
    # environment variables (presumably so the library picks them up).
    from llama_index.embeddings.huggingface import HuggingFaceEmbedding

    load_dotenv()
    CHROMA_DB_DIR.mkdir(parents=True, exist_ok=True)

    client = chromadb.PersistentClient(path=str(CHROMA_DB_DIR))

    if rebuild:
        try:
            client.delete_collection(COLLECTION_NAME)
        except (NotFoundError, ValueError):
            # Best effort: these errors are ignored and the collection is
            # (re)created just below.
            pass

    collection = client.get_or_create_collection(COLLECTION_NAME)
    store = ChromaVectorStore(chroma_collection=collection)
    context = StorageContext.from_defaults(vector_store=store)
    embedder = HuggingFaceEmbedding(
        model_name=EMBED_MODEL_NAME,
        cache_folder=str(HF_CACHE_DIR / "sentence_transformers"),
    )

    needs_indexing = rebuild or collection.count() == 0
    if not needs_indexing:
        print(f"Loaded existing collection '{COLLECTION_NAME}' with {collection.count()} chunks.")
        return VectorStoreIndex.from_vector_store(store, embed_model=embedder)

    chunks = build_nodes(raw_dir)
    fresh_index = VectorStoreIndex(
        chunks,
        storage_context=context,
        embed_model=embedder,
        show_progress=True,
    )
    print(f"Indexed {len(chunks)} chunks into collection '{COLLECTION_NAME}'.")
    return fresh_index
201
+
202
+
203
if __name__ == "__main__":
    # Rebuild the index from scratch, then run one sample query against it.
    index = asyncio.run(build_index(rebuild=True))
    results = index.as_retriever(similarity_top_k=5).retrieve("What is volatility smile?")

    print("\nTop retrieved chunks:")
    for hit in results:
        info = hit.node.metadata
        file_name = info.get("file_name", "unknown")
        page_label = info.get("page_number", "n/a")
        print(f"- {file_name}, page {page_label}, score={hit.score:.4f}")

        # Show the first 500 characters of the chunk on a single line.
        preview = hit.node.get_content()[:500].replace("\n", " ")
        print(preview)
        print()
pyproject.toml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "first-agent-template"
3
+ version = "0.1.0"
4
+ description = "Add your description here"
5
+ readme = "README.md"
6
+ authors = [
7
+ { name = "mathidot", email = "c1216440698@126.com" }
8
+ ]
9
+ requires-python = ">=3.12"
10
+ dependencies = [
11
+ "chromadb>=1.0.0",
12
+ "google-genai>=2.3.0",
13
+ "llama-index-core>=0.14.0",
14
+ "llama-index-embeddings-huggingface>=0.6.0",
15
+ "llama-index-vector-stores-chroma>=0.5.0",
16
+ "litellm>=1.85.0",
17
+ "pypdf>=6.0.0",
18
+ "tokenizers>=0.22.0,<=0.23.0",
19
+ "transformers<5",
20
+ ]
21
+
22
+ [build-system]
23
+ requires = ["uv_build>=0.10.9,<0.11.0"]
24
+ build-backend = "uv_build"
pyrightconfig.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "venvPath": ".",
3
+ "venv": ".venv"
4
+ }
requirements.txt CHANGED
@@ -3,3 +3,9 @@ smolagents==1.13.0
3
  requests
4
  duckduckgo_search
5
  pandas
 
 
 
 
 
 
 
3
  requests
4
  duckduckgo_search
5
  pandas
6
+ pypdf
7
+ chromadb
8
+ llama-index-core
9
+ llama-index-embeddings-huggingface
10
+ llama-index-vector-stores-chroma
11
+ transformers<5
src/first_agent_template/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
def hello() -> str:
    """Return the package's fixed greeting string."""
    greeting = "Hello from first-agent-template!"
    return greeting
src/first_agent_template/py.typed ADDED
File without changes
test.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Manual smoke test: build a Gemini-backed agent and call a Hugging Face LLM once."""
import os

from dotenv import load_dotenv
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
from smolagents import CodeAgent, LiteLLMModel

# NOTE(review): the script used LiteLLMModel, CodeAgent and query_market_asset
# without importing any of them, so it failed with NameError. The tool is
# assumed to be defined at module level in app.py; confirm before relying on it.
from app import query_market_asset

# Load the .env file
load_dotenv()

# Retrieve the API credentials from the environment variables
hf_token = os.getenv("HF_TOKEN")
gemini_api_key = os.getenv("GEMINI_API_KEY")

model = LiteLLMModel(
    model_id="gemini/gemini-2.5-flash",
    temperature=0.2,
    api_key=gemini_api_key,
)

agent = CodeAgent(
    tools=[query_market_asset],
    model=model,
    max_steps=5,
)

llm = HuggingFaceInferenceAPI(
    model_name="Qwen/Qwen2.5-Coder-32B-Instruct",
    temperature=0.7,
    max_tokens=100,
    token=hf_token,
    provider="auto",
)

response = llm.complete("Hello, how are you?")
print(response)
# Example output: I am good, how can I help you today?
uv.lock ADDED
The diff for this file is too large to render. See raw diff