First_agent_template

Runtime error

App Files Files Community

mathidot committed 10 days ago

Commit

884eda5

1 Parent(s): 47afa16

BUG FIX

Browse files

Files changed (13) hide show

.gitignore +2 -0
Gradio_UI.py +35 -32
README.md +1 -0
app.py +30 -32
homework.py +75 -0
load_docs.py +216 -0
pyproject.toml +24 -0
pyrightconfig.json +4 -0
requirements.txt +6 -0
src/first_agent_template/__init__.py +2 -0
src/first_agent_template/py.typed +0 -0
test.py +34 -0
uv.lock +0 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ ./knowledge_base
2	+ knowledge_base/raw/pdf/*.pdf

Gradio_UI.py CHANGED Viewed

@@ -259,38 +259,41 @@ class GradioUI:
         )
     def launch(self, **kwargs):
-        import gradio as gr
-        with gr.Blocks(fill_height=True) as demo:
-            stored_messages = gr.State([])
-            file_uploads_log = gr.State([])
-            chatbot = gr.Chatbot(
-                label="Agent",
-                type="messages",
-                avatar_images=(
-                    None,
-                    "https://huggingface.co/datasets/agents-course/course-images/resolve/main/en/communication/Alfred.png",
-                ),
-                resizeable=True,
-                scale=1,
-            )
-            # If an upload folder is provided, enable the upload feature
-            if self.file_upload_folder is not None:
-                upload_file = gr.File(label="Upload a file")
-                upload_status = gr.Textbox(label="Upload Status", interactive=False, visible=False)
-                upload_file.change(
-                    self.upload_file,
-                    [upload_file, file_uploads_log],
-                    [upload_status, file_uploads_log],
                 )
-            text_input = gr.Textbox(lines=1, label="Chat Message")
-            text_input.submit(
-                self.log_user_message,
-                [text_input, file_uploads_log],
-                [stored_messages, text_input],
-            ).then(self.interact_with_agent, [stored_messages, chatbot], [chatbot])
-        demo.launch(debug=True, share=True, **kwargs)
-__all__ = ["stream_to_gradio", "GradioUI"]

         )
     def launch(self, **kwargs):
+            """Build the Gradio Blocks chat interface and launch it.
+
+            Args:
+                **kwargs: Extra keyword arguments forwarded to ``demo.launch``
+                    in addition to ``debug=True`` and ``share=True``.
+            """
+            import gradio as gr
+            with gr.Blocks(fill_height=True) as demo:
+                # Per-session state: the pending user messages and the log of uploaded files.
+                stored_messages = gr.State([])
+                file_uploads_log = gr.State([])
+                # 1. Adapt the Chatbot component definition to Gradio 5.x
+                chatbot = gr.Chatbot(
+                    label="Agent",
+                    scale=1,
+                    height=600,
                 )
+                # The upload widgets are only created when an upload folder is configured.
+                if self.file_upload_folder is not None:
+                    upload_file = gr.File(label="Upload a file")
+                    upload_status = gr.Textbox(label="Upload Status", interactive=False, visible=False)
+                    upload_file.change(
+                        self.upload_file,
+                        [upload_file, file_uploads_log],
+                        [upload_status, file_uploads_log],
+                    )
+                text_input = gr.Textbox(lines=1, label="Chat Message")
+                # On submit: first record the user message, then run the agent and
+                # write its output into the chatbot component.
+                text_input.submit(
+                    self.log_user_message,
+                    [text_input, file_uploads_log],
+                    [stored_messages, text_input],
+                ).then(
+                    self.interact_with_agent,
+                    [stored_messages, chatbot],
+                    [chatbot]
+                )
+            demo.launch(debug=True, share=True, **kwargs)
+__all__ = ["stream_to_gradio", "GradioUI"]

README.md CHANGED Viewed

@@ -16,3 +16,4 @@ tags:
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# OptionAgent

app.py CHANGED Viewed

@@ -1,9 +1,11 @@
-from smolagents import CodeAgent,DuckDuckGoSearchTool, HfApiModel,load_tool,tool
 import datetime
 import requests
 import pytz
 import yaml
 import json
 from tools.final_answer import FinalAnswerTool
 import yfinance as yf
 from Gradio_UI import GradioUI
@@ -100,35 +102,31 @@ def get_current_time_in_timezone(timezone: str) -> str:
         return f"Error fetching time for timezone '{timezone}': {str(e)}"
-final_answer = FinalAnswerTool()
-# If the agent does not answer, the model is overloaded, please use another model or the following Hugging Face Endpoint that also contains qwen2.5 coder:
-# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
-model = HfApiModel(
-max_tokens=2096,
-temperature=0.5,
-model_id='Qwen/Qwen2.5-Coder-32B-Instruct',# it is possible that this model may be overloaded
-custom_role_conversions=None,
-)
-# Import tool from Hub
-image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)
-with open("prompts.yaml", 'r') as stream:
-    prompt_templates = yaml.safe_load(stream)
-agent = CodeAgent(
-    model=model,
-    tools=[get_current_time_in_timezone, final_answer], ## add your tools here (don't remove final answer)
-    max_steps=6,
-    verbosity_level=1,
-    grammar=None,
-    planning_interval=None,
-    name=None,
-    description=None,
-    prompt_templates=prompt_templates
-)
-GradioUI(agent).launch()

+from smolagents import CodeAgent,DuckDuckGoSearchTool, HfApiModel,load_tool,tool, LiteLLMModel
+import os
 import datetime
 import requests
 import pytz
 import yaml
 import json
+from dotenv import load_dotenv
 from tools.final_answer import FinalAnswerTool
 import yfinance as yf
 from Gradio_UI import GradioUI
         return f"Error fetching time for timezone '{timezone}': {str(e)}"
+if __name__ == "__main__":
+    # Guarded so that importing this module does not build the agent or start the UI.
+    final_answer = FinalAnswerTool()
+    # Load variables from a local .env file (if present) into the environment.
+    load_dotenv()
+    # NOTE(review): neither value below is referenced again in the visible code;
+    # presumably the model backend reads GEMINI_API_KEY from the environment - confirm.
+    hf_token = os.getenv("HF_TOKEN")
+    gemini_api_key = os.getenv("GEMINI_API_KEY")
+    model = LiteLLMModel(
+        model_id="gemini/gemini-2.5-flash",
+        temperature=0.2,
+    )
+    # Read the prompt templates with an explicit encoding so the result does not
+    # depend on the platform's default locale.
+    with open("prompts.yaml", "r", encoding="utf-8") as stream:
+        prompt_templates = yaml.safe_load(stream)
+    agent = CodeAgent(
+        model=model,
+        tools=[query_market_asset, get_current_time_in_timezone, final_answer],
+        max_steps=6,
+        verbosity_level=1,
+        grammar=None,
+        planning_interval=None,
+        name=None,
+        description=None,
+        prompt_templates=prompt_templates,
+    )
+    GradioUI(agent).launch()

homework.py ADDED Viewed

	@@ -0,0 +1,75 @@

+# Create a CodeAgent with DuckDuckGo search capability
+from smolagents import CodeAgent, DuckDuckGoSearchTool, InferenceClientModel
+search_tool = DuckDuckGoSearchTool()
+# The model is constructed without arguments, i.e. with the library defaults.
+model = InferenceClientModel()
+# NOTE(review): the name `agent` is assigned again further down in this script,
+# so this first agent is only reachable until that later assignment runs.
+agent = CodeAgent(
+    tools=[search_tool],           # Add search tool here
+    model=model          # Add model here
+)
+# ============================================
+from smolagents import (
+    CodeAgent,
+    ToolCallingAgent,
+    InferenceClientModel,
+    WebSearchTool,
+)
+import re
+import requests
+from markdownify import markdownify
+from requests.exceptions import RequestException
+from smolagents import tool
+@tool
+def visit_webpage(url: str) -> str:
+    """Visits a webpage at the given URL and returns its content as a markdown string.
+
+    Args:
+        url: The URL of the webpage to visit.
+
+    Returns:
+        The content of the webpage converted to Markdown, or an error message if the request fails.
+    """
+    try:
+        # Bound the wait so an unresponsive server cannot stall the agent; a
+        # timeout is a RequestException and is reported by the handler below.
+        response = requests.get(url, timeout=20)
+        response.raise_for_status()  # Raise an exception for bad status codes
+        # Convert the HTML content to Markdown
+        markdown_content = markdownify(response.text).strip()
+        # Collapse runs of three or more newlines into a single blank line
+        markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
+        return markdown_content
+    except RequestException as e:
+        return f"Error fetching the webpage: {str(e)}"
+    except Exception as e:
+        # Deliberate catch-all: the error is returned as text rather than raised.
+        return f"An unexpected error occurred: {str(e)}"
+# Tool-calling agent that can search the web and fetch pages.
+web_agent = ToolCallingAgent(
+    tools=[DuckDuckGoSearchTool(), visit_webpage],
+    model=model,
+    max_steps=10,
+    name="search",
+    description="Runs web searches for you.",
+)
+# Manager agent with no tools of its own; it delegates to `web_agent`.
+manager_agent = CodeAgent(
+    tools=[],
+    model=model,
+    managed_agents=[web_agent],
+    additional_authorized_imports=["time", "numpy", "pandas"],
+)
+# The original passed `sandbox=EX2Sandbox()`, but no such name is defined or
+# imported in this script. Sandboxed execution is selected through
+# `executor_type` instead.
+# NOTE(review): the "e2b" executor presumably needs the e2b extra installed and
+# an E2B API key configured - confirm before running.
+agent = CodeAgent(
+    tools=[],
+    model=model,
+    executor_type="e2b",
+    additional_authorized_imports=["numpy"],
+)

load_docs.py ADDED Viewed

	@@ -0,0 +1,216 @@

+import asyncio
+import hashlib
+import os
+from pathlib import Path
+from typing import Iterable, List
+from dotenv import load_dotenv
+import chromadb
+from chromadb.errors import NotFoundError
+from pypdf import PdfReader
+from llama_index.core import StorageContext, VectorStoreIndex
+from llama_index.core.schema import Document, BaseNode
+from llama_index.core.node_parser import SentenceSplitter
+from llama_index.vector_stores.chroma import ChromaVectorStore
+# Directory that contains this script; every other path is derived from it.
+BASE_DIR = Path(__file__).resolve().parent
+KNOWLEDGE_BASE_DIR = BASE_DIR / "knowledge_base"
+# Source documents (markdown / PDF) are searched for recursively under RAW_DIR.
+RAW_DIR = KNOWLEDGE_BASE_DIR / "raw"
+# On-disk location handed to the persistent Chroma client.
+CHROMA_DB_DIR = KNOWLEDGE_BASE_DIR / "chroma_db"
+# Local cache directory used for the HF_HOME / sentence-transformers settings.
+HF_CACHE_DIR = BASE_DIR / "hf_cache"
+COLLECTION_NAME = "options_knowledge"
+EMBED_MODEL_NAME = "BAAI/bge-small-en-v1.5"
+# Passed to SentenceSplitter as chunk_size / chunk_overlap.
+CHUNK_SIZE = 1000
+CHUNK_OVERLAP = 150
+# Metadata keys that validate_nodes requires on every chunk.
+REQUIRED_METADATA = [
+    "source_file",
+    "file_name",
+    "file_type",
+    "document_title",
+    "file_hash",
+    "chunk_id",
+    "chunk_index",
+]
+def configure_model_cache() -> None:
+    """Create the local cache directory and set cache-related environment defaults.
+
+    Variables that are already set in the environment are left unchanged.
+    """
+    HF_CACHE_DIR.mkdir(parents=True, exist_ok=True)
+    defaults = (
+        ("HF_HOME", str(HF_CACHE_DIR)),
+        ("SENTENCE_TRANSFORMERS_HOME", str(HF_CACHE_DIR / "sentence_transformers")),
+        ("TOKENIZERS_PARALLELISM", "false"),
+    )
+    for variable, fallback in defaults:
+        os.environ.setdefault(variable, fallback)
+def file_sha256(path: Path) -> str:
+    """Return the hexadecimal SHA-256 digest of the file at ``path``.
+
+    The file is read in 1 MiB blocks rather than loaded in one piece.
+    """
+    block_size = 1024 * 1024
+    hasher = hashlib.sha256()
+    with path.open("rb") as stream:
+        while chunk := stream.read(block_size):
+            hasher.update(chunk)
+    return hasher.hexdigest()
+def load_md_file(path: Path) -> Document:
+    """Read a UTF-8 markdown file and wrap it in a single Document.
+
+    The metadata records the resolved path, file name, the fixed type "md",
+    the file stem as title, and the SHA-256 hash of the file.
+    """
+    content = path.read_text(encoding="utf-8")
+    details = {
+        "source_file": str(path.resolve()),
+        "file_name": path.name,
+        "file_type": "md",
+        "document_title": path.stem,
+        "file_hash": file_sha256(path),
+    }
+    return Document(text=content, metadata=details)
+def load_pdf_file(path: Path) -> List[Document]:
+    """Load a PDF as one Document per page that has extractable text.
+
+    Pages whose extracted text is empty or whitespace-only are skipped.
+
+    Args:
+        path: Location of the PDF file.
+
+    Returns:
+        Documents in page order; the ``page_number`` metadata is 1-based.
+    """
+    reader = PdfReader(str(path))
+    # These values are the same for every page, so compute them once instead
+    # of re-reading and re-hashing the whole file for each page.
+    source_file = str(path.resolve())
+    file_hash = file_sha256(path)
+    documents = []
+    for page_index, page in enumerate(reader.pages, start=1):
+        text = page.extract_text() or ""
+        if not text.strip():
+            continue
+        documents.append(
+            Document(
+                text=text,
+                metadata={
+                    "source_file": source_file,
+                    "file_name": path.name,
+                    "file_type": "pdf",
+                    "document_title": path.stem,
+                    "file_hash": file_hash,
+                    "page_number": page_index,
+                },
+            )
+        )
+    return documents
+def iter_source_files(raw_dir: Path) -> Iterable[Path]:
+    """Yield, in sorted order, every regular file under ``raw_dir`` with a supported suffix.
+
+    The search is recursive and the suffix comparison is case-insensitive.
+    """
+    wanted = {".md", ".markdown", ".pdf"}
+    yield from (
+        candidate
+        for candidate in sorted(raw_dir.rglob("*"))
+        if candidate.is_file() and candidate.suffix.lower() in wanted
+    )
+def load_docs(raw_dir: Path = RAW_DIR) -> List[Document]:
+    """Load every supported source file under ``raw_dir`` into Documents.
+
+    Markdown files contribute one Document each; PDFs contribute whatever
+    ``load_pdf_file`` returns for them.
+
+    Raises:
+        ValueError: If no Document was produced.
+    """
+    markdown_suffixes = {".md", ".markdown"}
+    loaded: List[Document] = []
+    for source in iter_source_files(raw_dir):
+        kind = source.suffix.lower()
+        if kind == ".pdf":
+            loaded.extend(load_pdf_file(source))
+        elif kind in markdown_suffixes:
+            loaded.append(load_md_file(source))
+    if loaded:
+        return loaded
+    raise ValueError(f"No supported documents found under {raw_dir}")
+def add_chunk_metadata(nodes: List[BaseNode]) -> List[BaseNode]:
+    """Assign ``chunk_id`` / ``chunk_index`` metadata and the node id, in place.
+
+    ``chunk_index`` counts chunks per ``source_file`` starting at 0. The id is
+    built from the source file stem, the first 12 characters of the file hash,
+    the page number ("na" when absent) and the chunk index.
+
+    Returns:
+        The same list that was passed in.
+    """
+    next_index: dict[str, int] = {}
+    for node in nodes:
+        origin = node.metadata["source_file"]
+        position = next_index.get(origin, 0)
+        next_index[origin] = position + 1
+        short_hash = node.metadata["file_hash"][:12]
+        page = node.metadata.get("page_number", "na")
+        identifier = f"{Path(origin).stem}-{short_hash}-p{page}-c{position}"
+        node.metadata.update(chunk_id=identifier, chunk_index=position)
+        node.id_ = identifier
+    return nodes
+def validate_nodes(nodes: List[BaseNode]) -> None:
+    """Check that chunks exist and that each carries the required metadata.
+
+    Raises:
+        ValueError: If ``nodes`` is empty, if a node lacks any key listed in
+            ``REQUIRED_METADATA``, or if a PDF node has no ``page_number``.
+    """
+    if not nodes:
+        raise ValueError("No chunks were created from the source documents.")
+    for node in nodes:
+        present = node.metadata
+        absent = [name for name in REQUIRED_METADATA if name not in present]
+        if absent:
+            raise ValueError(f"Node {node.node_id} is missing metadata fields: {absent}")
+        if present["file_type"] != "pdf":
+            continue
+        if "page_number" not in present:
+            raise ValueError(f"PDF node {node.node_id} is missing page_number metadata.")
+def build_nodes(raw_dir: Path = RAW_DIR) -> List[BaseNode]:
+    """Load the source documents, split them into chunks, then tag and validate the chunks."""
+    documents = load_docs(raw_dir)
+    splitter = SentenceSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
+    # add_chunk_metadata mutates the nodes and hands back the same list.
+    chunks = add_chunk_metadata(splitter.get_nodes_from_documents(documents))
+    validate_nodes(chunks)
+    return chunks
+async def build_index(raw_dir: Path = RAW_DIR, rebuild: bool = False) -> VectorStoreIndex:
+    """Build the Chroma-backed vector index, or load it if the collection already has data.
+
+    Args:
+        raw_dir: Directory scanned for source documents when indexing.
+        rebuild: When True, delete the existing collection first and re-index.
+
+    Returns:
+        A VectorStoreIndex over the ``COLLECTION_NAME`` collection.
+
+    NOTE(review): declared ``async`` although nothing in this body is awaited.
+    """
+    # Set the cache environment defaults before the embedding package is imported.
+    configure_model_cache()
+    from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+    load_dotenv()
+    CHROMA_DB_DIR.mkdir(parents=True, exist_ok=True)
+    db = chromadb.PersistentClient(path=str(CHROMA_DB_DIR))
+    if rebuild:
+        try:
+            db.delete_collection(COLLECTION_NAME)
+        except (NotFoundError, ValueError):
+            # Nothing to delete; presumably raised when the collection does not exist yet.
+            pass
+    chroma_collection = db.get_or_create_collection(COLLECTION_NAME)
+    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
+    storage_context = StorageContext.from_defaults(vector_store=vector_store)
+    embed_model = HuggingFaceEmbedding(
+        model_name=EMBED_MODEL_NAME,
+        cache_folder=str(HF_CACHE_DIR / "sentence_transformers"),
+    )
+    # Index from scratch on an explicit rebuild or when the collection is empty.
+    if rebuild or chroma_collection.count() == 0:
+        nodes = build_nodes(raw_dir)
+        index = VectorStoreIndex(
+            nodes,
+            storage_context=storage_context,
+            embed_model=embed_model,
+            show_progress=True,
+        )
+        print(f"Indexed {len(nodes)} chunks into collection '{COLLECTION_NAME}'.")
+        return index
+    # Otherwise reuse the stored vectors without reading the source documents.
+    print(f"Loaded existing collection '{COLLECTION_NAME}' with {chroma_collection.count()} chunks.")
+    return VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model)
+if __name__ == "__main__":
+    # Rebuild the index, then run one sample query and print the top matches.
+    index = asyncio.run(build_index(rebuild=True))
+    hits = index.as_retriever(similarity_top_k=5).retrieve("What is volatility smile?")
+    print("\nTop retrieved chunks:")
+    for hit in hits:
+        info = hit.node.metadata
+        source = info.get("file_name", "unknown")
+        page = info.get("page_number", "n/a")
+        print(f"- {source}, page {page}, score={hit.score:.4f}")
+        # Show at most the first 500 characters, flattened onto one line.
+        preview = hit.node.get_content()[:500].replace("\n", " ")
+        print(preview)
+        print()

pyproject.toml ADDED Viewed

	@@ -0,0 +1,24 @@

+[project]
+name = "first-agent-template"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+authors = [
+    { name = "mathidot", email = "c1216440698@126.com" }
+]
+requires-python = ">=3.12"
+dependencies = [
+    "chromadb>=1.0.0",
+    "google-genai>=2.3.0",
+    "llama-index-core>=0.14.0",
+    "llama-index-embeddings-huggingface>=0.6.0",
+    "llama-index-vector-stores-chroma>=0.5.0",
+    "litellm>=1.85.0",
+    "pypdf>=6.0.0",
+    "tokenizers>=0.22.0,<=0.23.0",
+    "transformers<5",
+]
+[build-system]
+requires = ["uv_build>=0.10.9,<0.11.0"]
+build-backend = "uv_build"

pyrightconfig.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "venvPath": ".",
+  "venv": ".venv"
+}

requirements.txt CHANGED Viewed

@@ -3,3 +3,9 @@ smolagents==1.13.0
 requests
 duckduckgo_search
 pandas

 requests
 duckduckgo_search
 pandas
+pypdf
+chromadb
+llama-index-core
+llama-index-embeddings-huggingface
+llama-index-vector-stores-chroma
+transformers<5

src/first_agent_template/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ def hello() -> str:
2	+     """Return the package's greeting string."""
3	+     return "Hello from first-agent-template!"

src/first_agent_template/py.typed ADDED Viewed

File without changes

test.py ADDED Viewed

	@@ -0,0 +1,34 @@

+from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
+import os
+from dotenv import load_dotenv
+# CodeAgent and LiteLLMModel are used below but were never imported here.
+from smolagents import CodeAgent, LiteLLMModel
+# NOTE(review): query_market_asset is used below without a definition in this
+# script; presumably it is the tool defined in app.py - confirm the module.
+from app import query_market_asset
+# Load the .env file
+load_dotenv()
+# Retrieve HF_TOKEN from the environment variables
+hf_token = os.getenv("HF_TOKEN")
+gemini_api_key = os.getenv("GEMINI_API_KEY")
+model = LiteLLMModel(
+    model_id="gemini/gemini-2.5-flash",
+    temperature=0.2,
+)
+# The agent is only constructed here; the script never runs it.
+agent = CodeAgent(
+    tools=[query_market_asset],
+    model=model,
+    max_steps=5,
+)
+llm = HuggingFaceInferenceAPI(
+    model_name="Qwen/Qwen2.5-Coder-32B-Instruct",
+    temperature=0.7,
+    max_tokens=100,
+    token=hf_token,
+    provider="auto",
+)
+response = llm.complete("Hello, how are you?")
+print(response)
+# I am good, how can I help you today?

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff