Agent-Example

Runtime error

App Files Files Community

SolshineMisfit committed on Mar 6

Commit

8b3a261

verified ·

1 Parent(s): 894372d

Added Google and DuckDuckGo search tools, and changed dataset creation to parse structured (pipe-separated) data into columns before uploading to the Hub

Browse files

Files changed (1) hide show

app.py +61 -15

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel, load_tool, tool
 import datetime
 import requests
 import pytz
@@ -19,6 +19,10 @@ Perplex_Assistant_Prompt = """You are a helpful AI assistant that searches the w
 # Set up API key in environment variable as expected by HfApiModel
 os.environ["HUGGINGFACE_API_TOKEN"] = os.getenv("HUGGINGFACE_API_KEY", "")
 #@weave.op()
 def tracked_perplexity_call(prompt: str, system_messages: str, model_name: str = "sonar-pro", assistant_meta: bool = False):
     """Enhanced Perplexity API call with explicit model tracking."""
@@ -56,7 +60,8 @@ def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
     Args:
         dataset_name: Name for the dataset (will be prefixed with username)
-        conversation_data: String representing the conversation data
     Returns:
         URL of the created dataset or error message
@@ -79,21 +84,60 @@ def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
         print(f"Creating dataset: {repo_id}")
-        # Create a simple dataset from a dictionary
-        data = {
-            "text": [conversation_data],
-            "timestamp": [datetime.datetime.now().isoformat()],
-            "id": [str(uuid.uuid4())]
-        }
-        # Create the dataset directly
-        dataset = Dataset.from_dict(data)
-        # Push to Hugging Face Hub using the simpler method from documentation
         dataset.push_to_hub(
-            repo_id=repo_id,  # Include username in repo_id
-            token=api_key,    # Pass token explicitly
-            private=False     # Make it public
         )
         # Generate the URL for the dataset
@@ -217,7 +261,9 @@ agent = CodeAgent(
     model=model,
     tools=[
         final_answer,
-        Sonar_Web_Search_Tool,
         get_current_time_in_timezone,
         image_generation_tool,
         Dataset_Creator_Tool,

+from smolagents import CodeAgent, DuckDuckGoSearchTool, GoogleSearchTool, HfApiModel, load_tool, tool
 import datetime
 import requests
 import pytz
 # Set up API key in environment variable as expected by HfApiModel
 os.environ["HUGGINGFACE_API_TOKEN"] = os.getenv("HUGGINGFACE_API_KEY", "")
+# Initialize the standard search tools
+ddg_search_tool = DuckDuckGoSearchTool(max_results=10)  # Default is 10 results
+google_search_tool = GoogleSearchTool(provider='serpapi')  # Using serpapi as the provider
 #@weave.op()
 def tracked_perplexity_call(prompt: str, system_messages: str, model_name: str = "sonar-pro", assistant_meta: bool = False):
     """Enhanced Perplexity API call with explicit model tracking."""
     Args:
         dataset_name: Name for the dataset (will be prefixed with username)
+        conversation_data: String representing the conversation data, can be structured
+                          with pipe-separated values (col1 | col2 | col3) for tabular data
     Returns:
         URL of the created dataset or error message
         print(f"Creating dataset: {repo_id}")
+        # Check if data is structured (contains pipe separators and multiple lines)
+        lines = conversation_data.strip().split('\n')
+        is_structured = '|' in conversation_data and len(lines) > 1
+        if is_structured:
+            print("Detected structured data with multiple rows")
+            # Parse the header row for column names
+            header = lines[0].strip()
+            headers = [col.strip() for col in header.split('|')]
+            # Parse the data rows
+            data_dict = {header: [] for header in headers}
+            # Add a timestamp and id column
+            data_dict['timestamp'] = []
+            data_dict['id'] = []
+            # Process each data row
+            for i, line in enumerate(lines[1:]):
+                if not line.strip():
+                    continue
+                values = [val.strip() for val in line.split('|')]
+                # Ensure we have the right number of values
+                if len(values) == len(headers):
+                    for j, header in enumerate(headers):
+                        data_dict[header].append(values[j])
+                    # Add timestamp and ID for each row
+                    data_dict['timestamp'].append(datetime.datetime.now().isoformat())
+                    data_dict['id'].append(str(uuid.uuid4()))
+                else:
+                    print(f"Warning: Skipping row {i+1} due to mismatch in column count")
+            # Create dataset from structured data
+            dataset = Dataset.from_dict(data_dict)
+            print(f"Created structured dataset with {len(data_dict[headers[0]])} rows and {len(data_dict)} columns")
+        else:
+            # Handle as regular text data (single row)
+            print("Processing as regular text data")
+            data = {
+                "text": [conversation_data],
+                "timestamp": [datetime.datetime.now().isoformat()],
+                "id": [str(uuid.uuid4())]
+            }
+            dataset = Dataset.from_dict(data)
+        # Push to Hugging Face Hub
         dataset.push_to_hub(
+            repo_id=repo_id,
+            token=api_key,
+            private=False
         )
         # Generate the URL for the dataset
     model=model,
     tools=[
         final_answer,
+        Sonar_Web_Search_Tool,
+        ddg_search_tool,  # Added DuckDuckGo search tool
+        google_search_tool,  # Added Google search tool
         get_current_time_in_timezone,
         image_generation_tool,
         Dataset_Creator_Tool,