SolshineMisfit commited on
Commit
8b3a261
·
verified ·
1 Parent(s): 894372d

Added Google and DuckDuckGo search tools, and changed structured-dataset handling for upload to the Hub

Browse files
Files changed (1) hide show
  1. app.py +61 -15
app.py CHANGED
@@ -1,4 +1,4 @@
1
- from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel, load_tool, tool
2
  import datetime
3
  import requests
4
  import pytz
@@ -19,6 +19,10 @@ Perplex_Assistant_Prompt = """You are a helpful AI assistant that searches the w
19
  # Set up API key in environment variable as expected by HfApiModel
20
  os.environ["HUGGINGFACE_API_TOKEN"] = os.getenv("HUGGINGFACE_API_KEY", "")
21
 
 
 
 
 
22
  #@weave.op()
23
  def tracked_perplexity_call(prompt: str, system_messages: str, model_name: str = "sonar-pro", assistant_meta: bool = False):
24
  """Enhanced Perplexity API call with explicit model tracking."""
@@ -56,7 +60,8 @@ def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
56
 
57
  Args:
58
  dataset_name: Name for the dataset (will be prefixed with username)
59
- conversation_data: String representing the conversation data
 
60
 
61
  Returns:
62
  URL of the created dataset or error message
@@ -79,21 +84,60 @@ def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
79
 
80
  print(f"Creating dataset: {repo_id}")
81
 
82
- # Create a simple dataset from a dictionary
83
- data = {
84
- "text": [conversation_data],
85
- "timestamp": [datetime.datetime.now().isoformat()],
86
- "id": [str(uuid.uuid4())]
87
- }
88
 
89
- # Create the dataset directly
90
- dataset = Dataset.from_dict(data)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
- # Push to Hugging Face Hub using the simpler method from documentation
93
  dataset.push_to_hub(
94
- repo_id=repo_id, # Include username in repo_id
95
- token=api_key, # Pass token explicitly
96
- private=False # Make it public
97
  )
98
 
99
  # Generate the URL for the dataset
@@ -217,7 +261,9 @@ agent = CodeAgent(
217
  model=model,
218
  tools=[
219
  final_answer,
220
- Sonar_Web_Search_Tool,
 
 
221
  get_current_time_in_timezone,
222
  image_generation_tool,
223
  Dataset_Creator_Tool,
 
1
+ from smolagents import CodeAgent, DuckDuckGoSearchTool, GoogleSearchTool, HfApiModel, load_tool, tool
2
  import datetime
3
  import requests
4
  import pytz
 
19
  # Set up API key in environment variable as expected by HfApiModel
20
  os.environ["HUGGINGFACE_API_TOKEN"] = os.getenv("HUGGINGFACE_API_KEY", "")
21
 
22
+ # Initialize the standard search tools
23
+ ddg_search_tool = DuckDuckGoSearchTool(max_results=10) # Default is 10 results
24
+ google_search_tool = GoogleSearchTool(provider='serpapi') # Using serpapi as the provider
25
+
26
  #@weave.op()
27
  def tracked_perplexity_call(prompt: str, system_messages: str, model_name: str = "sonar-pro", assistant_meta: bool = False):
28
  """Enhanced Perplexity API call with explicit model tracking."""
 
60
 
61
  Args:
62
  dataset_name: Name for the dataset (will be prefixed with username)
63
+ conversation_data: String representing the conversation data; it can be structured
64
+ with pipe-separated values (col1 | col2 | col3) for tabular data
65
 
66
  Returns:
67
  URL of the created dataset or error message
 
84
 
85
  print(f"Creating dataset: {repo_id}")
86
 
87
+ # Check if data is structured (contains pipe separators and multiple lines)
88
+ lines = conversation_data.strip().split('\n')
89
+ is_structured = '|' in conversation_data and len(lines) > 1
 
 
 
90
 
91
+ if is_structured:
92
+ print("Detected structured data with multiple rows")
93
+
94
+ # Parse the header row for column names
95
+ header = lines[0].strip()
96
+ headers = [col.strip() for col in header.split('|')]
97
+
98
+ # Parse the data rows
99
+ data_dict = {header: [] for header in headers}
100
+
101
+ # Add a timestamp and id column
102
+ data_dict['timestamp'] = []
103
+ data_dict['id'] = []
104
+
105
+ # Process each data row
106
+ for i, line in enumerate(lines[1:]):
107
+ if not line.strip():
108
+ continue
109
+
110
+ values = [val.strip() for val in line.split('|')]
111
+
112
+ # Ensure we have the right number of values
113
+ if len(values) == len(headers):
114
+ for j, header in enumerate(headers):
115
+ data_dict[header].append(values[j])
116
+
117
+ # Add timestamp and ID for each row
118
+ data_dict['timestamp'].append(datetime.datetime.now().isoformat())
119
+ data_dict['id'].append(str(uuid.uuid4()))
120
+ else:
121
+ print(f"Warning: Skipping row {i+1} due to mismatch in column count")
122
+
123
+ # Create dataset from structured data
124
+ dataset = Dataset.from_dict(data_dict)
125
+ print(f"Created structured dataset with {len(data_dict[headers[0]])} rows and {len(data_dict)} columns")
126
+ else:
127
+ # Handle as regular text data (single row)
128
+ print("Processing as regular text data")
129
+ data = {
130
+ "text": [conversation_data],
131
+ "timestamp": [datetime.datetime.now().isoformat()],
132
+ "id": [str(uuid.uuid4())]
133
+ }
134
+ dataset = Dataset.from_dict(data)
135
 
136
+ # Push to Hugging Face Hub
137
  dataset.push_to_hub(
138
+ repo_id=repo_id,
139
+ token=api_key,
140
+ private=False
141
  )
142
 
143
  # Generate the URL for the dataset
 
261
  model=model,
262
  tools=[
263
  final_answer,
264
+ Sonar_Web_Search_Tool,
265
+ ddg_search_tool, # Added DuckDuckGo search tool
266
+ google_search_tool, # Added Google search tool
267
  get_current_time_in_timezone,
268
  image_generation_tool,
269
  Dataset_Creator_Tool,