Spaces:
Runtime error
Runtime error
Added Google and DuckDuckGo search tools, and changed the dataset creator to handle structured (pipe-separated) data when uploading to the Hub
Browse files
app.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel, load_tool, tool
|
| 2 |
import datetime
|
| 3 |
import requests
|
| 4 |
import pytz
|
|
@@ -19,6 +19,10 @@ Perplex_Assistant_Prompt = """You are a helpful AI assistant that searches the w
|
|
| 19 |
# Set up API key in environment variable as expected by HfApiModel
|
| 20 |
os.environ["HUGGINGFACE_API_TOKEN"] = os.getenv("HUGGINGFACE_API_KEY", "")
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
#@weave.op()
|
| 23 |
def tracked_perplexity_call(prompt: str, system_messages: str, model_name: str = "sonar-pro", assistant_meta: bool = False):
|
| 24 |
"""Enhanced Perplexity API call with explicit model tracking."""
|
|
@@ -56,7 +60,8 @@ def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
|
|
| 56 |
|
| 57 |
Args:
|
| 58 |
dataset_name: Name for the dataset (will be prefixed with username)
|
| 59 |
-
conversation_data: String representing the conversation data
|
|
|
|
| 60 |
|
| 61 |
Returns:
|
| 62 |
URL of the created dataset or error message
|
|
@@ -79,21 +84,60 @@ def Dataset_Creator_Function(dataset_name: str, conversation_data: str) -> str:
|
|
| 79 |
|
| 80 |
print(f"Creating dataset: {repo_id}")
|
| 81 |
|
| 82 |
-
#
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
"timestamp": [datetime.datetime.now().isoformat()],
|
| 86 |
-
"id": [str(uuid.uuid4())]
|
| 87 |
-
}
|
| 88 |
|
| 89 |
-
|
| 90 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
|
| 92 |
-
# Push to Hugging Face Hub
|
| 93 |
dataset.push_to_hub(
|
| 94 |
-
repo_id=repo_id,
|
| 95 |
-
token=api_key,
|
| 96 |
-
private=False
|
| 97 |
)
|
| 98 |
|
| 99 |
# Generate the URL for the dataset
|
|
@@ -217,7 +261,9 @@ agent = CodeAgent(
|
|
| 217 |
model=model,
|
| 218 |
tools=[
|
| 219 |
final_answer,
|
| 220 |
-
Sonar_Web_Search_Tool,
|
|
|
|
|
|
|
| 221 |
get_current_time_in_timezone,
|
| 222 |
image_generation_tool,
|
| 223 |
Dataset_Creator_Tool,
|
|
|
|
| 1 |
+
from smolagents import CodeAgent, DuckDuckGoSearchTool, GoogleSearchTool, HfApiModel, load_tool, tool
|
| 2 |
import datetime
|
| 3 |
import requests
|
| 4 |
import pytz
|
|
|
|
| 19 |
# Set up API key in environment variable as expected by HfApiModel
|
| 20 |
os.environ["HUGGINGFACE_API_TOKEN"] = os.getenv("HUGGINGFACE_API_KEY", "")
|
| 21 |
|
| 22 |
+
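# Initialize the standard search tools (NOTE(review): GoogleSearchTool with the serpapi provider presumably needs SERPAPI_API_KEY set in the environment at construction time — confirm)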
|
| 23 |
+
ddg_search_tool = DuckDuckGoSearchTool(max_results=10) # Default is 10 results
|
| 24 |
+
google_search_tool = GoogleSearchTool(provider='serpapi') # Using serpapi as the provider
|
| 25 |
+
|
| 26 |
#@weave.op()
|
| 27 |
def tracked_perplexity_call(prompt: str, system_messages: str, model_name: str = "sonar-pro", assistant_meta: bool = False):
|
| 28 |
"""Enhanced Perplexity API call with explicit model tracking."""
|
|
|
|
| 60 |
|
| 61 |
Args:
|
| 62 |
dataset_name: Name for the dataset (will be prefixed with username)
|
| 63 |
+
conversation_data: String representing the conversation data; it can be structured
|
| 64 |
+
as multiple lines of pipe-separated values (header row first, e.g. col1 | col2 | col3) for tabular data
|
| 65 |
|
| 66 |
Returns:
|
| 67 |
URL of the created dataset or error message
|
|
|
|
| 84 |
|
| 85 |
print(f"Creating dataset: {repo_id}")
|
| 86 |
|
| 87 |
+
# Check if data is structured (contains pipe separators and multiple lines)
|
| 88 |
+
lines = conversation_data.strip().split('\n')
|
| 89 |
+
is_structured = '|' in conversation_data and len(lines) > 1
|
|
|
|
|
|
|
|
|
|
| 90 |
|
| 91 |
+
if is_structured:
|
| 92 |
+
print("Detected structured data with multiple rows")
|
| 93 |
+
|
| 94 |
+
# Parse the header row for column names
|
| 95 |
+
header = lines[0].strip()
|
| 96 |
+
headers = [col.strip() for col in header.split('|')]
|
| 97 |
+
|
| 98 |
+
# Parse the data rows
|
| 99 |
+
data_dict = {header: [] for header in headers}
|
| 100 |
+
|
| 101 |
+
# Add timestamp and id columns (filled once per accepted data row)
|
| 102 |
+
data_dict['timestamp'] = []
|
| 103 |
+
data_dict['id'] = []
|
| 104 |
+
|
| 105 |
+
# Process each data row
|
| 106 |
+
for i, line in enumerate(lines[1:]):
|
| 107 |
+
if not line.strip():
|
| 108 |
+
continue
|
| 109 |
+
|
| 110 |
+
values = [val.strip() for val in line.split('|')]
|
| 111 |
+
|
| 112 |
+
# Ensure we have the right number of values
|
| 113 |
+
if len(values) == len(headers):
|
| 114 |
+
for j, header in enumerate(headers):
|
| 115 |
+
data_dict[header].append(values[j])
|
| 116 |
+
|
| 117 |
+
# Add timestamp and ID for each row
|
| 118 |
+
data_dict['timestamp'].append(datetime.datetime.now().isoformat())
|
| 119 |
+
data_dict['id'].append(str(uuid.uuid4()))
|
| 120 |
+
else:
|
| 121 |
+
print(f"Warning: Skipping row {i+1} due to mismatch in column count")
|
| 122 |
+
|
| 123 |
+
# Create dataset from structured data
|
| 124 |
+
dataset = Dataset.from_dict(data_dict)
|
| 125 |
+
print(f"Created structured dataset with {len(data_dict[headers[0]])} rows and {len(data_dict)} columns")
|
| 126 |
+
else:
|
| 127 |
+
# Handle as regular text data (single row)
|
| 128 |
+
print("Processing as regular text data")
|
| 129 |
+
data = {
|
| 130 |
+
"text": [conversation_data],
|
| 131 |
+
"timestamp": [datetime.datetime.now().isoformat()],
|
| 132 |
+
"id": [str(uuid.uuid4())]
|
| 133 |
+
}
|
| 134 |
+
dataset = Dataset.from_dict(data)
|
| 135 |
|
| 136 |
+
# Push to Hugging Face Hub
|
| 137 |
dataset.push_to_hub(
|
| 138 |
+
repo_id=repo_id,
|
| 139 |
+
token=api_key,
|
| 140 |
+
private=False
|
| 141 |
)
|
| 142 |
|
| 143 |
# Generate the URL for the dataset
|
|
|
|
| 261 |
model=model,
|
| 262 |
tools=[
|
| 263 |
final_answer,
|
| 264 |
+
Sonar_Web_Search_Tool,
|
| 265 |
+
ddg_search_tool, # Added DuckDuckGo search tool
|
| 266 |
+
google_search_tool, # Added Google search tool
|
| 267 |
get_current_time_in_timezone,
|
| 268 |
image_generation_tool,
|
| 269 |
Dataset_Creator_Tool,
|