Spaces:
Running
Running
Add search_kaggle_datasets
Browse files
app.py
CHANGED
|
@@ -1,10 +1,12 @@
|
|
| 1 |
-
from smolagents import CodeAgent,DuckDuckGoSearchTool,
|
| 2 |
import datetime
|
| 3 |
import requests
|
| 4 |
import pytz
|
| 5 |
import yaml
|
| 6 |
from tools.final_answer import FinalAnswerTool
|
| 7 |
|
|
|
|
|
|
|
| 8 |
from Gradio_UI import GradioUI
|
| 9 |
|
| 10 |
# Below is an example of a tool that does nothing. Amaze us with your creativity !
|
|
@@ -18,6 +20,58 @@ def my_custom_tool(arg1:str, arg2:int)-> str: #it's important to specify the return
|
|
| 18 |
"""
|
| 19 |
return "What magic will you build ?"
|
| 20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
@tool
|
| 22 |
def get_current_time_in_timezone(timezone: str) -> str:
|
| 23 |
"""A tool that fetches the current local time in a specified timezone.
|
|
|
|
| 1 |
+
from smolagents import CodeAgent,DuckDuckGoSearchTool,HfApiModel,load_tool,tool
|
| 2 |
import datetime
|
| 3 |
import requests
|
| 4 |
import pytz
|
| 5 |
import yaml
|
| 6 |
from tools.final_answer import FinalAnswerTool
|
| 7 |
|
| 8 |
+
from kaggle.api.kaggle_api_extended import KaggleApi
|
| 9 |
+
|
| 10 |
from Gradio_UI import GradioUI
|
| 11 |
|
| 12 |
# Below is an example of a tool that does nothing. Amaze us with your creativity !
|
|
|
|
| 20 |
"""
|
| 21 |
return "What magic will you build ?"
|
| 22 |
|
| 23 |
+
@tool
def search_kaggle_datasets(search_term: str, kaggle_username: str = None, kaggle_key: str = None, max_results: int = 10) -> str:
    """Search for datasets on Kaggle based on a search term.

    The result is a JSON-encoded list with one object per dataset (keys:
    title, url, size, files, last_updated), or an error message when no
    credentials are supplied.

    Args:
        search_term: The term to search for.
        kaggle_username: Your Kaggle username.
        kaggle_key: Your Kaggle API key.
        max_results: Maximum number of results to return.
    """
    # Imported locally so the tool does not depend on module-level imports
    # that the surrounding file does not provide (the original used `os`
    # without importing it).
    import json
    import os

    # Both credentials are required; there is no fallback to a stored config.
    if not (kaggle_username and kaggle_key):
        return 'Error in searching Kaggle datasets: No username or key provided.'

    # A negative limit would make the slice below drop items from the end
    # instead of capping the result, so clamp it. None means "no limit".
    limit = None if max_results is None else max(int(max_results), 0)

    # Hand the credentials to the client through the environment variables it
    # documents, instead of overwriting (and later deleting) the user's own
    # ~/.kaggle/kaggle.json. Previous values are restored afterwards.
    # NOTE(review): os.environ is process-wide; concurrent calls with
    # different credentials can still interfere with each other.
    overrides = {"KAGGLE_USERNAME": kaggle_username, "KAGGLE_KEY": kaggle_key}
    previous = {name: os.environ.get(name) for name in overrides}
    os.environ.update(overrides)
    try:
        api = KaggleApi()
        api.authenticate()
        datasets = api.dataset_list(search=search_term)
    finally:
        # Always undo the credential override, even when the API call raises.
        for name, old_value in previous.items():
            if old_value is None:
                os.environ.pop(name, None)
            else:
                os.environ[name] = old_value

    def _field(record, name):
        # The record type returned by the client is not visible here, so
        # accept both mapping-style and attribute-style records; a missing
        # field yields None rather than an exception.
        if isinstance(record, dict):
            return record.get(name)
        return getattr(record, name, None)

    # NOTE(review): the original issued an extra api.dataset_view() call per
    # result and subscripted its return value; the list records are read
    # directly here. Confirm they carry these fields in the pinned kaggle
    # version.
    results = [
        {
            'title': _field(dataset, 'title'),
            'url': f"https://www.kaggle.com/{_field(dataset, 'ref')}",
            'size': _field(dataset, 'size'),
            'files': _field(dataset, 'files'),
            'last_updated': _field(dataset, 'lastUpdated'),
        }
        for dataset in list(datasets or [])[:limit]
    ]

    # The tool is declared to return str, so serialise the records.
    # default=str is deliberate: values may be datetimes or client objects
    # that json cannot encode natively.
    return json.dumps(results, default=str)
|
| 73 |
+
|
| 74 |
+
|
| 75 |
@tool
|
| 76 |
def get_current_time_in_timezone(timezone: str) -> str:
|
| 77 |
"""A tool that fetches the current local time in a specified timezone.
|