Spaces:
Build error
Build error
File size: 6,625 Bytes
ab9ff53 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 |
import os
from io import BytesIO
from pathlib import Path
from typing import Any
import pandas as pd
import requests
from huggingface_hub import InferenceClient
from smolagents import Tool, tool
from .db import ShelveDB
from .wiki import get_wiki_content
### convert table to markdown
@tool
def convert_pandas_table_to_markdown(table: pd.DataFrame) -> str:
    """
    Converts a pandas DataFrame to a markdown table.
    Args:
        table (pd.DataFrame): The DataFrame to convert.
    Returns:
        str: The markdown representation of the table.
    """
    # NOTE(review): the docstring above is presumably parsed by the `@tool`
    # decorator to build the tool description, so its wording and the
    # Args/Returns layout are left untouched - confirm before editing it.
    rendered = table.to_markdown()
    # Coerce through str() so the declared `str` return type always holds,
    # whatever `to_markdown` hands back.
    return str(rendered)
### fetch text tool
@tool
def fetch_text_content(url: str) -> str:
    """
    Fetches the text content from a given URL.
    Args:
        url (str): The URL to fetch the text from.
    Returns:
        str: The text content of the page.
    """
    try:
        page = requests.get(url, timeout=30)
        # Convert error HTTP statuses into an exception so they take the
        # same path as connection-level failures.
        page.raise_for_status()
        body = page.text
    except requests.RequestException as exc:
        # Failures are reported as a plain string instead of being raised,
        # so the caller always receives text back.
        return f"Error fetching URL: {exc}"
    else:
        return body
### Storage Tool
class RetrieveCSVStorageTool(Tool):
    """Tool that fetches a stored pandas DataFrame from a ShelveDB table by key.

    The backing table is opened once in ``__init__`` and exposed through
    ``get_storage`` so other tools can write into the same table.
    """

    name = "retrieve_csv_storage_tool"
    description = "Retrieves a CSV file from the storage and returns it as a pandas DataFrame."
    inputs = {
        "key": {
            "type": "string",
            "description": "The key to retrieve data from the table.",
        },
    }
    output_type = "any"

    def __init__(self, table_name: str, init_storage: bool, storage_path: str | None = None, *args: Any, **kwargs: Any):
        """Open the storage table used by this tool.

        Args:
            table_name: Name of the ShelveDB table to open.
            init_storage: Forwarded to ShelveDB as ``init``.
            storage_path: Optional directory for the database. When given it
                is assigned to ``ShelveDB.dir_path``.
            *args: Forwarded to ``Tool.__init__``.
            **kwargs: Forwarded to ``Tool.__init__``.
        """
        super().__init__(*args, **kwargs)
        if storage_path is not None:
            # NOTE(review): this sets an attribute on the ShelveDB class itself,
            # not on one instance - presumably it affects every ShelveDB opened
            # afterwards; confirm that is intended.
            ShelveDB.dir_path = Path(storage_path)
        self.storage = ShelveDB[pd.DataFrame](table_name, init=init_storage)

    def get_storage(self) -> ShelveDB[pd.DataFrame]:
        """Return the ShelveDB table backing this tool."""
        return self.storage

    def forward(self, key: str) -> pd.DataFrame | str:
        """Fetch the DataFrame stored under ``key``.

        Args:
            key: Storage key to look up.

        Returns:
            The stored DataFrame, or an ``"Error retrieving data: ..."``
            string when the storage lookup itself raises.

        Raises:
            ValueError: If the lookup succeeds but yields ``None``.
        """
        try:
            dataframe = self.storage.fetch(key)
        except Exception as e:
            # Storage failures are reported as text rather than raised; the
            # return annotation includes `str` to reflect this path.
            return f"Error retrieving data: {e}"
        if dataframe is None:
            raise ValueError(f"No data found for key: {key}")
        return dataframe
### Wikipedia Content Extraction Tool
class WikiTool(Tool):
    """Tool that fetches a Wikipedia page and stores its tables in shared storage.

    Each call replaces the contents of ``storage`` with the tables of the
    page just fetched.
    """

    name = "wiki_tool"
    # The example calls in this description use the registered tool names
    # ("wiki_tool", "retrieve_csv_storage_tool") and pass both declared inputs.
    # The text is kept at column 0 so no indentation leaks into the string.
    description = """Get Wikipedia page content and tables.
Returns a tuple containing the page content and a dictionary of tables extracted from the page.
The page content is prefixed with the retrieved table key ({{table_1}}, {{table_2}}, ...).
To understand what is contained in the tables, it is recommended to first display the content.
Example 1:
content, tables = wiki_tool(query="Python_(programming_language)", language="en")
print(content)
The retrieved table objects are stored in storage.
They can be retrieved using "retrieve_csv_storage_tool".
Example 2:
table:pd.DataFrame = retrieve_csv_storage_tool("table_1")
"""
    inputs = {
        "query": {
            "type": "string",
            "description": "The title of the Wikipedia page to visit. For example, 'Python_(programming_language)'.",
        },
        "language": {
            "type": "string",
            "description": "The language of the Wikipedia page. For example, 'en' for English, 'ja' for Japanese.",
        },
    }
    output_type = "array"

    def __init__(self, storage: ShelveDB[Any], *args: Any, **kwargs: Any) -> None:
        """Keep a reference to the storage that receives extracted tables.

        Args:
            storage: ShelveDB table to write extracted tables into.
            *args: Forwarded to ``Tool.__init__``.
            **kwargs: Forwarded to ``Tool.__init__``.
        """
        super().__init__(*args, **kwargs)
        self.storage = storage

    def forward(self, query: str, language: str) -> tuple[str, dict[str, pd.DataFrame]]:
        """Fetch a page, replace the stored tables, and return content and tables.

        Args:
            query: Title of the Wikipedia page.
            language: Wikipedia language code.

        Returns:
            A ``(content, tables)`` pair as produced by ``get_wiki_content``.
        """
        content, tables = get_wiki_content(query, language)
        # Clear only after the fetch has succeeded, so a failed fetch leaves
        # the previously stored tables in place.
        self.storage.clear()
        for table_key, df in tables.items():
            self.storage.save(table_key, df)
        return content, tables
### Visual Question Answering Tool
def request_visual_qa(
    client: InferenceClient,
    question: str,
    image_url: str,
    model: str = "qwen/qwen2.5-vl-32b-instruct",
) -> str:
    """Ask a vision-language chat model a question about an image.

    Args:
        client: Client exposing ``chat_completion(messages=..., model=...)``.
        question: Text question to ask about the image.
        image_url: URL of the image, sent as an ``image_url`` content part.
        model: Model identifier passed to the client. Defaults to the model
            that was previously hard-coded, so existing callers are unaffected.

    Returns:
        The text content of the first returned choice.

    Raises:
        ValueError: If the first choice carries no content.
    """
    # One user message made of two parts: the question text and the image.
    contents = [
        {"type": "text", "text": question},
        {"type": "image_url", "image_url": {"url": image_url}},
    ]
    res = client.chat_completion(messages=[{"role": "user", "content": contents}], model=model)
    content = res.choices[0].message.content
    if content is None:
        raise ValueError("No content returned from the model.")
    return content
class VisualQATool(Tool):
    """Tool that answers a question about an image via ``request_visual_qa``."""

    name = "visual_qa_tool"
    description = "A tool that can answer questions about image."
    inputs = {
        "image_url": {
            "type": "string",
            "description": "The URL of the image to analyze. No extension needed.",
        },
        "question": {
            "type": "string",
            "description": "The question to ask about the image.",
        },
    }
    output_type = "string"
    # Built once when the class body runs and shared by every instance.
    # Because the key is read with os.environ[...], a missing
    # OPENROUTER_API_KEY raises KeyError as soon as this class is defined.
    client = InferenceClient(
        base_url="https://openrouter.ai/api/v1",
        api_key=os.environ["OPENROUTER_API_KEY"],
    )

    def forward(self, image_url: str, question: str) -> str:
        """Return the model's answer, or an ``"Error: ..."`` string on failure.

        Args:
            image_url: URL of the image to analyze.
            question: Question to ask about the image.
        """
        try:
            return request_visual_qa(self.client, question, image_url)
        except Exception as exc:
            # Any failure is reported as text instead of being raised.
            return f"Error: {exc!s}"
### Speech Recognition Tool
def request_speech_recognition(client: InferenceClient, audio_file: str, model: str = "openai/whisper-large-v3") -> str:
    """Transcribe an audio source and return the recognized text.

    Args:
        client: Client exposing ``automatic_speech_recognition``.
        audio_file: Audio reference passed straight to the client.
        model: Model identifier passed to the client.

    Returns:
        The ``text`` attribute of the client's response.
    """
    return client.automatic_speech_recognition(audio_file, model=model).text
class SpeechRecognitionTool(Tool):
    """Tool that transcribes audio via ``request_speech_recognition``."""

    name = "speech_recognition"
    description = "Converts audio contents to text"
    inputs = {"audio_url": {"type": "string", "description": "URL of the audio file to transcribe. No extension needed."}}
    output_type = "string"
    # Built once when the class body runs and shared by every instance.
    client = InferenceClient(provider="fal-ai")
    # Model identifier handed to the client on every call.
    _model = "openai/whisper-large-v3"

    def forward(self, audio_url: str) -> str:
        """Return the transcription, or an ``"Error: ..."`` string on failure.

        Args:
            audio_url: URL of the audio file to transcribe.
        """
        try:
            return request_speech_recognition(self.client, audio_url, model=self._model)
        except Exception as exc:
            # Any failure is reported as text instead of being raised.
            return f"Error: {exc!s}"
### Excel Tool
@tool
def read_excel(file_url: str) -> pd.DataFrame:
    """
    Reads an Excel file from a given URL and returns the data as a DataFrame.
    Args:
        file_url (str): URL of the Excel file to read. No extension needed.
    Returns:
        pd.DataFrame: DataFrame containing the data from the first sheet of the Excel file
    """
    # Unlike fetch_text_content, download and parse errors propagate to the
    # caller here; nothing is caught.
    response = requests.get(file_url, timeout=30)
    response.raise_for_status()
    # Wrap the downloaded bytes in an in-memory buffer so pandas can parse
    # them without a temporary file.
    return pd.read_excel(BytesIO(response.content))
|