|
|
from smolagents import DuckDuckGoSearchTool |
|
|
from smolagents import Tool |
|
|
from huggingface_hub import InferenceClient |
|
|
import soundfile as sf |
|
|
import torch |
|
|
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline |
|
|
from datasets import load_dataset |
|
|
from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor |
|
|
from qwen_vl_utils import process_vision_info |
|
|
import requests |
|
|
from bs4 import BeautifulSoup |
|
|
from pydub import AudioSegment |
|
|
import numpy as np |
|
|
from scipy.io import wavfile |
|
|
import pandas as pd |
|
|
from PIL import Image |
|
|
import html5lib |
|
|
|
|
|
class read_python_file(Tool):
    """Agent tool that returns the full text of a Python source file."""

    name = "reader_python"
    description = "Read a python file. Will return the entire code in the file"
    inputs = {
        "file_name": {
            "type": "string",
            "description": "The python file path"
        },
    }
    output_type = "string"

    def forward(self, file_name):
        """Read ``file_name`` as UTF-8 text and return its content.

        Args:
            file_name: Path of the file to read.

        Returns:
            The file content on success. On failure the error message is
            printed and returned, so the caller always receives a string
            (matching ``output_type``) describing what went wrong.
        """
        try:
            with open(file_name, "r", encoding="utf-8") as source_file:
                content = source_file.read()
        except FileNotFoundError:
            # The original message interpolated an undefined name, which
            # raised NameError instead of reporting the missing file.
            message = f"Erreur : le fichier '{file_name}' n'existe pas."
            print(message)
            return message
        except Exception as e:
            # Best-effort tool: report any other read/decoding failure
            # instead of propagating it.
            message = f"Une erreur est survenue : {e}"
            print(message)
            return message

        print("Contenu du fichier :\n")
        return content
|
|
|
|
|
class read_excel_file(Tool):
    """Agent tool that loads the first sheet of an Excel workbook."""

    name = "reader_excel"
    description = "Read a excel file. Will return the entire info in the file"
    inputs = {
        "file_name": {
            "type": "string",
            "description": "The excel file path"
        },
    }
    output_type = "string"

    def forward(self, file_name):
        """Load the first sheet of the workbook at ``file_name``.

        The sheet names are printed before the first sheet is read.

        Args:
            file_name: Path of the Excel workbook.

        Returns:
            The first sheet as a ``pandas.DataFrame`` on success. On failure
            the error message is printed and returned as a string.
        """
        # NOTE(review): ``output_type`` is declared as "string" but the
        # success path returns a DataFrame. Kept as-is for backward
        # compatibility -- confirm whether callers expect text instead.
        try:
            # The context manager closes the workbook handle, which the
            # original left open.
            with pd.ExcelFile(file_name) as xls:
                print("Sheets :", xls.sheet_names)
                df = pd.read_excel(xls, sheet_name=xls.sheet_names[0])
        except FileNotFoundError:
            # The original message interpolated an undefined name, which
            # raised NameError instead of reporting the missing file.
            message = f"Erreur : le fichier '{file_name}' n'existe pas."
            print(message)
            return message
        except Exception as e:
            # Best-effort tool: report any other failure instead of raising.
            message = f"Une erreur est survenue : {e}"
            print(message)
            return message

        print("\nContent of the sheet :\n")
        return df
|
|
|
|
|
|
|
|
class is_commutative(Tool):
    """Agent tool that checks an operation table for commutativity."""

    name = "commutative"
    description = "Performs a study on a table set to see if it is commutative."
    inputs = {
        "set": {
            "type": "array",
            "items": {"type": "string"},
            "description": "The set defined, for example : {'a','b','c'}."
        },
        "table": {
            "type": "string",
            "description": "The table in markdown format with rows separated by '\n'. Give only the table after the '|---|---|---|---|---|---|' symbol, starting with the '\n', ending with '\n\n', as a string."
        }
    }
    output_type = "string"

    def forward(self, set, table):
        """Report whether the operation described by ``table`` is commutative.

        Args:
            set: The elements of the set. The parameter name shadows the
                builtin but is part of the tool's input schema. Rows and
                columns of the table are assumed to follow the sorted order
                of these elements.
            table: The body rows of a markdown table (no header row and no
                ``|---|`` separator row). Each row holds a row label
                followed by one result cell per element.

        Returns:
            ``"It is commutative"`` when the table is symmetric, otherwise a
            message naming the first pair of elements that breaks symmetry.

        Raises:
            ValueError: If the number of cells does not match a table of
                ``len(set)`` rows by ``len(set) + 1`` columns.
        """
        elements = sorted(set)
        size = len(elements)
        # One row-label column plus one result column per element. The
        # original hard-coded this width as 6, so it only handled sets of
        # exactly five elements.
        width = size + 1

        # Both '|' and '\n' act as cell separators. Splitting on them
        # (rather than iterating characters) tolerates padded and
        # multi-character cells; empty fragments are dropped.
        fragments = table.replace("\n", "|").split("|")
        cells = [fragment.strip() for fragment in fragments]
        cells = [cell for cell in cells if cell]

        if len(cells) != size * width:
            raise ValueError(
                f"Expected {size} rows of {width} cells "
                f"({size * width} cells in total), got {len(cells)}."
            )

        # Drop the leading row label of every row, keeping result cells only.
        rows = [
            cells[start + 1:start + width]
            for start in range(0, len(cells), width)
        ]

        # Checking the upper triangle is enough: a mismatch at (i, j) is the
        # same mismatch at (j, i), and this scan order reports the same
        # first pair as a full row-major scan.
        for i in range(size):
            for j in range(i + 1, size):
                if rows[i][j] != rows[j][i]:
                    return f"Not commutative, because of the elements: {elements[i]} and {elements[j]}"
        return "It is commutative"
|
|
|
|
|
class Web_research(Tool):
    """Agent tool that runs a DuckDuckGo search on a topic."""

    name = "web_search"
    description = "Web search on a specific topic."
    inputs = {
        "topic": {
            "type": "string",
            "description": "The topic on which the user wants the latest news"
        }
    }
    output_type = "string"

    def forward(self, topic: str):
        """Search the web for ``topic`` and return the results as text.

        Args:
            topic: The search query.

        Returns:
            A sentence introducing the topic followed by the output of
            ``DuckDuckGoSearchTool``.
        """
        search_tool = DuckDuckGoSearchTool()
        results = search_tool(str(topic))
        # The original wrote ``str({results})`` inside the f-string, which
        # emitted a literal "str(" ... ")" around the results.
        return f"Here is what we can find on the web for {topic} : {results}"
|
|
|
|
|
class Find_wikipedia_URL(Tool):
    """Agent tool that builds the English Wikipedia URL for a subject."""

    name = "wiki_url_tool"
    description = "Always use to check a wikipedia ENGLISH URL page before trying to acces the URL. For another langage, you just have to change the beginning of the url (here, it is en for english)"
    inputs = {
        "subject": {
            "type": "string",
            "description": "The name or topic on which you want the Wikipedia URL"
        }
    }
    output_type = "string"

    def forward(self, subject: str):
        """Build the Wikipedia URL for ``subject``.

        The whitespace-separated words of ``subject`` are joined with
        underscores and appended to the English Wikipedia base URL.

        Args:
            subject: The page title or topic.

        Returns:
            A message containing the URL and a hint on adjusting the case.
        """
        url_wiki = "https://en.wikipedia.org/wiki/" + "_".join(subject.split())
        # The original wrote ``str({url_wiki})`` inside the f-string, which
        # wrapped the URL in a literal "str(" ... ")" and corrupted the link.
        return f"Here is what we url to use : {url_wiki}. If it does not work, change the first letters of {subject} to be upper or lower, but never change anything else"
|
|
|
|
|
class translate_everything(Tool):
    """Agent tool that un-reverses a sentence written backwards."""

    name = "translator"
    description = "You do not understand a sentence? It does not look like any language you know? Try this tool, maybe the sentence is just reversed!"
    inputs = {
        "sentence": {
            "type": "string",
            "description": "The sentence to translate"
        }
    }
    output_type = "string"

    def forward(self, sentence: str):
        """Reverse every word and the word order of ``sentence``.

        Words are split on whitespace, so runs of whitespace collapse to
        single spaces in the result.

        Args:
            sentence: The text to un-reverse.

        Returns:
            The un-reversed text, prefixed with a short label.
        """
        # Walk the words last-to-first, flipping the letters of each one.
        flipped_words = (word[::-1] for word in reversed(sentence.split()))
        translated_sentence = " ".join(flipped_words)
        return f"The translated sentence is : {translated_sentence}"
|
|
|
|
|
class image_interpreter(Tool):
    """Agent tool that describes an image with Qwen2-VL-7B-Instruct."""

    name = "image_tool"
    description = "Allows you to convert an image to text (the function will describe the image)."
    inputs = {
        'image': {"type": "image", "description": "The image of interest, png format or jpeg (must be the local path of the image)"},
        'prompt': {"type": "string", "description": "The function returns a description of the image, but you can request specific details with this prompt to ensure they are included in the description."},
    }
    output_type = "string"

    def forward(self, image, prompt):
        """Generate a textual description of ``image``.

        Args:
            image: Local path of the image file, or an already loaded
                ``PIL.Image.Image``.
            prompt: Optional extra request appended to the base instruction.
                An empty or ``None`` prompt is ignored.

        Returns:
            The generated description (at most 128 new tokens).
        """
        device = "cuda" if torch.cuda.is_available() else "cpu"
        # NOTE(review): the model and processor are reloaded on every call.
        # Caching them would be faster but keeps the weights in memory
        # between calls -- confirm before changing.
        model = Qwen2VLForConditionalGeneration.from_pretrained(
            "Qwen/Qwen2-VL-7B-Instruct", torch_dtype="auto", device_map="auto"
        )
        processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")

        if isinstance(image, Image.Image):
            rgb_image = image.convert("RGB")
        else:
            # ``convert`` returns an independent copy, so the file handle
            # can be closed as soon as the conversion is done.
            with Image.open(image) as opened_image:
                rgb_image = opened_image.convert("RGB")

        # The original glued the prompt directly onto the instruction with
        # no separator, and a None prompt appended the text "None".
        instruction = "describe this image, with as much details as you can"
        if prompt:
            instruction = f"{instruction}. {prompt}"

        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "image", "image": rgb_image},
                    {"type": "text", "text": instruction},
                ],
            }
        ]

        text = processor.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        # Only the image inputs are forwarded to the processor; this tool
        # never sends video content.
        image_inputs, _video_inputs = process_vision_info(messages)
        model_inputs = processor(
            text=[text],
            images=image_inputs,
            padding=True,
            return_tensors="pt",
        ).to(device)

        generated_ids = model.generate(**model_inputs, max_new_tokens=128)
        # Strip the prompt tokens so only the newly generated answer is
        # decoded.
        generated_ids_trimmed = [
            out_ids[len(in_ids):]
            for in_ids, out_ids in zip(model_inputs.input_ids, generated_ids)
        ]
        output_text = processor.batch_decode(
            generated_ids_trimmed,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )

        return output_text[0]
|
|
|
|
|
|
|
|
class audio_or_mp3__interpreter(Tool):
    """Agent tool that transcribes an MP3 file with Whisper large-v3."""

    name = "audio_tool"
    description = "Allows you to convert audio into text. It uses Whisper, it is a state-of-the-art model for automatic speech recognition (ASR) and speech translation"
    inputs = {
        'audio': {"type": "audio", "description": "the audio of interest (must be the local path to this audio file). Must be in the format mp3."}
    }
    output_type = "string"

    def forward(self, audio):
        """Transcribe the MP3 file at ``audio``.

        The audio is converted to mono 16 kHz WAV in a temporary file, run
        through the speech-recognition pipeline, and the temporary file is
        removed afterwards.

        Args:
            audio: Local path of the MP3 file.

        Returns:
            The transcribed text.
        """
        # Local imports keep this block self-contained.
        import os
        import tempfile

        device = "cuda:0" if torch.cuda.is_available() else "cpu"
        torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

        # NOTE(review): the model is reloaded on every call; caching would
        # be faster but keeps the weights in memory -- confirm before
        # changing.
        model_id = "openai/whisper-large-v3"
        model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch_dtype)
        model.to(device)
        processor = AutoProcessor.from_pretrained(model_id)

        pipe = pipeline(
            "automatic-speech-recognition",
            model=model,
            tokenizer=processor.tokenizer,
            feature_extractor=processor.feature_extractor,
            torch_dtype=torch_dtype,
            device=device,
        )

        segment = AudioSegment.from_mp3(audio)
        segment = segment.set_channels(1).set_frame_rate(16000)

        # The original always wrote "output.wav" in the working directory,
        # which overwrote any existing file, collided between concurrent
        # calls and was never cleaned up. A unique temporary file avoids
        # all three.
        fd, wav_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        try:
            # ``export`` returns the open output file; close it so the
            # handle is not leaked and the file can be deleted afterwards.
            segment.export(wav_path, format="wav").close()
            result = pipe(wav_path, return_timestamps=True)
        finally:
            os.remove(wav_path)

        return result["text"]
|
|
|
|
|
class Wikipedia_reader(Tool):
    """Agent tool that extracts the HTML tables of a Wikipedia page."""

    name = "wiki_tool"
    description = "To be used whenever you need to read a Wikipedia page. Will return all wikitables, to easily read it and find information"
    inputs = {
        "url": {
            "type": "string",
            "description": "The wikippedia url page"
        }
    }
    output_type = "string"

    def forward(self, url: str):
        """Return every table that ``pandas.read_html`` finds at ``url``.

        Args:
            url: Address of the page to parse.

        Returns:
            Whatever ``pd.read_html(url)`` returns.
        """
        # NOTE(review): ``output_type`` is declared as "string" while the
        # value returned here comes straight from ``pd.read_html`` -- confirm
        # whether callers rely on the raw tables or expect text.
        return pd.read_html(url)