Final_Assignment_Template_V2

Sleeping

App Files Files Community

Final_Assignment_Template_V2 / tools.py

CindyDelage

Update tools.py

c0a5526 verified 12 months ago

raw

history blame

7.38 kB

	from smolagents import DuckDuckGoSearchTool
	from smolagents import Tool
	from huggingface_hub import InferenceClient
	import soundfile as sf
	import torch
	from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
	from datasets import load_dataset
	from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
	from qwen_vl_utils import process_vision_info

	class Web_research(Tool):
	    """Tool that runs a DuckDuckGo search on a topic and returns the results as text."""

	    name = "web_research"
	    description = "Web search on a specific topic."
	    inputs = {
	        "topic": {
	            "type": "string",
	            "description": "The topic on which the user wants the latest news"
	        }
	    }
	    output_type = "string"

	    def forward(self, topic: str) -> str:
	        """Search the web for ``topic``.

	        Args:
	            topic: The search query.

	        Returns:
	            A sentence that embeds whatever ``DuckDuckGoSearchTool`` returned
	            for the query.
	        """
	        search_tool = DuckDuckGoSearchTool()
	        results = search_tool(topic)
	        # The original wrote ``str({results})`` inside the f-string, which
	        # emitted the literal characters "str(" and ")" around the results.
	        return f"Here is what we can find on the web for {topic} : {results}"

	class Find_wikipedia_URL(Tool):
	    """Tool that builds the English Wikipedia URL for a subject.

	    No network request is made: the URL is derived purely from the subject
	    string, so it is a best guess that the caller may need to adjust.
	    """

	    name = "wiki_url"
	    description = "Always use to check a wikipedia ENGLISH URL page before trying to acces the URL. For another langage, you just have to change the beginning of the url (here, it is en for english)"
	    inputs = {
	        "subject": {
	            "type": "string",
	            "description": "The name or topic on which you want the Wikipedia URL"
	        }
	    }
	    output_type = "string"

	    def forward(self, subject: str) -> str:
	        """Return a message containing the candidate Wikipedia URL for ``subject``.

	        Args:
	            subject: Name or topic; whitespace-separated words are joined
	                with underscores to form the page title.

	        Returns:
	            A sentence embedding the URL plus a hint on how to adjust it.
	        """
	        page_title = "_".join(subject.split())
	        url_wiki = "https://en.wikipedia.org/wiki/" + page_title
	        # The original wrote ``str({url_wiki})`` inside the f-string, which
	        # wrapped the URL in the literal text "str(...)" and made it unusable
	        # when copied verbatim.
	        return f"Here is what we url to use : {url_wiki}. If it does not work, change the first letters of {subject} to be upper or lower, but never change anything else"

	class translate_everything(Tool):
	    """Tool that un-reverses a sentence that was written backwards."""

	    name = "translator"
	    description = "You do not understand a sentence? It does not look like any language you know? Try this tool, maybe the sentence is just reversed!"
	    inputs = {
	        "sentence": {
	            "type": "string",
	            "description": "The sentence to translate"
	        }
	    }
	    output_type = "string"

	    def forward(self, sentence: str):
	        """Reverse ``sentence`` character-wise and return it in a short message.

	        Reversing the whole string flips both the letters of every word and
	        the order of the words; splitting and re-joining collapses any run of
	        whitespace into a single space.
	        """
	        mirrored_tokens = sentence[::-1].split()
	        translated_sentence = " ".join(mirrored_tokens)
	        return f"The translated sentence is : {translated_sentence}"

	class multimodal_interpreter(Tool):
	    """Tool that answers a question about an image using Qwen2-VL-7B-Instruct."""

	    name = "multimodal_tool"
	    description = "Allows you to answer any question which relies on image or video input."
	    inputs = {
	        'image': {"type": "image", "description": "the image or video of interest"},
	        'prompt': {"type": "string", "description": "Any specific question you have on the image. For example, the prompt can be : Describe this image."}
	    }
	    output_type = "string"

	    def forward(self, prompt, image):
	        """Ask the vision-language model ``prompt`` about ``image``.

	        Args:
	            prompt: The question or instruction about the image.
	            image: The image to inspect. It is passed through unchanged to
	                ``process_vision_info`` (presumably a PIL image, path or URL;
	                confirm against the caller).

	        Returns:
	            The decoded model answer as a single string (at most 128 new tokens).
	        """
	        model_id = "Qwen/Qwen2-VL-7B-Instruct"

	        # NOTE(review): the model and processor are loaded on every call.
	        # Consider caching them if the tool is invoked repeatedly.
	        model = Qwen2VLForConditionalGeneration.from_pretrained(
	            model_id, torch_dtype="auto", device_map="auto"
	        )
	        # ``min_pixels`` / ``max_pixels`` may be passed here to bound the number
	        # of visual tokens per image (e.g. 256*28*28 and 1280*28*28).
	        processor = AutoProcessor.from_pretrained(model_id)

	        # The original wrote ``{image}`` / ``{prompt}``, which are set literals,
	        # not the values themselves.
	        messages = [
	            {
	                "role": "user",
	                "content": [
	                    {"type": "image", "image": image},
	                    {"type": "text", "text": prompt},
	                ],
	            }
	        ]

	        # Preparation for inference.
	        text = processor.apply_chat_template(
	            messages, tokenize=False, add_generation_prompt=True
	        )
	        image_inputs, video_inputs = process_vision_info(messages)
	        model_inputs = processor(
	            text=[text],
	            images=image_inputs,
	            videos=video_inputs,
	            padding=True,
	            return_tensors="pt",
	        )
	        # Follow the device chosen by ``device_map="auto"`` instead of
	        # hard-coding "cuda", which fails on CPU-only hosts.
	        model_inputs = model_inputs.to(model.device)

	        # Inference: generate, then strip the prompt tokens from each output.
	        generated_ids = model.generate(**model_inputs, max_new_tokens=128)
	        generated_ids_trimmed = [
	            out_ids[len(in_ids):]
	            for in_ids, out_ids in zip(model_inputs.input_ids, generated_ids)
	        ]
	        output_text = processor.batch_decode(
	            generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
	        )
	        # ``batch_decode`` returns a list with one entry per batch item; the
	        # tool declares ``output_type = "string"``, so return the single answer.
	        return output_text[0]

	class audio_or_mp3__interpreter(Tool):
	    """Tool that transcribes audio to text with openai/whisper-large-v3."""

	    # The original reused "multimodal_tool", colliding with the image tool's
	    # name; a tool name is its identifier, so it is made distinct here.
	    name = "audio_tool"
	    description = "Allows you to convert audio into text. It uses Whisper, it is a state-of-the-art model for automatic speech recognition (ASR) and speech translation"
	    inputs = {
	        'audio': {"type": "audio", "description": "the audio of interest"}
	    }
	    output_type = "string"

	    def forward(self, audio):
	        """Transcribe ``audio`` and return the recognised text.

	        The original signature also required an unused ``prompt`` argument
	        that is not declared in ``inputs``, so a call supplying only the
	        declared ``audio`` input could not succeed.

	        Args:
	            audio: The audio to transcribe, handed directly to the ASR
	                pipeline (presumably a file path, raw bytes or a waveform
	                array; confirm against the caller).

	        Returns:
	            The ``"text"`` field of the pipeline result.
	        """
	        use_cuda = torch.cuda.is_available()
	        device = "cuda:0" if use_cuda else "cpu"
	        torch_dtype = torch.float16 if use_cuda else torch.float32

	        model_id = "openai/whisper-large-v3"

	        # NOTE(review): the model is loaded on every call. Consider caching it
	        # if the tool is invoked repeatedly.
	        model = AutoModelForSpeechSeq2Seq.from_pretrained(
	            model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
	        )
	        model.to(device)

	        processor = AutoProcessor.from_pretrained(model_id)

	        pipe = pipeline(
	            "automatic-speech-recognition",
	            model=model,
	            tokenizer=processor.tokenizer,
	            feature_extractor=processor.feature_extractor,
	            torch_dtype=torch_dtype,
	            device=device,
	        )

	        # The original wrote ``{audio}``, a set literal wrapping the audio,
	        # rather than passing the audio itself.
	        result = pipe(audio)
	        return result["text"]

	class Wikipedia_reader(Tool):
	    """Tool that downloads a web page and returns its text content."""

	    name = "wikipedia_tool"
	    description = "To be used whenever you need to read a Wikipedia page. Will return all the text of the Wikipedia page, to easily read it and find information"
	    inputs = {
	        "url": {
	            "type": "string",
	            "description": "The wikippedia url page"
	        }
	    }
	    output_type = "string"

	    def forward(self, url: str) -> str:
	        """Fetch ``url`` and return the page text with HTML markup removed.

	        Args:
	            url: Address of the page to read.

	        Returns:
	            The text extracted from the page, or a message starting with
	            "Error downloading page:" when the download fails.
	        """
	        # Imported here because this block already relied on both names but
	        # the module never imported them.
	        import requests
	        from bs4 import BeautifulSoup

	        try:
	            page = requests.get(url, timeout=30)
	            page.raise_for_status()
	        except requests.RequestException as e:
	            # The original only printed the error and then fell through to
	            # use the unbound ``page`` variable; report the failure instead.
	            print('Error downloading page: ', e)
	            return f"Error downloading page: {e}"

	        soup = BeautifulSoup(page.text, 'html.parser')
	        return soup.get_text()