First_agent_template

Sleeping

App Files Files Community

sam9407287 committed on Feb 19, 2025

Commit

ca94c97

verified ·

1 Parent(s): bae8644

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -17

app.py CHANGED Viewed

@@ -4,35 +4,50 @@ import requests
 import pytz
 import yaml
 from tools.final_answer import FinalAnswerTool
 from Gradio_UI import GradioUI
-# Below is an example of a tool that does nothing. Amaze us with your creativity !
 @tool
-def generate_image(prompt: str) -> bytes:
     """
-    Generates an image from a text prompt using a Hugging Face model.
     Args:
-        prompt: The text prompt to generate the image from.  A detailed and descriptive prompt will yield better results. Consider including details about the style, composition, and subject of the image you want. For example: "A photorealistic image of a cat wearing a hat, sitting in a field of flowers, under a blue sky."
     Returns:
-        bytes: The image data in bytes format, or an error message as bytes.
     """
     try:
-        payload = {
-            "inputs": prompt,
-        }
-        response = requests.post(API_URL, headers=headers, json=payload)
-        response.raise_for_status()
-        image = response.content
-        return image
-    except requests.exceptions.RequestException as e:
-        return f"Error generating image: {str(e)}".encode('utf-8')
     except Exception as e:
-        return f"An unexpected error occurred: {str(e)}".encode('utf-8')
 @tool
 def get_current_time_in_timezone(timezone: str) -> str:
     """A tool that fetches the current local time in a specified timezone.
@@ -70,7 +85,7 @@ with open("prompts.yaml", 'r') as stream:
 agent = CodeAgent(
     model=model,
-    tools=[final_answer], ## add your tools here (don't remove final answer)
     max_steps=6,
     verbosity_level=1,
     grammar=None,

 import pytz
 import yaml
 from tools.final_answer import FinalAnswerTool
+import os
+import scipy.io.wavfile
+from transformers import BarkModel, BarkTokenizer
 from Gradio_UI import GradioUI
+# Load Bark model and tokenizer
+tokenizer = BarkTokenizer.from_pretrained("suno/bark-small")
+model = BarkModel.from_pretrained("suno/bark-small")
+# Define the text-to-speech tool
 @tool
+def text_to_speech(text: str) -> bytes:
     """
+    Converts text to speech using the Bark model.
     Args:
+        text: The text to be converted to speech.
     Returns:
+        bytes: The audio data in WAV format.
     """
     try:
+        inputs = tokenizer(text, return_tensors="pt")
+        audio_array = model.generate(**inputs)
+        # Convert the generated audio array to a NumPy array
+        audio_array = audio_array.cpu().numpy()
+        # Save the audio array as a WAV file
+        rate = 16000 # Bark model sample rate
+        filename = "output.wav"
+        scipy.io.wavfile.write(filename, rate, audio_array)
+        # Read the WAV file and convert it to bytes
+        with open(filename, "rb") as f:
+            audio_bytes = f.read()
+        return audio_bytes
     except Exception as e:
+        return f"Error converting text to speech: {str(e)}".encode('utf-8')
+# Define the get current time in timezone tool
 @tool
 def get_current_time_in_timezone(timezone: str) -> str:
     """A tool that fetches the current local time in a specified timezone.
 agent = CodeAgent(
     model=model,
+    tools=[final_answer, text_to_speech], ## add your tools here (don't remove final answer)
     max_steps=6,
     verbosity_level=1,
     grammar=None,