update readme, add error messages, time logging
Files changed:
- README.md (+16 -3)
- lib/sentiment_analyser.py (+5 -3)
- utils/audio_palette.py (+32 -8)
- utils/gradio_helper.py (+2 -2)
README.md CHANGED
@@ -8,8 +8,21 @@ sdk_version: 4.7.1
 app_file: app.py
 pinned: false
 license: mit
-models:
-
+models:
+- onlycaps/pace_model_weights
+- Salesforce/blip-image-captioning-large
+- facebook/musicgen-small
+tags:
+- "image2music-generation"
+- "image-captioning"
 ---
 
-
+# Audio Palette
+
+### Usage
+
+Since this Space is running on CPU, it is not possible to generate music in a reasonable time.
+
+To address this, we have provided a [Python notebook](./notebooks/AudioPalette.ipynb) that handles the music generation part and can be run locally (if you have a GPU) or elsewhere.
+
+This uses FastAPI to accept API requests and ngrok to expose the server. The same ngrok link needs to be pasted into the input box. (Make sure to include the trailing `/`.)
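The GPU-side server the README points to lives in the linked notebook, not in this commit. For orientation, a minimal sketch of such a FastAPI + ngrok server (using the `pyngrok` package; the route name, payload shape, and generation stub are assumptions for illustration, not the notebook's actual code):

```python
# Hypothetical sketch of the server side described in the README.
# Route name, payload shape, and the generation stub are illustrative.
from fastapi import FastAPI
from pydantic import BaseModel
from pyngrok import ngrok
import uvicorn

app = FastAPI()

class Prompt(BaseModel):
    text: str  # e.g. the caption/pace prompt sent by the Space

@app.post("/generate")
def generate(prompt: Prompt):
    # Here the notebook would run facebook/musicgen-small on prompt.text
    # and return the generated audio; a stub response stands in for that.
    return {"status": "ok", "prompt": prompt.text}

if __name__ == "__main__":
    tunnel = ngrok.connect(8000)    # public URL to paste into the Space
    print(tunnel.public_url + "/")  # include the trailing slash
    uvicorn.run(app, host="0.0.0.0", port=8000)
```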
lib/sentiment_analyser.py CHANGED
@@ -1,7 +1,7 @@
 import os
 import string
 from collections import Counter
-from datetime import datetime
+from datetime import datetime, timezone, timedelta
 from pathlib import Path
 
 import nltk
@@ -13,9 +13,11 @@ from nltk.tokenize import word_tokenize
 from utils import *
 
 datetime_format = "%d/%m/%Y %H:%M:%S"
-
+ist_offset = timedelta(hours=5, minutes=30)
 def now():
-    return datetime.now().strftime(datetime_format)
+    utc_time = datetime.now(timezone.utc)
+    ist_time = utc_time.astimezone(timezone(ist_offset))
+    return ist_time.strftime(datetime_format)
 
 class SentimentAnalyser:
     def __init__(self):
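The IST time logging added here (and again in utils/audio_palette.py below) is self-contained, so it can be checked in isolation; this snippet reproduces the new helper and prints a sample log line:

```python
from datetime import datetime, timezone, timedelta

datetime_format = "%d/%m/%Y %H:%M:%S"
ist_offset = timedelta(hours=5, minutes=30)  # IST is UTC+05:30

def now():
    # Take an aware UTC timestamp, shift it into IST, then format it.
    utc_time = datetime.now(timezone.utc)
    ist_time = utc_time.astimezone(timezone(ist_offset))
    return ist_time.strftime(datetime_format)

print(f"[{now()}] Pace Prediction Done")  # e.g. [25/12/2023 17:30:00] Pace Prediction Done
```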
utils/audio_palette.py CHANGED
@@ -1,15 +1,19 @@
 import typing
-from datetime import datetime
+from datetime import datetime, timezone, timedelta
 
 import PIL
 from PIL import Image
 from moviepy.editor import *
+from gradio import Error
 
 from lib import *
 
 datetime_format = "%d/%m/%Y %H:%M:%S"
+ist_offset = timedelta(hours=5, minutes=30)
 def now():
-    return datetime.now().strftime(datetime_format)
+    utc_time = datetime.now(timezone.utc)
+    ist_time = utc_time.astimezone(timezone(ist_offset))
+    return ist_time.strftime(datetime_format)
 
 class AudioPalette:
     def __init__(self, pace_model_weights_path, resnet50_tf_model_weights_path, height, width, channels):
@@ -36,12 +40,22 @@ class AudioPalette:
 
         return prompt
 
-    def generate_single(self, input_image: PIL.Image.Image, instrument: typing.Union[str, None], ngrok_endpoint: str):
+    def generate_single(self, input_image: PIL.Image.Image, instrument: typing.Union[str, None], ngrok_endpoint: typing.Union[str, None]):
+        if not ngrok_endpoint:
+            print(f"[{now()}] ngrok endpoint missing")
+            raise Error("ngrok endpoint missing")
+        print(f"[{now()}] {ngrok_endpoint}")
+
         pace = self.pace_model.predict(input_image)
         print(f"[{now()}]", pace)
         print(f"[{now()}] Pace Prediction Done")
 
-        generated_text = self.image_captioning.query(input_image)[0].get("generated_text")
+        try:
+            generated_text = self.image_captioning.query(input_image)[0].get("generated_text")
+        except Exception as e:
+            print(f"[{now()}] image captioning error")
+            raise Error(repr(e))
+
         print(f"[{now()}]", generated_text)
         print(f"[{now()}] Captioning Done")
 
@@ -71,7 +85,12 @@ class AudioPalette:
         concat_clip.write_videofile(file_name, fps=24)
         return file_name
 
-    def generate_multiple(self, file_paths: typing.List[str], instrument: typing.Union[str, None], ngrok_endpoint: str):
+    def generate_multiple(self, file_paths: typing.List[str], instrument: typing.Union[str, None], ngrok_endpoint: typing.Union[str, None]):
+        if not ngrok_endpoint:
+            print(f"[{now()}] ngrok endpoint missing")
+            raise Error("ngrok endpoint missing")
+        print(f"[{now()}] {ngrok_endpoint}")
+
         images = [Image.open(image_path) for image_path in file_paths]
         pace = []
         generated_text = []
@@ -86,9 +105,14 @@ class AudioPalette:
         print(f"[{now()}] Pace Prediction Done")
 
         # Generating the caption for all the images
-        for image in images:
-            caption = self.image_captioning.query(image)[0].get("generated_text")
-            generated_text.append(caption)
+        try:
+            for image in images:
+                caption = self.image_captioning.query(image)[0].get("generated_text")
+                generated_text.append(caption)
+        except Exception as e:
+            print(f"[{now()}] image captioning error")
+            raise Error(repr(e))
+
         print(f"[{now()}]", generated_text)
         print(f"[{now()}] Captioning Done")
 
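The `raise Error(...)` calls above lean on Gradio's behaviour of rendering a raised `gradio.Error` as a visible error message in the UI instead of a silent server-side traceback. A minimal sketch of the pattern, with a hypothetical handler standing in for the Space's real ones:

```python
import gradio as gr

def handler(ngrok_endpoint: str) -> str:
    # Mirrors the commit's validation: fail loudly in the UI
    # when the endpoint box is left empty.
    if not ngrok_endpoint:
        raise gr.Error("ngrok endpoint missing")  # shown to the user as an error
    return f"using {ngrok_endpoint}"

demo = gr.Interface(
    fn=handler,
    inputs=gr.Textbox(lines=1, placeholder="ngrok endpoint", label="ngrok endpoint"),
    outputs="text",
)

if __name__ == "__main__":
    demo.launch()
```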
utils/gradio_helper.py CHANGED
@@ -21,7 +21,7 @@ def single_image_interface(model: AudioPalette):
         gr.Textbox(
             lines=1,
             placeholder="ngrok endpoint",
-            label="
+            label="ngrok endpoint",
             show_label=True,
             container=True,
             type="text",
@@ -95,7 +95,7 @@ def multi_image_interface(model: AudioPalette):
         gr.Textbox(
             lines=1,
             placeholder="ngrok endpoint",
-            label="
+            label="ngrok endpoint",
             show_label=True,
             container=True,
             type="text",