update readme, add error messages, time logging
Files changed:
- README.md (+16 -3)
- lib/sentiment_analyser.py (+5 -3)
- utils/audio_palette.py (+32 -8)
- utils/gradio_helper.py (+2 -2)
README.md CHANGED
@@ -8,8 +8,21 @@ sdk_version: 4.7.1
 app_file: app.py
 pinned: false
 license: mit
-models:
-
+models:
+- onlycaps/pace_model_weights
+- Salesforce/blip-image-captioning-large
+- facebook/musicgen-small
+tags:
+- "image2music-generation"
+- "image-captioning"
 ---
 
-
+# Audio Palette
+
+### Usage
+
+Since this Space is running on CPU, it is not possible to generate music in a reasonable time.
+
+To address this, we have provided a [Python notebook](./notebooks/AudioPalette.ipynb) that handles the music generation part and can be run locally (if you have a GPU) or elsewhere.
+
+This uses FastAPI to accept API requests and ngrok to expose the server. The same ngrok link needs to be pasted into the input box. (Make sure to include the trailing `/`.)
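The GPU-side server the README points to lives in the linked notebook, not in this commit. For orientation, a minimal sketch of such a FastAPI + ngrok server (using the `pyngrok` package; the route name, payload shape, and generation stub are assumptions for illustration, not the notebook's actual code):

```python
# Hypothetical sketch of the server side described in the README.
# Route name, payload shape, and the generation stub are illustrative.
from fastapi import FastAPI
from pydantic import BaseModel
from pyngrok import ngrok
import uvicorn

app = FastAPI()

class Prompt(BaseModel):
    text: str  # e.g. the caption/pace prompt sent by the Space

@app.post("/generate")
def generate(prompt: Prompt):
    # Here the notebook would run facebook/musicgen-small on prompt.text
    # and return the generated audio; a stub response stands in for that.
    return {"status": "ok", "prompt": prompt.text}

if __name__ == "__main__":
    tunnel = ngrok.connect(8000)    # public URL to paste into the Space
    print(tunnel.public_url + "/")  # include the trailing slash
    uvicorn.run(app, host="0.0.0.0", port=8000)
```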
lib/sentiment_analyser.py CHANGED
@@ -1,7 +1,7 @@
 import os
 import string
 from collections import Counter
-from datetime import datetime
+from datetime import datetime, timezone, timedelta
 from pathlib import Path
 
 import nltk
@@ -13,9 +13,11 @@ from nltk.tokenize import word_tokenize
 from utils import *
 
 datetime_format = "%d/%m/%Y %H:%M:%S"
-
+ist_offset = timedelta(hours=5, minutes=30)
 def now():
-    return datetime.now().strftime(datetime_format)
+    utc_time = datetime.now(timezone.utc)
+    ist_time = utc_time.astimezone(timezone(ist_offset))
+    return ist_time.strftime(datetime_format)
 
 class SentimentAnalyser:
     def __init__(self):
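The IST time logging added here (and again in utils/audio_palette.py below) is self-contained, so it can be checked in isolation; this snippet reproduces the new helper and prints a sample log line:

```python
from datetime import datetime, timezone, timedelta

datetime_format = "%d/%m/%Y %H:%M:%S"
ist_offset = timedelta(hours=5, minutes=30)  # IST is UTC+05:30

def now():
    # Take an aware UTC timestamp, shift it into IST, then format it.
    utc_time = datetime.now(timezone.utc)
    ist_time = utc_time.astimezone(timezone(ist_offset))
    return ist_time.strftime(datetime_format)

print(f"[{now()}] Pace Prediction Done")  # e.g. [25/12/2023 17:30:00] Pace Prediction Done
```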
utils/audio_palette.py CHANGED
@@ -1,15 +1,19 @@
 import typing
-from datetime import datetime
+from datetime import datetime, timezone, timedelta
 
 import PIL
 from PIL import Image
 from moviepy.editor import *
+from gradio import Error
 
 from lib import *
 
 datetime_format = "%d/%m/%Y %H:%M:%S"
+ist_offset = timedelta(hours=5, minutes=30)
 def now():
-    return datetime.now().strftime(datetime_format)
+    utc_time = datetime.now(timezone.utc)
+    ist_time = utc_time.astimezone(timezone(ist_offset))
+    return ist_time.strftime(datetime_format)
 
 class AudioPalette:
     def __init__(self, pace_model_weights_path, resnet50_tf_model_weights_path, height, width, channels):
@@ -36,12 +40,22 @@ class AudioPalette:
 
         return prompt
 
-    def generate_single(self, input_image: PIL.Image.Image, instrument: typing.Union[str, None], ngrok_endpoint: str):
+    def generate_single(self, input_image: PIL.Image.Image, instrument: typing.Union[str, None], ngrok_endpoint: typing.Union[str, None]):
+        if not ngrok_endpoint:
+            print(f"[{now()}] ngrok endpoint missing")
+            raise Error("ngrok endpoint missing")
+        print(f"[{now()}] {ngrok_endpoint}")
+
         pace = self.pace_model.predict(input_image)
         print(f"[{now()}]", pace)
         print(f"[{now()}] Pace Prediction Done")
 
-        generated_text = self.image_captioning.query(input_image)[0].get("generated_text")
+        try:
+            generated_text = self.image_captioning.query(input_image)[0].get("generated_text")
+        except Exception as e:
+            print(f"[{now()}] image captioning error")
+            raise Error(repr(e))
+
         print(f"[{now()}]", generated_text)
         print(f"[{now()}] Captioning Done")
 
@@ -71,7 +85,12 @@ class AudioPalette:
         concat_clip.write_videofile(file_name, fps=24)
         return file_name
 
-    def generate_multiple(self, file_paths: typing.List[str], instrument: typing.Union[str, None], ngrok_endpoint: str):
+    def generate_multiple(self, file_paths: typing.List[str], instrument: typing.Union[str, None], ngrok_endpoint: typing.Union[str, None]):
+        if not ngrok_endpoint:
+            print(f"[{now()}] ngrok endpoint missing")
+            raise Error("ngrok endpoint missing")
+        print(f"[{now()}] {ngrok_endpoint}")
+
         images = [Image.open(image_path) for image_path in file_paths]
         pace = []
         generated_text = []
@@ -86,9 +105,14 @@ class AudioPalette:
         print(f"[{now()}] Pace Prediction Done")
 
         # Generating the caption for all the images
-        for image in images:
-            caption = self.image_captioning.query(image)[0].get("generated_text")
-            generated_text.append(caption)
+        try:
+            for image in images:
+                caption = self.image_captioning.query(image)[0].get("generated_text")
+                generated_text.append(caption)
+        except Exception as e:
+            print(f"[{now()}] image captioning error")
+            raise Error(repr(e))
+
         print(f"[{now()}]", generated_text)
         print(f"[{now()}] Captioning Done")
 
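The `raise Error(...)` calls above lean on Gradio's behaviour of rendering a raised `gradio.Error` as a visible error message in the UI instead of a silent server-side traceback. A minimal sketch of the pattern, with a hypothetical handler standing in for the Space's real ones:

```python
import gradio as gr

def handler(ngrok_endpoint: str) -> str:
    # Mirrors the commit's validation: fail loudly in the UI
    # when the endpoint box is left empty.
    if not ngrok_endpoint:
        raise gr.Error("ngrok endpoint missing")  # shown to the user as an error
    return f"using {ngrok_endpoint}"

demo = gr.Interface(
    fn=handler,
    inputs=gr.Textbox(lines=1, placeholder="ngrok endpoint", label="ngrok endpoint"),
    outputs="text",
)

if __name__ == "__main__":
    demo.launch()
```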
utils/gradio_helper.py CHANGED
@@ -21,7 +21,7 @@ def single_image_interface(model: AudioPalette):
         gr.Textbox(
             lines=1,
             placeholder="ngrok endpoint",
-            label="
+            label="ngrok endpoint",
             show_label=True,
             container=True,
             type="text",
@@ -95,7 +95,7 @@ def multi_image_interface(model: AudioPalette):
         gr.Textbox(
             lines=1,
             placeholder="ngrok endpoint",
-            label="
+            label="ngrok endpoint",
             show_label=True,
             container=True,
             type="text",