manasch commited on
Commit
3e07b9c
·
verified ·
1 Parent(s): f2d4c46

update readme, add error messages, time logging

Browse files
README.md CHANGED
@@ -8,8 +8,21 @@ sdk_version: 4.7.1
8
  app_file: app.py
9
  pinned: false
10
  license: mit
11
- models: ["onlycaps/pace_model_weights", "Salesforce/blip-image-captioning-large", "facebook/musicgen-small"]
12
- tags: ["image2music-generation", "image-captioning"]
 
 
 
 
 
13
  ---
14
 
15
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
8
  app_file: app.py
9
  pinned: false
10
  license: mit
11
+ models:
12
+ - onlycaps/pace_model_weights
13
+ - Salesforce/blip-image-captioning-large
14
+ - facebook/musicgen-small
15
+ tags:
16
+ - "image2music-generation"
17
+ - "image-captioning"
18
  ---
19
 
20
+ # Audio Palette
21
+
22
+ ### Usage
23
+
24
+ Since this Space runs on CPU, it is not possible to generate music in a reasonable amount of time.
25
+
26
+ To address this, we have provided a [python notebook](./notebooks/AudioPalette.ipynb) that handles the music generation part, which can be run locally (if you have a GPU) or elsewhere.
27
+
28
+ This uses FastAPI to accept API requests and ngrok to expose the server. The same ngrok link needs to be pasted into the input box. (Make sure to include the trailing `/`.)
lib/sentiment_analyser.py CHANGED
@@ -1,7 +1,7 @@
1
  import os
2
  import string
3
  from collections import Counter
4
- from datetime import datetime
5
  from pathlib import Path
6
 
7
  import nltk
@@ -13,9 +13,11 @@ from nltk.tokenize import word_tokenize
13
  from utils import *
14
 
15
  datetime_format = "%d/%m/%Y %H:%M:%S"
16
-
17
  def now():
18
- return datetime.now().strftime(datetime_format)
 
 
19
 
20
  class SentimentAnalyser:
21
  def __init__(self):
 
1
  import os
2
  import string
3
  from collections import Counter
4
+ from datetime import datetime, timezone, timedelta
5
  from pathlib import Path
6
 
7
  import nltk
 
13
  from utils import *
14
 
15
  datetime_format = "%d/%m/%Y %H:%M:%S"
16
+ ist_offset = timedelta(hours=5, minutes=30)
17
  def now():
18
+ utc_time = datetime.now(timezone.utc)
19
+ ist_time = utc_time.astimezone(timezone(ist_offset))
20
+ return ist_time.strftime(datetime_format)
21
 
22
  class SentimentAnalyser:
23
  def __init__(self):
utils/audio_palette.py CHANGED
@@ -1,15 +1,19 @@
1
  import typing
2
- from datetime import datetime
3
 
4
  import PIL
5
  from PIL import Image
6
  from moviepy.editor import *
 
7
 
8
  from lib import *
9
 
10
  datetime_format = "%d/%m/%Y %H:%M:%S"
 
11
  def now():
12
- return datetime.now().strftime(datetime_format)
 
 
13
 
14
  class AudioPalette:
15
  def __init__(self, pace_model_weights_path, resnet50_tf_model_weights_path, height, width, channels):
@@ -36,12 +40,22 @@ class AudioPalette:
36
 
37
  return prompt
38
 
39
- def generate_single(self, input_image: PIL.Image.Image, instrument: typing.Union[str, None], ngrok_endpoint: str):
 
 
 
 
 
40
  pace = self.pace_model.predict(input_image)
41
  print(f"[{now()}]", pace)
42
  print(f"[{now()}] Pace Prediction Done")
43
 
44
- generated_text = self.image_captioning.query(input_image)[0].get("generated_text")
 
 
 
 
 
45
  print(f"[{now()}]", generated_text)
46
  print(f"[{now()}] Captioning Done")
47
 
@@ -71,7 +85,12 @@ class AudioPalette:
71
  concat_clip.write_videofile(file_name, fps=24)
72
  return file_name
73
 
74
- def generate_multiple(self, file_paths: typing.List[str], instrument: typing.Union[str, None], ngrok_endpoint: str):
 
 
 
 
 
75
  images = [Image.open(image_path) for image_path in file_paths]
76
  pace = []
77
  generated_text = []
@@ -86,9 +105,14 @@ class AudioPalette:
86
  print(f"[{now()}] Pace Prediction Done")
87
 
88
  # Generating the caption for all the images
89
- for image in images:
90
- caption = self.image_captioning.query(image)[0].get("generated_text")
91
- generated_text.append(caption)
 
 
 
 
 
92
  print(f"[{now()}]", generated_text)
93
  print(f"[{now()}] Captioning Done")
94
 
 
1
  import typing
2
+ from datetime import datetime, timezone, timedelta
3
 
4
  import PIL
5
  from PIL import Image
6
  from moviepy.editor import *
7
+ from gradio import Error
8
 
9
  from lib import *
10
 
11
  datetime_format = "%d/%m/%Y %H:%M:%S"
12
+ ist_offset = timedelta(hours=5, minutes=30)
13
  def now():
14
+ utc_time = datetime.now(timezone.utc)
15
+ ist_time = utc_time.astimezone(timezone(ist_offset))
16
+ return ist_time.strftime(datetime_format)
17
 
18
  class AudioPalette:
19
  def __init__(self, pace_model_weights_path, resnet50_tf_model_weights_path, height, width, channels):
 
40
 
41
  return prompt
42
 
43
+ def generate_single(self, input_image: PIL.Image.Image, instrument: typing.Union[str, None], ngrok_endpoint: typing.Union[str, None]):
44
+ if not ngrok_endpoint:
45
+ print(f"[{now()}] ngrok endpoint missing")
46
+ raise Error("ngrok endpoint missing")
47
+ print(f"[{now()}] {ngrok_endpoint}")
48
+
49
  pace = self.pace_model.predict(input_image)
50
  print(f"[{now()}]", pace)
51
  print(f"[{now()}] Pace Prediction Done")
52
 
53
+ try:
54
+ generated_text = self.image_captioning.query(input_image)[0].get("generated_text")
55
+ except Exception as e:
56
+ print(f"[{now()}] image captioning error")
57
+ raise Error(repr(e))
58
+
59
  print(f"[{now()}]", generated_text)
60
  print(f"[{now()}] Captioning Done")
61
 
 
85
  concat_clip.write_videofile(file_name, fps=24)
86
  return file_name
87
 
88
+ def generate_multiple(self, file_paths: typing.List[str], instrument: typing.Union[str, None], ngrok_endpoint: typing.Union[str, None]):
89
+ if not ngrok_endpoint:
90
+ print(f"[{now()}] ngrok endpoint missing")
91
+ raise Error("ngrok endpoint missing")
92
+ print(f"[{now()}] {ngrok_endpoint}")
93
+
94
  images = [Image.open(image_path) for image_path in file_paths]
95
  pace = []
96
  generated_text = []
 
105
  print(f"[{now()}] Pace Prediction Done")
106
 
107
  # Generating the caption for all the images
108
+ try:
109
+ for image in images:
110
+ caption = self.image_captioning.query(image)[0].get("generated_text")
111
+ generated_text.append(caption)
112
+ except Exception as e:
113
+ print(f"[{now()}] image captioning error")
114
+ raise Error(repr(e))
115
+
116
  print(f"[{now()}]", generated_text)
117
  print(f"[{now()}] Captioning Done")
118
 
utils/gradio_helper.py CHANGED
@@ -21,7 +21,7 @@ def single_image_interface(model: AudioPalette):
21
  gr.Textbox(
22
  lines=1,
23
  placeholder="ngrok endpoint",
24
- label="colab endpoint",
25
  show_label=True,
26
  container=True,
27
  type="text",
@@ -95,7 +95,7 @@ def multi_image_interface(model: AudioPalette):
95
  gr.Textbox(
96
  lines=1,
97
  placeholder="ngrok endpoint",
98
- label="colab endpoint",
99
  show_label=True,
100
  container=True,
101
  type="text",
 
21
  gr.Textbox(
22
  lines=1,
23
  placeholder="ngrok endpoint",
24
+ label="ngrok endpoint",
25
  show_label=True,
26
  container=True,
27
  type="text",
 
95
  gr.Textbox(
96
  lines=1,
97
  placeholder="ngrok endpoint",
98
+ label="ngrok endpoint",
99
  show_label=True,
100
  container=True,
101
  type="text",