Spaces:

onlycaps
/

audio_palette

Sleeping

App Files Files Community

manasch committed on Dec 1, 2023

Commit

8a68e19

verified ·

1 Parent(s): 12204b0

add sentiment analyser and refactor code

Browse files

Files changed (12) hide show

.gitattributes +0 -5
.gitignore +5 -0
app.py +8 -203
lib/__init__.py +4 -0
lib/audio_generation.py +0 -1
lib/image_captioning.py +0 -1
lib/pace_model.py +1 -1
lib/sentiment_analyser.py +69 -0
utils/__init__.py +2 -0
utils/audio_palette.py +112 -0
utils/emotions.txt +536 -0
utils/gradio_helper.py +120 -0

.gitattributes CHANGED Viewed

@@ -33,8 +33,3 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
-*.wav filter=lfs diff=lfs merge=lfs -text
-*.jpeg filter=lfs diff=lfs merge=lfs -text
-*.jpg filter=lfs diff=lfs merge=lfs -text
-*.png filter=lfs diff=lfs merge=lfs -text
-*.mp4 filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

.gitignore CHANGED Viewed

@@ -8,9 +8,14 @@ __pycache__
 # Video
 *.mp4
 # Audio
 *.wav
 *.mp3
 *.log

 # Video
 *.mp4
+*.mkv
 # Audio
 *.wav
 *.mp3
+# Others
+*.pdf
+*.md
 *.log

app.py CHANGED Viewed

@@ -1,221 +1,26 @@
 import typing
 from pathlib import Path
-import numpy as np
 import gradio as gr
 import PIL
 from PIL import Image
 from moviepy.editor import *
-from lib.audio_generation import AudioGeneration
-from lib.image_captioning import ImageCaptioning
-from lib.pace_model import PaceModel
 pace_model_weights_path = (Path.cwd() / "models" / "pace_model_weights.h5").resolve()
 resnet50_tf_model_weights_path = (Path.cwd() / "models" / "resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5")
 height, width, channels = (224, 224, 3)
-class AudioPalette:
-    def __init__(self):
-        self.pace_model = PaceModel(height, width, channels, resnet50_tf_model_weights_path, pace_model_weights_path)
-        self.image_captioning = ImageCaptioning()
-        self.audio_generation = AudioGeneration()
-        self.pace_map = {
-            "Fast": "high",
-            "Medium": "medium",
-            "Slow": "low"
-        }
-    def prompt_construction(self, caption: str, pace: str, instrument: typing.Union[str, None], first: bool = True):
-        instrument = instrument if instrument is not None else ""
-        if first:
-            prompt = f"A {instrument} soundtrack for {caption} with {self.pace_map[pace]} beats per minute. High Quality"
-        else:
-            prompt = f"A {instrument} soundtrack for {caption} with {self.pace_map[pace]} beats per minute. High Quality. Transitions smoothely from the previous audio while sounding different."
-        return prompt
-    def generate_single(self, input_image: PIL.Image.Image, instrument: typing.Union[str, None], ngrok_endpoint: str):
-        pace = self.pace_model.predict(input_image)
-        print("Pace Prediction Done")
-        generated_text = self.image_captioning.query(input_image)[0].get("generated_text")
-        print("Captioning Done")
-        generated_text = generated_text if generated_text is not None else ""
-        prompt = self.prompt_construction(generated_text, pace, instrument)
-        print("Generated Prompt:", prompt)
-        audio_file = self.audio_generation.generate(prompt, ngrok_endpoint)
-        print("Audio Generation Done")
-        outputs = [prompt, pace, generated_text, audio_file]
-        return outputs
-    def stitch_images(self, file_paths: typing.List[str], audio_paths: typing.List[str]):
-        clips = [ImageClip(m).set_duration(5) for m in file_paths]
-        audio_clips = [AudioFileClip(a) for a in audio_paths]
-        concat_audio = concatenate_audioclips(audio_clips)
-        new_audio = CompositeAudioClip([concat_audio])
-        concat_clip = concatenate_videoclips(clips, method="compose")
-        concat_clip.audio = new_audio
-        file_name = "generated_video.mp4"
-        concat_clip.write_videofile(file_name, fps=24)
-        return file_name
-    def generate_multiple(self, file_paths: typing.List[str], instrument: typing.Union[str, None], ngrok_endpoint: str):
-        images = [Image.open(image_path) for image_path in file_paths]
-        pace = []
-        generated_text = []
-        prompts = []
-        # Extracting the pace for all the images
-        for image in images:
-            pace_prediction = self.pace_model.predict(image)
-            pace.append(pace_prediction)
-        print("Pace Prediction Done")
-        # Generating the caption for all the images
-        for image in images:
-            caption = self.image_captioning.query(image)[0].get("generated_text")
-            generated_text.append(caption)
-        print("Captioning Done")
-        first = True
-        for generated_caption, pace_pred in zip(generated_text, pace):
-            prompts.append(self.prompt_construction(generated_caption, pace_pred, instrument, first))
-            first = False
-        print("Generated Prompts: ", prompts)
-        audio_file = self.audio_generation.generate(prompts, ngrok_endpoint)
-        print("Audio Generation Done")
-        video_file = self.stitch_images(file_paths, [audio_file])
-        return video_file
-def single_image_interface(model: AudioPalette):
-    demo = gr.Interface(
-        fn=model.generate_single,
-        inputs=[
-            gr.Image(
-                type="pil",
-                label="Upload an image",
-                show_label=True,
-                container=True
-            ),
-            gr.Radio(
-                choices=["Piano", "Drums", "Guitar", "Violin", "Flute"],
-                label="Instrument",
-                show_label=True,
-                container=True
-            ),
-            gr.Textbox(
-                lines=1,
-                placeholder="ngrok endpoint",
-                label="colab endpoint",
-                show_label=True,
-                container=True,
-                type="text",
-                visible=True
-            )
-        ],
-        outputs=[
-            gr.Textbox(
-                lines=1,
-                placeholder="Prompt",
-                label="Generated Prompt",
-                show_label=True,
-                container=True,
-                type="text",
-                visible=False
-            ),
-            gr.Textbox(
-                lines=1,
-                placeholder="Pace of the image",
-                label="Pace",
-                show_label=True,
-                container=True,
-                type="text",
-                visible=False
-            ),
-            gr.Textbox(
-                lines=1,
-                placeholder="Caption for the image",
-                label="Caption",
-                show_label=True,
-                container=True,
-                type="text",
-                visible=False
-            ),
-            gr.Audio(
-                label="Generated Audio",
-                show_label=True,
-                container=True,
-                visible=True,
-                format="wav",
-                autoplay=False,
-                show_download_button=True,
-            )
-        ],
-        cache_examples=False,
-        live=False,
-        description="Provide an image to generate an appropriate background soundtrack",
-    )
-    return demo
-def multi_image_interface(model: AudioPalette):
-    demo = gr.Interface(
-        fn=model.generate_multiple,
-        inputs=[
-            gr.File(
-                file_count="multiple",
-                file_types=["image"],
-                type="filepath",
-                label="Upload images",
-                show_label=True,
-                container=True,
-                visible=True
-            ),
-            gr.Radio(
-                choices=["Piano", "Drums", "Guitar", "Violin", "Flute"],
-                label="Instrument",
-                show_label=True,
-                container=True
-            ),
-            gr.Textbox(
-                lines=1,
-                placeholder="ngrok endpoint",
-                label="colab endpoint",
-                show_label=True,
-                container=True,
-                type="text",
-                visible=True
-            )
-        ],
-        outputs=[
-            gr.Video(
-                format="mp4",
-                label="Generated Video",
-                show_label=True,
-                container=True,
-                visible=True,
-                autoplay=False,
-            )
-        ],
-        cache_examples=False,
-        live=False,
-        description="Provide images to generate an a slideshow of the images with appropriate music as background",
-    )
-    return demo
 def main():
-    model = AudioPalette()
     tab_1 = single_image_interface(model)
     tab_2 = multi_image_interface(model)

 import typing
 from pathlib import Path
 import gradio as gr
 import PIL
 from PIL import Image
 from moviepy.editor import *
+from utils import *
 pace_model_weights_path = (Path.cwd() / "models" / "pace_model_weights.h5").resolve()
 resnet50_tf_model_weights_path = (Path.cwd() / "models" / "resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5")
 height, width, channels = (224, 224, 3)
 def main():
+    model = AudioPalette(
+        pace_model_weights_path,
+        resnet50_tf_model_weights_path,
+        height,
+        width,
+        channels
+    )
     tab_1 = single_image_interface(model)
     tab_2 = multi_image_interface(model)

lib/__init__.py ADDED Viewed

	@@ -0,0 +1,4 @@

+from .audio_generation import AudioGeneration
+from .image_captioning import ImageCaptioning
+from .pace_model import PaceModel
+from .sentiment_analyser import SentimentAnalyser

lib/audio_generation.py CHANGED Viewed

@@ -33,5 +33,4 @@ class AudioGeneration:
         stored_file_path = self.request_generation(prompt)
         audio_file = self.request_download(stored_file_path)
-        print(audio_file)
         return audio_file

         stored_file_path = self.request_generation(prompt)
         audio_file = self.request_download(stored_file_path)
         return audio_file

lib/image_captioning.py CHANGED Viewed

@@ -26,5 +26,4 @@ class ImageCaptioning:
             headers=self.headers,
             data=self.convert_to_bytes(image)
         )
-        print(response.json())
         return response.json()

             headers=self.headers,
             data=self.convert_to_bytes(image)
         )
         return response.json()

lib/pace_model.py CHANGED Viewed

@@ -52,5 +52,5 @@ class PaceModel:
         image = np.expand_dims(resized_image, axis=0)
         prediction = self.resnet_model.predict(image)
-        print(prediction, np.argmax(prediction))
         return self.class_names[np.argmax(prediction)]

         image = np.expand_dims(resized_image, axis=0)
         prediction = self.resnet_model.predict(image)
+        # print(prediction, np.argmax(prediction))
         return self.class_names[np.argmax(prediction)]

lib/sentiment_analyser.py ADDED Viewed

	@@ -0,0 +1,69 @@

+import os
+import string
+from collections import Counter
+from datetime import datetime
+from pathlib import Path
+import nltk
+from nltk.corpus import stopwords
+from nltk.sentiment.vader import SentimentIntensityAnalyzer
+from nltk.stem import WordNetLemmatizer
+from nltk.tokenize import word_tokenize
+from utils import *
+datetime_format = "%d/%m/%Y %H:%M:%S"
+def now():
+    """Return the current local time formatted with ``datetime_format`` (used as a log prefix)."""
+    return datetime.now().strftime(datetime_format)
+class SentimentAnalyser:
+    """Lexicon-based emotion tagger for short English texts.
+
+    The text is lower-cased, stripped of punctuation, tokenised, filtered for
+    stop words and lemmatised with NLTK. The remaining words are looked up in
+    a ``'word': 'emotion',`` lexicon file and the most frequent emotion wins.
+    """
+    def __init__(self):
+        """Download the NLTK resources used by ``sentiment`` and locate the lexicon file."""
+        nltk.download('punkt')
+        nltk.download('stopwords')
+        nltk.download('wordnet')
+        # Relative path: resolved against the current working directory.
+        self.emotions = Path("utils/emotions.txt").resolve()
+    def sentiment(self, text):
+        """Return the dominant emotion found in *text*, or ``None``.
+
+        Args:
+            text: English text such as an image caption. ``None`` and blank
+                strings are accepted and yield ``None``.
+
+        Returns:
+            The emotion label matched by the largest number of lexicon lines,
+            or ``None`` when no word of the text appears in the lexicon.
+
+        Raises:
+            OSError: If the lexicon file cannot be opened.
+        """
+        # The captioning step can hand over None or "" when no caption was
+        # produced; bail out before calling .lower() on it.
+        if not text or not text.strip():
+            print(f"[{now()}] No emotion could be extracted.")
+            return None
+        cleaned_text = text.lower().translate(str.maketrans('', '', string.punctuation))
+        tokenized_words = word_tokenize(cleaned_text, "english")
+        # Build the stop-word set and the lemmatizer once, not once per token.
+        stop_words = set(stopwords.words("english"))
+        lemmatizer = WordNetLemmatizer()
+        # Drop stop words, then reduce each remaining word to its lemma.
+        # Only membership is needed afterwards, so a set is enough.
+        lemma_words = {
+            lemmatizer.lemmatize(word)
+            for word in tokenized_words
+            if word not in stop_words
+        }
+        emotion_list = []
+        with open(self.emotions, encoding="utf-8") as f:
+            for line in f:
+                word, separator, emotion = line.partition(":")
+                if not separator:
+                    # Blank or malformed line: skip it instead of failing on unpacking.
+                    continue
+                # Keys are compared with quotes, commas and all spaces removed.
+                word = word.replace(",", "").replace("'", "").replace(" ", "").strip()
+                # Keep spaces inside the label so that 'singled out' is not
+                # returned as 'singledout'.
+                emotion = emotion.replace(",", "").replace("'", "").strip()
+                if word in lemma_words:
+                    emotion_list.append(emotion)
+        print(f"[{now()}] Emotion List:", emotion_list)
+        if not emotion_list:
+            print(f"[{now()}] No emotion could be extracted.")
+            return None
+        emotions_count = Counter(emotion_list)
+        print(f"[{now()}] Emotions Count:", emotions_count)
+        common = emotions_count.most_common(1)
+        print(f"[{now()}] Common Emotions:", common)
+        sentiment, _ = common[0]
+        print(f"[{now()}] Emotion:", sentiment)
+        return sentiment

utils/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ from .gradio_helper import single_image_interface, multi_image_interface
2	+ from .audio_palette import AudioPalette

utils/audio_palette.py ADDED Viewed

	@@ -0,0 +1,112 @@

+import typing
+from datetime import datetime
+import PIL
+from PIL import Image
+from moviepy.editor import *
+from lib import *
+datetime_format = "%d/%m/%Y %H:%M:%S"
+def now():
+    """Return the current local time rendered with ``datetime_format`` (used as a log prefix)."""
+    return format(datetime.now(), datetime_format)
+class AudioPalette:
+    """Generate a soundtrack for one image, or a scored slideshow for several.
+
+    Wires together the pace classifier, the image-captioning client, the
+    sentiment analyser and the audio-generation client.
+    """
+    def __init__(self, pace_model_weights_path, resnet50_tf_model_weights_path, height, width, channels):
+        """Create the component models.
+
+        Args:
+            pace_model_weights_path: Weights file forwarded to ``PaceModel``.
+            resnet50_tf_model_weights_path: Weights file forwarded to ``PaceModel``.
+            height: Input image height forwarded to ``PaceModel``.
+            width: Input image width forwarded to ``PaceModel``.
+            channels: Input channel count forwarded to ``PaceModel``.
+        """
+        self.pace_model = PaceModel(height, width, channels, resnet50_tf_model_weights_path, pace_model_weights_path)
+        self.image_captioning = ImageCaptioning()
+        self.audio_generation = AudioGeneration()
+        self.sentiment_analyser = SentimentAnalyser()
+        # Pace label -> wording used in the prompt.
+        self.pace_map = {
+            "Fast": "high",
+            "Medium": "medium",
+            "Slow": "low"
+        }
+    def prompt_construction(self, caption: str, pace: str, sentiment: typing.Union[str, None], instrument: typing.Union[str, None], first: bool = True):
+        """Build the text prompt sent to the audio generator.
+
+        Args:
+            caption: Caption describing the image.
+            pace: Pace label; must be a key of ``self.pace_map``.
+            sentiment: Emotion extracted from the caption. Accepted for
+                interface stability but not added to the prompt at present.
+            instrument: Instrument name, or ``None``/empty for no preference.
+            first: ``False`` for follow-up prompts, which also ask for a
+                smooth transition from the previous audio.
+
+        Returns:
+            The prompt string.
+
+        Raises:
+            KeyError: If *pace* is not a key of ``self.pace_map``.
+        """
+        # Without an instrument the plain f-string produced "A  soundtrack"
+        # (double space), so the instrument is only inserted when present.
+        subject = f"{instrument} soundtrack" if instrument else "soundtrack"
+        prompt = f"A {subject} for {caption} with {self.pace_map[pace]} beats per minute. High Quality."
+        if not first:
+            prompt += " Transitions smoothely from the previous audio while sounding different."
+        # NOTE: appending f" As a {sentiment} music." is intentionally disabled for now.
+        return prompt
+    def generate_single(self, input_image: PIL.Image.Image, instrument: typing.Union[str, None], ngrok_endpoint: str):
+        """Generate audio for a single image.
+
+        Args:
+            input_image: The image to score.
+            instrument: Preferred instrument, or ``None``.
+            ngrok_endpoint: Endpoint forwarded to the audio-generation client.
+
+        Returns:
+            ``[prompt, pace, caption, audio_file]``.
+        """
+        pace = self.pace_model.predict(input_image)
+        print(f"[{now()}]", pace)
+        print(f"[{now()}] Pace Prediction Done")
+        generated_text = self.image_captioning.query(input_image)[0].get("generated_text")
+        # A response without "generated_text" yields None; fall back to an
+        # empty caption so sentiment analysis and the prompt never see None.
+        generated_text = generated_text if generated_text is not None else ""
+        print(f"[{now()}]", generated_text)
+        print(f"[{now()}] Captioning Done")
+        sentiment = self.sentiment_analyser.sentiment(generated_text)
+        print(f"[{now()}] Sentiment Analysis Done")
+        prompt = self.prompt_construction(generated_text, pace, sentiment, instrument)
+        print(f"[{now()}] Generated Prompt:", prompt)
+        audio_file = self.audio_generation.generate(prompt, ngrok_endpoint)
+        print(f"[{now()}]", audio_file)
+        print(f"[{now()}] Audio Generation Done")
+        return [prompt, pace, generated_text, audio_file]
+    def stitch_images(self, file_paths: typing.List[str], audio_paths: typing.List[str], duration: int = 5, fps: int = 24):
+        """Write a slideshow video of the images with the audio as soundtrack.
+
+        Args:
+            file_paths: Image files, shown in order.
+            audio_paths: Audio files, concatenated in order.
+            duration: Seconds each image stays on screen.
+            fps: Frame rate of the written video.
+
+        Returns:
+            The name of the written file, ``"generated_video.mp4"``.
+        """
+        clips = [ImageClip(m).set_duration(duration) for m in file_paths]
+        audio_clips = [AudioFileClip(a) for a in audio_paths]
+        try:
+            concat_audio = concatenate_audioclips(audio_clips)
+            new_audio = CompositeAudioClip([concat_audio])
+            concat_clip = concatenate_videoclips(clips, method="compose")
+            concat_clip.audio = new_audio
+            # NOTE(review): fixed output name, so every call overwrites the
+            # previous video in the working directory.
+            file_name = "generated_video.mp4"
+            concat_clip.write_videofile(file_name, fps=fps)
+        finally:
+            # Release the audio readers once the video is written (or on failure).
+            for audio_clip in audio_clips:
+                audio_clip.close()
+        return file_name
+    def generate_multiple(self, file_paths: typing.List[str], instrument: typing.Union[str, None], ngrok_endpoint: str):
+        """Generate a slideshow video with music for several images.
+
+        Args:
+            file_paths: Paths of the uploaded images.
+            instrument: Preferred instrument, or ``None``.
+            ngrok_endpoint: Endpoint forwarded to the audio-generation client.
+
+        Returns:
+            Path of the generated video file.
+
+        Raises:
+            ValueError: If no image path is given.
+        """
+        if not file_paths:
+            # Fail early with a clear message instead of deep inside the pipeline.
+            raise ValueError("No images were provided.")
+        images = [Image.open(image_path) for image_path in file_paths]
+        try:
+            # Extracting the pace for all the images
+            pace = [self.pace_model.predict(image) for image in images]
+            print(f"[{now()}]", pace)
+            print(f"[{now()}] Pace Prediction Done")
+            # Generating the caption for all the images; a missing caption becomes ""
+            generated_text = [
+                self.image_captioning.query(image)[0].get("generated_text") or ""
+                for image in images
+            ]
+            print(f"[{now()}]", generated_text)
+            print(f"[{now()}] Captioning Done")
+        finally:
+            # The stitching step re-reads the files by path, so the opened
+            # handles can be closed here.
+            for image in images:
+                image.close()
+        # Extracting the sentiments from the generated captions
+        sentiments = [self.sentiment_analyser.sentiment(text) for text in generated_text]
+        print(f"[{now()}] Sentiment Analysis Done:", sentiments)
+        # Only the first prompt omits the "transition" sentence.
+        prompts = [
+            self.prompt_construction(generated_caption, pace_pred, senti, instrument, index == 0)
+            for index, (generated_caption, senti, pace_pred) in enumerate(zip(generated_text, sentiments, pace))
+        ]
+        print(f"[{now()}] Generated Prompts:", prompts)
+        audio_file = self.audio_generation.generate(prompts, ngrok_endpoint)
+        print(f"[{now()}]", audio_file)
+        print(f"[{now()}] Audio Generation Done")
+        video_file = self.stitch_images(file_paths, [audio_file])
+        return video_file

utils/emotions.txt ADDED Viewed

	@@ -0,0 +1,536 @@

+ 'victimized': 'cheated',
+ 'accused': 'cheated',
+ 'acquitted': 'singled out',
+ 'adorable': 'loved',
+ 'adored': 'loved',
+ 'affected': 'attracted',
+ 'afflicted': 'sad',
+ 'aghast': 'fearful',
+ 'agog': 'attracted',
+ 'agonized': 'sad',
+ 'alarmed': 'fearful',
+ 'amused': 'happy',
+ 'angry': 'angry',
+ 'anguished': 'sad',
+ 'animated': 'happy',
+ 'annoyed': 'angry',
+ 'anxious': 'attracted',
+ 'apathetic': 'bored',
+ 'appalled': 'angry',
+ 'appeased': 'singled out',
+ 'appreciated': 'esteemed',
+ 'apprehensive': 'fearful',
+ 'approved of': 'loved',
+ 'ardent': 'lustful',
+ 'aroused': 'lustful',
+ 'attached': 'attached',
+ 'attracted': 'attracted',
+ 'autonomous': 'independent',
+ 'awed': 'fearful',
+ 'awkward': 'embarrassed',
+ 'beaten down': 'powerless',
+ 'beatific': 'happy',
+ 'belonging': 'attached',
+ 'bereaved': 'sad',
+ 'betrayed': 'cheated',
+ 'bewildered': 'surprise',
+ 'bitter': 'angry',
+ 'blissful': 'happy',
+ 'blithe': 'happy',
+ 'blocked': 'powerless',
+ 'boiling': 'angry',
+ 'bold': 'fearless',
+ 'bored': 'bored',
+ 'brave': 'fearless',
+ 'bright': 'happy',
+ 'brisk': 'happy',
+ 'calm': 'safe',
+ 'capable': 'adequate',
+ 'captivated': 'attached',
+ 'careless': 'powerless',
+ 'categorized': 'singled out',
+ 'cautious': 'fearful',
+ 'certain': 'fearless',
+ 'chagrined': 'belittled',
+ 'challenged': 'attracted',
+ 'chastised': 'hated',
+ 'cheated': 'cheated',
+ 'cheerful': 'happy',
+ 'cheerless': 'sad',
+ 'cheery': 'happy',
+ 'cherished': 'attached',
+ 'chicken': 'fearful',
+ 'cocky': 'independent',
+ 'codependent': 'codependent',
+ 'coerced': 'cheated',
+ 'comfortable': 'happy',
+ 'common': 'average',
+ 'competent': 'adequate',
+ 'complacent': 'apathetic',
+ 'composed': 'adequate',
+ 'concerned': 'attracted',
+ 'confident': 'adequate',
+ 'confused': 'surprise',
+ 'connected': 'attached',
+ 'conned': 'cheated',
+ 'consumed': 'obsessed',
+ 'contented': 'happy',
+ 'controlled': 'powerless',
+ 'convivial': 'happy',
+ 'cornered': 'entitled',
+ 'courageous': 'fearless',
+ 'cowardly': 'fearful',
+ 'craving': 'attracted',
+ 'crestfallen': 'sad',
+ 'criticized': 'hated',
+ 'cross': 'angry',
+ 'cross-examined': 'singled out',
+ 'crushed': 'sad',
+ 'curious': 'attracted',
+ 'cut off': 'alone',
+ 'daring': 'fearless',
+ 'dark': 'sad',
+ 'dedicated': 'attracted',
+ 'defeated': 'powerless',
+ 'defenseless': 'fearful',
+ 'degraded': 'belittled',
+ 'dejected': 'sad',
+ 'depressed': 'sad',
+ 'deserted': 'hated',
+ 'desirable': 'loved',
+ 'despondent': 'sad',
+ 'detached': 'alone',
+ 'determined': 'focused',
+ 'diminished': 'belittled',
+ 'disappointed': 'demoralized',
+ 'discarded': 'hated',
+ 'disconsolate': 'sad',
+ 'discontented': 'sad',
+ 'discounted': 'belittled',
+ 'discouraged': 'powerless',
+ 'disgraced': 'belittled',
+ 'disgusted': 'angry',
+ 'disheartened': 'demoralized',
+ 'disillusioned': 'demoralized',
+ 'disjointed': 'derailed',
+ 'dismal': 'sad',
+ 'dismayed': 'fearful',
+ 'disoriented': 'derailed',
+ 'disparaged': 'cheated',
+ 'displeased': 'sad',
+ 'disrespected': 'belittled',
+ 'distressed': 'sad',
+ 'distrustful': 'anxious',
+ 'dolorous': 'sad',
+ 'doubtful': 'fearful',
+ 'down': 'sad',
+ 'downhearted': 'sad',
+ 'dreadful': 'sad',
+ 'dreary': 'sad',
+ 'dubious': 'anxious',
+ 'dull': 'sad',
+ 'duped': 'cheated',
+ 'eager': 'attracted',
+ 'earnest': 'attracted',
+ 'ecstatic': 'happy',
+ 'elated': 'happy',
+ 'embarrassed': 'embarrassed',
+ 'empathetic': 'attached',
+ 'enchanted': 'attracted',
+ 'encouraged': 'adequate',
+ 'engrossed': 'attracted',
+ 'enraged': 'angry',
+ 'enterprising': 'fearless',
+ 'enthusiastic': 'happy',
+ 'entrusted': 'loved',
+ 'esteemed': 'esteemed',
+ 'excited': 'happy',
+ 'excluded': 'alone',
+ 'exempt': 'entitled',
+ 'exhausted hopeless': 'powerless',
+ 'exhilarated': 'happy',
+ 'exploited': 'cheated',
+ 'exposed': 'fearful',
+ 'fabulous': 'ecstatic',
+ 'fainthearted': 'fearful',
+ 'fantastic': 'ecstatic',
+ 'fascinated': 'attracted',
+ 'favored': 'entitled',
+ 'fearful': 'fearful',
+ 'fervent': 'attracted',
+ 'fervid': 'attracted',
+ 'festive': 'happy',
+ 'flat': 'sad',
+ 'focused': 'focused',
+ 'forced': 'powerless',
+ 'forsaken': 'hated',
+ 'framed': 'cheated',
+ 'free': 'free',
+ 'free & easy': 'happy',
+ 'frightened': 'fearful',
+ 'frisky': 'happy',
+ 'frustrated': 'angry',
+ 'full of anticipation': 'attracted',
+ 'full of ennui': 'apathetic',
+ 'fuming': 'angry',
+ 'funereal': 'sad',
+ 'furious': 'angry',
+ 'gallant': 'fearless',
+ 'genial': 'happy',
+ 'glad': 'happy',
+ 'gleeful': 'happy',
+ 'gloomy': 'sad',
+ 'glum': 'sad',
+ 'grass': 'happy',
+ 'grief-stricken': 'sad',
+ 'grieved': 'sad',
+ 'guilt': 'sad',
+ 'guilty': 'singled out',
+ 'happy': 'happy',
+ 'hardy': 'fearless',
+ 'heartbroken': 'sad',
+ 'heavyhearted': 'sad',
+ 'hesitant': 'fearful',
+ 'high-spirited': 'happy',
+ 'hilarious': 'happy',
+ 'hopeful': 'attracted',
+ 'horny': 'lustful',
+ 'horrified': 'fearful',
+ 'hot and bothered': 'lustful',
+ 'humiliated': 'sad',
+ 'humorous': 'happy',
+ 'hurt': 'sad',
+ 'hysterical': 'fearful',
+ 'ignored': 'hated',
+ 'ill at ease': 'sad',
+ 'immobilized': 'apathetic',
+ 'immune': 'entitled',
+ 'important': 'happy',
+ 'impotent': 'powerless',
+ 'imprisoned': 'entitled',
+ 'in a huff': 'angry',
+ 'in a stew': 'angry',
+ 'in control': 'adequate',
+ 'in fear': 'fearful',
+ 'in pain': 'sad',
+ 'in the dumps': 'sad',
+ 'in the zone': 'focused',
+ 'incensed': 'angry',
+ 'included': 'attached',
+ 'indecisive': 'anxious',
+ 'independent': 'free',
+ 'indignant': 'angry',
+ 'infatuated': 'lustful',
+ 'inflamed': 'angry',
+ 'injured': 'sad',
+ 'inquisitive': 'attracted',
+ 'insecure': 'codependent',
+ 'insignificant': 'belittled',
+ 'intent': 'attracted',
+ 'interested': 'attracted',
+ 'interrogated': 'singled out',
+ 'intrigued': 'attracted',
+ 'irate': 'angry',
+ 'irresolute': 'fearful',
+ 'irresponsible': 'powerless',
+ 'irritated': 'angry',
+ 'isolated': 'alone',
+ 'jaunty': 'happy',
+ 'jocular': 'happy',
+ 'jolly': 'happy',
+ 'jovial': 'happy',
+ 'joyful': 'happy',
+ 'joyless': 'sad',
+ 'joyous': 'happy',
+ 'jubilant': 'happy',
+ 'justified': 'singled out',
+ 'keen': 'attracted',
+ 'labeled': 'singled out',
+ 'lackadaisical': 'bored',
+ 'lazy': 'apathetic',
+ 'left out': 'hated',
+ 'let down': 'hated',
+ 'lethargic': 'apathetic',
+ 'lied to': 'cheated',
+ 'lighthearted': 'happy',
+ 'liked': 'attached',
+ 'lively': 'happy',
+ 'livid': 'angry',
+ 'lonely': 'alone',
+ 'lonesome': 'alone',
+ 'lost': 'lost',
+ 'loved': 'attached',
+ 'low': 'sad',
+ 'lucky': 'happy',
+ 'lugubrious': 'sad',
+ 'macho': 'independent',
+ 'mad': 'angry',
+ 'melancholy': 'sad',
+ 'menaced': 'fearful',
+ 'merry': 'happy',
+ 'mirthful': 'happy',
+ 'misgiving': 'fearful',
+ 'misunderstood': 'alone',
+ 'moody': 'sad',
+ 'moping': 'sad',
+ 'motivated': 'attracted',
+ 'mournful': 'sad',
+ 'needed': 'attracted',
+ 'needy': 'codependent',
+ 'nervous': 'fearful',
+ 'obligated': 'powerless',
+ 'obsessed': 'obsessed',
+ 'offended': 'angry',
+ 'oppressed': 'sad',
+ 'optionless': 'entitled',
+ 'ordinary': 'average',
+ 'organized': 'adequate',
+ 'out of control': 'powerless',
+ 'out of sorts': 'sad',
+ 'outmaneuvered': 'entitled',
+ 'outraged': 'angry',
+ 'overjoyed': 'happy',
+ 'overlooked': 'hated',
+ 'overwhelmed': 'powerless',
+ 'panicked': 'fearful',
+ 'passionate': 'lustful',
+ 'passive': 'apathetic',
+ 'pathetic': 'sad',
+ 'peaceful': 'safe',
+ 'pensive': 'anxious',
+ 'perplexed': 'anxious',
+ 'phobic': 'fearful',
+ 'playful': 'happy',
+ 'pleased': 'happy',
+ 'powerless': 'powerless',
+ 'pressured': 'burdened',
+ 'privileged': 'entitled',
+ 'proud': 'happy',
+ 'provoked': 'angry',
+ 'punished': 'hated',
+ 'put upon': 'burdened',
+ 'quaking': 'fearful',
+ 'quiescent': 'apathetic',
+ 'rageful': 'angry',
+ 'rapturous': 'happy',
+ 'rated': 'singled out',
+ 'reassured': 'fearless',
+ 'reckless': 'powerless',
+ 'redeemed': 'singled out',
+ 'regretful': 'sad',
+ 'rejected': 'alone',
+ 'released': 'free',
+ 'remorse': 'sad',
+ 'replaced': 'hated',
+ 'repulsed': 'demoralized',
+ 'resentful': 'angry',
+ 'resolute': 'fearless',
+ 'respected': 'esteemed',
+ 'responsible': 'adequate',
+ 'restful': 'fearful',
+ 'revered': 'esteemed',
+ 'rueful': 'sad',
+ 'sad': 'sad',
+ 'satisfied': 'happy',
+ 'saucy': 'happy',
+ 'scared': 'fearful',
+ 'secure': 'fearless',
+ 'self-reliant': 'fearless',
+ 'serene': 'happy',
+ 'shaky': 'fearful',
+ 'shamed': 'sad',
+ 'shocked': 'surprise',
+ 'significant': 'esteemed',
+ 'singled out': 'singled out',
+ 'skeptical': 'anxious',
+ 'snoopy': 'attracted',
+ 'somber': 'sad',
+ 'sparkling': 'happy',
+ 'spirited': 'happy',
+ 'spiritless': 'sad',
+ 'sprightly': 'happy',
+ 'startled': 'surprise',
+ 'stereotyped': 'singled out',
+ 'stifled': 'powerless',
+ 'stout hearted': 'fearless',
+ 'strong': 'independent',
+ 'suffering': 'sad',
+ 'sulky': 'sad',
+ 'sullen': 'angry',
+ 'sunny': 'happy',
+ 'surprised': 'surprise',
+ 'suspicious': 'anxious',
+ 'sympathetic': 'codependent',
+ 'tense': 'anxious',
+ 'terrified': 'fearful',
+ 'terrorized': 'fearful',
+ 'thankful': 'happy',
+ 'threatened': 'fearful',
+ 'thwarted': 'powerless',
+ 'timid': 'fearful',
+ 'timorous': 'fearful',
+ 'torn': 'derailed',
+ 'tortured': 'sad',
+ 'tragic': 'sad',
+ 'tranquil': 'happy',
+ 'transported': 'happy',
+ 'trapped': 'entitled',
+ 'tremulous': 'fearful',
+ 'tricked': 'entitled',
+ 'turned on': 'lustful',
+ 'unapproved of': 'hated',
+ 'unbelieving': 'anxious',
+ 'uncertain': 'anxious',
+ 'unconcerned': 'apathetic',
+ 'understood': 'attached',
+ 'unfocussed': 'lost',
+ 'unlovable': 'hated',
+ 'unloved': 'hated',
+ 'unmotivated': 'apathetic',
+ 'unshackled': 'free',
+ 'unsupported': 'belittled',
+ 'up in arms': 'angry',
+ 'upset': 'fearful',
+ 'validated': 'loved',
+ 'valued': 'esteemed',
+ 'victimized': 'sad',
+ 'violated': 'cheated',
+ 'virulent': 'angry',
+ 'vivacious': 'happy',
+ 'vulnerable': 'powerless',
+ 'wavering': 'anxious',
+ 'weak': 'powerless',
+ 'welcomed': 'loved',
+ 'woebegone': 'sad',
+ 'woeful': 'sad',
+ 'worn down': 'powerless',
+ 'worn out': 'powerless',
+ 'worried': 'fearful',
+ 'worshiped': 'esteemed',
+ 'wrathful': 'angry',
+ 'wronged': 'singled out',
+ 'wrought up': 'angry',
+ 'yearning': 'lustful',
+ 'zealous': 'attracted',
+ 'abandoned': 'hated',
+ 'absolved': 'singled out',
+ 'absorbed': 'attracted',
+ 'abused': 'powerless',
+ 'accepted': 'loved',
+ 'aching': 'sad',
+ 'acrimonious': 'angry',
+ 'addicted': 'codependent',
+ 'adequate': 'adequate',
+ 'admired': 'esteemed',
+ 'affectionate': 'attached',
+ 'affronted': 'singled out',
+ 'afraid': 'fearful',
+ 'airy': 'happy',
+ 'alone': 'alone',
+ 'ambivalent': 'bored',
+ 'apathetic': 'apathetic',
+ 'apprehensive': 'anxious',
+ 'arrogant': 'independent',
+ 'ashamed': 'embarrassed',
+ 'astonished': 'surprise',
+ 'at ease': 'safe',
+ 'attacked': 'fearful',
+ 'audacious': 'fearless',
+ 'autonomous': 'free',
+ 'average': 'average',
+ 'avid': 'attracted',
+ 'baffled': 'lost',
+ 'bashful': 'powerless',
+ 'belittled': 'belittled',
+ 'buoyant': 'happy',
+ 'burdened': 'burdened',
+ 'clouded': 'sad',
+ 'committed': 'focused',
+ 'compassionate': 'attached',
+ 'compelled': 'obsessed',
+ 'dauntless': 'fearless',
+ 'debonair': 'happy',
+ 'deceived': 'entitled',
+ 'delighted': 'ecstatic',
+ 'demoralized': 'demoralized',
+ 'derailed': 'derailed',
+ 'desirous': 'attracted',
+ 'despairing': 'sad',
+ 'devastated': 'angry',
+ 'diffident': 'fearful',
+ 'discredited': 'belittled',
+ 'disheartened': 'sad',
+ 'disinclined': 'demoralized',
+ 'disorganized': 'powerless',
+ 'downcast': 'sad',
+ 'entitled': 'entitled',
+ 'excited': 'adequate',
+ 'exultant': 'happy',
+ 'fidgety': 'fearful',
+ 'frowning': 'sad',
+ 'full of misgiving': 'anxious',
+ 'great': 'happy',
+ 'hapless': 'sad',
+ 'hated': 'hated',
+ 'heroic': 'fearless',
+ 'hostile': 'angry',
+ 'in despair': 'sad',
+ 'indifferent': 'bored',
+ 'infuriated': 'angry',
+ 'insecure': 'fearful',
+ 'inspired': 'happy',
+ 'inspiring': 'attracted',
+ 'judged': 'singled out',
+ 'justified': 'singled out',
+ 'laughing': 'happy',
+ 'loved': 'loved',
+ 'loving': 'attached',
+ 'low': 'sad',
+ 'lustful': 'lustful',
+ 'manipulated': 'cheated',
+ 'mumpish': 'sad',
+ 'nosey': 'attracted',
+ 'numb': 'apathetic',
+ 'obliterated': 'powerless',
+ 'peaceful': 'happy',
+ 'petrified': 'fearful',
+ 'piqued': 'angry',
+ 'piteous': 'sad',
+ 'powerless': 'powerless',
+ 'questioning': 'anxious',
+ 'rejected': 'hated',
+ 'self-satisfied': 'happy',
+ 'set up': 'entitled',
+ 'shut out': 'alone',
+ 'sorrowful': 'sad',
+ 'spirited': 'sad',
+ 'supported': 'esteemed',
+ 'suspicious': 'fearful',
+ 'terrific': 'happy',
+ 'trapped': 'entitled',
+ 'trembling': 'fearful',
+ 'uncomfortable': 'anxious',
+ 'underestimated': 'belittled',
+ 'unhappy': 'sad',
+ 'vindicated': 'singled out',
+ 'worked up': 'angry',
+ 'airborne': 'excited',
+ 'grass': 'happy',
+ 'mountain': 'calm',
+ 'dog': 'happy',
+ 'umbrella': 'sad',
+ 'train': 'sorrow',
+ 'lightning': 'ominous',
+ 'rocket': 'energetic',
+ 'elevator': 'relaxed',
+ 'slides': 'happy',
+ 'mountains': 'relaxed',
+ 'dog': 'excited',
+ 'trees': 'relaxed',
+ 'people': 'happy',
+ 'old': 'sad',
+ 'men': 'happy',
+ 'women': 'happy',
+ 'humans': 'happy',
+ 'persons': 'happy',
+ 'person': 'happy'

utils/gradio_helper.py ADDED Viewed

	@@ -0,0 +1,120 @@

+import gradio as gr
+from .audio_palette import AudioPalette
+def single_image_interface(model: AudioPalette):
+    """Build the Gradio interface that turns one uploaded image into a soundtrack.
+
+    Inputs are an image, an instrument choice and the endpoint text box; they
+    are passed to ``model.generate_single``. The prompt, pace and caption
+    outputs are hidden text boxes; only the generated audio is visible.
+
+    Args:
+        model: The ``AudioPalette`` whose ``generate_single`` handles requests.
+
+    Returns:
+        The configured ``gr.Interface``.
+    """
+    handler = model.generate_single
+    # Settings shared by the three hidden text outputs.
+    hidden_text = dict(lines=1, show_label=True, container=True, type="text", visible=False)
+    # Components are created in the same order as they are listed below.
+    image_input = gr.Image(type="pil", label="Upload an image", show_label=True, container=True)
+    instrument_input = gr.Radio(
+        choices=["Piano", "Drums", "Guitar", "Violin", "Flute"],
+        label="Instrument",
+        show_label=True,
+        container=True,
+    )
+    endpoint_input = gr.Textbox(
+        lines=1,
+        placeholder="ngrok endpoint",
+        label="colab endpoint",
+        show_label=True,
+        container=True,
+        type="text",
+        visible=True,
+    )
+    prompt_output = gr.Textbox(placeholder="Prompt", label="Generated Prompt", **hidden_text)
+    pace_output = gr.Textbox(placeholder="Pace of the image", label="Pace", **hidden_text)
+    caption_output = gr.Textbox(placeholder="Caption for the image", label="Caption", **hidden_text)
+    audio_output = gr.Audio(
+        label="Generated Audio",
+        show_label=True,
+        container=True,
+        visible=True,
+        format="wav",
+        autoplay=False,
+        show_download_button=True,
+    )
+    return gr.Interface(
+        fn=handler,
+        inputs=[image_input, instrument_input, endpoint_input],
+        outputs=[prompt_output, pace_output, caption_output, audio_output],
+        cache_examples=False,
+        live=False,
+        description="Provide an image to generate an appropriate background soundtrack",
+    )
+def multi_image_interface(model: AudioPalette):
+    """Build the Gradio interface that turns several images into a slideshow with music.
+
+    Inputs are a multi-file image upload (delivered as file paths), an
+    instrument choice and the endpoint text box; they are passed to
+    ``model.generate_multiple``. The single output is the generated video.
+
+    Args:
+        model: The ``AudioPalette`` whose ``generate_multiple`` handles requests.
+
+    Returns:
+        The configured ``gr.Interface``.
+    """
+    handler = model.generate_multiple
+    # Components are created in the same order as they are listed below.
+    images_input = gr.File(
+        file_count="multiple",
+        file_types=["image"],
+        type="filepath",
+        label="Upload images",
+        show_label=True,
+        container=True,
+        visible=True,
+    )
+    instrument_input = gr.Radio(
+        choices=["Piano", "Drums", "Guitar", "Violin", "Flute"],
+        label="Instrument",
+        show_label=True,
+        container=True,
+    )
+    endpoint_input = gr.Textbox(
+        lines=1,
+        placeholder="ngrok endpoint",
+        label="colab endpoint",
+        show_label=True,
+        container=True,
+        type="text",
+        visible=True,
+    )
+    video_output = gr.Video(
+        format="mp4",
+        label="Generated Video",
+        show_label=True,
+        container=True,
+        visible=True,
+        autoplay=False,
+    )
+    return gr.Interface(
+        fn=handler,
+        inputs=[images_input, instrument_input, endpoint_input],
+        outputs=[video_output],
+        cache_examples=False,
+        live=False,
+        description="Provide images to generate an a slideshow of the images with appropriate music as background",
+    )