Spaces:
Runtime error
Runtime error
Duplicate from doevent/msk
Browse filesCo-authored-by: Max Skobeev <doevent@users.noreply.huggingface.co>
- .gitattributes +33 -0
- README.md +14 -0
- app.py +145 -0
- constants.py +7 -0
- requirements.txt +3 -0
- utils.py +50 -0
.gitattributes
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Img to Music Video
|
| 3 |
+
emoji: ⚡
|
| 4 |
+
colorFrom: red
|
| 5 |
+
colorTo: green
|
| 6 |
+
sdk: gradio
|
| 7 |
+
sdk_version: 3.10.1
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
+
license: unknown
|
| 11 |
+
duplicated_from: doevent/msk
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import time
|
| 2 |
+
|
| 3 |
+
import gradio as gr
|
| 4 |
+
from sentence_transformers import SentenceTransformer
|
| 5 |
+
|
| 6 |
+
import httpx
|
| 7 |
+
import json
|
| 8 |
+
|
| 9 |
+
from utils import get_tags_for_prompts, get_mubert_tags_embeddings, get_pat
|
| 10 |
+
#import subprocess
|
| 11 |
+
import os
|
| 12 |
+
import uuid
|
| 13 |
+
from tempfile import gettempdir
|
| 14 |
+
from PIL import Image
|
| 15 |
+
import cv2
|
| 16 |
+
from pprint import pprint
|
| 17 |
+
|
| 18 |
+
minilm = SentenceTransformer('all-MiniLM-L6-v2')
|
| 19 |
+
mubert_tags_embeddings = get_mubert_tags_embeddings(minilm)
|
| 20 |
+
|
| 21 |
+
# image_to_text = gr.Interface.load("spaces/doevent/image_to_text", api_key=os.environ['HF_TOKEN'])
|
| 22 |
+
image_to_text = gr.Blocks.load(name="spaces/pharma/CLIP-Interrogator")
|
| 23 |
+
def center_crop(img, dim: tuple = (512, 512)):
|
| 24 |
+
|
| 25 |
+
"""Returns center cropped image
|
| 26 |
+
Args:
|
| 27 |
+
img: image to be center cropped
|
| 28 |
+
dim: dimensions (width, height) to be cropped
|
| 29 |
+
"""
|
| 30 |
+
|
| 31 |
+
width, height = img.shape[1], img.shape[0]
|
| 32 |
+
|
| 33 |
+
# process crop width and height for max available dimension
|
| 34 |
+
crop_width = dim[0] if dim[0]<img.shape[1] else img.shape[1]
|
| 35 |
+
crop_height = dim[1] if dim[1]<img.shape[0] else img.shape[0]
|
| 36 |
+
mid_x, mid_y = int(width/2), int(height/2)
|
| 37 |
+
cw2, ch2 = int(crop_width/2), int(crop_height/2)
|
| 38 |
+
crop_img = img[mid_y-ch2:mid_y+ch2, mid_x-cw2:mid_x+cw2]
|
| 39 |
+
return crop_img
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def scale_image(img, factor=1):
|
| 43 |
+
"""Returns resize image by scale factor.
|
| 44 |
+
This helps to retain resolution ratio while resizing.
|
| 45 |
+
Args:
|
| 46 |
+
img: image to be scaled
|
| 47 |
+
factor: scale factor to resize
|
| 48 |
+
"""
|
| 49 |
+
return cv2.resize(img,(int(img.shape[1]*factor), int(img.shape[0]*factor)))
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def get_track_by_tags(tags, pat, duration, maxit=20, loop=False):
|
| 53 |
+
if loop:
|
| 54 |
+
mode = "loop"
|
| 55 |
+
else:
|
| 56 |
+
mode = "track"
|
| 57 |
+
r = httpx.post('https://api-b2b.mubert.com/v2/RecordTrackTTM',
|
| 58 |
+
json={
|
| 59 |
+
"method": "RecordTrackTTM",
|
| 60 |
+
"params": {
|
| 61 |
+
"pat": pat,
|
| 62 |
+
"duration": duration,
|
| 63 |
+
"tags": tags,
|
| 64 |
+
"mode": mode
|
| 65 |
+
}
|
| 66 |
+
})
|
| 67 |
+
|
| 68 |
+
pprint(r.text)
|
| 69 |
+
rdata = json.loads(r.text)
|
| 70 |
+
assert rdata['status'] == 1, rdata['error']['text']
|
| 71 |
+
trackurl = rdata['data']['tasks'][0]['download_link']
|
| 72 |
+
|
| 73 |
+
#print('Generating track ', end='')
|
| 74 |
+
for i in range(maxit):
|
| 75 |
+
r = httpx.get(trackurl)
|
| 76 |
+
if r.status_code == 200:
|
| 77 |
+
return trackurl
|
| 78 |
+
time.sleep(1)
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def generate_track_by_prompt(image, email, duration, loop=False):
|
| 82 |
+
try:
|
| 83 |
+
# Checking Image Aspect Ratio
|
| 84 |
+
filename_png = f"{uuid.uuid4().hex}.png"
|
| 85 |
+
filepath_png = f"{gettempdir()}/{filename_png}"
|
| 86 |
+
|
| 87 |
+
with Image.open(image) as im:
|
| 88 |
+
# image size
|
| 89 |
+
ratio_width = im.size[0]
|
| 90 |
+
ratio_height = im.size[1]
|
| 91 |
+
im.convert("RGB").save(filepath_png)
|
| 92 |
+
if ratio_width > 3501 or ratio_height > 3501:
|
| 93 |
+
raise gr.Error("Image aspect ratio must not exceed width: 1024 px or height: 1024 px.")
|
| 94 |
+
elif ratio_width > 3500 or ratio_height > 3500:
|
| 95 |
+
image_g = cv2.imread(image)
|
| 96 |
+
scale_img = scale_image(image_g, factor=0.2)
|
| 97 |
+
cv2.imwrite(filepath_png, scale_img)
|
| 98 |
+
elif ratio_width > 1800 or ratio_height > 1800:
|
| 99 |
+
image_g = cv2.imread(image)
|
| 100 |
+
scale_img = scale_image(image_g, factor=0.3)
|
| 101 |
+
cv2.imwrite(filepath_png, scale_img)
|
| 102 |
+
elif ratio_width > 900 or ratio_height > 900:
|
| 103 |
+
image_g = cv2.imread(image)
|
| 104 |
+
scale_img = scale_image(image_g, factor=0.5)
|
| 105 |
+
cv2.imwrite(filepath_png, scale_img)
|
| 106 |
+
|
| 107 |
+
# prompt = image_to_text(filepath_png, "Image Captioning", "", "Nucleus sampling")
|
| 108 |
+
prompt = image_to_text(filepath_png, "ViT-L (best for Stable Diffusion 1.*)", "Fast", fn_index=1)[0]
|
| 109 |
+
print(f"PROMPT: {prompt}")
|
| 110 |
+
|
| 111 |
+
pat = get_pat(email)
|
| 112 |
+
_, tags = get_tags_for_prompts(minilm, mubert_tags_embeddings, [prompt, ])[0]
|
| 113 |
+
filepath = get_track_by_tags(tags, pat, int(duration), loop=loop)
|
| 114 |
+
|
| 115 |
+
filename_mp3 = filepath.split("/")[-1]
|
| 116 |
+
filepath_mp3 = f"{gettempdir()}/{filename_mp3}"
|
| 117 |
+
filename_mp4 = f"{uuid.uuid4().hex}.mp4"
|
| 118 |
+
filepath_mp4 = f"{gettempdir()}/{filename_mp4}"
|
| 119 |
+
|
| 120 |
+
os.system(f"wget {filepath} -P {gettempdir()}")
|
| 121 |
+
|
| 122 |
+
# waveform
|
| 123 |
+
with Image.open(filepath_png) as im:
|
| 124 |
+
width = im.size[0]
|
| 125 |
+
height = im.size[1]
|
| 126 |
+
print(f"{width}x{height}")
|
| 127 |
+
command = f'ffmpeg -hide_banner -loglevel warning -y -i {filepath_mp3} -loop 1 -i {filepath_png} -filter_complex "[0:a]showwaves=s={width}x{height}:colors=0xffffff:mode=cline,format=rgba[v];[1:v][v]overlay[outv]" -map "[outv]" -map 0:a -c:v libx264 -r 15 -c:a copy -pix_fmt yuv420p -shortest {filepath_mp4}'
|
| 128 |
+
os.system(command)
|
| 129 |
+
os.remove(filepath_png)
|
| 130 |
+
os.remove(filepath_mp3)
|
| 131 |
+
|
| 132 |
+
return filepath_mp4, filepath, prompt, tags
|
| 133 |
+
except Exception as e:
|
| 134 |
+
raise gr.Error(str(e))
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
iface = gr.Interface(fn=generate_track_by_prompt,
|
| 138 |
+
inputs=[gr.Image(type="filepath"),
|
| 139 |
+
"text",
|
| 140 |
+
gr.Slider(label="duration (seconds)", value=30, minimum=10, maximum=60)],
|
| 141 |
+
outputs=[gr.Video(label="Video"),
|
| 142 |
+
gr.Audio(label="Audio"),
|
| 143 |
+
gr.Text(label="Prompt"),
|
| 144 |
+
gr.Text(label="Tags")])
|
| 145 |
+
iface.queue().launch()
|
constants.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
|
| 3 |
+
MUBERT_TAGS_STRING = 'tribal,action,kids,neo-classic,run 130,pumped,jazz / funk,ethnic,dubtechno,reggae,acid jazz,liquidfunk,funk,witch house,tech house,underground,artists,mystical,disco,sensorium,r&b,agender,psychedelic trance / psytrance,peaceful,run 140,piano,run 160,setting,meditation,christmas,ambient,horror,cinematic,electro house,idm,bass,minimal,underscore,drums,glitchy,beautiful,technology,tribal house,country pop,jazz & funk,documentary,space,classical,valentines,chillstep,experimental,trap,new jack swing,drama,post-rock,tense,corporate,neutral,happy,analog,funky,spiritual,sberzvuk special,chill hop,dramatic,catchy,holidays,fitness 90,optimistic,orchestra,acid techno,energizing,romantic,minimal house,breaks,hyper pop,warm up,dreamy,dark,urban,microfunk,dub,nu disco,vogue,keys,hardcore,aggressive,indie,electro funk,beauty,relaxing,trance,pop,hiphop,soft,acoustic,chillrave / ethno-house,deep techno,angry,dance,fun,dubstep,tropical,latin pop,heroic,world music,inspirational,uplifting,atmosphere,art,epic,advertising,chillout,scary,spooky,slow ballad,saxophone,summer,erotic,jazzy,energy 100,kara mar,xmas,atmospheric,indie pop,hip-hop,yoga,reggaeton,lounge,travel,running,folk,chillrave & ethno-house,detective,darkambient,chill,fantasy,minimal techno,special,night,tropical house,downtempo,lullaby,meditative,upbeat,glitch hop,fitness,neurofunk,sexual,indie rock,future pop,jazz,cyberpunk,melancholic,happy hardcore,family / kids,synths,electric guitar,comedy,psychedelic trance & psytrance,edm,psychedelic rock,calm,zen,bells,podcast,melodic house,ethnic percussion,nature,heavy,bassline,indie dance,techno,drumnbass,synth pop,vaporwave,sad,8-bit,chillgressive,deep,orchestral,futuristic,hardtechno,nostalgic,big room,sci-fi,tutorial,joyful,pads,minimal 170,drill,ethnic 108,amusing,sleepy ambient,psychill,italo disco,lofi,house,acoustic guitar,bassline house,rock,k-pop,synthwave,deep house,electronica,gabber,nightlife,sport & fitness,road trip,celebration,electro,disco house,electronic'
|
| 4 |
+
MUBERT_TAGS = np.array(MUBERT_TAGS_STRING.split(','))
|
| 5 |
+
MUBERT_LICENSE = "ttmmubertlicense#f0acYBenRcfeFpNT4wpYGaTQIyDI4mJGv5MfIhBFz97NXDwDNFHmMRsBSzmGsJwbTpP1A6i07AXcIeAHo5"
|
| 6 |
+
MUBERT_MODE = "loop"
|
| 7 |
+
MUBERT_TOKEN = "4951f6428e83172a4f39de05d5b3ab10d58560b8"
|
requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
httpx
|
| 2 |
+
sentence-transformers
|
| 3 |
+
opencv-python
|
utils.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import numpy as np
|
| 3 |
+
import httpx
|
| 4 |
+
|
| 5 |
+
from constants import MUBERT_TAGS, MUBERT_LICENSE, MUBERT_MODE, MUBERT_TOKEN
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def get_mubert_tags_embeddings(w2v_model):
|
| 9 |
+
return w2v_model.encode(MUBERT_TAGS)
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def get_pat(email: str):
|
| 13 |
+
r = httpx.post('https://api-b2b.mubert.com/v2/GetServiceAccess',
|
| 14 |
+
json={
|
| 15 |
+
"method": "GetServiceAccess",
|
| 16 |
+
"params": {
|
| 17 |
+
"email": email,
|
| 18 |
+
"license": MUBERT_LICENSE,
|
| 19 |
+
"token": MUBERT_TOKEN,
|
| 20 |
+
"mode": MUBERT_MODE,
|
| 21 |
+
}
|
| 22 |
+
})
|
| 23 |
+
|
| 24 |
+
rdata = json.loads(r.text)
|
| 25 |
+
assert rdata['status'] == 1, "probably incorrect e-mail"
|
| 26 |
+
pat = rdata['data']['pat']
|
| 27 |
+
return pat
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def find_similar(em, embeddings, method='cosine'):
|
| 31 |
+
scores = []
|
| 32 |
+
for ref in embeddings:
|
| 33 |
+
if method == 'cosine':
|
| 34 |
+
scores.append(1 - np.dot(ref, em) / (np.linalg.norm(ref) * np.linalg.norm(em)))
|
| 35 |
+
if method == 'norm':
|
| 36 |
+
scores.append(np.linalg.norm(ref - em))
|
| 37 |
+
return np.array(scores), np.argsort(scores)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def get_tags_for_prompts(w2v_model, mubert_tags_embeddings, prompts, top_n=3, debug=False):
|
| 41 |
+
prompts_embeddings = w2v_model.encode(prompts)
|
| 42 |
+
ret = []
|
| 43 |
+
for i, pe in enumerate(prompts_embeddings):
|
| 44 |
+
scores, idxs = find_similar(pe, mubert_tags_embeddings)
|
| 45 |
+
top_tags = MUBERT_TAGS[idxs[:top_n]]
|
| 46 |
+
top_prob = 1 - scores[idxs[:top_n]]
|
| 47 |
+
if debug:
|
| 48 |
+
print(f"Prompt: {prompts[i]}\nTags: {', '.join(top_tags)}\nScores: {top_prob}\n\n\n")
|
| 49 |
+
ret.append((prompts[i], list(top_tags)))
|
| 50 |
+
return ret
|