alidev2002 committed on
Commit
2dfdea1
·
verified ·
1 Parent(s): 9d27962

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +94 -105
app.py CHANGED
@@ -1,119 +1,108 @@
1
  import gradio as gr
2
- import torch
3
- import soundfile as sf
4
- from omnivoice import OmniVoice
5
- import tempfile
6
  import os
 
 
 
 
7
 
8
- device = "cuda" if torch.cuda.is_available() else "cpu"
 
9
 
10
- model = OmniVoice.from_pretrained(
11
- "k2-fsa/OmniVoice",
12
- device_map="cuda:0" if device == "cuda" else "cpu",
13
- dtype=torch.float16 if device == "cuda" else torch.float32
14
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- def build_voice_prompt(gender, age, pitch, style):
17
- attrs = []
18
-
19
- if gender:
20
- attrs.append(gender)
21
- if age:
22
- attrs.append(age)
23
- if pitch:
24
- attrs.append(pitch)
25
- if style:
26
- attrs.append(style)
27
-
28
- if len(attrs) > 0:
29
- print(attrs)
30
- voice_desc = ", ".join(attrs)
31
- return voice_desc
32
- else:
33
- return None
34
-
35
-
36
- def generate(text, ref_audio, gender, age, pitch, style, num_steps, speed):
37
-
38
- output_path = tempfile.NamedTemporaryFile(suffix=".wav", delete=False).name
39
-
40
- instruct = build_voice_prompt(gender, age, pitch, style)
41
-
42
- if ref_audio is not None:
43
- audio = model.generate(
44
- text=text,
45
- language='Persian',
46
- instruct=instruct,
47
- ref_audio=ref_audio,
48
- num_step=int(num_steps),
49
- speed=float(speed)
50
- )
51
- else:
52
- audio = model.generate(
53
- text=text,
54
- language='Persian',
55
- instruct=instruct,
56
- num_step=int(num_steps),
57
- speed=float(speed)
58
- )
59
-
60
- sf.write(output_path, audio[0], 24000)
61
 
62
  return output_path
63
 
64
 
 
 
 
 
 
 
 
 
 
65
  demo = gr.Interface(
66
  fn=generate,
67
- inputs=[
68
- gr.Textbox(label="Text"),
69
-
70
- gr.Audio(type="filepath", label="Reference Voice (optional)"),
71
-
72
- gr.Dropdown(
73
- choices=["male", "female"],
74
- label="Gender",
75
- value=None
76
- ),
77
-
78
- gr.Dropdown(
79
- choices=["child", "teenager", "young adult", "middle-aged", "elderly"],
80
- label="Age",
81
- value=None
82
- ),
83
-
84
- gr.Dropdown(
85
- choices=["very low pitch", "low pitch", "medium pitch", "high pitch"],
86
- label="Pitch",
87
- value=None
88
- ),
89
-
90
- gr.Dropdown(
91
- choices=["normal", "whisper", "calm", "angry"],
92
- label="Style",
93
- value=None
94
- ),
95
-
96
- gr.Slider(
97
- minimum=1,
98
- maximum=32,
99
- value=4,
100
- step=1,
101
- label="num_steps"
102
- ),
103
-
104
- gr.Slider(
105
- minimum=0.5,
106
- maximum=1.5,
107
- value=0.8,
108
- step=0.05,
109
- label="Speech Speed"
110
- )
111
- ],
112
-
113
- outputs=gr.Audio(type="filepath"),
114
-
115
- title="OmniVoice TTS (Voice Design + Cloning)",
116
- description="Control voice with gender, age, pitch, style + num_steps"
117
  )
118
 
119
  demo.launch()
 
1
  import gradio as gr
 
 
 
 
2
  import os
3
+ import subprocess
4
+ import zipfile
5
+ import requests
6
+ from huggingface_hub import hf_hub_download, login
7
 
8
+ hf_token = os.getenv("HF_TOKEN")
9
+ login(token=hf_token)
10
 
11
+ BASE_DIR = os.getcwd()
12
+ SDCPP_DIR = os.path.join(BASE_DIR, "sdcpp")
13
+
14
+ # =========================
15
+ # 1. Download stable-diffusion.cpp
16
+ # =========================
17
+ def setup_sdcpp():
18
+ if not os.path.exists(SDCPP_DIR):
19
+ os.makedirs(SDCPP_DIR, exist_ok=True)
20
+
21
+ zip_url = "https://github.com/leejet/stable-diffusion.cpp/releases/download/master-586-c97702e/sd-master-c97702e-bin-Linux-Ubuntu-24.04-x86_64.zip"
22
+ zip_path = os.path.join(BASE_DIR, "sdcpp.zip")
23
+
24
+ print("Downloading stable-diffusion.cpp...")
25
+ r = requests.get(zip_url)
26
+ with open(zip_path, "wb") as f:
27
+ f.write(r.content)
28
+
29
+ print("Extracting...")
30
+ with zipfile.ZipFile(zip_path, 'r') as zip_ref:
31
+ zip_ref.extractall(SDCPP_DIR)
32
+
33
+ os.remove(zip_path)
34
+
35
+ # chmod
36
+ subprocess.run(["chmod", "+x", f"{SDCPP_DIR}/sd-cli"])
37
+
38
+
39
+ # =========================
40
+ # 2. Download models
41
+ # =========================
42
+ def setup_models():
43
+ print("Downloading models...")
44
+
45
+ model_path = hf_hub_download(
46
+ repo_id="unsloth/Z-Image-Turbo-GGUF",
47
+ filename="z-image-turbo-Q4_K_M.gguf"
48
+ )
49
 
50
+ vae_path = hf_hub_download(
51
+ repo_id="black-forest-labs/FLUX.1-schnell",
52
+ filename="ae.safetensors"
53
+ )
54
+
55
+ llm_path = hf_hub_download(
56
+ repo_id="unsloth/Qwen3-4B-Instruct-2507-GGUF",
57
+ filename="Qwen3-4B-Instruct-2507-Q4_K_M.gguf"
58
+ )
59
+
60
+ return model_path, vae_path, llm_path
61
+
62
+
63
+ MODEL_PATH, VAE_PATH, LLM_PATH = None, None, None
64
+
65
+ # =========================
66
+ # 3. inference
67
+ # =========================
68
+ def generate(prompt):
69
+ output_path = os.path.join(BASE_DIR, "output.png")
70
+
71
+ cmd = [
72
+ f"{SDCPP_DIR}/sd-cli",
73
+ "-m", MODEL_PATH,
74
+ "--vae", VAE_PATH,
75
+ "--llm", LLM_PATH,
76
+ "-p", prompt,
77
+ "--steps", "6",
78
+ "--cfg-scale", "1.0",
79
+ "-o", output_path
80
+ ]
81
+
82
+ env = os.environ.copy()
83
+ env["LD_LIBRARY_PATH"] = SDCPP_DIR
84
+
85
+ print("Running:", " ".join(cmd))
86
+
87
+ subprocess.run(cmd, env=env, check=True)
 
 
 
 
 
 
 
88
 
89
  return output_path
90
 
91
 
92
+ # =========================
93
+ # init
94
+ # =========================
95
+ setup_sdcpp()
96
+ MODEL_PATH, VAE_PATH, LLM_PATH = setup_models()
97
+
98
+ # =========================
99
+ # UI
100
+ # =========================
101
  demo = gr.Interface(
102
  fn=generate,
103
+ inputs=gr.Textbox(label="Prompt"),
104
+ outputs=gr.Image(label="Generated Image"),
105
+ title="Z-Image Turbo GGUF (CPU)"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  )
107
 
108
  demo.launch()