nikhilhyperneuron commited on
Commit
107cd17
·
verified ·
1 Parent(s): 86a4c3a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +130 -0
app.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import gradio as gr
3
+ from transformers import CsmForConditionalGeneration, AutoProcessor
4
+ import tempfile
5
+ import os
6
+ from huggingface_hub import login
7
+
8
+
9
# Initialize model and processor
def load_model():
    """Load the CSM text-to-speech model and processor from the HF Hub.

    Returns:
        tuple: ``(model, processor, device, error)`` where ``error`` is
        ``None`` on success.  On any failure this returns
        ``(None, None, "cpu", str(exception))`` so the Gradio UI can show
        the error instead of the Space crashing at import time.
    """
    # For Spaces, reference your model by its HF Hub ID
    model_id = "hyperneuronAILabs/vocali"  # Replace with your HF model ID

    try:
        processor = AutoProcessor.from_pretrained(model_id)

        # Check for available hardware
        device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {device}")

        # No quantization is applied here (a previous comment claimed 8-bit,
        # which was incorrect): memory is kept in check via fp16 on GPU /
        # fp32 on CPU and low_cpu_mem_usage during weight loading.
        model = CsmForConditionalGeneration.from_pretrained(
            model_id,
            device_map=device,
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            low_cpu_mem_usage=True,
        )

        return model, processor, device, None
    except Exception as e:
        # Broad catch is deliberate: any load failure (auth, network, OOM)
        # is surfaced in the UI rather than aborting the app.
        return None, None, "cpu", str(e)
32
+
33
# Load model on startup (at module import time) so both the UI builder and
# generate_speech() can read the outcome via these module-level globals.
model, processor, device, error_msg = load_model()
# Gates speech generation and the Examples section; error_msg holds the
# failure reason when this is False.
model_loaded = model is not None
36
+
37
# Function to generate speech
def generate_speech(text, max_new_tokens=70):
    """Convert ``text`` to speech using the globally loaded CSM model.

    Args:
        text: Input text (the UI expects Hindi, but any string is accepted).
        max_new_tokens: Cap on generated audio tokens; higher values use
            more memory.

    Returns:
        tuple: ``(wav_path, status_message)`` on success, or
        ``(None, error_message)`` if the model is unavailable or
        generation fails.
    """
    if not model_loaded:
        return None, f"Model failed to load: {error_msg}"

    try:
        # Create conversation format expected by the CSM chat template
        conversation = [
            {"role": "0", "content": [{"type": "text", "text": text}]},
        ]

        # Process the input
        inputs = processor.apply_chat_template(
            conversation,
            tokenize=True,
            return_dict=True,
        ).to(device)

        # Generate audio with memory efficient settings
        with torch.no_grad():  # Save memory during inference
            audio = model.generate(
                **inputs,
                output_audio=True,
                max_new_tokens=max_new_tokens,
            )

        # BUGFIX: the original built the path from hash(text), which can
        # collide for different inputs and, for str, changes between runs
        # (hash randomization).  NamedTemporaryFile guarantees a unique,
        # non-clobbering path; delete=False keeps the file for Gradio to
        # serve after the handle is closed.
        with tempfile.NamedTemporaryFile(
            prefix="generated_speech_", suffix=".wav", delete=False
        ) as tmp:
            output_path = tmp.name
        processor.save_audio(audio, output_path)

        return output_path, "Speech generated successfully!"

    except Exception as e:
        # Report the error to the UI instead of raising, so the app stays up.
        return None, f"Error generating speech: {str(e)}"
72
+
73
# Create Gradio interface.  Built at import time; `demo` is launched from
# the __main__ guard at the bottom of the file.
with gr.Blocks(title="Hindi Text-to-Speech Generator") as demo:
    gr.Markdown("# Hindi Text-to-Speech Generator")

    # If startup loading failed, show the error instead of the usual blurb.
    if not model_loaded:
        gr.Markdown(f"⚠️ **Error loading model: {error_msg}**")
    else:
        gr.Markdown("Enter text in Hindi to convert it to speech")

    with gr.Row():
        with gr.Column():
            # Left column: text input, token budget, and the trigger button.
            text_input = gr.Textbox(
                label="Input Text",
                placeholder="नमस्ते आप कैसे हैं?",
                lines=5
            )

            max_tokens = gr.Slider(
                minimum=10,
                maximum=100,
                value=50,
                step=5,
                label="Max New Tokens (higher values may use more memory)"
            )

            submit_btn = gr.Button("Generate Speech", variant="primary")

        with gr.Column():
            # Right column: generated audio plus a status/error line.
            audio_output = gr.Audio(label="Generated Speech", type="filepath")
            status_text = gr.Textbox(label="Status", interactive=False)

    # Example inputs (fewer examples to conserve memory)
    # NOTE(review): cache_examples=True runs generate_speech for every
    # example at build time, which itself costs startup time and memory on
    # a Space — confirm this trade-off is intended.
    if model_loaded:
        gr.Examples(
            examples=[
                ["नमस्ते आप कैसे हैं?", 50],
                ["मैं आपकी किस प्रकार सहायता कर सकता हूँ", 50],
            ],
            inputs=[text_input, max_tokens],
            outputs=[audio_output, status_text],
            fn=generate_speech,
            cache_examples=True
        )

    # Set up the function call: clicking the button feeds the textbox and
    # slider values into generate_speech and routes its (path, status) result
    # to the audio player and status box.
    submit_btn.click(
        fn=generate_speech,
        inputs=[text_input, max_tokens],
        outputs=[audio_output, status_text]
    )

    gr.Markdown("### System Information")
    gr.Markdown(f"- Using device: {device}")
    gr.Markdown(f"- Model loaded: {'Yes' if model_loaded else 'No'}")
127
+
128
# Launch the app only when executed directly (Spaces imports and runs this
# module as a script, so the guard is satisfied there too).
if __name__ == "__main__":
    demo.launch()  # Don't use share=True on Spaces