Subiksha0515 committed on
Commit
c3db11f
·
verified ·
1 Parent(s): 613bf12

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -0
app.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import soundfile as sf
3
+ from safetensors.numpy import load_file
4
+ import sentencepiece as spm
5
+ import torch
6
+
7
# Tokenizer: create the SentencePiece processor and read its model file.
sp = spm.SentencePieceProcessor()
sp.load("tokenizer.model")

# Checkpoint: read every stored tensor into a name -> array mapping.
tensors = load_file("model.safetensors")


def _dequantized(key):
    """Return the tensor stored under *key*, rescaled when a scale exists.

    If the checkpoint holds a companion entry named ``key + "_scale"``,
    both arrays are cast to float32 and multiplied together; otherwise the
    stored array is returned as loaded.
    """
    scale_key = key + "_scale"
    if scale_key not in tensors:
        return tensors[key]
    return tensors[key].astype(np.float32) * tensors[scale_key].astype(np.float32)


# Dequantize weights. The "*_scale" entries are consumed by the helper above
# and are deliberately not copied into ``weights`` themselves.
weights = {
    key: _dequantized(key)
    for key in tensors
    if not key.endswith("_scale")
}

print("Model loaded successfully")
36
+
37
+
38
# Dummy inference function (example structure).
# NOTE: Pocket-TTS requires the full model architecture; this example only
# shows the overall structure and the audio output pipeline.
def generate_dummy_audio(
    text,
    *,
    duration=3,
    sample_rate=24000,
    frequency=220,
    amplitude=0.2,
):
    """Tokenize *text* and return a placeholder sine tone instead of speech.

    The tokens are printed but do not influence the waveform; the returned
    audio is a pure sine wave meant to be replaced by real inference.

    Args:
        text: Input string passed to the module-level tokenizer ``sp``.
        duration: Length of the generated tone in seconds.
        sample_rate: Number of samples per second of the output.
        frequency: Tone frequency in Hz.
        amplitude: Peak amplitude of the sine wave.

    Returns:
        A ``(audio, sample_rate)`` tuple where ``audio`` is a 1-D NumPy
        array of ``int(sample_rate * duration)`` samples.
    """
    tokens = sp.encode(text)
    print("Tokens:", tokens)

    # Generate dummy waveform (replace with real inference).
    num_samples = int(sample_rate * duration)

    # Sample i must sit at exactly i / sample_rate seconds. The previous
    # np.linspace(0, duration, num_samples) included the endpoint, which
    # stretched the spacing to duration / (num_samples - 1) and made the
    # tone play back slightly above the requested frequency.
    t = np.arange(num_samples) / sample_rate

    audio = amplitude * np.sin(2 * np.pi * frequency * t)

    return audio, sample_rate
57
+
58
+
59
# Sentence handed to the placeholder synthesis pipeline.
text = "Hello Subiksha, welcome to text to speech system"

# The generator returns the samples together with their sample rate.
audio, sr = generate_dummy_audio(text)

# Write the samples to output.wav at the returned sample rate.
sf.write(file="output.wav", data=audio, samplerate=sr)

print("Speech saved as output.wav")