sohamchitimali committed on
Commit
e7ee8fc
·
0 Parent(s):

Fresh init

Browse files
Files changed (5) hide show
  1. .gitattributes +37 -0
  2. .gitignore +2 -0
  3. README.md +12 -0
  4. app.py +54 -0
  5. requirements.txt +2 -0
.gitattributes ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.mdl filter=lfs diff=lfs merge=lfs -text
37
+ *.fst filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ token.txt
2
+ models/
README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Vosk
3
+ emoji: 🚀
4
+ colorFrom: gray
5
+ colorTo: gray
6
+ sdk: gradio
7
+ sdk_version: 5.44.1
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import wave
3
+ import gradio as gr
4
+ from vosk import Model, KaldiRecognizer
5
+
6
+ # 🔹 Load models once at startup
7
+ # Download models from https://alphacephei.com/vosk/models and unzip them in ./models/
8
# Map each UI language label to its Vosk model, loaded eagerly at import time
# so that requests never pay the model-loading cost. Each path points at an
# unpacked model directory under ./models/.
models = {
    label: Model(model_dir)
    for label, model_dir in (
        ("English (US)", "models/vosk-model-small-en-us-0.15"),
        ("English (Indian)", "models/vosk-model-small-en-in-0.4"),
        ("Hindi", "models/vosk-model-small-hi-in-0.22"),
        ("Telugu", "models/vosk-model-small-te-in-0.22"),
    )
}
14
+
15
def transcribe(audio_file, language):
    """Transcribe a WAV recording with the Vosk model selected by ``language``.

    Args:
        audio_file: Path of the audio file to read, or ``None`` when nothing
            was recorded or uploaded.
        language: Key into the module-level ``models`` mapping.

    Returns:
        The recognized text as one space-separated string, or a
        human-readable message when the input is missing, cannot be parsed
        as a WAV file, or is not mono 16-bit PCM sampled at 16 kHz.

    Raises:
        KeyError: If ``language`` is not a key of ``models``.
    """
    import json  # local import: the file's import header does not provide json

    bad_format_msg = "Audio must be mono PCM16 at 16kHz. Please re-upload."

    if audio_file is None:
        return "Please record or upload an audio file."

    # A non-WAV or empty upload makes wave.open raise; report it the same way
    # as any other unsupported input instead of surfacing a traceback.
    try:
        wf = wave.open(audio_file, "rb")
    except (wave.Error, EOFError):
        return bad_format_msg

    # The context manager closes the file on every exit path, including the
    # early return below and any exception raised while recognizing.
    with wf:
        if (
            wf.getnchannels() != 1
            or wf.getsampwidth() != 2
            or wf.getframerate() != 16000
        ):
            return bad_format_msg

        rec = KaldiRecognizer(models[language], wf.getframerate())

        raw_results = []
        while True:
            data = wf.readframes(4000)
            if not data:
                break
            # AcceptWaveform is truthy when an utterance has been finalized;
            # only then is a completed result collected.
            if rec.AcceptWaveform(data):
                raw_results.append(rec.Result())
        # Flush whatever is still buffered after the last chunk.
        raw_results.append(rec.FinalResult())

    # Each result is a JSON document; keep only its "text" field and drop
    # empty segments so the output has no stray spaces.
    texts = (json.loads(raw).get("text", "") for raw in raw_results)
    return " ".join(text for text in texts if text)
40
+
41
+ # 🔹 Gradio UI
42
# Build the single-page UI: an audio input and a language picker side by side,
# a textbox for the transcription, and a button that runs `transcribe`.
# NOTE(review): the nesting below was reconstructed from a flattened listing —
# confirm that only the audio input and dropdown belong inside the Row.
with gr.Blocks() as demo:
    gr.Markdown("## 🎙️ Multi-Language Speech-to-Text with Vosk")
    with gr.Row():
        # type="filepath" hands `transcribe` a path on disk rather than samples.
        audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath")
        lang_dropdown = gr.Dropdown(
            choices=list(models),
            value="English (US)",
            label="Language",
        )
    output = gr.Textbox(label="Transcription")

    btn = gr.Button("Transcribe")
    btn.click(fn=transcribe, inputs=[audio_input, lang_dropdown], outputs=output)
51
+
52
+ # Launch app
53
if __name__ == "__main__":
    # Listen on all interfaces; use $PORT when it is set, otherwise 7860.
    listen_port = int(os.environ.get("PORT", 7860))
    demo.launch(server_name="0.0.0.0", server_port=listen_port)
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ vosk
2
+ gradio