rmysmo committed on
Commit
5bf3f7c
·
verified ·
1 Parent(s): 2c6666e

added 4 files

Browse files
Files changed (3) hide show
  1. Dockerfile +28 -0
  2. app.py +39 -0
  3. requirements.txt +3 -0
Dockerfile ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use a slim Python base image
FROM python:3.9-slim

# requirements.txt lists PyAudio, which pip has to compile from source on
# Linux. That needs a C toolchain and the PortAudio development headers,
# neither of which is present in the slim image, so install them first.
RUN apt-get update && \
    apt-get install -y --no-install-recommends build-essential portaudio19-dev && \
    rm -rf /var/lib/apt/lists/*

# Create and set the working directory
WORKDIR /app

# Copy only the requirements file first so the dependency layer stays cached
# until requirements.txt itself changes
COPY requirements.txt .

# Upgrade the packaging tools, then install the application dependencies
RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \
    pip install --no-cache-dir -r requirements.txt

# Copy the rest of the build context into the container at /app
COPY . .

# Set ownership and permissions for the application directory in one layer
RUN chown -R root:root /app && \
    chmod -R 755 /app

# Expose the port the app runs on
EXPOSE 7860

# Command to run the application
CMD ["python3", "app.py"]
28
+
app.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from vosk import Model, KaldiRecognizer
3
+ import wave
4
+ import json
5
+
6
# Set up the Vosk model once at import time so every recognition request
# reuses the same instance.
# NOTE(review): the path is relative, so it presumably resolves against the
# process working directory — confirm the model directory ships with the app.
model = Model("vosk-model-small-uz-0.22")
8
+
9
def recognize_from_file(audio_file):
    """Transcribe a mono 16-bit PCM WAV file with the module-level Vosk model.

    Args:
        audio_file: Path of the audio file to transcribe. May be ``None`` or
            empty when the caller has no file to offer.

    Returns:
        The recognized text with segments separated by single spaces, or the
        message ``"Audio file must be WAV format mono PCM."`` when the input
        is missing, is not a readable WAV container, or is not uncompressed
        mono 16-bit audio.
    """
    format_error = "Audio file must be WAV format mono PCM."

    # Nothing to open: report it the same way as any other unusable input.
    if not audio_file:
        return format_error

    try:
        wf = wave.open(audio_file, "rb")
    except (wave.Error, EOFError):
        # Not a RIFF/WAVE container (e.g. an MP3 upload) or a truncated header.
        return format_error

    # The context manager closes the file on every exit path, including the
    # early return below; the original left the handle open.
    with wf:
        if (
            wf.getnchannels() != 1
            or wf.getsampwidth() != 2
            or wf.getcomptype() != "NONE"
        ):
            return format_error

        recognizer = KaldiRecognizer(model, wf.getframerate())
        segments = []

        # Feed the audio in 4000-frame chunks; AcceptWaveform returning true
        # means a result is ready to be collected via Result().
        while True:
            data = wf.readframes(4000)
            if not data:
                break
            if recognizer.AcceptWaveform(data):
                segments.append(json.loads(recognizer.Result()).get("text", ""))

        # Collect whatever the recognizer still holds after the last chunk.
        segments.append(json.loads(recognizer.FinalResult()).get("text", ""))

    # Skip empty segments so the transcript has no doubled or trailing spaces.
    return " ".join(part for part in segments if part)
29
+
30
+
31
# Gradio UI: a single audio upload (handed to the handler as a file path)
# mapped to a plain-text transcript.
iface = gr.Interface(
    fn=recognize_from_file,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
    title="Speech Recognition from Audio File",
    description="Upload a WAV file for recognition.",
)

# Bind to all interfaces on the port the Dockerfile exposes. Gradio's default
# host is 127.0.0.1, which is unreachable from outside the container.
iface.launch(server_name="0.0.0.0", server_port=7860)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ vosk
2
+ PyAudio
3
+ gradio