kamranferoz committed on
Commit
71f9a5c
·
1 Parent(s): e17707c

Upload folder using huggingface_hub

Browse files
.github/workflows/update_space.yml ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
name: Run Python script

on:
  push:
    branches:
      - main

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      # v2 of checkout/setup-python runs on the retired Node 12 runtime;
      # v4/v5 are the currently supported releases.
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.9'

      - name: Install Gradio
        run: python -m pip install gradio

      - name: Log in to Hugging Face
        # Pass the token through an environment variable instead of
        # interpolating it into the command line, so it cannot leak via
        # process listings or echoed commands.
        env:
          HF_TOKEN: ${{ secrets.hf_token }}
        run: python -c 'import os, huggingface_hub; huggingface_hub.login(token=os.environ["HF_TOKEN"])'

      - name: Deploy to Spaces
        run: gradio deploy
00-00_voice2text.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import whisper
import gradio as gr
import time
from pyChatGPT import ChatGPT
import warnings
import openai
from gtts import gTTS
from dotenv import load_dotenv
import os

# Pull variables from the local .env file, then read the OpenAI key that
# transcribe() uses when calling the completion endpoint.
load_dotenv()
OPENAI_API_KEY = os.getenv('openai_api_key')

# Silence noisy library warnings, then load the smallest Whisper checkpoint
# once at import time so every Gradio request reuses the same model.
warnings.filterwarnings("ignore")
model = whisper.load_model("base")
19
+
20
def transcribe(audio):
    """Transcribe a recorded clip with Whisper, then ask the OpenAI
    completion API for a reply to the transcript.

    Parameters
    ----------
    audio : str
        Filepath of the recording supplied by the Gradio microphone input.

    Returns
    -------
    list[str]
        ``[transcribed_text, model_reply]`` — matching the two output
        textboxes of the interface.
    """
    # Whisper operates on fixed 30-second windows: load, then pad or trim.
    clip = whisper.load_audio(audio)
    clip = whisper.pad_or_trim(clip)

    # Log-Mel spectrogram on the same device as the model.
    mel = whisper.log_mel_spectrogram(clip).to(model.device)

    # NOTE: the original ran model.detect_language(mel) here and discarded
    # the result — a full forward pass of dead work. whisper.decode already
    # auto-detects the language when none is given, so the call is removed.

    # fp16=False keeps decoding CPU-safe.
    options = whisper.DecodingOptions(fp16=False)
    result_text = whisper.decode(model, mel, options).text

    # Forward the transcript to the completion endpoint for a reply.
    openai.api_key = OPENAI_API_KEY
    completion = openai.Completion.create(
        model="text-davinci-003",
        prompt=result_text,
        max_tokens=500,
        temperature=0,
    )
    return [result_text, completion["choices"][0]["text"]]
47
+
48
# Output widgets: transcript on the left, model reply on the right.
output_1 = gr.Textbox(label="Speech to Text")
output_2 = gr.Textbox(label="ChatGPT Output")

# gr.Audio replaces the deprecated gr.inputs.Audio namespace in Gradio 3.x
# (the file pins gradio==3.42.0), and allow_flagging expects the string
# "never" rather than the boolean False.
gr.Interface(
    title='Voice to Text using OpenAI (KF)',
    fn=transcribe,
    inputs=[
        gr.Audio(source="microphone", type="filepath"),
    ],
    outputs=[
        output_1,
        output_2,
    ],
    live=True,
    allow_flagging="never",
).launch(share=True)
00-01_voice2text.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import whisper
import gradio as gr
import time
from pyChatGPT import ChatGPT
import warnings
from gtts import gTTS

# Hide noisy library warnings, then load the smallest Whisper checkpoint a
# single time at import so every Gradio request reuses it.
warnings.filterwarnings("ignore")
model = whisper.load_model("base")
12
+
13
def transcribe(audio):
    """Run Whisper speech-to-text on a recorded clip and return the text.

    Parameters
    ----------
    audio : str
        Filepath of the microphone recording handed over by Gradio.

    Returns
    -------
    str
        The decoded transcript.
    """
    # Whisper works on fixed 30-second windows: load, then pad or trim.
    samples = whisper.pad_or_trim(whisper.load_audio(audio))

    # Build the log-Mel spectrogram on the model's device.
    spectrogram = whisper.log_mel_spectrogram(samples).to(model.device)

    # fp16=False keeps decoding CPU-friendly.
    decoded = whisper.decode(model, spectrogram, whisper.DecodingOptions(fp16=False))
    return decoded.text
29
+
30
# Single output textbox for the transcript.
output_1 = gr.Textbox(label="Speech to Text")

# gr.Audio replaces the deprecated gr.inputs.Audio namespace in Gradio 3.x
# (the file pins gradio==3.42.0), and allow_flagging expects the string
# "never" rather than the boolean False.
gr.Interface(
    title='Voice to Text (KF)',
    fn=transcribe,
    inputs=[
        gr.Audio(source="microphone", type="filepath"),
    ],
    outputs=[
        output_1,
    ],
    live=True,
    allow_flagging="never",
).launch(share=True)
README.md CHANGED
@@ -1,12 +1,6 @@
1
  ---
2
- title: V2t
3
- emoji: 🐢
4
- colorFrom: red
5
- colorTo: red
6
  sdk: gradio
7
- sdk_version: 3.44.0
8
- app_file: app.py
9
- pinned: false
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: v2t
3
+ app_file: 00-01_voice2text.py
 
 
4
  sdk: gradio
5
+ sdk_version: 3.42.0
 
 
6
  ---
 
 
requirements.txt ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.2.1
2
+ aiohttp==3.8.5
3
+ aiosignal==1.3.1
4
+ altair==5.1.1
5
+ anyio==3.7.1
6
+ async-timeout==4.0.3
7
+ attrs==23.1.0
8
+ beautifulsoup4==4.12.2
9
+ certifi==2023.7.22
10
+ charset-normalizer==3.2.0
11
+ click==8.1.7
12
+ contourpy==1.1.0
13
+ cycler==0.11.0
14
+ dacite==1.8.1
15
+ exceptiongroup==1.1.3
16
+ fastapi==0.103.1
17
+ ffmpy==0.3.1
18
+ filelock==3.12.3
19
+ fonttools==4.42.1
20
+ frozenlist==1.4.0
21
+ fsspec==2023.9.0
22
+ gradio==3.42.0
23
+ gradio_client==0.5.0
24
+ gTTS==2.3.2
25
+ h11==0.14.0
26
+ htmlmin==0.1.12
27
+ httpcore==0.17.3
28
+ httpx==0.24.1
29
+ huggingface-hub==0.16.4
30
+ idna==3.4
31
+ ImageHash==4.3.1
32
+ importlib-resources==6.0.1
33
+ Jinja2==3.1.2
34
+ joblib==1.3.2
35
+ jsonschema==4.19.0
36
+ jsonschema-specifications==2023.7.1
37
+ kiwisolver==1.4.5
38
+ llvmlite==0.40.1
39
+ markdownify==0.11.6
40
+ MarkupSafe==2.1.3
41
+ matplotlib==3.7.2
42
+ more-itertools==10.1.0
43
+ mpmath==1.3.0
44
+ multidict==6.0.4
45
+ multimethod==1.9.1
46
+ networkx==3.1
47
+ numba==0.57.1
48
+ numpy==1.23.5
49
+ openai==0.28.0
50
+ openai-whisper @ git+https://github.com/openai/whisper.git@e8622f9afc4eba139bf796c210f5c01081000472
51
+ orjson==3.9.5
52
+ outcome==1.2.0
53
+ packaging==23.1
54
+ pandas==2.0.3
55
+ pandas-profiling==3.6.6
56
+ patsy==0.5.3
57
+ phik==0.12.3
58
+ Pillow==10.0.0
59
+ pyChatGPT==0.4.3.3
60
+ pydantic==1.10.12
61
+ pydub==0.25.1
62
+ pyparsing==3.0.9
63
+ PySocks==1.7.1
64
+ python-dateutil==2.8.2
65
+ python-multipart==0.0.6
66
+ pytz==2023.3
67
+ PyWavelets==1.4.1
68
+ PyYAML==6.0.1
69
+ referencing==0.30.2
70
+ regex==2023.8.8
71
+ requests==2.31.0
72
+ rpds-py==0.10.2
73
+ scipy==1.11.2
74
+ seaborn==0.12.2
75
+ selenium==4.12.0
76
+ semantic-version==2.10.0
77
+ six==1.16.0
78
+ sniffio==1.3.0
79
+ sortedcontainers==2.4.0
80
+ soupsieve==2.5
81
+ starlette==0.27.0
82
+ statsmodels==0.14.0
83
+ sympy==1.12
84
+ tangled-up-in-unicode==0.2.0
85
+ tiktoken==0.3.3
86
+ toolz==0.12.0
87
+ torch==2.0.1
88
+ tqdm==4.66.1
89
+ trio==0.22.2
90
+ trio-websocket==0.10.4
91
+ typeguard==2.13.3
92
+ typing_extensions==4.7.1
93
+ tzdata==2023.3
94
+ undetected-chromedriver==3.5.3
95
+ urllib3==2.0.4
96
+ uvicorn==0.23.2
97
+ visions==0.7.5
98
+ websockets==11.0.3
99
+ whisper==1.1.10
100
+ wordcloud==1.9.2
101
+ wsproto==1.2.0
102
+ yarl==1.9.2
103
+ ydata-profiling==4.5.1
104
+ zipp==3.16.2