Anonumous commited on
Commit
8f8f39e
·
verified ·
1 Parent(s): 1636870

Add notebook

Browse files
Files changed (1) hide show
  1. notebook.ipynb +163 -0
notebook.ipynb ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": [],
7
+ "gpuType": "T4"
8
+ },
9
+ "kernelspec": {
10
+ "name": "python3",
11
+ "display_name": "Python 3"
12
+ },
13
+ "language_info": {
14
+ "name": "python"
15
+ },
16
+ "accelerator": "GPU"
17
+ },
18
+ "cells": [
19
+ {
20
+ "cell_type": "code",
21
+ "execution_count": null,
22
+ "metadata": {
23
+ "id": "MgzOBm5ggGts"
24
+ },
25
+ "outputs": [],
26
+ "source": [
27
+ "from transformers import AutoModelForCausalLM, AutoTokenizer, AutoFeatureExtractor\n",
28
+ "import torch\n",
29
+ "import librosa\n",
30
+ "import gradio as gr\n",
31
+ "import numpy as np\n",
32
+ "from scipy.signal import resample"
33
+ ]
34
+ },
35
+ {
36
+ "cell_type": "code",
37
+ "source": [
38
+ "model = AutoModelForCausalLM.from_pretrained(\"Vikhrmodels/Borealis\", trust_remote_code=True)\n",
39
+ "tokenizer = AutoTokenizer.from_pretrained(\"Vikhrmodels/Borealis\")\n",
40
+ "extractor = AutoFeatureExtractor.from_pretrained(\"Vikhrmodels/Borealis\")"
41
+ ],
42
+ "metadata": {
43
+ "id": "-jATl7uegLVb"
44
+ },
45
+ "execution_count": null,
46
+ "outputs": []
47
+ },
48
+ {
49
+ "cell_type": "code",
50
+ "source": [
51
+ "model.eval()\n",
52
+ "model = model.to(\"cuda\")"
53
+ ],
54
+ "metadata": {
55
+ "id": "y78mNR_6gLX1"
56
+ },
57
+ "execution_count": null,
58
+ "outputs": []
59
+ },
60
+ {
61
+ "cell_type": "code",
62
+ "source": [
63
+ "def transcribe(audio):\n",
64
+ "    \"\"\"Transcribe a Gradio (sample_rate, samples) pair with Borealis; returns the model.generate() result.\"\"\"\n",
65
+ "    if audio is None or len(audio[1]) == 0:\n",
66
+ "        return \"Аудио не предоставлено.\"\n",
67
+ "\n",
68
+ "    sr, waveform = audio\n",
69
+ "    # Scale signed-integer PCM by its own dtype range instead of assuming int16;\n",
70
+ "    # any other dtype (e.g. float samples) is only cast, never rescaled.\n",
71
+ "    if np.issubdtype(waveform.dtype, np.signedinteger):\n",
72
+ "        waveform = waveform.astype(np.float32) / (np.iinfo(waveform.dtype).max + 1)\n",
73
+ "    else:\n",
74
+ "        waveform = waveform.astype(np.float32)\n",
75
+ "    if waveform.ndim > 1:\n",
76
+ "        waveform = waveform.mean(axis=1)  # downmix channels to mono\n",
77
+ "\n",
78
+ "    target_sr = 16000\n",
79
+ "    if sr != target_sr:\n",
80
+ "        num_samples = int(len(waveform) * target_sr / sr)\n",
81
+ "        waveform = resample(waveform, num_samples)\n",
82
+ "        sr = target_sr\n",
83
+ "\n",
84
+ "    proc = extractor(\n",
85
+ "        waveform,\n",
86
+ "        sampling_rate=sr,\n",
87
+ "        padding=\"max_length\",\n",
88
+ "        max_length=480_000,  # 30 s worth of samples at 16 kHz\n",
89
+ "        return_attention_mask=True,\n",
90
+ "        return_tensors=\"pt\",\n",
91
+ "    )\n",
92
+ "    mel = proc.input_features.squeeze(0).to(\"cuda\")\n",
93
+ "    att_mask = proc.attention_mask.squeeze(0).to(\"cuda\")\n",
94
+ "    with torch.inference_mode():\n",
95
+ "        transcript = model.generate(mel=mel, att_mask=att_mask, **generation_params)\n",
96
+ "\n",
97
+ "    return transcript"
98
+ ],
99
+ "metadata": {
100
+ "id": "q890Jhp3gLaB"
101
+ },
102
+ "execution_count": null,
103
+ "outputs": []
104
+ },
105
+ {
106
+ "cell_type": "code",
107
+ "source": [
108
+ "generation_params = dict(  # unpacked into model.generate() inside transcribe()\n",
109
+ "    max_new_tokens=350,\n",
110
+ "    do_sample=True,\n",
111
+ "    top_p=0.9,\n",
112
+ "    top_k=50,\n",
113
+ "    temperature=0.2,\n",
114
+ ")"
115
+ ],
116
+ "metadata": {
117
+ "id": "jl4M9fXVjpLC"
118
+ },
119
+ "execution_count": null,
120
+ "outputs": []
121
+ },
122
+ {
123
+ "cell_type": "code",
124
+ "source": [
125
+ "with gr.Blocks(theme=gr.themes.Soft()) as demo:  # UI: audio in, button, transcript out\n",
126
+ "    gr.Markdown(\"<h1 style='text-align: center; margin-bottom: 20px;'>Демо Borealis</h1>\")\n",
127
+ "    with gr.Row():\n",
128
+ "        with gr.Column(scale=2):\n",
129
+ "            audio_input = gr.Audio(sources=[\"microphone\", \"upload\"], type=\"numpy\", label=\"Запишите аудио или загрузите файл\", interactive=True)\n",
130
+ "        with gr.Column(scale=1):\n",
131
+ "            btn = gr.Button(\"Распознать\", variant=\"primary\", size=\"lg\")\n",
132
+ "    output = gr.Textbox(label=\"Расшифровка аудио\", lines=6, show_copy_button=True, interactive=False)  # NOTE(review): nesting restored by hand - confirm placement\n",
133
+ "    btn.click(transcribe, inputs=audio_input, outputs=output)"
134
+ ],
135
+ "metadata": {
136
+ "id": "jJ-aDtBNgLcM"
137
+ },
138
+ "execution_count": null,
139
+ "outputs": []
140
+ },
141
+ {
142
+ "cell_type": "code",
143
+ "source": [
144
+ "demo.launch(share=True)"
145
+ ],
146
+ "metadata": {
147
+ "id": "WJehoSe9gLeI",
148
+ "collapsed": true
149
+ },
150
+ "execution_count": null,
151
+ "outputs": []
152
+ },
153
+ {
154
+ "cell_type": "code",
155
+ "source": [],
156
+ "metadata": {
157
+ "id": "oXquIX2QgLgI"
158
+ },
159
+ "execution_count": null,
160
+ "outputs": []
161
+ }
162
+ ]
163
+ }