Lguyogiro committed on
Commit
4d576e2
·
1 Parent(s): 7f4b8f4

sabre on docker

Browse files
Files changed (3) hide show
  1. Dockerfile +11 -0
  2. app.py +101 -0
  3. templates/index.html +225 -0
Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.9

WORKDIR /code

COPY ./requirements.txt /code/requirements.txt
# Install Flask together with everything listed in requirements.txt, so the
# file copied above actually takes effect.
RUN pip install --no-cache-dir --upgrade flask -r /code/requirements.txt

COPY . .

# NOTE(review): Hugging Face Docker Spaces run the container as user ID 1000;
# confirm that /code is writable, because app.py creates files there at runtime.

# Flask usually runs on 5000, but HF expects the container to listen on 7860
# (app.py binds 0.0.0.0:7860).
EXPOSE 7860
CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This file runs the backend for a simple read-aloud audio book recorder app.
3
+ It works as follows:
4
+ - a user selects and uploads a .txt file that contains 1 sentence per line
5
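+ - the backend stores the sentences, saves one audio recording per sentence, and serves all recordings plus a TSV mapping (audio filename -> sentence) as a zip download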
6
+ """
7
+ from flask import Flask, render_template, request, jsonify, send_file
8
+ import os
9
+ import glob
10
+ from zipfile import ZipFile
11
+ from io import BytesIO
12
+ import hashlib
13
+
14
+
15
# Flask application object; the route decorators below register handlers on it.
app = Flask(__name__)
# Directory (relative to the working directory) where recordings are saved.
UPLOAD_FOLDER = 'audio_files'
# Tab-separated file mapping each saved audio filename to its sentence text.
TSV_FILE = 'audio_mapping.tsv'
# Create the recordings directory at import time so uploads can be saved.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Copy of the most recently uploaded sentence list, one sentence per line.
LOCAL_SENTENCES_FILE = "last_uploaded_sentences.txt"
21
+
22
+
23
@app.route('/')
def index():
    """Serve the single-page recorder UI rendered from ``index.html``."""
    page = render_template('index.html')
    return page
26
+
27
+
28
@app.route('/upload-sentences', methods=['POST'])
def upload():
    """Receive a sentence file, persist it, and return its sentences as JSON.

    Expects a multipart form with a ``file`` part holding UTF-8 text, one
    sentence per line. Blank lines are dropped and surrounding whitespace is
    stripped. The cleaned sentences are written to ``LOCAL_SENTENCES_FILE``
    and the audio/sentence mapping in ``TSV_FILE`` is reset.

    Returns:
        A JSON array of the cleaned sentences, or a plain-text 400 response
        when the upload is not valid UTF-8.
    """
    file = request.files['file']
    try:
        # 'utf-8-sig' also accepts plain UTF-8 but drops a leading byte-order
        # mark, which would otherwise end up inside the first sentence.
        text = file.read().decode('utf-8-sig')
    except UnicodeDecodeError:
        return 'File must be UTF-8 encoded text', 400

    sentences = [s.strip() for s in text.split('\n') if s.strip()]
    with open(LOCAL_SENTENCES_FILE, 'w', encoding='utf-8') as f:
        f.write('\n'.join(sentences))

    # When we get a new text file, we can ignore the old mappings we were
    # keeping internally: truncate the TSV.
    with open(TSV_FILE, 'w', encoding='utf-8'):
        pass

    return jsonify(sentences)
40
+
41
+
42
@app.route('/upload-audio', methods=['POST'])
def upload_audio():
    """Store one recorded sentence and record it in the TSV mapping.

    Expects a multipart form with an ``audio`` file part and a
    ``sentence_text`` field. The recording is saved in ``UPLOAD_FOLDER`` under
    the MD5 hex digest of the sentence text (``<digest>.webm``), so
    re-recording the same sentence overwrites the earlier take.

    Returns:
        ``('Audio received', 200)`` on success, or a plain-text 400 response
        when ``sentence_text`` is missing.
    """
    audio = request.files['audio']
    sentence = request.form.get("sentence_text")
    if sentence is None:
        # Without the text there is nothing to hash or to map the audio to.
        return 'Missing sentence_text', 400

    digest = hashlib.md5(sentence.encode()).hexdigest()
    filename = f"{digest}.webm"
    audio.save(os.path.join(UPLOAD_FOLDER, filename))

    # A tab or line break inside the sentence would split the TSV row and the
    # entry would be dropped on the next read, so flatten those to spaces.
    tsv_sentence = sentence.translate(
        str.maketrans({'\t': ' ', '\r': ' ', '\n': ' '})
    )

    # Re-read the existing mapping so each filename appears exactly once and
    # always carries its latest sentence.
    mappings = {}
    if os.path.exists(TSV_FILE):
        with open(TSV_FILE, encoding='utf-8') as f:
            for line in f:
                parts = line.rstrip('\n').split('\t')
                if len(parts) == 2:
                    mappings[parts[0]] = parts[1]
    mappings[filename] = tsv_sentence

    with open(TSV_FILE, 'w', encoding='utf-8') as f:
        f.writelines(f"{fn}\t{sent}\n" for fn, sent in mappings.items())
    return 'Audio received', 200
71
+
72
+
73
@app.route('/download-recordings')
def download_recordings():
    """Bundle all current recordings and their TSV mapping into a zip download.

    Only mapping rows whose audio file still exists in ``UPLOAD_FOLDER`` are
    included. The archive holds each audio file plus ``mapping.tsv`` (header
    ``audio_filename<TAB>sentence``). The bundled audio files are deleted from
    disk once the archive has been built.

    Returns:
        A ``recordings.zip`` attachment built in memory.
    """
    # Load the mapping of audio files to sentences, skipping missing files.
    mappings = []
    if os.path.exists(TSV_FILE):
        with open(TSV_FILE, encoding='utf-8') as f:
            for line in f:
                parts = line.rstrip('\n').split('\t')
                if len(parts) != 2:
                    continue
                if os.path.exists(os.path.join(UPLOAD_FOLDER, parts[0])):
                    mappings.append((parts[0], parts[1]))

    tsv_content = "audio_filename\tsentence\n" + '\n'.join(
        f"{fn}\t{sent}" for fn, sent in mappings
    )

    memory_file = BytesIO()
    with ZipFile(memory_file, 'w') as zf:
        for filename, _ in mappings:
            zf.write(os.path.join(UPLOAD_FOLDER, filename), filename)
        zf.writestr("mapping.tsv", tsv_content)

    # Delete the originals only after the archive is complete, so a failure
    # while zipping cannot destroy recordings that were never delivered.
    for filename, _ in mappings:
        os.remove(os.path.join(UPLOAD_FOLDER, filename))

    memory_file.seek(0)
    return send_file(memory_file, as_attachment=True,
                     download_name='recordings.zip')
97
+
98
+
99
if __name__ == '__main__':
    # Listen on all interfaces so the server is reachable from outside the
    # container. The port defaults to 7860 and can be overridden with the
    # PORT environment variable.
    app.run(host="0.0.0.0", port=int(os.environ.get("PORT", "7860")))
templates/index.html ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <title>SABRe: Simple Audio Book Recorder</title>
6
+ <link href="https://fonts.googleapis.com/css2?family=Mozilla+Text:wght@200..700&display=swap" rel="stylesheet">
7
+ <style>
8
+ body { font-family: "Mozilla Text", sans-serif; margin: 30px; background-color: #e8decc}
9
+ #sentence { font-size: 1.2em; margin-bottom: 20px; }
10
+ #controls button { margin: 0 5px; }
11
+ #uploadForm, #recorder { margin-bottom: 25px; }
12
+ button {
13
+ background-color: #e1ecf4;
14
+ border-radius: 3px;
15
+ border: 1px solid #7aa7c7;
16
+ box-shadow: rgba(255, 255, 255, .7) 0 1px 0 0 inset;
17
+ box-sizing: border-box;
18
+ color: #39739d;
19
+ cursor: pointer;
20
+ display: inline-block;
21
+ font-size: 14px;
22
+ font-weight: 400;
23
+ line-height: 1.15385;
24
+ margin: 0;
25
+ outline: none;
26
+ padding: 8px .8em;
27
+ position: relative;
28
+ text-align: center;
29
+ text-decoration: none;
30
+ user-select: none;
31
+ -webkit-user-select: none;
32
+ touch-action: manipulation;
33
+ vertical-align: baseline;
34
+ white-space: nowrap;
35
+ }
36
+
37
+ button:hover,
38
+ button:focus {
39
+ background-color: #b3d3ea;
40
+ color: #2c5777;
41
+ font-weight: bold;
42
+ }
43
+
44
+ button:focus {
45
+ box-shadow: 0 0 0 4px rgba(0, 149, 255, .15);
46
+ }
47
+
48
+ button:active {
49
+ background-color: #a0c7e4;
50
+ box-shadow: none;
51
+ color: #2c5f85;
52
+ }
53
+
54
+ button:disabled {
55
+ background-color: lightgrey;
56
+ box-shadow: none;
57
+ color: grey;
58
+ }
59
+ </style>
60
+ </head>
61
+ <body style="text-align: center; margin-left: 10em; margin-right: 10em">
62
+ <div >
63
+ <h1>SABRe: Simple Audio Book Recorder</h1>
64
+ <hr>
65
+ </div>
66
+ <div style="text-align: left;">
67
+ <span id="sentCntDisplay">Sentences recorded: 0</span>
68
+ <br>
69
+ <span id="durationDisplay">Total duration: 0</span>
70
+
71
+ </div>
72
+
73
+ <form id="uploadForm" enctype="multipart/form-data">
74
+ <input type="file" name="file" id="fileInput" accept=".txt" required>
75
+ <button type="submit">Upload selected file</button>
76
+ </form>
77
+ <div id="recorder" style="display: none;">
78
+ <div id="sentence"></div>
79
+ <div id="controls">
80
+ <button id="recordBtn">Record</button>
81
+ <button id="stopBtn" disabled>Stop</button>
82
+ <button id="nextBtn" disabled>Next Sentence</button>
83
+ </div>
84
+ <audio id="audioPlayback" controls style="display:none; margin-top:10px;"></audio>
85
+ </div>
86
+ <hr>
87
+ <button id="downloadBtn" style="margin-top: 30px; display:grid">Download Current Recordings</button>
88
+ <script>
89
// Clicking the download button navigates to the zip endpoint.
// Note: the second <script> block assigns this same handler again, replacing
// this assignment.
document.getElementById('downloadBtn').onclick = function() {
    window.location.href = '/download-recordings';
};
92
+ </script>
93
+ <script>
94
// Sentences returned by the server for the uploaded text file.
let sentences = [];
// Index into `sentences` of the sentence currently shown for recording.
let current = 0;
// performance.now() timestamps (milliseconds) taken when the latest recording
// was started and stopped.
let startTime;
let stopTime;
// Accumulated recording time in milliseconds, shown as "Total duration".
let totalTime = 0;
99
+ // load sentences from file on file upload
100
// Upload the selected text file, then switch from the upload form to the
// recorder once the server has returned a non-empty list of sentences.
document.getElementById('uploadForm').onsubmit = async function(e) {
    e.preventDefault();
    const form = new FormData();
    form.append('file', document.getElementById('fileInput').files[0]);
    const res = await fetch('/upload-sentences', { method: 'POST', body: form });
    if (!res.ok) {
        // An error response has no sentence list to parse.
        alert('Could not load sentences (server returned ' + res.status + ').');
        return;
    }
    const loaded = await res.json();
    if (!Array.isArray(loaded) || loaded.length === 0) {
        // Nothing to record: keep the upload form visible for another file.
        alert('The selected file contains no sentences.');
        return;
    }
    sentences = loaded;
    current = 0;
    showSentence();
    document.getElementById('recorder').style.display = '';
    document.getElementById('uploadForm').style.display = 'none';
};
111
+
112
// Render the previous, current and next sentence into #sentence, and reset
// the Next button and the playback element for the new sentence.
function showSentence() {
    const prevSent = current > 0 ? sentences[current - 1] : "None";
    const currentSent = sentences[current];
    const nextSent = current < sentences.length - 1 ? sentences[current + 1] : "None";

    // Declared locally: the original assigned to an undeclared name, which
    // creates an accidental global (and throws in strict mode).
    const sentContainer = document.getElementById('sentence');
    while (sentContainer.hasChildNodes()) {
        sentContainer.removeChild(sentContainer.firstChild);
    }

    const preSentP = document.createElement("p");
    preSentP.style.color = "grey";
    preSentP.innerText = "previous: " + prevSent;

    const sentP = document.createElement("p");
    sentP.style.fontWeight = "bold";
    sentP.innerText = currentSent;

    const postSentP = document.createElement("p");
    postSentP.style.color = "grey";
    postSentP.innerText = "next: " + nextSent;

    sentContainer.appendChild(preSentP);
    sentContainer.appendChild(sentP);
    sentContainer.appendChild(postSentP);

    // A fresh sentence has no recording yet.
    document.getElementById('nextBtn').disabled = true;
    document.getElementById('audioPlayback').style.display = 'none';
}
149
+
150
let mediaRecorder, audioChunks = [];
// Start recording the current sentence. When recording stops, the clip is
// uploaded and offered for playback; Next is enabled only after the server
// has accepted the upload.
document.getElementById('recordBtn').onclick = async function() {
    let stream;
    try {
        stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    } catch (err) {
        // Permission denied or no input device: recording cannot start.
        alert('Microphone access is required to record: ' + err.message);
        return;
    }
    mediaRecorder = new MediaRecorder(stream);
    audioChunks = [];
    mediaRecorder.ondataavailable = e => audioChunks.push(e.data);
    mediaRecorder.onstop = async function() {
        // Release the microphone; otherwise every take leaves a live capture
        // stream (and the browser's recording indicator) behind.
        stream.getTracks().forEach(track => track.stop());

        const blob = new Blob(audioChunks, { type: 'audio/webm' });
        const form = new FormData();
        form.append('audio', blob, 'sentence' + current + '.webm');
        form.append('sentence_idx', current);
        form.append("sentence_text", sentences[current]);

        let uploaded = false;
        try {
            const res = await fetch('/upload-audio', { method: 'POST', body: form });
            uploaded = res.ok;
        } catch (err) {
            uploaded = false;
        }

        const playback = document.getElementById('audioPlayback');
        playback.src = URL.createObjectURL(blob);
        playback.style.display = '';

        if (!uploaded) {
            // Keep Next disabled so the user re-records instead of silently
            // losing this sentence.
            alert('Uploading the recording failed. Please record this sentence again.');
            return;
        }
        document.getElementById('nextBtn').disabled = false;
    };
    mediaRecorder.start();

    startTime = performance.now();

    document.getElementById('recordBtn').disabled = true;
    document.getElementById('stopBtn').disabled = false;
};
174
// Format a duration given in milliseconds as "<value> <unit>" with two
// decimals, using seconds, minutes or hours, whichever is the largest unit
// whose value is at least 1 (seconds for anything under a minute).
function getFormattedTime(t) {
    // Declared locally: the original assigned to undeclared names, creating
    // accidental globals (and throwing in strict mode).
    let time = t / 1000; // seconds
    let unit = "seconds";
    if (time >= 60) {
        time /= 60;
        unit = "minutes";
    }
    if (time >= 60) {
        time /= 60;
        unit = "hours";
    }
    return time.toFixed(2) + " " + unit;
}
188
// Stop the active recording, note when it ended, and flip the Record/Stop
// buttons back to their idle state.
document.getElementById('stopBtn').onclick = function() {
    const recordButton = document.getElementById('recordBtn');
    const stopButton = document.getElementById('stopBtn');

    mediaRecorder.stop();
    stopTime = performance.now();

    recordButton.disabled = false;
    stopButton.disabled = true;
};
194
+
195
+ // Show the download button when done
196
// Clear the inline `display` style of the download button so it falls back
// to its stylesheet default.
function showDownloadButton() {
    const downloadButton = document.getElementById('downloadBtn');
    downloadButton.style.display = '';
}
199
+
200
// Advance to the next sentence (or finish), then refresh the counters for
// recorded sentences and total recorded time.
document.getElementById('nextBtn').onclick = function() {
    let recordedCount;
    if (current + 1 < sentences.length) {
        current++;
        // Everything before the sentence now on screen has been recorded.
        recordedCount = current;
        showSentence();
    } else {
        // The last sentence was just recorded; `current` does not advance
        // here, so count the full list instead of reporting one too few.
        recordedCount = sentences.length;
        document.getElementById('recorder').innerHTML = "<strong>Finished recording all sentences...you can download them with the button below.</strong>";
        showDownloadButton();
    }
    document.getElementById("sentCntDisplay").textContent = "Sentences recorded: " + recordedCount;

    // Add the accepted take's length (milliseconds) to the running total.
    const clipDuration = stopTime - startTime;
    totalTime += clipDuration;
    document.getElementById("durationDisplay").textContent = "Total duration: " + getFormattedTime(totalTime);
};
218
+
219
+ // Download handler
220
// Navigating to the endpoint makes the browser download the zip archive.
document.getElementById('downloadBtn').onclick = () => {
    window.location.href = '/download-recordings';
};
223
+ </script>
224
+ </body>
225
+ </html>