Spaces:
Paused
Paused
| from flask import Flask, render_template, request, jsonify, send_file | |
| import os | |
| import shutil | |
| from zipfile import ZipFile | |
| from io import BytesIO | |
| import hashlib | |
# Flask application object that serves this tool.
# NOTE(review): no @app.route decorators are visible on the handler functions
# in this copy of the file -- confirm they are registered with `app`.
app = Flask(__name__)

# On-disk state; every path is relative to the current working directory.
UPLOAD_FOLDER = 'audio_files'  # directory that receives the audio clips
TSV_FILE = 'audio_mapping.tsv'  # one "<audio filename>\t<sentence>" row per line
LOCAL_SENTENCES_FILE = "last_uploaded_sentences.txt"  # one sentence per line

# Create the audio directory at import time so handlers can save into it.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
def index():
    """Render the ``index.html`` template and return the result."""
    page = render_template('index.html')
    return page
def upload():
    """Store an uploaded sentence list and reset the audio mappings.

    Reads the ``file`` part of the request as UTF-8 text, keeps every
    non-blank line (stripped of surrounding whitespace) as one sentence,
    writes the sentences to ``LOCAL_SENTENCES_FILE`` and truncates
    ``TSV_FILE`` if it exists.

    Returns:
        A JSON list of the stored sentences, or a JSON ``{"error": ...}``
        object with status 400 when the upload is not valid UTF-8.
    """
    uploaded = request.files['file']
    try:
        # 'utf-8-sig' also drops a leading byte-order mark. str.strip() does
        # not treat U+FEFF as whitespace, so with plain 'utf-8' the mark would
        # stay attached to the first sentence.
        text = uploaded.read().decode('utf-8-sig')
    except UnicodeDecodeError:
        return jsonify({"error": "File must be UTF-8 encoded text"}), 400

    sentences = [line.strip() for line in text.split('\n') if line.strip()]

    with open(LOCAL_SENTENCES_FILE, 'w', encoding='utf-8') as f:
        f.write('\n'.join(sentences))

    # Wipe old mappings because we have a brand new text file.
    if os.path.exists(TSV_FILE):
        with open(TSV_FILE, 'w'):
            pass

    return jsonify(sentences)
def get_session():
    """Report the stored sentences and which of them already have audio.

    Sentences are read from ``LOCAL_SENTENCES_FILE`` (blank lines skipped).
    A sentence counts as recorded when ``TSV_FILE`` maps it to a filename
    that exists inside ``UPLOAD_FOLDER``.

    Returns:
        A JSON object with ``sentences`` (list of str) and
        ``recorded_indices`` (positions in ``sentences`` that have audio).
    """
    sentences = []
    if os.path.exists(LOCAL_SENTENCES_FILE):
        with open(LOCAL_SENTENCES_FILE, 'r', encoding='utf-8') as f:
            sentences = [line.strip() for line in f if line.strip()]

    filename_by_sentence = {}
    if os.path.exists(TSV_FILE):
        with open(TSV_FILE, encoding='utf-8') as f:
            for line in f:
                # Split on the FIRST tab only: everything after it is the
                # sentence, which may itself contain tabs. Splitting on every
                # tab would discard such rows, so those sentences could never
                # be reported as recorded.
                filename, tab, sentence = line.rstrip('\n').partition('\t')
                if tab:
                    filename_by_sentence[sentence] = filename

    recorded_indices = []
    for i, sent in enumerate(sentences):
        filename = filename_by_sentence.get(sent)
        if filename and os.path.exists(os.path.join(UPLOAD_FOLDER, filename)):
            recorded_indices.append(i)

    return jsonify({
        "sentences": sentences,
        "recorded_indices": recorded_indices
    })
def upload_audio():
    """Save one recorded clip and record which sentence it belongs to.

    Expects an ``audio`` file part and a ``sentence_text`` form field. The
    clip is saved in ``UPLOAD_FOLDER`` as ``<md5 of the sentence>.webm`` and
    ``TSV_FILE`` is rewritten with the filename -> sentence row added or
    replaced.

    Returns:
        ``('Audio received', 200)`` on success, or a message with status 400
        when ``sentence_text`` is missing, empty, or spans several lines.
    """
    audio = request.files['audio']
    sentence = request.form.get("sentence_text")

    # Validate before touching the disk; a missing field used to fail with
    # an AttributeError on None.encode().
    if not sentence:
        return 'Missing sentence_text', 400
    # The mapping file is line-oriented and is read back with universal
    # newlines, so a '\n' or '\r' in the sentence would inject extra rows
    # (including rows naming arbitrary filenames).
    if '\n' in sentence or '\r' in sentence:
        return 'sentence_text must be a single line', 400

    md5hash = hashlib.md5(sentence.encode()).hexdigest()
    filename = f"{md5hash}.webm"
    audio.save(os.path.join(UPLOAD_FOLDER, filename))

    mappings = {}
    if os.path.exists(TSV_FILE):
        with open(TSV_FILE, encoding='utf-8') as f:
            for line in f:
                # Split on the first tab only so rows whose sentence contains
                # tabs survive the rewrite below instead of being dropped.
                existing_fn, tab, existing_sent = line.rstrip('\n').partition('\t')
                if tab:
                    mappings[existing_fn] = existing_sent
    mappings[filename] = sentence

    with open(TSV_FILE, 'w', encoding='utf-8') as f:
        f.writelines(f"{fn}\t{sent}\n" for fn, sent in mappings.items())

    return 'Audio received', 200
def download_recordings():
    """Send a zip of every mapped recording plus a ``mapping.tsv`` index.

    Rows of ``TSV_FILE`` are included only when their filename is a bare
    name (no directory part) that refers to a regular file inside
    ``UPLOAD_FOLDER``. The archive is built in memory.

    Returns:
        The ``send_file`` response for the archive, offered as the
        attachment ``recordings.zip``.
    """
    mappings = []
    if os.path.exists(TSV_FILE):
        with open(TSV_FILE, encoding='utf-8') as f:
            for line in f:
                # First tab separates filename from sentence; the sentence
                # itself may contain further tabs.
                filename, tab, sentence = line.rstrip('\n').partition('\t')
                if not tab:
                    continue
                # Reject anything with a path component so a crafted row
                # such as "../secret" cannot pull files from outside
                # UPLOAD_FOLDER into the download.
                if not filename or os.path.basename(filename) != filename:
                    continue
                # isfile (not exists) also skips directories.
                if os.path.isfile(os.path.join(UPLOAD_FOLDER, filename)):
                    mappings.append((filename, sentence))

    tsv_content = "audio_filename\tsentence\n" + '\n'.join(
        f"{fn}\t{sent}" for fn, sent in mappings
    )

    memory_file = BytesIO()
    with ZipFile(memory_file, 'w') as zf:
        for filename, _ in mappings:
            zf.write(os.path.join(UPLOAD_FOLDER, filename), filename)
        zf.writestr("mapping.tsv", tsv_content)
    # Rewind so send_file streams the archive from its first byte.
    memory_file.seek(0)

    return send_file(memory_file, as_attachment=True, download_name='recordings.zip')
def delete_data():
    """Remove all recordings, the mapping file and the stored sentences.

    ``UPLOAD_FOLDER`` is deleted (if present) and recreated empty; then
    ``TSV_FILE`` and ``LOCAL_SENTENCES_FILE`` are removed when they exist.

    Returns:
        ``('Data deleted', 200)``.
    """
    folder_present = os.path.exists(UPLOAD_FOLDER)
    if folder_present:
        shutil.rmtree(UPLOAD_FOLDER)
    os.makedirs(UPLOAD_FOLDER, exist_ok=True)

    for state_path in (TSV_FILE, LOCAL_SENTENCES_FILE):
        if not os.path.exists(state_path):
            continue
        os.remove(state_path)

    return 'Data deleted', 200
if __name__ == '__main__':
    # Started as a script: listen on every interface, port 7860.
    app.run(port=7860, host="0.0.0.0")