"""Flask app for collecting one audio recording per uploaded sentence.

State is kept on disk next to the application:

* ``LOCAL_SENTENCES_FILE`` - the most recently uploaded sentence list,
  one sentence per line.
* ``TSV_FILE`` - rows of ``<audio filename>\\t<sentence>``.
* ``UPLOAD_FOLDER`` - the saved audio files.
"""

import hashlib
import os
import shutil
from io import BytesIO
from zipfile import ZipFile

from flask import Flask, jsonify, render_template, request, send_file

app = Flask(__name__)

UPLOAD_FOLDER = 'audio_files'
TSV_FILE = 'audio_mapping.tsv'
LOCAL_SENTENCES_FILE = "last_uploaded_sentences.txt"

os.makedirs(UPLOAD_FOLDER, exist_ok=True)


def _read_mappings():
    """Return the ``(audio_filename, sentence)`` rows of TSV_FILE in file order.

    A missing file yields an empty list; rows without a tab are skipped.
    """
    rows = []
    try:
        with open(TSV_FILE, encoding='utf-8') as f:
            for line in f:
                # Split on the first tab only: the filename never contains a
                # tab, so any further tabs belong to the sentence text.
                parts = line.rstrip('\n').split('\t', 1)
                if len(parts) == 2:
                    rows.append((parts[0], parts[1]))
    except FileNotFoundError:
        pass
    return rows


def _write_mappings(mappings):
    """Overwrite TSV_FILE with one ``filename<TAB>sentence`` row per dict item."""
    with open(TSV_FILE, 'w', encoding='utf-8') as f:
        f.writelines(f"{fn}\t{sent}\n" for fn, sent in mappings.items())


@app.route('/')
def index():
    """Render the main page template."""
    return render_template('index.html')


@app.route('/upload-sentences', methods=['POST'])
def upload():
    """Store the uploaded sentence list and reset the audio mapping.

    Reads the ``file`` upload as UTF-8, keeps the non-blank lines (stripped),
    saves them to LOCAL_SENTENCES_FILE and returns them as a JSON list.
    """
    file = request.files['file']
    raw_lines = file.read().decode('utf-8').split('\n')
    sentences = [s.strip() for s in raw_lines if s.strip()]

    with open(LOCAL_SENTENCES_FILE, 'w', encoding='utf-8') as f:
        f.write('\n'.join(sentences))

    # Wipe old mappings because we have a brand new text file
    if os.path.exists(TSV_FILE):
        with open(TSV_FILE, 'w', encoding='utf-8'):
            pass

    return jsonify(sentences)


@app.route('/get-session', methods=['GET'])
def get_session():
    """Return the stored sentences and the indices that already have audio.

    A sentence counts as recorded when TSV_FILE maps it to a filename and
    that file exists in UPLOAD_FOLDER.
    """
    sentences = []
    if os.path.exists(LOCAL_SENTENCES_FILE):
        with open(LOCAL_SENTENCES_FILE, 'r', encoding='utf-8') as f:
            sentences = [line.strip() for line in f if line.strip()]

    # Later rows win when the same sentence appears more than once.
    filename_by_sentence = {sent: fn for fn, sent in _read_mappings()}

    recorded_indices = []
    for i, sent in enumerate(sentences):
        filename = filename_by_sentence.get(sent)
        if filename and os.path.exists(os.path.join(UPLOAD_FOLDER, filename)):
            recorded_indices.append(i)

    return jsonify({
        "sentences": sentences,
        "recorded_indices": recorded_indices
    })


@app.route('/upload-audio', methods=['POST'])
def upload_audio():
    """Save an uploaded recording and record which sentence it belongs to.

    Expects the ``audio`` file part and the ``sentence_text`` form field.
    The file is stored as ``<md5 of sentence>.webm`` so re-recording the same
    sentence overwrites the previous take. Responds 400 when the sentence is
    missing or cannot be stored in the line-oriented mapping file.
    """
    audio = request.files['audio']
    sentence = request.form.get("sentence_text")

    if sentence is None:
        # Without this guard, sentence.encode() raises AttributeError (HTTP 500).
        return 'Missing sentence_text', 400
    if '\n' in sentence or '\r' in sentence:
        # A line break would spill the sentence across several TSV rows and
        # it would be read back truncated.
        return 'sentence_text must be a single line', 400

    md5hash = hashlib.md5(sentence.encode()).hexdigest()
    filename = f"{md5hash}.webm"
    audio.save(os.path.join(UPLOAD_FOLDER, filename))

    mappings = dict(_read_mappings())
    mappings[filename] = sentence
    _write_mappings(mappings)

    return 'Audio received', 200


@app.route('/download-recordings')
def download_recordings():
    """Send a zip with every existing recording plus a ``mapping.tsv`` index.

    Only mapping rows whose audio file is present in UPLOAD_FOLDER are
    included, both in the archive and in the index.
    """
    mappings = [
        (fn, sent)
        for fn, sent in _read_mappings()
        if os.path.exists(os.path.join(UPLOAD_FOLDER, fn))
    ]

    tsv_content = "audio_filename\tsentence\n" + '\n'.join(
        f"{fn}\t{sent}" for fn, sent in mappings
    )

    memory_file = BytesIO()
    with ZipFile(memory_file, 'w') as zf:
        for filename, _ in mappings:
            zf.write(os.path.join(UPLOAD_FOLDER, filename), filename)
        zf.writestr("mapping.tsv", tsv_content)

    # Rewind so send_file streams the archive from its first byte.
    memory_file.seek(0)
    return send_file(memory_file, as_attachment=True, download_name='recordings.zip')


@app.route('/delete-data', methods=['POST'])
def delete_data():
    """Delete all recordings, the mapping file and the stored sentence list."""
    if os.path.exists(UPLOAD_FOLDER):
        shutil.rmtree(UPLOAD_FOLDER)
    os.makedirs(UPLOAD_FOLDER, exist_ok=True)

    for path in (TSV_FILE, LOCAL_SENTENCES_FILE):
        try:
            os.remove(path)
        except FileNotFoundError:
            pass  # Nothing to delete for this file.

    return 'Data deleted', 200


if __name__ == '__main__':
    app.run(host="0.0.0.0", port=7860)