SABRe-v2 / app.py
Lguyogiro's picture
glow up
f57659d
from flask import Flask, render_template, request, jsonify, send_file
import os
import shutil
from zipfile import ZipFile
from io import BytesIO
import hashlib
# Flask application object; the route decorators below register handlers on it.
app = Flask(__name__)
# Directory (relative to the working directory) where uploaded recordings are saved.
UPLOAD_FOLDER = 'audio_files'
# Tab-separated file with one "<audio filename>\t<sentence>" row per recording.
TSV_FILE = 'audio_mapping.tsv'
# Plain-text copy of the most recently uploaded sentence list, one sentence per line.
LOCAL_SENTENCES_FILE = "last_uploaded_sentences.txt"
# Create the recordings directory at import time so handlers can write into it.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
@app.route('/')
def index():
    """Serve the landing page by rendering the ``index.html`` template."""
    page = render_template('index.html')
    return page
@app.route('/upload-sentences', methods=['POST'])
def upload():
    """Replace the stored sentence list with the contents of an uploaded text file.

    Expects a multipart upload with the text file in the ``file`` field. The
    file is decoded as UTF-8, split into lines, each line is stripped, and
    blank lines are discarded. The cleaned list is written to
    ``LOCAL_SENTENCES_FILE`` (one sentence per line) and an existing
    ``TSV_FILE`` is truncated, since its rows refer to the previous list.

    Returns:
        A JSON array of the cleaned sentences, or a JSON error object with
        status 400 when the upload cannot be decoded as UTF-8.
    """
    file = request.files['file']
    try:
        # 'utf-8-sig' drops a leading byte-order mark; str.strip() does not
        # treat U+FEFF as whitespace, so it would otherwise stay glued to the
        # first sentence.
        text = file.read().decode('utf-8-sig')
    except UnicodeDecodeError:
        return jsonify({"error": "Sentence file must be UTF-8 encoded text"}), 400

    # splitlines() also breaks on a bare '\r'. Splitting on '\n' alone would
    # leave it inside a sentence, while get_session re-reads the saved file
    # with universal newlines and would split there, so the two views of the
    # list would disagree.
    stripped_lines = (line.strip() for line in text.splitlines())
    sentences = [line for line in stripped_lines if line]

    with open(LOCAL_SENTENCES_FILE, 'w', encoding='utf-8') as f:
        f.write('\n'.join(sentences))

    # Wipe old mappings because we have a brand new text file.
    if os.path.exists(TSV_FILE):
        with open(TSV_FILE, 'w', encoding='utf-8'):
            pass

    return jsonify(sentences)
@app.route('/get-session', methods=['GET'])
def get_session():
    """Return the stored sentence list and which sentences already have audio.

    Sentences are read from ``LOCAL_SENTENCES_FILE`` (stripped, blank lines
    skipped). A sentence counts as recorded when ``TSV_FILE`` maps it to a
    filename and that file exists inside ``UPLOAD_FOLDER``.

    Returns:
        A JSON object with ``sentences`` (list of strings) and
        ``recorded_indices`` (positions in ``sentences`` that have audio).
        Both are empty when nothing has been uploaded yet.
    """
    sentences = []
    if os.path.exists(LOCAL_SENTENCES_FILE):
        with open(LOCAL_SENTENCES_FILE, 'r', encoding='utf-8') as f:
            stripped_lines = (line.strip() for line in f)
            sentences = [line for line in stripped_lines if line]

    # Sentence text -> audio filename. If a sentence appears on several rows,
    # the last row wins.
    filename_by_sentence = {}
    if os.path.exists(TSV_FILE):
        with open(TSV_FILE, encoding='utf-8') as f:
            for line in f:
                # Split on the FIRST tab only: the filename column comes first,
                # and the sentence column may itself contain tabs. Splitting on
                # every tab would discard such rows and the sentence could
                # never be reported as recorded.
                filename, tab, sentence = line.rstrip('\n').partition('\t')
                if tab:
                    filename_by_sentence[sentence] = filename

    recorded_indices = []
    for i, sent in enumerate(sentences):
        filename = filename_by_sentence.get(sent)
        # Require the audio file itself, not just the mapping row.
        if filename and os.path.exists(os.path.join(UPLOAD_FOLDER, filename)):
            recorded_indices.append(i)

    return jsonify({
        "sentences": sentences,
        "recorded_indices": recorded_indices
    })
@app.route('/upload-audio', methods=['POST'])
def upload_audio():
    """Store one recording and register it in the filename-to-sentence mapping.

    Expects a multipart form with the recording in the ``audio`` file field
    and the recorded sentence in the ``sentence_text`` form field. The audio
    is saved in ``UPLOAD_FOLDER`` as ``<md5 of the sentence>.webm``, so a new
    recording of the same sentence overwrites the previous one. ``TSV_FILE``
    is then rewritten with the updated mapping.

    Returns:
        ``('Audio received', 200)`` on success, or
        ``('Missing sentence_text', 400)`` when the form field is absent or
        empty.
    """
    audio = request.files['audio']
    sentence = request.form.get("sentence_text")
    if not sentence:
        # Without this guard a missing field reaches sentence.encode() as None
        # and surfaces as an unhandled AttributeError (HTTP 500).
        return 'Missing sentence_text', 400

    md5hash = hashlib.md5(sentence.encode()).hexdigest()
    filename = f"{md5hash}.webm"
    audio.save(os.path.join(UPLOAD_FOLDER, filename))

    # Audio filename -> sentence, loaded from the existing mapping file.
    mappings = {}
    if os.path.exists(TSV_FILE):
        with open(TSV_FILE, encoding='utf-8') as f:
            for line in f:
                # Split on the first tab only, so rows whose sentence contains
                # a tab survive the rewrite below instead of being dropped.
                existing_name, tab, existing_sentence = line.rstrip('\n').partition('\t')
                if tab:
                    mappings[existing_name] = existing_sentence

    mappings[filename] = sentence
    with open(TSV_FILE, 'w', encoding='utf-8') as f:
        f.writelines(f"{fn}\t{sent}\n" for fn, sent in mappings.items())

    return 'Audio received', 200
@app.route('/download-recordings')
def download_recordings():
    """Send a zip archive with every recorded audio file plus a mapping table.

    Rows of ``TSV_FILE`` are included only when the referenced audio file
    exists in ``UPLOAD_FOLDER``. The archive is built in memory and contains
    each audio file under its bare filename, followed by ``mapping.tsv``
    (header ``audio_filename\\tsentence`` and one row per recording).

    Returns:
        The archive as an attachment named ``recordings.zip``.
    """
    recordings = []
    if os.path.exists(TSV_FILE):
        with open(TSV_FILE, encoding='utf-8') as f:
            for line in f:
                # Split on the first tab only: a sentence that itself contains
                # a tab must not cause its recording to be silently left out
                # of the download.
                filename, tab, sentence = line.rstrip('\n').partition('\t')
                if tab and os.path.exists(os.path.join(UPLOAD_FOLDER, filename)):
                    recordings.append((filename, sentence))

    tsv_content = "audio_filename\tsentence\n" + '\n'.join(
        f"{fn}\t{sent}" for fn, sent in recordings
    )

    memory_file = BytesIO()
    with ZipFile(memory_file, 'w') as zf:
        for filename, _ in recordings:
            zf.write(os.path.join(UPLOAD_FOLDER, filename), filename)
        zf.writestr("mapping.tsv", tsv_content)
    # Rewind so send_file streams the archive from its first byte.
    memory_file.seek(0)

    return send_file(memory_file, as_attachment=True, download_name='recordings.zip')
@app.route('/delete-data', methods=['POST'])
def delete_data():
    """Erase all recordings, the audio mapping, and the saved sentence list.

    The upload directory is removed with everything in it and then recreated
    empty; the mapping and sentence files are deleted when they exist.

    Returns:
        ``('Data deleted', 200)``.
    """
    if os.path.exists(UPLOAD_FOLDER):
        shutil.rmtree(UPLOAD_FOLDER)
    # Recreate the directory so later uploads still have somewhere to go.
    os.makedirs(UPLOAD_FOLDER, exist_ok=True)

    for stale_path in (TSV_FILE, LOCAL_SENTENCES_FILE):
        if not os.path.exists(stale_path):
            continue
        os.remove(stale_path)

    return 'Data deleted', 200
if __name__ == '__main__':
    # Run directly as a script: listen on all interfaces, port 7860.
    app.run(port=7860, host="0.0.0.0")