Spaces:
Sleeping
Sleeping
File size: 3,124 Bytes
699b0d1 174b073 0824a9a 699b0d1 d48a95f 699b0d1 d48a95f 699b0d1 d48a95f 699b0d1 aef9afa c521596 6a4ab13 aef9afa c521596 aef9afa c521596 aef9afa c521596 aef9afa c521596 aef9afa 699b0d1 2398853 699b0d1 2398853 699b0d1 aef9afa 2398853 aef9afa 2398853 aef9afa 699b0d1 0824a9a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
import re
from flask import Flask, render_template, request
from asgiref.wsgi import WsgiToAsgi
# Create the Flask (WSGI) application object. The route handlers in this file
# register themselves on it through @app.route.
app = Flask(__name__)
def process_ssml_text(ssml_text: str) -> str:
    """Unwrap ``<textnorm ...>...</textnorm>`` elements, one input line at a time.

    The input is split on ``'\\n'``. Only lines that contain the substring
    ``<textnorm`` are kept; every other line is omitted from the result.
    In each kept line, every ``<textnorm ...>text</textnorm>`` element whose
    content contains no ``<`` is replaced by its inner text, and the
    placeholder sequences ``A☼M`` / ``P☼M`` are rewritten to ``A※M`` / ``P※M``.

    Args:
        ssml_text: Raw text to process. ``None`` or an empty string yields
            an empty result.

    Returns:
        The kept, cleaned lines joined with ``'\\n'``.
    """
    if not ssml_text:
        return ""

    # Compile once up front instead of re-resolving the pattern for each line.
    unwrap_textnorm = re.compile(r'<textnorm[^>]*>([^<]*)</textnorm>').sub

    cleaned_lines = []
    for line in ssml_text.split('\n'):
        if "<textnorm" not in line:
            # Lines without a textnorm element are not part of the output.
            continue
        text = unwrap_textnorm(r'\1', line)
        # Replace specific symbols to match the expected output.
        text = text.replace('A☼M', 'A※M').replace('P☼M', 'P※M')
        cleaned_lines.append(text)
    return '\n'.join(cleaned_lines)
def process_old_ssml_text(ssml_text: str) -> str:
    """Clean text in the old tag format, one input line at a time.

    The input is split on ``'\\n'``. Only lines that contain ``<`` are kept;
    every other line is omitted from the result. For each kept line:

    1. Inline tags of the form ``<NAME: text>`` (``NAME`` being one of the
       known categories listed below) are replaced by ``text`` followed by
       a single space; whitespace directly after the tag is consumed.
    2. ``<textnorm ...>text</textnorm>`` elements are replaced by ``text``.
    3. ``A☼M`` / ``P☼M`` are rewritten to ``A※M`` / ``P※M``.
    4. Leading and trailing whitespace is stripped.

    Args:
        ssml_text: Raw text to process. ``None`` or an empty string yields
            an empty result.

    Returns:
        The kept, cleaned lines joined with ``'\\n'``.
    """
    if not ssml_text:
        return ""

    # Category names recognised in ``<NAME: text>`` tags. The order is kept
    # as-is so the joined alternation is identical to the original pattern.
    tag_names = (
        'DECIMAL', 'DATE02', 'NUMERIC_RANGE', 'CURRENCY', 'DATE', 'TIME',
        'DURATION', 'TELEPHONE', 'EMAIL', 'URL', 'SYMBOL', 'ORDINAL',
        'CARDINAL', 'MEASUREMENT', 'ROMAN', 'FRACTION', 'MATH_EXPRESSION',
        'MATH_EQUATION', 'BANK_NUMBER', 'ADDRESS', 'ALPHA_NUMERIC', 'DIGIT',
        'LONG_NUMBER', 'LATIN', 'PHONE', 'CONTACT', 'WEB', 'NUMERIC',
        'ALPHABETIC', 'SPECIAL_CHAR', 'PUNCTUATION', 'SPACE',
        'TAG_REPLACEMENT', 'LANGUAGE', 'LOCALE',
    )
    # Both patterns are loop-invariant, so compile them once up front.
    inline_tag = re.compile(r'<(?:' + '|'.join(tag_names) + r'):\s*([^>]+)>\s*')
    textnorm = re.compile(r'<textnorm[^>]*>([^<]*)</textnorm>')

    cleaned_lines = []
    for line in ssml_text.split('\n'):
        if "<" not in line:
            # Lines without any tag are not part of the output.
            continue
        # Keep the text after the colon plus one separating space.
        text = inline_tag.sub(r'\1 ', line)
        # Remove textnorm tags, keeping their content.
        text = textnorm.sub(r'\1', text)
        # Replace specific symbols to match the expected output.
        text = text.replace('A☼M', 'A※M').replace('P☼M', 'P※M')
        cleaned_lines.append(text.strip())
    return '\n'.join(cleaned_lines)
@app.route('/', methods=['GET', 'POST'])
def index():
    """Render the converter page for the current ``<textnorm>`` format.

    On GET the page is rendered with empty input and output. On POST the
    ``input_text`` form field is run through ``process_ssml_text`` and both
    the submitted text and the result are passed to the template.
    """
    context = {
        'processed_text': "",
        'input_text': "",
        'current_endpoint': "/",
    }
    if request.method == 'POST':
        submitted = request.form.get('input_text', '')
        context['input_text'] = submitted
        context['processed_text'] = process_ssml_text(submitted)
    return render_template('index.html', **context)
@app.route('/old', methods=['GET', 'POST'])
def old_index():
    """Render the converter page for the old tag format.

    On GET the page is rendered with empty input and output. On POST the
    ``input_text`` form field is run through ``process_old_ssml_text`` and
    both the submitted text and the result are passed to the template.
    """
    context = {
        'processed_text': "",
        'input_text': "",
        'current_endpoint': "/old",
    }
    if request.method == 'POST':
        submitted = request.form.get('input_text', '')
        context['input_text'] = submitted
        context['processed_text'] = process_old_ssml_text(submitted)
    return render_template('index.html', **context)
# Wrap Flask app with ASGI adapter for Uvicorn/Hugging Face Spaces compatibility
asgi_app = WsgiToAsgi(app)

if __name__ == '__main__':
    # Local development entry point (``python <this file>``).
    import os

    # Debug mode turns on the interactive Werkzeug debugger, which lets anyone
    # who can reach the server execute code. Keep it off unless it is
    # explicitly requested through the FLASK_DEBUG environment variable.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in {
        '1', 'true', 'yes', 'on',
    }
    app.run(debug=debug_enabled)
|