File size: 3,124 Bytes
699b0d1
 
174b073
 
0824a9a
699b0d1
 
 
 
 
 
 
 
 
 
 
 
 
 
d48a95f
 
 
699b0d1
d48a95f
 
699b0d1
d48a95f
699b0d1
 
 
aef9afa
 
 
 
 
 
 
 
 
 
 
 
c521596
6a4ab13
aef9afa
c521596
 
aef9afa
c521596
 
aef9afa
c521596
 
aef9afa
c521596
aef9afa
 
 
 
699b0d1
 
 
 
2398853
699b0d1
 
 
 
 
2398853
699b0d1
aef9afa
 
 
 
2398853
aef9afa
 
 
 
 
2398853
aef9afa
699b0d1
 
0824a9a
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import re
from flask import Flask, render_template, request
from asgiref.wsgi import WsgiToAsgi

# Create the Flask application instance. The route handlers defined below
# register themselves on it, and it is wrapped for ASGI at the end of the file.
app = Flask(__name__)

def process_ssml_text(ssml_text):
    """
    Unwrap ``<textnorm>`` elements from SSML text, one line at a time.

    The input is split on newline characters. Only lines that contain the
    substring ``<textnorm`` are kept; every other line is dropped from the
    output. In each kept line, every ``<textnorm ...>text</textnorm>``
    element whose inner text contains no ``<`` is replaced by that inner
    text, and the markers ``A☼M`` / ``P☼M`` are rewritten to ``A※M`` /
    ``P※M``. Kept lines are not trimmed.

    Args:
        ssml_text: The raw SSML text, possibly spanning several lines.

    Returns:
        The kept, cleaned lines joined with newlines; an empty string when
        no line contains a textnorm tag.
    """
    textnorm_pattern = re.compile(r'<textnorm[^>]*>([^<]*)</textnorm>')

    def _clean(raw_line):
        # Keep only the inner text of each textnorm element.
        unwrapped = textnorm_pattern.sub(r'\1', raw_line)
        # Replace specific symbols to match the expected output.
        return unwrapped.replace('A☼M', 'A※M').replace('P☼M', 'P※M')

    return '\n'.join(
        _clean(raw_line)
        for raw_line in ssml_text.split('\n')
        if "<textnorm" in raw_line
    )


def process_old_ssml_text(ssml_text):
    """
    Clean text written in the old SSML annotation format, line by line.

    The input is split on newline characters. Only lines containing ``<``
    are kept; every other line is dropped from the output. For each kept
    line:

    1. Annotations of the form ``<CATEGORY: spoken text>`` (DECIMAL,
       CURRENCY, DATE, TIME and the other categories listed in the pattern
       below) are replaced by the spoken text followed by a single space;
       whitespace directly after the closing ``>`` is consumed.
    2. ``<textnorm ...>text</textnorm>`` elements are replaced by their
       inner text.
    3. The markers ``A☼M`` / ``P☼M`` are rewritten to ``A※M`` / ``P※M``.
    4. Leading and trailing whitespace is stripped.

    Args:
        ssml_text: The raw text in the old format, possibly multi-line.

    Returns:
        The kept, cleaned lines joined with newlines; an empty string when
        no line contains ``<``.
    """
    category_pattern = re.compile(r'<(?:DECIMAL|DATE02|NUMERIC_RANGE|CURRENCY|DATE|TIME|DURATION|TELEPHONE|EMAIL|URL|SYMBOL|ORDINAL|CARDINAL|MEASUREMENT|ROMAN|FRACTION|MATH_EXPRESSION|MATH_EQUATION|BANK_NUMBER|ADDRESS|ALPHA_NUMERIC|DIGIT|LONG_NUMBER|LATIN|PHONE|CONTACT|WEB|NUMERIC|ALPHABETIC|SPECIAL_CHAR|PUNCTUATION|SPACE|TAG_REPLACEMENT|LANGUAGE|LOCALE):\s*([^>]+)>\s*')
    textnorm_pattern = re.compile(r'<textnorm[^>]*>([^<]*)</textnorm>')

    cleaned_lines = []
    for raw_line in ssml_text.split('\n'):
        # Lines without any tag are not part of the output.
        if "<" not in raw_line:
            continue

        # Keep the spoken text of each category annotation plus one space.
        text = category_pattern.sub(r'\1 ', raw_line)
        # Unwrap textnorm elements.
        text = textnorm_pattern.sub(r'\1', text)
        # Replace specific symbols to match the expected output.
        text = text.replace('A☼M', 'A※M').replace('P☼M', 'P※M')

        cleaned_lines.append(text.strip())

    return '\n'.join(cleaned_lines)


@app.route('/', methods=['GET', 'POST'])
def index():
    """
    Handle the root page for the current SSML format.

    On GET the page is rendered with empty input and output. On POST the
    ``input_text`` form field (empty string when absent) is passed through
    ``process_ssml_text`` and both the submitted text and the result are
    handed to the template.

    Returns:
        The rendered ``index.html`` template, given ``processed_text``,
        ``input_text`` and ``current_endpoint`` ("/").
    """
    context = {
        'processed_text': "",
        'input_text': "",
        'current_endpoint': "/",
    }

    if request.method == 'POST':
        submitted = request.form.get('input_text', '')
        context['input_text'] = submitted
        context['processed_text'] = process_ssml_text(submitted)

    return render_template('index.html', **context)

@app.route('/old', methods=['GET', 'POST'])
def old_index():
    """
    Handle the ``/old`` page for the old SSML format.

    On GET the page is rendered with empty input and output. On POST the
    ``input_text`` form field (empty string when absent) is passed through
    ``process_old_ssml_text`` and both the submitted text and the result
    are handed to the template.

    Returns:
        The rendered ``index.html`` template, given ``processed_text``,
        ``input_text`` and ``current_endpoint`` ("/old").
    """
    context = {
        'processed_text': "",
        'input_text': "",
        'current_endpoint': "/old",
    }

    if request.method == 'POST':
        submitted = request.form.get('input_text', '')
        context['input_text'] = submitted
        context['processed_text'] = process_old_ssml_text(submitted)

    return render_template('index.html', **context)

# Script entry point: when this file is executed directly, start the app via
# app.run() with debug mode switched on.
# NOTE(review): debug=True is presumably intended for local development only;
# confirm this branch is never reached in a deployed environment.
if __name__ == '__main__':
    app.run(debug=True)

# Wrap Flask app with ASGI adapter for Uvicorn/Hugging Face Spaces compatibility.
# This runs at import time, so `asgi_app` is available to whatever imports
# this module.
asgi_app = WsgiToAsgi(app)