kambris committed on
Commit
a5703b6
·
verified ·
1 Parent(s): e9f910e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +223 -0
app.py ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import html
import re
from collections import Counter

import gradio as gr
4
+
5
# Top-level USAS semantic fields, keyed by the capital letter that starts a
# tag. Each value is (display name, hex background colour used in the HTML).
USAS_CATEGORIES = dict(
    A=("General & Abstract Terms", "#fee2e2"),
    B=("Body & Individual", "#fce7f3"),
    C=("Arts & Crafts", "#f3e8ff"),
    E=("Emotional Actions", "#ffe4e6"),
    F=("Food & Farming", "#dcfce7"),
    G=("Government & Public", "#dbeafe"),
    H=("Architecture & Buildings", "#fef3c7"),
    I=("Money & Commerce", "#d1fae5"),
    K=("Entertainment & Sports", "#e9d5ff"),
    L=("Life & Living Things", "#ecfccb"),
    M=("Movement & Location", "#cffafe"),
    N=("Numbers & Measurement", "#e0e7ff"),
    O=("Substances & Objects", "#fed7aa"),
    P=("Education", "#ccfbf1"),
    Q=("Linguistic Actions", "#e0f2fe"),
    S=("Social Actions", "#fae8ff"),
    T=("Time", "#fef9c3"),
    W=("World & Environment", "#bbf7d0"),
    X=("Psychological Actions", "#ddd6fe"),
    Y=("Science & Technology", "#bfdbfe"),
    Z=("Names & Grammatical", "#e5e7eb"),
)
29
+
30
def get_category_color(tag):
    """Return the background colour for a USAS tag.

    The colour is looked up in ``USAS_CATEGORIES`` using the upper-cased
    first character of ``tag``.

    Args:
        tag: Tag string such as ``"E2+"``; may be empty or ``None``.

    Returns:
        The category's hex colour string, or the neutral grey ``'#f3f4f6'``
        when ``tag`` is empty or its first character is not a known category.
    """
    fallback_color = '#f3f4f6'
    if not tag:
        return fallback_color
    # The default must use the table's (name, colour) field order. With the
    # fields swapped, an unknown letter returned the string 'Unknown', which
    # is not a valid CSS colour.
    _name, color = USAS_CATEGORIES.get(tag[0].upper(), ('Unknown', fallback_color))
    return color
36
+
37
def get_category_name(tag):
    """Return the display name of the USAS category that ``tag`` belongs to.

    The category is selected by the upper-cased first character of ``tag``.
    An empty/``None`` tag, or a first character that is not a key of
    ``USAS_CATEGORIES``, yields ``'Unknown'``.
    """
    if tag:
        name, _color = USAS_CATEGORIES.get(tag[0].upper(), ('Unknown', '#f3f4f6'))
        return name
    return 'Unknown'
43
+
44
def _escape(value):
    """HTML-escape ``value`` (quotes included) for element text and attributes."""
    return html.escape(str(value), quote=True)


def _split_tokens(text):
    """Split whitespace-separated ``word_TAG`` items into ``(word, tag)`` pairs."""
    tokens = []
    for part in text.split():
        # Split on the LAST underscore so the word itself may contain '_'.
        word, sep, tag = part.rpartition('_')
        if sep and tag:
            tokens.append((word, tag))
        else:
            # No underscore, or nothing after it: keep the item verbatim and
            # mark it with Z99, the tag this app uses for untagged words.
            tokens.append((part, 'Z99'))
    return tokens


def _primary_letter(tag):
    """Return the upper-cased first character of the first '/'-separated tag.

    Falls back to ``'Z'`` when that segment is empty (e.g. the tag starts
    with '/'), instead of raising ``IndexError``.
    """
    # NOTE(review): bucketing is purely by first character, so a tag such as
    # "PUNC" is counted under "P".
    primary = tag.split('/')[0]
    return primary[:1].upper() or 'Z'


def _render_tagged_words(tokens):
    """Build the coloured word/tag spans and count tokens per category letter."""
    spans = ['<div style="line-height: 2.5; font-size: 16px;">']
    tag_counts = Counter()
    for word, tag in tokens:
        tag_counts[_primary_letter(tag)] += 1
        color = get_category_color(tag)
        # Words and tags come straight from the text box; escape them so that
        # characters like <, & or " cannot break or inject markup.
        safe_word = _escape(word)
        safe_tag = _escape(tag)
        safe_category = _escape(get_category_name(tag))
        spans.append(
            f'<span style="background-color: {color}; '
            f'padding: 4px 8px; margin: 2px; border-radius: 6px; '
            f'display: inline-block; border: 2px solid {color}; '
            f'cursor: help;" '
            f'title="{safe_word}\nTag: {safe_tag}\nCategory: {safe_category}">'
            f'<strong>{safe_word}</strong><br>'
            f'<small style="font-size: 11px; font-family: monospace;">{safe_tag}</small>'
            f'</span> '
        )
    spans.append('</div>')
    return ''.join(spans), tag_counts


def _render_stats(tag_counts):
    """Build the "Tag Distribution" table, most frequent category first."""
    rows = [
        '<div style="margin-top: 20px;"><h3>Tag Distribution</h3>',
        '<table style="width: 100%; border-collapse: collapse;">',
        '<tr style="background-color: #f3f4f6;">',
        '<th style="padding: 8px; text-align: left; border: 1px solid #ddd;">Category</th>',
        '<th style="padding: 8px; text-align: left; border: 1px solid #ddd;">Name</th>',
        '<th style="padding: 8px; text-align: right; border: 1px solid #ddd;">Count</th>',
        '<th style="padding: 8px; text-align: right; border: 1px solid #ddd;">%</th>',
        '</tr>',
    ]
    total = sum(tag_counts.values())
    for cat, count in tag_counts.most_common():
        cat_name, color = USAS_CATEGORIES.get(cat, ('Unknown', '#f3f4f6'))
        percentage = (count / total * 100) if total > 0 else 0
        # `cat` is derived from user input, so it is escaped as well.
        rows.append(
            f'<tr><td style="padding: 8px; border: 1px solid #ddd; background-color: {color};">'
            f'<strong>{_escape(cat)}</strong></td>'
            f'<td style="padding: 8px; border: 1px solid #ddd;">{_escape(cat_name)}</td>'
            f'<td style="padding: 8px; border: 1px solid #ddd; text-align: right;">{count}</td>'
            f'<td style="padding: 8px; border: 1px solid #ddd; text-align: right;">{percentage:.1f}%</td></tr>'
        )
    rows.append('</table></div>')
    return ''.join(rows)


def _render_legend():
    """Build the legend grid listing every entry of ``USAS_CATEGORIES``."""
    cells = [
        '<div style="margin-top: 20px;"><h3>USAS Categories Legend</h3>',
        '<div style="display: grid; grid-template-columns: repeat(auto-fill, minmax(250px, 1fr)); gap: 10px;">',
    ]
    for cat, (name, color) in sorted(USAS_CATEGORIES.items()):
        cells.append(
            f'<div style="background-color: {color}; padding: 10px; '
            f'border-radius: 6px; border: 2px solid {color};">'
            f'<strong>{_escape(cat)}</strong> - {_escape(name)}</div>'
        )
    cells.append('</div></div>')
    return ''.join(cells)


def parse_tagged_text(text):
    """Render pre-tagged text (``word_TAG`` items) as three HTML fragments.

    Example input: ``I_Z8 love_E2+ walking_M1``

    Items are separated by whitespace and split on their last underscore.
    An item without a tag is shown with the tag ``Z99``. For the statistics,
    each token is counted under the first letter of its first
    '/'-separated tag.

    Args:
        text: The tagged text; ``None``, empty and whitespace-only values
            are treated as "nothing entered".

    Returns:
        A ``(tagged_html, stats_html, legend_html)`` tuple of strings. When
        nothing was entered, the first element is a plain prompt message and
        the other two are empty strings.
    """
    if not text or not text.strip():
        return "Please enter some tagged text to visualize.", "", ""

    # Non-blank text always yields at least one token, so no further
    # emptiness check is needed.
    tokens = _split_tokens(text)
    tagged_html, tag_counts = _render_tagged_words(tokens)
    return tagged_html, _render_stats(tag_counts), _render_legend()
135
+
136
# Pre-tagged sample sentences offered as one-click examples under the input.
_SAMPLE_SENTENCES = (
    "I_Z8 love_E2+ walking_M1 in_Z5 the_Z5 park_M7 on_Z5 sunny_W4 days_T1.3 ._PUNC",
    "The_Z5 company_I2.1 announced_Q2.2 record_N5.1+ profits_I1.1 yesterday_T1.1.1 ._PUNC",
    "She_Z8 thinks_X2.1 education_P1 is_A3+ very_A13.3 important_A11.1+ ._PUNC",
    "As_Z5 an_Z5 immigrant_M1/M7/S2mf in_Z5 the_Z5 United_Z2c States_Z2c you_Z8mf have_A9+ the_Z5 right_S7.4+ to_Z5 receive_A9+ language_Q3 access_M1 services_S8+ ._PUNC",
    "The_Z5 Civil_G1.1 Rights_A5.3+ Act_A1.1.1 of_Z5 1964_N1 and_Z5 the_Z5 Voting_G1.2 Rights_A5.3+ Act_A1.1.1 of_Z5 1965_N1 protect_S8+/A15+ your_Z8 linguistic_Q3 rights_S7.4+ ._PUNC",
)

# Page layout, top to bottom: intro text, input box with button, the
# visualised tags, statistics beside the legend, reference notes, examples.
with gr.Blocks(title="UCREL USAS Semantic Tag Visualizer", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # 🏷️ UCREL USAS Semantic Tag Visualizer

        This app visualizes pre-tagged text using the **UCREL Semantic Analysis System (USAS)** tags.

        **Format:** Use underscore notation: `word_TAG`

        Example: `I_Z8 love_E2+ walking_M1 in_Z5 the_Z5 park_M7`

        Simply paste your tagged text below and click **Visualize**!
        """
    )

    # Input area: the tagged text and the button that triggers rendering.
    with gr.Row(), gr.Column():
        text_input = gr.Textbox(
            label="Paste your tagged text here (word_TAG format)",
            placeholder="Example: I_Z8 love_E2+ walking_M1 in_Z5 the_Z5 park_M7 ._PUNC",
            lines=10
        )
        submit_btn = gr.Button("🎨 Visualize Tags", variant="primary", size="lg")

    # Full-width row for the coloured word/tag spans.
    with gr.Row(), gr.Column():
        tagged_output = gr.HTML(label="Visualized Tags")

    # Statistics table and category legend side by side, equal width.
    with gr.Row():
        with gr.Column(scale=1):
            stats_output = gr.HTML(label="Statistics")
        with gr.Column(scale=1):
            legend_output = gr.HTML(label="Legend")

    gr.Markdown(
        """
        ### About USAS Tags

        The UCREL Semantic Analysis System (USAS) categorizes words into 21 major semantic fields:
        - **A**: General & Abstract Terms (e.g., A5.1+ = good, A5.1- = bad)
        - **B**: Body & Individual (e.g., B1 = anatomy)
        - **E**: Emotional Actions (e.g., E2+ = like/love, E3- = violent/angry)
        - **F**: Food & Farming (e.g., F1 = food)
        - **G**: Government & Public (e.g., G1.1c = government, G1.2 = politics)
        - **I**: Money & Commerce (e.g., I1.1 = money: affluent)
        - **M**: Movement & Location (e.g., M1 = moving, M7 = places)
        - **N**: Numbers & Measurement (e.g., N1 = numbers, N5+ = quantities: many)
        - **P**: Education (e.g., P1 = education)
        - **Q**: Linguistic Actions (e.g., Q2.2 = speech acts, Q3 = language)
        - **S**: Social Actions (e.g., S2mf = people, S8+ = helping)
        - **T**: Time (e.g., T1.3 = time: period)
        - **X**: Psychological Actions (e.g., X2.1 = thought, X2.2+ = knowledge)
        - **Z**: Names & Grammatical (e.g., Z5 = grammatical words, Z8 = pronouns)
        - And more categories!

        **Tag modifiers:**
        - **+** = positive (e.g., A5.1+ = good)
        - **-** = negative (e.g., A5.1- = bad)
        - **/** = multiple tags (e.g., M1/M7/S2mf = moving/place/person)

        **Hover over tagged words** to see detailed information about each semantic tag.

        ---
        Learn more: [USAS Documentation](https://ucrel.lancs.ac.uk/usas/)
        """
    )

    # Each example is a one-element row because there is a single input.
    gr.Examples(
        examples=[[sentence] for sentence in _SAMPLE_SENTENCES],
        inputs=text_input
    )

    # Clicking the button fills all three HTML panes from one call.
    submit_btn.click(
        fn=parse_tagged_text,
        inputs=text_input,
        outputs=[tagged_output, stats_output, legend_output]
    )

if __name__ == "__main__":
    demo.launch()