"""Gradio app that visualizes text pre-tagged with UCREL USAS semantic tags.

Input is whitespace-separated ``word_TAG`` tokens (for example
``I_Z8 love_E2+ walking_M1``).  Each token is rendered as a colored chip,
accompanied by a per-category distribution table and a static legend.
"""

import html
import re  # noqa: F401 - retained from the original import list (unused)
from collections import Counter

import gradio as gr

# USAS category information: top-level letter -> (display name, CSS color)
USAS_CATEGORIES = {
    'A': ('General & Abstract Terms', '#fee2e2'),
    'B': ('Body & Individual', '#fce7f3'),
    'C': ('Arts & Crafts', '#f3e8ff'),
    'E': ('Emotional Actions', '#ffe4e6'),
    'F': ('Food & Farming', '#dcfce7'),
    'G': ('Government & Public', '#dbeafe'),
    'H': ('Architecture & Buildings', '#fef3c7'),
    'I': ('Money & Commerce', '#d1fae5'),
    'K': ('Entertainment & Sports', '#e9d5ff'),
    'L': ('Life & Living Things', '#ecfccb'),
    'M': ('Movement & Location', '#cffafe'),
    'N': ('Numbers & Measurement', '#e0e7ff'),
    'O': ('Substances & Objects', '#fed7aa'),
    'P': ('Education', '#ccfbf1'),
    'Q': ('Linguistic Actions', '#e0f2fe'),
    'S': ('Social Actions', '#fae8ff'),
    'T': ('Time', '#fef9c3'),
    'W': ('World & Environment', '#bbf7d0'),
    'X': ('Psychological Actions', '#ddd6fe'),
    'Y': ('Science & Technology', '#bfdbfe'),
    'Z': ('Names & Grammatical', '#e5e7eb'),
}

# Fallbacks used when a tag does not map onto a known category.
_DEFAULT_COLOR = '#f3f4f6'
_UNKNOWN_NAME = 'Unknown'
_UNKNOWN_KEY = '?'

# Tag assigned to tokens that carry no tag of their own.
_UNMATCHED_TAG = 'Z99'

# 'PUNC' marks punctuation in the app's own examples.  It must not be
# classified by its first letter, or it would be counted as 'P' (Education).
_PUNCTUATION_TAG = 'PUNC'
_PUNCTUATION_NAME = 'Punctuation'

# Dark text color so chips stay readable on pastel backgrounds in any theme.
_TEXT_COLOR = '#111827'


def _category_info(tag):
    """Return ``(key, name, color)`` describing the category of *tag*.

    Only the primary tag (the part before the first ``/``) is considered.
    Empty or malformed tags map to the unknown category instead of raising.
    """
    primary = tag.split('/')[0].strip() if tag else ''
    if not primary:
        return _UNKNOWN_KEY, _UNKNOWN_NAME, _DEFAULT_COLOR
    if primary.upper() == _PUNCTUATION_TAG:
        return _PUNCTUATION_TAG, _PUNCTUATION_NAME, _DEFAULT_COLOR
    key = primary[0].upper()
    name, color = USAS_CATEGORIES.get(key, (_UNKNOWN_NAME, _DEFAULT_COLOR))
    return key, name, color


def get_category_color(tag):
    """Get the CSS color for a tag; unknown or empty tags get a neutral gray."""
    return _category_info(tag)[2]


def get_category_name(tag):
    """Get the category name for a tag; unknown or empty tags give 'Unknown'."""
    return _category_info(tag)[1]


def _parse_tokens(text):
    """Split *text* on whitespace into ``(word, tag)`` pairs.

    The split happens on the last underscore so words that themselves contain
    underscores keep them.  Tokens without an underscore, or with nothing
    after it, are treated as untagged and receive ``Z99``.
    """
    tokens = []
    for part in text.split():
        word, sep, tag = part.rpartition('_')
        if not sep:
            tokens.append((part, _UNMATCHED_TAG))
        elif not tag:
            # "word_" or a bare "_": keep something visible as the word.
            tokens.append((word or part, _UNMATCHED_TAG))
        else:
            tokens.append((word, tag))
    return tokens


def _render_tokens(tokens):
    """Build the HTML block of colored word chips with hover tooltips."""
    parts = ['<div style="line-height: 3; font-size: 16px; padding: 12px;">']
    for word, tag in tokens:
        _, category, color = _category_info(tag)
        # Word and tag come straight from user input: escape before embedding.
        safe_word = html.escape(word)
        safe_tag = html.escape(tag)
        parts.append(
            f'<span style="background-color: {color}; color: {_TEXT_COLOR}; '
            f'padding: 4px 8px; margin: 2px; border-radius: 6px; '
            f'display: inline-block; text-align: center; line-height: 1.3;" '
            f'title="{safe_tag}: {html.escape(category)}">'
            f'{safe_word}<br>'
            f'<small style="font-size: 11px; color: #4b5563;">{safe_tag}</small>'
            f'</span> '
        )
    parts.append('</div>')
    return ''.join(parts)


def _render_stats(tag_counts):
    """Build the tag-distribution table from a Counter of category infos."""
    cell = 'padding: 6px 10px; text-align: left;'
    rows = [
        '<div style="padding: 12px;">',
        '<h3>Tag Distribution</h3>',
        '<table style="width: 100%; border-collapse: collapse;">',
        '<thead>',
        '<tr>',
        f'<th style="{cell}">Category</th>',
        f'<th style="{cell}">Name</th>',
        f'<th style="{cell}">Count</th>',
        f'<th style="{cell}">%</th>',
        '</tr>',
        '</thead>',
        '<tbody>',
    ]
    total = sum(tag_counts.values())
    for (cat, cat_name, color), count in tag_counts.most_common():
        percentage = (count / total * 100) if total > 0 else 0
        rows.append(
            f'<tr style="background-color: {color}; color: {_TEXT_COLOR};">'
            f'<td style="{cell}"><strong>{html.escape(cat)}</strong></td>'
            f'<td style="{cell}">{html.escape(cat_name)}</td>'
            f'<td style="{cell}">{count}</td>'
            f'<td style="{cell}">{percentage:.1f}%</td>'
            f'</tr>'
        )
    rows.append('</tbody></table></div>')
    return ''.join(rows)


def _render_legend():
    """Build the static legend listing every USAS category with its color."""
    parts = [
        '<div style="padding: 12px;"><h3>USAS Categories Legend</h3>',
        '<div style="display: grid; grid-template-columns: repeat(2, 1fr); '
        'gap: 6px;">',
    ]
    for cat, (name, color) in sorted(USAS_CATEGORIES.items()):
        parts.append(
            f'<div style="background-color: {color}; color: {_TEXT_COLOR}; '
            f'padding: 6px 10px; border-radius: 4px;">'
            f'<strong>{cat}</strong> - {html.escape(name)}</div>'
        )
    parts.append('</div></div>')
    return ''.join(parts)


def parse_tagged_text(text):
    """
    Parse pre-tagged text in underscore format: word_TAG
    Example: I_Z8 love_E2+ walking_M1

    Returns a 3-tuple of HTML strings ``(tagged, stats, legend)``.  When
    *text* is empty or only whitespace, the first element is a plain prompt
    message and the other two are empty strings.
    """
    if not text or not text.strip():
        return "Please enter some tagged text to visualize.", "", ""

    # Non-blank text always yields at least one token, so no further
    # emptiness check is needed after parsing.
    tokens = _parse_tokens(text)

    # Count by primary-tag category; the key carries name and color so the
    # stats table needs no second lookup.
    tag_counts = Counter(_category_info(tag) for _, tag in tokens)

    return _render_tokens(tokens), _render_stats(tag_counts), _render_legend()


# Create Gradio interface
with gr.Blocks(title="UCREL USAS Semantic Tag Visualizer", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # 🏷️ UCREL USAS Semantic Tag Visualizer

        This app visualizes pre-tagged text using the **UCREL Semantic Analysis System (USAS)** tags.

        **Format:** Use underscore notation: `word_TAG`

        Example: `I_Z8 love_E2+ walking_M1 in_Z5 the_Z5 park_M7`

        Simply paste your tagged text below and click **Visualize**!
        """
    )

    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(
                label="Paste your tagged text here (word_TAG format)",
                placeholder="Example: I_Z8 love_E2+ walking_M1 in_Z5 the_Z5 park_M7 ._PUNC",
                lines=10
            )
            submit_btn = gr.Button("🎨 Visualize Tags", variant="primary", size="lg")

    with gr.Row():
        with gr.Column():
            tagged_output = gr.HTML(label="Visualized Tags")

    with gr.Row():
        with gr.Column(scale=1):
            stats_output = gr.HTML(label="Statistics")
        with gr.Column(scale=1):
            legend_output = gr.HTML(label="Legend")

    gr.Markdown(
        """
        ### About USAS Tags

        The UCREL Semantic Analysis System (USAS) categorizes words into 21 major semantic fields:

        - **A**: General & Abstract Terms (e.g., A5.1+ = good, A5.1- = bad)
        - **B**: Body & Individual (e.g., B1 = anatomy)
        - **E**: Emotional Actions (e.g., E2+ = like/love, E3- = violent/angry)
        - **F**: Food & Farming (e.g., F1 = food)
        - **G**: Government & Public (e.g., G1.1c = government, G1.2 = politics)
        - **I**: Money & Commerce (e.g., I1.1 = money: affluent)
        - **M**: Movement & Location (e.g., M1 = moving, M7 = places)
        - **N**: Numbers & Measurement (e.g., N1 = numbers, N5+ = quantities: many)
        - **P**: Education (e.g., P1 = education)
        - **Q**: Linguistic Actions (e.g., Q2.2 = speech acts, Q3 = language)
        - **S**: Social Actions (e.g., S2mf = people, S8+ = helping)
        - **T**: Time (e.g., T1.3 = time: period)
        - **X**: Psychological Actions (e.g., X2.1 = thought, X2.2+ = knowledge)
        - **Z**: Names & Grammatical (e.g., Z5 = grammatical words, Z8 = pronouns)
        - And more categories!

        **Tag modifiers:**
        - **+** = positive (e.g., A5.1+ = good)
        - **-** = negative (e.g., A5.1- = bad)
        - **/** = multiple tags (e.g., M1/M7/S2mf = moving/place/person)

        **Hover over tagged words** to see detailed information about each semantic tag.

        ---

        Learn more: [USAS Documentation](https://ucrel.lancs.ac.uk/usas/)
        """
    )

    # Examples
    gr.Examples(
        examples=[
            ["I_Z8 love_E2+ walking_M1 in_Z5 the_Z5 park_M7 on_Z5 sunny_W4 days_T1.3 ._PUNC"],
            ["The_Z5 company_I2.1 announced_Q2.2 record_N5.1+ profits_I1.1 yesterday_T1.1.1 ._PUNC"],
            ["She_Z8 thinks_X2.1 education_P1 is_A3+ very_A13.3 important_A11.1+ ._PUNC"],
            ["As_Z5 an_Z5 immigrant_M1/M7/S2mf in_Z5 the_Z5 United_Z2c States_Z2c you_Z8mf have_A9+ the_Z5 right_S7.4+ to_Z5 receive_A9+ language_Q3 access_M1 services_S8+ ._PUNC"],
            ["The_Z5 Civil_G1.1 Rights_A5.3+ Act_A1.1.1 of_Z5 1964_N1 and_Z5 the_Z5 Voting_G1.2 Rights_A5.3+ Act_A1.1.1 of_Z5 1965_N1 protect_S8+/A15+ your_Z8 linguistic_Q3 rights_S7.4+ ._PUNC"]
        ],
        inputs=text_input
    )

    submit_btn.click(
        fn=parse_tagged_text,
        inputs=text_input,
        outputs=[tagged_output, stats_output, legend_output]
    )

if __name__ == "__main__":
    demo.launch()