Spaces:
Sleeping
Sleeping
import html
import re
from collections import Counter

import gradio as gr
| # USAS category information | |
# Maps the first letter of a USAS tag to ``(category name, CSS hex colour)``.
USAS_CATEGORIES = {
    'A': ('General & Abstract Terms', '#fee2e2'),
    'B': ('Body & Individual', '#fce7f3'),
    'C': ('Arts & Crafts', '#f3e8ff'),
    'E': ('Emotional Actions', '#ffe4e6'),
    'F': ('Food & Farming', '#dcfce7'),
    'G': ('Government & Public', '#dbeafe'),
    'H': ('Architecture & Buildings', '#fef3c7'),
    'I': ('Money & Commerce', '#d1fae5'),
    'K': ('Entertainment & Sports', '#e9d5ff'),
    'L': ('Life & Living Things', '#ecfccb'),
    'M': ('Movement & Location', '#cffafe'),
    'N': ('Numbers & Measurement', '#e0e7ff'),
    'O': ('Substances & Objects', '#fed7aa'),
    'P': ('Education', '#ccfbf1'),
    'Q': ('Linguistic Actions', '#e0f2fe'),
    'S': ('Social Actions', '#fae8ff'),
    'T': ('Time', '#fef9c3'),
    'W': ('World & Environment', '#bbf7d0'),
    'X': ('Psychological Actions', '#ddd6fe'),
    'Y': ('Science & Technology', '#bfdbfe'),
    'Z': ('Names & Grammatical', '#e5e7eb'),
}

# Single fallback entry, in the same (name, colour) order as the table above.
# Both getters share it so the two fields can never be swapped again: the
# colour getter used to fall back to a reversed tuple and returned the string
# 'Unknown' as a CSS colour for unrecognised tags.
_UNKNOWN_CATEGORY = ('Unknown', '#f3f4f6')


def _lookup_category(tag):
    """Return the ``(name, colour)`` entry selected by the first character of *tag*.

    The character is upper-cased before the lookup. An empty/``None`` tag or
    a character that is not a key of ``USAS_CATEGORIES`` yields
    ``_UNKNOWN_CATEGORY``.
    """
    if not tag:
        return _UNKNOWN_CATEGORY
    return USAS_CATEGORIES.get(tag[0].upper(), _UNKNOWN_CATEGORY)


def get_category_color(tag):
    """Return the CSS hex colour for *tag*, chosen by its first letter.

    Args:
        tag: A tag string such as ``'E2+'`` or ``'M1/M7'``; may be empty.

    Returns:
        The colour stored in ``USAS_CATEGORIES`` for the tag's first letter,
        or the neutral grey ``'#f3f4f6'`` when the tag is empty or its first
        letter is not a known category.
    """
    return _lookup_category(tag)[1]


def get_category_name(tag):
    """Return the category name for *tag*, chosen by its first letter.

    Args:
        tag: A tag string such as ``'E2+'`` or ``'M1/M7'``; may be empty.

    Returns:
        The name stored in ``USAS_CATEGORIES`` for the tag's first letter,
        or ``'Unknown'`` when the tag is empty or its first letter is not a
        known category.
    """
    return _lookup_category(tag)[0]
def _split_tagged_tokens(text):
    """Split *text* on whitespace into a list of ``(word, tag)`` pairs.

    Each part is cut at its LAST underscore, so a word that itself contains
    underscores keeps them. A part without any underscore is treated as
    untagged and paired with ``'Z99'``.
    """
    tokens = []
    for part in text.split():
        if '_' in part:
            word, tag = part.rsplit('_', 1)
        else:
            word, tag = part, 'Z99'
        tokens.append((word, tag))
    return tokens


def _category_letter(tag):
    """Return the upper-cased first character of the primary tag.

    The primary tag is the portion before the first ``/``. When it is empty
    (``word_`` or ``word_/M1``) the letter ``'Z'`` is used; indexing the empty
    string here used to raise ``IndexError`` for tags starting with ``/``.

    NOTE(review): labels such as the ``PUNC`` used in the bundled examples
    are bucketed by their first letter as well, so ``PUNC`` is counted under
    ``P``. Confirm whether that is intended.
    """
    primary = tag.split('/')[0]
    return primary[0].upper() if primary else 'Z'


def _render_tagged_words(tokens):
    """Render every ``(word, tag)`` pair as a coloured ``<span>`` with a tooltip."""
    html_parts = ['<div style="line-height: 2.5; font-size: 16px;">']
    for word, tag in tokens:
        color = get_category_color(tag)
        category = get_category_name(tag)
        # word and tag come straight from the text box; escape them so that
        # '<', '&' or quotes cannot inject markup or break the title attribute.
        safe_word = html.escape(word)
        safe_tag = html.escape(tag)
        html_parts.append(
            f'<span style="background-color: {color}; '
            f'padding: 4px 8px; margin: 2px; border-radius: 6px; '
            f'display: inline-block; border: 2px solid {color}; '
            f'cursor: help;" '
            f'title="{safe_word}\nTag: {safe_tag}\nCategory: {category}">'
            f'<strong>{safe_word}</strong><br>'
            f'<small style="font-size: 11px; font-family: monospace;">{safe_tag}</small>'
            f'</span> '
        )
    html_parts.append('</div>')
    return ''.join(html_parts)


def _render_tag_distribution(tag_counts):
    """Render a table of category letter, name, count and percentage, most frequent first."""
    stats_html = [
        '<div style="margin-top: 20px;"><h3>Tag Distribution</h3>',
        '<table style="width: 100%; border-collapse: collapse;">',
        '<tr style="background-color: #f3f4f6;">',
        '<th style="padding: 8px; text-align: left; border: 1px solid #ddd;">Category</th>',
        '<th style="padding: 8px; text-align: left; border: 1px solid #ddd;">Name</th>',
        '<th style="padding: 8px; text-align: right; border: 1px solid #ddd;">Count</th>',
        '<th style="padding: 8px; text-align: right; border: 1px solid #ddd;">%</th>',
        '</tr>',
    ]
    total = sum(tag_counts.values())
    for cat, count in tag_counts.most_common():
        cat_name, color = USAS_CATEGORIES.get(cat, ('Unknown', '#f3f4f6'))
        # The loop body only runs when at least one tag was counted, so
        # total is always positive here.
        percentage = count / total * 100
        # cat is a character taken from user input, so it is escaped too.
        stats_html.append(
            f'<tr><td style="padding: 8px; border: 1px solid #ddd; background-color: {color};">'
            f'<strong>{html.escape(cat)}</strong></td>'
            f'<td style="padding: 8px; border: 1px solid #ddd;">{cat_name}</td>'
            f'<td style="padding: 8px; border: 1px solid #ddd; text-align: right;">{count}</td>'
            f'<td style="padding: 8px; border: 1px solid #ddd; text-align: right;">{percentage:.1f}%</td></tr>'
        )
    stats_html.append('</table></div>')
    return ''.join(stats_html)


def _render_legend():
    """Render one coloured box per entry of ``USAS_CATEGORIES``, sorted by letter."""
    legend_html = [
        '<div style="margin-top: 20px;"><h3>USAS Categories Legend</h3>',
        '<div style="display: grid; grid-template-columns: repeat(auto-fill, minmax(250px, 1fr)); gap: 10px;">',
    ]
    for cat, (name, color) in sorted(USAS_CATEGORIES.items()):
        legend_html.append(
            f'<div style="background-color: {color}; padding: 10px; '
            f'border-radius: 6px; border: 2px solid {color};">'
            f'<strong>{cat}</strong> - {name}</div>'
        )
    legend_html.append('</div></div>')
    return ''.join(legend_html)


def parse_tagged_text(text):
    """Turn pre-tagged text into three HTML fragments for display.

    The input uses underscore notation, one ``word_TAG`` per
    whitespace-separated part, e.g. ``I_Z8 love_E2+ walking_M1``.

    Args:
        text: The tagged text. ``None``, empty or whitespace-only input is
            treated as "nothing entered".

    Returns:
        A 3-tuple of strings ``(tagged_html, stats_html, legend_html)``:
        the coloured words, the tag-distribution table and the category
        legend. When nothing was entered, the first element is a plain
        prompt message and the other two are empty strings.
    """
    if not text or not text.strip():
        return "Please enter some tagged text to visualize.", "", ""

    # Non-blank text always yields at least one token, so no separate
    # "nothing parsed" case is needed after this point.
    tokens = _split_tagged_tokens(text)
    tag_counts = Counter(_category_letter(tag) for _, tag in tokens)

    return (
        _render_tagged_words(tokens),
        _render_tag_distribution(tag_counts),
        _render_legend(),
    )
# Create Gradio interface
# NOTE(review): the nesting of the layout blocks below was reconstructed from
# a listing whose indentation had been stripped -- confirm it against the
# deployed app. The Markdown text is kept flush-left so that no leading
# whitespace is added to the strings.
with gr.Blocks(title="UCREL USAS Semantic Tag Visualizer", theme=gr.themes.Soft()) as demo:
    # Header: what the app does and the expected input format.
    gr.Markdown(
        """
# 🏷️ UCREL USAS Semantic Tag Visualizer
This app visualizes pre-tagged text using the **UCREL Semantic Analysis System (USAS)** tags.
**Format:** Use underscore notation: `word_TAG`
Example: `I_Z8 love_E2+ walking_M1 in_Z5 the_Z5 park_M7`
Simply paste your tagged text below and click **Visualize**!
"""
    )
    # Input area: the text box for tagged text and the button that triggers parsing.
    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(
                label="Paste your tagged text here (word_TAG format)",
                placeholder="Example: I_Z8 love_E2+ walking_M1 in_Z5 the_Z5 park_M7 ._PUNC",
                lines=10
            )
            submit_btn = gr.Button("🎨 Visualize Tags", variant="primary", size="lg")
    # Output area 1: the coloured, tagged words.
    with gr.Row():
        with gr.Column():
            tagged_output = gr.HTML(label="Visualized Tags")
    # Output area 2: statistics table and legend, in two columns of equal scale.
    with gr.Row():
        with gr.Column(scale=1):
            stats_output = gr.HTML(label="Statistics")
        with gr.Column(scale=1):
            legend_output = gr.HTML(label="Legend")
    # Reference text describing the tag categories and modifiers.
    gr.Markdown(
        """
### About USAS Tags
The UCREL Semantic Analysis System (USAS) categorizes words into 21 major semantic fields:
- **A**: General & Abstract Terms (e.g., A5.1+ = good, A5.1- = bad)
- **B**: Body & Individual (e.g., B1 = anatomy)
- **E**: Emotional Actions (e.g., E2+ = like/love, E3- = violent/angry)
- **F**: Food & Farming (e.g., F1 = food)
- **G**: Government & Public (e.g., G1.1c = government, G1.2 = politics)
- **I**: Money & Commerce (e.g., I1.1 = money: affluent)
- **M**: Movement & Location (e.g., M1 = moving, M7 = places)
- **N**: Numbers & Measurement (e.g., N1 = numbers, N5+ = quantities: many)
- **P**: Education (e.g., P1 = education)
- **Q**: Linguistic Actions (e.g., Q2.2 = speech acts, Q3 = language)
- **S**: Social Actions (e.g., S2mf = people, S8+ = helping)
- **T**: Time (e.g., T1.3 = time: period)
- **X**: Psychological Actions (e.g., X2.1 = thought, X2.2+ = knowledge)
- **Z**: Names & Grammatical (e.g., Z5 = grammatical words, Z8 = pronouns)
- And more categories!
**Tag modifiers:**
- **+** = positive (e.g., A5.1+ = good)
- **-** = negative (e.g., A5.1- = bad)
- **/** = multiple tags (e.g., M1/M7/S2mf = moving/place/person)
**Hover over tagged words** to see detailed information about each semantic tag.
---
Learn more: [USAS Documentation](https://ucrel.lancs.ac.uk/usas/)
"""
    )
    # Examples
    # Each example is a one-element list because there is a single input
    # component (text_input).
    gr.Examples(
        examples=[
            ["I_Z8 love_E2+ walking_M1 in_Z5 the_Z5 park_M7 on_Z5 sunny_W4 days_T1.3 ._PUNC"],
            ["The_Z5 company_I2.1 announced_Q2.2 record_N5.1+ profits_I1.1 yesterday_T1.1.1 ._PUNC"],
            ["She_Z8 thinks_X2.1 education_P1 is_A3+ very_A13.3 important_A11.1+ ._PUNC"],
            ["As_Z5 an_Z5 immigrant_M1/M7/S2mf in_Z5 the_Z5 United_Z2c States_Z2c you_Z8mf have_A9+ the_Z5 right_S7.4+ to_Z5 receive_A9+ language_Q3 access_M1 services_S8+ ._PUNC"],
            ["The_Z5 Civil_G1.1 Rights_A5.3+ Act_A1.1.1 of_Z5 1964_N1 and_Z5 the_Z5 Voting_G1.2 Rights_A5.3+ Act_A1.1.1 of_Z5 1965_N1 protect_S8+/A15+ your_Z8 linguistic_Q3 rights_S7.4+ ._PUNC"]
        ],
        inputs=text_input
    )
    # Wire the button: parse_tagged_text returns three strings, which fill
    # the three HTML outputs in the order listed here.
    submit_btn.click(
        fn=parse_tagged_text,
        inputs=text_input,
        outputs=[tagged_output, stats_output, legend_output]
    )
# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()