Spaces:

kambris
/

LLMLPsemantic

Sleeping

App Files Files Community

LLMLPsemantic / app.py

kambris

Create app.py

a5703b6 verified about 2 months ago

raw

history blame contribute delete

9.14 kB

	import html
	import re
	from collections import Counter

	import gradio as gr

	# USAS top-level categories, keyed by the tag's leading letter.
	# Each value is a (display name, background color as a hex string) pair.
	USAS_CATEGORIES = {
	    "A": ("General & Abstract Terms", "#fee2e2"),
	    "B": ("Body & Individual", "#fce7f3"),
	    "C": ("Arts & Crafts", "#f3e8ff"),
	    "E": ("Emotional Actions", "#ffe4e6"),
	    "F": ("Food & Farming", "#dcfce7"),
	    "G": ("Government & Public", "#dbeafe"),
	    "H": ("Architecture & Buildings", "#fef3c7"),
	    "I": ("Money & Commerce", "#d1fae5"),
	    "K": ("Entertainment & Sports", "#e9d5ff"),
	    "L": ("Life & Living Things", "#ecfccb"),
	    "M": ("Movement & Location", "#cffafe"),
	    "N": ("Numbers & Measurement", "#e0e7ff"),
	    "O": ("Substances & Objects", "#fed7aa"),
	    "P": ("Education", "#ccfbf1"),
	    "Q": ("Linguistic Actions", "#e0f2fe"),
	    "S": ("Social Actions", "#fae8ff"),
	    "T": ("Time", "#fef9c3"),
	    "W": ("World & Environment", "#bbf7d0"),
	    "X": ("Psychological Actions", "#ddd6fe"),
	    "Y": ("Science & Technology", "#bfdbfe"),
	    "Z": ("Names & Grammatical", "#e5e7eb"),
	}

	def get_category_color(tag):
	    """Return the background color for a USAS tag.

	    The category is chosen by the tag's first character, case-insensitively.

	    Args:
	        tag: A tag string such as ``'E2+'``; may be empty or ``None``.

	    Returns:
	        The hex color stored in ``USAS_CATEGORIES`` for the tag's category,
	        or the neutral grey ``'#f3f4f6'`` when the tag is empty or its first
	        character is not a known category letter.
	    """
	    fallback = '#f3f4f6'
	    if not tag:
	        return fallback
	    # Table entries are (name, color) pairs, so the default must use the same
	    # order; with the pair reversed, unknown letters produced the string
	    # 'Unknown' where a CSS color was expected.
	    _name, color = USAS_CATEGORIES.get(tag[0].upper(), ('Unknown', fallback))
	    return color

	def get_category_name(tag):
	    """Return the USAS category name for a tag.

	    The lookup uses the tag's first character, upper-cased. An empty tag or
	    an unrecognised letter yields ``'Unknown'``.
	    """
	    if tag:
	        name, _color = USAS_CATEGORIES.get(tag[0].upper(), ('Unknown', '#f3f4f6'))
	        return name
	    return 'Unknown'

	def parse_tagged_text(text):
	    """
	    Render pre-tagged text in underscore format (``word_TAG``) as HTML.

	    Example input: ``I_Z8 love_E2+ walking_M1``

	    Tokens are separated by whitespace. A token is split on its last
	    underscore, so a word may itself contain underscores; a token with no
	    underscore is given the tag ``Z99``.

	    Args:
	        text: The tagged text. ``None`` and blank strings count as empty.

	    Returns:
	        A 3-tuple of strings ``(tagged_html, stats_html, legend_html)``.
	        For empty input the first element is a plain prompt message and the
	        other two are empty strings.
	    """
	    if not text or not text.strip():
	        return "Please enter some tagged text to visualize.", "", ""

	    # Non-blank text always produces at least one token, so no further
	    # emptiness check is needed past this point.
	    tokens = _split_tagged_tokens(text)
	    tag_counts = Counter(_category_key(tag) for _word, tag in tokens)

	    return (
	        _render_tokens_html(tokens),
	        _render_stats_html(tag_counts),
	        _render_legend_html(),
	    )


	def _split_tagged_tokens(text):
	    """Split whitespace-separated ``word_TAG`` items into ``(word, tag)`` pairs."""
	    tokens = []
	    for part in text.split():
	        if '_' in part:
	            # Split on the last underscore to handle words with underscores.
	            word, tag = part.rsplit('_', 1)
	        else:
	            # No tag found, treat as untagged.
	            word, tag = part, 'Z99'
	        tokens.append((word, tag))
	    return tokens


	def _category_key(tag):
	    """Return the upper-cased first character of ``tag`` (``'Z'`` if empty)."""
	    # Slicing never raises, unlike indexing the text before the first '/',
	    # which failed for tags that begin with '/'. For every other tag the
	    # character selected is the same.
	    return tag[:1].upper() or 'Z'


	def _render_tokens_html(tokens):
	    """Build the color-coded ``<span>`` view, one span per ``(word, tag)`` pair."""
	    parts = ['<div style="line-height: 2.5; font-size: 16px;">']
	    for word, tag in tokens:
	        color = get_category_color(tag)
	        category = html.escape(get_category_name(tag))
	        # Words and tags come straight from user input and are placed in both
	        # element content and a quoted attribute, so escape them.
	        safe_word = html.escape(word)
	        safe_tag = html.escape(tag)
	        parts.append(
	            f'<span style="background-color: {color}; '
	            f'padding: 4px 8px; margin: 2px; border-radius: 6px; '
	            f'display: inline-block; border: 2px solid {color}; '
	            f'cursor: help;" '
	            f'title="{safe_word}\nTag: {safe_tag}\nCategory: {category}">'
	            f'<strong>{safe_word}</strong><br>'
	            f'<small style="font-size: 11px; font-family: monospace;">{safe_tag}</small>'
	            f'</span> '
	        )
	    parts.append('</div>')
	    return ''.join(parts)


	def _render_stats_html(tag_counts):
	    """Build the tag-distribution table from a ``Counter`` of category keys."""
	    rows = [
	        '<div style="margin-top: 20px;"><h3>Tag Distribution</h3>',
	        '<table style="width: 100%; border-collapse: collapse;">',
	        '<tr style="background-color: #f3f4f6;">',
	        '<th style="padding: 8px; text-align: left; border: 1px solid #ddd;">Category</th>',
	        '<th style="padding: 8px; text-align: left; border: 1px solid #ddd;">Name</th>',
	        '<th style="padding: 8px; text-align: right; border: 1px solid #ddd;">Count</th>',
	        '<th style="padding: 8px; text-align: right; border: 1px solid #ddd;">%</th>',
	        '</tr>',
	    ]
	    total = sum(tag_counts.values())
	    for cat, count in tag_counts.most_common():
	        cat_name, color = USAS_CATEGORIES.get(cat, ('Unknown', '#f3f4f6'))
	        percentage = (count / total * 100) if total > 0 else 0
	        # `cat` is the first character of a user-supplied tag, so escape it.
	        rows.append(
	            f'<tr><td style="padding: 8px; border: 1px solid #ddd; background-color: {color};">'
	            f'<strong>{html.escape(cat)}</strong></td>'
	            f'<td style="padding: 8px; border: 1px solid #ddd;">{html.escape(cat_name)}</td>'
	            f'<td style="padding: 8px; border: 1px solid #ddd; text-align: right;">{count}</td>'
	            f'<td style="padding: 8px; border: 1px solid #ddd; text-align: right;">{percentage:.1f}%</td></tr>'
	        )
	    rows.append('</table></div>')
	    return ''.join(rows)


	def _render_legend_html():
	    """Build the legend listing every entry of ``USAS_CATEGORIES`` in key order."""
	    cells = [
	        '<div style="margin-top: 20px;"><h3>USAS Categories Legend</h3>',
	        '<div style="display: grid; grid-template-columns: repeat(auto-fill, minmax(250px, 1fr)); gap: 10px;">',
	    ]
	    for cat, (name, color) in sorted(USAS_CATEGORIES.items()):
	        cells.append(
	            f'<div style="background-color: {color}; padding: 10px; '
	            f'border-radius: 6px; border: 2px solid {color};">'
	            f'<strong>{cat}</strong> - {html.escape(name)}</div>'
	        )
	    cells.append('</div></div>')
	    return ''.join(cells)

	# Build the Gradio UI: a Blocks app bound to `demo`, using the Soft theme.
	with gr.Blocks(title="UCREL USAS Semantic Tag Visualizer", theme=gr.themes.Soft()) as demo:
	# Introductory Markdown describing the expected `word_TAG` input format.
	gr.Markdown(
	"""
	# 🏷️ UCREL USAS Semantic Tag Visualizer

	This app visualizes pre-tagged text using the UCREL Semantic Analysis System (USAS) tags.

	Format: Use underscore notation: `word_TAG`

	Example: `I_Z8 love_E2+ walking_M1 in_Z5 the_Z5 park_M7`

	Simply paste your tagged text below and click Visualize!
	"""
	)

	# Input area: the tagged-text box and the button that triggers rendering.
	with gr.Row():
	with gr.Column():
	text_input = gr.Textbox(
	label="Paste your tagged text here (word_TAG format)",
	placeholder="Example: I_Z8 love_E2+ walking_M1 in_Z5 the_Z5 park_M7 ._PUNC",
	lines=10
	)
	submit_btn = gr.Button("🎨 Visualize Tags", variant="primary", size="lg")

	# Output area: the color-coded token view.
	with gr.Row():
	with gr.Column():
	tagged_output = gr.HTML(label="Visualized Tags")

	# Two equally scaled columns: the tag distribution table and the category legend.
	with gr.Row():
	with gr.Column(scale=1):
	stats_output = gr.HTML(label="Statistics")
	with gr.Column(scale=1):
	legend_output = gr.HTML(label="Legend")

	# Static reference notes on USAS categories and tag modifiers.
	gr.Markdown(
	"""
	### About USAS Tags

	The UCREL Semantic Analysis System (USAS) categorizes words into 21 major semantic fields:
	- A: General & Abstract Terms (e.g., A5.1+ = good, A5.1- = bad)
	- B: Body & Individual (e.g., B1 = anatomy)
	- E: Emotional Actions (e.g., E2+ = like/love, E3- = violent/angry)
	- F: Food & Farming (e.g., F1 = food)
	- G: Government & Public (e.g., G1.1c = government, G1.2 = politics)
	- I: Money & Commerce (e.g., I1.1 = money: affluent)
	- M: Movement & Location (e.g., M1 = moving, M7 = places)
	- N: Numbers & Measurement (e.g., N1 = numbers, N5+ = quantities: many)
	- P: Education (e.g., P1 = education)
	- Q: Linguistic Actions (e.g., Q2.2 = speech acts, Q3 = language)
	- S: Social Actions (e.g., S2mf = people, S8+ = helping)
	- T: Time (e.g., T1.3 = time: period)
	- X: Psychological Actions (e.g., X2.1 = thought, X2.2+ = knowledge)
	- Z: Names & Grammatical (e.g., Z5 = grammatical words, Z8 = pronouns)
	- And more categories!

	Tag modifiers:
	- + = positive (e.g., A5.1+ = good)
	- - = negative (e.g., A5.1- = bad)
	- / = multiple tags (e.g., M1/M7/S2mf = moving/place/person)

	Hover over tagged words to see detailed information about each semantic tag.

	---
	Learn more: [USAS Documentation](https://ucrel.lancs.ac.uk/usas/)
	"""
	)

	# Sample tagged sentences offered as examples for `text_input`.
	gr.Examples(
	examples=[
	["I_Z8 love_E2+ walking_M1 in_Z5 the_Z5 park_M7 on_Z5 sunny_W4 days_T1.3 ._PUNC"],
	["The_Z5 company_I2.1 announced_Q2.2 record_N5.1+ profits_I1.1 yesterday_T1.1.1 ._PUNC"],
	["She_Z8 thinks_X2.1 education_P1 is_A3+ very_A13.3 important_A11.1+ ._PUNC"],
	["As_Z5 an_Z5 immigrant_M1/M7/S2mf in_Z5 the_Z5 United_Z2c States_Z2c you_Z8mf have_A9+ the_Z5 right_S7.4+ to_Z5 receive_A9+ language_Q3 access_M1 services_S8+ ._PUNC"],
	["The_Z5 Civil_G1.1 Rights_A5.3+ Act_A1.1.1 of_Z5 1964_N1 and_Z5 the_Z5 Voting_G1.2 Rights_A5.3+ Act_A1.1.1 of_Z5 1965_N1 protect_S8+/A15+ your_Z8 linguistic_Q3 rights_S7.4+ ._PUNC"]
	],
	inputs=text_input
	)

	# Wire the button: parse_tagged_text receives the textbox value, and the three
	# strings it returns are mapped, in order, to the three HTML outputs.
	submit_btn.click(
	fn=parse_tagged_text,
	inputs=text_input,
	outputs=[tagged_output, stats_output, legend_output]
	)

	# Launch the app only when this file is executed as a script.
	if __name__ == "__main__":
	demo.launch()