plant-msyn / add_legend_to_plot.py
Yoshigold's picture
Update webapp with Scripts files for HF Spaces deployment
f342936 verified
#!/usr/bin/env python3
"""
Add a color legend to microsynteny plot images.
Reads gene_colors.txt and adds a legend with colored boxes and gene annotations.
Legend is placed on the right side, starting 25% of the plot height below the top edge.
Usage:
python add_legend_to_plot.py <work_dir>
Input files (in work_dir):
- microsynteny_plot.png
- microsynteny_plot.svg
- gene_colors.txt (format: Gene_ID\tColor\tConfidence\tAnnotation\tMatches)
Output:
- Overwrites the plot files with legend added
"""
import os
import re
import sys
from pathlib import Path
import matplotlib
matplotlib.use('Agg') # Non-interactive backend
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import numpy as np
import pandas as pd
from PIL import Image
# Centralized logging
from logger import get_logger, log_info, log_warning, log_error, die
# Module logger
logger = get_logger(__name__)
# Color mapping for matplotlib
# Lookup table from colour names (as written in gene_colors.txt) to the hex
# values used when drawing the legend. Keys are lowercase because
# parse_gene_colors() lowercases each name before looking it up; a name that
# is missing here aborts the run, so every colour the pipeline can emit must
# be listed. The basic names map to a custom palette rather than pure
# primaries (e.g. 'red' is '#E74C3C').
COLOR_MAP = {
    # Single-letter matplotlib color codes
    'r': '#E74C3C',
    'b': '#3498DB',
    'g': '#27AE60',
    'c': '#1ABC9C',
    'm': '#E91E63',
    'y': '#F1C40F',
    'k': '#000000',
    'w': '#FFFFFF',
    # Full color names (each single-letter code above has a matching entry)
    'red': '#E74C3C',
    'blue': '#3498DB',
    'green': '#27AE60',
    'orange': '#E67E22',
    'purple': '#9B59B6',
    'cyan': '#1ABC9C',
    'magenta': '#E91E63',
    'brown': '#795548',
    'pink': '#FF69B4',
    'olive': '#808000',
    'grey': '#95A5A6',
    'gray': '#95A5A6',
    'yellow': '#F1C40F',
    'black': '#000000',
    'white': '#FFFFFF',
    # Extended CSS color names that might be used
    'mediumvioletred': '#C71585',
    'darkblue': '#00008B',
    'darkgreen': '#006400',
    'darkred': '#8B0000',
    'darkorange': '#FF8C00',
    'darkviolet': '#9400D3',
    'deeppink': '#FF1493',
    'dodgerblue': '#1E90FF',
    'forestgreen': '#228B22',
    'gold': '#FFD700',
    'indianred': '#CD5C5C',
    'limegreen': '#32CD32',
    'navy': '#000080',
    'orangered': '#FF4500',
    'royalblue': '#4169E1',
    'seagreen': '#2E8B57',
    'steelblue': '#4682B4',
    'teal': '#008080',
    'tomato': '#FF6347',
    'violet': '#EE82EE',
}
def parse_gene_colors(filepath):
    """Parse gene_colors.txt into legend entries.

    The file is read as a headerless TSV with the columns
    Gene_ID, Color, Confidence, Annotation, Matches. Anything after a '#'
    on a line is treated as a comment by the reader.

    Only rows whose annotation is non-empty after stripping whitespace are
    kept. Annotations that merely repeat the gene ID are NOT filtered here;
    use check_has_annotations() for that distinction.

    Args:
        filepath: Path to gene_colors.txt.

    Returns:
        A list of (gene_id, hex_color, annotation) tuples, in file order.
        Empty when the file has no data rows or no annotated genes.

    Unknown colour names are fatal: they are reported via log_error() and
    die() rather than being replaced by a fallback colour.
    """
    try:
        df = pd.read_csv(
            filepath, sep='\t', header=None, dtype=str,
            names=['gene_id', 'color', 'confidence', 'annotation', 'matches'],
            na_values=[], keep_default_na=False, comment='#',
        )
    except pd.errors.EmptyDataError:
        # A file with no data rows simply means there is nothing to annotate.
        return []

    # Rows with missing trailing columns may surface as NaN; normalise them
    # to '' so the string operations below (and len() in the legend code)
    # never see a float.
    df = df.fillna('')

    # Keep annotated genes only. Copy so the colour assignment below writes
    # to an independent frame instead of a view of `df`.
    annotated = df[df['annotation'].str.strip() != ''].copy()
    if annotated.empty:
        return []

    # Map colors - no fallback, unknown colors will raise an error
    def map_color(c):
        key = c.strip().lower()
        if key not in COLOR_MAP:
            log_error(f"Unknown color '{c}' in gene_colors.txt. Please use a known color name or add it to COLOR_MAP.")
            die(f"Unknown color: {c}")
        return COLOR_MAP[key]

    annotated['color'] = annotated['color'].apply(map_color)

    return list(zip(annotated['gene_id'], annotated['color'], annotated['annotation']))
def check_has_annotations(genes):
    """Report whether any gene carries a real annotation.

    An annotation is "real" when, after stripping surrounding whitespace,
    it is non-empty and differs from the gene ID. Whitespace-only values
    are treated as empty, matching the filter in parse_gene_colors().

    Args:
        genes: Iterable of (gene_id, color, annotation) tuples.

    Returns:
        True if at least one gene has a real annotation, otherwise False.
    """
    for gene_id, _color, annotation in genes:
        # Non-string values (e.g. None) cannot be a meaningful annotation.
        text = annotation.strip() if isinstance(annotation, str) else ''
        if text and text != gene_id:
            return True
    return False
def add_legend_to_png(png_path, genes, output_path=None):
    """Add a colour legend to a PNG plot.

    The legend (one rounded colour box plus annotation text per gene) is
    rendered with matplotlib, then pasted onto a widened white canvas that
    holds the original plot. It is placed 200 px inside the plot's right
    edge and 25% of the plot height below the top.

    Args:
        png_path: Path of the PNG to read.
        genes: Sequence of (gene_id, hex_color, annotation) tuples.
        output_path: Where to write the result; defaults to overwriting
            png_path.

    Returns:
        True when the image was written, False when genes is empty (nothing
        is written in that case).
    """
    # Local import: the legend is rendered into memory, so no temporary file
    # is created next to the plot and nothing needs cleaning up on failure.
    from io import BytesIO

    if output_path is None:
        output_path = png_path
    if not genes:
        return False

    # Load the plot fully and release the file handle before it is
    # (possibly) overwritten below.
    with Image.open(png_path) as source:
        plot_img = source.convert('RGBA')
    orig_width, orig_height = plot_img.size

    # Legend geometry, in pixels at 100 dpi
    n_genes = len(genes)
    box_width = 60  # Wider rectangle
    box_height = 30  # Shorter height
    corner_radius = 6  # Rounded corners
    line_height = 50
    legend_padding = 30
    text_offset = 80

    # Estimate text width (rough approximation)
    max_text_len = max(len(ann) for _, _, ann in genes)
    text_width = max_text_len * 10  # Approximate pixels per character
    legend_width = box_width + text_offset + text_width + legend_padding * 2
    legend_height = n_genes * line_height + legend_padding * 2

    # Render the legend with matplotlib
    fig, ax = plt.subplots(figsize=(legend_width / 100, legend_height / 100), dpi=100)
    try:
        ax.set_xlim(0, legend_width)
        ax.set_ylim(0, legend_height)
        ax.axis('off')
        ax.set_facecolor('white')

        # Rows are laid out top to bottom; y_pos is the row's vertical centre.
        y_pos = legend_height - legend_padding - line_height / 2
        for _gene_id, color, annotation in genes:
            ax.add_patch(mpatches.FancyBboxPatch(
                (legend_padding, y_pos - box_height / 2),
                box_width, box_height,
                boxstyle=mpatches.BoxStyle("Round", pad=0, rounding_size=corner_radius),
                facecolor=color,
                edgecolor='none',
                linewidth=0,
            ))
            ax.text(
                legend_padding + box_width + 16,
                y_pos,
                annotation,
                fontsize=18,
                verticalalignment='center',
                fontfamily='DejaVu Sans',
            )
            y_pos -= line_height

        buffer = BytesIO()
        fig.savefig(buffer, format='png', dpi=100, bbox_inches='tight',
                    pad_inches=0.1, facecolor='white', edgecolor='none')
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close(fig)

    buffer.seek(0)
    with Image.open(buffer) as rendered:
        legend_img = rendered.convert('RGBA')

    # Position legend on right side, overlapping with plot whitespace.
    # Clamp so a plot narrower than 200 px does not push it off-canvas.
    legend_x = max(0, orig_width - 200)
    legend_y = int(orig_height * 0.25)
    if legend_y + legend_img.height > orig_height:
        legend_y = max(10, orig_height - legend_img.height - 10)

    # bbox_inches='tight' means the rendered legend can be larger than the
    # estimate; grow the canvas as needed so the legend is never clipped.
    new_width = max(orig_width + legend_width + 20,
                    legend_x + legend_img.width + 20)
    new_height = max(orig_height, legend_y + legend_img.height + 10)

    canvas = Image.new('RGBA', (new_width, new_height), (255, 255, 255, 255))
    canvas.paste(plot_img, (0, 0))
    canvas.paste(legend_img, (legend_x, legend_y))

    canvas.convert('RGB').save(output_path, 'PNG')
    return True
def _svg_length_pattern(attr_name):
    """Return a regex for a numeric length attribute such as width="720pt".

    Groups: 1 = quote character, 2 = number, 3 = unit (possibly empty).
    The lookbehind stops 'width' from matching inside 'stroke-width' or
    namespaced attributes.
    """
    return (r'(?<![\w:.-])' + attr_name +
            r'\s*=\s*(["\'])\s*([0-9]*\.?[0-9]+)\s*([A-Za-z%]*)\s*\1')


def _format_svg_number(value):
    """Format a number compactly for an SVG attribute (no trailing '.0')."""
    rounded = round(float(value), 3)
    return str(int(rounded)) if rounded.is_integer() else str(rounded)


def add_legend_to_svg(svg_path, genes, output_path=None):
    """Add a colour legend to an SVG plot by appending SVG elements.

    A <g id="legend"> group with one rounded <rect> and one <text> per gene
    is inserted before the final </svg> tag, 100 user units inside the
    right edge and 25% of the height below the top. The root element's
    viewBox (and numeric width attribute, scaled proportionally) is widened
    to make room; no other element in the document is modified.

    Args:
        svg_path: Path of the SVG to read.
        genes: Sequence of (gene_id, hex_color, annotation) tuples.
        output_path: Where to write the result; defaults to overwriting
            svg_path.

    Returns:
        True when the SVG was written. False (nothing written) when genes
        is empty, the root <svg> tag or its dimensions cannot be
        determined, or no closing </svg> tag exists.
    """
    # Local import so the block does not depend on a new file-level import.
    from xml.sax.saxutils import escape

    if output_path is None:
        output_path = svg_path
    if not genes:
        return False

    with open(svg_path, 'r', encoding='utf-8') as f:
        svg_content = f.read()

    # All dimension handling is restricted to the root <svg ...> tag so
    # that attributes of child elements are never touched.
    root_match = re.search(r'<svg\b[^>]*>', svg_content)
    if not root_match:
        log_warning("Could not determine SVG dimensions")
        return False
    root_tag = root_match.group(0)

    viewbox_pattern = r'\bviewBox\s*=\s*(["\'])(.*?)\1'
    viewbox_match = re.search(viewbox_pattern, root_tag)
    min_x = min_y = 0.0
    vb_parts = None
    if viewbox_match:
        # viewBox values may be separated by whitespace and/or commas.
        vb_parts = re.split(r'[\s,]+', viewbox_match.group(2).strip())
        try:
            min_x, min_y, vb_width, vb_height = (float(p) for p in vb_parts)
        except ValueError:
            log_warning("Could not determine SVG dimensions")
            return False
    else:
        # Without a viewBox, user units are pixels, so only unitless or
        # px-valued width/height can be used directly.
        width_match = re.search(_svg_length_pattern('width'), root_tag)
        height_match = re.search(_svg_length_pattern('height'), root_tag)
        usable = (width_match and height_match
                  and width_match.group(3) in ('', 'px')
                  and height_match.group(3) in ('', 'px'))
        if not usable:
            log_warning("Could not determine SVG dimensions")
            return False
        vb_width = float(width_match.group(2))
        vb_height = float(height_match.group(2))

    # Legend geometry (rectangular boxes with rounded corners)
    box_width = 45  # Wider rectangle
    box_height = 22  # Shorter height
    corner_radius = 4  # Rounded corners
    line_height = 36
    legend_x = min_x + vb_width - 100  # Move 100 units into plot whitespace
    legend_y = min_y + vb_height * 0.25

    legend_elements = []
    y_pos = legend_y
    for _gene_id, hex_color, annotation in genes:
        # Colored box (rectangular with rounded corners)
        legend_elements.append(
            f'<rect x="{legend_x}" y="{y_pos}" width="{box_width}" height="{box_height}" '
            f'rx="{corner_radius}" ry="{corner_radius}" fill="{hex_color}" stroke="none"/>'
        )
        # Text annotation; escaped so '&', '<' and '>' keep the SVG valid.
        text_x = legend_x + box_width + 12
        text_y = y_pos + box_height * 0.75
        legend_elements.append(
            f'<text x="{text_x}" y="{text_y}" fill="#000000" font-family="DejaVu Sans" '
            f'font-size="18">{escape(str(annotation))}</text>'
        )
        y_pos += line_height

    # Calculate new width to accommodate the legend text
    max_text_len = max(len(ann) for _, _, ann in genes)
    new_width = vb_width + 40 + box_width + 8 + max_text_len * 8
    scale = new_width / vb_width if vb_width > 0 else 1.0

    new_root = root_tag
    if viewbox_match:
        # Preserve the original min-x / min-y / height; widen only.
        new_viewbox = (f'viewBox="{vb_parts[0]} {vb_parts[1]} '
                       f'{_format_svg_number(new_width)} {vb_parts[3]}"')
        new_root = re.sub(viewbox_pattern, lambda _m: new_viewbox, new_root, count=1)

    def _rescale_width(match):
        quote, number, unit = match.group(1), float(match.group(2)), match.group(3)
        if unit == '%':
            # Percentage widths follow the container; leave them alone.
            return match.group(0)
        return f'width={quote}{_format_svg_number(number * scale)}{unit}{quote}'

    # Scaling the rendered width by the same factor as the viewBox keeps
    # the plot at its original size and aspect ratio.
    new_root = re.sub(_svg_length_pattern('width'), _rescale_width, new_root, count=1)
    svg_content = (svg_content[:root_match.start()] + new_root
                   + svg_content[root_match.end():])

    # Insert the legend before the LAST </svg> so nested <svg> elements do
    # not each receive a copy.
    legend_group = '<g id="legend">\n' + '\n'.join(legend_elements) + '\n</g>\n'
    head, closing_tag, tail = svg_content.rpartition('</svg>')
    if not closing_tag:
        log_warning("Could not find closing </svg> tag")
        return False
    svg_content = head + legend_group + closing_tag + tail

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(svg_content)
    return True
def main():
    """Command-line entry point: add legends to the plots in a work directory.

    Expects the work directory as the first command-line argument. Reads
    <work_dir>/gene_colors.txt and, for each of microsynteny_plot.png and
    microsynteny_plot.svg that exists, adds the legend in place.

    Exits with status 1 when the argument or gene_colors.txt is missing and
    with status 0 when there are no annotated genes. A failure on one plot
    format is logged as a warning and does not stop the other format.
    """
    if len(sys.argv) < 2:
        log_error("Usage: python add_legend_to_plot.py <work_dir>")
        sys.exit(1)

    work_dir = Path(sys.argv[1])
    gene_colors_file = work_dir / 'gene_colors.txt'
    if not gene_colors_file.exists():
        log_error(f"{gene_colors_file} not found")
        sys.exit(1)

    # Parse gene colors (only genes with non-empty annotations are returned)
    genes = parse_gene_colors(gene_colors_file)
    if not genes:
        log_info("No genes with annotations found - skipping legend")
        sys.exit(0)
    log_info(f"Adding legend with {len(genes)} annotated genes...")

    # Process each plot format
    targets = (
        ('PNG', work_dir / 'microsynteny_plot.png', add_legend_to_png),
        ('SVG', work_dir / 'microsynteny_plot.svg', add_legend_to_svg),
    )
    for label, plot_path, add_legend in targets:
        if not plot_path.exists():
            continue
        try:
            added = add_legend(plot_path, genes)
        except Exception as e:
            # Best effort: a broken plot file must not abort the other format.
            log_warning(f"Could not add legend to {label}: {e}")
            continue
        # The handlers return False when they decline to modify the file,
        # so only report success when a legend was actually written.
        if added:
            log_info(f"Added legend to {label}")
        else:
            log_warning(f"Legend was not added to {label}")

    log_info("Legend addition complete")


if __name__ == '__main__':
    main()