import json

import matplotlib.pyplot as plt
import numpy as np
from matplotlib.lines import Line2D

# Input JSON with the designed binders, and the figure this script writes.
INPUT_PATH = 'output/codette_antibody_designs_20250912_150658.json'
OUTPUT_PATH = 'output/sequence_analysis.png'

# Length categories in display order, and the scatter colour used for each.
CATEGORIES = ('Short', 'Medium', 'Long')
CATEGORY_COLORS = {'Short': 'red', 'Medium': 'blue', 'Long': 'green'}


def categorize_length(length: int) -> str:
    """Return the length category for a sequence of *length* amino acids.

    Lengths below 100 are 'Short', lengths below 200 are 'Medium', and
    everything else is 'Long'.
    """
    if length < 100:
        return 'Short'
    if length < 200:
        return 'Medium'
    return 'Long'


def load_binders(path: str = INPUT_PATH):
    """Parse the JSON file at *path* and return its 'personalized_binders' entry.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
        KeyError: if the top-level object has no 'personalized_binders' key.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform default.
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)['personalized_binders']


def collect_metrics(binders):
    """Return parallel lists ``(lengths, scores, categories)`` for *binders*.

    Each binder is indexed by 'sequence' (its ``len()`` is the length) and by
    'personalization_score'; the category is derived from the length.
    """
    lengths = []
    scores = []
    categories = []
    for binder in binders:
        length = len(binder['sequence'])
        lengths.append(length)
        scores.append(binder['personalization_score'])
        categories.append(categorize_length(length))
    return lengths, scores, categories


def group_by_category(values, categories):
    """Group *values* by their parallel *categories* labels.

    Returns a dict holding one list per entry of CATEGORIES (empty when the
    category has no members). Values keep their input order; labels that are
    not in CATEGORIES are ignored.
    """
    grouped = {cat: [] for cat in CATEGORIES}
    for value, cat in zip(values, categories):
        if cat in grouped:
            grouped[cat].append(value)
    return grouped


def mean_or_nan(values) -> float:
    """Return the arithmetic mean of *values*, or NaN when *values* is empty."""
    # np.mean([]) also evaluates to NaN but emits a RuntimeWarning; handling
    # the empty category explicitly keeps the bar chart quiet and matches the
    # empty-category guard used when printing the summary.
    if not values:
        return float('nan')
    return float(np.mean(values))


def plot_analysis(lengths, scores, categories, output_path: str = OUTPUT_PATH) -> None:
    """Draw the two-panel figure and save it to *output_path*.

    The left panel is a scatter of length against score, coloured by
    category; the right panel is a bar chart of the mean score per category.
    """
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

    # Left panel: one point per sequence, coloured by its length category.
    colors = [CATEGORY_COLORS[c] for c in categories]
    ax1.scatter(lengths, scores, c=colors, s=100)
    ax1.set_xlabel('Sequence Length (amino acids)')
    ax1.set_ylabel('Personalization Score')
    ax1.set_title('Sequence Length vs. Personalization Score')

    # The scatter uses a per-point colour list, so build the legend by hand
    # from proxy markers, one per category.
    legend_elements = [
        Line2D([0], [0], marker='o', color='w',
               markerfacecolor=CATEGORY_COLORS[cat], label=cat, markersize=10)
        for cat in CATEGORIES
    ]
    ax1.legend(handles=legend_elements)

    # Right panel: mean score per category (NaN for an empty category).
    scores_by_category = group_by_category(scores, categories)
    ax2.bar(list(CATEGORIES),
            [mean_or_nan(scores_by_category[cat]) for cat in CATEGORIES])
    ax2.set_ylabel('Average Personalization Score')
    ax2.set_title('Average Score by Sequence Category')

    plt.tight_layout()
    plt.savefig(output_path, dpi=300, bbox_inches='tight')
    plt.close(fig)


def build_summary_lines(lengths, scores, categories):
    """Return the text summary as a list of lines (without trailing newlines).

    Categories that have no sequences are omitted from the per-category part.
    """
    lines = [
        "\nSequence Analysis Summary:",
        "-" * 50,
        f"Total sequences: {len(lengths)}",
        "\nBy Category:",
    ]
    scores_by_category = group_by_category(scores, categories)
    lengths_by_category = group_by_category(lengths, categories)
    for category in CATEGORIES:
        cat_scores = scores_by_category[category]
        cat_lengths = lengths_by_category[category]
        if not cat_scores:
            continue
        lines.extend([
            f"\n{category} sequences:",
            f"Count: {len(cat_scores)}",
            f"Length range: {min(cat_lengths)}-{max(cat_lengths)} amino acids",
            f"Average score: {np.mean(cat_scores):.3f}",
            f"Score range: {min(cat_scores):.3f}-{max(cat_scores):.3f}",
        ])
    return lines


def main() -> None:
    """Load the binder designs, write the figure, and print the summary."""
    binders = load_binders(INPUT_PATH)
    lengths, scores, categories = collect_metrics(binders)
    plot_analysis(lengths, scores, categories, OUTPUT_PATH)
    print("\n".join(build_summary_lines(lengths, scores, categories)))


if __name__ == "__main__":
    main()