| """ |
| Report generation — JSON and Markdown output for senator profiles. |
| """ |
| import json |
| import logging |
| from datetime import datetime |
| from pathlib import Path |
| from typing import Optional |
|
|
| from .fusion import SenatorProfile |
|
|
| log = logging.getLogger(__name__) |
|
|
|
|
def generate_json_report(
    profile: SenatorProfile,
    output_path: Optional[str] = None,
) -> dict:
    """Build the JSON-serialisable report for one senator profile.

    Args:
        profile: Object whose attributes supply every reported value. The
            attribute values are placed into the report as-is (not copied).
        output_path: When truthy, the report is additionally written to this
            path as indented JSON; missing parent directories are created.

    Returns:
        The report dictionary with the sections ``meta``, ``senator``,
        ``summary``, ``compulsion``, ``virulence``, ``classification_detail``,
        ``top_rage_tweets`` and ``disclaimers``.

    Raises:
        OSError: If ``output_path`` is given and the directory or file cannot
            be created or written.
    """
    # Function-scope import so this block does not depend on the file-level
    # import list being extended.
    from datetime import timezone

    # ``datetime.utcnow()`` is deprecated since Python 3.12. Take an aware UTC
    # "now" instead and drop the tzinfo again so the serialised form keeps the
    # established ``YYYY-MM-DDTHH:MM:SS[.ffffff]Z`` shape.
    generated_at = (
        datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"
    )

    report = {
        "meta": {
            "generated_at": generated_at,
            "pipeline_version": "0.1.0",
            "model_stack": {
                "embeddings": "Qwen/Qwen3-Embedding-0.6B",
                "sentiment": "cardiffnlp/twitter-roberta-base-sentiment-latest",
                "emotion": "cardiffnlp/twitter-roberta-base-emotion",
                "offensive": "cardiffnlp/twitter-roberta-base-offensive",
                "irony": "cardiffnlp/twitter-roberta-base-irony",
                "hate": "cardiffnlp/twitter-roberta-base-hate-multiclass-latest",
                "toxicity": "s-nlp/roberta_toxicity_classifier",
            },
        },
        "senator": {
            "name": profile.senator_name,
            "twitter_handle": profile.twitter_handle,
            "party": profile.party,
            "state": profile.state,
        },
        "summary": {
            "n_tweets_analyzed": profile.n_tweets_analyzed,
            "date_range": profile.date_range,
            "compulsion_score": profile.compulsion_score,
            "virulence_score": profile.virulence_score,
            "overall_risk_score": profile.overall_risk_score,
        },
        "compulsion": {
            "score": profile.compulsion_score,
            "subscores": profile.compulsion_subscores,
        },
        "virulence": {
            "score": profile.virulence_score,
            "subscores": profile.virulence_subscores,
            "distribution": profile.virulence_distribution,
        },
        "classification_detail": {
            "sentiment_distribution": profile.sentiment_distribution,
            "emotion_distribution": profile.emotion_distribution,
            "toxicity_stats": profile.toxicity_stats,
        },
        "top_rage_tweets": profile.top_rage_tweets,
        "disclaimers": [
            "This analysis does not constitute a clinical diagnosis of addiction, "
            "compulsion, or mental health condition.",
            "Scores are derived from automated classifiers with known error rates "
            "and should not be treated as ground truth.",
            "Temporal analysis uses UTC timestamps which may not reflect the "
            "poster's local timezone.",
            "Classifier models were trained on general Twitter data, not "
            "specifically on political speech.",
        ],
    }

    if output_path:
        p = Path(output_path)
        p.parent.mkdir(parents=True, exist_ok=True)
        # Explicit encoding: without it the platform's locale encoding is
        # used, which makes the written file depend on the host.
        with open(p, "w", encoding="utf-8") as f:
            # default=str: any value json cannot encode natively is written
            # as str(value) instead of raising TypeError.
            json.dump(report, f, indent=2, default=str)
        log.info("JSON report saved to %s", p)

    return report
|
|
|
|
def _md_cell_text(raw, limit=80):
    """Return ``raw`` made safe for a single Markdown table cell.

    Whitespace runs (including newlines, which would end the table row) are
    collapsed to single spaces, the result is cut to ``limit`` characters,
    pipes are backslash-escaped, and ``...`` is appended only when something
    was actually cut off. ``None`` is treated as empty text.
    """
    flat = " ".join(str(raw or "").split())
    suffix = "..." if len(flat) > limit else ""
    return flat[:limit].replace("|", "\\|") + suffix


def generate_markdown_report(
    profile: SenatorProfile,
    output_path: Optional[str] = None,
) -> str:
    """Render a senator profile as a Markdown document.

    The document contains, in order: title and handle line, summary bullets,
    the compulsion and virulence subscore tables, optional sentiment /
    emotion / toxicity sections (emitted only when the corresponding profile
    attribute is truthy), an optional table of at most ten top rage tweets,
    a methodology list and the disclaimers.

    Args:
        profile: Object whose attributes supply every reported value.
        output_path: When truthy, the Markdown is additionally written to this
            path as UTF-8; missing parent directories are created.

    Returns:
        The complete Markdown text; lines are joined with ``"\\n"``.

    Raises:
        OSError: If ``output_path`` is given and the directory or file cannot
            be created or written.
    """
    lines = []
    lines.append(f"# X-Box Analysis: {profile.senator_name}")
    lines.append(f"**@{profile.twitter_handle}** | {profile.party} | {profile.state}")
    lines.append("")

    # Headline numbers.
    lines.append("## Summary")
    lines.append(f"- **Tweets analyzed**: {profile.n_tweets_analyzed:,}")
    lines.append(f"- **Date range**: {profile.date_range}")
    lines.append(f"- **Compulsion score**: {profile.compulsion_score}/100")
    lines.append(f"- **Virulence score**: {profile.virulence_score}/100")
    lines.append(f"- **Overall risk score**: {profile.overall_risk_score}/100")
    lines.append("")

    # Subscore tables; keys such as "late_night" are shown as "Late Night".
    lines.append("## Compulsion-Like Behavior")
    lines.append("| Dimension | Score |")
    lines.append("| --- | ---: |")
    for k, v in profile.compulsion_subscores.items():
        lines.append(f"| {k.replace('_', ' ').title()} | {v} |")
    lines.append("")

    lines.append("## Virulence Analysis")
    lines.append("| Dimension | Score |")
    lines.append("| --- | ---: |")
    for k, v in profile.virulence_subscores.items():
        lines.append(f"| {k.replace('_', ' ').title()} | {v} |")
    lines.append("")

    # Classifier detail; each section is skipped when its data is empty.
    if profile.sentiment_distribution:
        lines.append("### Sentiment Distribution")
        lines.append("| Label | Share |")
        lines.append("| --- | ---: |")
        for k, v in sorted(profile.sentiment_distribution.items()):
            lines.append(f"| {k} | {v:.1%} |")
        lines.append("")

    if profile.emotion_distribution:
        lines.append("### Emotion Distribution")
        lines.append("| Emotion | Share |")
        lines.append("| --- | ---: |")
        for k, v in sorted(profile.emotion_distribution.items()):
            lines.append(f"| {k} | {v:.1%} |")
        lines.append("")

    if profile.toxicity_stats:
        lines.append("### Toxicity")
        tox = profile.toxicity_stats
        lines.append(f"- Mean toxicity score: {tox.get('mean', 0):.4f}")
        lines.append(f"- % classified toxic: {tox.get('pct_toxic', 0):.2f}%")
        lines.append(f"- P90 toxicity: {tox.get('p90', 0):.4f}")
        lines.append("")

    # Only the first ten entries are shown.
    if profile.top_rage_tweets:
        lines.append("## Top Rage Events")
        lines.append("| Date | Virulence | Outrage | Ad Hominem | Text |")
        lines.append("| --- | ---: | ---: | ---: | --- |")
        for evt in profile.top_rage_tweets[:10]:
            # First ten characters of the stringified timestamp.
            date = str(evt.get("created_at", ""))[:10]
            # Tweets routinely contain newlines and pipes; both would break
            # the table row if written verbatim.
            cell = _md_cell_text(evt.get("text"))
            lines.append(
                f"| {date} | {evt.get('composite_virulence', 0):.3f} "
                f"| {evt.get('outrage_intensity', 0):.3f} "
                f"| {evt.get('ad_hominem', 0):.3f} "
                f"| {cell} |"
            )
        lines.append("")

    lines.append("## Methodology")
    lines.append("- **Embeddings**: Qwen/Qwen3-Embedding-0.6B (MTEB #1 under 1B params)")
    lines.append("- **Sentiment**: cardiffnlp/twitter-roberta-base-sentiment-latest")
    lines.append("- **Emotion**: cardiffnlp/twitter-roberta-base-emotion (anger/joy/optimism/sadness)")
    lines.append("- **Offensive**: cardiffnlp/twitter-roberta-base-offensive")
    lines.append("- **Irony**: cardiffnlp/twitter-roberta-base-irony")
    lines.append("- **Hate speech**: cardiffnlp/twitter-roberta-base-hate-multiclass-latest")
    lines.append("- **Toxicity**: s-nlp/roberta_toxicity_classifier")
    lines.append("- **Behavioral**: Temporal/metadata features with sigmoid-scaled scoring")
    lines.append("")

    lines.append("## Disclaimers")
    lines.append("- This analysis does not constitute a clinical diagnosis.")
    lines.append("- Classifier scores are probabilistic and subject to error.")
    lines.append("- UTC timestamps may not reflect the poster's local timezone.")
    lines.append("- Models trained on general Twitter data, not political speech specifically.")
    lines.append("")

    markdown = "\n".join(lines)

    if output_path:
        p = Path(output_path)
        p.parent.mkdir(parents=True, exist_ok=True)
        # Explicit UTF-8: tweet text may contain characters (e.g. emoji) that
        # a locale-default encoding cannot represent.
        with open(p, "w", encoding="utf-8") as f:
            f.write(markdown)
        log.info("Markdown report saved to %s", p)

    return markdown
|
|