#!/usr/bin/env python3
"""
src/summarizer.py — Match Explanation Generator
Generates ≤80-word explanations in EN or FR explaining why a tender matches a profile.
Uses template-based generation (CPU-only, no LLM dependency required).
"""

import random

# ─── English Templates ────────────────────────────────────────────────────────
# One template is chosen per match by ``rank % len(templates)``; every
# placeholder used here is supplied by ``generate_summary``.
EN_TEMPLATES = [
    (
        "{org_name} matches **{tender_title}** (score: {score:.2f}). "
        "This {sector} grant from {tender_region} aligns with your operations in {country}. "
        "The available funding of USD {budget:,} fits your budget range. "
        "Deadline: {deadline}. "
        "Sector overlap and {tfidf_pct}% content similarity drive this ranking."
    ),
    (
        "**{tender_title}** is ranked #{rank} for {org_name}. "
        "Sector: {sector} ✓. Budget: USD {budget:,}. Deadline: {deadline}. "
        "Your needs in {needs_snippet} closely match this tender's objectives. "
        "Score breakdown — similarity: {tfidf_pct}%, sector: {sector_pct}%, budget: {budget_pct}%."
    ),
    (
        "This {sector} opportunity suits {org_name} because your profile in {country} aligns "
        "with the tender's focus on {region_phrase}. "
        "Budget of USD {budget:,} is within reach. Apply before {deadline}. "
        "Composite match score: {score:.2f}/1.00."
    ),
]

# ─── French Templates ─────────────────────────────────────────────────────────
FR_TEMPLATES = [
    (
        "{org_name} correspond à **{tender_title}** (score : {score:.2f}). "
        "Cette subvention {sector} en {tender_region} s'aligne avec vos activités en {country}. "
        "Le financement disponible de USD {budget:,} correspond à votre capacité budgétaire. "
        "Date limite : {deadline}. "
        "La correspondance sectorielle et {tfidf_pct}% de similarité de contenu motivent ce classement."
    ),
    (
        "**{tender_title}** est classé #{rank} pour {org_name}. "
        "Secteur : {sector} ✓. Budget : USD {budget:,}. Date limite : {deadline}. "
        "Vos besoins en {needs_snippet} correspondent étroitement aux objectifs de cet appel. "
        "Détail du score — similarité : {tfidf_pct}%, secteur : {sector_pct}%, budget : {budget_pct}%."
    ),
    (
        "Cette opportunité {sector} convient à {org_name} car votre profil en {country} s'aligne "
        "avec l'appel ciblant {region_phrase}. "
        "Le budget de USD {budget:,} est accessible. Déposez votre candidature avant le {deadline}. "
        "Score composite : {score:.2f}/1.00."
    ),
]

# Human-readable sector descriptions. NOTE(review): nothing in this file reads
# these two tables; they are kept because other modules may import them.
SECTOR_PHRASES_EN = {
    "agritech": "digital agriculture and farming innovation",
    "healthtech": "health technology and community health services",
    "cleantech": "clean and renewable energy solutions",
    "edtech": "digital education and offline learning",
    "fintech": "digital finance and financial inclusion",
    "wastetech": "waste management and circular economy",
    "general": "general development and innovation",
}

SECTOR_PHRASES_FR = {
    "agritech": "l'agriculture numérique et l'innovation agricole",
    "healthtech": "la technologie de santé et les services de santé communautaire",
    "cleantech": "les solutions d'énergie propre et renouvelable",
    "edtech": "l'éducation numérique et l'apprentissage hors-ligne",
    "fintech": "la finance numérique et l'inclusion financière",
    "wastetech": "la gestion des déchets et l'économie circulaire",
    "general": "le développement général et l'innovation",
}

# Tender region -> phrase substituted for ``{region_phrase}`` in the templates.
REGION_PHRASES_EN = {
    "East Africa": "East African markets",
    "West Africa": "West African communities",
    "Central Africa": "Central African regions",
    "Southern Africa": "Southern African areas",
    "Africa": "pan-African initiatives",
}

REGION_PHRASES_FR = {
    "East Africa": "les marchés d'Afrique de l'Est",
    "West Africa": "les communautés d'Afrique de l'Ouest",
    "Central Africa": "les régions d'Afrique Centrale",
    "Southern Africa": "les zones d'Afrique Australe",
    "Africa": "les initiatives panafricaines",
}

# Characters treated as sentence terminators when truncating.
_SENTENCE_ENDINGS = ".!?"

# Number of leading words of ``needs_text`` quoted in a summary.
_NEEDS_SNIPPET_WORDS = 6


def _to_percent(value) -> int:
    """Convert a 0–1 score into a whole-number percentage.

    Rounds to the nearest integer instead of truncating, so that values such
    as ``0.29`` (stored as 0.28999…) are reported as 29 rather than 28.
    ``None`` is treated as 0.
    """
    return int(round((value or 0) * 100))


def _truncate_to_words(text: str, max_words: int = 80) -> str:
    """Truncate text to max_words, ending at a sentence boundary if possible.

    Args:
        text: text to shorten; words are whitespace-separated.
        max_words: maximum number of words to keep.

    Returns:
        ``text`` unchanged when it already fits. Otherwise the first
        ``max_words`` words (joined by single spaces), cut back to the last
        sentence end that lies past the midpoint of the truncated text, or
        followed by ``"..."`` when no such sentence end exists. An empty
        string is returned when ``max_words`` is not positive.
    """
    words = text.split()
    if len(words) <= max_words:
        return text
    if max_words <= 0:
        # Nothing may be kept; "..." alone would itself exceed the limit.
        return ""

    truncated = " ".join(words[:max_words])
    midpoint = len(truncated) // 2

    # Scan backwards so the *latest* boundary wins regardless of which
    # punctuation mark it is. A mark only counts as a sentence end when it is
    # followed by whitespace or the end of the text; this keeps decimal points
    # inside numbers ("0.78", "2.0") from being mistaken for a full stop.
    for idx in range(len(truncated) - 1, midpoint, -1):
        if truncated[idx] not in _SENTENCE_ENDINGS:
            continue
        following = truncated[idx + 1:idx + 2]
        if not following or following.isspace():
            return truncated[:idx + 1]

    return truncated + "..."


def generate_summary(
    profile: dict,
    tender: dict,
    rank: int,
    score: float,
    breakdown: dict,
    language: str = "en",
    max_words: int = 80,
) -> str:
    """
    Generate a ≤80-word explanation of why this tender matches the profile.

    Args:
        profile: business profile dict (reads ``name``, ``country``, ``needs_text``)
        tender: matched tender dict (reads ``title``, ``sector``, ``region``,
            ``budget``, ``deadline``)
        rank: rank position; also selects the template via ``rank % 3``
        score: composite match score (0–1)
        breakdown: dict with tfidf_similarity, sector_match, budget_score, urgency_score
        language: "en" or "fr"; any other value falls back to "en"
        max_words: word limit (default 80)

    Returns:
        Formatted explanation string
    """
    lang = language if language in ("en", "fr") else "en"

    # Derived values (rounded, not truncated, to whole percentages).
    tfidf_pct = _to_percent(breakdown.get("tfidf_similarity", 0))
    sector_pct = _to_percent(breakdown.get("sector_match", 0))
    budget_pct = _to_percent(breakdown.get("budget_score", 0))
    urgency_pct = _to_percent(breakdown.get("urgency_score", 0))

    sector = tender.get("sector", "general")
    region = tender.get("region", "Africa")

    # Quote the first few words of the stated needs; only mark the snippet
    # with an ellipsis when words were actually dropped.
    needs_words = (profile.get("needs_text") or "").split()
    if not needs_words:
        needs_snippet = "various areas"
    else:
        needs_snippet = " ".join(needs_words[:_NEEDS_SNIPPET_WORDS])
        if len(needs_words) > _NEEDS_SNIPPET_WORDS:
            needs_snippet += "..."

    if lang == "fr":
        templates = FR_TEMPLATES
        region_phrase = REGION_PHRASES_FR.get(region, "les régions africaines")
    else:
        templates = EN_TEMPLATES
        region_phrase = REGION_PHRASES_EN.get(region, "African regions")

    template = templates[rank % len(templates)]

    summary = template.format(
        org_name=profile.get("name", "Your organization"),
        tender_title=tender.get("title", "This Tender"),
        score=score,
        sector=sector,
        country=profile.get("country", "your country"),
        # A budget of None would make the "{budget:,}" format spec raise.
        budget=tender.get("budget") or 0,
        deadline=tender.get("deadline", "TBD"),
        tfidf_pct=tfidf_pct,
        sector_pct=sector_pct,
        budget_pct=budget_pct,
        urgency_pct=urgency_pct,
        rank=rank,
        needs_snippet=needs_snippet,
        tender_region=region,
        region_phrase=region_phrase,
    )

    return _truncate_to_words(summary, max_words)


def generate_summary_md(
    profile: dict,
    matches: list,
    language: str = "en",
) -> str:
    """
    Generate a complete markdown summary file for all matches of a profile.

    Args:
        profile: business profile dict
        matches: list of ranked tender dicts (from ranker.rank()); each must
            provide ``score``, ``breakdown``, ``title``, ``tender_id`` and
            ``language``
        language: "en" or "fr"; any other value falls back to "en"

    Returns:
        Full markdown string

    Raises:
        KeyError: if a match or its breakdown lacks one of the required keys.
    """
    lang = language if language in ("en", "fr") else "en"
    lines = []

    if lang == "fr":
        lines.append(f"# Correspondances de Subventions — {profile.get('name', 'Profil')}")
        lines.append(
            f"\n**Profil :** {profile.get('name')} | **Secteur :** {profile.get('sector')} "
            f"| **Pays :** {profile.get('country')}"
        )
        lines.append(f"\n**Besoins :** {profile.get('needs_text', '')}\n")
        lines.append("---\n")
        lines.append("## Top 5 Appels à Candidatures\n")
    else:
        lines.append(f"# Grant Matches — {profile.get('name', 'Profile')}")
        lines.append(
            f"\n**Profile:** {profile.get('name')} | **Sector:** {profile.get('sector')} "
            f"| **Country:** {profile.get('country')}"
        )
        lines.append(f"\n**Needs:** {profile.get('needs_text', '')}\n")
        lines.append("---\n")
        lines.append("## Top 5 Matched Tenders\n")

    # Ranks are 1-based in the rendered document.
    for rank, match in enumerate(matches, 1):
        score = match["score"]
        breakdown = match["breakdown"]
        summary = generate_summary(
            profile=profile,
            tender=match,
            rank=rank,
            score=score,
            breakdown=breakdown,
            language=lang,
        )

        if lang == "fr":
            lines.append(f"### #{rank} — {match['title']}")
            lines.append(
                f"**ID :** {match['tender_id']} | **Score :** {score:.4f} "
                f"| **Langue :** {match['language'].upper()}"
            )
            lines.append(f"\n**Explication :**\n{summary}\n")
            lines.append("**Détail du score :**")
            lines.append(f"- Similarité TF-IDF : {breakdown['tfidf_similarity']:.3f}")
            lines.append(f"- Correspondance sectorielle : {breakdown['sector_match']:.3f}")
            lines.append(f"- Compatibilité budgétaire : {breakdown['budget_score']:.3f}")
            lines.append(f"- Urgence deadline : {breakdown['urgency_score']:.3f}\n")
        else:
            lines.append(f"### #{rank} — {match['title']}")
            lines.append(
                f"**ID:** {match['tender_id']} | **Score:** {score:.4f} "
                f"| **Language:** {match['language'].upper()}"
            )
            lines.append(f"\n**Explanation:**\n{summary}\n")
            lines.append("**Score Breakdown:**")
            lines.append(f"- TF-IDF Similarity: {breakdown['tfidf_similarity']:.3f}")
            lines.append(f"- Sector Match: {breakdown['sector_match']:.3f}")
            lines.append(f"- Budget Compatibility: {breakdown['budget_score']:.3f}")
            lines.append(f"- Deadline Urgency: {breakdown['urgency_score']:.3f}\n")

        lines.append("---\n")

    return "\n".join(lines)


def generate_individual_summary_md(
    profile: dict,
    match: dict,
    rank: int,
    language: str = "en",
    disqualifier: str = "",
) -> str:
    """
    Generate a single .md file for one (profile, tender) match pair.
    Spec requires one .md per (profile, tender) match in summaries/.

    Args:
        profile: business profile dict
        match: single ranked tender dict (from ranker.rank()); must provide
            ``score``, ``breakdown``, ``tender_id``, ``title``, ``sector``,
            ``deadline``, ``region`` and ``language``
        rank: rank position (1-based)
        language: "en" or "fr"; any other value falls back to "en"
        disqualifier: pre-computed top disqualifier string; when empty a
            default English sentence is used in both languages

    Returns:
        Markdown string for this individual match

    Raises:
        KeyError: if ``match`` or its breakdown lacks one of the required keys.
    """
    # Imported here rather than at module level, as in the original code.
    from src.utils import format_budget

    lang = language if language in ("en", "fr") else "en"
    score = match["score"]
    breakdown = match["breakdown"]
    tid = match["tender_id"]

    summary_text = generate_summary(
        profile=profile,
        tender=match,
        rank=rank,
        score=score,
        breakdown=breakdown,
        language=lang,
    )

    budget_str = format_budget(match.get("budget", 0))
    disq = disqualifier or "No major disqualifier identified."

    if lang == "fr":
        return (
            f"# {match['title']}\n"
            f"**Profil :** {profile.get('name')} | **ID :** {profile.get('id')} "
            f"| **Langue :** {lang.upper()}\n\n"
            "---\n\n"
            f"## Résumé de Correspondance (#{rank})\n\n"
            f"{summary_text}\n\n"
            "---\n\n"
            "## Détails\n\n"
            "| Champ | Valeur |\n|-------|--------|\n"
            f"| ID Appel | {tid} |\n"
            f"| Score Composite | {score:.4f} |\n"
            f"| Secteur | {match['sector']} |\n"
            f"| Budget | {budget_str} |\n"
            f"| Date Limite | {match['deadline']} |\n"
            f"| Région | {match['region']} |\n"
            f"| Langue du Document | {match['language'].upper()} |\n\n"
            "## Détail du Score\n\n"
            "| Composant | Score |\n|-----------|-------|\n"
            f"| Similarité TF-IDF | {breakdown['tfidf_similarity']:.3f} |\n"
            f"| Correspondance Sectorielle | {breakdown['sector_match']:.3f} |\n"
            f"| Compatibilité Budgétaire | {breakdown['budget_score']:.3f} |\n"
            f"| Urgence Deadline | {breakdown['urgency_score']:.3f} |\n\n"
            f"## ⚠ Principal Facteur Disqualifiant\n\n{disq}\n"
        )

    return (
        f"# {match['title']}\n"
        f"**Profile:** {profile.get('name')} | **ID:** {profile.get('id')} "
        f"| **Language:** {lang.upper()}\n\n"
        "---\n\n"
        f"## Match Summary (#{rank})\n\n"
        f"{summary_text}\n\n"
        "---\n\n"
        "## Details\n\n"
        "| Field | Value |\n|-------|-------|\n"
        f"| Tender ID | {tid} |\n"
        f"| Composite Score | {score:.4f} |\n"
        f"| Sector | {match['sector']} |\n"
        f"| Budget | {budget_str} |\n"
        f"| Deadline | {match['deadline']} |\n"
        f"| Region | {match['region']} |\n"
        f"| Document Language | {match['language'].upper()} |\n\n"
        "## Score Breakdown\n\n"
        "| Component | Score |\n|-----------|-------|\n"
        f"| TF-IDF Similarity | {breakdown['tfidf_similarity']:.3f} |\n"
        f"| Sector Match | {breakdown['sector_match']:.3f} |\n"
        f"| Budget Compatibility | {breakdown['budget_score']:.3f} |\n"
        f"| Deadline Urgency | {breakdown['urgency_score']:.3f} |\n\n"
        f"## ⚠ Biggest Disqualifier\n\n{disq}\n"
    )


if __name__ == "__main__":
    # Quick test
    profile = {
        "id": "01",
        "name": "AgriGrow Rwanda",
        "sector": "agritech",
        "country": "Rwanda",
        "budget_max": 50000,
        "needs_text": "We need funding to scale our precision farming app.",
        "languages": ["en"],
    }
    tender = {
        "id": "T004",
        "title": "Digital Agriculture Innovation Grant",
        "sector": "agritech",
        "budget": 50000,
        "deadline": "15 August 2025",
        "region": "East Africa",
        "language": "en",
    }
    breakdown = {
        "tfidf_similarity": 0.45,
        "sector_match": 1.0,
        "budget_score": 1.0,
        "urgency_score": 0.65,
    }

    print("=== EN Summary ===")
    print(generate_summary(profile, tender, 1, 0.78, breakdown, "en"))
    print("\n=== FR Summary ===")
    print(generate_summary(profile, tender, 1, 0.78, breakdown, "fr"))