Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| """ | |
| src/summarizer.py — Match Explanation Generator | |
| Generates ≤80-word explanations in EN or FR explaining why a tender matches a profile. | |
| Uses template-based generation (CPU-only, no LLM dependency required). | |
| """ | |
| import random | |
# ─── English Templates ────────────────────────────────────────────────────────
# Explanation templates filled in by generate_summary() via str.format().
# The template is chosen with `rank % len(templates)`, so with three entries
# rank 1 uses index 1, rank 2 uses index 2 and rank 3 uses index 0.
# Placeholders used across the templates: org_name, tender_title, score,
# sector, tender_region, country, budget, deadline, tfidf_pct, sector_pct,
# budget_pct, rank, needs_snippet, region_phrase.
# `{budget:,}` needs a numeric value and `{score:.2f}` needs a float-compatible one.
EN_TEMPLATES = [
    (
        "{org_name} matches **{tender_title}** (score: {score:.2f}). "
        "This {sector} grant from {tender_region} aligns with your operations in {country}. "
        "The available funding of USD {budget:,} fits your budget range. "
        "Deadline: {deadline}. "
        "Sector overlap and {tfidf_pct}% content similarity drive this ranking."
    ),
    (
        "**{tender_title}** is ranked #{rank} for {org_name}. "
        "Sector: {sector} ✓. Budget: USD {budget:,}. Deadline: {deadline}. "
        "Your needs in {needs_snippet} closely match this tender's objectives. "
        "Score breakdown — similarity: {tfidf_pct}%, sector: {sector_pct}%, budget: {budget_pct}%."
    ),
    (
        "This {sector} opportunity suits {org_name} because your profile in {country} aligns "
        "with the tender's focus on {region_phrase}. "
        "Budget of USD {budget:,} is within reach. Apply before {deadline}. "
        "Composite match score: {score:.2f}/1.00."
    ),
]
# ─── French Templates ─────────────────────────────────────────────────────────
# French counterparts of the English templates, in the same order and with the
# same placeholders, so a given rank maps to the same template index in both
# languages (selection is `rank % len(templates)` in generate_summary()).
# NOTE(review): {sector} and {tender_region} are inserted as the raw tender
# values, which are not translated by any code visible here.
FR_TEMPLATES = [
    (
        "{org_name} correspond à **{tender_title}** (score : {score:.2f}). "
        "Cette subvention {sector} en {tender_region} s'aligne avec vos activités en {country}. "
        "Le financement disponible de USD {budget:,} correspond à votre capacité budgétaire. "
        "Date limite : {deadline}. "
        "La correspondance sectorielle et {tfidf_pct}% de similarité de contenu motivent ce classement."
    ),
    (
        "**{tender_title}** est classé #{rank} pour {org_name}. "
        "Secteur : {sector} ✓. Budget : USD {budget:,}. Date limite : {deadline}. "
        "Vos besoins en {needs_snippet} correspondent étroitement aux objectifs de cet appel. "
        "Détail du score — similarité : {tfidf_pct}%, secteur : {sector_pct}%, budget : {budget_pct}%."
    ),
    (
        "Cette opportunité {sector} convient à {org_name} car votre profil en {country} s'aligne "
        "avec l'appel ciblant {region_phrase}. "
        "Le budget de USD {budget:,} est accessible. Déposez votre candidature avant le {deadline}. "
        "Score composite : {score:.2f}/1.00."
    ),
]
# English descriptive phrase for each sector key.
# NOTE(review): not referenced by any function visible in this file — the
# templates insert the raw sector key instead. Confirm whether it is used
# elsewhere before removing or wiring it in.
SECTOR_PHRASES_EN = {
    "agritech": "digital agriculture and farming innovation",
    "healthtech": "health technology and community health services",
    "cleantech": "clean and renewable energy solutions",
    "edtech": "digital education and offline learning",
    "fintech": "digital finance and financial inclusion",
    "wastetech": "waste management and circular economy",
    "general": "general development and innovation",
}
# French descriptive phrase for each sector key (same keys as the English map).
# NOTE(review): not referenced by any function visible in this file — confirm
# whether it is used elsewhere.
SECTOR_PHRASES_FR = {
    "agritech": "l'agriculture numérique et l'innovation agricole",
    "healthtech": "la technologie de santé et les services de santé communautaire",
    "cleantech": "les solutions d'énergie propre et renouvelable",
    "edtech": "l'éducation numérique et l'apprentissage hors-ligne",
    "fintech": "la finance numérique et l'inclusion financière",
    "wastetech": "la gestion des déchets et l'économie circulaire",
    "general": "le développement général et l'innovation",
}
# Maps a tender's "region" value to the English phrase substituted for
# {region_phrase}. generate_summary() falls back to "African regions" when the
# region is not a key of this dict.
REGION_PHRASES_EN = {
    "East Africa": "East African markets",
    "West Africa": "West African communities",
    "Central Africa": "Central African regions",
    "Southern Africa": "Southern African areas",
    "Africa": "pan-African initiatives",
}
# Maps a tender's "region" value (English key) to the French phrase substituted
# for {region_phrase}. generate_summary() falls back to
# "les régions africaines" when the region is not a key of this dict.
REGION_PHRASES_FR = {
    "East Africa": "les marchés d'Afrique de l'Est",
    "West Africa": "les communautés d'Afrique de l'Ouest",
    "Central Africa": "les régions d'Afrique Centrale",
    "Southern Africa": "les zones d'Afrique Australe",
    "Africa": "les initiatives panafricaines",
}
| def _truncate_to_words(text: str, max_words: int = 80) -> str: | |
| """Truncate text to max_words, ending at a sentence boundary if possible.""" | |
| words = text.split() | |
| if len(words) <= max_words: | |
| return text | |
| truncated = " ".join(words[:max_words]) | |
| # Try to end at last sentence | |
| for punct in [".", "!", "?"]: | |
| idx = truncated.rfind(punct) | |
| if idx > len(truncated) // 2: | |
| return truncated[:idx + 1] | |
| return truncated + "..." | |
def _to_percent(value) -> int:
    """Convert a 0–1 score component to a whole-number percentage.

    ``round`` is used rather than ``int`` because binary floating point makes
    e.g. ``0.29 * 100`` evaluate to ``28.999...``, which ``int`` would report
    as 28. A missing (``None``) component counts as 0.
    """
    return round((value or 0) * 100)


def generate_summary(
    profile: dict,
    tender: dict,
    rank: int,
    score: float,
    breakdown: dict,
    language: str = "en",
    max_words: int = 80,
) -> str:
    """Generate a short explanation of why ``tender`` matches ``profile``.

    A template is picked from the language's template list with
    ``rank % len(templates)``, filled with values from the arguments, and
    finally limited to ``max_words`` words.

    Args:
        profile: Business profile dict; reads ``name``, ``country`` and
            ``needs_text`` (all optional, with textual fallbacks).
        tender: Matched tender dict; reads ``title``, ``sector``, ``region``,
            ``budget`` and ``deadline`` (all optional, with fallbacks).
        rank: Rank position of the match; also selects the template.
        score: Composite match score, rendered with two decimals.
        breakdown: Dict that may hold ``tfidf_similarity``, ``sector_match``,
            ``budget_score`` and ``urgency_score``; missing or ``None``
            entries are shown as 0%.
        language: ``"en"`` or ``"fr"``; any other value falls back to English.
        max_words: Word limit applied to the finished text (default 80).

    Returns:
        The formatted explanation string.
    """
    lang = language if language in ("en", "fr") else "en"

    # Score components as whole percentages (rounded, not truncated).
    tfidf_pct = _to_percent(breakdown.get("tfidf_similarity"))
    sector_pct = _to_percent(breakdown.get("sector_match"))
    budget_pct = _to_percent(breakdown.get("budget_score"))
    urgency_pct = _to_percent(breakdown.get("urgency_score"))

    sector = tender.get("sector", "general")
    region = tender.get("region", "Africa")

    # First six words of the stated needs; empty, None or whitespace-only
    # text falls back to a generic phrase instead of a bare "...".
    needs_words = (profile.get("needs_text") or "").split()
    if needs_words:
        needs_snippet = " ".join(needs_words[:6]) + "..."
    else:
        needs_snippet = "various areas"

    if lang == "fr":
        templates = FR_TEMPLATES
        region_phrase = REGION_PHRASES_FR.get(region, "les régions africaines")
    else:
        templates = EN_TEMPLATES
        region_phrase = REGION_PHRASES_EN.get(region, "African regions")

    template = templates[rank % len(templates)]
    summary = template.format(
        org_name=profile.get("name", "Your organization"),
        tender_title=tender.get("title", "This Tender"),
        score=score,
        sector=sector,
        country=profile.get("country", "your country"),
        # `{budget:,}` requires a number; treat a missing/None budget as 0.
        budget=tender.get("budget") or 0,
        deadline=tender.get("deadline", "TBD"),
        tfidf_pct=tfidf_pct,
        sector_pct=sector_pct,
        budget_pct=budget_pct,
        urgency_pct=urgency_pct,  # accepted by format(); no current template uses it
        rank=rank,
        needs_snippet=needs_snippet,
        tender_region=region,
        region_phrase=region_phrase,
    )
    return _truncate_to_words(summary, max_words)
def generate_summary_md(
    profile: dict,
    matches: list,
    language: str = "en",
) -> str:
    """Render one markdown document covering every match of a profile.

    The document starts with a profile header, followed by one section per
    entry of ``matches`` (numbered from 1) containing the generated
    explanation and the four score components, each section closed by a
    horizontal rule.

    Args:
        profile: Business profile dict (``name``, ``sector``, ``country``,
            ``needs_text`` are read).
        matches: Ranked tender dicts; each must provide ``score``,
            ``breakdown``, ``title``, ``tender_id`` and ``language``.
        language: ``"en"`` or ``"fr"``; anything else is treated as English.

    Returns:
        The full markdown text, lines joined with newlines.
    """
    lang = "fr" if language == "fr" else "en"

    # All language-dependent wording lives in these two tables so the
    # rendering code below is written only once.
    if lang == "fr":
        labels = {
            "title": "# Correspondances de Subventions — ",
            "fallback_name": "Profil",
            "profile": "Profil :",
            "sector": "Secteur :",
            "country": "Pays :",
            "needs": "Besoins :",
            "section": "## Top 5 Appels à Candidatures\n",
            "id": "ID :",
            "score": "Score :",
            "doc_language": "Langue :",
            "explanation": "Explication :",
            "breakdown": "Détail du score :",
        }
        components = (
            ("Similarité TF-IDF :", "tfidf_similarity"),
            ("Correspondance sectorielle :", "sector_match"),
            ("Compatibilité budgétaire :", "budget_score"),
            ("Urgence deadline :", "urgency_score"),
        )
    else:
        labels = {
            "title": "# Grant Matches — ",
            "fallback_name": "Profile",
            "profile": "Profile:",
            "sector": "Sector:",
            "country": "Country:",
            "needs": "Needs:",
            "section": "## Top 5 Matched Tenders\n",
            "id": "ID:",
            "score": "Score:",
            "doc_language": "Language:",
            "explanation": "Explanation:",
            "breakdown": "Score Breakdown:",
        }
        components = (
            ("TF-IDF Similarity:", "tfidf_similarity"),
            ("Sector Match:", "sector_match"),
            ("Budget Compatibility:", "budget_score"),
            ("Deadline Urgency:", "urgency_score"),
        )

    out = [
        f"{labels['title']}{profile.get('name', labels['fallback_name'])}",
        (
            f"\n**{labels['profile']}** {profile.get('name')}"
            f" | **{labels['sector']}** {profile.get('sector')}"
            f" | **{labels['country']}** {profile.get('country')}"
        ),
        f"\n**{labels['needs']}** {profile.get('needs_text', '')}\n",
        "---\n",
        labels["section"],
    ]

    for position, item in enumerate(matches, start=1):
        item_score = item["score"]
        parts = item["breakdown"]
        explanation = generate_summary(
            profile=profile,
            tender=item,
            rank=position,
            score=item_score,
            breakdown=parts,
            language=lang,
        )

        out.append(f"### #{position} — {item['title']}")
        out.append(
            f"**{labels['id']}** {item['tender_id']}"
            f" | **{labels['score']}** {item_score:.4f}"
            f" | **{labels['doc_language']}** {item['language'].upper()}"
        )
        out.append(f"\n**{labels['explanation']}**\n{explanation}\n")
        out.append(f"**{labels['breakdown']}**")

        bullets = [f"- {caption} {parts[key]:.3f}" for caption, key in components]
        bullets[-1] += "\n"  # blank line after the last score component
        out.extend(bullets)

        out.append("---\n")

    return "\n".join(out)
def generate_individual_summary_md(
    profile: dict,
    match: dict,
    rank: int,
    language: str = "en",
    disqualifier: str = "",
) -> str:
    """Build the markdown document for a single (profile, tender) match.

    The original author's note states that the spec requires one ``.md`` per
    (profile, tender) match in ``summaries/``; this function only builds the
    text and does not write any file.

    The document contains the generated match explanation, a details table,
    a score-breakdown table and a closing disqualifier section. When no
    ``disqualifier`` is supplied, a fallback sentence in the document's
    language is used.

    Args:
        profile: Business profile dict (``name`` and ``id`` are shown).
        match: Single ranked tender dict; must provide ``score``,
            ``breakdown``, ``tender_id``, ``title``, ``sector``, ``deadline``,
            ``region`` and ``language``; ``budget`` is optional.
        rank: Rank position of the match, shown in the summary heading.
        language: ``"en"`` or ``"fr"``; anything else is treated as English.
        disqualifier: Pre-computed top disqualifier text; may be empty.

    Returns:
        Markdown string for this individual match.
    """
    # Project-local helper, imported lazily as in the original code.
    from src.utils import format_budget

    lang = language if language in ("en", "fr") else "en"
    score = match["score"]
    breakdown = match["breakdown"]
    tid = match["tender_id"]

    summary_text = generate_summary(
        profile=profile,
        tender=match,
        rank=rank,
        score=score,
        breakdown=breakdown,
        language=lang,
    )
    budget_str = format_budget(match.get("budget", 0))

    if lang == "fr":
        # Fallback text is localized so a French document stays fully French.
        disq = disqualifier or "Aucun facteur disqualifiant majeur identifié."
        return (
            f"# {match['title']}\n"
            f"**Profil :** {profile.get('name')} | **ID :** {profile.get('id')} "
            f"| **Langue :** {lang.upper()}\n\n"
            "---\n\n"
            f"## Résumé de Correspondance (#{rank})\n\n"
            f"{summary_text}\n\n"
            "---\n\n"
            "## Détails\n\n"
            "| Champ | Valeur |\n|-------|--------|\n"
            f"| ID Appel | {tid} |\n"
            f"| Score Composite | {score:.4f} |\n"
            f"| Secteur | {match['sector']} |\n"
            f"| Budget | {budget_str} |\n"
            f"| Date Limite | {match['deadline']} |\n"
            f"| Région | {match['region']} |\n"
            f"| Langue du Document | {match['language'].upper()} |\n\n"
            "## Détail du Score\n\n"
            "| Composant | Score |\n|-----------|-------|\n"
            f"| Similarité TF-IDF | {breakdown['tfidf_similarity']:.3f} |\n"
            f"| Correspondance Sectorielle | {breakdown['sector_match']:.3f} |\n"
            f"| Compatibilité Budgétaire | {breakdown['budget_score']:.3f} |\n"
            f"| Urgence Deadline | {breakdown['urgency_score']:.3f} |\n\n"
            f"## ⚠ Principal Facteur Disqualifiant\n\n{disq}\n"
        )

    disq = disqualifier or "No major disqualifier identified."
    return (
        f"# {match['title']}\n"
        f"**Profile:** {profile.get('name')} | **ID:** {profile.get('id')} "
        f"| **Language:** {lang.upper()}\n\n"
        "---\n\n"
        f"## Match Summary (#{rank})\n\n"
        f"{summary_text}\n\n"
        "---\n\n"
        "## Details\n\n"
        "| Field | Value |\n|-------|-------|\n"
        f"| Tender ID | {tid} |\n"
        f"| Composite Score | {score:.4f} |\n"
        f"| Sector | {match['sector']} |\n"
        f"| Budget | {budget_str} |\n"
        f"| Deadline | {match['deadline']} |\n"
        f"| Region | {match['region']} |\n"
        f"| Document Language | {match['language'].upper()} |\n\n"
        "## Score Breakdown\n\n"
        "| Component | Score |\n|-----------|-------|\n"
        f"| TF-IDF Similarity | {breakdown['tfidf_similarity']:.3f} |\n"
        f"| Sector Match | {breakdown['sector_match']:.3f} |\n"
        f"| Budget Compatibility | {breakdown['budget_score']:.3f} |\n"
        f"| Deadline Urgency | {breakdown['urgency_score']:.3f} |\n\n"
        f"## ⚠ Biggest Disqualifier\n\n{disq}\n"
    )
if __name__ == "__main__":
    # Manual smoke test: print the rank-1 explanation for one sample
    # profile/tender pair, first in English and then in French.
    sample_profile = {
        "id": "01",
        "name": "AgriGrow Rwanda",
        "sector": "agritech",
        "country": "Rwanda",
        "budget_max": 50000,
        "needs_text": "We need funding to scale our precision farming app.",
        "languages": ["en"],
    }
    sample_tender = {
        "id": "T004",
        "title": "Digital Agriculture Innovation Grant",
        "sector": "agritech",
        "budget": 50000,
        "deadline": "15 August 2025",
        "region": "East Africa",
        "language": "en",
    }
    sample_breakdown = {
        "tfidf_similarity": 0.45,
        "sector_match": 1.0,
        "budget_score": 1.0,
        "urgency_score": 0.65,
    }

    for banner, lang_code in (
        ("=== EN Summary ===", "en"),
        ("\n=== FR Summary ===", "fr"),
    ):
        print(banner)
        print(
            generate_summary(
                sample_profile, sample_tender, 1, 0.78, sample_breakdown, lang_code
            )
        )