Tender_Matcher / src /summarizer.py
Samson NIYIZURUGERO
code migration
dffabb7
#!/usr/bin/env python3
"""
src/summarizer.py — Match Explanation Generator
Generates ≤80-word explanations in EN or FR explaining why a tender matches a profile.
Uses template-based generation (CPU-only, no LLM dependency required).
"""
import random
# ─── English Templates ────────────────────────────────────────────────────────
# English explanation templates. Each template is listed sentence by sentence
# and joined with single spaces; placeholders are filled later via str.format().
EN_TEMPLATES = [
    " ".join(sentences)
    for sentences in (
        # Score-led: headline score, regional fit, funding, deadline, drivers.
        (
            "{org_name} matches **{tender_title}** (score: {score:.2f}).",
            "This {sector} grant from {tender_region} aligns with your operations in {country}.",
            "The available funding of USD {budget:,} fits your budget range.",
            "Deadline: {deadline}.",
            "Sector overlap and {tfidf_pct}% content similarity drive this ranking.",
        ),
        # Rank-led: position, key facts, needs snippet, per-component scores.
        (
            "**{tender_title}** is ranked #{rank} for {org_name}.",
            "Sector: {sector} ✓.",
            "Budget: USD {budget:,}.",
            "Deadline: {deadline}.",
            "Your needs in {needs_snippet} closely match this tender's objectives.",
            "Score breakdown — similarity: {tfidf_pct}%, sector: {sector_pct}%, budget: {budget_pct}%.",
        ),
        # Fit-led: sector/region alignment, budget, call to action, score.
        (
            "This {sector} opportunity suits {org_name} because your profile in {country} aligns"
            " with the tender's focus on {region_phrase}.",
            "Budget of USD {budget:,} is within reach.",
            "Apply before {deadline}.",
            "Composite match score: {score:.2f}/1.00.",
        ),
    )
]
# ─── French Templates ─────────────────────────────────────────────────────────
# French explanation templates, mirroring the English ones position by position.
# Each template is listed sentence by sentence and joined with single spaces;
# placeholders are filled later via str.format().
FR_TEMPLATES = [
    " ".join(sentences)
    for sentences in (
        # Score-led: headline score, regional fit, funding, deadline, drivers.
        (
            "{org_name} correspond à **{tender_title}** (score : {score:.2f}).",
            "Cette subvention {sector} en {tender_region} s'aligne avec vos activités en {country}.",
            "Le financement disponible de USD {budget:,} correspond à votre capacité budgétaire.",
            "Date limite : {deadline}.",
            "La correspondance sectorielle et {tfidf_pct}% de similarité de contenu motivent ce classement.",
        ),
        # Rank-led: position, key facts, needs snippet, per-component scores.
        (
            "**{tender_title}** est classé #{rank} pour {org_name}.",
            "Secteur : {sector} ✓.",
            "Budget : USD {budget:,}.",
            "Date limite : {deadline}.",
            "Vos besoins en {needs_snippet} correspondent étroitement aux objectifs de cet appel.",
            "Détail du score — similarité : {tfidf_pct}%, secteur : {sector_pct}%, budget : {budget_pct}%.",
        ),
        # Fit-led: sector/region alignment, budget, call to action, score.
        (
            "Cette opportunité {sector} convient à {org_name} car votre profil en {country} s'aligne"
            " avec l'appel ciblant {region_phrase}.",
            "Le budget de USD {budget:,} est accessible.",
            "Déposez votre candidature avant le {deadline}.",
            "Score composite : {score:.2f}/1.00.",
        ),
    )
]
# English descriptive phrase for each sector code.
SECTOR_PHRASES_EN = dict(
    agritech="digital agriculture and farming innovation",
    healthtech="health technology and community health services",
    cleantech="clean and renewable energy solutions",
    edtech="digital education and offline learning",
    fintech="digital finance and financial inclusion",
    wastetech="waste management and circular economy",
    general="general development and innovation",
)
# French descriptive phrase for each sector code (same keys as the English table).
SECTOR_PHRASES_FR = dict(
    agritech="l'agriculture numérique et l'innovation agricole",
    healthtech="la technologie de santé et les services de santé communautaire",
    cleantech="les solutions d'énergie propre et renouvelable",
    edtech="l'éducation numérique et l'apprentissage hors-ligne",
    fintech="la finance numérique et l'inclusion financière",
    wastetech="la gestion des déchets et l'économie circulaire",
    general="le développement général et l'innovation",
)
# English phrase used in summaries for each tender region name.
REGION_PHRASES_EN = dict(
    (
        ("East Africa", "East African markets"),
        ("West Africa", "West African communities"),
        ("Central Africa", "Central African regions"),
        ("Southern Africa", "Southern African areas"),
        ("Africa", "pan-African initiatives"),
    )
)
# French phrase used in summaries for each tender region name
# (keys stay in English because they are matched against tender["region"]).
REGION_PHRASES_FR = dict(
    (
        ("East Africa", "les marchés d'Afrique de l'Est"),
        ("West Africa", "les communautés d'Afrique de l'Ouest"),
        ("Central Africa", "les régions d'Afrique Centrale"),
        ("Southern Africa", "les zones d'Afrique Australe"),
        ("Africa", "les initiatives panafricaines"),
    )
)
def _truncate_to_words(text: str, max_words: int = 80) -> str:
"""Truncate text to max_words, ending at a sentence boundary if possible."""
words = text.split()
if len(words) <= max_words:
return text
truncated = " ".join(words[:max_words])
# Try to end at last sentence
for punct in [".", "!", "?"]:
idx = truncated.rfind(punct)
if idx > len(truncated) // 2:
return truncated[:idx + 1]
return truncated + "..."
def generate_summary(
    profile: dict,
    tender: dict,
    rank: int,
    score: float,
    breakdown: dict,
    language: str = "en",
    max_words: int = 80,
) -> str:
    """
    Generate a short explanation of why this tender matches the profile.

    A template is chosen deterministically from the language's template list
    with ``rank % len(templates)``, filled with profile/tender fields, and the
    result is capped at ``max_words`` words.

    Args:
        profile: business profile dict; reads "name", "country", "needs_text"
        tender: matched tender dict; reads "title", "sector", "region",
            "budget", "deadline"
        rank: rank position; also selects which template is used
        score: composite match score (0–1)
        breakdown: dict with tfidf_similarity, sector_match, budget_score,
            urgency_score; each is multiplied by 100 for display and a
            missing key counts as 0
        language: "en" or "fr"; any other value falls back to "en"
        max_words: word limit (default 80)
    Returns:
        Formatted explanation string
    """
    lang = language if language in ("en", "fr") else "en"

    # Language-specific resources. The placeholders used for missing fields are
    # localized as well, so a French summary never contains English filler.
    if lang == "fr":
        templates = FR_TEMPLATES
        region_phrases = REGION_PHRASES_FR
        region_fallback = "les régions africaines"
        missing = {
            "org_name": "Votre organisation",
            "tender_title": "Cet appel",
            "country": "votre pays",
            "deadline": "À déterminer",
            "needs": "divers domaines",
        }
    else:
        templates = EN_TEMPLATES
        region_phrases = REGION_PHRASES_EN
        region_fallback = "African regions"
        missing = {
            "org_name": "Your organization",
            "tender_title": "This Tender",
            "country": "your country",
            "deadline": "TBD",
            "needs": "various areas",
        }

    def _percent(key: str) -> int:
        # Round instead of truncating: 0.29 * 100 evaluates to
        # 28.999999999999996, which int() alone would display as 28%.
        # float() first so non-builtin float types still yield a plain int.
        return int(round(float(breakdown.get(key) or 0) * 100))

    sector = tender.get("sector", "general")
    region = tender.get("region", "Africa")
    region_phrase = region_phrases.get(region, region_fallback)

    # First six words of the stated needs; the ellipsis is added only when
    # something was actually cut, and blank/None text uses the placeholder.
    needs_words = (profile.get("needs_text") or "").split()
    if needs_words:
        needs_snippet = " ".join(needs_words[:6])
        if len(needs_words) > 6:
            needs_snippet += "..."
    else:
        needs_snippet = missing["needs"]

    template = templates[rank % len(templates)]
    summary = template.format(
        org_name=profile.get("name", missing["org_name"]),
        tender_title=tender.get("title", missing["tender_title"]),
        score=score,
        sector=sector,
        country=profile.get("country", missing["country"]),
        # A None budget would crash the "{budget:,}" format spec; treat as 0.
        budget=tender.get("budget") or 0,
        deadline=tender.get("deadline", missing["deadline"]),
        tfidf_pct=_percent("tfidf_similarity"),
        sector_pct=_percent("sector_match"),
        budget_pct=_percent("budget_score"),
        # Not used by the built-in templates; str.format ignores unused keys.
        urgency_pct=_percent("urgency_score"),
        rank=rank,
        needs_snippet=needs_snippet,
        tender_region=region,
        region_phrase=region_phrase,
    )
    return _truncate_to_words(summary, max_words)
def generate_summary_md(
    profile: dict,
    matches: list,
    language: str = "en",
) -> str:
    """
    Generate a complete markdown summary file for all matches of a profile.

    The document has a profile header followed by one section per match, in
    the order given; ranks are assigned 1, 2, 3, ... from that order.

    Args:
        profile: business profile dict; reads "name", "sector", "country",
            "needs_text"
        matches: list of ranked tender dicts (from ranker.rank()); each must
            provide "score", "breakdown", "title", "tender_id" and "language"
        language: "en" or "fr"; any other value falls back to "en"
    Returns:
        Full markdown string
    Raises:
        KeyError: if a match or its breakdown lacks one of the required keys
    """
    lang = language if language in ("en", "fr") else "en"

    # All language-dependent wording lives in one table so the document
    # structure below is written (and fixed) only once for both languages.
    if lang == "fr":
        text = {
            "doc_title": "Correspondances de Subventions",
            "default_name": "Profil",
            "profile": "Profil :",
            "sector": "Secteur :",
            "country": "Pays :",
            "needs": "Besoins :",
            "section": "Top 5 Appels à Candidatures",
            "tender_id": "ID :",
            "score": "Score :",
            "language": "Langue :",
            "explanation": "Explication :",
            "breakdown": "Détail du score :",
            "tfidf": "Similarité TF-IDF :",
            "sector_match": "Correspondance sectorielle :",
            "budget": "Compatibilité budgétaire :",
            "urgency": "Urgence deadline :",
        }
    else:
        text = {
            "doc_title": "Grant Matches",
            "default_name": "Profile",
            "profile": "Profile:",
            "sector": "Sector:",
            "country": "Country:",
            "needs": "Needs:",
            "section": "Top 5 Matched Tenders",
            "tender_id": "ID:",
            "score": "Score:",
            "language": "Language:",
            "explanation": "Explanation:",
            "breakdown": "Score Breakdown:",
            "tfidf": "TF-IDF Similarity:",
            "sector_match": "Sector Match:",
            "budget": "Budget Compatibility:",
            "urgency": "Deadline Urgency:",
        }

    lines = [
        f"# {text['doc_title']} — {profile.get('name', text['default_name'])}",
        (
            f"\n**{text['profile']}** {profile.get('name')}"
            f" | **{text['sector']}** {profile.get('sector')}"
            f" | **{text['country']}** {profile.get('country')}"
        ),
        f"\n**{text['needs']}** {profile.get('needs_text', '')}\n",
        "---\n",
        f"## {text['section']}\n",
    ]

    for rank, match in enumerate(matches, 1):
        score = match["score"]
        breakdown = match["breakdown"]
        summary = generate_summary(
            profile=profile,
            tender=match,
            rank=rank,
            score=score,
            breakdown=breakdown,
            language=lang,
        )
        lines.extend(
            [
                # Separator between rank and title; without it the heading
                # rendered as "#1Digital Agriculture ...".
                f"### #{rank} — {match['title']}",
                (
                    f"**{text['tender_id']}** {match['tender_id']}"
                    f" | **{text['score']}** {score:.4f}"
                    f" | **{text['language']}** {match['language'].upper()}"
                ),
                f"\n**{text['explanation']}**\n{summary}\n",
                f"**{text['breakdown']}**",
                f"- {text['tfidf']} {breakdown['tfidf_similarity']:.3f}",
                f"- {text['sector_match']} {breakdown['sector_match']:.3f}",
                f"- {text['budget']} {breakdown['budget_score']:.3f}",
                f"- {text['urgency']} {breakdown['urgency_score']:.3f}\n",
                "---\n",
            ]
        )

    return "\n".join(lines)
def generate_individual_summary_md(
    profile: dict,
    match: dict,
    rank: int,
    language: str = "en",
    disqualifier: str = "",
) -> str:
    """
    Generate a single .md file for one (profile, tender) match pair.
    Spec requires one .md per (profile, tender) match in summaries/.

    Args:
        profile: business profile dict; reads "name" and "id"
        match: single ranked tender dict (from ranker.rank()); must provide
            "score", "breakdown", "tender_id", "title", "sector", "deadline",
            "region" and "language" ("budget" is optional and defaults to 0)
        rank: rank position (1-based)
        language: "en" or "fr"; any other value falls back to "en"
        disqualifier: pre-computed top disqualifier string; when empty, a
            fallback sentence in the document's language is used
    Returns:
        Markdown string for this individual match
    Raises:
        KeyError: if ``match`` or its breakdown lacks one of the required keys
    """
    # Imported here, as in the original, so the project import is only
    # resolved when this function is actually called.
    from src.utils import format_budget

    lang = language if language in ("en", "fr") else "en"
    score = match["score"]
    breakdown = match["breakdown"]
    tid = match["tender_id"]
    summary_text = generate_summary(
        profile=profile,
        tender=match,
        rank=rank,
        score=score,
        breakdown=breakdown,
        language=lang,
    )
    budget_str = format_budget(match.get("budget", 0))

    if lang == "fr":
        # The fallback sentence is localized so a French document does not end
        # with an English sentence.
        disq = disqualifier or "Aucun facteur disqualifiant majeur identifié."
        return (
            f"# {match['title']}\n"
            f"**Profil :** {profile.get('name')} | **ID :** {profile.get('id')} "
            f"| **Langue :** {lang.upper()}\n\n"
            "---\n\n"
            f"## Résumé de Correspondance (#{rank})\n\n"
            f"{summary_text}\n\n"
            "---\n\n"
            "## Détails\n\n"
            "| Champ | Valeur |\n|-------|--------|\n"
            f"| ID Appel | {tid} |\n"
            f"| Score Composite | {score:.4f} |\n"
            f"| Secteur | {match['sector']} |\n"
            f"| Budget | {budget_str} |\n"
            f"| Date Limite | {match['deadline']} |\n"
            f"| Région | {match['region']} |\n"
            f"| Langue du Document | {match['language'].upper()} |\n\n"
            "## Détail du Score\n\n"
            "| Composant | Score |\n|-----------|-------|\n"
            f"| Similarité TF-IDF | {breakdown['tfidf_similarity']:.3f} |\n"
            f"| Correspondance Sectorielle | {breakdown['sector_match']:.3f} |\n"
            f"| Compatibilité Budgétaire | {breakdown['budget_score']:.3f} |\n"
            f"| Urgence Deadline | {breakdown['urgency_score']:.3f} |\n\n"
            f"## ⚠ Principal Facteur Disqualifiant\n\n{disq}\n"
        )

    disq = disqualifier or "No major disqualifier identified."
    return (
        f"# {match['title']}\n"
        f"**Profile:** {profile.get('name')} | **ID:** {profile.get('id')} "
        f"| **Language:** {lang.upper()}\n\n"
        "---\n\n"
        f"## Match Summary (#{rank})\n\n"
        f"{summary_text}\n\n"
        "---\n\n"
        "## Details\n\n"
        "| Field | Value |\n|-------|-------|\n"
        f"| Tender ID | {tid} |\n"
        f"| Composite Score | {score:.4f} |\n"
        f"| Sector | {match['sector']} |\n"
        f"| Budget | {budget_str} |\n"
        f"| Deadline | {match['deadline']} |\n"
        f"| Region | {match['region']} |\n"
        f"| Document Language | {match['language'].upper()} |\n\n"
        "## Score Breakdown\n\n"
        "| Component | Score |\n|-----------|-------|\n"
        f"| TF-IDF Similarity | {breakdown['tfidf_similarity']:.3f} |\n"
        f"| Sector Match | {breakdown['sector_match']:.3f} |\n"
        f"| Budget Compatibility | {breakdown['budget_score']:.3f} |\n"
        f"| Deadline Urgency | {breakdown['urgency_score']:.3f} |\n\n"
        f"## ⚠ Biggest Disqualifier\n\n{disq}\n"
    )
if __name__ == "__main__":
    # Smoke test: render the same sample match once per supported language.
    demo_profile = {
        "id": "01",
        "name": "AgriGrow Rwanda",
        "sector": "agritech",
        "country": "Rwanda",
        "budget_max": 50000,
        "needs_text": "We need funding to scale our precision farming app.",
        "languages": ["en"],
    }
    demo_tender = {
        "id": "T004",
        "title": "Digital Agriculture Innovation Grant",
        "sector": "agritech",
        "budget": 50000,
        "deadline": "15 August 2025",
        "region": "East Africa",
        "language": "en",
    }
    demo_breakdown = {
        "tfidf_similarity": 0.45,
        "sector_match": 1.0,
        "budget_score": 1.0,
        "urgency_score": 0.65,
    }

    for position, lang_code in enumerate(("en", "fr")):
        # Every header after the first is preceded by a blank line.
        spacer = "\n" if position else ""
        print(f"{spacer}=== {lang_code.upper()} Summary ===")
        print(generate_summary(demo_profile, demo_tender, 1, 0.78, demo_breakdown, lang_code))