Spaces:

NSamson1
/

Tender_Matcher

Sleeping

Samson NIYIZURUGERO

Update app.py

8d6d2a9 unverified about 1 month ago

24.4 kB

	#!/usr/bin/env python3
	"""
	app.py — Streamlit UI for CPI Tender Matcher
	Enhanced with:
	- Agentic reasoning for intelligent match explanations
	- Custom profile input: manual form OR PDF upload
	- Existing pre-built profile selection preserved

	Deploy on Streamlit Community Cloud: https://streamlit.io/cloud
	Run locally: streamlit run app.py
	"""

	import sys
	import json
	import time
	import io
	from pathlib import Path

	import streamlit as st

	sys.path.insert(0, str(Path(__file__).parent))

	from src.parser import load_tenders, load_profiles
	from src.ranker import TenderRanker, get_top_disqualifier
	from src.summarizer import generate_summary
	from src.utils import get_profile_language, format_budget, ensure_dir

	# ── Optional imports (graceful fallback if not installed) ──────────────────────
	try:
	from groq import Groq
	GROQ_AVAILABLE = True
	except ImportError:
	GROQ_AVAILABLE = False

	try:
	import pypdf
	PYPDF_AVAILABLE = True
	except ImportError:
	PYPDF_AVAILABLE = False


	# ─── Config ───────────────────────────────────────────────────────────────────
	import os

	def _load_groq_api_key() -> str:
	    """Return the Groq API key, or an empty string when none is configured.

	    Lookup order:
	      1. ``st.secrets["GROQ_API_KEY"]`` (Streamlit secrets).
	      2. The ``GROQ_API_KEY`` environment variable.
	    """
	    try:
	        secret_value = st.secrets["GROQ_API_KEY"]
	    except (KeyError, FileNotFoundError):
	        # The secret could not be read: fall through to the environment.
	        secret_value = None

	    if secret_value:
	        return secret_value
	    return os.environ.get("GROQ_API_KEY", "")

	# Empty string when neither Streamlit secrets nor the environment provide a key.
	GROQ_API_KEY = _load_groq_api_key()
	# Model identifier passed to every Groq chat-completion call in this file.
	GROQ_MODEL = "openai/gpt-oss-20b"
	# Disable LLM features silently if the key was not found
	# (from here on GROQ_AVAILABLE means "package importable AND key present").
	GROQ_AVAILABLE = GROQ_AVAILABLE and bool(GROQ_API_KEY)

	# Options offered by the manual profile form; the same values are listed in the
	# PDF-extraction prompt.
	SECTORS = ["agritech", "healthtech", "cleantech", "edtech", "fintech", "wastetech", "general"]
	REGIONS = ["East Africa", "West Africa", "Central Africa", "Southern Africa", "Africa"]

	# ─── Page Config ──────────────────────────────────────────────────────────────
	# Page-level settings for the app: title, icon, and wide layout.
	st.set_page_config(
	page_title="CPI Tender Matcher",
	page_icon="🌍",
	layout="wide",
	)

	# ─── Load tender data once (cached) ───────────────────────────────────────────
	@st.cache_resource
	def load_data():
	    """Load tenders and profiles from ``data/`` and build the ranker.

	    Decorated with ``st.cache_resource``.

	    Returns:
	        A ``(tenders, profiles, ranker)`` tuple, where ``ranker`` is a
	        ``TenderRanker`` constructed from the loaded tenders.
	    """
	    tender_docs = load_tenders("data/tenders")
	    profile_records = load_profiles("data/profiles.json")
	    return tender_docs, profile_records, TenderRanker(tender_docs)


	# Unpack the (tenders, profiles, ranker) tuple returned by load_data().
	TENDERS, PROFILES, RANKER = load_data()
	# Profile lookup keyed by profile id.
	PROFILE_MAP = {p["id"]: p for p in PROFILES}
	# Select-box labels of the form "<id> — <name> (<country>)"; the id is recovered
	# later by splitting the chosen label on "—".
	PROFILE_CHOICES = [f"{p['id']} — {p['name']} ({p['country']})" for p in PROFILES]


	# ─── Groq LLM helper ──────────────────────────────────────────────────────────
	def _coerce_amount(value):
	    """Return *value* as a number usable with the ``:,`` format spec, or 0.

	    Profiles extracted from PDFs may carry budgets as strings or ``None``;
	    formatting those with ``:,`` would raise before the Groq call is attempted.
	    """
	    if isinstance(value, bool):
	        return 0
	    if isinstance(value, (int, float)):
	        return value
	    try:
	        return float(str(value).replace(",", "").strip())
	    except (TypeError, ValueError):
	        return 0


	def groq_explain_match(profile, tender, rank, score, breakdown, language="en"):
	    """
	    Use Groq (openai/gpt-oss-20b) to generate a rich, personalised match explanation.
	    Falls back to the template-based summarizer if Groq is unavailable, fails,
	    or returns an empty message.

	    Args:
	        profile: Organisation profile dict (name, sector, country, employees,
	            needs_text, budget_max are read with ``.get``).
	        tender: Matched tender dict (title, sector, budget, deadline, region).
	        rank: 1-based rank of the tender in the result list.
	        score: Composite match score (formatted with four decimals).
	        breakdown: Dict with ``tfidf_similarity``, ``sector_match``,
	            ``budget_score`` and ``urgency_score`` components.
	        language: ``"fr"`` requests a French answer; anything else English.

	    Returns:
	        The explanation text (LLM-generated, or the template summary).
	    """
	    if not GROQ_AVAILABLE:
	        return generate_summary(profile, tender, rank, score, breakdown, language)

	    lang_instruction = "Respond in French." if language == "fr" else "Respond in English."
	    budget_capacity = _coerce_amount(profile.get("budget_max", 0))
	    tender_budget = _coerce_amount(tender.get("budget", 0))

	    system_prompt = (
	        "You are an expert grant advisor helping African cooperatives and startups "
	        "find the best-matched tenders and grants. Your explanations are concise, "
	        "actionable, and encouraging. Always respond in at most 100 words."
	    )

	    user_prompt = (
	        f"Explain why this tender is a great (or partial) match for the organization below.\n"
	        f"{lang_instruction}\n\n"
	        f"## Organization Profile\n"
	        f"- Name: {profile.get('name')}\n"
	        f"- Sector: {profile.get('sector')}\n"
	        f"- Country: {profile.get('country')}\n"
	        f"- Employees: {profile.get('employees', 'N/A')}\n"
	        f"- Needs: {profile.get('needs_text', '')}\n"
	        f"- Budget capacity: USD {budget_capacity:,}\n\n"
	        f"## Matched Tender (Rank #{rank})\n"
	        f"- Title: {tender.get('title')}\n"
	        f"- Sector: {tender.get('sector')}\n"
	        f"- Budget: USD {tender_budget:,}\n"
	        f"- Deadline: {tender.get('deadline')}\n"
	        f"- Region: {tender.get('region')}\n\n"
	        f"## Scoring Breakdown\n"
	        f"- Composite score: {score:.4f}/1.0\n"
	        f"- TF-IDF similarity: {breakdown.get('tfidf_similarity', 0):.2%}\n"
	        f"- Sector match: {breakdown.get('sector_match', 0):.2%}\n"
	        f"- Budget fit: {breakdown.get('budget_score', 0):.2%}\n"
	        f"- Deadline urgency: {breakdown.get('urgency_score', 0):.2%}\n\n"
	        f"Write a compelling max-100-word explanation and suggest one next step."
	    )

	    try:
	        client = Groq(api_key=GROQ_API_KEY)
	        response = client.chat.completions.create(
	            model=GROQ_MODEL,
	            messages=[
	                {"role": "system", "content": system_prompt},
	                {"role": "user", "content": user_prompt},
	            ],
	            max_tokens=180,
	            temperature=0.7,
	        )
	        # `content` can be None; treat that the same as an empty answer.
	        explanation = (response.choices[0].message.content or "").strip()
	    except Exception as e:
	        # UI boundary: any client/network failure degrades to the template.
	        st.warning(f"Groq LLM unavailable ({e}); using template summary.")
	        explanation = ""

	    if explanation:
	        return explanation
	    # NOTE(review): an empty answer may mean the token budget was exhausted
	    # before any content was produced — confirm max_tokens is sufficient.
	    return generate_summary(profile, tender, rank, score, breakdown, language)


	def _to_int(value, default):
	    """Best-effort conversion of an LLM-provided value to a non-negative int."""
	    try:
	        return max(0, int(float(str(value).replace(",", "").strip())))
	    except (TypeError, ValueError, OverflowError):
	        return default


	def _extract_json_object(raw):
	    """Parse the first ``{...}`` span found in *raw* and return it as a dict.

	    Slicing from the first ``{`` to the last ``}`` tolerates Markdown fences or
	    stray prose around the object. Raises ``ValueError`` when no JSON object
	    is present.
	    """
	    start = raw.find("{")
	    end = raw.rfind("}")
	    if start == -1 or end <= start:
	        raise ValueError("model response did not contain a JSON object")
	    parsed = json.loads(raw[start:end + 1])
	    if not isinstance(parsed, dict):
	        raise ValueError("model response was not a JSON object")
	    return parsed


	def _normalise_extracted_profile(data):
	    """Coerce LLM-extracted fields to the types and defaults named in the prompt."""
	    profile = dict(data)
	    profile["id"] = "custom_pdf"
	    profile["name"] = str(profile.get("name") or "Unknown").strip()
	    profile["country"] = str(profile.get("country") or "").strip()
	    profile["needs_text"] = str(profile.get("needs_text") or "").strip()

	    sector = str(profile.get("sector") or "").strip().lower()
	    profile["sector"] = sector if sector in SECTORS else "general"

	    # Match regions case-insensitively but store the canonical spelling.
	    regions_by_key = {name.lower(): name for name in REGIONS}
	    region_key = str(profile.get("region") or "").strip().lower()
	    profile["region"] = regions_by_key.get(region_key, "Africa")

	    profile["employees"] = _to_int(profile.get("employees"), 0)
	    profile["past_funding"] = _to_int(profile.get("past_funding"), 0)
	    profile["budget_max"] = _to_int(profile.get("budget_max"), 50000)

	    languages = profile.get("languages")
	    if isinstance(languages, str):
	        languages = [languages]
	    if not isinstance(languages, (list, tuple)):
	        languages = []
	    cleaned = [str(code).strip().lower() for code in languages if str(code).strip()]
	    profile["languages"] = cleaned or ["en"]
	    return profile


	def groq_parse_pdf_profile(pdf_text):
	    """
	    Ask Groq to extract a structured profile from raw PDF text.
	    Returns a dict matching the profiles.json schema.

	    Only the first 4000 characters of *pdf_text* are sent to the model. The
	    reply is parsed as JSON and its fields are normalised, because the model
	    output is untrusted and is later passed to the ranker.

	    Returns:
	        The normalised profile dict, or ``{}`` when Groq is unavailable or the
	        reply cannot be parsed (an error is shown in the UI in the latter case).
	    """
	    if not GROQ_AVAILABLE:
	        return {}

	    system_prompt = (
	        "You are a data extraction assistant. Extract organisation profile information "
	        "from the text and return ONLY valid JSON — no markdown, no explanation."
	    )

	    user_prompt = (
	        "Extract the following fields from the document text below and return a JSON object.\n"
	        "If a field is not found, use a sensible default.\n\n"
	        "Required JSON keys:\n"
	        " id — use \"custom_pdf\"\n"
	        " name — organisation name (string)\n"
	        " sector — one of: agritech, healthtech, cleantech, edtech, fintech, wastetech, general\n"
	        " country — country name (string)\n"
	        " employees — number of employees (integer, default 0)\n"
	        " languages — list of language codes, e.g. [\"en\"] or [\"fr\"]\n"
	        " needs_text — 1-3 sentence description of what funding is needed for\n"
	        " past_funding — past funding received in USD (integer, default 0)\n"
	        " budget_max — maximum grant amount sought in USD (integer, default 50000)\n"
	        " region — one of: East Africa, West Africa, Central Africa, Southern Africa, Africa\n\n"
	        f"Document text:\n\"\"\"\n{pdf_text[:4000]}\n\"\"\"\n\n"
	        "Return ONLY the JSON object."
	    )

	    try:
	        client = Groq(api_key=GROQ_API_KEY)
	        response = client.chat.completions.create(
	            model=GROQ_MODEL,
	            messages=[
	                {"role": "system", "content": system_prompt},
	                {"role": "user", "content": user_prompt},
	            ],
	            max_tokens=500,
	            temperature=0.2,
	        )
	        raw = (response.choices[0].message.content or "").strip()
	        return _normalise_extracted_profile(_extract_json_object(raw))
	    except Exception as e:
	        # UI boundary: report the failure and let the caller handle `{}`.
	        st.error(f"Could not parse profile from PDF: {e}")
	        return {}


	def extract_pdf_text(uploaded_file):
	    """Extract raw text from an uploaded PDF file.

	    Args:
	        uploaded_file: File-like object exposing ``read()`` that yields the
	            PDF bytes.

	    Returns:
	        The text of all pages joined with newlines. Returns ``""`` when
	        ``pypdf`` is not installed or the file cannot be parsed.
	    """
	    if not PYPDF_AVAILABLE:
	        return ""
	    try:
	        reader = pypdf.PdfReader(io.BytesIO(uploaded_file.read()))
	        # extract_text() may yield None for a page; normalise to "".
	        pages = [page.extract_text() or "" for page in reader.pages]
	    except Exception as exc:
	        # Uploads are untrusted: an unreadable file must not crash the app.
	        # The caller treats empty text as "nothing could be extracted".
	        st.warning(f"Could not read the PDF ({exc}).")
	        return ""
	    return "\n".join(pages)


	# ─── Header ───────────────────────────────────────────────────────────────────
	# Page title and tagline.
	st.title("🌍 CPI Tender Matcher")
	st.markdown(
	    "Multilingual Grant Finder for African Cooperatives \n"
	    "AIMS KTT Hackathon · T2.2 | Author: Samson Niyizurugero \n"
	    "Supports English 🇬🇧 and French 🇫🇷 ·"
	)
	# GROQ_AVAILABLE is False both when the package is missing and when no API key
	# was found, so report the actual cause instead of always blaming the package.
	if GROQ_AVAILABLE:
	    st.success(f" Groq LLM active — model: `{GROQ_MODEL}`")
	elif not GROQ_API_KEY:
	    st.warning(
	        " `GROQ_API_KEY` not found. Add it to Streamlit secrets or set the "
	        "`GROQ_API_KEY` environment variable to enable LLM explanations."
	    )
	else:
	    st.warning(" `groq` package not installed. Run `pip install groq` to enable LLM explanations.")

	st.divider()


	# ─── Sidebar: Settings ────────────────────────────────────────────────────────
	# Sidebar widgets; `language`, `top_k` and `use_llm` are read by the results
	# section further down the script.
	with st.sidebar:
	    st.header("⚙️ Settings")

	    language = st.selectbox(
	        label="Output Language",
	        options=["EN", "FR"],
	        help="Language for match explanations",
	    )

	    top_k = st.slider(
	        label="Top-K Results",
	        min_value=1,
	        max_value=10,
	        value=5,
	        step=1,
	        help="Number of tenders to return",
	    )

	    use_llm = st.toggle(
	        label="Use Groq LLM Explanations",
	        value=True,
	        help="Use Groq AI for richer explanations (slower but smarter)",
	    )

	    st.divider()
	    st.markdown("### 📖 How It Works")
	    # Steps are joined with " \n" so the rendered text matches a single
	    # multi-line literal.
	    how_it_works_steps = [
	        "1. Profile — Select an existing profile, fill in the form, or upload a PDF",
	        "2. Parse — Tenders parsed from TXT/HTML/PDF, language detected",
	        "3. Rank — Hybrid scoring: `0.45×TF-IDF + 0.25×Sector + 0.20×Budget + 0.10×Urgency`",
	        "4. Explain — Groq LLM generates personalised explanations",
	        "5. Deploy — Designed for rural cooperatives via WhatsApp/SMS/voice",
	    ]
	    st.markdown(" \n".join(how_it_works_steps))


	# ─── Profile Input — Three Modes ──────────────────────────────────────────────
	st.subheader("👤 Business Profile")

	# Radio labels are compared below; naming them once avoids silent mismatches.
	MODE_EXISTING = "📋 Select existing profile"
	MODE_MANUAL = "✏️ Fill in manually"
	MODE_PDF = "📄 Upload PDF"

	profile_mode = st.radio(
	    "How would you like to provide your profile?",
	    options=[MODE_EXISTING, MODE_MANUAL, MODE_PDF],
	    horizontal=True,
	)

	# Stays None until one of the three modes yields a usable profile dict.
	profile = None

	# ── Mode 1: Select existing ────────────────────────────────────────────────────
	if profile_mode == MODE_EXISTING:
	    profile_choice = st.selectbox(
	        "Select Business Profile",
	        options=[""] + PROFILE_CHOICES,
	        help="Choose a pre-loaded cooperative or business profile",
	    )
	    if profile_choice:
	        # Labels look like "<id> — <name> (<country>)".
	        profile_id = profile_choice.split("—")[0].strip()
	        profile = PROFILE_MAP.get(profile_id)
	        if profile:
	            with st.expander("👁️ Profile Details", expanded=True):
	                c1, c2, c3, c4 = st.columns(4)
	                c1.metric("Name", profile.get("name"))
	                c2.metric("Sector", profile.get("sector"))
	                c3.metric("Country", profile.get("country"))
	                c4.metric("Employees", profile.get("employees", "—"))
	                st.markdown(f"Languages: {', '.join(profile.get('languages', ['en'])).upper()}")
	                st.markdown(f"Needs: {profile.get('needs_text', '')}")

	# ── Mode 2: Manual form ────────────────────────────────────────────────────────
	elif profile_mode == MODE_MANUAL:
	    with st.form("manual_profile_form"):
	        st.markdown("#### Enter your organisation details")

	        col_a, col_b = st.columns(2)
	        with col_a:
	            m_name = st.text_input("Organisation Name *", placeholder="e.g. AgriGrow Rwanda")
	            m_country = st.text_input("Country *", placeholder="e.g. Rwanda")
	            m_sector = st.selectbox("Sector *", SECTORS)
	            m_region = st.selectbox("Region *", REGIONS)
	        with col_b:
	            m_employees = st.number_input("Number of Employees", min_value=0, value=10)
	            m_budget_max = st.number_input("Max Grant Sought (USD)", min_value=0, value=50000, step=5000)
	            m_past_fund = st.number_input("Past Funding Received (USD)", min_value=0, value=0, step=1000)
	            m_lang = st.multiselect("Languages", ["en", "fr"], default=["en"])

	        m_needs = st.text_area(
	            "Describe your funding needs *",
	            placeholder="e.g. We need funding to scale our precision farming app...",
	            height=100,
	        )
	        submitted = st.form_submit_button("✅ Use This Profile", type="primary")

	    if submitted:
	        if not m_name or not m_country or not m_needs:
	            st.error("Please fill in the required fields: Name, Country, and Needs.")
	        else:
	            profile = {
	                "id": "custom_manual",
	                "name": m_name,
	                "sector": m_sector,
	                "country": m_country,
	                "employees": int(m_employees),
	                "languages": m_lang or ["en"],
	                "needs_text": m_needs,
	                "past_funding": int(m_past_fund),
	                "budget_max": int(m_budget_max),
	                "region": m_region,
	            }
	            # Saved in session state so later reruns can reuse the profile.
	            st.session_state["manual_profile"] = profile
	            st.success(f"✅ Profile set: {m_name} ({m_sector}, {m_country})")

	    if "manual_profile" in st.session_state and profile is None:
	        profile = st.session_state["manual_profile"]
	        st.info(f"Using saved manual profile: {profile['name']}")

	# ── Mode 3: PDF Upload ─────────────────────────────────────────────────────────
	elif profile_mode == MODE_PDF:
	    if not PYPDF_AVAILABLE:
	        st.error("`pypdf` is not installed. Run `pip install pypdf` to enable PDF upload.")
	    elif not GROQ_AVAILABLE:
	        # GROQ_AVAILABLE is also False when only the API key is missing, so
	        # report the actual cause.
	        if GROQ_API_KEY:
	            st.error("`groq` is not installed. Run `pip install groq` to enable PDF profile extraction.")
	        else:
	            st.error(
	                "`GROQ_API_KEY` not found. Add it to Streamlit secrets or set the "
	                "`GROQ_API_KEY` environment variable to enable PDF profile extraction."
	            )
	    else:
	        uploaded_pdf = st.file_uploader(
	            "Upload your organisation profile as a PDF",
	            type=["pdf"],
	            help="The AI will extract your organisation details automatically",
	        )
	        if uploaded_pdf:
	            if st.button("🤖 Extract Profile from PDF", type="primary"):
	                with st.spinner("Groq AI is reading your PDF..."):
	                    pdf_text = extract_pdf_text(uploaded_pdf)
	                    # None marks "no text at all", as opposed to a failed parse ({}).
	                    extracted = groq_parse_pdf_profile(pdf_text) if pdf_text.strip() else None

	                if extracted is None:
	                    st.error("Could not extract text from the PDF. Try a text-based PDF.")
	                elif extracted:
	                    profile = extracted
	                    st.session_state["pdf_profile"] = profile
	                    st.success("✅ Profile extracted successfully!")
	                    with st.expander("📋 Extracted Profile", expanded=True):
	                        st.json(profile)
	                else:
	                    st.error("Could not extract a profile. Try filling in the form manually.")

	    if "pdf_profile" in st.session_state and profile is None:
	        profile = st.session_state["pdf_profile"]
	        st.info(f"Using PDF-extracted profile: {profile.get('name', 'Unknown')}")


	st.divider()

	# ─── Match Button ─────────────────────────────────────────────────────────────
	match_btn = st.button("🔍 Find Matching Tenders", type="primary", use_container_width=True)


	# ─── Main Results ─────────────────────────────────────────────────────────────
	if match_btn:
	    if not profile:
	        st.warning("Please provide a business profile first (select, fill in, or upload).")
	    else:
	        lang = language.lower()
	        llm_used = bool(use_llm and GROQ_AVAILABLE)

	        with st.spinner("Matching tenders..."):
	            t0 = time.time()
	            matches = RANKER.rank(profile, top_k=int(top_k))
	            elapsed = time.time() - t0

	        label = profile.get("name", "Your Organisation")
	        shown = len(matches)  # may be fewer than the requested top_k
	        if lang == "fr":
	            st.success(f"🏆 Top {shown} Subventions pour {label} — Traité en {elapsed:.2f}s · {len(TENDERS)} appels analysés")
	        else:
	            st.success(f"🏆 Top {shown} Tenders for {label} — Processed in {elapsed:.2f}s · {len(TENDERS)} tenders analysed")

	        # One entry per match, computed once and reused by the result cards, the
	        # Markdown export and the preview tab. The summary in particular is kept
	        # here so the preview does not have to recover it from the export text.
	        cards = []
	        results_lines = []

	        for rank_idx, match in enumerate(matches, 1):
	            score = match["score"]
	            breakdown = match["breakdown"]
	            budget_str = format_budget(match.get("budget", 0))
	            lang_badge = "🇫🇷 FR" if match["language"] == "fr" else "🇬🇧 EN"
	            disq = get_top_disqualifier(profile, match)

	            if llm_used:
	                with st.spinner(f"⚡ Groq generating explanation for match #{rank_idx}..."):
	                    summary = groq_explain_match(profile, match, rank_idx, score, breakdown, lang)
	            else:
	                summary = generate_summary(profile, match, rank_idx, score, breakdown, lang)

	            cards.append(
	                {
	                    "rank": rank_idx,
	                    "match": match,
	                    "score": score,
	                    "breakdown": breakdown,
	                    "budget_str": budget_str,
	                    "lang_badge": lang_badge,
	                    "disq": disq,
	                    "summary": summary,
	                }
	            )

	            with st.container():
	                st.markdown(f"### #{rank_idx} — {match['title']}")
	                m1, m2, m3, m4 = st.columns(4)
	                m1.metric("Score", f"{score:.4f}")
	                m2.metric("Sector", match["sector"])
	                m3.metric("Budget", budget_str)
	                m4.metric("Language", lang_badge)
	                st.markdown(
	                    f"ID: `{match['tender_id']}` | "
	                    f"Deadline: {match['deadline']} | "
	                    f"Region: {match['region']}"
	                )
	                st.info(summary)

	                with st.expander("📊 Score Breakdown"):
	                    b1, b2, b3, b4 = st.columns(4)
	                    b1.metric("🔍 TF-IDF", f"{breakdown['tfidf_similarity']:.3f}")
	                    b2.metric("🏷 Sector", f"{breakdown['sector_match']:.3f}")
	                    b3.metric("💰 Budget", f"{breakdown['budget_score']:.3f}")
	                    b4.metric("⏰ Urgency", f"{breakdown['urgency_score']:.3f}")
	                    st.warning(f"⚠ Biggest Disqualifier: {disq}")

	                st.divider()

	            # Quote every line so multi-line summaries stay inside the blockquote.
	            quoted_summary = "\n".join(f"> {line}" for line in str(summary).splitlines())
	            results_lines.append(f"### #{rank_idx} — {match['title']}")
	            results_lines.append(f"Score: {score:.4f} | Sector: {match['sector']} | Budget: {budget_str} | {lang_badge}")
	            results_lines.append(f"Deadline: {match['deadline']} | Region: {match['region']}")
	            results_lines.append(f"\n{quoted_summary}\n")
	            results_lines.append(f"Biggest Disqualifier: {disq}\n---")

	        results_md = "\n".join(results_lines)

	        profile_id = profile.get("id", "custom")
	        # The id can come from LLM-extracted data; keep only filename-safe
	        # characters before using it in a path or download name.
	        safe_id = "".join(
	            ch if ch.isalnum() or ch in "-_" else "_" for ch in str(profile_id)
	        ) or "custom"

	        # Saving a copy on disk is secondary: a failed write must not hide the
	        # results that are already on screen.
	        try:
	            ensure_dir("summaries")
	            summary_path = f"summaries/profile_{safe_id}_{lang}.md"
	            with open(summary_path, "w", encoding="utf-8") as f:
	                f.write(results_md)
	        except OSError as exc:
	            st.warning(f"Could not save summary file ({exc}).")

	        scores_data = {
	            "profile_id": profile_id,
	            "profile_name": profile.get("name"),
	            "language": lang,
	            "llm_used": llm_used,
	            "elapsed_seconds": round(elapsed, 3),
	            "matches": [
	                {
	                    "rank": card["rank"],
	                    "tender_id": card["match"]["tender_id"],
	                    "title": card["match"]["title"],
	                    "score": card["score"],
	                    "breakdown": card["breakdown"],
	                }
	                for card in cards
	            ],
	        }
	        # ensure_ascii=False keeps accented titles readable in the export.
	        scores_json = json.dumps(scores_data, indent=2, ensure_ascii=False)

	        plain_summary = f"Results for {profile.get('name', 'your organisation')}. " + "".join(
	            f"Number {card['rank']}: {card['match']['title']}, score {card['score']:.2f}. "
	            for card in cards
	        )

	        st.subheader("📥 Export Results")
	        tab1, tab2, tab3 = st.tabs(["📄 Markdown Preview", "📊 JSON Scores", "🔊 Plain Text (Audio/WhatsApp)"])

	        with tab1:
	            st.download_button(
	                "⬇ Download Markdown",
	                data=results_md,
	                file_name=f"matches_{safe_id}_{lang}.md",
	                mime="text/markdown",
	            )
	            st.divider()
	            # Build a richer markdown document and render it properly
	            header_md = (
	                f"## 🌍 Tender Match Report\n"
	                f"Organisation: {profile.get('name', '—')}  |  "
	                f"Sector: {profile.get('sector', '—')}  |  "
	                f"Country: {profile.get('country', '—')}\n\n"
	                f"Language: {lang.upper()}  |  "
	                f"LLM Explanations: {'✅ Yes' if llm_used else '❌ No'}  |  "
	                f"Processing time: {elapsed:.2f}s\n\n"
	                f"---\n"
	            )
	            st.markdown(header_md)

	            for card in cards:
	                match = card["match"]
	                score = card["score"]
	                breakdown = card["breakdown"]

	                # 20-cell bar; clamped so out-of-range scores cannot yield a
	                # negative repeat count or an over-long bar.
	                filled = max(0, min(20, int(score * 100) // 5))
	                score_bar = "█" * filled + "░" * (20 - filled)

	                match_md = (
	                    f"### #{card['rank']} — {match['title']}\n\n"
	                    f"| Field | Value |\n"
	                    f"|---|---|\n"
	                    f"| 🏷 Sector | `{match['sector']}` |\n"
	                    f"| 💰 Budget | {card['budget_str']} |\n"
	                    f"| 📅 Deadline | {match['deadline']} |\n"
	                    f"| 🌍 Region | {match['region']} |\n"
	                    f"| 🗣 Language | {card['lang_badge']} |\n"
	                    f"| 🎯 Score | {score:.4f} — `{score_bar}` |\n\n"
	                    f"Score Breakdown: "
	                    f"TF-IDF `{breakdown['tfidf_similarity']:.3f}` · "
	                    f"Sector `{breakdown['sector_match']:.3f}` · "
	                    f"Budget `{breakdown['budget_score']:.3f}` · "
	                    f"Urgency `{breakdown['urgency_score']:.3f}`\n\n"
	                    f"> ⚠️ Biggest disqualifier: {card['disq']}\n\n"
	                )
	                st.markdown(match_md)
	                if card["summary"]:
	                    st.info(card["summary"])
	                st.divider()

	        with tab2:
	            st.download_button(
	                "⬇ Download JSON",
	                data=scores_json,
	                file_name=f"scores_{safe_id}_{lang}.json",
	                mime="application/json",
	            )
	            st.code(scores_json, language="json")

	        with tab3:
	            st.text_area(
	                "Audio-friendly summary (copy for WhatsApp/SMS)",
	                plain_summary,
	                height=120,
	            )