Spaces:

tejasashinde
/

HackathonSpaceRecommender

Running

File size: 5,009 Bytes

6f97a9b

from __future__ import annotations

import argparse
import re
import sys
from pathlib import Path


# Resolve the directory two levels up from this file (the parent of the
# directory that contains it) and put it on the import path, so that the
# `backend` package imported just below can be found when this file is run
# directly as a script.
# NOTE(review): presumably this directory is the project root - confirm.
ROOT_DIR = Path(__file__).resolve().parents[1]
if str(ROOT_DIR) not in sys.path:
    # Insert at position 0 so this directory is searched before any other
    # entry already on sys.path.
    sys.path.insert(0, str(ROOT_DIR))

from backend.spaces_index import recommend_spaces


# Evaluation scenarios consumed by `evaluate`. Each entry has these keys:
#   "query"        - text passed to `recommend_spaces`.
#   "expected_ids" - set of space ids; the scenario passes the id check when
#                    at least one of them appears among the returned ids.
#   "description"  - human-readable intent, printed only in verbose mode.
#   "reason_terms" - substrings; at least one must occur in the lowercased
#                    "reason" text of the top-ranked result.
SCENARIOS = [
    {
        "query": "kid games apps",
        "expected_ids": {"build-small-hackathon/reachy-to-the-rescue"},
        "description": "kid-friendly game intent should surface a clearly game-like result",
        "reason_terms": {"kid", "play", "game", "younger"},
    },
    {
        "query": "suggest me some good writing apps",
        "expected_ids": {
            "build-small-hackathon/copy-campfire",
            "build-small-hackathon/anti-ill-comix",
            "build-small-hackathon/cuneiform-translator",
        },
        "description": "writing intent should favor writing-first spaces over generic assistants",
        "reason_terms": {"writing", "write", "journal", "creative"},
    },
    {
        "query": "provide card games",
        "expected_ids": {"build-small-hackathon/MatchWise"},
        "description": "card-game intent should find a space whose central loop is card-based",
        "reason_terms": {"card", "cards", "match", "flip"},
    },
    {
        "query": "do we have tutor apps?",
        "expected_ids": {
            "build-small-hackathon/tutori",
            "build-small-hackathon/educrate",
            "build-small-hackathon/pocket-tutor",
        },
        "description": "tutor intent should surface tutoring or teaching spaces",
        "reason_terms": {"tutor", "learning", "lesson", "teach"},
    },
    {
        "query": "playing cards",
        "expected_ids": {"build-small-hackathon/MatchWise"},
        "description": "plain-language card intent should still map to the same relevant result",
        "reason_terms": {"card", "cards", "match", "flip"},
    },
]


def _sentence_count(text: str) -> int:
    return len([part for part in re.split(r"(?<=[.!?])\s+", text.strip()) if part.strip()])


def evaluate(top_k: int, verbose: bool) -> int:
    """Run every entry of ``SCENARIOS`` and report recommendation failures.

    For each scenario the query is passed to ``recommend_spaces`` and the
    following checks are applied, in order:

    1. The call returned at least one result.
    2. At least one of the scenario's expected ids is among the returned ids.
    3. If the top result has a non-empty ``reason``: it has at most two
       sentences, does not contain the word "readme" (case-insensitive), and
       contains at least one of the scenario's ``reason_terms``.

    A scenario that fails check 1 or 2 is not examined further. All failures
    are collected and printed at the end.

    Args:
        top_k: Number of results requested from ``recommend_spaces`` for each
            scenario.
        verbose: When true, print the query, the expected ids, the scenario
            description and one summary line per returned result.

    Returns:
        ``0`` when every scenario passed, ``1`` when at least one failure was
        recorded.
    """
    failures: list[str] = []
    for scenario in SCENARIOS:
        query = scenario["query"]
        expected_ids = set(scenario["expected_ids"])
        # NOTE(review): the second and third positional arguments ("" and
        # "All") are presumably an empty extra-context string and a
        # "no category filter" value - confirm against recommend_spaces.
        results = recommend_spaces(query, "", "All", top_k=top_k)
        result_ids = [str(item.get("id", "")).strip() for item in results]

        if verbose:
            print(f"\nQUERY: {query}")
            print(f"Expected any of: {sorted(expected_ids)}")
            print(f"Description: {scenario['description']}")
            for idx, item in enumerate(results, 1):
                print(
                    f"{idx}. {item.get('id')} | {item.get('name')} | "
                    f"{item.get('category')} | {item.get('matched_signals')} | "
                    f"{(item.get('reason') or '')[:180]}"
                )

        # Check for an empty result set first. An empty result set can never
        # contain an expected id, so testing the id match first would always
        # take that branch and this more specific message would be
        # unreachable.
        if not results:
            failures.append(f'Query "{query}" returned no results.')
            continue

        if expected_ids.isdisjoint(result_ids):
            failures.append(
                f'Query "{query}" did not return any expected ids in top {top_k}. '
                f"Expected one of {sorted(expected_ids)}, got {result_ids}."
            )
            continue

        # Only the top-ranked result's explanation is quality-checked, and
        # only when it is non-empty.
        top_reason = str(results[0].get("reason", "") or "").strip()
        if not top_reason:
            continue

        sentence_count = _sentence_count(top_reason)
        if sentence_count > 2:
            failures.append(
                f'Query "{query}" returned a top reason with {sentence_count} sentences; expected at most 2.'
            )

        lowered_reason = top_reason.lower()
        if "readme" in lowered_reason:
            failures.append(
                f'Query "{query}" returned a top reason that mentioned README explicitly. '
                f"Reason was: {top_reason}"
            )

        # Substring match: a term such as "card" also matches "cards".
        reason_terms = set(scenario.get("reason_terms", set()) or set())
        if reason_terms and not any(term in lowered_reason for term in reason_terms):
            failures.append(
                f'Query "{query}" returned a top reason that did not mention expected concepts {sorted(reason_terms)}. '
                f"Reason was: {top_reason}"
            )

    if failures:
        print("\nRecommendation evaluation failed:")
        for failure in failures:
            print(f"- {failure}")
        return 1

    print("Recommendation evaluation passed.")
    return 0


def main() -> int:
    """Parse command-line options and run the evaluation.

    Recognised options are ``--top-k`` (integer, default 5) and ``--quiet``
    (suppresses the per-scenario output).

    Returns:
        The integer returned by ``evaluate``.
    """
    cli = argparse.ArgumentParser(description="Run lightweight recommendation quality checks.")
    cli.add_argument(
        "--top-k",
        type=int,
        default=5,
        help="Number of results to inspect for each scenario.",
    )
    cli.add_argument(
        "--quiet",
        action="store_true",
        help="Only print pass/fail summary.",
    )
    options = cli.parse_args()

    # Values below 1 are clamped so at least one result is always inspected.
    effective_top_k = options.top_k if options.top_k >= 1 else 1
    show_details = not options.quiet
    return evaluate(top_k=effective_top_k, verbose=show_details)


# Script entry point: the integer returned by main() becomes the process
# exit status.
if __name__ == "__main__":
    sys.exit(main())