Spaces:
Sleeping
Sleeping
| """ | |
| FastAPI backend for Gairaigo Map. | |
| Endpoints: | |
| GET /health - liveness check | |
| GET /languages - returns the 3 classifiable languages | |
| POST /predict - classifies a katakana word | |
| POST /emotion - detects emotion from plain text, returns music list + loanwords | |
| Usage: | |
| uvicorn main:app --reload --port 8000 | |
| """ | |
| import re | |
| import numpy as np | |
| import joblib | |
| from pathlib import Path | |
| from contextlib import asynccontextmanager | |
| from fastapi import FastAPI, HTTPException | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from pydantic import BaseModel, field_validator | |
| from transformers import pipeline | |
# ── Paths ──────────────────────────────────────────────────────────────────────
# Directory that contains this module.
BASE_DIR = Path(__file__).parent

# Prefer the "models" directory that sits beside this module's parent directory;
# otherwise fall back to the Docker layout where /home/user/app is the working root.
_SIBLING_MODELS_DIR = BASE_DIR.parent / "models"
MODELS_DIR = (
    _SIBLING_MODELS_DIR
    if _SIBLING_MODELS_DIR.exists()
    else Path("/home/user/app/models")
)

# Serialized artifacts expected inside MODELS_DIR.
MODEL_PATH, VECTORIZER_PATH, ENCODER_PATH = (
    MODELS_DIR / filename
    for filename in ("model.joblib", "vectorizer.joblib", "encoder.joblib")
)
# Matches strings made up solely of code points from U+30A0–U+30FF (the
# individually listed escapes already fall inside that range).
KATAKANA_RE = re.compile(r"^[\u30A0-\u30FF\u30FC\u30FB\u30FE\u30FD]+$")


def is_katakana(text: str) -> bool:
    """Return True when *text*, ignoring surrounding whitespace, is all katakana.

    An empty (or whitespace-only) string is not considered katakana.
    """
    trimmed = text.strip()
    return KATAKANA_RE.match(trimmed) is not None
# ── Language metadata (SVM classifier) ────────────────────────────────────────
# Display metadata for each label the classifier can emit, keyed by language name:
# an ISO 3166 two-letter country code, the country name, and a hex colour.
LANGUAGE_META = {
    "English": {
        "iso2": "GB",
        "country": "United Kingdom",
        "color": "#4a90d9",
    },
    "French": {
        "iso2": "FR",
        "country": "France",
        "color": "#e85d5d",
    },
    "German": {
        "iso2": "DE",
        "country": "Germany",
        "color": "#f0a500",
    },
}
# ── Emotion → Music playlist (multiple songs per emotion for variety) ──────────
# All video IDs verified via Wikipedia / official sources
# Keys are lower-case emotion labels; each value is an ordered playlist of
# {"title", "video_id"} entries.
EMOTION_MUSIC: dict[str, list[dict]] = {
    "joy": [
        {"title": "ATARASHII GAKKO! - Que Sera Sera", "video_id": "0S1-b9xGQac"},
        {"title": "Wonderland x Showtime - Kyoufuu All Back", "video_id": "nq_x3D0_lgw"},
        {"title": "Sumika - Fiction", "video_id": "IKHGAuNaGuA"},
        {"title": "ATARASHII GAKKO! - OTONABLUE", "video_id": "l446hUqQ7GY"},
        {"title": "Wonderland x Showtime - SEIBAITAAAAAASU!", "video_id": "w0lpuKNZRQ0"},
        {"title": "Creepy Nuts - Bling-Bang-Bang-Born", "video_id": "mLW35YMzELE"},
        {"title": "Wonderland x Showtime - Taiyoukei Disco", "video_id": "oA6aCY4bMg4"},
        {"title": "Gen Hoshino - Koi", "video_id": "jhOVibLEDhA"},
        {"title": "Yumi Arai - Rouge no Dengon", "video_id": "MH-P4mXvDPE"},
    ],
    "sadness": [
        {"title": "ZONE - Kimi ga Kureta Mono", "video_id": "Of36Qh7WLSQ"},
        {"title": "YOSHIKI - Red Swan", "video_id": "r1XE8ON8fos"},
        {"title": "Galileo Galilei - Aoi Shiori", "video_id": "T3bxbVGWy5k"},
        {"title": "seven oops - Orange", "video_id": "nf-L5R8U-Q0"},
        {"title": "DAOKO x Kenshi Yonezu - Fireworks", "video_id": "-tKVN2mAKRI"},
        {"title": "Yorushika - Say It", "video_id": "F64yFFnZfkI"},
        {"title": "Kenshi Yonezu - Lemon", "video_id": "SX_ViT4Ra7k"},
        {"title": "Yoh Kamiyama - Irokousui", "video_id": "kQYLHjgUh_g"},
    ],
    "anger": [
        {"title": "Ado - Usseewa", "video_id": "Qp3b-RXtz4w"},
        {"title": "Neru - Lost One's Weeping", "video_id": "U1aS62Juz70"},
        {"title": "Hige Dandism - Cry Baby", "video_id": "O1bhZgkC4Gw"},
        {"title": "Minami - Crying for Rain", "video_id": "0YF8vecQWYs"},
        {"title": "Eve - Dramaturgy", "video_id": "jJzw1h5CR-I"},
        {"title": "Kenshi Yonezu - Kick Back", "video_id": "M2cckDmNLMI"},
    ],
    "fear": [
        {"title": "TK - Unravel", "video_id": "Fve_lHIPa-I"},
        {"title": "Nightcord at 25:00 x KAITO - Bakenohana", "video_id": "UFRIsspP9UE"},
        {"title": "ATARASHII GAKKO! - Tokyo Calling", "video_id": "pHMH408ltEM"},
        {"title": "Nightcord at 25:00 x KAITO - Heat Abnormal", "video_id": "ToqKNyZi2NQ"},
        {"title": "Yuzu - Hyori Ittai", "video_id": "eKoD2CRr_KA"},
        {"title": "Nightcord at 25:00 - Bug", "video_id": "2Ii7UBMxWVw"},
        {"title": "RADWIMPS - Nandemonaiya", "video_id": "n89SKAymNfA"},
        {"title": "sakanaction - Arukuaround", "video_id": "cADu9rtlZGQ"},
    ],
    "surprise": [
        {"title": "YOASOBI - Idol", "video_id": "ZRtdQ81jPUQ"},
        {"title": "Ado - Buriki no Dance", "video_id": "iL7uoLCbJoc"},
        {"title": "Ado - New Genesis", "video_id": "1FliVTcX8bQ"},
        {"title": "RADWIMPS - Grand Escape", "video_id": "epQGR34yiTY"},
    ],
    "disgust": [
        {"title": "Nightcord at 25:00 - Bocca della Verità", "video_id": "ZjNUJUgyoOw"},
        {"title": "Ado - Readymade", "video_id": "jg09lNupc1s"},
        {"title": "Eve - Literary Nonsense", "video_id": "OskXF3s0UT8"},
    ],
    "neutral": [
        {"title": "Vaundy - Odoriko", "video_id": "7HgJIAUtICU"},
        {"title": "Hanae - Kamisama Hajimemashita", "video_id": "gZaelu4lieE"},
        {"title": "Fuji Kaze - Matsuri", "video_id": "NwOvu-j_WjY"},
        {"title": "Tomofumi Tanizawa - Kimi ni Todoke", "video_id": "9o7tKXUjC6E"},
        {"title": "ATARASHII GAKKO! - Dounimo Tomaranai", "video_id": "59bnq4wlGx8"},
        {"title": "Yorushika - Just a Sunny Day for You", "video_id": "-VKIqrvVOpo"},
        {"title": "natori - Overdose", "video_id": "H08YWE4CIFQ"},
        {"title": "Mitchie M - Tokugawa Cup Noodle Kinshirei", "video_id": "jPXAgWkqbo4"},
        {"title": "Homecomings - Cakes", "video_id": "u1A53wFN9A0"},
    ],
}
# ── Emotion → curated loanwords ────────────────────────────────────────────────
# Keys are the same lower-case emotion labels used by EMOTION_MUSIC; each value
# is a list of {"katakana", "meaning", "language", "iso2"} entries.
EMOTION_LOANWORDS: dict[str, list[dict]] = {
    "joy": [
        {"katakana": "カーニバル", "meaning": "carnival", "language": "English", "iso2": "GB"},
        {"katakana": "フェスティバル", "meaning": "festival", "language": "English", "iso2": "GB"},
        {"katakana": "ダンス", "meaning": "dance", "language": "English", "iso2": "GB"},
        {"katakana": "ショーロ", "meaning": "choro; chorinho; style of Brazilian popular music", "language": "Portuguese", "iso2": "PT"},
        {"katakana": "カステラ", "meaning": "castella (type of sponge cake)", "language": "Portuguese", "iso2": "PT"},
        {"katakana": "バレエ", "meaning": "ballet", "language": "French", "iso2": "FR"},
        {"katakana": "シャンソン", "meaning": "chanson; French song", "language": "French", "iso2": "FR"},
        {"katakana": "フェット", "meaning": "fête; festival; celebration", "language": "French", "iso2": "FR"},
    ],
    "sadness": [
        {"katakana": "ノスタルジー", "meaning": "nostalgia", "language": "French", "iso2": "FR"},
        {"katakana": "メランコリー", "meaning": "melancholy", "language": "French", "iso2": "FR"},
        {"katakana": "アデュー", "meaning": "adieu; goodbye", "language": "French", "iso2": "FR"},
        {"katakana": "ミンネ", "meaning": "love of a knight for a courtly lady (upon which he is unable to act)", "language": "German", "iso2": "DE"},
        {"katakana": "フロイライン", "meaning": "miss (German title for an unmarried woman)", "language": "German", "iso2": "DE"},
        {"katakana": "カッパ", "meaning": "raincoat", "language": "Portuguese", "iso2": "PT"},
        {"katakana": "ロンリー", "meaning": "lonely", "language": "English", "iso2": "GB"},
        {"katakana": "ブルース", "meaning": "blues (music genre)", "language": "English", "iso2": "GB"},
    ],
    "anger": [
        {"katakana": "ネリチャギ", "meaning": "axe kick; ax kick", "language": "Korean", "iso2": "KR"},
        {"katakana": "サンダ", "meaning": "sanda; sanshou; Chinese boxing; Chinese kickboxing", "language": "Chinese", "iso2": "CN"},
        {"katakana": "テロル", "meaning": "terror; terrorism", "language": "German", "iso2": "DE"},
        {"katakana": "ストライキ", "meaning": "strike (labor action)", "language": "English", "iso2": "GB"},
        {"katakana": "プロテスト", "meaning": "protest", "language": "English", "iso2": "GB"},
        {"katakana": "レジスタンス", "meaning": "resistance (movement)", "language": "French", "iso2": "FR"},
        {"katakana": "バトル", "meaning": "battle", "language": "English", "iso2": "GB"},
        {"katakana": "パワー", "meaning": "power", "language": "English", "iso2": "GB"},
    ],
    "fear": [
        {"katakana": "ノワール", "meaning": "black; dark", "language": "French", "iso2": "FR"},
        {"katakana": "エトランゼ", "meaning": "stranger; outsider; foreigner", "language": "French", "iso2": "FR"},
        {"katakana": "テロル", "meaning": "terror; terrorism", "language": "German", "iso2": "DE"},
        {"katakana": "デマゴギー", "meaning": "false rumor; false alarm; misinformation", "language": "German", "iso2": "DE"},
        {"katakana": "ゴースト", "meaning": "ghost", "language": "English", "iso2": "GB"},
        {"katakana": "ホラー", "meaning": "horror", "language": "English", "iso2": "GB"},
        {"katakana": "パニック", "meaning": "panic", "language": "English", "iso2": "GB"},
        {"katakana": "ミステリー", "meaning": "mystery", "language": "English", "iso2": "GB"},
    ],
    "surprise": [
        {"katakana": "ゲリラライブ", "meaning": "surprise concert", "language": "English", "iso2": "GB"},
        {"katakana": "スライハンド", "meaning": "sleight of hand (e.g. in magic tricks)", "language": "English", "iso2": "GB"},
        {"katakana": "マジック", "meaning": "magic", "language": "English", "iso2": "GB"},
        {"katakana": "イリュージョン", "meaning": "illusion", "language": "English", "iso2": "GB"},
        {"katakana": "サーカス", "meaning": "circus", "language": "English", "iso2": "GB"},
        {"katakana": "スペクタクル", "meaning": "spectacle", "language": "French", "iso2": "FR"},
        {"katakana": "ブリュット", "meaning": "brut; dry sparkling wine", "language": "French", "iso2": "FR"},
        {"katakana": "サプライズ", "meaning": "surprise", "language": "English", "iso2": "GB"},
    ],
    "disgust": [
        {"katakana": "チョウドウフ", "meaning": "stinky tofu; fermented tofu", "language": "Chinese", "iso2": "CN"},
        {"katakana": "シックハウス", "meaning": "sick building; building which causes people to feel unwell", "language": "English", "iso2": "GB"},
        {"katakana": "トキシック", "meaning": "toxic", "language": "English", "iso2": "GB"},
        {"katakana": "ダストシュート", "meaning": "garbage chute; trash chute", "language": "English", "iso2": "GB"},
        {"katakana": "ポイズン", "meaning": "poison", "language": "English", "iso2": "GB"},
        {"katakana": "スキャンダル", "meaning": "scandal", "language": "English", "iso2": "GB"},
        {"katakana": "ネガティブ", "meaning": "negative", "language": "English", "iso2": "GB"},
        {"katakana": "ゴミ", "meaning": "rubbish; trash; garbage", "language": "English", "iso2": "GB"},
    ],
    "neutral": [
        {"katakana": "アルバイター", "meaning": "part-time worker; part-timer", "language": "German", "iso2": "DE"},
        {"katakana": "ピンイン", "meaning": "Pinyin (Chinese romanization system)", "language": "Chinese", "iso2": "CN"},
        {"katakana": "スケジュール", "meaning": "schedule", "language": "English", "iso2": "GB"},
        {"katakana": "システム", "meaning": "system", "language": "English", "iso2": "GB"},
        {"katakana": "ドキュメント", "meaning": "document", "language": "English", "iso2": "GB"},
        {"katakana": "ネットワーク", "meaning": "network", "language": "English", "iso2": "GB"},
        {"katakana": "マネジメント", "meaning": "management", "language": "English", "iso2": "GB"},
        {"katakana": "スタンダード", "meaning": "standard", "language": "English", "iso2": "GB"},
    ],
}
# ── Startup ────────────────────────────────────────────────────────────────────
# Registry of loaded models shared by the request handlers; filled on startup
# and emptied on shutdown.
artifacts: dict = {}


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load every model before the app starts serving and release them afterwards.

    Populates ``artifacts`` with the joblib-serialized "model", "vectorizer"
    and "encoder", plus the "emotion" text-classification pipeline.

    Args:
        app: The FastAPI application this lifespan is attached to (unused here).

    Raises:
        RuntimeError: If any of the joblib artifact files does not exist.
    """
    # Fail fast with a clear message instead of a lower-level load error.
    for path in (MODEL_PATH, VECTORIZER_PATH, ENCODER_PATH):
        if not path.exists():
            raise RuntimeError(f"Model artifact not found: {path}")

    artifacts["model"] = joblib.load(MODEL_PATH)
    artifacts["vectorizer"] = joblib.load(VECTORIZER_PATH)
    artifacts["encoder"] = joblib.load(ENCODER_PATH)
    artifacts["emotion"] = pipeline(
        "text-classification",
        model="j-hartmann/emotion-english-distilroberta-base",
        top_k=1,
    )
    print("✓ Model artifacts loaded")
    print("✓ Emotion classifier loaded")

    try:
        yield
    finally:
        # Drop the references even when shutdown is triggered by an error.
        artifacts.clear()
# ── App ────────────────────────────────────────────────────────────────────────
app = FastAPI(title="Gairaigo Map API", version="2.0.0", lifespan=lifespan)

# Browser origins allowed to call this API: two local addresses on port 5173
# and one hosted origin (presumably the deployed frontend — confirm).
ALLOWED_ORIGINS = [
    "http://localhost:5173",
    "http://127.0.0.1:5173",
    "https://kotabi.vercel.app",
]

# Only the HTTP methods the API exposes are allowed; any request header is accepted.
app.add_middleware(
    CORSMiddleware,
    allow_origins=ALLOWED_ORIGINS,
    allow_methods=["GET", "POST"],
    allow_headers=["*"],
)
# ── Schemas ────────────────────────────────────────────────────────────────────
class PredictRequest(BaseModel):
    """Request body for POST /predict: a single katakana word."""

    # The word to classify; stored with surrounding whitespace removed.
    word: str

    @field_validator("word")
    @classmethod
    def must_be_katakana(cls, v: str) -> str:
        """Strip *v* and reject empty or non-katakana input.

        Returns:
            The stripped word.

        Raises:
            ValueError: If the word is empty after stripping or contains
                anything other than katakana.
        """
        v = v.strip()
        if not v:
            raise ValueError("Word must not be empty.")
        if not is_katakana(v):
            raise ValueError("Input must be katakana (e.g. コーヒー).")
        return v
class LanguageResult(BaseModel):
    """One candidate source language together with its display metadata and score."""

    # Language name, e.g. a key of LANGUAGE_META.
    language: str
    # Country name shown for this language.
    country: str
    # Two-letter country code.
    iso2: str
    # Score assigned to this language for the classified word.
    confidence: float
    # Hex colour used for this language.
    color: str
class PredictResponse(BaseModel):
    """Response body for POST /predict."""

    # The word that was classified.
    word: str
    # The top-ranked language.
    prediction: LanguageResult
    # Every candidate language with its score.
    all_scores: list[LanguageResult]
class EmotionRequest(BaseModel):
    """Request body for POST /emotion: free-form text to analyse."""

    # The text to classify; stored with surrounding whitespace removed.
    text: str

    @field_validator("text")
    @classmethod
    def must_not_be_empty(cls, v: str) -> str:
        """Strip *v* and reject input that is empty afterwards.

        Returns:
            The stripped text.

        Raises:
            ValueError: If the text is empty or whitespace-only.
        """
        v = v.strip()
        if not v:
            raise ValueError("Text must not be empty.")
        return v
class MusicEntry(BaseModel):
    """A single song in an emotion playlist."""

    # Display title of the song.
    title: str
    # Identifier of the song's video.
    video_id: str
class LoanwordResult(BaseModel):
    """A curated loanword returned alongside a detected emotion."""

    # The loanword written in katakana.
    katakana: str
    # English gloss of the word.
    meaning: str
    # Name of the language the word is attributed to.
    language: str
    # Two-letter country code associated with that language.
    iso2: str
class EmotionResponse(BaseModel):
    """Response body for POST /emotion."""

    # The text that was analysed.
    text: str
    # Detected emotion label.
    emotion: str
    # Full playlist — the frontend cycles through these.
    music_list: list[MusicEntry]
    # Loanwords curated for the detected emotion.
    loanwords: list[LoanwordResult]
# ── Helpers ────────────────────────────────────────────────────────────────────
def softmax(scores: np.ndarray) -> np.ndarray:
    """Turn raw scores into positive weights that sum to 1.

    The maximum is subtracted before exponentiating so that large scores do
    not overflow; this shift does not change the result.
    """
    shifted = scores - np.max(scores)
    weights = np.exp(shifted)
    return weights / np.sum(weights)
def classify(word: str) -> PredictResponse:
    """Score *word* against every known language and rank the results.

    The word is vectorized, the model's decision scores for that single sample
    are softmax-normalised, and each class name from the encoder is paired
    with its score and its LANGUAGE_META entry.

    Args:
        word: The word to classify.

    Returns:
        A PredictResponse whose ``all_scores`` is sorted by descending
        confidence and whose ``prediction`` is the first entry of that list.
    """
    svm = artifacts["model"]
    featurizer = artifacts["vectorizer"]
    label_encoder = artifacts["encoder"]

    features = featurizer.transform([word])
    # decision_function returns one row per sample; only one sample was passed.
    raw_scores = svm.decision_function(features)[0]
    weights = softmax(raw_scores)

    ranked = []
    for idx, name in enumerate(label_encoder.classes_):
        meta = LANGUAGE_META[name]
        ranked.append(
            LanguageResult(
                language=name,
                country=meta["country"],
                iso2=meta["iso2"],
                confidence=round(float(weights[idx]), 4),
                color=meta["color"],
            )
        )

    ranked.sort(key=lambda entry: entry.confidence, reverse=True)
    return PredictResponse(word=word, prediction=ranked[0], all_scores=ranked)
# ── Routes ─────────────────────────────────────────────────────────────────────
@app.get("/health")
def health() -> dict:
    """Liveness check.

    Returns:
        ``{"status": "ok", "model_loaded": <bool>}`` where ``model_loaded`` is
        True once the ``artifacts`` registry holds at least one entry.
    """
    return {"status": "ok", "model_loaded": bool(artifacts)}
@app.get("/languages")
def get_languages() -> dict:
    """Return the classifiable languages and their display metadata.

    Returns:
        A shallow copy of LANGUAGE_META keyed by language name.
    """
    return dict(LANGUAGE_META)
@app.post("/predict")
def predict(body: PredictRequest) -> PredictResponse:
    """Classify the katakana word in *body*.

    Args:
        body: Validated request carrying the word to classify.

    Returns:
        The ranked classification produced by ``classify``.

    Raises:
        HTTPException: With status 500 and the error text as detail when
            classification fails for any reason.
    """
    try:
        return classify(body.word)
    except Exception as e:
        # Surface the failure as an HTTP 500 while keeping the original cause
        # attached for server-side tracebacks.
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.post("/emotion")
def detect_emotion(body: EmotionRequest) -> EmotionResponse:
    """
    Detect emotion from plain English text.

    Returns the detected emotion, a playlist of matching Japanese songs, and related loanwords.
    The frontend can cycle through music_list to let users skip to the next song.

    Raises:
        HTTPException: With status 500 and the error text as detail when
            emotion detection fails for any reason.
    """
    try:
        # truncation=True keeps text longer than the model's maximum input
        # length from raising inside the pipeline.
        result = artifacts["emotion"](body.text, truncation=True)
        # The pipeline output is indexed as result[0][0]: the first (only)
        # prediction for the first (only) input.
        label: str = result[0][0]["label"].lower()
        # Labels without a playlist fall back to the neutral one.
        if label not in EMOTION_MUSIC:
            label = "neutral"
        music_list = [MusicEntry(**m) for m in EMOTION_MUSIC[label]]
        loanwords = [LoanwordResult(**w) for w in EMOTION_LOANWORDS[label]]
        return EmotionResponse(
            text=body.text,
            emotion=label,
            music_list=music_list,
            loanwords=loanwords,
        )
    except Exception as e:
        # Surface the failure as an HTTP 500 while keeping the original cause
        # attached for server-side tracebacks.
        raise HTTPException(status_code=500, detail=str(e)) from e