Spaces:
Sleeping
Sleeping
| from fastapi import FastAPI | |
| from pydantic import BaseModel | |
| import torch | |
| from transformers import AutoTokenizer, T5ForConditionalGeneration | |
# Checkpoint identifier shared by the tokenizer and the model below.
MODEL_NAME = "google/byt5-small"

app = FastAPI()

# The tokenizer and model are loaded once, at import time, and reused by
# every call to text_to_ipa().
print("Loading model...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# eval() returns the module itself, so the load and the switch to
# evaluation mode can be chained.
model = T5ForConditionalGeneration.from_pretrained(MODEL_NAME).eval()
print("Model loaded.")
# Pydantic model carrying the text to transcribe; consumed by predict().
class TextRequest(BaseModel):
    # Raw input text, forwarded unchanged to text_to_ipa().
    text: str
def text_to_ipa(text: str) -> str:
    """Transcribe *text* to IPA with the module-level tokenizer and model.

    Wraps *text* in a few-shot prompt, runs non-sampling generation, and
    returns the decoded output.

    Args:
        text: The text to transcribe.

    Returns:
        The decoded model output with surrounding whitespace stripped. If the
        output contains an ``"IPA:"`` marker, only the part after the last
        marker is returned. An empty string is returned when *text* is empty
        or whitespace-only.
    """
    # Nothing to transcribe: skip the model call rather than return whatever
    # the model happens to generate for a blank example.
    if not text.strip():
        return ""

    # Few-shot examples for better IPA predictions.
    # The prompt lines are kept flush-left so that no source indentation
    # leaks into the text sent to the model.
    prompt = f"""
You are a Scottish Gaelic teacher.
Convert Scottish Gaelic text into the International Phonetic Alphabet (IPA).
Only return the IPA transcription.
Examples:
Text: halò
IPA: /haˈloː/
Text: uisge
IPA: /ˈɯʃkʲə/
Text: {text}
IPA:
"""
    # Prompts longer than 512 tokens are truncated by the tokenizer.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)

    # Gradients are not needed for generation.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=64,
            do_sample=False,  # deterministic output
        )

    # Decode and return only the IPA portion. If the marker is absent,
    # split() yields a single element and the whole output is returned.
    result = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return result.split("IPA:")[-1].strip()
# NOTE(review): no route decorator is visible on this handler in this view;
# confirm it is registered on `app` (it may have been lost in extraction).
def predict(request: TextRequest):
    """Return the IPA transcription of ``request.text`` under the key "ipa"."""
    return {"ipa": text_to_ipa(request.text)}