Spaces:

MitchellKil
/

phonetic-generator-api

Sleeping

Mitchell Kilpatrick SE2022

Prompt engineering

eead65a 8 days ago

1.32 kB

	from fastapi import FastAPI
	from pydantic import BaseModel
	import torch
	from transformers import AutoTokenizer, T5ForConditionalGeneration

	# ASGI application object; the /predict route further down registers on it.
	app = FastAPI()

	# Checkpoint identifier shared by the tokenizer and the model.
	MODEL_NAME = "google/byt5-small"

	# Tokenizer and weights are loaded once, at import time, so every request
	# reuses the same in-memory objects.
	print("Loading model...")
	tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
	# Module.eval() returns the module itself, so it can be chained onto the load.
	model = T5ForConditionalGeneration.from_pretrained(MODEL_NAME).eval()
	print("Model loaded.")

	class TextRequest(BaseModel):
	    # JSON body accepted by POST /predict.
	    # `text` is the raw string to transcribe; it is interpolated into the
	    # prompt as the Scottish Gaelic input.
	    text: str

	def text_to_ipa(text: str) -> str:
	    """Return an IPA transcription of ``text`` produced by the loaded model.

	    The text is embedded in a few-shot prompt, tokenized (truncated to at
	    most 512 tokens), and passed to ``model.generate`` with sampling
	    disabled and at most 64 new tokens. The first generated sequence is
	    decoded without special tokens, and only what follows the last
	    ``"IPA:"`` marker (whitespace-stripped) is returned.

	    Args:
	        text: The text to transcribe.

	    Returns:
	        The model's transcription, or ``""`` when ``text`` is empty or
	        contains only whitespace (the model is not invoked in that case).
	    """
	    # Nothing to transcribe: skip inference rather than return whatever the
	    # model would generate for an empty "Text:" slot.
	    if not text.strip():
	        return ""

	    # Few-shot examples for better IPA predictions
	    prompt = f"""
	You are a Scottish Gaelic teacher.
	Convert Scottish Gaelic text into the International Phonetic Alphabet (IPA).
	Only return the IPA transcription.

	Examples:
	Text: halò
	IPA: /haˈloː/

	Text: uisge
	IPA: /ˈɯʃkʲə/

	Text: {text}
	IPA:
	"""

	    # NOTE(review): truncation presumably trims from the end of the prompt,
	    # so a very long `text` could lose the trailing "IPA:" cue - confirm.
	    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)

	    # Inference only: no gradient tracking needed.
	    with torch.no_grad():
	        outputs = model.generate(
	            **inputs,
	            max_new_tokens=64,
	            do_sample=False,  # deterministic output
	        )

	    # Decode and return only the IPA portion
	    result = tokenizer.decode(outputs[0], skip_special_tokens=True)
	    return result.split("IPA:")[-1].strip()


	@app.post("/predict")
	def predict(request: TextRequest):
	    # Transcribe the submitted text and wrap it in a single-key JSON object:
	    # {"ipa": <transcription>}.
	    return {"ipa": text_to_ipa(request.text)}