Spaces:
Running
Running
| """ | |
| S2S Local Cache Builder - Runs INSIDE HuggingFace Space | |
| Add this as a separate file in your HF Space repo. | |
| It calls the translation functions directly without HTTP. | |
| Run via: python build_cache_local.py | |
| Output: translation_cache.json | |
| """ | |
| import json | |
| import os | |
| import time | |
| from datetime import datetime | |
# ── Third-party libraries used to load and run the translation models ─────────
| import torch | |
| from pathlib import Path | |
| from transformers import MarianMTModel, MarianTokenizer | |
| from transformers import AutoModelForSeq2SeqLM, NllbTokenizer | |
| from transformers import VitsModel, AutoTokenizer | |
| from optimum.onnxruntime import ORTModelForSeq2SeqLM | |
| from huggingface_hub import snapshot_download | |
| import tempfile | |
| import scipy.io.wavfile as wavfile | |
# Path of the JSON file the cache is written to.
OUTPUT_FILE = "translation_cache.json"

# Number of newly translated entries between two checkpoint saves.
SAVE_EVERY = 50

# Display label for every target language, keyed by the short language code.
LANG_LABEL_MAP = dict(
    fr="French 🇫🇷",
    tw="Asante Twi 🇬🇭",
    ee="Ewe 🇬🇭",
    hau="Hausa 🇬🇭",
    fuv="Fulani 🇬🇭",
)

# Target language codes, in the order they are processed for each phrase.
LANGUAGES = list(LANG_LABEL_MAP)
| PHRASES = [ | |
| "Hello", "Hi", "Hey", "Good morning", "Good afternoon", "Good evening", | |
| "Good night", "Goodbye", "Bye", "See you later", "See you tomorrow", | |
| "See you soon", "Take care", "Have a good day", "Have a nice day", | |
| "Have a good evening", "Have a good night", "Have a safe journey", | |
| "Safe travels", "Welcome", "Welcome back", "Come in", "Sit down", | |
| "How are you", "How are you doing", "How is your day", | |
| "How was your day", "How is the family", "How is your health", | |
| "I am fine", "I am good", "I am okay", "I am not well", | |
| "I am tired", "I am happy", "I am sad", "I am busy", | |
| "Nice to meet you", "It is nice to see you", "Long time no see", | |
| "Thank you", "Thank you very much", "Thanks a lot", "Many thanks", | |
| "You are welcome", "No problem", "Do not worry", | |
| "Please", "Sorry", "I am sorry", "Excuse me", "Pardon me", | |
| "Congratulations", "Well done", "Good job", "Bravo", | |
| "What is your name", "My name is John", "My name is Mary", | |
| "Where are you from", "I am from Ghana", "I am from Nigeria", | |
| "I am from France", "I live in Accra", "I live in Kumasi", | |
| "How old are you", "I am twenty years old", "I am thirty years old", | |
| "What do you do", "I am a doctor", "I am a teacher", | |
| "I am a nurse", "I am an engineer", "I am a farmer", | |
| "I am a student", "I am a businessman", "I am a trader", | |
| "I am retired", "I work in a hospital", "I work in a school", | |
| "Are you married", "I am married", "I am single", | |
| "I have children", "I have two children", "I have a son", | |
| "I have a daughter", "This is my husband", "This is my wife", | |
| "My mother", "My father", "My parents", "My brother", "My sister", | |
| "My son", "My daughter", "My children", "My baby", | |
| "My husband", "My wife", "My family", "My friend", | |
| "My grandfather", "My grandmother", "My uncle", "My aunt", | |
| "My cousin", "My nephew", "My niece", | |
| "The baby is crying", "The child is sleeping", | |
| "My child is sick", "We are a big family", "I love my family", | |
| "The children are playing", "Feed the baby", | |
| "Take care of the children", "The baby needs milk", | |
| "What time is it", "What day is it today", "What is the date", | |
| "What is this", "What is that", "What happened", | |
| "Where are you", "Where are you going", "Where do you live", | |
| "Where is the toilet", "Where is the bathroom", | |
| "Where is the hospital", "Where is the pharmacy", | |
| "Where is the police station", "Where is the market", | |
| "Where is the church", "Where is the mosque", | |
| "Where is the school", "Where is the office", | |
| "Where is the hotel", "Where is the restaurant", | |
| "Where is the bus station", "Where is the airport", | |
| "Where is the bank", "Where is the ATM", | |
| "When does it open", "When does it close", | |
| "How far is it", "How long will it take", | |
| "How much does it cost", "How many do you want", | |
| "Can you help me", "Can I help you", | |
| "Do you understand", "Do you speak English", | |
| "I do not understand", "I do not know", | |
| "Can you repeat that", "Please speak slowly", | |
| "I am sick", "I am not feeling well", "I feel weak", | |
| "I need a doctor", "I need a nurse", "Call an ambulance", | |
| "Take me to the hospital", "Please call for help", | |
| "I have a headache", "I have a stomachache", "I have a backache", | |
| "I have a toothache", "I have chest pain", "I have a fever", | |
| "I have a cold", "I have a cough", "I have diarrhea", | |
| "I am vomiting", "I feel dizzy", "I cannot breathe", | |
| "I am bleeding", "I am in pain", "The pain is severe", | |
| "I broke my arm", "I broke my leg", "I sprained my ankle", | |
| "I was in an accident", "I fell down", | |
| "I am pregnant", "I am due soon", "I need a midwife", | |
| "The baby is coming", "I need pain relief", | |
| "I am allergic to penicillin", "I am diabetic", | |
| "I have malaria", "I have typhoid", "I have high blood pressure", | |
| "I need my medication", "What is this medicine for", | |
| "How many times a day", "Take it with water", | |
| "Take it after meals", "Take it before sleeping", | |
| "I need a blood test", "What is my diagnosis", | |
| "Will I be okay", "I want to go home", | |
| "The patient needs rest", "This is urgent", | |
| "I am hungry", "I am very hungry", "I am starving", | |
| "I am thirsty", "I want water", "I want food", | |
| "I want to eat", "I want to drink", | |
| "I want rice", "I want bread", "I want soup", | |
| "I want chicken", "I want fish", "I want beef", | |
| "I want vegetables", "I want fruit", "I want eggs", | |
| "I want porridge", "I want fufu", "I want banku", | |
| "I want kenkey", "I want jollof rice", "I want waakye", | |
| "I want yam", "I want plantain", "I want cassava", | |
| "I want groundnut soup", "I want palm nut soup", | |
| "Can I have tea", "Can I have coffee", "Can I have milk", | |
| "I am vegetarian", "I do not eat pork", "I do not eat meat", | |
| "The food is delicious", "This is too spicy", "This is too salty", | |
| "The bill please", "How much is it", "Keep the change", | |
| "How much is this", "What is the price", "That is too expensive", | |
| "Can you reduce the price", "Do you have a discount", | |
| "I want to buy this", "I will take it", | |
| "Do you have this in another colour", "Do you have a bigger size", | |
| "Do you have a smaller size", "I am just looking", | |
| "Can I pay by card", "Do you accept mobile money", | |
| "I want a receipt", "I want to return this", | |
| "This is damaged", "I want a refund", | |
| "Where is the airport", "I need to go to the airport", | |
| "Where is the bus station", "I need a taxi", | |
| "Take me to Accra", "Take me to Kumasi", | |
| "How much is the fare", "How long is the journey", | |
| "When does the bus leave", "When does it arrive", | |
| "I missed my bus", "I missed my flight", | |
| "My luggage is lost", "I need to check in", | |
| "I am a tourist", "I am visiting family", | |
| "I am here for work", "I have a visa", | |
| "I am lost", "Can you show me on the map", | |
| "Turn left", "Turn right", "Go straight", | |
| "Stop here", "Wait for me", | |
| "Where is my hotel", "I have a reservation", | |
| "Check in please", "Check out please", | |
| "I have a meeting", "I am looking for work", | |
| "I need a job", "I am the manager", | |
| "I want to place an order", "When can you deliver", | |
| "We need it urgently", "Please sign here", | |
| "This is the invoice", "This is the receipt", | |
| "The payment has been made", "I need a bank transfer", | |
| "I need a quotation", "Send me the proposal", | |
| "I agree to the terms", "The deal is done", | |
| "What is your phone number", "What is your email address", | |
| "I will call you back", "I am running late", | |
| "I am a student", "I want to learn", "I want to study", | |
| "I do not understand the lesson", "Can you explain again", | |
| "I have homework", "When is the exam", | |
| "I passed the exam", "I failed the exam", | |
| "I graduated", "I have a degree", "I need school fees", | |
| "God bless you", "God is good", "Praise God", | |
| "Peace be with you", "Have a blessed day", | |
| "I am Christian", "I am Muslim", "I am going to church", | |
| "I am fasting", "Happy Easter", "Happy Christmas", | |
| "Happy New Year", "Happy birthday", "Happy anniversary", | |
| "I am praying for you", "God will provide", | |
| "The funeral is tomorrow", "I am sorry for your loss", | |
| "May their soul rest in peace", | |
| "The wedding is on Saturday", "Congratulations on your wedding", | |
| "We are celebrating", "This is our tradition", | |
| "Help", "Help me", "I need help", "Emergency", | |
| "Fire", "There is a fire", "Call the fire service", | |
| "Call the police", "I need the police", | |
| "I have been robbed", "My phone was stolen", | |
| "My wallet was stolen", "I lost my passport", | |
| "There has been an accident", "Someone is hurt", | |
| "The car broke down", "I have a flat tyre", | |
| "I am stuck", "The road is blocked", | |
| "Stay inside", "It is not safe outside", | |
| "I am in danger", "Please save me", | |
| "I am happy", "I am very happy", "I am excited", | |
| "I am sad", "I am very sad", "I am crying", | |
| "I am angry", "I am frustrated", "I am disappointed", | |
| "I am scared", "I am worried", "I am stressed", | |
| "I am nervous", "I am confused", "I am shocked", | |
| "I am tired", "I am exhausted", "I am bored", | |
| "I am lonely", "I miss you", "I love you", | |
| "I care about you", "I am proud of you", | |
| "Do not give up", "Stay strong", "Everything will be fine", | |
| "I believe in you", "You can do it", | |
| "It is raining", "It is sunny", "It is very hot", | |
| "It is cold today", "The weather is nice", | |
| "There is a flood", "There is strong wind", | |
| "Plant the seeds", "Water the plants", | |
| "The harvest is good", "Feed the animals", | |
| "Do not litter", "Keep the environment clean", | |
| "Plant more trees", "Save water", | |
| "My phone is dead", "I need to charge my phone", | |
| "Do you have wifi", "What is the wifi password", | |
| "The internet is slow", "My battery is low", | |
| "I need to make a call", "I need to send a message", | |
| "Send me on whatsapp", "I will call you later", | |
| "Take a photo of me", "Send me the photo", | |
| "Today", "Tomorrow", "Yesterday", | |
| "This week", "Next week", "This month", | |
| "Monday", "Tuesday", "Wednesday", "Thursday", | |
| "Friday", "Saturday", "Sunday", | |
| "January", "February", "March", "April", "May", "June", | |
| "July", "August", "September", "October", "November", "December", | |
| "Do not be late", "I will wait for you", | |
| "It will take one hour", "I need more time", | |
| "One", "Two", "Three", "Four", "Five", | |
| "Six", "Seven", "Eight", "Nine", "Ten", | |
| "Twenty", "Thirty", "Fifty", "One hundred", "One thousand", | |
| "I have no money", "I need money", "Pay me back", | |
| "It is free", "Mobile money", "Pay cash", | |
| "Turn left", "Turn right", "Go straight", | |
| "It is nearby", "It is far away", | |
| "Upstairs", "Downstairs", "Inside", "Outside", | |
| "Behind the market", "In front of the school", | |
| "Next to the church", "Across the road", | |
| "You have arrived", "This is the place", | |
| "Good", "Bad", "Very good", "Big", "Small", | |
| "Long", "Short", "Heavy", "Light", "Full", "Empty", | |
| "Hot", "Cold", "Fast", "Slow", "New", "Old", | |
| "Clean", "Dirty", "Beautiful", "Strong", "Weak", | |
| "Rich", "Poor", "Cheap", "Expensive", | |
| "Easy", "Difficult", "Safe", "Dangerous", | |
| "Correct", "Wrong", "True", "False", "Ready", | |
| "Come", "Go", "Stop", "Wait", "Run", "Walk", | |
| "Sit", "Stand", "Sleep", "Wake up", "Eat", "Drink", | |
| "Cook", "Buy", "Sell", "Give", "Take", "Send", | |
| "Work", "Rest", "Play", "Sing", "Dance", | |
| "Read", "Write", "Listen", "Speak", "Call", | |
| "Open", "Close", "Lock", "Help", "Fix", "Clean", | |
| "Wash", "Cut", "Build", "Start", "Finish", | |
| "Return", "Leave", "Arrive", "Enter", "Exit", | |
| "Know", "Understand", "Remember", "Forget", "Learn", | |
| "Think", "Believe", "Want", "Need", "Love", "Like", | |
| "You can do it", "Do not give up", "Keep trying", | |
| "Stay strong", "Be brave", "Be patient", | |
| "Work hard", "Study hard", "Try your best", | |
| "You are talented", "You are smart", "You are beautiful", | |
| "Be kind", "Be honest", "Be respectful", | |
| "Health is wealth", "Education is the key", | |
| "Every day is a blessing", "All shall be well", | |
| "Trust the process", "Never stop learning", | |
| "Wake up early", "Brush your teeth", "Take a bath", | |
| "Get dressed", "Eat breakfast", "Go to school", | |
| "Go to work", "Come home early", "Eat dinner", | |
| "Do your homework", "Go to bed early", | |
| "Lock the door", "Turn off the lights", | |
| "Do the laundry", "Wash the dishes", | |
| "Sweep the house", "Buy groceries", "Prepare the food", | |
| "Feed the children", "Pay the bills", | |
| "Exercise daily", "Drink more water", | |
| "Get enough sleep", "Take your medication", | |
| "Call your parents", "Spend time with family", | |
| "We help each other", "Teamwork is important", | |
| "Let us work together", "Respect the elderly", | |
| "Take care of the children", "Stand up for justice", | |
| "Do not bribe", "Pay your taxes", "Obey the law", | |
| "Keep the community clean", "Save electricity", | |
| "Use water wisely", "Support local businesses", | |
| "Our culture is our identity", "Preserve our traditions", | |
| "Teach children our language", "Pass on our values", | |
| ] | |
# Deduplicate while keeping first-occurrence order (dict keys preserve
# insertion order), so a phrase listed in several categories is only
# translated once per language.
PHRASES = list(dict.fromkeys(PHRASES))
def load_models():
    """Download the model snapshot and load both translation models.

    The English->French model is loaded from the downloaded snapshot as an
    ONNX Runtime model; if that fails for any reason, the PyTorch checkpoint
    "Helsinki-NLP/opus-mt-tc-big-en-fr" is loaded instead. The multilingual
    model is always "facebook/nllb-200-distilled-600M".

    Returns:
        tuple: ``(mt_fr_tokenizer, mt_fr_model, mt_nllb_tokenizer,
        mt_nllb_model)`` in that order.
    """
    print("Loading models...")
    model_repo = "EnochQuayson/s2s-onnx-model"
    models_dir = Path("./models_cache")

    # Function-scope import (the name is also imported at module level).
    from huggingface_hub import snapshot_download
    snapshot_download(repo_id=model_repo, local_dir=str(models_dir), repo_type="model")

    # French MT
    mt_fr_path = str(models_dir / "mt/opus-mt-tc-big-en-fr")
    try:
        mt_fr_model = ORTModelForSeq2SeqLM.from_pretrained(mt_fr_path)
        mt_fr_tokenizer = MarianTokenizer.from_pretrained(mt_fr_path)
    except Exception as exc:
        # Best-effort fallback, but never swallow KeyboardInterrupt/SystemExit
        # and always say why the ONNX model was not used.
        print(f"ONNX French model unavailable ({exc}); falling back to the PyTorch checkpoint.")
        mt_fr_tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-tc-big-en-fr")
        mt_fr_model = MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-tc-big-en-fr")

    # NLLB multilingual
    mt_nllb_tokenizer = NllbTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
    mt_nllb_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")

    print("Models loaded!")
    return mt_fr_tokenizer, mt_fr_model, mt_nllb_tokenizer, mt_nllb_model
# Short language code -> language token handed to the NLLB tokenizer/model.
# French is absent because it is served by the dedicated Marian model.
LANG_CODES = dict(
    tw="twi_Latn",
    ee="ewe_Latn",
    hau="hau_Latn",
    fuv="fuv_Latn",
)
def translate_text(text, lang, mt_fr_tokenizer, mt_fr_model, mt_nllb_tokenizer, mt_nllb_model):
    """Translate one English phrase with the already-loaded models.

    ``lang == "fr"`` uses the French tokenizer/model pair; every other code
    is looked up in ``LANG_CODES`` and translated with the NLLB pair, forcing
    the target-language token as the first generated token.

    Returns:
        The decoded translation, or ``None`` when the language has no entry
        in ``LANG_CODES`` or when any exception is raised (the error is
        printed, not propagated).
    """
    try:
        if lang != "fr":
            target_code = LANG_CODES.get(lang)
            if not target_code:
                return None
            bos_id = mt_nllb_tokenizer.convert_tokens_to_ids(target_code)
            mt_nllb_tokenizer.src_lang = "eng_Latn"
            encoded = mt_nllb_tokenizer(text, return_tensors="pt", padding=True, truncation=True)
            generation_options = {
                "forced_bos_token_id": bos_id,
                "max_length": 256,
                "num_beams": 4,
                "early_stopping": True,
            }
            with torch.no_grad():
                generated = mt_nllb_model.generate(**encoded, **generation_options)
            return mt_nllb_tokenizer.decode(generated[0], skip_special_tokens=True)

        # French: dedicated model, default generation settings.
        encoded = mt_fr_tokenizer([text], return_tensors="pt", padding=True, truncation=True)
        with torch.no_grad():
            generated = mt_fr_model.generate(**encoded)
        return mt_fr_tokenizer.decode(generated[0], skip_special_tokens=True)
    except Exception as err:
        print(f" Error translating '{text}' to {lang}: {err}")
        return None
def load_cache(filepath):
    """Load a previously saved cache so an interrupted run can resume.

    Args:
        filepath: Path of the JSON cache file.

    Returns:
        dict: The parsed cache, or an empty dict when the file is missing,
        unreadable, not valid JSON, or does not contain a JSON object. In
        the failure cases a warning is printed so the reset is visible.
    """
    if not os.path.exists(filepath):
        return {}
    try:
        with open(filepath, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as exc:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(f"Warning: could not read cache '{filepath}' ({exc}); starting with an empty cache.")
        return {}
    if not isinstance(data, dict):
        # Callers index the cache by string key, so any other root is unusable.
        print(f"Warning: cache '{filepath}' is not a JSON object; starting with an empty cache.")
        return {}
    return data
def save_cache(cache, filepath):
    """Write the cache to ``filepath`` as pretty-printed UTF-8 JSON.

    The data is first written to ``<filepath>.tmp`` and then moved over the
    target with ``os.replace``, so an interruption in the middle of a
    checkpoint cannot leave a truncated cache file behind.

    Args:
        cache: JSON-serialisable mapping to persist.
        filepath: Destination path of the cache file.

    Raises:
        OSError: If the file cannot be written or replaced.
        TypeError: If ``cache`` contains values ``json`` cannot serialise.
    """
    tmp_path = f"{filepath}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(cache, f, ensure_ascii=False, indent=2)
        os.replace(tmp_path, filepath)
    except BaseException:
        # Do not leave a half-written temp file around; re-raise the original error.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
def main():
    """Build (or resume) the translation cache for every phrase/language pair.

    Loads the models once, reloads any existing cache from ``OUTPUT_FILE``,
    and translates every ``"<phrase>|<lang>"`` key that is not cached yet.
    The cache is checkpointed after every ``SAVE_EVERY`` new entries and is
    always saved when the loop ends, including when it is interrupted or
    raises, so completed translations are not lost.
    """
    total = len(PHRASES) * len(LANGUAGES)

    print("\nS2S Local Cache Builder")
    print(f"Phrases: {len(PHRASES)} | Languages: {len(LANGUAGES)}")
    print(f"Total: {total}")
    print(f"Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print("=" * 60)

    # Load models once
    mt_fr_tok, mt_fr_mod, nllb_tok, nllb_mod = load_models()

    cache = load_cache(OUTPUT_FILE)
    print(f"Existing cache: {len(cache)} entries\n")

    done = 0
    new_count = 0
    failed = 0

    try:
        for phrase in PHRASES:
            for lang in LANGUAGES:
                key = f"{phrase}|{lang}"
                # Counted before the skip so progress reflects the position
                # in the full job, not just the new work.
                done += 1
                if key in cache:
                    continue

                pct = (done / total) * 100
                print(f"[{done}/{total} {pct:.1f}%] '{phrase}' → {lang} ...", end=" ", flush=True)

                result = translate_text(phrase, lang, mt_fr_tok, mt_fr_mod, nllb_tok, nllb_mod)
                if not result:
                    # None (error / unsupported language) or an empty translation.
                    failed += 1
                    print("✗ Failed")
                    continue

                cache[key] = {
                    "source_text": phrase,
                    "translated_text": result,
                    "target_language": lang,
                    "audio_url": None,
                    "cached_at": datetime.now().isoformat(),
                }
                new_count += 1
                print(f"✓ '{result[:50]}'")

                if new_count % SAVE_EVERY == 0:
                    save_cache(cache, OUTPUT_FILE)
                    kb = os.path.getsize(OUTPUT_FILE) / 1024
                    print(f" → Saved {len(cache)} entries ({kb:.0f} KB)")
    finally:
        # Persist whatever was translated, even on Ctrl+C or an unexpected error.
        save_cache(cache, OUTPUT_FILE)

    kb = os.path.getsize(OUTPUT_FILE) / 1024
    print("\n" + "=" * 60)
    print(f"COMPLETE! {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"Total: {len(cache)} | New: {new_count} | Failed: {failed}")
    print(f"File: {OUTPUT_FILE} ({kb:.0f} KB)")
    print(f"\nDownload {OUTPUT_FILE} from the Files tab in your HF Space.")


if __name__ == "__main__":
    main()