File size: 3,169 Bytes
fce2714
 
46b8af1
5defb07
febb4db
e686f1c
 
fce2714
 
 
765f98e
fce2714
 
 
 
 
 
 
 
 
46b8af1
 
 
 
fce2714
 
 
febb4db
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fce2714
 
326d9c7
c4d755b
febb4db
 
 
 
 
85c949f
fce2714
 
 
 
 
85c949f
fce2714
 
 
 
 
febb4db
 
fce2714
 
e686f1c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import os
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from huggingface_hub import HfApi
import string
import os
from moviepy.editor import VideoFileClip, concatenate_videoclips, ImageClip

# --- Authentication and model setup (runs once at import time) ---

# Read the Hugging Face access token from the environment; fail fast if absent.
huggingface_token = os.getenv('NJOGERERA_TOKEN')
if not huggingface_token:
    raise ValueError("Hugging Face token is not set in the environment variables")

# Verify the token actually works before attempting the model download.
api = HfApi()
try:
    user_info = api.whoami(token=huggingface_token)
    print(f"Logged in as: {user_info['name']}")
except Exception as e:
    # Chain the original exception so the underlying auth failure is not lost.
    raise ValueError("Failed to authenticate with the provided Hugging Face token.") from e

# NOTE(review): `use_auth_token` is deprecated in newer transformers releases in
# favor of `token=`; kept as-is since the pinned transformers version is unknown.
model_path = "vertigo23/njogerera_translation_model_V003"
tokenizer = AutoTokenizer.from_pretrained(model_path, use_auth_token=huggingface_token)
model = AutoModelForSeq2SeqLM.from_pretrained(model_path, use_auth_token=huggingface_token)

translator = pipeline("translation", model=model, tokenizer=tokenizer)

# Task prefix expected by the seq2seq model (T5-style task conditioning).
prefix = "translate Luganda to English: "

# Neutral "pause" frame inserted before/after each finger-spelled word.
filler_image_path = "alphabet/break.png"

def clean_and_split(text):
    """Lowercase *text*, strip all ASCII punctuation, and return its words as a list."""
    strip_punct = str.maketrans('', '', string.punctuation)
    return text.lower().translate(strip_punct).split()

def map_word_to_media(word):
    """Return the media paths that sign *word*.

    Uses the single KSL video ``KSL/<word>.mp4`` when it exists on disk;
    otherwise finger-spells the word with alphabet images (skipping letters
    with no image), wrapped in a filler frame on each side.
    """
    video = f"KSL/{word}.mp4"
    if os.path.exists(video):
        return [video]
    letters = [
        f"alphabet/{letter}.png"
        for letter in word
        if os.path.exists(f"alphabet/{letter}.png")
    ]
    return [filler_image_path] + letters + [filler_image_path]

def stitch_media(media_paths):
    """Concatenate the videos/images in *media_paths* into one rendered video.

    Each ``.mp4`` is used as-is; each ``.png`` becomes a 0.7-second still
    frame. The result is written to ``KSL/final_translation.mp4``.

    Returns:
        The path of the rendered video file.

    Raises:
        ValueError: if *media_paths* yields no usable clips.
    """
    clips = []
    try:
        for path in media_paths:
            if path.endswith('.mp4'):
                clips.append(VideoFileClip(path))
            elif path.endswith('.png'):
                clips.append(ImageClip(path).set_duration(0.7))
        if not clips:
            raise ValueError("No media files to stitch.")

        final_clip = concatenate_videoclips(clips, method="compose")
        final_clip.fps = 24
        final_clip_path = "KSL/final_translation.mp4"
        final_clip.write_videofile(final_clip_path, codec="libx264", fps=24)
        return final_clip_path
    finally:
        # Release ffmpeg reader processes / file handles; the original leaked
        # every source clip on every call.
        for clip in clips:
            clip.close()

def translate_lg_to_en(text):
    """Translate Luganda *text* to English and render its KSL video.

    Returns a ``(english_translation, video_path)`` tuple suitable for the
    Gradio (Textbox, Video) outputs.
    """
    result = translator(prefix + text)
    english_translation = result[0]['translation_text']
    media_paths = [
        path
        for word in clean_and_split(english_translation)
        for path in map_word_to_media(word)
    ]
    return english_translation, stitch_media(media_paths)

# Gradio interface: Luganda text in, English text + KSL video out.
demo = gr.Interface(
    fn=translate_lg_to_en,
    inputs=gr.Text(),
    outputs=[
        gr.Textbox(label="English Translation"),
        gr.Video(label="KSL Sign Language Animation"),
    ],
    title="Njogerera Translation App",
    description="Type in a Luganda sentence and see the translation.",
    article="Above is some sample text to test the results of the model. Click to see the results.",
    examples=[
        ["Ebikolwa ebitali bya buntu tebikkirizibwa mu kitundu."],
        ["Olugudo olugenda e Masaka lugadwawo."],
        ["Abalwadde ba Malaria mu dwaliro lye Nsambya bafunye obujanjabi."],
    ],
    allow_flagging="never",
)
demo.launch()