| | from datasets import load_dataset |
| | from transformers import pipeline |
| | import soundfile as sf |
| | import torch |
| | import gradio as gr |
| | import numpy as np |
| | import sentencepiece |
| | from transformers import MarianMTModel, MarianTokenizer |
| |
|
| |
|
_image_classifier = None  # built on first use, then reused for every request


def _get_image_classifier():
    """Return the shared image-classification pipeline, creating it once."""
    global _image_classifier
    if _image_classifier is None:
        # Loading the model is expensive; doing it per call made every
        # button click pay the full load cost again.
        _image_classifier = pipeline(
            "image-classification", model="google/vit-base-patch16-224"
        )
    return _image_classifier


def predict_image(image):
    """Classify an image and return the label of the first prediction.

    Args:
        image: The image to classify, in any form accepted by the
            ``image-classification`` pipeline. ``None`` means no image
            was supplied.

    Returns:
        The ``'label'`` of the first entry in the pipeline's result list,
        or an empty string when ``image`` is ``None`` or the pipeline
        returns no predictions.
    """
    if image is None:
        # Nothing to classify; return an empty result instead of letting
        # the pipeline fail on a missing input.
        return ""

    predictions = _get_image_classifier()(image)
    if not predictions:
        return ""
    return predictions[0]["label"]
| |
|
_EN_AR_MODEL_NAME = "Helsinki-NLP/opus-mt-en-ar"
_en_ar_components = None  # (tokenizer, model), loaded on first use


def _get_en_ar_components():
    """Return the shared (tokenizer, model) pair for English-to-Arabic."""
    global _en_ar_components
    if _en_ar_components is None:
        # Load once and reuse; reloading both objects on every call was
        # the dominant cost of each translation request.
        _en_ar_components = (
            MarianTokenizer.from_pretrained(_EN_AR_MODEL_NAME),
            MarianMTModel.from_pretrained(_EN_AR_MODEL_NAME),
        )
    return _en_ar_components


def translate_to_arabic(text, max_length=100):
    """Translate English text to Arabic with the Helsinki-NLP MarianMT model.

    Args:
        text: The English text to translate.
        max_length: Value forwarded to ``model.generate`` as ``max_length``.
            Defaults to 100.

    Returns:
        The decoded translation of the first generated sequence, with
        special tokens removed. An empty string is returned when ``text``
        is ``None``, empty, or only whitespace.
    """
    if not text or not text.strip():
        # Nothing to translate; skip loading and running the model.
        return ""

    tokenizer, model = _get_en_ar_components()
    # truncation=True keeps over-long inputs within the tokenizer's
    # configured maximum length instead of passing them through unbounded.
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    outputs = model.generate(**inputs, max_length=max_length)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
| |
|
_TTS_SPEAKER_INDEX = 100  # row of the x-vector dataset used as the voice
_tts_resources = None  # (pipeline, speaker_embedding), built on first use


def _get_tts_resources():
    """Return the shared (TTS pipeline, speaker embedding) pair."""
    global _tts_resources
    if _tts_resources is None:
        # Both the model and the embedding dataset are costly to load, so
        # they are created once and reused for every request.
        synthesiser = pipeline(
            "text-to-speech", model="MBZUAI/speecht5_tts_clartts_ar"
        )
        xvectors = load_dataset(
            "herwoww/arabic_xvector_embeddings", split="validation"
        )
        # unsqueeze(0) adds a leading dimension to the stored embedding.
        speaker_embedding = torch.tensor(
            xvectors[_TTS_SPEAKER_INDEX]["speaker_embeddings"]
        ).unsqueeze(0)
        _tts_resources = (synthesiser, speaker_embedding)
    return _tts_resources


def text_to_speech(text):
    """Synthesise speech for the given text.

    Args:
        text: The text to speak.

    Returns:
        A ``(sampling_rate, audio)`` tuple where ``audio`` is a float32
        NumPy array built from the pipeline's ``'audio'`` output, or
        ``None`` when ``text`` is ``None``, empty, or only whitespace.
    """
    if not text or not text.strip():
        # Nothing to speak; skip loading and running the model.
        return None

    synthesiser, speaker_embedding = _get_tts_resources()
    speech = synthesiser(
        text, forward_params={"speaker_embeddings": speaker_embedding}
    )
    return speech["sampling_rate"], np.array(speech["audio"], dtype=np.float32)
| |
|
| | from PIL import Image |
# Gradio UI: three steps, each with its own button. Each later step reads
# the textbox written by the step before it.
with gr.Blocks() as app:
    gr.Markdown("Image Classification, Arabic Translation, TTS")

    # Step 1: upload an image and classify it.
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(
                type="pil", label="Upload the Image to classify it"
            )
            classify_image = gr.Button("Classify the Image")
            pred = gr.Textbox(label="Classification Result")

    classify_image.click(fn=predict_image, inputs=image_input, outputs=pred)

    # Step 2: translate the classification result into Arabic.
    with gr.Row():
        translated_output = gr.Textbox(label="Translated Text")
        translate_btn = gr.Button("Translate to Arabic")

    translate_btn.click(
        fn=translate_to_arabic, inputs=pred, outputs=translated_output
    )

    # Step 3: convert the translated text to speech.
    with gr.Row():
        tts_btn = gr.Button("Convert to Speech")
        audio_output = gr.Audio(label="Audio Output")

    tts_btn.click(
        fn=text_to_speech, inputs=translated_output, outputs=audio_output
    )

# Start the web server for the interface defined above.
app.launch()