Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from transformers import BlipForImageTextRetrieval, AutoProcessor, WhisperForConditionalGeneration, AutoTokenizer | |
| from gtts import gTTS | |
| import speech_recognition as sr | |
| import torch | |
| from PIL import Image | |
# Load the models and their processors.
_BLIP_CHECKPOINT = "Salesforce/blip-itm-base-coco"
_WHISPER_CHECKPOINT = "openai/whisper-base"

# BLIP image-text retrieval model and its matching processor.
image_processor = AutoProcessor.from_pretrained(_BLIP_CHECKPOINT)
image_model = BlipForImageTextRetrieval.from_pretrained(_BLIP_CHECKPOINT)

# Whisper model and tokenizer.
# NOTE(review): neither Whisper object is referenced in the visible part of
# this file - confirm they are needed before paying the load cost.
whisper_tokenizer = AutoTokenizer.from_pretrained(_WHISPER_CHECKPOINT)
whisper_model = WhisperForConditionalGeneration.from_pretrained(_WHISPER_CHECKPOINT)
# Image-text matching function.
def image_text_matching(img, text):
    """Report whether ``text`` matches ``img`` according to the BLIP model.

    Args:
        img: PIL image from the Gradio image input, or ``None`` when nothing
            was uploaded.
        text: Caption to compare against the image.

    Returns:
        ``'Match'`` when index 1 holds the larger of the model's scores,
        ``'No Match'`` otherwise. When an input is missing, a short notice
        (``'No image uploaded'`` / ``'No text entered'``) is returned instead
        of raising.
    """
    # Gradio passes None for an empty image component; calling .convert on it
    # would raise AttributeError.
    if img is None:
        return "No image uploaded"
    if not text or not text.strip():
        return "No text entered"

    raw_image = img.convert('RGB')
    inputs = image_processor(images=raw_image, text=text, return_tensors="pt")

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = image_model(**inputs)

    # First output, first batch item - presumably the two image-text matching
    # logits for the single pair; confirm against the BLIP documentation.
    scores = outputs[0][0]

    # argmax is unchanged by softmax, so the normalisation step is not needed.
    return 'Match' if torch.argmax(scores).item() == 1 else 'No Match'
# Text-to-speech function.
def text_to_audio(text, lang='en'):
    """Synthesize ``text`` to an MP3 file with gTTS and return the file path.

    Args:
        text: Text to speak.
        lang: gTTS language code. Defaults to ``'en'``; pass ``'ar'`` for
            Arabic text.

    Returns:
        Path of the MP3 file that was written, or ``None`` when ``text`` is
        empty or whitespace-only (there is nothing to synthesize).
    """
    # gTTS refuses empty input; return "no audio" rather than failing the
    # whole request.
    if not text or not text.strip():
        return None

    import tempfile

    # Use a unique file per call: a single shared "output.mp3" would be
    # overwritten when several requests are served at the same time.
    # delete=False keeps the file on disk so the caller can read it.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        audio_file = tmp.name

    tts = gTTS(text=text, lang=lang)
    tts.save(audio_file)
    return audio_file
# Speech-to-text function.
def audio_to_text(audio, language='ar'):
    """Transcribe an audio file using ``recognize_google``.

    Args:
        audio: Path to the audio file to transcribe (anything accepted by
            ``sr.AudioFile``).
        language: Language code passed to the recognizer. Defaults to
            ``'ar'``.

    Returns:
        The recognized text. When the input is missing, the file cannot be
        read, the speech is unintelligible, or the recognition request fails,
        a short human-readable message is returned instead of raising.
    """
    if not audio:
        return "No audio uploaded"

    recognizer = sr.Recognizer()

    # Reading can fail for a missing path (OSError) or for a file sr.AudioFile
    # cannot parse (ValueError).
    try:
        with sr.AudioFile(audio) as source:
            audio_data = recognizer.record(source)
    except (ValueError, OSError) as err:
        return f"Could not read audio file: {err}"

    try:
        return recognizer.recognize_google(audio_data, language=language)
    except sr.UnknownValueError:
        # The recognizer could not make out any speech.
        return "Could not understand the audio"
    except sr.RequestError as err:
        # The recognition service was unreachable or rejected the request.
        return f"Speech recognition request failed: {err}"
# Set up the Gradio interface.
def _run_all_tasks(img, text, audio):
    """Run the three tasks for one submission.

    Returns a tuple of (match label, path of the synthesized audio,
    transcript of the uploaded audio or a notice when none was uploaded).
    """
    match_label = image_text_matching(img, text)
    speech_path = text_to_audio(text)
    transcript = audio_to_text(audio) if audio else "No audio uploaded"
    return match_label, speech_path, transcript


_input_components = [
    gr.Image(type="pil", label="Upload Image"),
    gr.Textbox(label="Enter Text"),
    gr.Audio(label="Upload Audio", type="filepath"),  # modification here
]

iface = gr.Interface(
    fn=_run_all_tasks,
    inputs=_input_components,
    outputs=["text", "audio", "text"],
    title="AI Project: Image-Text Matching and Audio Tasks",
    description="Upload an image and enter text to see if they match. Also, convert text to audio and audio to text.",
)

# Launch the interface.
iface.launch()