"""Gradio app: caption an uploaded image with BLIP and show the caption in Arabic."""

import logging
from typing import Optional

import gradio as gr
from deep_translator import GoogleTranslator
from PIL import Image
from transformers import BlipForConditionalGeneration, BlipProcessor

logger = logging.getLogger(__name__)

# Single source of truth for the checkpoint used by both the processor and the model.
MODEL_NAME = "Salesforce/blip-image-captioning-base"

# Loaded once at import time so every request reuses the same objects.
processor = BlipProcessor.from_pretrained(MODEL_NAME)
model = BlipForConditionalGeneration.from_pretrained(MODEL_NAME)


def _translate_to_arabic(text: str) -> str:
    """Return *text* translated from English to Arabic, or *text* unchanged on failure."""
    try:
        translated = GoogleTranslator(source="en", target="ar").translate(text)
    except Exception:
        # UI boundary: a failed translation should degrade to the English
        # caption rather than discard a caption that was already generated.
        logger.exception("Translation failed; falling back to the English caption")
        return text
    # Guard against an empty/None translation result as well.
    return translated or text


def caption_image(image: Optional[Image.Image]) -> str:
    """Generate a Markdown-formatted Arabic caption for *image*.

    Args:
        image: The uploaded picture as a PIL image (the Gradio input is
            configured with ``type="pil"``), or ``None`` when nothing was
            uploaded.

    Returns:
        A Markdown string holding the caption, or a prompt asking the user to
        upload an image when *image* is ``None``. If translation fails, the
        English caption is shown instead.
    """
    if image is None:
        return " من فضلك ارفع صورة"

    inputs = processor(image, return_tensors="pt")
    output = model.generate(**inputs, max_new_tokens=50)
    caption_en = processor.decode(output[0], skip_special_tokens=True)
    caption = _translate_to_arabic(caption_en)

    # No whitespace directly after the opening "**": otherwise Markdown does
    # not treat it as bold and the asterisks are displayed literally.
    return f"**الوصف:** {caption}"


demo = gr.Interface(
    fn=caption_image,
    inputs=gr.Image(label="ارفع الصوره التي تريد وصفها", type="pil"),
    outputs=gr.Markdown(label="الوصف"),
    title="مولّد أوصاف الصور",
)

if __name__ == "__main__":
    # Only start the server when run as a script, so `demo` can be imported
    # (e.g. for mounting or testing) without launching the app.
    demo.launch()