# Import needed libraries
from PIL import Image
import gradio as gr
import torch
import requests
import re
from transformers import pipeline, BlipProcessor, BlipForConditionalGeneration, TrOCRProcessor, VisionEncoderDecoderModel
# Download example images for captioning
img_urls_1 = ['https://i.pinimg.com/564x/f7/f5/bd/f7f5bd929e05a852ff423e6e02deea54.jpg',
              'https://i.pinimg.com/564x/b4/29/69/b4296962cb76a72354a718109835caa3.jpg',
              'https://i.pinimg.com/564x/f2/68/8e/f2688eccd6dd60fdad89ef78950b9ead.jpg']

for idx1, url1 in enumerate(img_urls_1):
    image = Image.open(requests.get(url1, stream=True).raw)
    image.save(f"image_{idx1}.png")
# Download example images for text recognition
img_urls_2 = ['https://i.pinimg.com/564x/14/b0/07/14b0075ccd5ea35f7deffc9e5bd6de30.jpg',
              'https://newsimg.bbc.co.uk/media/images/45510000/jpg/_45510184_the_writings_466_180.jpg',
              'https://cdn.shopify.com/s/files/1/0047/1524/9737/files/Cetaphil_Face_Wash_Ingredients_Optimized.png?v=1680923920',
              'https://github.com/kawther12h/Image_Captioning-and-Text_Recognition/blob/main/handText22.jpg?raw=true',
              'https://github.com/kawther12h/Image_Captioning-and-Text_Recognition/blob/main/handText11.jpg?raw=true']

for idx2, url2 in enumerate(img_urls_2):
    image = Image.open(requests.get(url2, stream=True).raw)
    image.save(f"tx_image_{idx2}.png")
# Load BLIP model and processor for captioning
processor_blip = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model_blip = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

# Load Marefa model for translation (English to Arabic)
translate = pipeline("translation", model="marefa-nlp/marefa-mt-en-ar")
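# Note: the translation pipeline returns a list of dicts, e.g.
# [{'translation_text': '...'}], which is why the functions below index
# into result[0]['translation_text'].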
def caption_and_translate(img, min_len=40, max_len=500):
    # Generate an English caption with BLIP
    # (min_len/max_len default to fixed values while the length sliders below are disabled)
    raw_image = Image.open(img).convert('RGB')
    inputs_blip = processor_blip(raw_image, return_tensors="pt")
    out_blip = model_blip.generate(**inputs_blip, min_length=min_len, max_length=max_len)
    english_caption = processor_blip.decode(out_blip[0], skip_special_tokens=True)

    # Translate the caption from English to Arabic
    arabic_caption = translate(english_caption)
    arabic_caption = arabic_caption[0]['translation_text']
    translated_caption = f'<div dir="rtl">{arabic_caption}</div>'

    # Return both captions
    return english_caption, translated_caption
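# Optional quick sanity check with one of the example images downloaded above:
# en_caption, ar_caption_html = caption_and_translate("image_0.png")
# print(en_caption)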
# Gradio interface with multiple outputs
img_cap_en_ar = gr.Interface(
    fn=caption_and_translate,
    inputs=[gr.Image(type='filepath', label='Image')],
    # Length sliders kept disabled; re-enable them (inside inputs) to control min_len/max_len:
    # gr.Slider(label='Minimum Length', minimum=1, maximum=500, value=30),
    # gr.Slider(label='Maximum Length', minimum=1, maximum=500, value=100)],
    outputs=[gr.Textbox(label='English Caption'),
             gr.HTML(label='Arabic Caption')],
    title='Image Captioning | وصف الصورة',
    description="Upload an image to generate an English & Arabic caption | قم برفع صورة وأرسلها ليظهر لك وصف للصورة",
    examples=[["image_0.png"], ["image_2.png"]]
)
# Load Donut model for text extraction (OCR) from printed/digital images
text_rec = pipeline("image-to-text", model="jinhybr/OCR-Donut-CORD")

# Load Marefa model for translation (English to Arabic)
translate = pipeline("translation", model="marefa-nlp/marefa-mt-en-ar")
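# Note: this Donut checkpoint wraps its output in XML-like field tags
# (e.g. <s_nm>...</s_nm>), so extract_text() below strips anything that
# looks like a tag before translating.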
# Function to process the image and extract text
def extract_text(image):
    # Pass the image to the image-to-text pipeline
    result = text_rec(image)

    # Extract the plain text and remove tags
    text = result[0]['generated_text']
    text = re.sub(r'<[^>]*>', '', text)  # Remove all XML/HTML-like tags

    # Translate the extracted text from English to Arabic
    arabic_text3 = translate(text)
    arabic_text3 = arabic_text3[0]['translation_text']
    htranslated_text = f'<div dir="rtl">{arabic_text3}</div>'

    # Return the extracted text and its translation
    return text, htranslated_text
# Define the Gradio interface
text_recognition = gr.Interface(
    fn=extract_text,  # The function that processes the image
    inputs=gr.Image(type="pil"),  # Input is an image (PIL format)
    outputs=[gr.Textbox(label='Extracted text'), gr.HTML(label='Translation of extracted text')],
    title="Text Extraction and Translation | إستخراج النص وترجمته",
    description="Upload an image then submit it to extract the text and translate it to Arabic | قم برفع الصورة وأرسلها ليظهر لك النص من الصورة",
    examples=[["tx_image_0.png"], ["tx_image_2.png"]]
)
# Load TrOCR model for handwritten text extraction
processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-handwritten')
model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwritten')

# Load Marefa model for translation (English to Arabic)
translate = pipeline("translation", model="marefa-nlp/marefa-mt-en-ar")
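# Note: this TrOCR checkpoint is trained on single lines of handwriting (IAM),
# so it tends to work best on images containing one line of text at a time.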
def recognize_handwritten_text(image2):
    # Process the image and extract text with TrOCR
    pixel_values = processor(images=image2, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

    # Translate the extracted text from English to Arabic
    arabic_text2 = translate(generated_text)
    arabic_text2 = arabic_text2[0]['translation_text']
    htranslated_text = f'<div dir="rtl">{arabic_text2}</div>'

    # Return the extracted text and the translated text
    return generated_text, htranslated_text
# Gradio interface with image upload input and text outputs
handwritten_rec = gr.Interface(
    fn=recognize_handwritten_text,
    inputs=gr.Image(label="Upload Image"),
    outputs=[gr.Textbox(label='English Text'),
             gr.HTML(label='Arabic Text')],
    title="Handwritten Text Extraction | إستخراج النص المكتوب بخط اليد وترجمته",
    description="Upload an image then submit it to extract the handwritten text and translate it to Arabic | قم برفع الصورة وأرسلها ليظهر لك النص من الصورة",
    examples=[["tx_image_1.png"], ["tx_image_3.png"]]
)
# Combine all interfaces into a tabbed interface
demo = gr.TabbedInterface([img_cap_en_ar, text_recognition, handwritten_rec],
                          ["Extract_Caption", "Extract_Digital_text", "Extract_HandWritten_text"])
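# debug=True keeps the process attached and surfaces errors in the log output,
# which helps when diagnosing a failing Space; outside Spaces, launch(share=True)
# can be used to get a temporary public link.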
demo.launch(debug=True)