Spaces:

janmayjay
/

Sonofica

Sleeping

File size: 1,792 Bytes

39a7537

from utils.japanese_ocr import MangaOcr
import cv2
from PIL import Image

def transcript_page(test_image,translator,predictions):
    """Build Japanese and English transcripts for one page image.

    Panels are visited in ascending ``order`` (0 .. number of panels - 1).
    Within a panel, speech bubbles are visited in ascending ``seq``. Each
    bubble is cropped out of the page, passed to ``MangaOcr`` to obtain its
    text, and that text is passed to ``translator``. Both the recognised
    text and its translation are also printed to stdout.

    Args:
        test_image: Path of the page image, as accepted by ``cv2.imread``.
        translator: Callable taking the recognised text and returning a
            sequence whose first item is a mapping with a
            ``'translation_text'`` entry.
        predictions: Object exposing ``panels`` (mappings with ``'order'``
            and ``'id'``) and ``speech_bubbles`` (mappings with ``'bbox'``
            as ``(xmin, ymin, xmax, ymax)``, ``'panel_id'``, ``'seq'`` and
            optionally ``'char_id'``).

    Returns:
        A ``(japanese_transcript, english_transcript)`` tuple. Each is a
        list of ``{"char_id": ..., "text": ...}`` dicts in reading order;
        ``char_id`` is ``None`` when the bubble has no ``'char_id'`` entry.

    Raises:
        ValueError: If the image cannot be read.
    """
    image = cv2.imread(test_image)
    if image is None:
        # cv2.imread signals a missing/undecodable file by returning None
        # instead of raising; fail here with a message naming the file.
        raise ValueError(f"Could not read image: {test_image!r}")
    height, width = image.shape[:2]
    mocr = MangaOcr()

    panels = predictions.panels
    japanese_transcript = []
    english_transcript = []
    for order in range(len(panels)):
        # When sub-panels exist there are more panel entries than distinct
        # order values, so some indices have no panel; only the first panel
        # found for a given order is used.
        panel = next((p for p in panels if p['order'] == order), None)
        if panel is None:
            continue

        panel_bubbles = sorted(
            (b for b in predictions.speech_bubbles if b['panel_id'] == panel['id']),
            key=lambda b: b['seq'],
        )
        for bubble in panel_bubbles:
            # Slice indices must be integers, and a negative index would
            # wrap around to the other side of the image, so coerce and
            # clamp the box to the image bounds before cropping.
            xmin, ymin, xmax, ymax = (int(v) for v in bubble['bbox'])
            xmin, xmax = min(max(xmin, 0), width), min(max(xmax, 0), width)
            ymin, ymax = min(max(ymin, 0), height), min(max(ymax, 0), height)
            if xmax <= xmin or ymax <= ymin:
                # Degenerate box: nothing to crop, and an empty crop cannot
                # be colour-converted.
                continue

            char_id = bubble.get('char_id')
            crop = cv2.cvtColor(image[ymin:ymax, xmin:xmax], cv2.COLOR_BGR2RGB)
            text = mocr(Image.fromarray(crop))
            print("Character ",char_id," : ",text)
            japanese_transcript.append({"char_id":char_id,"text":text})

            translation = translator(text)[0]['translation_text']
            print("Character ",char_id," : ",translation)
            english_transcript.append({"char_id":char_id,"text":translation})

    return japanese_transcript,english_transcript