Sonofica / utils /transcript_page.py
janmayjay's picture
Add application file
39a7537
from utils.japanese_ocr import MangaOcr
import cv2
from PIL import Image
def transcript_page(test_image,translator,predictions):
    """Transcribe and translate the speech bubbles of one page in reading order.

    Panels are visited by ascending ``order`` value and, within each panel,
    speech bubbles are visited by ascending ``seq`` value. Each bubble is
    cropped out of the page image, read with ``MangaOcr`` and the recognised
    text is then passed to ``translator``. Both the recognised text and its
    translation are printed as they are produced.

    Args:
        test_image: Path of the page image, as accepted by ``cv2.imread``.
        translator: Callable that takes the recognised text and returns a
            sequence whose first item has a ``'translation_text'`` key.
        predictions: Object exposing ``panels`` (items with ``'order'`` and
            ``'id'`` keys) and ``speech_bubbles`` (items with ``'panel_id'``,
            ``'seq'``, ``'bbox'`` and optionally ``'char_id'`` keys).
            ``'bbox'`` is ``(xmin, ymin, xmax, ymax)`` in image pixels.

    Returns:
        A ``(japanese_transcript, english_transcript)`` tuple. Each element is
        a list of ``{"char_id": ..., "text": ...}`` dicts in reading order;
        ``char_id`` is ``None`` when the bubble has no ``'char_id'`` key.

    Raises:
        ValueError: If ``cv2.imread`` could not load ``test_image``.
    """
    image = cv2.imread(test_image)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with a clear message instead of a TypeError at the first crop.
        raise ValueError(f"Could not read image: {test_image!r}")

    # NOTE(review): a new MangaOcr is built on every call; if construction is
    # expensive, consider sharing one instance across pages - confirm.
    mocr = MangaOcr()

    japanese_transcript = []
    english_transcript = []

    # Candidate order values run over 0..len(panels)-1. Some values may have no
    # matching panel (the original author notes that sub-panels inflate the
    # panel count), so those are skipped. If several panels share an order
    # value, only the first one is used.
    for order in range(len(predictions.panels)):
        panel = next((p for p in predictions.panels if p['order'] == order), None)
        if panel is None:
            continue

        panel_bubbles = sorted(
            (b for b in predictions.speech_bubbles if b['panel_id'] == panel['id']),
            key=lambda b: b['seq'],
        )
        for bubble in panel_bubbles:
            # Slice bounds must be integers, and a negative bound would wrap
            # around to the opposite edge of the image, so coerce and clamp.
            xmin, ymin, xmax, ymax = (max(0, int(v)) for v in bubble['bbox'])
            char_id = bubble.get('char_id')

            # OpenCV loads BGR; convert to RGB before building the PIL image.
            crop_rgb = cv2.cvtColor(image[ymin:ymax, xmin:xmax], cv2.COLOR_BGR2RGB)
            text = mocr(Image.fromarray(crop_rgb))
            print("Character ",char_id," : ",text)
            japanese_transcript.append({"char_id":char_id,"text":text})

            translation = translator(text)[0]['translation_text']
            print("Character ",char_id," : ",translation)
            english_transcript.append({"char_id":char_id,"text":translation})

    return japanese_transcript,english_transcript