Spaces:
Running
Running
def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
    """Run one OCR task and package its outputs for display.

    Delegates the actual work to ``process_image`` (defined elsewhere in
    this file) and post-processes its two results.

    Args:
        image: Input image, forwarded unchanged to ``process_image``.
        task: Name of the OCR task to run, forwarded unchanged.
        ocr_type: Forwarded unchanged to ``process_image``.
        ocr_box: Forwarded unchanged to ``process_image``.
        ocr_color: Forwarded unchanged to ``process_image``.

    Returns:
        A ``(res, html_string)`` tuple. ``res`` is the recognised text
        wrapped as ``"$$ <text> $$"``. ``html_string`` is an ``<iframe>``
        element embedding the rendered HTML via ``srcdoc``, or ``None``
        when no rendered HTML was produced.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    res, html_content = process_image(image, task, ocr_type, ocr_box, ocr_color)

    # NOTE(review): the "$$" delimiters presumably make the front end
    # typeset the text as display math -- confirm against the UI component.
    res = f"$$ {res} $$"

    if not html_content:
        return res, None

    # srcdoc is a double-quoted attribute: a raw '"' in the document would
    # terminate it early and a raw '&' would be decoded as a character
    # reference. Escaping lets the browser recover the exact original
    # document when it decodes the attribute value.
    srcdoc = escape(html_content, quote=True)
    html_string = f'<iframe srcdoc="{srcdoc}" width="100%" height="600px"></iframe>'
    return res, html_string
def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
    """Run one OCR task through the model and collect any rendered HTML.

    Rendering tasks ask the model to save its render into a per-request
    scratch file inside ``results_folder``; that file is read back and then
    removed so scratch files do not accumulate.

    Args:
        image: Input image, passed straight to the model.
        task: One of "Plain Text OCR", "Format Text OCR",
            "Fine-grained OCR (Box)", "Fine-grained OCR (Color)",
            "Multi-crop OCR" or "Render Formatted OCR".
        ocr_type: Passed to the model for the two fine-grained tasks.
        ocr_box: Passed to the model for "Fine-grained OCR (Box)".
        ocr_color: Passed to the model for "Fine-grained OCR (Color)".

    Returns:
        A ``(res, html_content, unique_id)`` tuple: the model's result, the
        rendered HTML text (``None`` when there is none), and a freshly
        generated UUID string.

    Raises:
        ValueError: If ``task`` is not one of the supported task names.
    """
    unique_id = str(uuid.uuid4())

    if task == "Plain Text OCR":
        # Plain OCR requests no render, so no scratch file is created.
        res = model.chat(tokenizer, image, ocr_type='ocr')
        return res, None, unique_id

    demo_html = os.path.join(results_folder, "demo.html")
    html_file = os.path.join(results_folder, f"{task.replace(' ', '_').lower()}.html")
    tikz_file = os.path.join(results_folder, "tikz.html")

    # Reserve a unique file name for this request. delete=False keeps the
    # (empty) file on disk after the handle closes so the model can write
    # to the path; it is removed explicitly in the ``finally`` below.
    with tempfile.NamedTemporaryFile(mode='w+', suffix='.html', delete=False, dir=results_folder) as temp_file:
        temp_html_path = temp_file.name

    try:
        if task in ("Format Text OCR", "Render Formatted OCR"):
            res = model.chat(tokenizer, image, ocr_type='format', render=True, save_render_file=temp_html_path)
        elif task == "Fine-grained OCR (Box)":
            res = model.chat(tokenizer, image, ocr_type=ocr_type, ocr_box=ocr_box, render=True, save_render_file=temp_html_path)
        elif task == "Fine-grained OCR (Color)":
            res = model.chat(tokenizer, image, ocr_type=ocr_type, ocr_color=ocr_color, render=True, save_render_file=temp_html_path)
        elif task == "Multi-crop OCR":
            res = model.chat_crop(tokenizer, image, ocr_type='format', render=True, save_render_file=temp_html_path)
        else:
            # Previously this surfaced later as an UnboundLocalError on `res`.
            raise ValueError(f"Unsupported OCR task: {task!r}")

        html_content = None
        if os.path.exists(temp_html_path):
            with open(temp_html_path, 'r') as f:
                html_content = f.read()
    finally:
        # The scratch file has been read (or the call failed); drop it.
        if os.path.exists(temp_html_path):
            os.remove(temp_html_path)

    if not html_content:
        # Nothing was rendered into the scratch file: fall back to the
        # fixed-name files, in the same priority order as before. These
        # are no longer allowed to overwrite a fresh render.
        html_content = None
        for candidate in (demo_html, html_file, tikz_file):
            if os.path.exists(candidate):
                with open(candidate, 'r') as f:
                    html_content = f.read()
                break

    return res, html_content, unique_id
def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
    """Run one OCR task through the model and load the rendered HTML, if any.

    Rendering tasks ask the model to save its render to ``demo.html`` inside
    ``results_folder``. Afterwards the first existing file among
    ``demo.html``, ``<task name lower-cased with underscores>.html`` and
    ``tikz.html`` is read back.

    Args:
        image: Input image, passed straight to the model.
        task: Name of the OCR task to run.
        ocr_type: Passed to the model for the two fine-grained tasks.
        ocr_box: Passed to the model for "Fine-grained OCR (Box)".
        ocr_color: Passed to the model for "Fine-grained OCR (Color)".

    Returns:
        A ``(res, html_content)`` tuple: the model's result and the HTML
        text that was read, or ``None`` when none of the files exist.
        "Plain Text OCR" always yields ``None`` for the HTML.
    """
    demo_path = os.path.join(results_folder, "demo.html")
    task_path = os.path.join(results_folder, f"{task.replace(' ', '_').lower()}.html")
    tikz_path = os.path.join(results_folder, "tikz.html")

    # Plain OCR never renders, so it skips the file lookup entirely.
    if task == "Plain Text OCR":
        return model.chat(tokenizer, image, ocr_type='ocr'), None

    # Every rendering task shares these two keyword arguments.
    render_opts = {"render": True, "save_render_file": demo_path}

    if task in ("Format Text OCR", "Render Formatted OCR"):
        res = model.chat(tokenizer, image, ocr_type='format', **render_opts)
    elif task == "Fine-grained OCR (Box)":
        res = model.chat(tokenizer, image, ocr_type=ocr_type, ocr_box=ocr_box, **render_opts)
    elif task == "Fine-grained OCR (Color)":
        res = model.chat(tokenizer, image, ocr_type=ocr_type, ocr_color=ocr_color, **render_opts)
    elif task == "Multi-crop OCR":
        res = model.chat_crop(tokenizer, image, ocr_type='format', **render_opts)

    # Use the first candidate that exists, in fixed priority order.
    html_content = None
    for candidate in (demo_path, task_path, tikz_path):
        if os.path.exists(candidate):
            with open(candidate, 'r') as handle:
                html_content = handle.read()
            break

    return res, html_content