{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: transformers in /opt/miniconda3/lib/python3.13/site-packages (4.52.4)\n", "Requirement already satisfied: filelock in /opt/miniconda3/lib/python3.13/site-packages (from transformers) (3.18.0)\n", "Requirement already satisfied: huggingface-hub<1.0,>=0.30.0 in /opt/miniconda3/lib/python3.13/site-packages (from transformers) (0.33.0)\n", "Requirement already satisfied: numpy>=1.17 in /opt/miniconda3/lib/python3.13/site-packages (from transformers) (2.3.0)\n", "Requirement already satisfied: packaging>=20.0 in /opt/miniconda3/lib/python3.13/site-packages (from transformers) (24.2)\n", "Requirement already satisfied: pyyaml>=5.1 in /opt/miniconda3/lib/python3.13/site-packages (from transformers) (6.0.2)\n", "Requirement already satisfied: regex!=2019.12.17 in /opt/miniconda3/lib/python3.13/site-packages (from transformers) (2024.11.6)\n", "Requirement already satisfied: requests in /opt/miniconda3/lib/python3.13/site-packages (from transformers) (2.32.3)\n", "Requirement already satisfied: tokenizers<0.22,>=0.21 in /opt/miniconda3/lib/python3.13/site-packages (from transformers) (0.21.1)\n", "Requirement already satisfied: safetensors>=0.4.3 in /opt/miniconda3/lib/python3.13/site-packages (from transformers) (0.5.3)\n", "Requirement already satisfied: tqdm>=4.27 in /opt/miniconda3/lib/python3.13/site-packages (from transformers) (4.67.1)\n", "Requirement already satisfied: fsspec>=2023.5.0 in /opt/miniconda3/lib/python3.13/site-packages (from huggingface-hub<1.0,>=0.30.0->transformers) (2025.5.1)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /opt/miniconda3/lib/python3.13/site-packages (from huggingface-hub<1.0,>=0.30.0->transformers) (4.12.2)\n", "Requirement already satisfied: hf-xet<2.0.0,>=1.1.2 in /opt/miniconda3/lib/python3.13/site-packages (from 
huggingface-hub<1.0,>=0.30.0->transformers) (1.1.4)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/miniconda3/lib/python3.13/site-packages (from requests->transformers) (3.3.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /opt/miniconda3/lib/python3.13/site-packages (from requests->transformers) (3.7)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/miniconda3/lib/python3.13/site-packages (from requests->transformers) (2.3.0)\n", "Requirement already satisfied: certifi>=2017.4.17 in /opt/miniconda3/lib/python3.13/site-packages (from requests->transformers) (2025.4.26)\n", "Requirement already satisfied: torch in /opt/miniconda3/lib/python3.13/site-packages (2.7.1)\n", "Requirement already satisfied: torchvision in /opt/miniconda3/lib/python3.13/site-packages (0.22.1)\n", "Requirement already satisfied: torchaudio in /opt/miniconda3/lib/python3.13/site-packages (2.7.1)\n", "Requirement already satisfied: filelock in /opt/miniconda3/lib/python3.13/site-packages (from torch) (3.18.0)\n", "Requirement already satisfied: typing-extensions>=4.10.0 in /opt/miniconda3/lib/python3.13/site-packages (from torch) (4.12.2)\n", "Requirement already satisfied: setuptools in /opt/miniconda3/lib/python3.13/site-packages (from torch) (78.1.1)\n", "Requirement already satisfied: sympy>=1.13.3 in /opt/miniconda3/lib/python3.13/site-packages (from torch) (1.14.0)\n", "Requirement already satisfied: networkx in /opt/miniconda3/lib/python3.13/site-packages (from torch) (3.5)\n", "Requirement already satisfied: jinja2 in /opt/miniconda3/lib/python3.13/site-packages (from torch) (3.1.6)\n", "Requirement already satisfied: fsspec in /opt/miniconda3/lib/python3.13/site-packages (from torch) (2025.5.1)\n", "Requirement already satisfied: numpy in /opt/miniconda3/lib/python3.13/site-packages (from torchvision) (2.3.0)\n", "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /opt/miniconda3/lib/python3.13/site-packages (from torchvision) 
(11.2.1)\n", "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/miniconda3/lib/python3.13/site-packages (from sympy>=1.13.3->torch) (1.3.0)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /opt/miniconda3/lib/python3.13/site-packages (from jinja2->torch) (3.0.2)\n", "Requirement already satisfied: Pillow in /opt/miniconda3/lib/python3.13/site-packages (11.2.1)\n", "Requirement already satisfied: matplotlib in /opt/miniconda3/lib/python3.13/site-packages (3.10.3)\n", "Requirement already satisfied: contourpy>=1.0.1 in /opt/miniconda3/lib/python3.13/site-packages (from matplotlib) (1.3.2)\n", "Requirement already satisfied: cycler>=0.10 in /opt/miniconda3/lib/python3.13/site-packages (from matplotlib) (0.12.1)\n", "Requirement already satisfied: fonttools>=4.22.0 in /opt/miniconda3/lib/python3.13/site-packages (from matplotlib) (4.58.4)\n", "Requirement already satisfied: kiwisolver>=1.3.1 in /opt/miniconda3/lib/python3.13/site-packages (from matplotlib) (1.4.8)\n", "Requirement already satisfied: numpy>=1.23 in /opt/miniconda3/lib/python3.13/site-packages (from matplotlib) (2.3.0)\n", "Requirement already satisfied: packaging>=20.0 in /opt/miniconda3/lib/python3.13/site-packages (from matplotlib) (24.2)\n", "Requirement already satisfied: pillow>=8 in /opt/miniconda3/lib/python3.13/site-packages (from matplotlib) (11.2.1)\n", "Requirement already satisfied: pyparsing>=2.3.1 in /opt/miniconda3/lib/python3.13/site-packages (from matplotlib) (3.2.3)\n", "Requirement already satisfied: python-dateutil>=2.7 in /opt/miniconda3/lib/python3.13/site-packages (from matplotlib) (2.9.0.post0)\n", "Requirement already satisfied: six>=1.5 in /opt/miniconda3/lib/python3.13/site-packages (from python-dateutil>=2.7->matplotlib) (1.17.0)\n" ] } ], "source": [ "#Run only at first\n", "!pip install transformers\n", "!pip install torch torchvision torchaudio\n", "!pip install Pillow\n", "!pip install matplotlib\n", "#필수 라이브러리" ] }, { "cell_type": "code", "execution_count": 
11, "metadata": {}, "outputs": [], "source": [
 "# Core function to convert image to text using mathOCR\n",
 "import torch\n",
 "import time\n",
 "from PIL import Image as PilImage, ImageOps  # Renamed Image to PilImage to avoid conflict\n",
 "from transformers import TrOCRProcessor, VisionEncoderDecoderModel  # Note: This still uses TrOCRProcessor/VisionEncoderDecoderModel from Hugging Face\n",
 "\n",
 "def run_mathOCR(image,\n",
 "                processor,\n",
 "                model,\n",
 "                measure_time: bool = False,\n",
 "                max_new_tokens: int = 256,\n",
 "                num_beams: int = 4) -> str:\n",
 "    \"\"\"\n",
 "    Converts an image to text using mathOCR.\n",
 "\n",
 "    Args:\n",
 "        image (PilImage.Image): Image to be OCR'd.\n",
 "        processor (TrOCRProcessor): Hugging Face TrOCR Processor (used for mathOCR).\n",
 "        model (VisionEncoderDecoderModel): Hugging Face TrOCR Model (used for mathOCR).\n",
 "        measure_time (bool): If True, prints execution time.\n",
 "        max_new_tokens (int): Upper bound on the number of tokens passed to model.generate.\n",
 "        num_beams (int): Number of beams passed to model.generate.\n",
 "\n",
 "    Returns:\n",
 "        str: Decoded text of the first generated sequence, with special tokens skipped.\n",
 "    \"\"\"\n",
 "    if measure_time:\n",
 "        t0 = time.perf_counter()\n",
 "\n",
 "    # Prefer the GPU when one is available, otherwise stay on the CPU\n",
 "    device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
 "    model.to(device)\n",
 "\n",
 "    # Process image to pixel values and move to device\n",
 "    pixel_values = processor(images=image, return_tensors=\"pt\").pixel_values.to(device)\n",
 "\n",
 "    # Generate text IDs\n",
 "    generated_ids = model.generate(\n",
 "        pixel_values,\n",
 "        max_new_tokens=max_new_tokens,\n",
 "        num_beams=num_beams,\n",
 "        early_stopping=True\n",
 "    )\n",
 "\n",
 "    # Decode generated IDs to text; a single image goes in, so keep the first entry\n",
 "    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]\n",
 "\n",
 "    if measure_time:\n",
 "        t1 = time.perf_counter()\n",
 "        print(f\"[Timer] Elapsed: {t1 - t0:0.4f} sec\")\n",
 "\n",
 "    return generated_text"
] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [
 "# Function to preprocess the image\n",
 "def preprocess_image(image_path, target_size=(384, 384)):\n",
 "    \"\"\"\n",
 "    Loads an image file and normalizes it for OCR.\n",
 "\n",
 "    Args:\n",
 "        image_path: Path of the image file to open.\n",
 "        target_size (tuple): Size handed to ImageOps.pad for the returned image.\n",
 "\n",
 "    Returns:\n",
 "        PilImage.Image: RGB image with the EXIF orientation applied, resized and\n",
 "        padded with white to target_size.\n",
 "    \"\"\"\n",
 "    # The context manager closes the underlying file even if decoding fails\n",
 "    with PilImage.open(image_path) as src:\n",
 "        # Apply the EXIF orientation before convert() so the orientation tag is\n",
 "        # read from the image exactly as it was loaded\n",
 "        img = ImageOps.exif_transpose(src).convert(\"RGB\")\n",
 "    return ImageOps.pad(img, target_size, color=\"white\")"
] }, {
"cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Config of the encoder: is overwritten by shared encoder config: ViTConfig {\n", " \"attention_probs_dropout_prob\": 0.0,\n", " \"encoder_stride\": 16,\n", " \"hidden_act\": \"gelu\",\n", " \"hidden_dropout_prob\": 0.0,\n", " \"hidden_size\": 1024,\n", " \"image_size\": 384,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 4096,\n", " \"layer_norm_eps\": 1e-12,\n", " \"model_type\": \"vit\",\n", " \"num_attention_heads\": 16,\n", " \"num_channels\": 3,\n", " \"num_hidden_layers\": 24,\n", " \"patch_size\": 16,\n", " \"pooler_act\": \"tanh\",\n", " \"pooler_output_size\": 1024,\n", " \"qkv_bias\": false,\n", " \"torch_dtype\": \"float32\",\n", " \"transformers_version\": \"4.51.3\"\n", "}\n", "\n", "Config of the decoder: is overwritten by shared decoder config: TrOCRConfig {\n", " \"activation_dropout\": 0.0,\n", " \"activation_function\": \"relu\",\n", " \"add_cross_attention\": true,\n", " \"attention_dropout\": 0.0,\n", " \"bos_token_id\": 0,\n", " \"classifier_dropout\": 0.0,\n", " \"d_model\": 1024,\n", " \"decoder_attention_heads\": 16,\n", " \"decoder_ffn_dim\": 4096,\n", " \"decoder_layerdrop\": 0.0,\n", " \"decoder_layers\": 12,\n", " \"decoder_start_token_id\": 2,\n", " \"dropout\": 0.1,\n", " \"encoder_hidden_size\": 1024,\n", " \"eos_token_id\": 2,\n", " \"init_std\": 0.02,\n", " \"is_decoder\": true,\n", " \"layernorm_embedding\": false,\n", " \"max_position_embeddings\": 1024,\n", " \"model_type\": \"trocr\",\n", " \"pad_token_id\": 1,\n", " \"scale_embedding\": true,\n", " \"tie_word_embeddings\": false,\n", " \"torch_dtype\": \"float32\",\n", " \"transformers_version\": \"4.51.3\",\n", " \"use_cache\": false,\n", " \"use_learned_position_embeddings\": false,\n", " \"vocab_size\": 50265\n", "}\n", "\n" ] } ], "source": [ "# --- Main execution flow ---\n", "\n", "# Load mathOCR model and processor once\n", 
"model_name = \"fhswf/TrOCR_Math_handwritten\" # This is the specific model ID for handwritten math\n", "processor = TrOCRProcessor.from_pretrained(model_name)\n", "model = VisionEncoderDecoderModel.from_pretrained(model_name)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[Timer] Elapsed: 2.1033 sec\n", "e^{i\\pi}+1=0.\n" ] } ], "source": [ "# Replace with your actual image path\n", "image_path = \".jpg your image path\"# !!! IMPORTANT: Update this path !!!\n", "img = preprocess_image(image_path)\n", "\n", "# Run mathOCR and print the result\n", "extracted_text = run_mathOCR(img, processor, model, measure_time=True) #False if you don't want to display timer\n", "print(extracted_text)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAFEAAAAQCAIAAADh6QSoAAAACXBIWXMAAA7EAAAOxAGVKw4bAAAB2ElEQVRIx+VX0XWrMAxVezqAwwiwgZMRnA1wRoANzArOBpARBBMAIySZwGQEmw38PtRDeSSlbV7S5rzeL45ssK4sXYkn7z38MjzPL2dZ5pz7zzg//cJ7fnlvoeu64/HYtm1RFM65JElOpxMA9H0fhmEcx2ma/uPZzjlErKqqaZrbstput4wx8jbLsumyfwd1XRtjhBD07L3P89x7j4j+EzDGzG/Y7/d5nmutOef+ptBaa60HFkmSTDbAzMtKqTFDpRR98TMHn590EYh4c86MMWvtG8Oze30e50NZlmVZHg4HslRVJYSgxGuaJgxDAGjb9pFrteu6vu8Xi8XYOKmdV87L5TKOYymllBIRySiEQMT1ek1UN5vN4+sTic4YjLG+76calqbparWKoojiFAQBrRVFMc6CizG7N+aVknP+oZQGQWCtnXLe7XZKqbIsrbWXhe7rzrVtO7YIIaSUV3x2HPfrMCEMAGCMmVHvq/FTGnZOBwCo7wx4oX77sPX51dyOoogx1nUdlSqBJOmv/swYG2JgraU+/D33nOf5PfrzQAERB0+MMdRogcYDrXVd14g4SYP7cSYPOOcAMBkEbkIbERGRZoohvmEYfjCTfMM9/wju9Y8xqaiHwh/ub+Dibj3pawAAAABJRU5ErkJggg==", "text/plain": [ "" ] }, "metadata": { "image/png": { "width": 200 } }, "output_type": "display_data" } ], "source": [ "# Save and display LaTeX output as an image\n", "from sympy import preview\n", "from IPython.display import Image, display\n", "import os\n", "\n", "latex_code = extracted_text\n", "output_path = os.path.join(os.path.expanduser('~'), 
\"latex_output.png\")\n", "\n", "# Generate the PNG image from LaTeX code\n", "preview(f\"${latex_code}$\", viewer='file', filename=output_path, euler=False)\n", "\n", "# Display the generated image in the output\n", "display(Image(filename=output_path, width=200))" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.13.2" } }, "nbformat": 4, "nbformat_minor": 2 }