sk2003 committed on
Commit
c916c34
·
1 Parent(s): c607739

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -12
app.py CHANGED
@@ -1,8 +1,5 @@
1
  # Importing necessary packages
2
- from sklearn.model_selection import train_test_split # for splitting the data into train, validation and test sets
3
  import torch # PyTorch used for executing deep learning functions
4
- # to store the data and ground truths to be used by the model, loader is an iterable over the Datasets created, the last one is to concatenate Dataset objects
5
- from torch.utils.data import Dataset, DataLoader, ConcatDataset
6
  from PIL import Image, ImageTk # to display the image from the encoded pixels
7
  import gradio as gr
8
 from transformers import TrOCRProcessor, VisionEncoderDecoderModel # importing the TrOCR processor representing the visual feature extractor and tokenizer of the TrOCR model, and the TrOCR model
@@ -11,17 +8,22 @@ import os
11
  processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
12
  auth_token = os.environ.get("TOKEN_FROM_SECRET") or True
13
  model = VisionEncoderDecoderModel.from_pretrained("sk2003/hist-trocr", use_auth_token=auth_token)
14
- def process_image(image):
15
- # prepare image
16
- pixel_values = processor(image, return_tensors="pt").pixel_values
17
 
18
- # generate
19
- generated_ids = model.generate(pixel_values)
20
 
21
- # decode
22
- generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
23
 
24
- return generated_text
 
 
 
 
 
25
 
26
  title = "Hist-TrOCR"
27
  description = "Interactive demo of Hist-TrOCR, a fine-tuned version of Microsoft's TrOCR which is an end-to-end transformer model used for recognition of text from single-line or word images. It has been fine-tuned on historical text images. Upload an image (or select from the given samples) and click 'submit' to get the transcriptions. Results may take a few seconds to show up."
@@ -30,7 +32,7 @@ description = "Interactive demo of Hist-TrOCR, a fine-tuned version of Microsoft
30
  # image = Image.open(images_location_1 + data_1['file_name'][11])
31
  # examples =[image1, image]
32
 
33
- iface = gr.Interface(fn=process_image,
34
  inputs=gr.inputs.Image(type="pil"),
35
  outputs=gr.outputs.Textbox(),
36
  title=title,
 
1
  # Importing necessary packages
 
2
  import torch # PyTorch used for executing deep learning functions
 
 
3
  from PIL import Image, ImageTk # to display the image from the encoded pixels
4
  import gradio as gr
5
 from transformers import TrOCRProcessor, VisionEncoderDecoderModel # importing the TrOCR processor representing the visual feature extractor and tokenizer of the TrOCR model, and the TrOCR model
 
8
  processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
9
  auth_token = os.environ.get("TOKEN_FROM_SECRET") or True
10
  model = VisionEncoderDecoderModel.from_pretrained("sk2003/hist-trocr", use_auth_token=auth_token)
11
+ # def process_image(image):
12
+ # # prepare image
13
+ # pixel_values = processor(image, return_tensors="pt").pixel_values
14
 
15
+ # # generate
16
+ # generated_ids = model.generate(pixel_values)
17
 
18
+ # # decode
19
+ # generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
20
 
21
+ # return generated_text
22
def inference_on_image(image):
    """Run handwritten-text OCR on a single image and return the transcription.

    Used as the Gradio callback (``fn=inference_on_image`` in the Interface
    below).

    Args:
        image: input image (PIL, per ``gr.inputs.Image(type="pil")``) holding
            a single line or word of handwritten text.

    Returns:
        str: decoded text predicted by the fine-tuned TrOCR model.
    """
    # Convert the image to model-ready pixel features via the module-level
    # TrOCR processor.
    pixel_values = processor(image, return_tensors="pt").pixel_values
    # BUG FIX: the original called `custom_model.generate(...)`, but the only
    # model loaded at module level is `model` (VisionEncoderDecoderModel from
    # "sk2003/hist-trocr"); `custom_model` is undefined and would raise
    # NameError on every submission. The original's "#model or custom" comment
    # left the choice unresolved.
    pred = model.generate(pixel_values, max_new_tokens=100)
    # Decode generated token ids back to text, dropping special tokens;
    # batch_decode returns a list, and we process a single image, so take [0].
    dec_pred = processor.batch_decode(pred, skip_special_tokens=True)[0]
    return dec_pred
27
 
28
  title = "Hist-TrOCR"
29
  description = "Interactive demo of Hist-TrOCR, a fine-tuned version of Microsoft's TrOCR which is an end-to-end transformer model used for recognition of text from single-line or word images. It has been fine-tuned on historical text images. Upload an image (or select from the given samples) and click 'submit' to get the transcriptions. Results may take a few seconds to show up."
 
32
  # image = Image.open(images_location_1 + data_1['file_name'][11])
33
  # examples =[image1, image]
34
 
35
+ iface = gr.Interface(fn=inference_on_image,
36
  inputs=gr.inputs.Image(type="pil"),
37
  outputs=gr.outputs.Textbox(),
38
  title=title,