sk2003 committed on
Commit
c916c34
·
1 Parent(s): c607739

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -12
app.py CHANGED
@@ -1,8 +1,5 @@
1
  # Importing necessary packages
2
- from sklearn.model_selection import train_test_split # for splitting the data into train, validation and test sets
3
  import torch # PyTorch used for executing deep learning functions
4
- # to store the data and ground truths to be used by the model, loader is an iterable over the Datasets created, the last one is to concatenate Dataset objects
5
- from torch.utils.data import Dataset, DataLoader, ConcatDataset
6
  from PIL import Image, ImageTk # to display the image from the encoded pixels
7
  import gradio as gr
8
 from transformers import TrOCRProcessor, VisionEncoderDecoderModel # importing the TrOCR processor representing the visual feature extractor and tokenizer of the TrOCR model, and the TrOCR model
@@ -11,17 +8,22 @@ import os
11
  processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
12
  auth_token = os.environ.get("TOKEN_FROM_SECRET") or True
13
  model = VisionEncoderDecoderModel.from_pretrained("sk2003/hist-trocr", use_auth_token=auth_token)
14
- def process_image(image):
15
- # prepare image
16
- pixel_values = processor(image, return_tensors="pt").pixel_values
17
 
18
- # generate
19
- generated_ids = model.generate(pixel_values)
20
 
21
- # decode
22
- generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
23
 
24
- return generated_text
 
 
 
 
 
25
 
26
  title = "Hist-TrOCR"
27
  description = "Interactive demo of Hist-TrOCR, a fine-tuned version of Microsoft's TrOCR which is an end-to-end transformer model used for recognition of text from single-line or word images. It has been fine-tuned on historical text images. Upload an image (or select from the given samples) and click 'submit' to get the transcriptions. Results may take a few seconds to show up."
@@ -30,7 +32,7 @@ description = "Interactive demo of Hist-TrOCR, a fine-tuned version of Microsoft
30
  # image = Image.open(images_location_1 + data_1['file_name'][11])
31
  # examples =[image1, image]
32
 
33
- iface = gr.Interface(fn=process_image,
34
  inputs=gr.inputs.Image(type="pil"),
35
  outputs=gr.outputs.Textbox(),
36
  title=title,
 
1
  # Importing necessary packages
 
2
  import torch # PyTorch used for executing deep learning functions
 
 
3
  from PIL import Image, ImageTk # to display the image from the encoded pixels
4
  import gradio as gr
5
 from transformers import TrOCRProcessor, VisionEncoderDecoderModel # importing the TrOCR processor representing the visual feature extractor and tokenizer of the TrOCR model, and the TrOCR model
 
8
  processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
9
  auth_token = os.environ.get("TOKEN_FROM_SECRET") or True
10
  model = VisionEncoderDecoderModel.from_pretrained("sk2003/hist-trocr", use_auth_token=auth_token)
11
+ # def process_image(image):
12
+ # # prepare image
13
+ # pixel_values = processor(image, return_tensors="pt").pixel_values
14
 
15
+ # # generate
16
+ # generated_ids = model.generate(pixel_values)
17
 
18
+ # # decode
19
+ # generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
20
 
21
+ # return generated_text
22
def inference_on_image(image):
    """Run handwritten-text OCR on a single image and return the transcription.

    Used as the Gradio callback (``fn=inference_on_image`` in the Interface
    below).

    Args:
        image: input image (PIL, per ``gr.inputs.Image(type="pil")``) holding
            a single line or word of handwritten text.

    Returns:
        str: decoded text predicted by the fine-tuned TrOCR model.
    """
    # Convert the image to model-ready pixel features via the module-level
    # TrOCR processor.
    pixel_values = processor(image, return_tensors="pt").pixel_values
    # BUG FIX: the original called `custom_model.generate(...)`, but the only
    # model loaded at module level is `model` (VisionEncoderDecoderModel from
    # "sk2003/hist-trocr"); `custom_model` is undefined and would raise
    # NameError on every submission. The original's "#model or custom" comment
    # left the choice unresolved.
    pred = model.generate(pixel_values, max_new_tokens=100)
    # Decode generated token ids back to text, dropping special tokens;
    # batch_decode returns a list, and we process a single image, so take [0].
    dec_pred = processor.batch_decode(pred, skip_special_tokens=True)[0]
    return dec_pred
27
 
28
  title = "Hist-TrOCR"
29
  description = "Interactive demo of Hist-TrOCR, a fine-tuned version of Microsoft's TrOCR which is an end-to-end transformer model used for recognition of text from single-line or word images. It has been fine-tuned on historical text images. Upload an image (or select from the given samples) and click 'submit' to get the transcriptions. Results may take a few seconds to show up."
 
32
  # image = Image.open(images_location_1 + data_1['file_name'][11])
33
  # examples =[image1, image]
34
 
35
+ iface = gr.Interface(fn=inference_on_image,
36
  inputs=gr.inputs.Image(type="pil"),
37
  outputs=gr.outputs.Textbox(),
38
  title=title,