prithivMLmods committed on
Commit
eb1662e
·
verified ·
1 Parent(s): 8a5a6fe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -7
app.py CHANGED
@@ -15,13 +15,14 @@ from PIL import Image, ImageOps
15
  import requests
16
 
17
  from transformers import (
18
- AutoTokenizer,
 
 
19
  AutoProcessor,
20
  TextIteratorStreamer,
21
  )
 
22
  from transformers.image_utils import load_image
23
- # The custom model class is imported via trust_remote_code=True
24
- from transformers import AutoModelForImageTextToText
25
 
26
  from gradio.themes import Soft
27
  from gradio.themes.utils import colors, fonts, sizes
@@ -116,12 +117,10 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
116
  # Load Nanonets-OCR2-3B
117
  MODEL_ID_3B = "nanonets/Nanonets-OCR2-3B"
118
  processor_3b = AutoProcessor.from_pretrained(MODEL_ID_3B, trust_remote_code=True)
119
- model_3b = AutoModelForImageTextToText.from_pretrained(
120
  MODEL_ID_3B,
121
  dtype=torch.float16,
122
- #device_map="auto",
123
  trust_remote_code=True,
124
- attn_implementation="flash_attention_2"
125
  ).to(device).eval()
126
 
127
  # Load Nanonets-OCR2-1.5B-exp
@@ -130,7 +129,6 @@ processor_1_5b = AutoProcessor.from_pretrained(MODEL_ID_1_5B, trust_remote_code=
130
  model_1_5b = AutoModelForImageTextToText.from_pretrained(
131
  MODEL_ID_1_5B,
132
  dtype=torch.float16,
133
- #device_map="auto",
134
  trust_remote_code=True,
135
  attn_implementation="flash_attention_2"
136
  ).to(device).eval()
 
15
  import requests
16
 
17
  from transformers import (
18
+ Qwen2VLForConditionalGeneration,
19
+ Qwen2_5_VLForConditionalGeneration,
20
+ AutoModelForImageTextToText,
21
  AutoProcessor,
22
  TextIteratorStreamer,
23
  )
24
+
25
  from transformers.image_utils import load_image
 
 
26
 
27
  from gradio.themes import Soft
28
  from gradio.themes.utils import colors, fonts, sizes
 
117
  # Load Nanonets-OCR2-3B
118
  MODEL_ID_3B = "nanonets/Nanonets-OCR2-3B"
119
  processor_3b = AutoProcessor.from_pretrained(MODEL_ID_3B, trust_remote_code=True)
120
+ model_3b = Qwen2_5_VLForConditionalGeneration.from_pretrained(
121
  MODEL_ID_3B,
122
  dtype=torch.float16,
 
123
  trust_remote_code=True,
 
124
  ).to(device).eval()
125
 
126
  # Load Nanonets-OCR2-1.5B-exp
 
129
  model_1_5b = AutoModelForImageTextToText.from_pretrained(
130
  MODEL_ID_1_5B,
131
  dtype=torch.float16,
 
132
  trust_remote_code=True,
133
  attn_implementation="flash_attention_2"
134
  ).to(device).eval()