Check for CUDA availability
Browse files- app.py +1 -1
- custom_got/modeling_GOT.py +12 -4
app.py
CHANGED
|
@@ -10,7 +10,7 @@ st.title("OCR and Document Search Web App")
|
|
| 10 |
uploaded_image = st.file_uploader("Upload an image for OCR", type=["jpg", "png", "jpeg"])
|
| 11 |
|
| 12 |
if uploaded_image is not None:
|
| 13 |
-
with st.spinner("Processing"):
|
| 14 |
# Extract text from the uploaded image
|
| 15 |
extracted_text = extract_text(uploaded_image)
|
| 16 |
st.subheader("Extracted Text")
|
|
|
|
| 10 |
uploaded_image = st.file_uploader("Upload an image for OCR", type=["jpg", "png", "jpeg"])
|
| 11 |
|
| 12 |
if uploaded_image is not None:
|
| 13 |
+
with st.spinner("Processing..."):
|
| 14 |
# Extract text from the uploaded image
|
| 15 |
extracted_text = extract_text(uploaded_image)
|
| 16 |
st.subheader("Extracted Text")
|
custom_got/modeling_GOT.py
CHANGED
|
@@ -18,6 +18,11 @@ DEFAULT_IMAGE_TOKEN = "<image>"
|
|
| 18 |
DEFAULT_IMAGE_PATCH_TOKEN = '<imgpad>'
|
| 19 |
DEFAULT_IM_START_TOKEN = '<img>'
|
| 20 |
DEFAULT_IM_END_TOKEN = '</img>'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
from enum import auto, Enum
|
| 23 |
class SeparatorStyle(Enum):
|
|
@@ -557,8 +562,11 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
|
|
| 557 |
inputs = tokenizer([prompt])
|
| 558 |
|
| 559 |
image_tensor_1 = image_processor_high(image)
|
| 560 |
-
|
| 561 |
input_ids = torch.as_tensor(inputs.input_ids)
|
|
|
|
|
|
|
|
|
|
| 562 |
|
| 563 |
stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2
|
| 564 |
keywords = [stop_str]
|
|
@@ -566,7 +574,7 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
|
|
| 566 |
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
|
| 567 |
|
| 568 |
if stream_flag:
|
| 569 |
-
with torch.autocast("cuda", dtype=torch.bfloat16):
|
| 570 |
output_ids = self.generate(
|
| 571 |
input_ids,
|
| 572 |
images=[image_tensor_1.unsqueeze(0).half().cuda()],
|
|
@@ -578,10 +586,10 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
|
|
| 578 |
stopping_criteria=[stopping_criteria]
|
| 579 |
)
|
| 580 |
else:
|
| 581 |
-
with torch.autocast("cuda", dtype=torch.bfloat16):
|
| 582 |
output_ids = self.generate(
|
| 583 |
input_ids,
|
| 584 |
-
images=[image_tensor_1.unsqueeze(0).half()],
|
| 585 |
do_sample=False,
|
| 586 |
num_beams = 1,
|
| 587 |
no_repeat_ngram_size = 20,
|
|
|
|
| 18 |
DEFAULT_IMAGE_PATCH_TOKEN = '<imgpad>'
|
| 19 |
DEFAULT_IM_START_TOKEN = '<img>'
|
| 20 |
DEFAULT_IM_END_TOKEN = '</img>'
|
| 21 |
+
cuda_is_available = torch.cuda.is_available()
|
| 22 |
+
if cuda_is_available:
|
| 23 |
+
device = torch.device('cuda')
|
| 24 |
+
else:
|
| 25 |
+
device = torch.device('cpu')
|
| 26 |
|
| 27 |
from enum import auto, Enum
|
| 28 |
class SeparatorStyle(Enum):
|
|
|
|
| 562 |
inputs = tokenizer([prompt])
|
| 563 |
|
| 564 |
image_tensor_1 = image_processor_high(image)
|
| 565 |
+
|
| 566 |
input_ids = torch.as_tensor(inputs.input_ids)
|
| 567 |
+
|
| 568 |
+
if cuda_is_available:
|
| 569 |
+
input_ids = input_ids.cuda()
|
| 570 |
|
| 571 |
stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2
|
| 572 |
keywords = [stop_str]
|
|
|
|
| 574 |
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
|
| 575 |
|
| 576 |
if stream_flag:
|
| 577 |
+
with torch.autocast(device, dtype=torch.bfloat16):
|
| 578 |
output_ids = self.generate(
|
| 579 |
input_ids,
|
| 580 |
images=[image_tensor_1.unsqueeze(0).half().cuda()],
|
|
|
|
| 586 |
stopping_criteria=[stopping_criteria]
|
| 587 |
)
|
| 588 |
else:
|
| 589 |
+
with torch.autocast(device, dtype=torch.bfloat16):
|
| 590 |
output_ids = self.generate(
|
| 591 |
input_ids,
|
| 592 |
+
images=[image_tensor_1.unsqueeze(0).half().cuda()] if cuda_is_available else [image_tensor_1.unsqueeze(0).half()],
|
| 593 |
do_sample=False,
|
| 594 |
num_beams = 1,
|
| 595 |
no_repeat_ngram_size = 20,
|