Check for CUDA availability
Browse files- app.py +1 -1
- custom_got/modeling_GOT.py +12 -4
app.py
CHANGED
|
@@ -10,7 +10,7 @@ st.title("OCR and Document Search Web App")
|
|
| 10 |
uploaded_image = st.file_uploader("Upload an image for OCR", type=["jpg", "png", "jpeg"])
|
| 11 |
|
| 12 |
if uploaded_image is not None:
|
| 13 |
-
with st.spinner("Processing"):
|
| 14 |
# Extract text from the uploaded image
|
| 15 |
extracted_text = extract_text(uploaded_image)
|
| 16 |
st.subheader("Extracted Text")
|
|
|
|
| 10 |
uploaded_image = st.file_uploader("Upload an image for OCR", type=["jpg", "png", "jpeg"])
|
| 11 |
|
| 12 |
if uploaded_image is not None:
|
| 13 |
+
with st.spinner("Processing..."):
|
| 14 |
# Extract text from the uploaded image
|
| 15 |
extracted_text = extract_text(uploaded_image)
|
| 16 |
st.subheader("Extracted Text")
|
custom_got/modeling_GOT.py
CHANGED
|
@@ -18,6 +18,11 @@ DEFAULT_IMAGE_TOKEN = "<image>"
|
|
| 18 |
DEFAULT_IMAGE_PATCH_TOKEN = '<imgpad>'
|
| 19 |
DEFAULT_IM_START_TOKEN = '<img>'
|
| 20 |
DEFAULT_IM_END_TOKEN = '</img>'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
from enum import auto, Enum
|
| 23 |
class SeparatorStyle(Enum):
|
|
@@ -557,8 +562,11 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
|
|
| 557 |
inputs = tokenizer([prompt])
|
| 558 |
|
| 559 |
image_tensor_1 = image_processor_high(image)
|
| 560 |
-
|
| 561 |
input_ids = torch.as_tensor(inputs.input_ids)
|
|
|
|
|
|
|
|
|
|
| 562 |
|
| 563 |
stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2
|
| 564 |
keywords = [stop_str]
|
|
@@ -566,7 +574,7 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
|
|
| 566 |
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
|
| 567 |
|
| 568 |
if stream_flag:
|
| 569 |
-
with torch.autocast("cuda", dtype=torch.bfloat16):
|
| 570 |
output_ids = self.generate(
|
| 571 |
input_ids,
|
| 572 |
images=[image_tensor_1.unsqueeze(0).half().cuda()],
|
|
@@ -578,10 +586,10 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
|
|
| 578 |
stopping_criteria=[stopping_criteria]
|
| 579 |
)
|
| 580 |
else:
|
| 581 |
-
with torch.autocast("cuda", dtype=torch.bfloat16):
|
| 582 |
output_ids = self.generate(
|
| 583 |
input_ids,
|
| 584 |
-
images=[image_tensor_1.unsqueeze(0).half()],
|
| 585 |
do_sample=False,
|
| 586 |
num_beams = 1,
|
| 587 |
no_repeat_ngram_size = 20,
|
|
|
|
| 18 |
DEFAULT_IMAGE_PATCH_TOKEN = '<imgpad>'
|
| 19 |
DEFAULT_IM_START_TOKEN = '<img>'
|
| 20 |
DEFAULT_IM_END_TOKEN = '</img>'
|
| 21 |
+
cuda_is_available = torch.cuda.is_available()
|
| 22 |
+
if cuda_is_available:
|
| 23 |
+
device = torch.device('cuda')
|
| 24 |
+
else:
|
| 25 |
+
device = torch.device('cpu')
|
| 26 |
|
| 27 |
from enum import auto, Enum
|
| 28 |
class SeparatorStyle(Enum):
|
|
|
|
| 562 |
inputs = tokenizer([prompt])
|
| 563 |
|
| 564 |
image_tensor_1 = image_processor_high(image)
|
| 565 |
+
|
| 566 |
input_ids = torch.as_tensor(inputs.input_ids)
|
| 567 |
+
|
| 568 |
+
if cuda_is_available:
|
| 569 |
+
input_ids = input_ids.cuda()
|
| 570 |
|
| 571 |
stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2
|
| 572 |
keywords = [stop_str]
|
|
|
|
| 574 |
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
|
| 575 |
|
| 576 |
if stream_flag:
|
| 577 |
+
with torch.autocast(device, dtype=torch.bfloat16):
|
| 578 |
output_ids = self.generate(
|
| 579 |
input_ids,
|
| 580 |
images=[image_tensor_1.unsqueeze(0).half().cuda()],
|
|
|
|
| 586 |
stopping_criteria=[stopping_criteria]
|
| 587 |
)
|
| 588 |
else:
|
| 589 |
+
with torch.autocast(device, dtype=torch.bfloat16):
|
| 590 |
output_ids = self.generate(
|
| 591 |
input_ids,
|
| 592 |
+
images=[image_tensor_1.unsqueeze(0).half().cuda()] if cuda_is_available else [image_tensor_1.unsqueeze(0).half()],
|
| 593 |
do_sample=False,
|
| 594 |
num_beams = 1,
|
| 595 |
no_repeat_ngram_size = 20,
|