Spaces:

Divyansh12
/

OCR_Application

Build error

App Files Files Community

Divyansh12 committed on Sep 29, 2024

Commit

f604f09

verified ·

1 Parent(s): 77415fc

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -63

app.py CHANGED Viewed

@@ -7,13 +7,17 @@ import uuid
 import time
 from pathlib import Path
-# Force the use of CPU
-os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
-# Load tokenizer and model on CPU
-tokenizer = AutoTokenizer.from_pretrained('srimanth-d/GOT_CPU', trust_remote_code=True)
-model = AutoModel.from_pretrained('srimanth-d/GOT_CPU', trust_remote_code=True, use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
-model.eval()
 # Define folders for uploads and results
 UPLOAD_FOLDER = "./uploads"
@@ -23,44 +27,18 @@ for folder in [UPLOAD_FOLDER, RESULTS_FOLDER]:
     if not os.path.exists(folder):
         os.makedirs(folder)
-# Function to run the GOT model
-def run_GOT(image, got_mode, fine_grained_mode="", ocr_color="", ocr_box=""):
     unique_id = str(uuid.uuid4())
     image_path = os.path.join(UPLOAD_FOLDER, f"{unique_id}.png")
-    result_path = os.path.join(RESULTS_FOLDER, f"{unique_id}.html")
     image.save(image_path)
     try:
-        if got_mode == "plain texts OCR":
-            res = model.chat(tokenizer, image_path, ocr_type='ocr')
-            return res, None
-        elif got_mode == "format texts OCR":
-            res = model.chat(tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path)
-        elif got_mode == "plain multi-crop OCR":
-            res = model.chat_crop(tokenizer, image_path, ocr_type='ocr')
-            return res, None
-        elif got_mode == "format multi-crop OCR":
-            res = model.chat_crop(tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path)
-        elif got_mode == "plain fine-grained OCR":
-            res = model.chat(tokenizer, image_path, ocr_type='ocr', ocr_box=ocr_box, ocr_color=ocr_color)
-            return res, None
-        elif got_mode == "format fine-grained OCR":
-            res = model.chat(tokenizer, image_path, ocr_type='format', ocr_box=ocr_box, ocr_color=ocr_color, render=True, save_render_file=result_path)
-        res_markdown = res
-        if "format" in got_mode and os.path.exists(result_path):
-            with open(result_path, 'r') as f:
-                html_content = f.read()
-            encoded_html = base64.b64encode(html_content.encode('utf-8')).decode('utf-8')
-            iframe_src = f"data:text/html;base64,{encoded_html}"
-            iframe = f'<iframe src="{iframe_src}" width="100%" height="600px"></iframe>'
-            return res_markdown, iframe
-        else:
-            return res_markdown, None
     except Exception as e:
-        return f"Error: {str(e)}", None
     finally:
         if os.path.exists(image_path):
             os.remove(image_path)
@@ -81,6 +59,9 @@ uploaded_image = st.file_uploader("Upload your image", type=["png", "jpg", "jpeg
 # Create two columns for layout
 col1, col2 = st.columns(2)
 if uploaded_image:
     image = Image.open(uploaded_image)
@@ -88,33 +69,13 @@ if uploaded_image:
         st.image(image, caption='Uploaded Image', use_column_width=True)
     with col2:
-        got_mode = st.selectbox("Choose one mode of GOT", [
-            "plain texts OCR",
-            "format texts OCR",
-            "plain multi-crop OCR",
-            "format multi-crop OCR",
-            "plain fine-grained OCR",
-            "format fine-grained OCR",
-        ])
-        fine_grained_mode = None
-        ocr_color = ""
-        ocr_box = ""
-        if "fine-grained" in got_mode:
-            fine_grained_mode = st.selectbox("Fine-grained type", ["box", "color"])
-            if fine_grained_mode == "box":
-                ocr_box = st.text_input("Input box: [x1,y1,x2,y2]", value="[0,0,100,100]")
-            elif fine_grained_mode == "color":
-                ocr_color = st.selectbox("Color list", ["red", "green", "blue"])
-        if st.button("Submit"):
             with st.spinner("Processing..."):
-                result_text, html_result = run_GOT(image, got_mode, fine_grained_mode, ocr_color, ocr_box)
                 st.text_area("GOT Output", result_text, height=200)
-                if html_result:
-                    st.markdown(html_result, unsafe_allow_html=True)
 # Cleanup old files
 cleanup_old_files()

 import time
 from pathlib import Path
+# Define a function to load the model
+def load_model(model_name):
+    if model_name == "GOT_CPU":
+        tokenizer = AutoTokenizer.from_pretrained('srimanth-d/GOT_CPU', trust_remote_code=True)
+        model = AutoModel.from_pretrained('srimanth-d/GOT_CPU', trust_remote_code=True, use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
+        model = model.eval()  # Load model on CPU
+    elif model_name == "GOT_GPU":
+        tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
+        model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, device_map='cuda', use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
+        model = model.eval().cuda()  # Load model on GPU
+    return tokenizer, model
 # Define folders for uploads and results
 UPLOAD_FOLDER = "./uploads"
     if not os.path.exists(folder):
         os.makedirs(folder)
+# Function to run the GOT model for plain text OCR
+def run_GOT(image, tokenizer, model):
     unique_id = str(uuid.uuid4())
     image_path = os.path.join(UPLOAD_FOLDER, f"{unique_id}.png")
     image.save(image_path)
     try:
+        res = model.chat(tokenizer, image_path, ocr_type='ocr')  # Only using plain text OCR
+        return res
     except Exception as e:
+        return f"Error: {str(e)}"
     finally:
         if os.path.exists(image_path):
             os.remove(image_path)
 # Create two columns for layout
 col1, col2 = st.columns(2)
+# Model selection
+model_option = st.selectbox("Select Model", ["GOT_CPU", "GOT_GPU"])
 if uploaded_image:
     image = Image.open(uploaded_image)
         st.image(image, caption='Uploaded Image', use_column_width=True)
     with col2:
+        if st.button("Run Plain Text OCR"):
             with st.spinner("Processing..."):
+                # Load the selected model
+                tokenizer, model = load_model(model_option)
+                result_text = run_GOT(image, tokenizer, model)
                 st.text_area("GOT Output", result_text, height=200)
 # Cleanup old files
 cleanup_old_files()