Spaces:

banao-tech
/

omniapi

Sleeping

App Files Community

banao-tech committed on Feb 4

Commit

ea2ade6

verified ·

1 Parent(s): d3c30f4

Rename app.py to main.py

Browse files

Files changed (1) hide show

app.py → main.py +30 -24

app.py → main.py RENAMED Viewed

@@ -8,49 +8,55 @@ from PIL import Image
 import torch
 import numpy as np
-# Import your custom utility functions
 from utils import (
     check_ocr_box,
     get_yolo_model,
     get_caption_model_processor,
     get_som_labeled_img,
 )
-# Load the YOLO model using the ultralytics class instead of torch.load
 from ultralytics import YOLO
-# Use the YOLO constructor to load the model properly
-yolo_model = YOLO("weights/icon_detect/best.pt")
-print(f"YOLO model type: {type(yolo_model)}")
-# Load the captioning model (Florence-2)
-from transformers import AutoProcessor, AutoModelForCausalLM
-device = "cuda" if torch.cuda.is_available() else "cpu"
-dtype = torch.float16 if device == "cuda" else torch.float32
-processor = AutoProcessor.from_pretrained("microsoft/Florence-2-base", trust_remote_code=True)
 try:
     model = AutoModelForCausalLM.from_pretrained(
         "weights/icon_caption_florence",
-        torch_dtype=dtype,
-        trust_remote_code=True
-    ).to(device)
 except Exception as e:
-    print(f"Error loading caption model: {str(e)}")
     model = AutoModelForCausalLM.from_pretrained(
         "weights/icon_caption_florence",
-        torch_dtype=torch.float32,
-        trust_remote_code=True
-    ).to("cpu")
-if not hasattr(model.config, 'vision_config'):
-    model.config.vision_config = {}
-if 'model_type' not in model.config.vision_config:
-    model.config.vision_config['model_type'] = 'davit'
 caption_model_processor = {"processor": processor, "model": model}
-print("Finish loading caption model!")
 app = FastAPI()

 import torch
 import numpy as np
+# Existing imports
 from utils import (
     check_ocr_box,
     get_yolo_model,
     get_caption_model_processor,
     get_som_labeled_img,
 )
 from ultralytics import YOLO
+from transformers import AutoProcessor, AutoModelForCausalLM
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# main.py (YOLO loading fix)
+from utils import get_yolo_model
+import torch
+# Load YOLO model using official method
+yolo_model = get_yolo_model(model_path="weights/icon_detect/best.pt")
+# Handle device placement
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+if str(device) == "cuda":
+    yolo_model = yolo_model.cuda()
+else:
+    yolo_model = yolo_model.cpu()
+# Load caption model and processor
 try:
+    processor = AutoProcessor.from_pretrained(
+        "microsoft/Florence-2-base", trust_remote_code=True
+    )
     model = AutoModelForCausalLM.from_pretrained(
         "weights/icon_caption_florence",
+        torch_dtype=torch.float16,
+        trust_remote_code=True,
+    ).to("cuda")
 except Exception as e:
+    logger.warning(f"Failed to load caption model on GPU: {e}. Falling back to CPU.")
     model = AutoModelForCausalLM.from_pretrained(
         "weights/icon_caption_florence",
+        torch_dtype=torch.float16,
+        trust_remote_code=True,
+    )
 caption_model_processor = {"processor": processor, "model": model}
+logger.info("Finished loading models!!!")
 app = FastAPI()