Fred808 committed on
Commit
6c687cf
·
verified ·
1 Parent(s): 9a4c4f9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -6
app.py CHANGED
@@ -5,16 +5,25 @@ from typing import Dict
5
  from PIL import Image
6
  from io import BytesIO
7
  import torch
8
- from transformers import AutoModelForCausalLM, AutoProcessor
 
9
  from fastapi import FastAPI, File, UploadFile
10
  from fastapi.responses import JSONResponse
11
  import uvicorn
12
 
13
- # Disable SDPA if not supported
 
 
 
14
 
15
  # ==== CONFIGURATION ====
 
 
 
 
 
16
  # Florence-2 Configuration
17
- MODEL_ID = "microsoft/Florence-2-large"
18
  DEVICE = "cpu" # Using CPU instead of GPU
19
 
20
  # Create FastAPI app
@@ -36,11 +45,27 @@ def load_florence_model():
36
  try:
37
  log_message("[*] Loading Florence-2 model and processor...")
38
 
39
- # Load model on CPU
40
- model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True).to(DEVICE)
 
 
 
 
 
 
 
 
 
 
 
 
41
  model.eval()
42
 
43
- processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
 
 
 
 
44
  log_message("[ ] Florence-2 loaded and ready on CPU")
45
  except Exception as e:
46
  log_message(f"[ERROR] Failed to load Florence-2 model: {e}")
 
5
  from PIL import Image
6
  from io import BytesIO
7
  import torch
8
+ from transformers import AutoTokenizer, AutoModelForCausalLM, AutoProcessor
9
+ from transformers import PretrainedConfig
10
  from fastapi import FastAPI, File, UploadFile
11
  from fastapi.responses import JSONResponse
12
  import uvicorn
13
 
14
+ # Configure PyTorch settings
15
+ torch.backends.cuda.enable_flash_sdp(False)
16
+ torch.backends.cuda.enable_math_sdp(False)
17
+ torch.backends.cuda.enable_mem_efficient_sdp(False)
18
 
19
  # ==== CONFIGURATION ====
20
+ class SimpleFlorenceConfig(PretrainedConfig):
21
+ model_type = "florence"
22
+ def __init__(self, **kwargs):
23
+ super().__init__(**kwargs)
24
+
25
  # Florence-2 Configuration
26
+ MODEL_ID = "microsoft/Florence-2-base" # Using base model for stability
27
  DEVICE = "cpu" # Using CPU instead of GPU
28
 
29
  # Create FastAPI app
 
45
  try:
46
  log_message("[*] Loading Florence-2 model and processor...")
47
 
48
+ # Create simple configuration
49
+ config = SimpleFlorenceConfig()
50
+ config.use_flash_attention = False
51
+ config.architectures = ["Florence2ForConditionalGeneration"]
52
+
53
+ # Load model with simplified config
54
+ model = AutoModelForCausalLM.from_pretrained(
55
+ MODEL_ID,
56
+ config=config,
57
+ trust_remote_code=True,
58
+ torch_dtype=torch.float32,
59
+ use_flash_attention_2=False,
60
+ revision="main" # Explicitly use main branch
61
+ ).to(DEVICE)
62
  model.eval()
63
 
64
+ processor = AutoProcessor.from_pretrained(
65
+ MODEL_ID,
66
+ trust_remote_code=True,
67
+ revision="main" # Explicitly use main branch
68
+ )
69
  log_message("[ ] Florence-2 loaded and ready on CPU")
70
  except Exception as e:
71
  log_message(f"[ERROR] Failed to load Florence-2 model: {e}")