Upload app.py with huggingface_hub
Browse files
app.py
CHANGED
|
@@ -24,7 +24,7 @@ logger.add(
Before (old lines 24–30):

 24   # Initialize FastAPI app with metadata
 25   app = FastAPI(
 26       title="Clinical Report Generator API",
 27 -     description="Production API for generating clinical report summaries using   [line truncated in page render]
 28       version="1.0.0",
 29       docs_url="/documentation",  # Swagger UI
 30       redoc_url="/redoc"  # ReDoc
@@ -40,6 +40,9 @@ app.add_middleware(
Before (old lines 40–45):

 40       max_age=3600,  # Cache preflight requests
 41   )
 42
 43   class ModelManager:
 44       def __init__(self):
 45           self.model = None
@@ -64,30 +67,30 @@ class ModelManager:
Before (old lines 64–93):

 64           if torch.cuda.is_available():
 65               logger.info(f"CUDA memory: {torch.cuda.memory_allocated() / (1024*1024*1024):.2f}GB allocated")
 66
 67 -         # Load tokenizer
 68 -         logger.info("Initializing   [line truncated in page render]
 69           self.tokenizer = T5Tokenizer.from_pretrained(
 70 -           [removed line; content not shown in page render]
 71 -             use_fast=True,
 72               model_max_length=512
 73           )
 74 -         logger.success("   [line truncated in page render]
 75
 76           # Load model configuration
 77           logger.info("Fetching model configuration...")
 78           config = AutoConfig.from_pretrained(
 79 -           [removed line; content not shown in page render]
 80               trust_remote_code=False
 81           )
 82           logger.success("Model configuration loaded successfully")
 83
 84 -         # Load the   [line truncated in page render]
 85 -         logger.info("Loading   [line truncated in page render]
 86           device = "cuda" if torch.cuda.is_available() else "cpu"
 87           logger.info(f"Using device: {device}")
 88
 89           self.model = T5ForConditionalGeneration.from_pretrained(
 90 -           [removed line; content not shown in page render]
 91               config=config,
 92               torch_dtype=torch.float16 if device == "cuda" else torch.float32,
 93               low_cpu_mem_usage=True
@@ -181,8 +184,8 @@ async def predict(request: PredictRequest) -> JSONResponse:
Before (old lines 181–188):

 181          with torch.no_grad(), model_manager.accelerator.autocast():
 182              outputs = model_manager.model.generate(
 183                  input_ids,
 184 -                max_length=512,  # Increased   [line truncated in page render]
 185 -                num_beams=5,  # Increased   [line truncated in page render]
 186                  no_repeat_ngram_size=3,
 187                  length_penalty=2.0,
 188                  early_stopping=True,
After (new lines 24–30):

 24   # Initialize FastAPI app with metadata
 25   app = FastAPI(
 26       title="Clinical Report Generator API",
 27 +     description="Production API for generating clinical report summaries using T5",
 28       version="1.0.0",
 29       docs_url="/documentation",  # Swagger UI
 30       redoc_url="/redoc"  # ReDoc
After (new lines 40–48):

 40       max_age=3600,  # Cache preflight requests
 41   )
 42
 43 + # Model configuration
 44 + MODEL_ID = "pdarleyjr/iplc-t5-clinical"
 45 +
 46   class ModelManager:
 47       def __init__(self):
 48           self.model = None
After (new lines 67–96):

 67           if torch.cuda.is_available():
 68               logger.info(f"CUDA memory: {torch.cuda.memory_allocated() / (1024*1024*1024):.2f}GB allocated")
 69
 70 +         # Load tokenizer
 71 +         logger.info("Initializing tokenizer...")
 72           self.tokenizer = T5Tokenizer.from_pretrained(
 73 +             MODEL_ID,
 74 +             use_fast=True,
 75               model_max_length=512
 76           )
 77 +         logger.success("Tokenizer loaded successfully")
 78
 79           # Load model configuration
 80           logger.info("Fetching model configuration...")
 81           config = AutoConfig.from_pretrained(
 82 +             MODEL_ID,
 83               trust_remote_code=False
 84           )
 85           logger.success("Model configuration loaded successfully")
 86
 87 +         # Load the model
 88 +         logger.info("Loading model (this may take a few minutes)...")
 89           device = "cuda" if torch.cuda.is_available() else "cpu"
 90           logger.info(f"Using device: {device}")
 91
 92           self.model = T5ForConditionalGeneration.from_pretrained(
 93 +             MODEL_ID,
 94               config=config,
 95               torch_dtype=torch.float16 if device == "cuda" else torch.float32,
 96               low_cpu_mem_usage=True
After (new lines 184–191):

 184          with torch.no_grad(), model_manager.accelerator.autocast():
 185              outputs = model_manager.model.generate(
 186                  input_ids,
 187 +                max_length=512,  # Increased for longer summaries
 188 +                num_beams=5,  # Increased for better coherence
 189                  no_repeat_ngram_size=3,
 190                  length_penalty=2.0,
 191                  early_stopping=True,