Spaces:
Running
Running
Update main.py
Browse files
main.py
CHANGED
|
@@ -26,7 +26,8 @@ class Config:
|
|
| 26 |
MAX_QUEUE_SIZE = 16 # Maximum number of requests to queue
|
| 27 |
QUANTIZE_MODEL = True # Enable quantization for improved performance
|
| 28 |
WARMUP_INPUTS = True # Pre-warm the model with sample inputs
|
| 29 |
-
|
|
|
|
| 30 |
ENABLE_PROFILING = False # Set to True to enable performance profiling
|
| 31 |
REQUEST_TIMEOUT = 30.0 # Timeout for request processing in seconds
|
| 32 |
|
|
@@ -44,20 +45,46 @@ class Config:
|
|
| 44 |
|
| 45 |
config = Config()
|
| 46 |
|
| 47 |
-
# Configure logging
|
| 48 |
-
|
| 49 |
-
logging.
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
config.LOG_DIR,
|
| 56 |
f'poetry_generation_{datetime.now().strftime("%Y%m%d")}.log'
|
| 57 |
-
)
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
# Request models
|
| 63 |
class GenerateRequest(BaseModel):
|
|
@@ -242,6 +269,13 @@ class ModelManager:
|
|
| 242 |
try:
|
| 243 |
logger.info(f"Initializing model on device: {config.DEVICE}")
|
| 244 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 245 |
await self.tokenization_service.initialize()
|
| 246 |
await self._load_and_optimize_model()
|
| 247 |
|
|
@@ -366,13 +400,15 @@ class ModelManager:
|
|
| 366 |
if config.DEVICE.type == 'cuda':
|
| 367 |
# Set optimization flags
|
| 368 |
torch.backends.cudnn.benchmark = True
|
| 369 |
-
|
|
|
|
|
|
|
|
|
|
| 370 |
|
| 371 |
# Convert model to TorchScript for faster inference
|
| 372 |
try:
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
)
|
| 376 |
logger.info("Model optimized with TorchScript")
|
| 377 |
except Exception as e:
|
| 378 |
logger.warning(f"TorchScript optimization failed: {str(e)}")
|
|
|
|
| 26 |
MAX_QUEUE_SIZE = 16 # Maximum number of requests to queue
|
| 27 |
QUANTIZE_MODEL = True # Enable quantization for improved performance
|
| 28 |
WARMUP_INPUTS = True # Pre-warm the model with sample inputs
|
| 29 |
+
# Use environment-specific log directory or default to a temp directory
|
| 30 |
+
LOG_DIR = os.environ.get('LOG_DIR', '/tmp/poetry_logs')
|
| 31 |
ENABLE_PROFILING = False # Set to True to enable performance profiling
|
| 32 |
REQUEST_TIMEOUT = 30.0 # Timeout for request processing in seconds
|
| 33 |
|
|
|
|
| 45 |
|
| 46 |
config = Config()
|
| 47 |
|
| 48 |
+
# Configure logging with proper error handling
|
| 49 |
+
def setup_logging(log_dir=None):
    """Configure and return this module's logger.

    A stdout handler is always attached. A dated file handler
    (``poetry_generation_YYYYMMDD.log``) is attached as well when the log
    directory can be created and the file opened; otherwise the function
    falls back to console-only logging instead of failing.

    The function is idempotent: if the logger already has handlers, it is
    returned unchanged so that repeated calls (or a module re-import) do not
    duplicate every log line.

    Args:
        log_dir: Directory for the log file. Defaults to ``config.LOG_DIR``
            when omitted, which matches the original zero-argument call.

    Returns:
        The configured ``logging.Logger`` for this module.
    """
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)

    # Handlers live on the shared logger object, so attaching them again on a
    # second call would emit each record twice.
    if logger.handlers:
        return logger

    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )

    # Always add stdout handler
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)

    if log_dir is None:
        log_dir = config.LOG_DIR

    # Try to set up the file handler, but handle filesystem problems
    # (missing permissions, read-only mounts, ...) gracefully.
    try:
        os.makedirs(log_dir, exist_ok=True)
        log_file = os.path.join(
            log_dir,
            f'poetry_generation_{datetime.now().strftime("%Y%m%d")}.log'
        )
        # FileHandler opens the file in append mode right here, so it raises
        # the same OSError a separate "can we write?" probe would.
        # An explicit encoding keeps non-ASCII text independent of the locale.
        file_handler = logging.FileHandler(log_file, encoding='utf-8')
    except OSError as e:  # PermissionError is a subclass of OSError
        # The console handler is already attached, so these reach stdout.
        logger.warning("Could not create log file: %s", e)
        logger.warning("Continuing with console logging only.")
    else:
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)
        logger.info("Log file created at: %s", log_file)

    return logger
|
| 85 |
+
|
| 86 |
+
# Initialize logger
|
| 87 |
+
logger = setup_logging()
|
| 88 |
|
| 89 |
# Request models
|
| 90 |
class GenerateRequest(BaseModel):
|
|
|
|
| 269 |
try:
|
| 270 |
logger.info(f"Initializing model on device: {config.DEVICE}")
|
| 271 |
|
| 272 |
+
# Check if model file exists
|
| 273 |
+
if not os.path.exists(config.MODEL_PATH):
|
| 274 |
+
logger.error(f"Model file not found at {config.MODEL_PATH}")
|
| 275 |
+
# Try to create directory in case it doesn't exist
|
| 276 |
+
os.makedirs(os.path.dirname(config.MODEL_PATH), exist_ok=True)
|
| 277 |
+
return False
|
| 278 |
+
|
| 279 |
await self.tokenization_service.initialize()
|
| 280 |
await self._load_and_optimize_model()
|
| 281 |
|
|
|
|
| 400 |
if config.DEVICE.type == 'cuda':
|
| 401 |
# Set optimization flags
|
| 402 |
torch.backends.cudnn.benchmark = True
|
| 403 |
+
|
| 404 |
+
# Enable TF32 for matmuls if this PyTorch build exposes the flag (only takes effect on Ampere-or-newer GPUs, e.g. A100)
|
| 405 |
+
if hasattr(torch.backends.cuda, 'matmul') and hasattr(torch.backends.cuda.matmul, 'allow_tf32'):
|
| 406 |
+
torch.backends.cuda.matmul.allow_tf32 = True
|
| 407 |
|
| 408 |
# Convert model to TorchScript for faster inference
|
| 409 |
try:
|
| 410 |
+
# Use a safer approach to TorchScript optimization
|
| 411 |
+
self.model = torch.jit.script(self.model)
|
|
|
|
| 412 |
logger.info("Model optimized with TorchScript")
|
| 413 |
except Exception as e:
|
| 414 |
logger.warning(f"TorchScript optimization failed: {str(e)}")
|