Spaces:

abhisheksan
/

poetica

Running

App Files Community

abhisheksan committed on Jun 4

Commit

36d6b8e

verified ·

1 Parent(s): a22b331

Update main.py

Browse files

Files changed (1) hide show

main.py +53 -17

main.py CHANGED Viewed

@@ -26,7 +26,8 @@ class Config:
     MAX_QUEUE_SIZE = 16  # Maximum number of requests to queue
     QUANTIZE_MODEL = True  # Enable quantization for improved performance
     WARMUP_INPUTS = True  # Pre-warm the model with sample inputs
-    LOG_DIR = os.path.join(os.getcwd(), 'logs')
     ENABLE_PROFILING = False  # Set to True to enable performance profiling
     REQUEST_TIMEOUT = 30.0  # Timeout for request processing in seconds
@@ -44,20 +45,46 @@ class Config:
 config = Config()
-# Configure logging
-os.makedirs(config.LOG_DIR, exist_ok=True)
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
-    handlers=[
-        logging.StreamHandler(sys.stdout),
-        logging.FileHandler(os.path.join(
             config.LOG_DIR,
             f'poetry_generation_{datetime.now().strftime("%Y%m%d")}.log'
-        ))
-    ]
-)
-logger = logging.getLogger(__name__)
 # Request models
 class GenerateRequest(BaseModel):
@@ -242,6 +269,13 @@ class ModelManager:
         try:
             logger.info(f"Initializing model on device: {config.DEVICE}")
             await self.tokenization_service.initialize()
             await self._load_and_optimize_model()
@@ -366,13 +400,15 @@ class ModelManager:
             if config.DEVICE.type == 'cuda':
                 # Set optimization flags
                 torch.backends.cudnn.benchmark = True
-                torch.backends.cuda.matmul.allow_tf32 = True
                 # Convert model to TorchScript for faster inference
                 try:
-                    self.model = torch.jit.optimize_for_inference(
-                        torch.jit.script(self.model)
-                    )
                     logger.info("Model optimized with TorchScript")
                 except Exception as e:
                     logger.warning(f"TorchScript optimization failed: {str(e)}")

     MAX_QUEUE_SIZE = 16  # Maximum number of requests to queue
     QUANTIZE_MODEL = True  # Enable quantization for improved performance
     WARMUP_INPUTS = True  # Pre-warm the model with sample inputs
+    # Use environment-specific log directory or default to a temp directory
+    LOG_DIR = os.environ.get('LOG_DIR', '/tmp/poetry_logs')
     ENABLE_PROFILING = False  # Set to True to enable performance profiling
     REQUEST_TIMEOUT = 30.0  # Timeout for request processing in seconds
 config = Config()
+# Configure logging with proper error handling
def setup_logging() -> logging.Logger:
    """Configure and return this module's logger.

    A stdout handler is always attached. A dated log file under
    ``config.LOG_DIR`` is attached as well when the directory can be
    created and the file opened; otherwise a warning is printed and the
    logger keeps working with console output only.

    Calling this function more than once is safe: if the logger already
    has handlers it is returned unchanged instead of gaining duplicates.

    Returns:
        The logger named after this module, with its level set to INFO.
    """
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)

    # getLogger() hands back the same object for the same name, so a second
    # call would otherwise stack another handler pair and every record would
    # be emitted twice.
    if logger.handlers:
        return logger

    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )

    # Always add stdout handler
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)

    # Try to set up file handler, but handle permission issues gracefully
    log_file = os.path.join(
        config.LOG_DIR,
        f'poetry_generation_{datetime.now().strftime("%Y%m%d")}.log'
    )
    try:
        # Attempt to create directory if it doesn't exist
        os.makedirs(config.LOG_DIR, exist_ok=True)
        # FileHandler opens the file in append mode right away, so an
        # unwritable location surfaces here without a separate open() probe.
        file_handler = logging.FileHandler(log_file, encoding='utf-8')
    except OSError as e:
        # PermissionError is a subclass of OSError, so this covers both.
        print(f"Warning: Could not create log file: {e}")
        print("Continuing with console logging only.")
    else:
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)
        print(f"Log file created at: {log_file}")

    return logger
+# Initialize logger
+logger = setup_logging()
 # Request models
 class GenerateRequest(BaseModel):
         try:
             logger.info(f"Initializing model on device: {config.DEVICE}")
+            # Check if model file exists
+            if not os.path.exists(config.MODEL_PATH):
+                logger.error(f"Model file not found at {config.MODEL_PATH}")
+                # Try to create directory in case it doesn't exist
+                os.makedirs(os.path.dirname(config.MODEL_PATH), exist_ok=True)
+                return False
             await self.tokenization_service.initialize()
             await self._load_and_optimize_model()
             if config.DEVICE.type == 'cuda':
                 # Set optimization flags
                 torch.backends.cudnn.benchmark = True
+                # Enable TF32 precision if available (on A100 GPUs)
+                if hasattr(torch.backends.cuda, 'matmul') and hasattr(torch.backends.cuda.matmul, 'allow_tf32'):
+                    torch.backends.cuda.matmul.allow_tf32 = True
                 # Convert model to TorchScript for faster inference
                 try:
+                    # Use a safer approach to TorchScript optimization
+                    self.model = torch.jit.script(self.model)
                     logger.info("Model optimized with TorchScript")
                 except Exception as e:
                     logger.warning(f"TorchScript optimization failed: {str(e)}")