Spaces:

AlekMan
/

HSE_AI

Sleeping

App Files Community

AlekMan committed on Jun 16, 2025

Commit

451e175

verified ·

1 Parent(s): 1148122

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -52

app.py CHANGED Viewed

@@ -1,5 +1,4 @@
 from typing import Literal, Optional, Tuple
-from dataclasses import dataclass
 from pathlib import Path
 import logging
@@ -7,6 +6,7 @@ import gradio as gr
 from omegaconf import OmegaConf
 from dacite import Config as DaciteConfig, from_dict
 from transformers import GPT2Config, GPT2LMHeadModel
 from llm_trainer import LLMTrainer
 from xlstm import xLSTMLMModel, xLSTMLMModelConfig
@@ -15,25 +15,10 @@ logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
-@dataclass
-class ModelConfig:
-    name: Literal["xLSTM", "GPT2"]
-    checkpoint_path: str
-    config_path: Optional[str] = None
-MODEL_CONFIGS = {
-    "GPT2": ModelConfig(
-        name="GPT2",
-        checkpoint_path="checkpoints/gpt/cp_3999.pth"
-    ),
-    "xLSTM": ModelConfig(
-        name="xLSTM",
-        checkpoint_path="checpoints/xlstm/cp_9999.pth",
-        config_path="research/xlstm_config.yaml"
-    )
-}
 GPT2_CONFIG = GPT2Config(
     vocab_size=50304,
     n_positions=256,
@@ -43,6 +28,9 @@ GPT2_CONFIG = GPT2Config(
     activation_function="gelu"
 )
 UI_CONFIG = {
     "title": "HSEAI",
     "description": "Enter your text below and the AI will continue it.",
@@ -57,6 +45,13 @@ UI_CONFIG = {
 }
 class ModelManager:
     """Manages model initialization and caching"""
@@ -64,26 +59,7 @@ class ModelManager:
         self._current_trainer: Optional[LLMTrainer] = None
         self._current_model: Optional[str] = None
-    def _create_gpt2_trainer(self) -> LLMTrainer:
-        """Create GPT2 trainer instance"""
-        model = GPT2LMHeadModel(GPT2_CONFIG)
-        return LLMTrainer(model=model, model_returns_logits=False)
-    def _create_xlstm_trainer(self, config_path: str) -> LLMTrainer:
-        """Create xLSTM trainer instance"""
-        if not Path(config_path).exists():
-            raise FileNotFoundError(f"xLSTM config file not found: {config_path}")
-        cfg = OmegaConf.load(config_path)
-        cfg = from_dict(
-            data_class=xLSTMLMModelConfig,
-            data=OmegaConf.to_container(cfg),
-            config=DaciteConfig(strict=True)
-        )
-        model = xLSTMLMModel(cfg)
-        return LLMTrainer(model=model, model_returns_logits=True)
-    def get_trainer(self, model_name: Literal["xLSTM", "GPT2"]) -> LLMTrainer:
         """Get trainer instance, creating if necessary"""
         if self._current_trainer is None or self._current_model != model_name:
             logger.info(f"Loading model: {model_name}")
@@ -95,25 +71,18 @@ class ModelManager:
     def _load_model(self, model_name: Literal["xLSTM", "GPT2"]) -> LLMTrainer:
         """Load and initialize model"""
-        if model_name not in MODEL_CONFIGS:
-            raise ValueError(f"Invalid model: {model_name}. Valid models: {list(MODEL_CONFIGS.keys())}")
-        config = MODEL_CONFIGS[model_name]
         try:
             if model_name == "GPT2":
-                trainer = self._create_gpt2_trainer()
             elif model_name == "xLSTM":
-                trainer = self._create_xlstm_trainer(config.config_path)
             else:
                 raise ValueError(f"Unsupported model: {model_name}")
-            checkpoint_path = Path(config.checkpoint_path)
-            if not checkpoint_path.exists():
-                raise FileNotFoundError(f"Checkpoint not found: {checkpoint_path}")
-            logger.info(f"Loading checkpoint: {checkpoint_path}")
-            trainer.load_checkpoint(str(checkpoint_path))
             return trainer
         except Exception as e:
@@ -178,7 +147,7 @@ def create_input_section() -> Tuple[gr.Textbox, gr.Dropdown, gr.Slider, gr.Butto
         with gr.Row():
             model_choice = gr.Dropdown(
-                choices=list(MODEL_CONFIGS.keys()),
                 value=UI_CONFIG["default_model"],
                 label="Model",
                 interactive=True

 from typing import Literal, Optional, Tuple
 from pathlib import Path
 import logging
 from omegaconf import OmegaConf
 from dacite import Config as DaciteConfig, from_dict
 from transformers import GPT2Config, GPT2LMHeadModel
+from huggingface_hub import PyTorchModelHubMixin
 from llm_trainer import LLMTrainer
 from xlstm import xLSTMLMModel, xLSTMLMModelConfig
 logger = logging.getLogger(__name__)
+class xLSTMWrapper(xLSTMLMModel, PyTorchModelHubMixin):
+    pass
 GPT2_CONFIG = GPT2Config(
     vocab_size=50304,
     n_positions=256,
     activation_function="gelu"
 )
+XLSTM_CONFIG = OmegaConf.load("xlstm_config.yaml")
+XLSTM_CONFIG = from_dict(data_class=xLSTMLMModelConfig, data=OmegaConf.to_container(XLSTM_CONFIG), config=DaciteConfig(strict=True))
 UI_CONFIG = {
     "title": "HSEAI",
     "description": "Enter your text below and the AI will continue it.",
 }
+xLSTM = xLSTMWrapper(XLSTM_CONFIG).from_pretrained("AlekMan/HSE_AI_XLSTM", config=XLSTM_CONFIG)
+xLSTM_ft = xLSTMWrapper(XLSTM_CONFIG).from_pretrained("AlekMan/HSE_AI_XLSTM_FT", config=XLSTM_CONFIG)
+gpt2 = GPT2LMHeadModel(GPT2_CONFIG).from_pretrained("AlekMan/HSE_AI_GPT2")
+gpt2_lora = GPT2LMHeadModel(GPT2_CONFIG).from_pretrained("AlekMan/HSE_AI_GPT2")
+gpt2_lora.load_adapter("AlekMan/HSE_AI_GPT2_LoRA")
 class ModelManager:
     """Manages model initialization and caching"""
         self._current_trainer: Optional[LLMTrainer] = None
         self._current_model: Optional[str] = None
+    def get_trainer(self, model_name: Literal["xLSTM", "GPT2", "xLSTM_FT", "GPT2_FT"]):
         """Get trainer instance, creating if necessary"""
         if self._current_trainer is None or self._current_model != model_name:
             logger.info(f"Loading model: {model_name}")
     def _load_model(self, model_name: Literal["xLSTM", "GPT2"]) -> LLMTrainer:
         """Load and initialize model"""
         try:
             if model_name == "GPT2":
+                trainer = LLMTrainer(model=gpt2, model_returns_logits=False)
             elif model_name == "xLSTM":
+                trainer = LLMTrainer(model=xLSTM, model_returns_logits=True)
+            elif model_name == "GPT2_FT":
+                trainer = LLMTrainer(model=gpt2_lora, model_returns_logits=False)
+            elif model_name == "xLSTM_FT":
+                trainer = LLMTrainer(model=xLSTM_ft, model_returns_logits=True)
             else:
                 raise ValueError(f"Unsupported model: {model_name}")
             return trainer
         except Exception as e:
         with gr.Row():
             model_choice = gr.Dropdown(
+                choices=["GPT2", "GPT2_FT", "xLSTM", "xLSTM_FT"],
                 value=UI_CONFIG["default_model"],
                 label="Model",
                 interactive=True