Spaces:

kdevoe
/

DialoGPT

Sleeping

App Files Community

kdevoe committed on Oct 16, 2024

Commit

e11bd6e

verified ·

1 Parent(s): 3f9b161

Loading only one model at a time to conserve memory

Browse files

Files changed (1) hide show

app.py +19 -10

app.py CHANGED Viewed

@@ -13,14 +13,23 @@ model_names = {
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-# Pre-load the models
-loaded_models = {
-    "DialoGPT-med-FT": AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium")
-}
-loaded_models["DialoGPT-med-FT"].load_state_dict(torch.load(model_names["DialoGPT-med-FT"], map_location=device))
-loaded_models["DialoGPT-med-FT"].to(device)
-loaded_models["DialoGPT-medium"] = AutoModelForCausalLM.from_pretrained(model_names["DialoGPT-medium"]).to(device)
 def respond(
     message,
@@ -30,8 +39,8 @@ def respond(
     temperature,
     top_p,
 ):
-    # Select the pre-loaded model based on user's choice
-    model = loaded_models[model_choice]
     # Prepare the input by concatenating the history into a dialogue format
     input_text = ""
@@ -60,7 +69,7 @@ demo = gr.ChatInterface(
     respond,
     type='messages',
     additional_inputs=[
-        gr.Dropdown(choices=["DialoGPT-med-FT", "DialoGPT-medium"], value="DialoGPT-med-FT", label="Model"),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),

 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# Load the default model initially
+current_model_name = "DialoGPT-medium"
+model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium")
+model.load_state_dict(torch.load(model_names[current_model_name], map_location=device))
+model.to(device)
+def load_model(model_name):
+    global model, current_model_name
+    if model_name != current_model_name:
+        # Load the new model and update the current model reference
+        if model_name == "DialoGPT-medium":
+            model = AutoModelForCausalLM.from_pretrained(model_names[model_name]).to(device)
+        elif model_name == "DialoGPT-med-FT":
+            model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium")
+            model.load_state_dict(torch.load(model_names[model_name], map_location=device))
+            model.to(device)
+        current_model_name = model_name
 def respond(
     message,
     temperature,
     top_p,
 ):
+    # Load the selected model if it's different from the current one
+    load_model(model_choice)
     # Prepare the input by concatenating the history into a dialogue format
     input_text = ""
     respond,
     type='messages',
     additional_inputs=[
+        gr.Dropdown(choices=["DialoGPT-med-FT", "DialoGPT-medium"], value="DialoGPT-medium", label="Model"),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),