zyzzyva committed on
Commit
062b730
·
1 Parent(s): 3fe7988

yeah we vibecoding

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -54,17 +54,17 @@ def load_model(repo_id, filename, config_override=None):
54
 
55
  # Load both models once at the start
56
  try:
57
- # This is the baseline model from your portfolio
58
  BASELINE_MODEL, BASELINE_CFG = load_model(
59
  repo_id="SQCU/pgptlformer-tinystories",
60
- filename="state_step040500.pt"
61
  )
62
 
63
- # This is the shift-attn model. Note the config_override.
64
  SHIFT_ATTN_MODEL, SHIFT_ATTN_CFG = load_model(
65
  repo_id="SQCU/pgptlformer-tinystories",
66
  filename="re-pqt-rmsXrmsx2x2-ATTNII-791967c5-5c59-4a5f-a2c5-07772bcf65ab/state_step040500.pt",
67
- config_override={"attention_deux": True} # Crucial: This enables the shift-attn mechanism in your code
68
  )
69
  except Exception as e:
70
  # If loading fails, show an error in the Gradio app instead of crashing
 
54
 
55
  # Load both models once at the start
56
  try:
57
+ # FIX #1: Add the correct subdirectory for the baseline model
58
  BASELINE_MODEL, BASELINE_CFG = load_model(
59
  repo_id="SQCU/pgptlformer-tinystories",
60
+ filename="re-pqt-rmsXrmsx2-70b91221-a39c-4824-a69c-48a034963529/state_step040500.pt"
61
  )
62
 
63
+ # FIX #2: The shift-attn filename already included its subdirectory; the path is unchanged
64
  SHIFT_ATTN_MODEL, SHIFT_ATTN_CFG = load_model(
65
  repo_id="SQCU/pgptlformer-tinystories",
66
  filename="re-pqt-rmsXrmsx2x2-ATTNII-791967c5-5c59-4a5f-a2c5-07772bcf65ab/state_step040500.pt",
67
+ config_override={"attention_deux": True}
68
  )
69
  except Exception as e:
70
  # If loading fails, show an error in the Gradio app instead of crashing