Spaces:

ajsbsd
/

smollm2-zerocpu-demo

Running

ajsbsd committed on Jun 16

Commit

85c828a

verified ·

1 Parent(s): 23078b2

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -71,6 +71,9 @@ def load_model_for_zerocpu():
 # --- Inference Function for Gradio ChatInterface ---
 def predict_chat(message: str, history: list):
     if model is None or tokenizer is None:
         yield "Error: Model or tokenizer failed to load. Please check the Space logs for details."
         return
@@ -82,6 +85,8 @@ def predict_chat(message: str, history: list):
     start_time = time.time()
     if isinstance(model, AutoModelForCausalLM_GGUF):
         prompt_input = ""
         for msg in messages:
             if msg["role"] == "system":
@@ -105,13 +110,14 @@ def predict_chat(message: str, history: list):
             generated_text += token
             yield generated_text
-    else: # This is the block where the error occurred
         input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
         inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
         outputs = model.generate(
             inputs,
-            # Changed max_new_tokens to max_length for broader compatibility
             max_length=inputs.shape[-1] + MAX_NEW_TOKENS,
             temperature=TEMPERATURE,
             top_k=TOP_K,
@@ -158,7 +164,6 @@ if __name__ == "__main__":
             ["What's the best way to stay motivated?"],
         ],
         cache_examples=False,
-        # clear_btn="Clear Chat" was removed in the previous step
     )
     demo.chatbot.value = initial_messages_for_value

 # --- Inference Function for Gradio ChatInterface ---
 def predict_chat(message: str, history: list):
+    # NEW DIAGNOSTIC PRINT: Check model type at the start of prediction
+    print(f"Model type in predict_chat: {type(model)}")
     if model is None or tokenizer is None:
         yield "Error: Model or tokenizer failed to load. Please check the Space logs for details."
         return
     start_time = time.time()
     if isinstance(model, AutoModelForCausalLM_GGUF):
+        # NEW DIAGNOSTIC PRINT: Confirm GGUF path is taken
+        print("Using GGUF model generation path.")
         prompt_input = ""
         for msg in messages:
             if msg["role"] == "system":
             generated_text += token
             yield generated_text
+    else:
+        # NEW DIAGNOSTIC PRINT: Confirm standard Hugging Face path is taken
+        print("Using standard Hugging Face model generation path.")
         input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
         inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
         outputs = model.generate(
             inputs,
             max_length=inputs.shape[-1] + MAX_NEW_TOKENS,
             temperature=TEMPERATURE,
             top_k=TOP_K,
             ["What's the best way to stay motivated?"],
         ],
         cache_examples=False,
     )
     demo.chatbot.value = initial_messages_for_value