Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -71,6 +71,9 @@ def load_model_for_zerocpu():
|
|
| 71 |
|
| 72 |
# --- Inference Function for Gradio ChatInterface ---
|
| 73 |
def predict_chat(message: str, history: list):
|
|
|
|
|
|
|
|
|
|
| 74 |
if model is None or tokenizer is None:
|
| 75 |
yield "Error: Model or tokenizer failed to load. Please check the Space logs for details."
|
| 76 |
return
|
|
@@ -82,6 +85,8 @@ def predict_chat(message: str, history: list):
|
|
| 82 |
start_time = time.time()
|
| 83 |
|
| 84 |
if isinstance(model, AutoModelForCausalLM_GGUF):
|
|
|
|
|
|
|
| 85 |
prompt_input = ""
|
| 86 |
for msg in messages:
|
| 87 |
if msg["role"] == "system":
|
|
@@ -105,13 +110,14 @@ def predict_chat(message: str, history: list):
|
|
| 105 |
generated_text += token
|
| 106 |
yield generated_text
|
| 107 |
|
| 108 |
-
else:
|
|
|
|
|
|
|
| 109 |
input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
| 110 |
inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
|
| 111 |
|
| 112 |
outputs = model.generate(
|
| 113 |
inputs,
|
| 114 |
-
# Changed max_new_tokens to max_length for broader compatibility
|
| 115 |
max_length=inputs.shape[-1] + MAX_NEW_TOKENS,
|
| 116 |
temperature=TEMPERATURE,
|
| 117 |
top_k=TOP_K,
|
|
@@ -158,7 +164,6 @@ if __name__ == "__main__":
|
|
| 158 |
["What's the best way to stay motivated?"],
|
| 159 |
],
|
| 160 |
cache_examples=False,
|
| 161 |
-
# clear_btn="Clear Chat" was removed in the previous step
|
| 162 |
)
|
| 163 |
|
| 164 |
demo.chatbot.value = initial_messages_for_value
|
|
|
|
| 71 |
|
| 72 |
# --- Inference Function for Gradio ChatInterface ---
|
| 73 |
def predict_chat(message: str, history: list):
|
| 74 |
+
# NEW DIAGNOSTIC PRINT: Check model type at the start of prediction
|
| 75 |
+
print(f"Model type in predict_chat: {type(model)}")
|
| 76 |
+
|
| 77 |
if model is None or tokenizer is None:
|
| 78 |
yield "Error: Model or tokenizer failed to load. Please check the Space logs for details."
|
| 79 |
return
|
|
|
|
| 85 |
start_time = time.time()
|
| 86 |
|
| 87 |
if isinstance(model, AutoModelForCausalLM_GGUF):
|
| 88 |
+
# NEW DIAGNOSTIC PRINT: Confirm GGUF path is taken
|
| 89 |
+
print("Using GGUF model generation path.")
|
| 90 |
prompt_input = ""
|
| 91 |
for msg in messages:
|
| 92 |
if msg["role"] == "system":
|
|
|
|
| 110 |
generated_text += token
|
| 111 |
yield generated_text
|
| 112 |
|
| 113 |
+
else:
|
| 114 |
+
# NEW DIAGNOSTIC PRINT: Confirm standard Hugging Face path is taken
|
| 115 |
+
print("Using standard Hugging Face model generation path.")
|
| 116 |
input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
| 117 |
inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
|
| 118 |
|
| 119 |
outputs = model.generate(
|
| 120 |
inputs,
|
|
|
|
| 121 |
max_length=inputs.shape[-1] + MAX_NEW_TOKENS,
|
| 122 |
temperature=TEMPERATURE,
|
| 123 |
top_k=TOP_K,
|
|
|
|
| 164 |
["What's the best way to stay motivated?"],
|
| 165 |
],
|
| 166 |
cache_examples=False,
|
|
|
|
| 167 |
)
|
| 168 |
|
| 169 |
demo.chatbot.value = initial_messages_for_value
|