Spaces:

ajsbsd
/

smollm2-zerocpu-demo

Running

App Files Community

ajsbsd committed on Jun 16

Commit

fe3c5c3

verified ·

1 Parent(s): 72d5687

Update app.py

Browse files

Files changed (1) hide show

app.py +4 -22

app.py CHANGED Viewed

@@ -88,25 +88,7 @@ def predict_chat(message: str, history: list):
     # CORRECTED: Check against ctransformers.llm.LLM directly
     if GGUF_AVAILABLE and isinstance(model, LLM):
         print("Using GGUF model generation path.")
-        prompt_input = ""
-        for msg in messages:
-            if msg["role"] == "system":
-                prompt_input += f"{msg['content']}\n"
-            elif msg["role"] == "user":
-                prompt_input += f"User: {msg['content']}\n"
-            elif msg["role"] == "assistant":
-                prompt_input += f"Assistant: {msg['content']}\n"
-        prompt_input += "Assistant:"
-        # FIXED: Use the correct ctransformers method - call model() directly for streaming
-        try:
-            for token in model(
-                prompt_input,
-                max_new_tokens=MAX_NEW_TOKENS,
-                temperature=TEMPERATURE,
-                top_k=TOP_K,
-                top_p=TOP_P,
-                do_sample=DO_SAMPLE,
                 repetition_penalty=1.1,
                 stop=["User:", "\nUser", "\n#", "\n##", "<|endoftext|>"],
                 stream=True
@@ -122,7 +104,7 @@ def predict_chat(message: str, history: list):
                 temperature=TEMPERATURE,
                 top_k=TOP_K,
                 top_p=TOP_P,
-                do_sample=DO_SAMPLE,
                 repetition_penalty=1.1,
                 stop=["User:", "\nUser", "\n#", "\n##", "<|endoftext|>"]
             )
@@ -141,7 +123,7 @@ def predict_chat(message: str, history: list):
             temperature=TEMPERATURE,
             top_k=TOP_K,
             top_p=TOP_P,
-            do_sample=DO_SAMPLE,
             pad_token_id=tokenizer.pad_token_id
         )
         generated_text = tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True).strip()
@@ -187,4 +169,4 @@ if __name__ == "__main__":
     demo.chatbot.value = initial_messages_for_value
-    demo.launch()

     # CORRECTED: Check against ctransformers.llm.LLM directly
     if GGUF_AVAILABLE and isinstance(model, LLM):
         print("Using GGUF model generation path.")
+        prompt_input Edo_sampledo_sample=DO_SAMPLE,
                 repetition_penalty=1.1,
                 stop=["User:", "\nUser", "\n#", "\n##", "<|endoftext|>"],
                 stream=True
                 temperature=TEMPERATURE,
                 top_k=TOP_K,
                 top_p=TOP_P,
+                #do_sample=DO_SAMPLE,
                 repetition_penalty=1.1,
                 stop=["User:", "\nUser", "\n#", "\n##", "<|endoftext|>"]
             )
             temperature=TEMPERATURE,
             top_k=TOP_K,
             top_p=TOP_P,
+            #do_sample=DO_SAMPLE,
             pad_token_id=tokenizer.pad_token_id
         )
         generated_text = tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True).strip()
     demo.chatbot.value = initial_messages_for_value
+    demo.launch()