prelington committed on
Commit
468dc51
·
verified ·
1 Parent(s): 8a2b266

Update ProTalk_MemoryChat.py

Browse files
Files changed (1) hide show
  1. ProTalk_MemoryChat.py +16 -44
ProTalk_MemoryChat.py CHANGED
@@ -1,52 +1,24 @@
1
- from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
2
  import torch
3
- import threading
4
 
5
- model_name = "microsoft/phi-2"
6
  device = "cuda" if torch.cuda.is_available() else "cpu"
7
 
8
- tokenizer = AutoTokenizer.from_pretrained(model_name)
9
- model = AutoModelForCausalLM.from_pretrained(
10
- model_name,
11
- torch_dtype=torch.float16 if device == "cuda" else torch.float32,
12
- low_cpu_mem_usage=True
13
- ).to(device)
14
 
15
- system_prompt = (
16
- "You are ProTalk, a professional AI assistant. "
17
- "You remember everything the user said in this session and respond politely, "
18
- "clearly, and intelligently. Keep a coherent conversation history."
19
- )
20
 
21
  chat_history = []
22
 
23
- def chat_loop():
24
- print("ProTalk Memory Chat Online — type 'exit' to quit.\n")
25
- while True:
26
- user_input = input("User: ")
27
- if user_input.lower() == "exit":
28
- break
29
- chat_history.append(f"User: {user_input}")
30
- prompt = system_prompt + "\n" + "\n".join(chat_history) + "\nProTalk:"
31
- inputs = tokenizer(prompt, return_tensors="pt").to(device)
32
- streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
33
- thread = threading.Thread(target=model.generate, kwargs={
34
- "input_ids": inputs["input_ids"],
35
- "max_new_tokens": 300,
36
- "do_sample": True,
37
- "temperature": 0.7,
38
- "top_p": 0.9,
39
- "repetition_penalty": 1.2,
40
- "streamer": streamer
41
- })
42
- thread.start()
43
- output_text = ""
44
- for token in streamer:
45
- print(token, end="", flush=True)
46
- output_text += token
47
- thread.join()
48
- print()
49
- chat_history.append(f"ProTalk: {output_text}")
50
-
51
- if __name__ == "__main__":
52
- chat_loop()
 
1
+ from transformers import AutoModelForCausalLM, AutoTokenizer
2
  import torch
 
3
 
4
+ model_name = "./ProTalkModel.safetensors"
5
  device = "cuda" if torch.cuda.is_available() else "cpu"
6
 
7
+ tokenizer = AutoTokenizer.from_pretrained("./")
8
+ model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16 if device=="cuda" else torch.float32).to(device)
 
 
 
 
9
 
10
+ system_prompt = "You are ProTalk, a professional AI assistant. Remember everything in this conversation. Be polite, witty, and professional."
 
 
 
 
11
 
# Running transcript of the session as alternating "User: ..." / "ProTalk: ..." lines.
chat_history = []

# Simple REPL: read a line, regenerate from the full history, print the reply.
while True:
    user_input = input("User: ")
    if user_input.lower() == "exit":
        break
    chat_history.append(f"User: {user_input}")
    prompt = system_prompt + "\n" + "\n".join(chat_history) + "\nProTalk:"
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = model.generate(**inputs, max_new_tokens=150, do_sample=True, temperature=0.7, top_p=0.9)
    # BUG FIX: decode only the newly generated tokens. outputs[0] starts with
    # the input prompt, so decoding it whole echoed the entire conversation
    # back as the "response" and re-appended it to chat_history, making the
    # prompt grow multiplicatively every turn.
    prompt_len = inputs["input_ids"].shape[-1]
    response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()
    print(f"ProTalk: {response}")
    chat_history.append(f"ProTalk: {response}")