Update app.py
app.py CHANGED
@@ -9,8 +9,9 @@ MODEL_ID = "LMSeed/GPT2-small-distilled-900M_None_ppo-1000K-seed42"
 device = 0 if torch.cuda.is_available() else -1
 
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
-
-
+if tokenizer.pad_token is None:
+    tokenizer.pad_token = tokenizer.eos_token_id
+
 model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
 
 if torch.cuda.is_available():
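Reviewer note on the pad-token hunk: pad_token expects a token string, while eos_token_id is an integer id, so the assignment above most likely meant tokenizer.eos_token. A minimal self-contained sketch of the conventional fallback, assuming only the MODEL_ID from the top of app.py:

# Sketch of the usual pad-token fallback (assumption: this is what the hunk
# above intends; pad_token takes a token *string*, not an integer id).
from transformers import AutoTokenizer

MODEL_ID = "LMSeed/GPT2-small-distilled-900M_None_ppo-1000K-seed42"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # e.g. "<|endoftext|>" for GPT-2

# Setting the string also resolves the id, so padding and EOS now coincide.
assert tokenizer.pad_token_id == tokenizer.eos_token_id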
@@ -19,31 +20,44 @@ if torch.cuda.is_available():
 def generate_reply(prompt, max_new_tokens, temperature, top_p):
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
 
+    input_len = inputs["input_ids"].shape[1]
+
     output_ids = model.generate(
         **inputs,
         max_new_tokens=int(max_new_tokens),
         do_sample=True,
         temperature=float(temperature),
         top_p=float(top_p),
-
+        no_repeat_ngram_size=3,
+        repetition_penalty=1.2,
+        eos_token_id=tokenizer.eos_token_id,
         pad_token_id=tokenizer.eos_token_id
     )
-
+
+    generated_tokens = output_ids[0][input_len:]
     text = tokenizer.decode(
         output_ids[0],
-        skip_special_tokens=
+        skip_special_tokens=True,
         clean_up_tokenization_spaces=True
     )
 
     return text
 
 def clean_reply(text):
-
     text = text.strip()
+    stop_words = ["Human:", "User:", "AI:", "Assistant:"]
+    for word in stop_words:
+        if word in text:
+            text = text.split(word)[0]
+    return text.strip()
+
+# def clean_reply(text):
+
+#     text = text.strip()
 
-    for prefix in ["Assistant:", "assistant:", "User:", "user:"]:
-        if text.startswith(prefix):
-            text = text[len(prefix):].strip()
+#     for prefix in ["Assistant:", "assistant:", "User:", "user:"]:
+#         if text.startswith(prefix):
+#             text = text[len(prefix):].strip()
 
     # lines = [l.strip() for l in text.split("\n")]
     # lines = [l for l in lines if l]
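Note on generate_reply: the hunk computes input_len and slices generated_tokens = output_ids[0][input_len:], but the decode below it still reads output_ids[0], so the returned text contains the prompt as well as the completion. A sketch of the variant the unused slice points at, assuming app.py's module-level tokenizer and model (the helper name is ours):

# Sketch: decode only the newly generated tokens, which the unused
# generated_tokens slice in the hunk above appears to be aiming at.
# Assumes app.py's module-level tokenizer and model are in scope.
def generate_reply_new_only(prompt, max_new_tokens, temperature, top_p):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    input_len = inputs["input_ids"].shape[1]

    output_ids = model.generate(
        **inputs,
        max_new_tokens=int(max_new_tokens),
        do_sample=True,
        temperature=float(temperature),
        top_p=float(top_p),
        no_repeat_ngram_size=3,
        repetition_penalty=1.2,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Slice the prompt off before decoding so only the completion remains.
    generated_tokens = output_ids[0][input_len:]
    return tokenizer.decode(
        generated_tokens,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=True,
    )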
@@ -51,39 +65,56 @@ def clean_reply(text):
     # if len(lines) == 0:
     #     return ""
 
-    # return lines[0]
-    return text
+    # return lines[0]
 
+# def chat_with_model(user_message, chat_history, max_new_tokens=256, temperature=0.8, top_p=0.9):
+
+#     if chat_history is None:
+#         chat_history = []
+
+#     # Build conversation history
+#     # history_text = "The following is a friendly conversation between a human and an AI assistant.\n"
+#     history_text = "The following is a friendly conversation between a human and an AI story-telling assistant. \
+#     The assistant should tell a story according to human's requirment.\n"
+
+#     for msg in chat_history:
+#         role = "Human" if msg["role"] == "user" else "AI"
+#         history_text += f"{role}: {msg['content']}\n"
+
+#     history_text += f"Human: {user_message}\nAI:"
+
+#     # -------- generate ----------
+#     raw = generate_reply(
+#         history_text,
+#         max_new_tokens,
+#         temperature,
+#         top_p
+#     )
+#     # Only keep new part
+#     reply = raw[len(history_text):]
+#     reply = clean_reply(reply)
+#     # ------------------------------
+
+#     chat_history.append({"role": "user", "content": user_message})
+#     chat_history.append({"role": "assistant", "content": reply})
+
+#     return "", chat_history, chat_history
 def chat_with_model(user_message, chat_history, max_new_tokens=256, temperature=0.8, top_p=0.9):
-
     if chat_history is None:
         chat_history = []
+    prompt_text = f"User request: {user_message}\n\nHere is a long, creative story based on the request:\nOnce upon a time,"
 
-    # Build conversation history
-    # history_text = "The following is a friendly conversation between a human and an AI assistant.\n"
-    history_text = "The following is a friendly conversation between a human and an AI story-telling assistant. \
-    The assistant should tell a story according to human's requirment.\n"
-
-    for msg in chat_history:
-        role = "Human" if msg["role"] == "user" else "AI"
-        history_text += f"{role}: {msg['content']}\n"
-
-    history_text += f"Human: {user_message}\nAI:"
-
-    # -------- generate ----------
-    raw = generate_reply(
-        history_text,
+    reply = generate_reply(
+        prompt_text,
         max_new_tokens,
         temperature,
         top_p
     )
-    # Only keep new part
-    reply = raw[len(history_text):]
-    reply = clean_reply(reply)
-    # ------------------------------
+
+    final_reply = "Once upon a time, " + clean_reply(reply)
 
     chat_history.append({"role": "user", "content": user_message})
-    chat_history.append({"role": "assistant", "content": reply})
+    chat_history.append({"role": "assistant", "content": final_reply})
 
     return "", chat_history, chat_history
 
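Note on the rewritten chat_with_model: multi-turn history is dropped; each request is wrapped in a fixed storytelling prompt that ends with the seed "Once upon a time,", and the seed is re-prepended after cleaning. Re-prepending only makes sense if generate_reply returns just the completion (see the sketch after the previous hunk); as committed, the decoded text still includes the prompt. A self-contained sketch of the intended round trip, with an invented model output:

# Standalone sketch of the new prompt round trip; the raw model output
# below is invented for illustration.
user_message = "a story about a lighthouse keeper"
prompt_text = (
    f"User request: {user_message}\n\n"
    "Here is a long, creative story based on the request:\nOnce upon a time,"
)

def clean_reply(text):
    # Same truncation rule the commit adds: cut at the first role marker.
    text = text.strip()
    for word in ["Human:", "User:", "AI:", "Assistant:"]:
        if word in text:
            text = text.split(word)[0]
    return text.strip()

raw = " there was a keeper named Ila. Human: tell me another one"
final_reply = "Once upon a time, " + clean_reply(raw)
print(final_reply)  # Once upon a time, there was a keeper named Ila.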
@@ -96,12 +127,14 @@ with gr.Blocks() as demo:
         chat = gr.Chatbot(elem_id="chatbot", label="Conversation")
         msg = gr.Textbox(label="Your message")
         send = gr.Button("Send")
-        max_tokens = gr.Slider(50,
-        temp = gr.Slider(0.6, 1.5, value=
-        top_p = gr.Slider(0.1, 1.0, value=0.
+        max_tokens = gr.Slider(50, 1025, value=300, label="max_new_tokens")
+        temp = gr.Slider(0.6, 1.5, value=1.0, label="temperature")
+        top_p = gr.Slider(0.1, 1.0, value=0.95, label="top_p")
 
     with gr.Column(scale=1):
         gr.Markdown("Model: " + MODEL_ID)
+        gr.Markdown("Note: GPT-2 is a base model. If prompts are too complex, \
+                    it might get confused. This setup is optimized for storytelling.")
 
     state = gr.State([])
 
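Note on the UI hunk: the sliders now carry concrete ranges and defaults, but the diff does not show how they reach chat_with_model. A hypothetical wiring in the usual Gradio pattern, assuming the widgets and handler defined above (this hookup is not part of the commit):

# Hypothetical wiring (not shown in this diff): feed the widgets into
# chat_with_model, whose return ("", history, history) clears the textbox
# and refreshes both the Chatbot and the State.
send.click(
    chat_with_model,
    inputs=[msg, state, max_tokens, temp, top_p],
    outputs=[msg, chat, state],
)
msg.submit(
    chat_with_model,
    inputs=[msg, state, max_tokens, temp, top_p],
    outputs=[msg, chat, state],
)

Since chat_history holds role/content dicts, the Chatbot would also need Gradio's messages format (gr.Chatbot(type="messages")) on recent Gradio versions to render them.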