kgrabko commited on
Commit
0feea22
·
verified ·
1 Parent(s): ea7894f

Update chatbot_gpt2.py

Browse files
Files changed (1) hide show
  1. chatbot_gpt2.py +178 -162
chatbot_gpt2.py CHANGED
@@ -1,163 +1,179 @@
1
- # Copyright (c) 2025 CMS Manhattan
2
- # All rights reserved.
3
- #
4
- # This file is part of a project authored by CMS Manhattan. You may use, distribute, and modify
5
- # this code under the terms of the APACHE 2.0 license.
6
-
7
- import torch
8
- import torch.nn.functional as F
9
- from transformers import GPT2TokenizerFast
10
- from gpt_pytorch import GPTPyTorch # Using the same import as in fine_tune.py
11
- import os
12
- from pathlib import Path
13
-
14
- # ============================= GENERATION SETTINGS =============================
15
- # Temperature: Lower = more conservative and predictable answers.
16
- # Start with 0.7. Increase to 0.8 if the model starts repeating itself.
17
- TEMPERATURE = 0.7
18
-
19
- # Top-K: Limits sampling to the K most likely tokens.
20
- # Start with 50. Increase if responses feel too boring/repetitive.
21
- TOP_K = 50
22
-
23
- # Max Length: Maximum number of tokens to generate in one go
24
- MAX_LENGTH = 120
25
-
26
- # ============================= PATHS =============================
27
- # LAST_TRAINED_PATH = Path("models/gpt_last_trained.pt")
28
- LAST_TRAINED_PATH = Path("build/fine_tuning_output/epoch49/gpt_finetuned.pt")
29
- # FINAL_OUTPUT_DIR = Path("build/fine_tuning_output/final")
30
- FINAL_OUTPUT_DIR = Path("build/fine_tuning_output/epoch49/gpt_finetuned.pt")
31
- MODEL_SAVE_NAME = "gpt_finetuned.pt"
32
-
33
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
34
-
35
- # ============================= Chatbot CLASS =============================
36
- class Chatbot:
37
- def __init__(self, model_path):
38
- # 1. Tokenizer
39
- print("Loading standard tokenizer (gpt2)...")
40
- self.tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
41
- self.tokenizer.pad_token = self.tokenizer.eos_token
42
-
43
- #2. Model
44
- print("Initializing model...")
45
- self.model = GPTPyTorch().to(device)
46
- self.model.eval()
47
-
48
- # Look for the latest weights: first check final folder, then last_trained
49
- load_path = None
50
- if (FINAL_OUTPUT_DIR / MODEL_SAVE_NAME).exists():
51
- load_path = FINAL_OUTPUT_DIR / MODEL_SAVE_NAME
52
- print(f"Weights from Epoch 50 found. Loading and moving to {device}...")
53
- elif model_path.exists():
54
- load_path = model_path
55
- print(f"Loading weights from {load_path} and moving to {device}...")
56
-
57
- if load_path:
58
- self.model.load_state_dict(torch.load(load_path, map_location=device))
59
- else:
60
- print("Warning: No trained weights found. Using randomly initialized model.")
61
-
62
- print(f"Model successfully loaded on {device} and ready for chat!")
63
-
64
- def generate_response(self, prompt, max_length=MAX_LENGTH, temperature=TEMPERATURE, top_k=TOP_K):
65
- # Tokenize input
66
- input_ids = self.tokenizer.encode(prompt, return_tensors='pt').to(device)
67
-
68
- # Generation loop
69
- with torch.no_grad():
70
- for _ in range(max_length):
71
- # Forward pass through the model
72
- logits, _ = self.model(input_ids)
73
-
74
- # Take logits only for the last token
75
- next_token_logits = logits[:, -1, :]
76
-
77
- # Apply temperature
78
- next_token_logits = next_token_logits / temperature
79
-
80
- # Apply Top-K sampling
81
- if top_k > 0:
82
- # Keep only the top-k most likely tokens
83
- values, indices = torch.topk(next_token_logits, top_k)
84
- # Zero out everything else (set to -inf)
85
- next_token_logits = torch.full_like(next_token_logits, float('-inf'))
86
- next_token_logits.scatter_(1, indices, values)
87
-
88
- # Convert to probabilities and sample the next token
89
- probabilities = F.softmax(next_token_logits, dim=-1)
90
- next_token = torch.multinomial(probabilities, num_samples=1)
91
-
92
- # Append generated token to the sequence
93
- input_ids = torch.cat([input_ids, next_token], dim=-1)
94
-
95
- # Stop if end-of-utterance (__eou__) or EOS token is generated
96
- generated_token = self.tokenizer.decode(next_token.squeeze().item())
97
- if "__eou__" in generated_token or next_token.squeeze().item() == self.tokenizer.eos_token_id:
98
- break
99
-
100
- # Decode the full generated sequence
101
- output = self.tokenizer.decode(input_ids.squeeze().tolist())
102
-
103
- # Remove the original prompt from the output
104
- response = output[len(prompt):].strip()
105
-
106
- # Clean up any leftover end-of-utterance tokens
107
- response = response.replace("__eou__", "").strip()
108
-
109
- return response
110
-
111
-
112
- def main():
113
- # Fix for modifying globals inside the function
114
- global TEMPERATURE, TOP_K
115
-
116
- chatbot = Chatbot(LAST_TRAINED_PATH)
117
-
118
- print("\n" + "="*60)
119
- print(f"CHATBOT ACTIVATED (PPL ~2.6 / Temperature {TEMPERATURE} / Top-K {TOP_K})")
120
- print("Type 'exit' or 'quit' to quit. Use 'set temp=0.x' or 'set k=N' to change settings.")
121
- print("="*60 + "\n")
122
-
123
- while True:
124
- try:
125
- user_input = input(">>> You: ")
126
-
127
- if user_input.lower() in ['quit', 'exit']:
128
- print("Goodbye!")
129
- break
130
-
131
- # Settings commands
132
- if user_input.lower().startswith('set temp='):
133
- try:
134
- TEMPERATURE = float(user_input.split('=')[1].strip())
135
- print(f"Temperature updated to {TEMPERATURE}")
136
- continue
137
- except ValueError:
138
- print("Invalid temperature. Use format: set temp=0.7")
139
- continue
140
-
141
- if user_input.lower().startswith('set k='):
142
- try:
143
- TOP_K = int(user_input.split('=')[1].strip())
144
- print(f"Top-K updated to {TOP_K}")
145
- continue
146
- except ValueError:
147
- print("Invalid value. Use format: set k=50")
148
- continue
149
-
150
- print("...Generating...")
151
- response = chatbot.generate_response(user_input)
152
- print(f"Model: {response}\n")
153
-
154
- except KeyboardInterrupt:
155
- print("\nGoodbye!")
156
- break
157
- except Exception as e:
158
- print(f"An error occurred: {e}")
159
- break
160
-
161
-
162
- if __name__ == "__main__":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  main()
 
1
+ # Copyright (c) 2025 CMS Manhattan
2
+ # All rights reserved.
3
+ # Author: Konstantin Vladimirovich Grabko
4
+ # Email: grabko@cmsmanhattan.com
5
+ # Phone: +1(516)777-0945
6
+ #
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU General Public License as published by
9
+ # the Free Software Foundation, version 3 of the License.
10
+ #
11
+ # This program is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
18
+ #
19
+ # Additional terms:
20
+ # Any commercial use or distribution of this software or derivative works
21
+ # requires explicit written permission from the copyright holder.
22
+
23
+ import torch
24
+ import torch.nn.functional as F
25
+ from transformers import GPT2TokenizerFast
26
+ from gpt_pytorch import GPTPyTorch # Using the same import as in fine_tune.py
27
+ import os
28
+ from pathlib import Path
29
+
30
# ============================= GENERATION SETTINGS =============================
# Sampling temperature. Lower values sharpen the distribution toward the most
# likely tokens (more conservative, predictable replies). 0.7 is the starting
# point; nudge toward 0.8 if the model begins repeating itself.
TEMPERATURE = 0.7

# Top-K cutoff: only the K highest-probability tokens are eligible at each
# sampling step. Raise it if responses feel bland or repetitive.
TOP_K = 50

# Hard ceiling on the number of tokens generated for a single reply.
MAX_LENGTH = 120
41
+
42
# ============================= PATHS =============================
# Checkpoint written by fine-tuning (epoch 49).
# LAST_TRAINED_PATH = Path("models/gpt_last_trained.pt")
LAST_TRAINED_PATH = Path("build/fine_tuning_output/epoch49/gpt_finetuned.pt")
# Directory that holds the final fine-tuned weights.
# BUG FIX: this previously pointed at the .pt FILE itself, so
# FINAL_OUTPUT_DIR / MODEL_SAVE_NAME resolved to
# ".../gpt_finetuned.pt/gpt_finetuned.pt" — a path that never exists —
# and the preferred-checkpoint branch in Chatbot.__init__ was dead code.
# FINAL_OUTPUT_DIR = Path("build/fine_tuning_output/final")
FINAL_OUTPUT_DIR = Path("build/fine_tuning_output/epoch49")
MODEL_SAVE_NAME = "gpt_finetuned.pt"

# Run on GPU when one is available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
50
+
51
# ============================= Chatbot CLASS =============================
class Chatbot:
    """Interactive wrapper around the project's fine-tuned GPT model.

    Loads the standard GPT-2 tokenizer and a GPTPyTorch model, restores the
    most recent fine-tuned weights it can find, and exposes
    generate_response() for sampling chat replies.
    """

    def __init__(self, model_path):
        """Build tokenizer and model, then load weights.

        model_path: pathlib.Path to a fallback state-dict checkpoint, used
        when the preferred FINAL_OUTPUT_DIR checkpoint is absent.
        """
        # 1. Tokenizer. GPT-2 ships without a pad token, so reuse EOS.
        print("Loading standard tokenizer (gpt2)...")
        self.tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
        self.tokenizer.pad_token = self.tokenizer.eos_token

        # 2. Model in inference mode (eval() disables dropout etc.).
        print("Initializing model...")
        self.model = GPTPyTorch().to(device)
        self.model.eval()

        # Look for the latest weights: prefer the final output directory,
        # then fall back to the caller-supplied checkpoint path.
        load_path = None
        if (FINAL_OUTPUT_DIR / MODEL_SAVE_NAME).exists():
            load_path = FINAL_OUTPUT_DIR / MODEL_SAVE_NAME
            print(f"Weights from Epoch 50 found. Loading and moving to {device}...")
        elif model_path.exists():
            load_path = model_path
            print(f"Loading weights from {load_path} and moving to {device}...")

        if load_path:
            # NOTE(review): checkpoint is assumed trusted (local training
            # output); torch.load unpickles arbitrary objects otherwise.
            self.model.load_state_dict(torch.load(load_path, map_location=device))
        else:
            print("Warning: No trained weights found. Using randomly initialized model.")

        print(f"Model successfully loaded on {device} and ready for chat!")

    def generate_response(self, prompt, max_length=MAX_LENGTH, temperature=TEMPERATURE, top_k=TOP_K):
        """Sample up to max_length tokens continuing `prompt`.

        Returns the generated continuation only (prompt removed), with any
        "__eou__" end-of-utterance markers stripped. Generation stops early
        when "__eou__" or the EOS token is produced.
        """
        # Tokenize input and remember its length in TOKENS so the reply can
        # be split off exactly, independent of decode round-tripping.
        input_ids = self.tokenizer.encode(prompt, return_tensors='pt').to(device)
        prompt_len = input_ids.shape[1]

        # Autoregressive sampling loop (no gradients needed at inference).
        with torch.no_grad():
            for _ in range(max_length):
                # Forward pass; keep logits only for the last position.
                logits, _ = self.model(input_ids)
                next_token_logits = logits[:, -1, :]

                # Temperature scaling.
                next_token_logits = next_token_logits / temperature

                # Top-K filtering: everything outside the top k gets -inf
                # so softmax assigns it zero probability.
                if top_k > 0:
                    values, indices = torch.topk(next_token_logits, top_k)
                    next_token_logits = torch.full_like(next_token_logits, float('-inf'))
                    next_token_logits.scatter_(1, indices, values)

                # Sample the next token from the filtered distribution.
                probabilities = F.softmax(next_token_logits, dim=-1)
                next_token = torch.multinomial(probabilities, num_samples=1)

                # Append and check stop conditions. Hoisted token_id: the
                # original computed next_token.squeeze().item() twice.
                input_ids = torch.cat([input_ids, next_token], dim=-1)
                token_id = next_token.squeeze().item()
                generated_token = self.tokenizer.decode(token_id)
                if "__eou__" in generated_token or token_id == self.tokenizer.eos_token_id:
                    break

        # BUG FIX: decode ONLY the generated tokens. The previous code
        # decoded the whole sequence and sliced off len(prompt) CHARACTERS,
        # which corrupted the reply whenever decode() did not reproduce the
        # prompt byte-for-byte (e.g. whitespace/unicode normalization).
        response = self.tokenizer.decode(input_ids[0, prompt_len:].tolist()).strip()

        # Clean up any leftover end-of-utterance markers.
        response = response.replace("__eou__", "").strip()

        return response
126
+
127
+
128
def main():
    """Run the interactive console chat loop."""
    # The settings commands below rebind the module-level globals.
    global TEMPERATURE, TOP_K

    chatbot = Chatbot(LAST_TRAINED_PATH)

    banner = "=" * 60
    print("\n" + banner)
    print(f"CHATBOT ACTIVATED (PPL ~2.6 / Temperature {TEMPERATURE} / Top-K {TOP_K})")
    print("Type 'exit' or 'quit' to quit. Use 'set temp=0.x' or 'set k=N' to change settings.")
    print(banner + "\n")

    while True:
        try:
            user_input = input(">>> You: ")
            lowered = user_input.lower()

            # Exit commands.
            if lowered in ['quit', 'exit']:
                print("Goodbye!")
                break

            # Runtime settings commands.
            if lowered.startswith('set temp='):
                try:
                    TEMPERATURE = float(user_input.split('=')[1].strip())
                    print(f"Temperature updated to {TEMPERATURE}")
                except ValueError:
                    print("Invalid temperature. Use format: set temp=0.7")
                continue

            if lowered.startswith('set k='):
                try:
                    TOP_K = int(user_input.split('=')[1].strip())
                    print(f"Top-K updated to {TOP_K}")
                except ValueError:
                    print("Invalid value. Use format: set k=50")
                continue

            # Anything else is a chat prompt.
            print("...Generating...")
            print(f"Model: {chatbot.generate_response(user_input)}\n")

        except KeyboardInterrupt:
            print("\nGoodbye!")
            break
        except Exception as err:
            print(f"An error occurred: {err}")
            break


if __name__ == "__main__":
    main()