kgrabko commited on
Commit
8b7cba2
·
verified ·
1 Parent(s): 108847b

Update chatbot_1b.py

Browse files
Files changed (1) hide show
  1. chatbot_1b.py +180 -164
chatbot_1b.py CHANGED
@@ -1,165 +1,181 @@
1
- # Copyright (c) 2025 CMS Manhattan
2
- # All rights reserved.
3
- #
4
- # This file is part of a project authored by CMS Manhattan.
5
- # You may use, distribute, and modify this code under the terms of the Apache 2.0 license.
6
-
7
- import torch
8
- import torch.nn.functional as F
9
- from transformers import GPT2TokenizerFast
10
- from gpt_modern_8b import JiRackPyTorch # Same import used in fine-tuning
11
- from pathlib import Path
12
-
13
- # ============================= GENERATION SETTINGS =============================
14
- # Temperature: Lower = more focused, conservative, and predictable responses
15
- # Start with 0.7. Increase to 0.8–0.9 if the model starts repeating itself
16
- TEMPERATURE = 0.7
17
-
18
- # Top-K: Limits sampling to the K most likely next tokens
19
- # Start with 50. Increase if output feels too safe/boring
20
- TOP_K = 50
21
-
22
- # Max Length: Maximum number of new tokens to generate per response
23
- MAX_LENGTH = 120
24
-
25
- # ============================= PATHS =============================
26
- LAST_TRAINED_PATH = Path("build/fine_tuning_output/epoch2/gpt_finetuned.pt")
27
- FINAL_OUTPUT_DIR = Path("build/fine_tuning_output/epoch2") # Folder containing the .pt
28
- MODEL_SAVE_NAME = "gpt_finetuned.pt"
29
-
30
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
31
- print(f"Using device: {device}")
32
-
33
- # ============================= CHATBOT CLASS =============================
34
- class Chatbot:
35
- def __init__(self, model_path: Path):
36
- # 1. Load tokenizer (offline-safe recommended — see note below)
37
- print("Loading standard GPT-2 tokenizer...")
38
- # For full offline use, replace "gpt2" with "./tokenizers/gpt2" after first download
39
- self.tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
40
- self.tokenizer.pad_token = self.tokenizer.eos_token
41
-
42
- # 2. Initialize model architecture
43
- print("Initializing JiRackPyTorch model...")
44
- self.model = JiRackPyTorch().to(device)
45
- self.model.eval()
46
-
47
- # 3. Load latest trained weights
48
- load_path = None
49
- candidate1 = FINAL_OUTPUT_DIR / MODEL_SAVE_NAME
50
- candidate2 = model_path if model_path.is_file() else None
51
-
52
- if candidate1.exists():
53
- load_path = candidate1
54
- print(f"Found weights in final folder: {load_path}")
55
- elif candidate2 and candidate2.exists():
56
- load_path = candidate2
57
- print(f"Loading weights from: {load_path}")
58
- else:
59
- print("Warning: No trained weights found. Running with randomly initialized model.")
60
-
61
- if load_path:
62
- print(f"Loading state dict from {load_path}...")
63
- self.model.load_state_dict(torch.load(load_path, map_location=device))
64
- print("Weights loaded successfully!")
65
-
66
- print(f"Model is now running on {device} — ready for chat!\n")
67
-
68
- def generate_response(self, prompt: str, max_length: int = MAX_LENGTH,
69
- temperature: float = TEMPERATURE, top_k: int = TOP_K) -> str:
70
- input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to(device)
71
-
72
- with torch.no_grad():
73
- for _ in range(max_length):
74
- # Forward pass
75
- logits, _ = self.model(input_ids) # JiRackPyTorch returns (logits, past_kv)
76
-
77
- # Get logits for the last generated token
78
- next_token_logits = logits[:, -1, :]
79
-
80
- # Apply temperature
81
- if temperature != 1.0:
82
- next_token_logits = next_token_logits / temperature
83
-
84
- # Apply Top-K sampling
85
- if top_k > 0:
86
- values, indices = torch.topk(next_token_logits, top_k)
87
- next_token_logits = torch.full_like(next_token_logits, float('-inf'))
88
- next_token_logits.scatter_(1, indices, values)
89
-
90
- # Sample next token
91
- probabilities = F.softmax(next_token_logits, dim=-1)
92
- next_token = torch.multinomial(probabilities, num_samples=1)
93
-
94
- # Append to sequence
95
- input_ids = torch.cat([input_ids, next_token], dim=-1)
96
-
97
- # Early stop on EOS or custom end-of-utterance token
98
- token_str = self.tokenizer.decode(next_token.item())
99
- if "__eou__" in token_str or next_token.item() == self.tokenizer.eos_token_id:
100
- break
101
-
102
- # Decode full output and strip prompt
103
- full_output = self.tokenizer.decode(input_ids[0], skip_special_tokens=False)
104
- response = full_output[len(prompt):].strip()
105
-
106
- # Clean up any leftover markers
107
- response = response.replace("__eou__", "").strip()
108
-
109
- return response
110
-
111
-
112
- # ============================= MAIN CHAT LOOP =============================
113
- def main():
114
- global TEMPERATURE, TOP_K
115
-
116
- print("Starting JiRack Chatbot...")
117
- chatbot = Chatbot(LAST_TRAINED_PATH)
118
-
119
- print("\n" + "=" * 70)
120
- print(f"JIRACK CHATBOT ONLINE")
121
- print(f"Temperature: {TEMPERATURE} | Top-K: {TOP_K} | Max Length: {MAX_LENGTH}")
122
- print("Type 'quit' or 'exit' to exit")
123
- print("Change settings: set temp=0.8 or set k=80")
124
- print("=" * 70 + "\n")
125
-
126
- while True:
127
- try:
128
- user_input = input("You: ").strip()
129
-
130
- if user_input.lower() in {"quit", "exit", "bye"}:
131
- print("Goodbye!")
132
- break
133
-
134
- # Live parameter tuning
135
- if user_input.lower().startswith("set temp="):
136
- try:
137
- TEMPERATURE = float(user_input.split("=")[1])
138
- print(f"Temperature {TEMPERATURE}")
139
- except:
140
- print("Invalid format. Use: set temp=0.7")
141
- continue
142
-
143
- if user_input.lower().startswith("set k="):
144
- try:
145
- TOP_K = int(user_input.split("=")[1])
146
- print(f"Top-K {TOP_K}")
147
- except:
148
- print("Invalid format. Use: set k=50")
149
- continue
150
-
151
- if not user_input:
152
- continue
153
-
154
- print("Generating...", end="\r")
155
- response = chatbot.generate_response(user_input)
156
- print(f"JiRack: {response}\n")
157
-
158
- except KeyboardInterrupt:
159
- print("\n\nShutting down...")
160
- break
161
- except Exception as e:
162
- print(f"Error: {e}")
163
-
164
- if __name__ == "__main__":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
  main()
 
1
+ # Copyright (c) 2025 CMS Manhattan
2
+ # All rights reserved.
3
+ # Author: Konstantin Vladimirovich Grabko
4
+ # Email: grabko@cmsmanhattan.com
5
+ # Phone: +1(516)777-0945
6
+ #
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU General Public License as published by
9
+ # the Free Software Foundation, version 3 of the License.
10
+ #
11
+ # This program is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
18
+ #
19
+ # Additional terms:
20
+ # Any commercial use or distribution of this software or derivative works
21
+ # requires explicit written permission from the copyright holder.
22
+
23
+ import torch
24
+ import torch.nn.functional as F
25
+ from transformers import GPT2TokenizerFast
26
+ from gpt_modern_8b import JiRackPyTorch # Same import used in fine-tuning
27
+ from pathlib import Path
28
+
29
# ============================= GENERATION SETTINGS =============================
# Temperature: Lower = more focused, conservative, and predictable responses
# Start with 0.7. Increase to 0.8–0.9 if the model starts repeating itself
TEMPERATURE = 0.7

# Top-K: Limits sampling to the K most likely next tokens
# Start with 50. Increase if output feels too safe/boring
TOP_K = 50

# Max Length: Maximum number of new tokens to generate per response
MAX_LENGTH = 120

# ============================= PATHS =============================
# Checkpoint produced by fine-tuning; Chatbot.__init__ also falls back to
# FINAL_OUTPUT_DIR / MODEL_SAVE_NAME when this exact file is absent.
LAST_TRAINED_PATH = Path("build/fine_tuning_output/epoch2/gpt_finetuned.pt")
FINAL_OUTPUT_DIR = Path("build/fine_tuning_output/epoch2") # Folder containing the .pt
MODEL_SAVE_NAME = "gpt_finetuned.pt"

# Run on GPU when available; the model and all input tensors are moved here.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
48
+
49
+ # ============================= CHATBOT CLASS =============================
50
class Chatbot:
    """Interactive wrapper around a fine-tuned JiRackPyTorch language model.

    Loads the standard GPT-2 tokenizer and the most recent fine-tuned
    weights (falling back to a randomly initialized model when none are
    found), then generates replies with temperature + top-k sampling.
    """

    def __init__(self, model_path: Path):
        # 1. Load tokenizer (offline-safe recommended — see note below)
        print("Loading standard GPT-2 tokenizer...")
        # For full offline use, replace "gpt2" with "./tokenizers/gpt2" after first download
        self.tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
        # GPT-2 ships without a pad token; reuse EOS so padding-aware code works.
        self.tokenizer.pad_token = self.tokenizer.eos_token

        # 2. Initialize model architecture
        print("Initializing JiRackPyTorch model...")
        self.model = JiRackPyTorch().to(device)
        self.model.eval()

        # 3. Load latest trained weights — prefer the canonical output folder,
        #    then the explicit path passed in; otherwise warn and run untrained.
        load_path = None
        candidate1 = FINAL_OUTPUT_DIR / MODEL_SAVE_NAME
        candidate2 = model_path if model_path.is_file() else None

        if candidate1.exists():
            load_path = candidate1
            print(f"Found weights in final folder: {load_path}")
        elif candidate2 and candidate2.exists():
            load_path = candidate2
            print(f"Loading weights from: {load_path}")
        else:
            print("Warning: No trained weights found. Running with randomly initialized model.")

        if load_path:
            print(f"Loading state dict from {load_path}...")
            # weights_only=True: the checkpoint is a plain state dict, so avoid
            # unpickling arbitrary objects from an on-disk file (safer torch.load).
            self.model.load_state_dict(
                torch.load(load_path, map_location=device, weights_only=True)
            )
            print("Weights loaded successfully!")

        print(f"Model is now running on {device} — ready for chat!\n")

    def generate_response(self, prompt: str, max_length: int = MAX_LENGTH,
                          temperature: float = TEMPERATURE, top_k: int = TOP_K) -> str:
        """Generate a reply for ``prompt`` via temperature/top-k sampling.

        Stops early on the GPT-2 EOS token or the dataset's ``__eou__``
        end-of-utterance marker. Returns only the newly generated text.
        """
        input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to(device)
        # Remember how many tokens the prompt occupies so we can strip it
        # off the output by TOKEN position, not by character count.
        prompt_len = input_ids.shape[1]

        with torch.no_grad():
            for _ in range(max_length):
                # Forward pass over the full sequence each step.
                # NOTE(review): past_kv is discarded — presumably a KV cache
                # could speed this up; confirm against JiRackPyTorch's API.
                logits, _ = self.model(input_ids)  # JiRackPyTorch returns (logits, past_kv)

                # Logits for the next-token position only
                next_token_logits = logits[:, -1, :]

                # Temperature scaling (lower = sharper distribution)
                if temperature != 1.0:
                    next_token_logits = next_token_logits / temperature

                # Top-K filtering: keep the K best logits, mask the rest to -inf
                if top_k > 0:
                    values, indices = torch.topk(next_token_logits, top_k)
                    next_token_logits = torch.full_like(next_token_logits, float('-inf'))
                    next_token_logits.scatter_(1, indices, values)

                # Sample the next token from the filtered distribution
                probabilities = F.softmax(next_token_logits, dim=-1)
                next_token = torch.multinomial(probabilities, num_samples=1)

                # Append to the running sequence
                input_ids = torch.cat([input_ids, next_token], dim=-1)

                # Early stop on EOS or custom end-of-utterance token
                token_str = self.tokenizer.decode(next_token.item())
                if "__eou__" in token_str or next_token.item() == self.tokenizer.eos_token_id:
                    break

        # BUGFIX: decode only the generated tokens instead of slicing the full
        # decoded string with full_output[len(prompt):] — tokenizer round-trips
        # are not guaranteed byte-identical to the original prompt, so the old
        # character-based slice could clip or leak prompt text.
        response = self.tokenizer.decode(
            input_ids[0, prompt_len:], skip_special_tokens=False
        ).strip()

        # Clean up any leftover markers
        response = response.replace("__eou__", "").strip()

        return response
126
+
127
+
128
+ # ============================= MAIN CHAT LOOP =============================
129
def main():
    """Run the interactive chat REPL: read input, handle commands, print replies."""
    # The live-tuning commands below rebind the module-level sampling settings.
    global TEMPERATURE, TOP_K

    print("Starting JiRack Chatbot...")
    chatbot = Chatbot(LAST_TRAINED_PATH)

    print("\n" + "=" * 70)
    print("JIRACK CHATBOT ONLINE")
    print(f"Temperature: {TEMPERATURE} | Top-K: {TOP_K} | Max Length: {MAX_LENGTH}")
    print("Type 'quit' or 'exit' to exit")
    print("Change settings: set temp=0.8 or set k=80")
    print("=" * 70 + "\n")

    while True:
        try:
            user_input = input("You: ").strip()

            if user_input.lower() in {"quit", "exit", "bye"}:
                print("Goodbye!")
                break

            # Live parameter tuning
            if user_input.lower().startswith("set temp="):
                try:
                    TEMPERATURE = float(user_input.split("=")[1])
                    print(f"Temperature → {TEMPERATURE}")
                except ValueError:
                    # Narrowed from bare `except:` — only a malformed number
                    # should trigger the usage hint, not KeyboardInterrupt etc.
                    print("Invalid format. Use: set temp=0.7")
                continue

            if user_input.lower().startswith("set k="):
                try:
                    TOP_K = int(user_input.split("=")[1])
                    # Arrow added for consistency with the temperature feedback.
                    print(f"Top-K → {TOP_K}")
                except ValueError:
                    print("Invalid format. Use: set k=50")
                continue

            if not user_input:
                continue

            print("Generating...", end="\r")
            response = chatbot.generate_response(user_input)
            print(f"JiRack: {response}\n")

        except KeyboardInterrupt:
            print("\n\nShutting down...")
            break
        except Exception as e:
            # Top-level guard: keep the chat loop alive on unexpected errors.
            print(f"Error: {e}")
179
+
180
# Script entry point — start the interactive chat loop.
if __name__ == "__main__":
    main()