Upload run_model.py via Colab

run_model.py  ADDED  (+112, -0)
@@ -0,0 +1,112 @@
import os

import torch
import torch.nn.functional as F

# ==========================================
# SETTINGS
# ==========================================
model_path = "/content/yagiz_gpt_full_packaged.pt"
device = 'cuda' if torch.cuda.is_available() else 'cpu'
block_size = 512  # Context window size of the model
# ==========================================
# 1. LOAD PACKAGED MODEL
# ==========================================
print(f"Device: {device}")

if not os.path.exists(model_path):
    raise FileNotFoundError(f"File {model_path} not found. Please make sure the model is packaged correctly.")

print(f"Loading {model_path}...")

# The key point: no class definitions are needed. torch.jit.load restores the
# full TorchScript model (architecture + weights) on its own.
try:
    model = torch.jit.load(model_path, map_location=device)
    model.eval()
    print("Model loaded successfully!")
except Exception as e:
    raise SystemExit(f"Failed to load the model: {e}")
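
# --- Reference note (an assumption, not part of the original upload): a
# TorchScript package like the one loaded above is typically produced in the
# training notebook along these lines, which is why no class definition is
# required here:
#
#     scripted = torch.jit.script(trained_model)            # or torch.jit.trace(model, example_idx)
#     scripted.save("/content/yagiz_gpt_full_packaged.pt")  # bundles code + weights in one file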
# ==========================================
# 2. TOKENIZER SETUP
# ==========================================
# Using 'tiktoken' since the model was trained with the GPT-2 tokenizer (vocab_size=50257).
# Import it here, auto-installing first if it is missing (handy on a fresh Colab runtime).
try:
    import tiktoken
except ImportError:
    print("tiktoken library missing. Installing...")
    os.system("pip install tiktoken")
    import tiktoken

enc = tiktoken.get_encoding("gpt2")

# Helper functions for encoding and decoding
encode = lambda s: enc.encode(s, allowed_special={"<|endoftext|>"})
decode = lambda l: enc.decode(l)
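
# Quick sanity check (illustrative addition, not in the original upload):
# GPT-2 BPE round-trips plain text losslessly.
assert decode(encode("hello world")) == "hello world"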
# ==========================================
# 3. RESPONSE GENERATION FUNCTION
# ==========================================
@torch.no_grad()  # inference only: no gradient bookkeeping needed
def generate_response(prompt, max_new_tokens=100):
    # 1. Convert text to tensor indices
    idx = torch.tensor([encode(prompt)], dtype=torch.long, device=device)

    # 2. Generate token by token
    for _ in range(max_new_tokens):
        # Crop the context if it exceeds the block size
        idx_cond = idx if idx.size(1) <= block_size else idx[:, -block_size:]

        # Get predictions (forward pass); TorchScript models are called like functions
        logits = model(idx_cond)

        # Focus on the last time step
        logits = logits[:, -1, :]

        # Apply softmax to get probabilities
        probs = F.softmax(logits, dim=-1)

        # Sample the next token from the distribution
        idx_next = torch.multinomial(probs, num_samples=1)

        # Append the new token to the sequence
        idx = torch.cat((idx, idx_next), dim=1)

    # 3. Decode indices back to text
    return decode(idx[0].tolist())
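
# --- Optional variant (an addition, not in the original upload): temperature and
# top-k sampling often give more coherent output than sampling from the raw
# distribution. A minimal sketch reusing the names defined above (model, encode,
# decode, block_size); the function name and defaults are illustrative.
@torch.no_grad()
def generate_response_topk(prompt, max_new_tokens=100, temperature=0.8, top_k=50):
    idx = torch.tensor([encode(prompt)], dtype=torch.long, device=device)
    for _ in range(max_new_tokens):
        idx_cond = idx if idx.size(1) <= block_size else idx[:, -block_size:]
        logits = model(idx_cond)[:, -1, :] / temperature  # <1.0 sharpens, >1.0 flattens
        v, _ = torch.topk(logits, top_k)                  # k-th largest logit per row
        logits[logits < v[:, [-1]]] = -float('inf')       # mask everything below it
        probs = F.softmax(logits, dim=-1)
        idx = torch.cat((idx, torch.multinomial(probs, num_samples=1)), dim=1)
    return decode(idx[0].tolist())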
# ==========================================
# 4. START CHAT INTERFACE
# ==========================================
print("\n" + "=" * 40)
print("YAGIZ GPT (FULL PACKAGED) - READY")
print("Type 'q' and press Enter to exit.")
print("=" * 40 + "\n")

while True:
    user_input = input("Ask a question: ")
    if user_input.strip().lower() == 'q':
        print("Exiting...")
        break

    # Prompt engineering: steer the model with a Question/Answer format
    prompt = f"Question: {user_input}\nAnswer:"

    print(">> Model is thinking...")
    try:
        response = generate_response(prompt)

        # Post-processing: split on 'Answer:' to strip the prompt from the output
        if "Answer:" in response:
            answer_only = response.split("Answer:")[-1].strip()
        else:
            answer_only = response  # Fallback if the format breaks

        print(f"\nAnswer: {answer_only}\n")
        print("-" * 30)

    except Exception as e:
        print(f"An error occurred: {e}")
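
# Optional non-interactive smoke test (an addition, not in the original upload):
# uncomment to run a single generation without the input() loop, e.g. when
# executing the whole cell top to bottom in Colab. The prompt text is illustrative.
# print(generate_response("Question: Who are you?\nAnswer:", max_new_tokens=50))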