"""Interactive chat CLI for a TorchScript-packaged GPT model.

Loads a packaged (torch.jit) model, tokenizes input with the GPT-2
tiktoken encoding, and runs a simple Question/Answer REPL.
"""

import os
import sys

import torch
import torch.nn.functional as F

# tiktoken may be absent in a fresh environment (e.g. a new Colab runtime).
# NOTE: the import itself must be guarded — a plain top-level import would
# crash before any installer fallback further down could run.
try:
    import tiktoken
except ImportError:
    print("Tiktoken library missing. Installing...")
    os.system("pip install tiktoken")
    import tiktoken

# ==========================================
# SETTINGS
# ==========================================
model_path = "/content/yagiz_gpt_full_packaged.pt"
device = 'cuda' if torch.cuda.is_available() else 'cpu'
block_size = 512  # Context window size of the model

# ==========================================
# 1. LOAD PACKAGED MODEL
# ==========================================
print(f"Device: {device}")

if not os.path.exists(model_path):
    raise FileNotFoundError(f"ERROR: File {model_path} not found. Please make sure the model is packaged correctly.")

print(f"Loading {model_path}...")

# MAGIC PART: no class definitions needed — torch.jit.load restores the
# full TorchScript module (architecture + weights) in one call.
try:
    model = torch.jit.load(model_path, map_location=device)
    model.eval()  # inference mode: disables dropout / batch-norm updates
    print("Model loaded successfully!")
except Exception as e:
    print(f"Failed to load the model: {e}")
    sys.exit(1)  # sys.exit, not exit(): the latter is for interactive use only

# ==========================================
# 2. TOKENIZER SETUP
# ==========================================
# Using 'tiktoken' since the model was trained with GPT-2 tokenizer (vocab_size=50257)
enc = tiktoken.get_encoding("gpt2")

# Helper functions for encoding and decoding
encode = lambda s: enc.encode(s, allowed_special={"<|endoftext|>"})
decode = lambda l: enc.decode(l)


# ==========================================
# 3. RESPONSE GENERATION FUNCTION
# ==========================================
def generate_response(prompt, max_new_tokens=100):
    """Autoregressively sample up to `max_new_tokens` tokens after `prompt`.

    Returns the decoded text of the prompt plus the generated continuation.
    """
    # 1. Convert text to a (1, T) tensor of token indices
    idx = torch.tensor([encode(prompt)], dtype=torch.long, device=device)

    # no_grad: pure inference — without it, every generated token would
    # extend an autograd graph and leak memory across the loop.
    with torch.no_grad():
        # 2. Generate token by token
        for _ in range(max_new_tokens):
            # Crop context if it exceeds block size
            idx_cond = idx if idx.size(1) <= block_size else idx[:, -block_size:]

            # Get predictions (forward pass); TorchScript models are
            # called like functions
            logits = model(idx_cond)

            # Focus on the last token
            logits = logits[:, -1, :]

            # Apply Softmax to get probabilities
            probs = F.softmax(logits, dim=-1)

            # Sample from the distribution
            idx_next = torch.multinomial(probs, num_samples=1)

            # Append the new token to the sequence
            idx = torch.cat((idx, idx_next), dim=1)

    # 3. Decode indices back to text
    return decode(idx[0].tolist())


# ==========================================
# 4. CHAT INTERFACE
# ==========================================
def main():
    """Run the Question/Answer REPL; typing 'q' exits."""
    print("\n" + "="*40)
    print("YAGIZ GPT (FULL PACKAGED) - READY")
    print("Type 'q' and press Enter to exit.")
    print("="*40 + "\n")

    while True:
        user_input = input("Ask a question: ")
        if user_input.lower() == 'q':
            print("Exiting...")
            break

        # Prompt engineering: guide the model into a Q/A format
        prompt = f"Question: {user_input}\nAnswer:"

        print(">> Model is thinking...")
        try:
            response = generate_response(prompt)

            # Post-processing: split on 'Answer:' to drop the echoed prompt
            if "Answer:" in response:
                answer_only = response.split("Answer:")[-1].strip()
            else:
                answer_only = response  # fallback if format breaks

            print(f"\nAnswer: {answer_only}\n")
            print("-" * 30)
        except Exception as e:
            print(f"An error occurred: {e}")


# Guard so importing this module does not start the REPL or block on input()
if __name__ == "__main__":
    main()