"""test_conversational_neo_manual.py - Manual generation for NeoMini model"""
from model_neo import NeoMini, NeoMiniConfig
from transformers import AutoTokenizer
import torch
import torch.nn.functional as F
import json
def load_conversational_model(model_path="conversational_neo_extended"):
    """Load the fine-tuned conversational model."""
    print("Loading fine-tuned conversational model...")

    # Load config; fall back to the default context window if the file
    # is missing or unreadable
    try:
        with open(f"{model_path}/model_config.json", 'r') as f:
            model_config = json.load(f)
        max_seq_len = model_config.get('max_seq_len', 4096)
    except (OSError, json.JSONDecodeError):
        max_seq_len = 4096

    # Load model weights
    config = NeoMiniConfig()
    config.max_seq_len = max_seq_len
    model = NeoMini(config)
    checkpoint = torch.load(f"{model_path}/conversational_model.pt", map_location='cpu')
    model.load_state_dict(checkpoint['model_state_dict'])

    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model.eval()

    # Move to GPU if available
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)

    print(f"βœ… Model loaded with {max_seq_len} token context window on {device}")
    return model, tokenizer, device


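# For reference, model_config.json only needs to carry the context length; a
# minimal example (assumed from the single key read above) would look like:
#
#   {"max_seq_len": 4096}

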
def generate_response(model, tokenizer, prompt, device, max_new_tokens=200, temperature=0.8, top_k=50, top_p=0.9):
    """Manual token-by-token text generation for the NeoMini model."""
    # Tokenize input
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)
    original_length = input_ids.shape[1]

    # Generate tokens one at a time
    with torch.no_grad():
        for step in range(max_new_tokens):
            # Forward pass over the full sequence so far
            logits = model(input_ids)

            # Temperature-scale the logits at the last position
            next_token_logits = logits[0, -1, :] / temperature

            # Apply top-k filtering: keep only the k highest logits and set
            # everything else to -inf before the softmax
            if top_k > 0:
                top_k_logits, top_k_indices = torch.topk(next_token_logits, top_k)
                next_token_logits = torch.full_like(next_token_logits, float('-inf'))
                next_token_logits.scatter_(0, top_k_indices, top_k_logits)

            # Apply top-p (nucleus) filtering
            if top_p < 1.0:
                sorted_logits, sorted_indices = torch.sort(next_token_logits, descending=True)
                cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)

                # Remove tokens with cumulative probability above the threshold;
                # shift the mask right by one so the first token that crosses
                # the threshold is still kept
                sorted_indices_to_remove = cumulative_probs > top_p
                sorted_indices_to_remove[1:] = sorted_indices_to_remove[:-1].clone()
                sorted_indices_to_remove[0] = 0
                indices_to_remove = sorted_indices[sorted_indices_to_remove]
                next_token_logits[indices_to_remove] = float('-inf')

            # Sample the next token from the filtered distribution
            probs = F.softmax(next_token_logits, dim=-1)
            next_token = torch.multinomial(probs, num_samples=1)

            # Append to the running sequence
            input_ids = torch.cat([input_ids, next_token.unsqueeze(0)], dim=1)

            # Stop on EOS or when the context window is full
            if next_token.item() == tokenizer.eos_token_id:
                break
            if input_ids.shape[1] >= model.config.max_seq_len:
                break

    # Decode only the newly generated part
    generated_tokens = input_ids[0, original_length:]
    generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)
    return generated_text.strip()


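# Quick standalone check (a sketch; assumes the default checkpoint layout used
# by load_conversational_model and the "User:/Assistant:" prompt format below):
#
#   model, tokenizer, device = load_conversational_model()
#   reply = generate_response(model, tokenizer,
#                             "User: Hello!\nAssistant:", device,
#                             max_new_tokens=50, temperature=0.7)
#   print(reply)

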
def chat_with_model(model, tokenizer, device):
    """Interactive chat with the conversational model."""
    print("\nπŸ€– MAP-NEO Conversational AI (Fine-Tuned)")
    print("Type 'quit' to exit, 'clear' to clear history, 'test' for quality tests")
    print("=" * 70)

    conversation_history = []
    system_prompt = "You are MAP-NEO, a helpful, harmless, and honest AI assistant. Engage in natural conversation and provide thoughtful, accurate responses."

    while True:
        user_input = input("\nπŸ§‘ You: ").strip()

        if user_input.lower() in ['quit', 'exit']:
            print("πŸ‘‹ Goodbye!")
            break
        if user_input.lower() == 'clear':
            conversation_history = []
            print("πŸ”„ Conversation cleared!")
            continue
        if user_input.lower() == 'test':
            test_model_quality(model, tokenizer, device)
            continue
        if not user_input:
            continue

        # Build conversation context
        conversation_history.append(f"User: {user_input}")

        # Keep recent context (last 10 messages)
        recent_context = conversation_history[-10:]
        context = "\n".join(recent_context)
        prompt = f"{system_prompt}\n\n{context}\nAssistant:"

        # Check prompt length and truncate further if needed
        prompt_tokens = tokenizer.encode(prompt)
        if len(prompt_tokens) > 1800:  # leave room for the response
            recent_context = conversation_history[-6:]
            context = "\n".join(recent_context)
            prompt = f"{system_prompt}\n\n{context}\nAssistant:"

        print("πŸ€– MAP-NEO: ", end="", flush=True)

        # Generate response
        try:
            assistant_response = generate_response(
                model, tokenizer, prompt, device,
                max_new_tokens=150,
                temperature=0.8,
                top_k=50,
                top_p=0.9
            )

            # Strip a leading "Assistant:" if the model echoes the prompt format
            if assistant_response.startswith("Assistant:"):
                assistant_response = assistant_response[len("Assistant:"):].strip()

            print(assistant_response)

            # Add to history
            conversation_history.append(f"Assistant: {assistant_response}")

            # Show token usage against the model's actual context window
            max_ctx = model.config.max_seq_len
            total_tokens = len(tokenizer.encode(prompt + assistant_response))
            print(f"   πŸ“Š Tokens: {total_tokens}/{max_ctx} ({total_tokens / max_ctx * 100:.1f}%)")
        except Exception as e:
            print(f"❌ Error generating response: {e}")
            print("Try again with a different prompt.")


def test_model_quality(model, tokenizer, device):
    """Test model quality with sample prompts."""
    print("\nπŸ§ͺ Testing Model Quality...")
    print("=" * 60)

    test_prompts = [
        "Hello! Can you help me understand machine learning?",
        "What's the difference between AI and machine learning?",
        "I'm feeling stressed about work. Any advice?",
        "Can you write a short story about a robot?",
        "Explain quantum physics in simple terms.",
        "How do I make a good cup of coffee?",
        "What are the benefits of exercise?"
    ]
    system_prompt = "You are MAP-NEO, a helpful, harmless, and honest AI assistant. Engage in natural conversation and provide thoughtful, accurate responses."

    for i, user_prompt in enumerate(test_prompts[:5], 1):  # test the first 5
        print(f"\n--- Test {i}/5 ---")
        print(f"πŸ§‘ User: {user_prompt}")

        prompt = f"{system_prompt}\n\nUser: {user_prompt}\nAssistant:"
        try:
            assistant_response = generate_response(
                model, tokenizer, prompt, device,
                max_new_tokens=120,
                temperature=0.7,
                top_k=50,
                top_p=0.9
            )
            print(f"πŸ€– MAP-NEO: {assistant_response}")
        except Exception as e:
            print(f"❌ Error: {e}")

    print("\nβœ… Quality tests completed!")


def compare_before_after(model, tokenizer, device):
    """Compare responses before and after fine-tuning."""
    print("\nπŸ“Š Before vs After Fine-Tuning Comparison")
    print("=" * 60)

    # Load the original (pre-fine-tuning) model for comparison
    try:
        print("Loading original model for comparison...")
        original_config = NeoMiniConfig()
        original_model = NeoMini(original_config)
        original_checkpoint = torch.load('checkpoints/checkpoint_step_99999.pt', map_location='cpu')
        original_model.load_state_dict(original_checkpoint['model_state_dict'])
        original_model.eval().to(device)

        test_prompt = "Hello! Can you help me learn about artificial intelligence?"
        prompt = f"You are MAP-NEO, a helpful AI assistant.\n\nUser: {test_prompt}\nAssistant:"

        # Original model response
        print(f"\nπŸ§‘ User: {test_prompt}")
        print("\nπŸ€– Original Model:")
        original_response = generate_response(original_model, tokenizer, prompt, device, max_new_tokens=100, temperature=0.7)
        print(original_response)

        # Fine-tuned model response
        print("\nπŸ€– Fine-Tuned Model:")
        finetuned_response = generate_response(model, tokenizer, prompt, device, max_new_tokens=100, temperature=0.7)
        print(finetuned_response)

        print("\nπŸ“ˆ The fine-tuned model should be much more conversational and helpful!")
    except Exception as e:
        print(f"Comparison unavailable: {e}")


if __name__ == "__main__":
    print("πŸš€ MAP-NEO Conversational AI Testing Suite")
    print("=" * 60)

    # Load model
    model, tokenizer, device = load_conversational_model()

    # Test model quality
    test_model_quality(model, tokenizer, device)

    # Compare with the original model if its checkpoint is available
    compare_before_after(model, tokenizer, device)

    print("\n" + "=" * 70)
    print("πŸŽ‰ Ready for interactive conversation!")
    print("=" * 70)

    # Start interactive chat
    chat_with_model(model, tokenizer, device)