# TroglodyteDerivations's picture
# Upload 48 files
# c28358e verified
#!/usr/bin/env python3
"""
Polished chat interface for GPT-OSS-120B with proper response parsing
"""
from mlx_lm import load, generate
import logging
import re
import time
from typing import List, Dict
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
class GPTOSSChat:
    """Chat wrapper around the MLX GPT-OSS-120B model.

    Loads the 4-bit quantized checkpoint once at construction time and
    exposes helpers for generating, cleaning, and interactively serving
    responses.
    """

    def __init__(self):
        """Load the quantized 120B checkpoint (slow and memory-heavy)."""
        logger.info("πŸš€ Loading GPT-OSS-120B...")
        self.model, self.tokenizer = load("mlx-community/gpt-oss-120b-MXFP4-Q4")
        logger.info("βœ… Model loaded successfully!")

    def extract_final_response(self, response: str) -> str:
        """Extract the final assistant response from the chat template.

        Args:
            response: Raw model output, possibly containing control tags
                such as ``<|start|>assistant``, ``<|channel|>...``,
                ``<|message|>`` and ``<|end|>``.

        Returns:
            The cleaned, human-readable text of the last assistant turn,
            or the whole response with tags stripped as a fallback.
        """
        if "<|start|>assistant" in response:
            parts = response.split("<|start|>assistant")
            if len(parts) > 1:
                # Only the last assistant turn matters for display.
                final_part = parts[-1]
                # Drop channel headers such as "<|channel|>final".
                final_part = re.sub(r'<\|channel\|>[^<]+', '', final_part)
                final_part = final_part.replace('<|message|>', '')
                final_part = final_part.replace('<|end|>', '')
                # Strip any remaining tag-like fragments and whitespace.
                final_part = re.sub(r'<[^>]+>', '', final_part)
                final_part = final_part.strip()
                if final_part:
                    return final_part
        # Fallback: return the original response cleaned up.
        cleaned = re.sub(r'<\|[^>]+\|>', '', response)
        cleaned = re.sub(r'<[^>]+>', '', cleaned)
        return cleaned.strip()

    def generate_response(self, prompt: str, max_tokens: int = 2048) -> str:
        """Generate a cleaned response for a single-turn user prompt.

        Args:
            prompt: The user's message.
            max_tokens: Generation budget passed through to ``mlx_lm.generate``.

        Returns:
            The cleaned assistant text, or an error message string on failure.
        """
        try:
            # Format prompt with the tokenizer's chat template.
            messages = [{"role": "user", "content": prompt}]
            # NOTE(review): no tokenize=False here, so this may return token
            # ids rather than a string — confirm mlx_lm.generate accepts both.
            formatted_prompt = self.tokenizer.apply_chat_template(
                messages, add_generation_prompt=True
            )
            response = generate(
                self.model,
                self.tokenizer,
                prompt=formatted_prompt,
                max_tokens=max_tokens,
                verbose=False
            )
            return self.extract_final_response(response)
        except Exception as e:
            # Best-effort boundary: surface the error as chat text rather
            # than crashing the interactive loop.
            logger.error("Generation error: %s", e)
            return f"I encountered an error: {str(e)}"

    def interactive_chat(self):
        """Run a REPL-style chat session until the user quits."""
        print("\n" + "=" * 60)
        print("πŸ€– GPT-OSS-120B Chat Interface")
        print("=" * 60)
        print("πŸ’‘ Your M3 Ultra is running a 120B parameter model locally!")
        print("🎯 Type your messages below (type '/quit' to exit)")
        print("=" * 60)
        conversation_history = []
        while True:
            try:
                user_input = input("\nπŸ‘€ You: ").strip()
                if user_input.lower() in ['/quit', '/exit', '/bye']:
                    print("πŸ‘‹ Goodbye! It was amazing chatting with you!")
                    break
                if user_input.lower() == '/clear':
                    conversation_history = []
                    print("🧹 Conversation cleared!")
                    continue
                if user_input.lower() == '/help':
                    print("\nπŸ“‹ Available commands:")
                    print(" /quit - Exit the chat")
                    print(" /clear - Clear conversation history")
                    print(" /help - Show this help message")
                    continue
                if not user_input:
                    continue
                # flush=True: a bare "\r" does not flush line-buffered
                # stdout, so without it the message may never appear.
                print("πŸ’­ Thinking...", end="\r", flush=True)
                start_time = time.time()
                response = self.generate_response(user_input, max_tokens=2048)
                generation_time = time.time() - start_time
                # History is collected but not yet fed back into the model;
                # each turn is generated independently.
                conversation_history.append({"user": user_input, "ai": response})
                print(f"πŸ€– AI ({generation_time:.1f}s): {response}")
            except KeyboardInterrupt:
                print("\n\nπŸ‘‹ Thanks for chatting! Goodbye!")
                break
            except Exception as e:
                print(f"\n❌ Error: {e}")
def demonstration_mode(ai=None):
    """Showcase the model's capabilities with beautiful formatting.

    Args:
        ai: Optional pre-loaded ``GPTOSSChat`` instance. Pass one to reuse
            an already-loaded model; by default (``None``) a fresh instance
            is created, which loads the full 120B checkpoint again.
    """
    print("\n" + "=" * 60)
    print("🎭 GPT-OSS-120B Capabilities Demonstration")
    print("=" * 60)
    if ai is None:
        ai = GPTOSSChat()
    demonstrations = [
        {
            "prompt": "Explain quantum computing like I'm 10 years old",
            "description": "Simplified explanation"
        },
        {
            "prompt": "Write a beautiful haiku about the ocean and technology",
            "description": "Creative writing"
        },
        {
            "prompt": "What are the most exciting recent developments in AI?",
            "description": "Technical knowledge"
        },
        {
            "prompt": "How would you describe the feeling of wonder to an alien?",
            "description": "Philosophical reasoning"
        },
        {
            "prompt": "Create a short story about a robot who discovers poetry",
            "description": "Creative fiction"
        }
    ]
    for i, demo in enumerate(demonstrations, 1):
        print(f"\n{i}. 🌟 {demo['description']}")
        print(f" πŸ“ '{demo['prompt']}'")
        response = ai.generate_response(demo['prompt'], max_tokens=2048)
        # Indent each line of the model output under its prompt.
        for line in response.split('\n'):
            print(f" πŸ€– {line}")
        print(" " + "─" * 50)
        time.sleep(2)  # Pause between demonstrations
def _main() -> None:
    """Script entry point: banner, capability demo, then interactive chat."""
    print("πŸš€ Starting GPT-OSS-120B Chat System")
    print("πŸ’Ύ Model: 120B parameters, 4-bit quantized")
    print("🍎 Hardware: Apple M3 Ultra with 512GB RAM")
    print("⚑ Performance: ~95 tokens/second")
    # Load the model once for the interactive session.
    chat = GPTOSSChat()
    # Scripted showcase first (note: it builds its own GPTOSSChat instance).
    demonstration_mode()
    print("\n" + "=" * 60)
    print("πŸ’¬ Starting Interactive Chat Mode...")
    print("=" * 60)
    chat.interactive_chat()


if __name__ == "__main__":
    _main()