# CAI-20B / inference.py
# Source: tigres2526 — "Upload CAI-20B Marketing Strategy Expert model"
# (Hugging Face commit a691c57, verified)
#!/usr/bin/env python3
"""
Example inference script for CAI-20B Marketing Strategy Expert
"""
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import re
class ResponseCleaner:
    """Strip generation artifacts (special tokens, reasoning preambles) from model output."""

    def __init__(self):
        # Regexes (applied case-insensitively) for text that should never reach the user.
        self.artifact_patterns = [
            r'<\|[^>]+\|>',          # Special tokens
            r'assistantfinal',
            r'assistant\s*final',
            r'We need to.*?(?=\n|$)',
            r'Let me.*?(?=\n|$)',
            r'I need to.*?(?=\n|$)',
            r'\\n\\n\\n+',           # Multiple newlines
        ]

    def clean_response(self, text):
        """Return *text* with artifacts removed, whitespace collapsed, and a tidy ending."""
        result = text
        for pat in self.artifact_patterns:
            result = re.sub(pat, '', result, flags=re.IGNORECASE)
        result = re.sub(r'\s+', ' ', result).strip()

        # Nothing more to do for empty text or text that already ends a sentence.
        if not result or result[-1] in '.!?':
            return result

        # A short fragment after the final period looks like an unfinished
        # sentence: drop it (or, with no period at all, just terminate the text).
        tail = result.split('.')[-1].strip()
        if len(tail) < 20:
            head, sep, _ = result.rpartition('.')
            result = head + '.' if sep else result + '.'
        return result
class CAI20BMarketing:
    """CAI-20B Marketing Strategy Expert Model"""

    def __init__(self, model_name="tigres2526/CAI-20B", device="cuda"):
        """Initialize the model and tokenizer"""
        print("Loading CAI-20B Marketing Strategy Expert...")
        self.device = device
        self.cleaner = ResponseCleaner()
        # Tokenizer first; reuse EOS as the padding token for generation.
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
        self.tokenizer.pad_token = self.tokenizer.eos_token
        # NOTE(review): `device` is stored but placement is driven by
        # device_map="auto" below — confirm which one callers should rely on.
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map="auto",
            torch_dtype=torch.bfloat16,
            trust_remote_code=True,
        )
        self.model.eval()
        print("✅ Model loaded successfully!")

    def generate(
        self,
        question,
        max_new_tokens=250,
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.1,
        clean_output=True
    ):
        """Generate marketing advice for a given question"""
        # Fixed system framing around the user's question.
        prompt = f"""You are a marketing strategy expert specializing in performance marketing, creative development, and conversion optimization.
Provide practical, actionable advice grounded in real-world experience.
User: {question}
Assistant:"""
        encoded = self.tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=2048,
        ).to(self.model.device)

        sampling_kwargs = dict(
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            top_p=top_p,
            repetition_penalty=repetition_penalty,
            do_sample=True,
            pad_token_id=self.tokenizer.pad_token_id,
            eos_token_id=self.tokenizer.eos_token_id,
            no_repeat_ngram_size=3,
        )
        with torch.no_grad():
            generated = self.model.generate(**encoded, **sampling_kwargs)

        # Decode only the continuation, not the echoed prompt tokens.
        prompt_len = encoded['input_ids'].shape[1]
        answer = self.tokenizer.decode(
            generated[0][prompt_len:],
            skip_special_tokens=True,
        )
        return self.cleaner.clean_response(answer) if clean_output else answer

    def chat(self):
        """Interactive chat mode"""
        banner = "=" * 70
        print("\n" + banner)
        print("CAI-20B Marketing Strategy Expert - Interactive Chat")
        print("Type 'exit' to quit")
        print(banner + "\n")
        while True:
            user_input = input("You: ").strip()
            if user_input.lower() == 'exit':
                print("Goodbye!")
                return
            if not user_input:
                continue
            reply = self.generate(user_input)
            print(f"\nCAI-20B: {reply}\n")
            print("-" * 70 + "\n")
def main():
    """Run a few example questions, then optionally drop into interactive chat."""
    # Initialize model (downloads weights on first run).
    model = CAI20BMarketing()

    # Example questions to demonstrate the model.
    test_questions = [
        "What are the top 3 marketing channels for a B2B SaaS startup?",
        "How should I allocate a $10K monthly marketing budget?",
        "What's the difference between CAC and LTV?",
    ]

    print("\n" + "=" * 70)
    print("Running example questions...")
    print("=" * 70 + "\n")
    for i, question in enumerate(test_questions, 1):
        print(f"Q{i}: {question}")
        response = model.generate(question)
        print(f"A: {response}\n")
        print("-" * 50 + "\n")

    # Optional: Start interactive chat.
    print("\nWould you like to start interactive chat? (y/n)")
    # Fix: tolerate surrounding whitespace and accept "yes" as well as "y"
    # (the original exact-match on 'y' rejected " y" and "yes").
    if input().strip().lower() in ('y', 'yes'):
        model.chat()
# Run the demo (and optional chat) only when executed as a script, not on import.
if __name__ == "__main__":
    main()