"""Helper utilities for the project."""

import re
from difflib import SequenceMatcher

# Compile patterns once at import time: these helpers are likely called
# per-record, and recompiling (plus re-importing `re`) inside each call
# was pure overhead.
_DIGITS_RE = re.compile(r"\d+")
_WHITESPACE_RE = re.compile(r"\s+")
# Keep word chars, whitespace, hyphen, @ and . (emails/domains survive).
_SPECIAL_CHARS_RE = re.compile(r"[^\w\s\-@.]")


def parse_skill_match_score(score_str: str, default: int = 50) -> int:
    """Extract the first integer found in a score string.

    Args:
        score_str: Free-form text expected to contain a numeric score.
        default: Value returned when no digits are present. Previously a
            hard-coded 50; now a backward-compatible parameter.

    Returns:
        The first run of digits as an int, or ``default`` if none found.
    """
    match = _DIGITS_RE.search(score_str)
    return int(match.group(0)) if match else default


def format_experience_duration(years_str: str) -> str:
    """Standardize an experience duration to ``"<n> years"``.

    The first run of digits is taken as the year count. If the input has
    no digits, it is returned unchanged (caller may hold free-form text
    like "unknown").

    Note: output is always pluralized ("1 years") to keep the format
    uniform for downstream string matching.
    """
    match = _DIGITS_RE.search(years_str)
    if match:
        years = int(match.group(0))
        return f"{years} years"
    return years_str


def clean_text(text: str) -> str:
    """Clean and normalize text.

    Collapses runs of whitespace to single spaces, strips characters
    other than word chars / whitespace / ``-@.``, and trims the ends.
    """
    # Collapse whitespace first so the special-char pass sees single spaces.
    text = _WHITESPACE_RE.sub(' ', text)
    text = _SPECIAL_CHARS_RE.sub('', text)
    return text.strip()


def skill_similarity(skill1: str, skill2: str) -> float:
    """Return a case-insensitive similarity ratio in [0.0, 1.0].

    Uses difflib's Ratcliff/Obershelp matcher; 1.0 means the lowercased
    strings are identical.
    """
    return SequenceMatcher(None, skill1.lower(), skill2.lower()).ratio()


def batch_process(items: list, batch_size: int = 32):
    """Yield successive slices of ``items`` of at most ``batch_size``.

    The final batch may be shorter; an empty list yields nothing.
    """
    for i in range(0, len(items), batch_size):
        yield items[i:i + batch_size]


# Model conversion utilities
def convert_to_onnx(model_path: str, output_path: str):
    """Convert a fine-tuned causal-LM checkpoint to ONNX for faster inference.

    Args:
        model_path: Directory or hub ID accepted by ``from_pretrained``.
        output_path: Destination file for the exported ONNX graph.

    Side effects:
        Writes the ONNX file and prints a confirmation message.
    """
    # Heavy deps imported lazily so the module stays importable without
    # torch/transformers installed.
    from transformers import AutoModelForCausalLM, AutoTokenizer
    import torch

    model = AutoModelForCausalLM.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    # Fix: switch to inference mode before export, otherwise dropout /
    # batch-norm layers are traced in training mode.
    model.eval()

    # Minimal (1, 1) dummy input; dynamic_axes lets the exported graph
    # accept arbitrary batch and sequence lengths at runtime.
    dummy_input = torch.tensor([[tokenizer.eos_token_id]])
    torch.onnx.export(
        model,
        dummy_input,
        output_path,
        input_names=['input_ids'],
        output_names=['output'],
        dynamic_axes={'input_ids': {0: 'batch_size', 1: 'sequence'}},
        opset_version=12,
    )
    print(f"✅ Model exported to {output_path}")