Spaces:
Running
Running
| # Helper utilities for the project | |
def parse_skill_match_score(score_str: str) -> int:
    """Return the first run of digits found in ``score_str`` as an int.

    Only the first contiguous digit sequence is used, so ``"85/100"``
    gives 85. When the string contains no digits the fallback value 50
    is returned.
    """
    import re

    digits = re.search(r'\d+', score_str)
    if digits is None:
        # No number anywhere in the string: use the fallback score.
        return 50
    return int(digits.group(0))
def format_experience_duration(years_str: str) -> str:
    """Rewrite an experience string as ``"<N> years"``.

    ``N`` is the first run of digits in ``years_str``, converted to int
    (so leading zeros are dropped). The unit label is always the literal
    ``years``, regardless of the number or of any unit in the input.
    If the input contains no digits it is returned unchanged.
    """
    import re

    found = re.search(r'\d+', years_str)
    if found is None:
        # Nothing numeric to standardize: hand the input back untouched.
        return years_str
    return f"{int(found.group(0))} years"
def clean_text(text: str) -> str:
    """Clean and normalize text.

    Drops every character that is not a word character, whitespace,
    ``-``, ``@`` or ``.``, then collapses each run of whitespace into a
    single space and trims both ends.

    Args:
        text: The raw string to clean.

    Returns:
        The cleaned string, with no leading/trailing whitespace and no
        consecutive whitespace characters.
    """
    import re

    # Remove special characters FIRST. Deleting a symbol that sits between
    # two spaces ("a & b") leaves adjacent spaces behind, so whitespace
    # must be normalized after this step, not before it.
    without_specials = re.sub(r'[^\w\s\-@.]', '', text)
    # Collapse any whitespace run (spaces, tabs, newlines) to one space.
    collapsed = re.sub(r'\s+', ' ', without_specials)
    return collapsed.strip()
def skill_similarity(skill1: str, skill2: str) -> float:
    """Return a case-insensitive similarity ratio for two skill names.

    Both names are lower-cased and compared with
    ``difflib.SequenceMatcher``; the result is its ``ratio()``, a float
    between 0.0 and 1.0 where 1.0 means the lower-cased strings are
    identical.
    """
    from difflib import SequenceMatcher

    left = skill1.lower()
    right = skill2.lower()
    matcher = SequenceMatcher(None, left, right)
    return matcher.ratio()
def batch_process(items: list, batch_size: int = 32):
    """Yield consecutive slices of ``items``, each at most ``batch_size`` long.

    This is a generator: slices are produced lazily, in order, and the
    last one may be shorter than ``batch_size``. An empty ``items``
    yields nothing.
    """
    total = len(items)
    for start in range(0, total, batch_size):
        stop = start + batch_size
        # Slicing past the end is safe; the final batch is simply shorter.
        yield items[start:stop]
| # Model conversion utilities | |
def convert_to_onnx(model_path: str, output_path: str):
    """Load a causal-LM checkpoint and export it to an ONNX file.

    The model and tokenizer are loaded from ``model_path`` with the
    ``transformers`` auto classes, the model is traced with a single-token
    dummy input, and the result is written to ``output_path``. A
    confirmation line is printed once the export call returns.

    NOTE(review): the original docstring states the ONNX format is for
    faster inference; nothing in this function measures that.
    """
    from transformers import AutoModelForCausalLM, AutoTokenizer

    model = AutoModelForCausalLM.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    import torch

    # Tracing input: a (1, 1) tensor holding the tokenizer's EOS token id.
    sample_ids = torch.tensor([[tokenizer.eos_token_id]])

    export_options = {
        "input_names": ['input_ids'],
        "output_names": ['output'],
        # Dimensions 0 and 1 of input_ids are declared variable-length;
        # no dynamic axes are declared for the output.
        "dynamic_axes": {'input_ids': {0: 'batch_size', 1: 'sequence'}},
        "opset_version": 12,
    }
    torch.onnx.export(model, sample_ids, output_path, **export_options)

    print(f"✅ Model exported to {output_path}")