# resume-llm-api / src/utils.py
# Uploaded via huggingface_hub by mhr-212 (commit 7e0c689, verified)
# Helper utilities for the project
def parse_skill_match_score(score_str: str) -> int:
    """Extract the first integer found in *score_str*.

    Falls back to a neutral score of 50 when the string contains
    no digits at all.
    """
    import re
    digits = re.search(r'\d+', score_str)
    if digits is None:
        return 50
    return int(digits.group(0))
def format_experience_duration(years_str: str) -> str:
    """Standardize an experience duration to "N years".

    Extracts the first integer from *years_str* and renders it with the
    grammatically correct unit ("1 year", "5 years").  If the string
    contains no digits it is returned unchanged, so free-form input is
    never lost.

    Fix: the original always appended "years", producing "1 years".
    """
    import re
    match = re.search(r'\d+', years_str)
    if match is None:
        return years_str
    years = int(match.group(0))
    # Singular/plural agreement — "1 year" but "0 years" / "5 years".
    unit = "year" if years == 1 else "years"
    return f"{years} {unit}"
def clean_text(text: str) -> str:
    """Normalize *text*: collapse whitespace runs to single spaces,
    drop characters other than word chars, whitespace, '-', '@', '.',
    and trim leading/trailing whitespace.
    """
    import re
    collapsed = re.sub(r'\s+', ' ', text)
    sanitized = re.sub(r'[^\w\s\-@.]', '', collapsed)
    return sanitized.strip()
def skill_similarity(skill1: str, skill2: str) -> float:
    """Return a case-insensitive fuzzy-match ratio between two skill
    names, in the range 0.0 (no overlap) to 1.0 (identical).
    """
    from difflib import SequenceMatcher
    left = skill1.lower()
    right = skill2.lower()
    matcher = SequenceMatcher(None, left, right)
    return matcher.ratio()
def batch_process(items, batch_size: int = 32):
    """Yield successive batches (lists) of up to *batch_size* items.

    Generalized from the original sequence-only version: *items* may be
    any iterable (list, generator, range, ...), not just something that
    supports len() and slicing.  For list input the yielded batches are
    equal to the original slices, so existing callers are unaffected.

    Raises:
        ValueError: if batch_size is less than 1 (the original silently
            yielded nothing for negative sizes).
    """
    from itertools import islice
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1")
    iterator = iter(items)
    while True:
        batch = list(islice(iterator, batch_size))
        if not batch:
            return
        yield batch
# Model conversion utilities
def convert_to_onnx(model_path: str, output_path: str):
    """Convert a fine-tuned causal-LM checkpoint to ONNX for faster inference.

    Loads the model and tokenizer from *model_path*, traces the model with a
    single-token dummy input, and writes the ONNX graph to *output_path*.
    Batch and sequence dimensions are marked dynamic so the exported graph
    accepts variable-length input.

    Args:
        model_path: Local path or hub id of the fine-tuned model.
        output_path: Destination file for the exported ONNX graph.
    """
    from transformers import AutoModelForCausalLM, AutoTokenizer
    import torch

    model = AutoModelForCausalLM.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    # Fix: put the model in eval mode before tracing — exporting in train
    # mode would bake active dropout into the ONNX graph.
    model.eval()

    # NOTE(review): assumes the tokenizer defines an EOS token; some
    # tokenizers have eos_token_id == None, which would make this dummy
    # input invalid — confirm for the target checkpoints.
    dummy_input = torch.tensor([[tokenizer.eos_token_id]])

    # no_grad: tracing needs no autograd bookkeeping.
    with torch.no_grad():
        torch.onnx.export(
            model,
            dummy_input,
            output_path,
            input_names=['input_ids'],
            output_names=['output'],
            dynamic_axes={'input_ids': {0: 'batch_size', 1: 'sequence'}},
            opset_version=12,
        )
    print(f"✅ Model exported to {output_path}")