# resume-llm-api / src/utils.py
# Uploaded via huggingface_hub by mhr-212 (commit 7e0c689, verified)
# Helper utilities for the project
def parse_skill_match_score(score_str: str) -> int:
    """Extract the first integer found in *score_str*.

    Falls back to a neutral score of 50 when the string contains
    no digits at all.
    """
    import re
    digits = re.search(r'\d+', score_str)
    if digits is None:
        return 50
    return int(digits.group(0))
def format_experience_duration(years_str: str) -> str:
    """Standardize an experience duration to "N years".

    Extracts the first integer from *years_str* and renders it with the
    grammatically correct unit ("1 year", "5 years").  If the string
    contains no digits it is returned unchanged, so free-form input is
    never lost.

    Fix: the original always appended "years", producing "1 years".
    """
    import re
    match = re.search(r'\d+', years_str)
    if match is None:
        return years_str
    years = int(match.group(0))
    # Singular/plural agreement — "1 year" but "0 years" / "5 years".
    unit = "year" if years == 1 else "years"
    return f"{years} {unit}"
def clean_text(text: str) -> str:
    """Normalize *text*: collapse whitespace runs to single spaces,
    drop characters other than word chars, whitespace, '-', '@', '.',
    and trim leading/trailing whitespace.
    """
    import re
    collapsed = re.sub(r'\s+', ' ', text)
    sanitized = re.sub(r'[^\w\s\-@.]', '', collapsed)
    return sanitized.strip()
def skill_similarity(skill1: str, skill2: str) -> float:
    """Return a case-insensitive fuzzy-match ratio between two skill
    names, in the range 0.0 (no overlap) to 1.0 (identical).
    """
    from difflib import SequenceMatcher
    left = skill1.lower()
    right = skill2.lower()
    matcher = SequenceMatcher(None, left, right)
    return matcher.ratio()
def batch_process(items, batch_size: int = 32):
    """Yield successive batches (lists) of up to *batch_size* items.

    Generalized from the original sequence-only version: *items* may be
    any iterable (list, generator, range, ...), not just something that
    supports len() and slicing.  For list input the yielded batches are
    equal to the original slices, so existing callers are unaffected.

    Raises:
        ValueError: if batch_size is less than 1 (the original silently
            yielded nothing for negative sizes).
    """
    from itertools import islice
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1")
    iterator = iter(items)
    while True:
        batch = list(islice(iterator, batch_size))
        if not batch:
            return
        yield batch
# Model conversion utilities
def convert_to_onnx(model_path: str, output_path: str):
    """Convert a fine-tuned causal-LM checkpoint to ONNX for faster inference.

    Loads the model and tokenizer from *model_path*, traces the model with a
    single-token dummy input, and writes the ONNX graph to *output_path*.
    Batch and sequence dimensions are marked dynamic so the exported graph
    accepts variable-length input.

    Args:
        model_path: Local path or hub id of the fine-tuned model.
        output_path: Destination file for the exported ONNX graph.
    """
    from transformers import AutoModelForCausalLM, AutoTokenizer
    import torch

    model = AutoModelForCausalLM.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    # Fix: put the model in eval mode before tracing — exporting in train
    # mode would bake active dropout into the ONNX graph.
    model.eval()

    # NOTE(review): assumes the tokenizer defines an EOS token; some
    # tokenizers have eos_token_id == None, which would make this dummy
    # input invalid — confirm for the target checkpoints.
    dummy_input = torch.tensor([[tokenizer.eos_token_id]])

    # no_grad: tracing needs no autograd bookkeeping.
    with torch.no_grad():
        torch.onnx.export(
            model,
            dummy_input,
            output_path,
            input_names=['input_ids'],
            output_names=['output'],
            dynamic_axes={'input_ids': {0: 'batch_size', 1: 'sequence'}},
            opset_version=12,
        )
    print(f"✅ Model exported to {output_path}")