Spaces:
Runtime error
Runtime error
| from __future__ import annotations | |
| from typing import List, Tuple | |
| import re | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
def normalize_whitespace(text: str) -> str:
    """Collapse each run of whitespace in *text* to one space and strip the ends."""
    # str.split() with no argument splits on arbitrary whitespace runs and
    # discards empty fields, so re-joining with single spaces is equivalent
    # to the regex substitute-then-strip approach.
    return " ".join(text.split())
def extract_keywords_from_text(text: str, top_k: int = 30) -> List[str]:
    """Extract up to ``top_k`` keyword phrases from *text* via TF-IDF.

    Scores word 1- to 3-grams (English stop words removed, vocabulary capped
    at 5000 features) over the single document, ranks by TF-IDF score, and
    returns whitespace-normalized, order-preserving-deduplicated candidates
    longer than 2 characters.

    Args:
        text: Input document. Empty input yields an empty list.
        top_k: Maximum number of ranked candidates considered before
            filtering and deduplication.

    Returns:
        Keyword strings, best-scoring first.
    """
    if not text:
        return []
    vectorizer = TfidfVectorizer(
        analyzer="word",
        ngram_range=(1, 3),
        stop_words="english",
        max_features=5000,
    )
    try:
        tfidf = vectorizer.fit_transform([text])
    except ValueError:
        # fit_transform raises ValueError("empty vocabulary ...") when the
        # text contains only stop words, punctuation, or whitespace; treat
        # that as "no keywords" instead of crashing.
        return []
    feature_array = vectorizer.get_feature_names_out()
    scores = tfidf.toarray()[0]
    pairs: List[Tuple[str, float]] = list(zip(feature_array, scores))
    pairs.sort(key=lambda p: p[1], reverse=True)
    # Keep top candidates, drop very short tokens, and tidy whitespace.
    keywords = [normalize_whitespace(k) for k, _ in pairs[:top_k] if len(k) > 2]
    # Deduplicate while preserving rank order.
    seen = set()
    deduped = []
    for k in keywords:
        if k not in seen:
            seen.add(k)
            deduped.append(k)
    return deduped
def clamp_to_char_limit(text: str, max_chars: int) -> str:
    """Trim *text* to at most ``max_chars`` characters, preferring to cut at
    a newline boundary near the limit.

    Args:
        text: Text to clamp; leading/trailing whitespace is stripped first.
        max_chars: Maximum number of characters to keep.

    Returns:
        The stripped text unchanged if it already fits; otherwise a prefix of
        at most ``max_chars`` characters ending in a single trailing newline.
    """
    text = text.strip()
    if len(text) <= max_chars:
        return text
    cut = text[:max_chars]
    last_nl = cut.rfind("\n")
    # Cut at the last newline only if one actually exists AND it falls within
    # the final 500 characters before the limit. The explicit -1 check fixes
    # a bug: rfind returns -1 when no newline is present, and for
    # max_chars < 501 the old `last_nl > max_chars - 500` test treated -1 as
    # a valid index, silently dropping the final character via cut[:-1].
    if last_nl != -1 and last_nl > max_chars - 500:
        return cut[:last_nl].rstrip() + "\n"
    # Fallback: hard cut at the character limit.
    return cut.rstrip() + "\n"