Update handler.py
Browse files- handler.py +142 -119
handler.py
CHANGED
|
@@ -32,7 +32,7 @@ class EndpointHandler:
|
|
| 32 |
|
| 33 |
# Load model immediately
|
| 34 |
self.load_model()
|
| 35 |
-
|
| 36 |
def generate_optimized(self, inputs, attention_mask=None, max_new_tokens=512):
|
| 37 |
"""
|
| 38 |
Optimized generation function that maximizes GPU utilization
|
|
@@ -45,32 +45,49 @@ class EndpointHandler:
|
|
| 45 |
# Find input length to properly calculate output length
|
| 46 |
input_length = inputs.shape[1]
|
| 47 |
|
| 48 |
-
#
|
| 49 |
-
|
| 50 |
-
inputs,
|
| 51 |
-
attention_mask
|
| 52 |
-
max_new_tokens
|
| 53 |
|
| 54 |
# Performance options
|
| 55 |
-
use_cache
|
| 56 |
|
| 57 |
# Quality vs. speed tradeoff
|
| 58 |
-
temperature
|
| 59 |
-
top_p
|
| 60 |
-
do_sample
|
| 61 |
-
num_beams
|
| 62 |
|
| 63 |
# Token handling
|
| 64 |
-
pad_token_id
|
| 65 |
-
eos_token_id
|
| 66 |
|
| 67 |
# Content quality
|
| 68 |
-
repetition_penalty
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
|
| 75 |
return outputs, input_length
|
| 76 |
|
|
@@ -124,7 +141,8 @@ class EndpointHandler:
|
|
| 124 |
if os.path.exists(os.path.join(self.model_dir, "adapter_model.safetensors")):
|
| 125 |
print("Found adapter model, loading Phi-2 base with adapter")
|
| 126 |
|
| 127 |
-
# Check if PEFT is available
|
|
|
|
| 128 |
if not PEFT_AVAILABLE:
|
| 129 |
print("PEFT not available, installing...")
|
| 130 |
try:
|
|
@@ -189,22 +207,40 @@ class EndpointHandler:
|
|
| 189 |
device_map="auto",
|
| 190 |
)
|
| 191 |
|
| 192 |
-
# Check for Flash Attention support
|
| 193 |
try:
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 199 |
self.flash_attention_supported = False
|
| 200 |
|
| 201 |
# Enable TF32 precision for higher performance on newer NVIDIA GPUs
|
| 202 |
if self.device == "cuda":
|
| 203 |
# Only available on Ampere+ GPUs (A100, RTX 3090, etc.)
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
|
|
|
|
|
|
|
|
|
| 208 |
|
| 209 |
print(f"Model loaded successfully on {self.device}")
|
| 210 |
return True
|
|
@@ -248,7 +284,7 @@ class EndpointHandler:
|
|
| 248 |
# Format candidate information
|
| 249 |
candidate_summary = self.format_candidates_for_prompt(candidates)
|
| 250 |
|
| 251 |
-
# Build
|
| 252 |
prompt = f"""Analyze these candidates and create THREE different optimal startup team compositions of {team_size} people each.
|
| 253 |
|
| 254 |
CANDIDATES:
|
|
@@ -258,31 +294,32 @@ TEAM REQUIREMENTS:
|
|
| 258 |
{requirements or "Create a balanced team with complementary skills"}
|
| 259 |
|
| 260 |
For EACH team composition, please provide:
|
| 261 |
-
1. Team Name:
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
-
|
| 265 |
-
-
|
| 266 |
-
-
|
| 267 |
-
|
| 268 |
-
3. Team Analysis:
|
| 269 |
-
-
|
| 270 |
-
-
|
| 271 |
-
-
|
| 272 |
-
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
|
|
|
| 281 |
"""
|
| 282 |
|
| 283 |
-
# Format as chat with
|
| 284 |
messages = [
|
| 285 |
-
{"role": "system", "content": "You are an elite startup advisor with deep expertise in team composition
|
| 286 |
{"role": "user", "content": prompt}
|
| 287 |
]
|
| 288 |
|
|
@@ -307,17 +344,11 @@ Use clear headings. Be direct and concise with minimal filler language or unnece
|
|
| 307 |
# Create attention mask (explicitly handle padding)
|
| 308 |
attention_mask = inputs.ne(self.tokenizer.pad_token_id).long()
|
| 309 |
|
| 310 |
-
#
|
| 311 |
-
outputs = self.
|
| 312 |
inputs,
|
| 313 |
attention_mask=attention_mask,
|
| 314 |
-
max_new_tokens=max_new_tokens
|
| 315 |
-
temperature=0.7,
|
| 316 |
-
top_p=0.9,
|
| 317 |
-
do_sample=True,
|
| 318 |
-
pad_token_id=self.tokenizer.pad_token_id,
|
| 319 |
-
eos_token_id=self.tokenizer.eos_token_id,
|
| 320 |
-
repetition_penalty=1.1
|
| 321 |
)
|
| 322 |
|
| 323 |
# Decode more carefully
|
|
@@ -374,43 +405,42 @@ Use clear headings. Be direct and concise with minimal filler language or unnece
|
|
| 374 |
# Format team information
|
| 375 |
team_summary = self.format_candidates_for_prompt(team)
|
| 376 |
|
| 377 |
-
# Build
|
| 378 |
-
prompt = f"""Analyze this existing startup team:
|
| 379 |
|
| 380 |
TEAM MEMBERS:
|
| 381 |
{team_summary}
|
| 382 |
|
| 383 |
Please provide:
|
| 384 |
|
| 385 |
-
1. Team Composition Analysis:
|
| 386 |
-
-
|
| 387 |
-
-
|
| 388 |
-
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
-
|
| 393 |
-
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
-
|
| 398 |
-
-
|
| 399 |
-
|
| 400 |
-
Be direct and concise with minimal filler language. Focus on actionable insights rather than generalizations.
|
| 401 |
"""
|
| 402 |
|
| 403 |
if include_startup_comparison:
|
| 404 |
prompt += """
|
| 405 |
-
4. Comparison to Successful Startups:
|
| 406 |
-
-
|
| 407 |
-
-
|
| 408 |
-
-
|
| 409 |
"""
|
| 410 |
|
| 411 |
-
# Format as chat with
|
| 412 |
messages = [
|
| 413 |
-
{"role": "system", "content": "You are an elite startup advisor with deep expertise in team composition
|
| 414 |
{"role": "user", "content": prompt}
|
| 415 |
]
|
| 416 |
|
|
@@ -435,17 +465,11 @@ Be direct and concise with minimal filler language. Focus on actionable insights
|
|
| 435 |
# Create attention mask (explicitly handle padding)
|
| 436 |
attention_mask = inputs.ne(self.tokenizer.pad_token_id).long()
|
| 437 |
|
| 438 |
-
#
|
| 439 |
-
outputs = self.
|
| 440 |
inputs,
|
| 441 |
attention_mask=attention_mask,
|
| 442 |
-
max_new_tokens=max_new_tokens
|
| 443 |
-
temperature=0.7,
|
| 444 |
-
top_p=0.9,
|
| 445 |
-
do_sample=True,
|
| 446 |
-
pad_token_id=self.tokenizer.pad_token_id,
|
| 447 |
-
eos_token_id=self.tokenizer.eos_token_id,
|
| 448 |
-
repetition_penalty=1.1
|
| 449 |
)
|
| 450 |
|
| 451 |
# Decode more carefully
|
|
@@ -524,8 +548,8 @@ Be direct and concise with minimal filler language. Focus on actionable insights
|
|
| 524 |
skills = candidate['skills'] if isinstance(candidate['skills'], list) else [candidate['skills']]
|
| 525 |
skills_info = ", ".join(skills)
|
| 526 |
|
| 527 |
-
# Build
|
| 528 |
-
prompt = f"""Analyze this candidate for a startup founder or early employee role:
|
| 529 |
|
| 530 |
CANDIDATE PROFILE:
|
| 531 |
Name: {name}
|
|
@@ -539,30 +563,35 @@ Experience:
|
|
| 539 |
Skills:
|
| 540 |
{skills_info}
|
| 541 |
|
| 542 |
-
Please provide a
|
| 543 |
|
| 544 |
-
1. Strengths Analysis:
|
| 545 |
- Key professional strengths based on background and skills
|
| 546 |
-
-
|
|
|
|
| 547 |
|
| 548 |
-
2. Founder/Early Employee Fit:
|
| 549 |
- Assessment of suitability for founder or early employee roles
|
|
|
|
|
|
|
| 550 |
- Ideal role recommendations in a startup team
|
| 551 |
|
| 552 |
-
3. Complementary Team Members:
|
| 553 |
-
- What types of co-founders would complement this candidate
|
|
|
|
| 554 |
- Skills gaps that should be filled by other team members
|
| 555 |
|
| 556 |
-
4. Risk Assessment:
|
| 557 |
-
- Potential blind spots or weaknesses
|
| 558 |
-
- Areas where the candidate might need support
|
|
|
|
| 559 |
|
| 560 |
-
|
| 561 |
"""
|
| 562 |
|
| 563 |
-
# Format as chat with
|
| 564 |
messages = [
|
| 565 |
-
{"role": "system", "content": "You are an elite talent assessor specializing in startup founders and early employees. You provide
|
| 566 |
{"role": "user", "content": prompt}
|
| 567 |
]
|
| 568 |
|
|
@@ -587,17 +616,11 @@ Be direct and concise with minimal filler language. Focus on actionable insights
|
|
| 587 |
# Create attention mask (explicitly handle padding)
|
| 588 |
attention_mask = inputs.ne(self.tokenizer.pad_token_id).long()
|
| 589 |
|
| 590 |
-
#
|
| 591 |
-
outputs = self.
|
| 592 |
inputs,
|
| 593 |
attention_mask=attention_mask,
|
| 594 |
-
max_new_tokens=max_new_tokens
|
| 595 |
-
temperature=0.7,
|
| 596 |
-
top_p=0.9,
|
| 597 |
-
do_sample=True,
|
| 598 |
-
pad_token_id=self.tokenizer.pad_token_id,
|
| 599 |
-
eos_token_id=self.tokenizer.eos_token_id,
|
| 600 |
-
repetition_penalty=1.1
|
| 601 |
)
|
| 602 |
|
| 603 |
# Decode more carefully
|
|
@@ -908,7 +931,7 @@ Return a JSON array containing ONLY the candidate numbers (starting from 1) that
|
|
| 908 |
return {
|
| 909 |
"team_analysis": team_analysis,
|
| 910 |
"model_info": {
|
| 911 |
-
"
|
| 912 |
"model_type": "phi-2-qlora-finetuned"
|
| 913 |
}
|
| 914 |
}
|
|
|
|
| 32 |
|
| 33 |
# Load model immediately
|
| 34 |
self.load_model()
|
| 35 |
+
|
| 36 |
def generate_optimized(self, inputs, attention_mask=None, max_new_tokens=512):
|
| 37 |
"""
|
| 38 |
Optimized generation function that maximizes GPU utilization
|
|
|
|
| 45 |
# Find input length to properly calculate output length
|
| 46 |
input_length = inputs.shape[1]
|
| 47 |
|
| 48 |
+
# Basic generation parameters
|
| 49 |
+
generation_kwargs = {
|
| 50 |
+
"inputs": inputs,
|
| 51 |
+
"attention_mask": attention_mask,
|
| 52 |
+
"max_new_tokens": max_new_tokens,
|
| 53 |
|
| 54 |
# Performance options
|
| 55 |
+
"use_cache": True, # Use KV cache for faster generation
|
| 56 |
|
| 57 |
# Quality vs. speed tradeoff
|
| 58 |
+
"temperature": 0.7 if self.use_sampling else 1.0,
|
| 59 |
+
"top_p": 0.9 if self.use_sampling else 1.0,
|
| 60 |
+
"do_sample": self.use_sampling, # Sampling is slightly slower but better quality
|
| 61 |
+
"num_beams": 1, # Beam search is slower but better quality (1 = no beam search)
|
| 62 |
|
| 63 |
# Token handling
|
| 64 |
+
"pad_token_id": self.tokenizer.pad_token_id,
|
| 65 |
+
"eos_token_id": self.tokenizer.eos_token_id,
|
| 66 |
|
| 67 |
# Content quality
|
| 68 |
+
"repetition_penalty": 1.1, # Reduce repetition
|
| 69 |
+
}
|
| 70 |
+
|
| 71 |
+
# Add Flash Attention parameters only if supported by the transformers version
|
| 72 |
+
# We check the transformer version by testing in a safe way
|
| 73 |
+
try:
|
| 74 |
+
import importlib
|
| 75 |
+
transformers_version = importlib.import_module('transformers').__version__
|
| 76 |
+
major, minor = map(int, transformers_version.split('.')[:2])
|
| 77 |
+
|
| 78 |
+
if major > 4 or (major == 4 and minor >= 32):
|
| 79 |
+
# Flash Attention support was added in transformers 4.32.0
|
| 80 |
+
if self.flash_attention_supported:
|
| 81 |
+
print("Using Flash Attention in generation")
|
| 82 |
+
generation_kwargs["flash_attn"] = True
|
| 83 |
+
generation_kwargs["flash_attn_cross_entropy"] = True
|
| 84 |
+
else:
|
| 85 |
+
print(f"Flash Attention not added - transformers version {transformers_version} doesn't support it")
|
| 86 |
+
except Exception as e:
|
| 87 |
+
print(f"Error checking transformers version, skipping Flash Attention: {e}")
|
| 88 |
+
|
| 89 |
+
# Generate with optimized parameters for GPU performance
|
| 90 |
+
outputs = self.model.generate(**generation_kwargs)
|
| 91 |
|
| 92 |
return outputs, input_length
|
| 93 |
|
|
|
|
| 141 |
if os.path.exists(os.path.join(self.model_dir, "adapter_model.safetensors")):
|
| 142 |
print("Found adapter model, loading Phi-2 base with adapter")
|
| 143 |
|
| 144 |
+
# Check if PEFT is available - using the global variable
|
| 145 |
+
global PEFT_AVAILABLE
|
| 146 |
if not PEFT_AVAILABLE:
|
| 147 |
print("PEFT not available, installing...")
|
| 148 |
try:
|
|
|
|
| 207 |
device_map="auto",
|
| 208 |
)
|
| 209 |
|
| 210 |
+
# Check for Flash Attention support with better error handling
|
| 211 |
try:
|
| 212 |
+
# First check if the transformers version supports it
|
| 213 |
+
import importlib
|
| 214 |
+
transformers_version = importlib.import_module('transformers').__version__
|
| 215 |
+
major, minor = map(int, transformers_version.split('.')[:2])
|
| 216 |
+
|
| 217 |
+
if major > 4 or (major == 4 and minor >= 32):
|
| 218 |
+
# Flash Attention support was added in transformers 4.32.0
|
| 219 |
+
try:
|
| 220 |
+
import flash_attn
|
| 221 |
+
self.flash_attention_supported = True
|
| 222 |
+
print(f"Flash Attention {flash_attn.__version__} detected and will be used if available!")
|
| 223 |
+
except ImportError:
|
| 224 |
+
print("Flash Attention library not installed. Using standard attention mechanism.")
|
| 225 |
+
self.flash_attention_supported = False
|
| 226 |
+
else:
|
| 227 |
+
print(f"Transformers version {transformers_version} doesn't support Flash Attention parameters. Using standard attention.")
|
| 228 |
+
self.flash_attention_supported = False
|
| 229 |
+
except Exception as e:
|
| 230 |
+
print(f"Error checking Flash Attention support: {e}")
|
| 231 |
+
print("Falling back to standard attention mechanism.")
|
| 232 |
self.flash_attention_supported = False
|
| 233 |
|
| 234 |
# Enable TF32 precision for higher performance on newer NVIDIA GPUs
|
| 235 |
if self.device == "cuda":
|
| 236 |
# Only available on Ampere+ GPUs (A100, RTX 3090, etc.)
|
| 237 |
+
try:
|
| 238 |
+
if torch.cuda.get_device_capability()[0] >= 8:
|
| 239 |
+
print("Enabling TF32 precision for faster matrix operations")
|
| 240 |
+
torch.backends.cuda.matmul.allow_tf32 = True
|
| 241 |
+
torch.backends.cudnn.allow_tf32 = True
|
| 242 |
+
except Exception as e:
|
| 243 |
+
print(f"Error enabling TF32 precision: {e}")
|
| 244 |
|
| 245 |
print(f"Model loaded successfully on {self.device}")
|
| 246 |
return True
|
|
|
|
| 284 |
# Format candidate information
|
| 285 |
candidate_summary = self.format_candidates_for_prompt(candidates)
|
| 286 |
|
| 287 |
+
# Build the enhanced prompt for more detailed analysis
|
| 288 |
prompt = f"""Analyze these candidates and create THREE different optimal startup team compositions of {team_size} people each.
|
| 289 |
|
| 290 |
CANDIDATES:
|
|
|
|
| 294 |
{requirements or "Create a balanced team with complementary skills"}
|
| 295 |
|
| 296 |
For EACH team composition, please provide:
|
| 297 |
+
1. Team Name: Give this team composition a memorable name based on its strengths
|
| 298 |
+
2. Selected Members: List each selected team member with:
|
| 299 |
+
- Their name
|
| 300 |
+
- Recommended role in the team
|
| 301 |
+
- 2-3 sentences on WHY they specifically are valuable to this team composition
|
| 302 |
+
- How they complement other team members
|
| 303 |
+
|
| 304 |
+
3. Team Analysis (minimum 250 words):
|
| 305 |
+
- Detailed strengths of this specific team combination
|
| 306 |
+
- Potential weaknesses or challenges this team might face
|
| 307 |
+
- Assessment of skill coverage and diversity of thinking
|
| 308 |
+
- Team dynamics and how members would likely work together
|
| 309 |
+
- How this team aligns with the stated requirements
|
| 310 |
+
|
| 311 |
+
4. Alternative Applications:
|
| 312 |
+
- What type of startup would be MOST successful with this team
|
| 313 |
+
- What type of startup would be LEAST successful with this team
|
| 314 |
+
|
| 315 |
+
After presenting all three team compositions, provide a final recommendation on which team would be best and why.
|
| 316 |
+
|
| 317 |
+
Format your response carefully with clear headings and make it comprehensive enough for founders to make informed decisions.
|
| 318 |
"""
|
| 319 |
|
| 320 |
+
# Format as chat with improved system prompt
|
| 321 |
messages = [
|
| 322 |
+
{"role": "system", "content": "You are an elite startup advisor with deep expertise in team composition and founder dynamics. You specialize in analyzing candidate profiles and determining optimal team compositions that maximize chances of startup success."},
|
| 323 |
{"role": "user", "content": prompt}
|
| 324 |
]
|
| 325 |
|
|
|
|
| 344 |
# Create attention mask (explicitly handle padding)
|
| 345 |
attention_mask = inputs.ne(self.tokenizer.pad_token_id).long()
|
| 346 |
|
| 347 |
+
# Use the optimized generator instead of direct model.generate call
|
| 348 |
+
outputs, input_length = self.generate_optimized(
|
| 349 |
inputs,
|
| 350 |
attention_mask=attention_mask,
|
| 351 |
+
max_new_tokens=max_new_tokens
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 352 |
)
|
| 353 |
|
| 354 |
# Decode more carefully
|
|
|
|
| 405 |
# Format team information
|
| 406 |
team_summary = self.format_candidates_for_prompt(team)
|
| 407 |
|
| 408 |
+
# Build the prompt
|
| 409 |
+
prompt = f"""Analyze this existing startup team in depth:
|
| 410 |
|
| 411 |
TEAM MEMBERS:
|
| 412 |
{team_summary}
|
| 413 |
|
| 414 |
Please provide:
|
| 415 |
|
| 416 |
+
1. Team Composition Analysis (minimum 150 words):
|
| 417 |
+
- Overall assessment of the team's strengths and complementary skills
|
| 418 |
+
- Key skill coverage and potential skill gaps
|
| 419 |
+
- Team dynamics and how members would likely work together
|
| 420 |
+
- Potential areas of conflict or collaboration challenges
|
| 421 |
+
|
| 422 |
+
2. Success Factors (minimum 100 words):
|
| 423 |
+
- What types of startups would be MOST successful with this team
|
| 424 |
+
- Key advantages this team has compared to typical startup teams
|
| 425 |
+
- How team members' backgrounds create competitive advantages
|
| 426 |
+
|
| 427 |
+
3. Risk Factors (minimum 100 words):
|
| 428 |
+
- What types of startups would be LEAST successful with this team
|
| 429 |
+
- Potential blind spots or weaknesses in the team composition
|
| 430 |
+
- Suggested additions or changes to strengthen the team
|
|
|
|
| 431 |
"""
|
| 432 |
|
| 433 |
if include_startup_comparison:
|
| 434 |
prompt += """
|
| 435 |
+
4. Comparison to Successful Startups (minimum 100 words):
|
| 436 |
+
- How this team compares to founding teams of successful startups
|
| 437 |
+
- Historical examples of similar team compositions that succeeded
|
| 438 |
+
- Key differentiating factors from typical successful startup teams
|
| 439 |
"""
|
| 440 |
|
| 441 |
+
# Format as chat with improved system prompt
|
| 442 |
messages = [
|
| 443 |
+
{"role": "system", "content": "You are an elite startup advisor with deep expertise in team composition and founder dynamics. You specialize in analyzing team profiles and providing actionable insights to maximize chances of startup success."},
|
| 444 |
{"role": "user", "content": prompt}
|
| 445 |
]
|
| 446 |
|
|
|
|
| 465 |
# Create attention mask (explicitly handle padding)
|
| 466 |
attention_mask = inputs.ne(self.tokenizer.pad_token_id).long()
|
| 467 |
|
| 468 |
+
# Use the optimized generator instead of direct model.generate call
|
| 469 |
+
outputs, input_length = self.generate_optimized(
|
| 470 |
inputs,
|
| 471 |
attention_mask=attention_mask,
|
| 472 |
+
max_new_tokens=max_new_tokens
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 473 |
)
|
| 474 |
|
| 475 |
# Decode more carefully
|
|
|
|
| 548 |
skills = candidate['skills'] if isinstance(candidate['skills'], list) else [candidate['skills']]
|
| 549 |
skills_info = ", ".join(skills)
|
| 550 |
|
| 551 |
+
# Build comprehensive prompt
|
| 552 |
+
prompt = f"""Analyze this candidate in depth for a startup founder or early employee role:
|
| 553 |
|
| 554 |
CANDIDATE PROFILE:
|
| 555 |
Name: {name}
|
|
|
|
| 563 |
Skills:
|
| 564 |
{skills_info}
|
| 565 |
|
| 566 |
+
Please provide a comprehensive analysis including:
|
| 567 |
|
| 568 |
+
1. Strengths Analysis (minimum 150 words):
|
| 569 |
- Key professional strengths based on background and skills
|
| 570 |
+
- Notable accomplishments and their significance
|
| 571 |
+
- Areas of deep expertise and how they apply to startups
|
| 572 |
|
| 573 |
+
2. Founder/Early Employee Fit (minimum 150 words):
|
| 574 |
- Assessment of suitability for founder or early employee roles
|
| 575 |
+
- Specific founder archetype this candidate represents
|
| 576 |
+
- Optimal startup stages for this candidate
|
| 577 |
- Ideal role recommendations in a startup team
|
| 578 |
|
| 579 |
+
3. Complementary Team Members (minimum 100 words):
|
| 580 |
+
- What types of co-founders or team members would complement this candidate
|
| 581 |
+
- Potential team dynamics when working with different personality types
|
| 582 |
- Skills gaps that should be filled by other team members
|
| 583 |
|
| 584 |
+
4. Risk Assessment (minimum 100 words):
|
| 585 |
+
- Potential blind spots or weaknesses based on background
|
| 586 |
+
- Areas where the candidate might need support or development
|
| 587 |
+
- Situations where this candidate might struggle in a startup environment
|
| 588 |
|
| 589 |
+
Format your analysis with clear sections and detailed insights to help assess this candidate for startup roles.
|
| 590 |
"""
|
| 591 |
|
| 592 |
+
# Format as chat with system prompt
|
| 593 |
messages = [
|
| 594 |
+
{"role": "system", "content": "You are an elite talent assessor specializing in startup founders and early employees. You provide in-depth analysis of candidates' strengths, founder fit, and team compatibility."},
|
| 595 |
{"role": "user", "content": prompt}
|
| 596 |
]
|
| 597 |
|
|
|
|
| 616 |
# Create attention mask (explicitly handle padding)
|
| 617 |
attention_mask = inputs.ne(self.tokenizer.pad_token_id).long()
|
| 618 |
|
| 619 |
+
# Use the optimized generator instead of direct model.generate call
|
| 620 |
+
outputs, input_length = self.generate_optimized(
|
| 621 |
inputs,
|
| 622 |
attention_mask=attention_mask,
|
| 623 |
+
max_new_tokens=max_new_tokens
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 624 |
)
|
| 625 |
|
| 626 |
# Decode more carefully
|
|
|
|
| 931 |
return {
|
| 932 |
"team_analysis": team_analysis,
|
| 933 |
"model_info": {
|
| 934 |
+
"x": str(self.device),
|
| 935 |
"model_type": "phi-2-qlora-finetuned"
|
| 936 |
}
|
| 937 |
}
|