|
|
import os |
|
|
import json |
|
|
import torch |
|
|
from typing import Dict, List, Any, Optional, Union |
|
|
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig |
|
|
|
|
|
|
|
|
# Optional dependency probe: PEFT is only required when a LoRA/QLoRA adapter
# has to be attached to the base model. PEFT_AVAILABLE records the outcome so
# the rest of the module can branch on it.
try:
    import peft
    from peft import PeftModel, PeftConfig
except ImportError:
    PEFT_AVAILABLE = False
    print("Warning: PEFT library not available. Adapter loading may fail.")
else:
    PEFT_AVAILABLE = True
|
|
|
|
|
|
|
|
class EndpointHandler:
    """Inference endpoint around a Phi-2 causal LM (optionally with an adapter).

    The handler is callable: ``handler(payload)`` dispatches on
    ``payload["operation"]`` (optionally nested under ``"inputs"``) to one of
    ``ping``, ``generate_team``, ``analyze_team``, ``analyze_candidate``,
    ``parse_requirements`` or ``search_candidates`` and always returns a dict.
    """

    BASE_MODEL_ID = "microsoft/phi-2"
    MODEL_TYPE = "phi-2-qlora-finetuned"
    # Context window assumed when budgeting new tokens.
    MAX_CONTEXT_LENGTH = 2048

    _UNAVAILABLE = (
        "Model could not be loaded. The AI analysis service is currently unavailable."
    )
    _UNAVAILABLE_RESTORE = _UNAVAILABLE + " We're working to restore it as soon as possible."
    # operation -> (response key, user-facing text) used when the model is missing.
    _FALLBACK_TEXT = {
        "generate_team": (
            "team_analysis",
            _UNAVAILABLE_RESTORE
            + " In the meantime, you can still use the basic team creation features.",
        ),
        "analyze_team": ("team_analysis", _UNAVAILABLE_RESTORE),
        "analyze_candidate": ("candidate_analysis", _UNAVAILABLE_RESTORE),
    }

    def __init__(self, model_dir=None):
        """
        Initialize the endpoint handler and eagerly try to load the model.

        Args:
            model_dir: Directory where model and tokenizer are stored. Falls
                back to the ``MODEL_PATH`` environment variable, then "/model".
        """
        self.model = None
        self.tokenizer = None
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model_dir = model_dir or os.getenv("MODEL_PATH", "/model")

        # Informational only: records whether the flash_attn package was
        # detected at load time. It is never forwarded to generate().
        self.flash_attention_supported = False
        self.use_sampling = True

        self.load_model()

    # ------------------------------------------------------------------
    # Generation
    # ------------------------------------------------------------------
    def generate_optimized(self, inputs, attention_mask=None, max_new_tokens=512):
        """
        Run ``model.generate`` with the handler's standard decoding settings.

        Args:
            inputs: Tensor of prompt token ids; ``inputs.shape[1]`` is taken as
                the prompt length.
            attention_mask: Optional mask. When omitted it is derived by
                comparing ``inputs`` against the tokenizer's pad token id.
            max_new_tokens: Upper bound on generated tokens.

        Returns:
            Tuple ``(outputs, input_length)``: the raw ``generate`` result and
            the prompt length, so callers can slice off the echoed prompt.
        """
        if attention_mask is None:
            attention_mask = inputs.ne(self.tokenizer.pad_token_id).long()

        input_length = inputs.shape[1]

        generation_kwargs = {
            "inputs": inputs,
            "attention_mask": attention_mask,
            "max_new_tokens": max_new_tokens,
            "use_cache": True,
            "temperature": 0.7 if self.use_sampling else 1.0,
            "top_p": 0.9 if self.use_sampling else 1.0,
            "do_sample": self.use_sampling,
            "num_beams": 1,
            "pad_token_id": self.tokenizer.pad_token_id,
            "eos_token_id": self.tokenizer.eos_token_id,
            "repetition_penalty": 1.1,
        }

        # No flash-attention switches are added here: generate() rejects
        # keyword arguments the model does not consume, so extra flags would
        # make every request fail. The attention backend is a load-time choice.
        outputs = self.model.generate(**generation_kwargs)
        return outputs, input_length

    def _encode_chat(self, system_prompt, user_prompt):
        """Apply the tokenizer's chat template and move the ids to ``self.device``."""
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ]
        return self.tokenizer.apply_chat_template(
            messages,
            return_tensors="pt",
        ).to(self.device)

    def _token_budget(self, prompt_length):
        """Return how many new tokens to request for a prompt of this length.

        NOTE(review): the floor of 100 means prompts longer than
        ``MAX_CONTEXT_LENGTH - 100`` still request 100 tokens; confirm the
        model tolerates that or truncate the prompt upstream.
        """
        return max(100, min(1024, self.MAX_CONTEXT_LENGTH - prompt_length))

    def _decode_new_tokens(self, outputs, prompt_length):
        """Decode only the tokens generated after the prompt, markers removed."""
        text = self.tokenizer.decode(
            outputs[0][prompt_length:],
            skip_special_tokens=True,
            clean_up_tokenization_spaces=True,
        )
        return text.replace("<|im_end|>", "").replace("<|im_start|>", "").strip()

    def _decode_after_prompt(self, outputs, user_prompt):
        """Fallback decode: full text, keeping what follows the user prompt."""
        full_response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        return full_response.split(user_prompt)[-1].strip()

    def _chat_completion(self, system_prompt, user_prompt, label=""):
        """Generate a free-text reply for the three analysis operations.

        Args:
            system_prompt: System message content.
            user_prompt: User message content.
            label: Optional lower-case tag (e.g. "team analysis") woven into
                the log lines.
        """
        log_prefix = f"{label.capitalize()} - " if label else ""
        decode_tag = f"{label} " if label else ""

        inputs = self._encode_chat(system_prompt, user_prompt)
        with torch.no_grad():
            prompt_length = inputs.shape[1]
            max_new_tokens = self._token_budget(prompt_length)
            print(f"{log_prefix}Input length: {prompt_length}, Max new tokens: {max_new_tokens}")

            # Single unpadded prompt: every position is real. Comparing against
            # pad_token_id would also mask EOS tokens, because load_model may
            # alias pad_token to eos_token.
            attention_mask = torch.ones_like(inputs)
            outputs, prompt_length = self.generate_optimized(
                inputs,
                attention_mask=attention_mask,
                max_new_tokens=max_new_tokens,
            )

        try:
            reply = self._decode_new_tokens(outputs, prompt_length)
            if not reply:
                reply = self._decode_after_prompt(outputs, user_prompt)
        except Exception as decode_error:
            print(f"Error decoding {decode_tag}response: {decode_error}")
            reply = self._decode_after_prompt(outputs, user_prompt)
        return reply

    def _structured_completion(self, system_prompt, user_prompt, temperature):
        """Generate a short machine-readable reply (JSON object or array).

        Only newly generated tokens are returned so that examples embedded in
        the prompt can never be mistaken for the model's answer.

        NOTE(review): ``temperature``/``top_p`` are forwarded as in the
        original call, but ``do_sample`` is not set, so decoding follows the
        model's generation config — confirm whether sampling was intended.
        """
        inputs = self._encode_chat(system_prompt, user_prompt)
        prompt_length = inputs.shape[1]
        with torch.no_grad():
            outputs = self.model.generate(
                inputs,
                attention_mask=torch.ones_like(inputs),
                # Budget new tokens only; a total-length cap would leave no
                # room to answer once the prompt itself grows long.
                max_new_tokens=self._token_budget(prompt_length),
                temperature=temperature,
                top_p=0.9,
                pad_token_id=self.tokenizer.pad_token_id,
            )
        return self._decode_new_tokens(outputs, prompt_length)

    # ------------------------------------------------------------------
    # Model loading
    # ------------------------------------------------------------------
    def load_model(self):
        """Load the finetuned model and tokenizer.

        Returns:
            True on success, False if any step raised (the error and traceback
            are printed and ``self.model`` keeps its previous value).
        """
        try:
            print(f"Loading model from {self.model_dir} to {self.device}...")
            self.tokenizer = self._load_tokenizer()
            self.model = self._load_weights()
            self.flash_attention_supported = self._detect_flash_attention()
            self._enable_tf32()
            print(f"Model loaded successfully on {self.device}")
            return True
        except Exception as e:
            print(f"Error loading model: {e}")
            import traceback
            print(traceback.format_exc())
            return False

    def _ensure_loaded(self):
        """Retry loading if model or tokenizer is missing; report model presence."""
        if self.model is None or self.tokenizer is None:
            self.load_model()
        return self.model is not None

    def _load_tokenizer(self):
        """Load the tokenizer from ``model_dir``, else the base Phi-2 tokenizer."""
        try:
            tokenizer = AutoTokenizer.from_pretrained(
                self.model_dir,
                padding_side="left",
                trust_remote_code=False,
            )
        except Exception as tokenizer_error:
            print(f"Error loading tokenizer from {self.model_dir}: {tokenizer_error}")
            print("Attempting to load base Phi-2 tokenizer...")
            tokenizer = AutoTokenizer.from_pretrained(
                self.BASE_MODEL_ID,
                padding_side="left",
                trust_remote_code=False,
            )

        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
            print("Set pad_token to eos_token")
        return tokenizer

    def _load_weights(self):
        """Prefer 4-bit loading; fall back to fp16 when it raises ImportError."""
        try:
            # Imported purely to probe that bitsandbytes is usable.
            from bitsandbytes.nn import Linear4bit  # noqa: F401
            return self._load_quantized()
        except ImportError as e:
            print(f"Warning: Could not use bitsandbytes quantization, falling back to standard loading: {e}")
            return self._load_unquantized()

    def _load_quantized(self):
        """Load 4-bit NF4 weights, attaching the adapter in ``model_dir`` if present."""
        print("Using 4-bit quantization with float16 compute type")
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
        )

        adapter_file = os.path.join(self.model_dir, "adapter_model.safetensors")
        if not os.path.exists(adapter_file):
            print("Loading model directly from directory")
            return AutoModelForCausalLM.from_pretrained(
                self.model_dir,
                torch_dtype=torch.float16,
                device_map="auto",
                quantization_config=quantization_config,
            )

        print("Found adapter model, loading Phi-2 base with adapter")
        peft_model_cls = self._resolve_peft_model_class()
        base_model = AutoModelForCausalLM.from_pretrained(
            self.BASE_MODEL_ID,
            quantization_config=quantization_config,
            torch_dtype=torch.float16,
            device_map="auto",
        )
        if peft_model_cls is None:
            print("Falling back to base model without adapter")
            return base_model

        try:
            adapted = peft_model_cls.from_pretrained(
                base_model,
                self.model_dir,
                torch_dtype=torch.float16,
                device_map="auto",
            )
            print("Successfully loaded adapter model")
            return adapted
        except Exception as adapter_error:
            print(f"Error loading adapter: {adapter_error}")
            print("Falling back to base model without adapter")
            return base_model

    def _resolve_peft_model_class(self):
        """Return ``peft.PeftModel``, installing PEFT once if it is missing.

        The class is imported under a private alias. Importing the name
        ``PeftModel`` inside ``load_model`` would shadow the module-level
        import and make it unbound on the path where PEFT was already present.

        Returns:
            The ``PeftModel`` class, or None when PEFT cannot be made available.
        """
        global PEFT_AVAILABLE
        if not PEFT_AVAILABLE:
            print("PEFT not available, installing...")
            try:
                import importlib
                import subprocess
                import sys

                # pip is driven through its supported command-line entry point.
                subprocess.check_call([sys.executable, "-m", "pip", "install", "peft"])
                importlib.invalidate_caches()
            except Exception as e:
                print(f"Failed to install PEFT: {e}")
                return None

        try:
            from peft import PeftModel as peft_model_cls
        except ImportError as e:
            print(f"Failed to install PEFT: {e}")
            return None

        PEFT_AVAILABLE = True
        return peft_model_cls

    def _load_unquantized(self):
        """Load fp16 weights from ``model_dir``, else the base Phi-2 model."""
        try:
            return AutoModelForCausalLM.from_pretrained(
                self.model_dir,
                torch_dtype=torch.float16,
                device_map="auto",
            )
        except Exception as model_error:
            print(f"Error loading from model directory: {model_error}")
            print("Attempting to load base Phi-2 model...")
            return AutoModelForCausalLM.from_pretrained(
                self.BASE_MODEL_ID,
                torch_dtype=torch.float16,
                device_map="auto",
            )

    def _detect_flash_attention(self):
        """Report whether transformers >= 4.32 and the flash_attn package are present."""
        try:
            import importlib
            transformers_version = importlib.import_module('transformers').__version__
            major, minor = map(int, transformers_version.split('.')[:2])

            if not (major > 4 or (major == 4 and minor >= 32)):
                print(f"Transformers version {transformers_version} doesn't support Flash Attention parameters. Using standard attention.")
                return False

            try:
                import flash_attn
            except ImportError:
                print("Flash Attention library not installed. Using standard attention mechanism.")
                return False
            print(f"Flash Attention {flash_attn.__version__} detected and will be used if available!")
            return True
        except Exception as e:
            print(f"Error checking Flash Attention support: {e}")
            print("Falling back to standard attention mechanism.")
            return False

    def _enable_tf32(self):
        """Turn on TF32 matmul/cuDNN kernels on CUDA devices of capability >= 8."""
        if self.device != "cuda":
            return
        try:
            if torch.cuda.get_device_capability()[0] >= 8:
                print("Enabling TF32 precision for faster matrix operations")
                torch.backends.cuda.matmul.allow_tf32 = True
                torch.backends.cudnn.allow_tf32 = True
        except Exception as e:
            print(f"Error enabling TF32 precision: {e}")

    # ------------------------------------------------------------------
    # Prompt formatting
    # ------------------------------------------------------------------
    @staticmethod
    def _as_entries(value):
        """Normalise a field to a list: falsy -> [], list -> itself, else [value]."""
        if not value:
            return []
        return value if isinstance(value, list) else [value]

    def format_candidates_for_prompt(self, candidates: List[Dict[str, Any]]) -> str:
        """Format candidate information for the prompt.

        Each candidate becomes a numbered paragraph (numbering starts at 1)
        with its name, first education entry, first experience entry and up to
        five skills. Entries that are not dicts are rendered with ``str()``
        rather than raising.
        """
        paragraphs = []
        for number, candidate in enumerate(candidates, start=1):
            if not isinstance(candidate, dict):
                # Keep the slot so numbering still lines up with list positions.
                paragraphs.append(f"Candidate {number}: {candidate}\n\n")
                continue

            lines = [f"Candidate {number}: {candidate.get('name', 'Unnamed')}"]

            education = candidate.get('education')
            if education:
                edu = self._as_entries(education)[0]
                if isinstance(edu, dict):
                    lines.append(f"Education: {edu.get('institution', 'Unknown')}, {edu.get('degree', 'Unknown')}")
                else:
                    lines.append(f"Education: {edu}")

            experience = candidate.get('experience')
            if experience:
                exp = self._as_entries(experience)[0]
                if isinstance(exp, dict):
                    lines.append(f"Experience: {exp.get('title', 'Unknown')} at {exp.get('company', 'Unknown')}")
                else:
                    lines.append(f"Experience: {exp}")

            skills = self._as_entries(candidate.get('skills'))
            if skills:
                lines.append("Skills: " + ", ".join(str(skill) for skill in skills[:5]))

            paragraphs.append("\n".join(lines) + "\n\n")

        return "".join(paragraphs)

    # ------------------------------------------------------------------
    # Operations
    # ------------------------------------------------------------------
    def generate_team(self, candidates: List[Dict[str, Any]], requirements: str = None, team_size: int = 5) -> str:
        """Generate a team based on candidates and requirements.

        Returns the model's text, or a user-facing error message if the model
        is unavailable or generation fails.
        """
        if not self._ensure_loaded():
            return self._UNAVAILABLE

        try:
            candidate_summary = self.format_candidates_for_prompt(candidates)
            team_requirements = requirements or "Create a balanced team with complementary skills"

            prompt = "\n".join([
                f"Analyze these candidates and create THREE different optimal startup team compositions of {team_size} people each.",
                "",
                "CANDIDATES:",
                candidate_summary,
                "",
                "TEAM REQUIREMENTS:",
                team_requirements,
                "",
                "For EACH team composition, please provide:",
                "1. Team Name: Give this team composition a memorable name based on its strengths",
                "2. Selected Members: List each selected team member with:",
                "- Their name",
                "- Recommended role in the team",
                "- 2-3 sentences on WHY they specifically are valuable to this team composition",
                "- How they complement other team members",
                "",
                "3. Team Analysis (minimum 250 words):",
                "- Detailed strengths of this specific team combination",
                "- Potential weaknesses or challenges this team might face",
                "- Assessment of skill coverage and diversity of thinking",
                "- Team dynamics and how members would likely work together",
                "- How this team aligns with the stated requirements",
                "",
                "4. Alternative Applications:",
                "- What type of startup would be MOST successful with this team",
                "- What type of startup would be LEAST successful with this team",
                "",
                "After presenting all three team compositions, provide a final recommendation on which team would be best and why.",
                "",
                "Format your response carefully with clear headings and make it comprehensive enough for founders to make informed decisions.",
                "",
            ])

            return self._chat_completion(
                "You are an elite startup advisor with deep expertise in team composition and founder dynamics. You specialize in analyzing candidate profiles and determining optimal team compositions that maximize chances of startup success.",
                prompt,
            )

        except Exception as e:
            import traceback
            print(f"Error generating team: {e}")
            print(traceback.format_exc())
            return f"We encountered an error while analyzing the team. The AI service may be temporarily unavailable. You can try again later or use the basic team creation features. Error details: {str(e)}"

    def analyze_team(self, team: List[Dict[str, Any]], include_startup_comparison: bool = True) -> str:
        """
        Analyze an existing team and provide insights.

        Args:
            team: List of team members
            include_startup_comparison: Whether to compare to successful startups

        Returns:
            Team analysis as string (or a user-facing error message)
        """
        if not self._ensure_loaded():
            return self._UNAVAILABLE

        try:
            team_summary = self.format_candidates_for_prompt(team)

            prompt = "\n".join([
                "Analyze this existing startup team in depth:",
                "",
                "TEAM MEMBERS:",
                team_summary,
                "",
                "Please provide:",
                "",
                "1. Team Composition Analysis (minimum 150 words):",
                "- Overall assessment of the team's strengths and complementary skills",
                "- Key skill coverage and potential skill gaps",
                "- Team dynamics and how members would likely work together",
                "- Potential areas of conflict or collaboration challenges",
                "",
                "2. Success Factors (minimum 100 words):",
                "- What types of startups would be MOST successful with this team",
                "- Key advantages this team has compared to typical startup teams",
                "- How team members' backgrounds create competitive advantages",
                "",
                "3. Risk Factors (minimum 100 words):",
                "- What types of startups would be LEAST successful with this team",
                "- Potential blind spots or weaknesses in the team composition",
                "- Suggested additions or changes to strengthen the team",
                "",
            ])

            if include_startup_comparison:
                prompt += "\n".join([
                    "",
                    "4. Comparison to Successful Startups (minimum 100 words):",
                    "- How this team compares to founding teams of successful startups",
                    "- Historical examples of similar team compositions that succeeded",
                    "- Key differentiating factors from typical successful startup teams",
                    "",
                ])

            return self._chat_completion(
                "You are an elite startup advisor with deep expertise in team composition and founder dynamics. You specialize in analyzing team profiles and providing actionable insights to maximize chances of startup success.",
                prompt,
                label="team analysis",
            )

        except Exception as e:
            import traceback
            print(f"Error analyzing team: {e}")
            print(traceback.format_exc())
            return f"We encountered an error while analyzing the team. The AI service may be temporarily unavailable. You can try again later or use the basic team features. Error details: {str(e)}"

    def analyze_candidate(self, candidate: Dict[str, Any]) -> str:
        """
        Analyze an individual candidate and provide insights.

        Args:
            candidate: Candidate information

        Returns:
            Candidate analysis as string (or a user-facing error message)
        """
        if not self._ensure_loaded():
            return self._UNAVAILABLE

        try:
            name = candidate.get('name', 'Unnamed Candidate')

            # Non-dict entries (e.g. a bare school name) are rendered as-is.
            education_info = "".join(
                f"- {edu.get('degree', 'Degree')} from {edu.get('institution', 'Institution')}\n"
                if isinstance(edu, dict) else f"- {edu}\n"
                for edu in self._as_entries(candidate.get('education'))
            )
            experience_info = "".join(
                f"- {exp.get('title', 'Role')} at {exp.get('company', 'Company')}\n"
                if isinstance(exp, dict) else f"- {exp}\n"
                for exp in self._as_entries(candidate.get('experience'))
            )
            skills_info = ", ".join(
                str(skill) for skill in self._as_entries(candidate.get('skills'))
            )

            prompt = "\n".join([
                "Analyze this candidate in depth for a startup founder or early employee role:",
                "",
                "CANDIDATE PROFILE:",
                f"Name: {name}",
                "",
                "Education:",
                education_info,
                "",
                "Experience:",
                experience_info,
                "",
                "Skills:",
                skills_info,
                "",
                "Please provide a comprehensive analysis including:",
                "",
                "1. Strengths Analysis (minimum 150 words):",
                "- Key professional strengths based on background and skills",
                "- Notable accomplishments and their significance",
                "- Areas of deep expertise and how they apply to startups",
                "",
                "2. Founder/Early Employee Fit (minimum 150 words):",
                "- Assessment of suitability for founder or early employee roles",
                "- Specific founder archetype this candidate represents",
                "- Optimal startup stages for this candidate",
                "- Ideal role recommendations in a startup team",
                "",
                "3. Complementary Team Members (minimum 100 words):",
                "- What types of co-founders or team members would complement this candidate",
                "- Potential team dynamics when working with different personality types",
                "- Skills gaps that should be filled by other team members",
                "",
                "4. Risk Assessment (minimum 100 words):",
                "- Potential blind spots or weaknesses based on background",
                "- Areas where the candidate might need support or development",
                "- Situations where this candidate might struggle in a startup environment",
                "",
                "Format your analysis with clear sections and detailed insights to help assess this candidate for startup roles.",
                "",
            ])

            return self._chat_completion(
                "You are an elite talent assessor specializing in startup founders and early employees. You provide in-depth analysis of candidates' strengths, founder fit, and team compatibility.",
                prompt,
                label="candidate analysis",
            )

        except Exception as e:
            import traceback
            print(f"Error analyzing candidate: {e}")
            print(traceback.format_exc())
            return f"We encountered an error while analyzing this candidate. The AI service may be temporarily unavailable. You can try again later. Error details: {str(e)}"

    @staticmethod
    def _empty_requirements():
        """Return the default structure used when requirements cannot be parsed."""
        return {
            "requiredSkills": [],
            "requiredRoles": [],
            "industryFocus": "",
            "startupStage": "",
            "specialRequirements": [],
        }

    def parse_natural_language_requirements(self, text: str) -> Dict[str, Any]:
        """
        Parse natural language team requirements into structured format.

        Args:
            text: Natural language description of team requirements

        Returns:
            Dictionary with structured requirements. An all-empty structure is
            returned when the reply holds no valid JSON object or generation
            fails; ``{"error": ...}`` when the model is not loaded.
        """
        if not self._ensure_loaded():
            return {"error": "Model could not be loaded"}

        try:
            prompt = "\n".join([
                "Parse the following team requirements into a structured format:",
                "",
                "REQUIREMENTS:",
                f"{text}",
                "",
                "Extract the following information:",
                "1. Required skills (comma-separated list)",
                "2. Required roles (comma-separated list)",
                "3. Industry focus (single industry)",
                "4. Startup stage (idea, mvp, growth, or scaling)",
                "5. Special requirements (comma-separated list)",
                "",
                "Format your response as a JSON object with these keys: requiredSkills, requiredRoles, industryFocus, startupStage, specialRequirements",
                "",
            ])

            assistant_response = self._structured_completion(
                "You are a helpful assistant that extracts structured information from text.",
                prompt,
                temperature=0.2,
            )

            import re
            json_match = re.search(r'\{.*\}', assistant_response, re.DOTALL)
            if json_match:
                json_str = json_match.group(0)
                try:
                    return json.loads(json_str)
                except json.JSONDecodeError:
                    print(f"Error parsing JSON: {json_str}")

            return self._empty_requirements()

        except Exception as e:
            print(f"Error parsing requirements: {e}")
            return self._empty_requirements()

    def search_candidates_by_description(self, description: str, candidates: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Search for candidates matching a natural language description.

        Args:
            description: Natural language description of desired candidate
            candidates: List of candidates to search

        Returns:
            Filtered list of candidates matching the description, in the order
            the model listed them. Empty on any failure.
        """
        if not self._ensure_loaded():
            return []

        try:
            candidate_summary = self.format_candidates_for_prompt(candidates)

            prompt = "\n".join([
                f"I'm looking for candidates matching this description: \"{description}\"",
                "",
                "Here are the available candidates:",
                candidate_summary,
                "",
                "Return a JSON array containing ONLY the candidate numbers (starting from 1) that best match this description. Format it like this: [1, 4, 7]",
                "",
            ])

            assistant_response = self._structured_completion(
                "You are a helpful assistant that finds the best matching candidates based on descriptions.",
                prompt,
                temperature=0.3,
            )

            import re
            # First flat bracket group only; a greedy match would span several
            # groups and yield invalid JSON.
            array_match = re.search(r'\[[^\[\]]*\]', assistant_response)
            if array_match:
                array_str = array_match.group(0)
                try:
                    indices = json.loads(array_str)
                except json.JSONDecodeError:
                    print(f"Error parsing indices: {array_str}")
                else:
                    # Keep in-range integers only; bool is excluded because it
                    # is an int subclass. One stray value no longer discards
                    # the whole answer.
                    return [
                        candidates[number - 1]
                        for number in indices
                        if isinstance(number, int)
                        and not isinstance(number, bool)
                        and 1 <= number <= len(candidates)
                    ]

            return []

        except Exception as e:
            print(f"Error searching candidates: {e}")
            return []

    # ------------------------------------------------------------------
    # Request dispatch
    # ------------------------------------------------------------------
    def _model_info(self):
        """Return the ``model_info`` block attached to every response."""
        return {
            "device": str(self.device),
            "model_type": self.MODEL_TYPE,
        }

    def _fallback_response(self, operation):
        """Build the response returned when the model could not be loaded."""
        response = {}
        if operation in self._FALLBACK_TEXT:
            key, message = self._FALLBACK_TEXT[operation]
            response[key] = message
        response["error"] = "Model could not be loaded, using fallback mode"
        response["fallback"] = True
        response["model_info"] = {**self._model_info(), "status": "failed"}
        return response

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Perform inference on the input data.

        Args:
            data: Dictionary with inputs key containing the operation
                parameters (the parameters may also be given at top level).

        Returns:
            Dictionary with the operation's result plus ``model_info``, or a
            dictionary with an ``error`` key when the request is invalid or
            the model is unavailable.
        """
        inputs_data = data["inputs"] if isinstance(data, dict) and "inputs" in data else data
        if not isinstance(inputs_data, dict):
            return {"error": "Request payload must be a JSON object"}

        operation = inputs_data.get("operation", "generate_team")

        # Health check: must answer even when the model failed to load.
        if operation == "ping":
            model_status = "loaded" if self.model is not None else "not loaded"
            return {
                "status": "ok",
                "model_status": model_status,
                "model_info": self._model_info(),
            }

        if self.model is None:
            print("Model wasn't loaded initially, trying to load it now...")
            self.load_model()

        if self.model is None:
            return self._fallback_response(operation)

        if operation == "generate_team":
            candidates = inputs_data.get("candidates", [])
            requirements = inputs_data.get("requirements", "")
            try:
                team_size = int(inputs_data.get("team_size", 5))
            except (TypeError, ValueError):
                team_size = 0
            if team_size < 1:
                return {"error": "team_size must be a positive integer"}

            if not candidates:
                return {"error": "No candidates provided"}

            team_analysis = self.generate_team(candidates, requirements, team_size)
            return {
                "team_analysis": team_analysis,
                "model_info": self._model_info(),
            }

        elif operation == "analyze_team":
            team = inputs_data.get("team", [])
            include_startup_comparison = inputs_data.get("include_startup_comparison", True)

            if not team:
                return {"error": "No team provided"}

            team_analysis = self.analyze_team(team, include_startup_comparison)
            return {
                "team_analysis": team_analysis,
                "model_info": self._model_info(),
            }

        elif operation == "analyze_candidate":
            candidate = inputs_data.get("candidate", {})

            if not candidate:
                return {"error": "No candidate provided"}

            candidate_analysis = self.analyze_candidate(candidate)
            return {
                "candidate_analysis": candidate_analysis,
                "model_info": self._model_info(),
            }

        elif operation == "parse_requirements":
            requirements_text = inputs_data.get("requirements_text", "")
            parsed_requirements = self.parse_natural_language_requirements(requirements_text)
            return {
                "parsed_requirements": parsed_requirements,
                "model_info": self._model_info(),
            }

        elif operation == "search_candidates":
            candidates = inputs_data.get("candidates", [])
            description = inputs_data.get("description", "")

            if not candidates:
                return {"error": "No candidates provided"}

            if not description:
                return {"error": "No search description provided"}

            matching_candidates = self.search_candidates_by_description(description, candidates)
            return {
                "matching_candidates": matching_candidates,
                "count": len(matching_candidates),
                "model_info": self._model_info(),
            }

        else:
            return {"error": f"Unknown operation: {operation}"}