Spaces:
Running
Running
| """Career Chatbot | |
| AI assistant that represents professionals on their websites, answering | |
| questions about their background while facilitating follow-up contact. | |
| Data models have been refactored into the `models` package to keep this file | |
| focused on orchestration, tool wiring, and runtime logic. | |
| """ | |
| import os | |
| import json | |
| import logging | |
| from typing import List, Dict, Optional, Any | |
| from datetime import datetime | |
| import re | |
| import gradio as gr | |
| import requests | |
| from dotenv import load_dotenv | |
| from openai import OpenAI | |
| from pypdf import PdfReader | |
| from typer import prompt | |
| from promptkit import render | |
| # Import refactored data models | |
| from models import ( | |
| ChatbotConfig, | |
| Evaluation, | |
| StructuredResponse, | |
| JobMatchResult, | |
| ) | |
# Root logging is configured once at import time; the module-level ``logger``
# below is what the classes in this file log through.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
class NotificationService:
    """Send push notifications through the Pushover HTTP API.

    Credentials are taken from the constructor arguments or, when those are
    missing, from the ``PUSHOVER_USER`` and ``PUSHOVER_TOKEN`` environment
    variables. If either credential is absent the service is disabled and
    ``send`` only logs the message.
    """

    # Upper bound (seconds) for a single Pushover request. ``requests`` applies
    # no timeout by default, so without this a stalled connection would block
    # the caller indefinitely.
    DEFAULT_TIMEOUT = 10.0

    def __init__(self, user_token: Optional[str] = None, app_token: Optional[str] = None,
                 timeout: float = DEFAULT_TIMEOUT):
        """Resolve credentials and decide whether notifications are enabled.

        Args:
            user_token: Pushover user key; falls back to ``PUSHOVER_USER``.
            app_token: Pushover application token; falls back to ``PUSHOVER_TOKEN``.
            timeout: Seconds to wait for the Pushover API on each ``send``.
        """
        self.user_token = user_token or os.getenv("PUSHOVER_USER")
        self.app_token = app_token or os.getenv("PUSHOVER_TOKEN")
        self.api_url = "https://api.pushover.net/1/messages.json"
        self.timeout = timeout
        self.enabled = bool(self.user_token and self.app_token)
        if self.enabled:
            logger.info("Pushover notification service initialized")
        else:
            logger.warning("Pushover credentials not found - notifications disabled")

    def send(self, message: str) -> bool:
        """Send a push notification.

        Returns:
            True when Pushover accepted the request; False when the service is
            disabled or the request failed. Failures are logged, never raised,
            so a notification problem cannot break the caller.
        """
        if not self.enabled:
            logger.info(f"Notification (disabled): {message}")
            return False

        payload = {
            "user": self.user_token,
            "token": self.app_token,
            "message": message,
        }
        try:
            response = requests.post(self.api_url, data=payload, timeout=self.timeout)
            response.raise_for_status()
        except Exception as e:
            # Deliberately broad: notifications are best-effort.
            logger.error(f"Failed to send notification: {e}")
            return False

        logger.info(f"Notification sent: {message}")
        return True
class WebSearchService:
    """Handles web searches and GitHub repository lookups.

    All requests go through one ``requests.Session`` carrying the GitHub
    ``Accept``/``User-Agent`` headers and, when ``GITHUB_TOKEN`` is set, an
    ``Authorization`` header.
    """

    # Seconds to wait on any GitHub request; ``requests`` has no default
    # timeout, so an unresponsive API would otherwise hang the tool call.
    REQUEST_TIMEOUT = 10

    def __init__(self, github_username: Optional[str] = None):
        """Create the HTTP session used for every GitHub call.

        Args:
            github_username: Default account to query when a method is called
                without an explicit ``username``.
        """
        self.github_username = github_username
        self.github_api_base = "https://api.github.com"
        self.session = requests.Session()
        self.session.headers.update({
            'Accept': 'application/vnd.github.v3+json',
            'User-Agent': 'CareerChatbot/1.0'
        })
        # Check if GitHub token is available for higher rate limits
        github_token = os.getenv("GITHUB_TOKEN")
        if github_token:
            self.session.headers.update({'Authorization': f'token {github_token}'})
            logger.info("GitHub API configured with authentication")
        else:
            logger.info("GitHub API configured without authentication (rate limits apply)")

    @staticmethod
    def _matches_topic(repo: Dict[str, Any], needle: str) -> bool:
        """Return True when the lowercase ``needle`` matches the repo.

        A match is a substring hit in any topic, the description or the name,
        or an exact (case-insensitive) match on the primary language.
        """
        if any(needle in repo_topic.lower() for repo_topic in (repo.get('topics') or [])):
            return True
        if needle in (repo.get('description') or '').lower():
            return True
        if needle in (repo.get('name') or '').lower():
            return True
        return needle == (repo.get('language') or '').lower()

    @staticmethod
    def _format_repo(repo: Dict[str, Any]) -> Dict[str, Any]:
        """Reduce a raw GitHub repository object to the fields the agent uses."""
        return {
            'name': repo.get('name'),
            # The API sends ``description: null`` rather than omitting the key,
            # so a ``.get`` default would never apply; use ``or`` instead.
            'description': repo.get('description') or 'No description',
            'url': repo.get('html_url'),
            'language': repo.get('language') or 'Not specified',
            'stars': repo.get('stargazers_count', 0),
            'forks': repo.get('forks_count', 0),
            'updated': repo.get('updated_at', ''),
            'created': repo.get('created_at', ''),
            'topics': repo.get('topics') or [],
            'size': repo.get('size', 0),
            'is_fork': repo.get('fork', False),
            'archived': repo.get('archived', False)
        }

    def search_github_repos(self, username: Optional[str] = None, topic: Optional[str] = None) -> Dict[str, Any]:
        """Search GitHub repositories for a user.

        Forked repositories are excluded. When ``topic`` is given and at least
        one repository matches it, only the matches are returned; if nothing
        matches, the unfiltered list is returned instead.

        Args:
            username: Account to query; defaults to ``self.github_username``.
            topic: Optional filter term (see ``_matches_topic``).

        Returns:
            A dict with ``username``, ``total_repos``, ``languages_used``,
            ``topic_searched`` and ``repos``; on failure a dict with ``error``
            and an empty ``repos`` list. Only the first page of up to 100
            repositories is requested.
        """
        try:
            username = username or self.github_username
            if not username:
                return {"error": "No GitHub username provided", "repos": []}

            url = f"{self.github_api_base}/users/{username}/repos"
            params = {'sort': 'updated', 'per_page': 100}
            response = self.session.get(url, params=params, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()

            # Filter out forked repositories to show only original work
            repos = [repo for repo in response.json() if not repo.get('fork', False)]

            if topic and isinstance(topic, str):
                needle = topic.lower()
                matching = [repo for repo in repos if self._matches_topic(repo, needle)]
                # Only use filtered results if we found matches
                if matching:
                    repos = matching

            formatted_repos = [self._format_repo(repo) for repo in repos]
            # Sorted so the result is deterministic from call to call.
            languages_used = sorted({repo.get('language') for repo in repos if repo.get('language')})
            return {
                "username": username,
                "total_repos": len(formatted_repos),
                "languages_used": languages_used,
                "topic_searched": topic,
                "repos": formatted_repos
            }
        except requests.exceptions.HTTPError as e:
            # ``e.response`` can be None if the error was raised without a response.
            if getattr(e.response, 'status_code', None) == 404:
                return {"error": f"GitHub user '{username}' not found", "repos": []}
            logger.error(f"GitHub API error: {e}")
            return {"error": f"GitHub API error: {str(e)}", "repos": []}
        except Exception as e:
            logger.error(f"Error searching GitHub: {e}")
            return {"error": f"Error searching GitHub: {str(e)}", "repos": []}

    def _fetch_readme_preview(self, username: str, repo_name: str, max_chars: int = 500) -> Optional[str]:
        """Return the first ``max_chars`` characters of the README, or None.

        Best-effort: any failure is logged at debug level and yields None so a
        README problem never breaks the repository lookup.
        """
        import base64

        try:
            readme_url = f"{self.github_api_base}/repos/{username}/{repo_name}/readme"
            readme_response = self.session.get(readme_url, timeout=self.REQUEST_TIMEOUT)
            if readme_response.status_code != 200:
                return None
            readme_data = readme_response.json()
            if 'content' not in readme_data:
                return None
            # ``errors='replace'`` keeps a preview even if the README has stray
            # non-UTF-8 bytes instead of discarding the whole thing.
            decoded = base64.b64decode(readme_data['content']).decode('utf-8', errors='replace')
            return decoded[:max_chars]
        except Exception as e:
            logger.debug(f"Could not retrieve README: {e}")
            return None

    def get_repo_details(self, repo_name: str, username: Optional[str] = None) -> Dict[str, Any]:
        """Get detailed information about a specific repository.

        Args:
            repo_name: Repository name (without the owner prefix).
            username: Owner account; defaults to ``self.github_username``.

        Returns:
            A dict of repository fields plus ``readme_preview``, or a dict with
            a single ``error`` key when the lookup cannot be performed.
        """
        try:
            username = username or self.github_username
            if not username:
                return {"error": "No GitHub username provided"}
            if not repo_name:
                # Avoid requesting ".../repos/<user>/None" when the model omits the name.
                return {"error": "No repository name provided"}

            url = f"{self.github_api_base}/repos/{username}/{repo_name}"
            response = self.session.get(url, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            repo = response.json()

            return {
                'name': repo.get('name'),
                'full_name': repo.get('full_name'),
                'description': repo.get('description'),
                'url': repo.get('html_url'),
                'homepage': repo.get('homepage'),
                'language': repo.get('language'),
                'languages_url': repo.get('languages_url'),
                'created_at': repo.get('created_at'),
                'updated_at': repo.get('updated_at'),
                'pushed_at': repo.get('pushed_at'),
                'size': repo.get('size'),
                'stars': repo.get('stargazers_count'),
                'watchers': repo.get('watchers_count'),
                'forks': repo.get('forks_count'),
                'open_issues': repo.get('open_issues_count'),
                'topics': repo.get('topics', []),
                'readme_preview': self._fetch_readme_preview(username, repo_name)
            }
        except Exception as e:
            logger.error(f"Error getting repo details: {e}")
            return {"error": f"Error getting repository details: {str(e)}"}
class DocumentLoader:
    """Loads and processes professional documents.

    Both loaders are static methods: they take only a path, so they work
    whether called on the class or on an instance. Without ``@staticmethod``
    an instance call would pass the instance as ``path`` plus one extra
    argument and fail with ``TypeError``.
    """

    @staticmethod
    def load_pdf(path: str) -> str:
        """Load text content from a PDF file.

        Returns:
            The concatenated text of all pages, or "" if the file cannot be
            read (the error is logged, not raised).
        """
        try:
            reader = PdfReader(path)
            page_texts = (page.extract_text() for page in reader.pages)
            # Pages that yield no text are skipped.
            content = "".join(text for text in page_texts if text)

            # Debug logging for PDF content
            webrtc_found = "WebRTC" in content
            websocket_found = "WebSocket" in content
            logger.info(f"Loaded PDF: {path} - Length: {len(content)} chars")
            logger.info(f"PDF Debug - WebRTC found: {webrtc_found}, WebSocket found: {websocket_found}")
            # Log a snippet around WebRTC if found
            if webrtc_found:
                webrtc_index = content.find("WebRTC")
                snippet = content[max(0, webrtc_index-50):webrtc_index+50]
                logger.info(f"WebRTC context: ...{snippet}...")
            return content
        except Exception as e:
            logger.error(f"Failed to load PDF {path}: {e}")
            return ""

    @staticmethod
    def load_text(path: str) -> str:
        """Load content from a UTF-8 text file.

        Returns:
            The file content, or "" if the file cannot be read (the error is
            logged, not raised).
        """
        try:
            with open(path, "r", encoding="utf-8") as f:
                content = f.read()
            logger.info(f"Loaded text file: {path}")
            return content
        except Exception as e:
            logger.error(f"Failed to load text file {path}: {e}")
            return ""
class Evaluator:
    """Evaluates AI responses for accuracy and hallucinations.

    The evaluator talks to an OpenAI-compatible endpoint configured with
    ``GEMINI_API_KEY`` and renders its prompts from templates under
    ``prompts/``. ``evaluate_structured`` is the main entry point; ``rerun``
    regenerates a reply using feedback from a failed evaluation.
    """

    # Tool names whose results come from the GitHub API.
    _GITHUB_TOOLS = ('search_github_repos', 'get_repo_details')

    def __init__(self, config: ChatbotConfig, context: Dict[str, str]):
        """Store config/context and create the evaluator client.

        Args:
            config: Chatbot configuration (models, thresholds).
            context: Professional context; ``resume``, ``linkedin`` and
                ``summary`` keys are read when building the prompt.
        """
        self.config = config
        self.context = context
        # Use a different model for evaluation to avoid bias
        self.evaluator_client = OpenAI(
            api_key=os.getenv("GEMINI_API_KEY"),
            base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
        )
        # Initial system prompt without GitHub context, with the generic footer
        self.system_prompt = self._create_evaluator_prompt()

    @staticmethod
    def _any_tool_used(tools_used, names) -> bool:
        """Return True if any of ``names`` is in ``tools_used``.

        Tool names are accepted both bare and with the ``functions.`` prefix,
        so every caller applies the same matching rule.
        """
        return any(
            name in tools_used or f"functions.{name}" in tools_used
            for name in names
        )

    def _create_evaluator_prompt(self, decision_criteria_footer=None) -> str:
        """Create the evaluator system prompt.

        Args:
            decision_criteria_footer: Closing instruction for the template;
                ``None`` selects the generic footer, ``""`` omits it.
        """
        if decision_criteria_footer is None:
            decision_criteria_footer = "Mark UNACCEPTABLE only if: unsupported claims, missing tool usage when needed, or behavioral rules violated."
        # Get current date for evaluator context
        current_date = datetime.now().strftime("%B %d, %Y")

        # Debug logging for evaluator context
        resume = self.context['resume']
        resume_has_webrtc = "WebRTC" in resume
        resume_has_websocket = "WebSocket" in resume
        logger.debug("EVALUATOR CONTEXT DEBUG:")
        logger.debug(f" Resume length: {len(resume)} chars, WebRTC: {resume_has_webrtc}, WebSocket: {resume_has_websocket}")
        logger.debug(f" LinkedIn length: {len(self.context['linkedin'])} chars")
        logger.debug(f" Summary length: {len(self.context['summary'])} chars")
        if resume_has_webrtc:
            webrtc_index = resume.find("WebRTC")
            snippet = resume[max(0, webrtc_index-50):webrtc_index+50]
            logger.debug(f" WebRTC context in resume: ...{snippet}...")

        template_vars = {
            "config": self.config,
            "context": self.context,
            "job_match_threshold": self.config.job_match_threshold if self.config else "Good",
            "decision_criteria_footer": decision_criteria_footer,
            "current_date": current_date
        }
        return render("prompts/evaluator.md", template_vars)

    def _create_user_prompt(self, reply: str, message: str, history: List[Dict]) -> str:
        """Create the user prompt for evaluation from the last six history messages."""
        history_str = "\n".join([f"{h['role']}: {h['content']}" for h in history[-6:]])
        return f"""Here's the conversation context:
{history_str}
Latest User message: {message}
Latest Agent response: {reply}
Please evaluate this response with STRICTNESS:
1. Check EVERY factual claim against the provided context
2. If the Agent mentions ANY specific detail (skills, technologies, experiences, tools) not explicitly in the context, mark as UNACCEPTABLE
3. If the Agent should have said "I don't have that information", but instead made something up, mark as UNACCEPTABLE
4. Look for common hallucinations like claiming experience with technologies not mentioned in the resume/LinkedIn
Is this response acceptable? Provide specific feedback about any issues."""

    def rerun(self, reply: str, message: str, history: List[Dict], feedback: str) -> StructuredResponse:
        """Regenerate structured response with feedback from failed evaluation.

        Args:
            reply: The rejected response text.
            message: The user message being answered.
            history: Prior conversation messages.
            feedback: Evaluator feedback explaining the rejection.
        """
        template_vars = {
            'base_system_prompt': self._create_base_system_prompt(),
            'reply': reply,
            'feedback': feedback
        }
        updated_system_prompt = render('prompts/chat_rerun.md', template_vars)
        messages = [{"role": "system", "content": updated_system_prompt}] + history + [{"role": "user", "content": message}]
        # Generate new structured response with parsed output
        response = self.evaluator_client.beta.chat.completions.parse(
            model=self.config.evaluator_model,
            messages=messages,
            response_format=StructuredResponse
        )
        system_fp = getattr(response, "system_fingerprint", None)
        logger.debug("EVAL: served_model=%s system_fp=%s", response.model, system_fp)
        return response.choices[0].message.parsed

    def _create_base_system_prompt(self) -> str:
        """Create base system prompt without evaluation context."""
        template_vars = {
            'config': self.config,
            'context': self.context
        }
        return render('prompts/chat_base.md', template_vars)

    def _evaluation_criteria(self, is_job_matching: bool, github_tools_used: bool) -> str:
        """Pick the closing instructions for the evaluation user prompt.

        Job matching takes precedence over GitHub tool usage; with neither,
        the strict fact-checking criteria apply.
        """
        if is_job_matching:
            threshold = self.config.job_match_threshold if self.config else 'Good'
            return f"""Please evaluate this job matching response with REASONABLE STANDARDS:
1. Is the reasoning sound for professional skill assessment?
2. Are technical inferences reasonable (e.g., ROS2 experience → DDS knowledge)?
3. Were appropriate tools used for job analysis?
4. Does the response provide useful insights for recruitment?
5. CRITICAL: Match level hierarchy is Very Strong > Strong > Good > Moderate > Weak > Very Weak
6. CRITICAL: If job match is "{threshold}" or HIGHER in the hierarchy (Strong, Very Strong), facilitating contact is CORRECT behavior
7. CRITICAL: If job match is LOWER in the hierarchy than "{threshold}" (Moderate, Weak, Very Weak), declining contact is CORRECT behavior
Job matching responses should be evaluated for practical utility, not pedantic precision.
Accept reasonable technical inferences and contact facilitation decisions based on match level."""
        if github_tools_used:
            return """Please evaluate this response with REASONABLE STANDARDS for GitHub tool usage:
1. GitHub tools (search_github_repos, get_repo_details) were used to gather additional information
2. Repository details like stars, forks, creation dates, programming languages, topics are LEGITIMATE from GitHub API
3. Technical project details obtained from GitHub tools are acceptable
4. Only reject if claims obviously contradict the professional background
5. The agent appropriately used tools to provide detailed project information
When GitHub tools are used, trust the additional technical details they provide.
Is this response acceptable? Provide specific feedback about any issues."""
        return """Please evaluate this response with STRICTNESS:
1. Check EVERY factual claim against the provided context
2. If the Agent mentions ANY specific detail not explicitly in the context, mark as UNACCEPTABLE
3. If the Agent should have said "I don't have that information", but instead made something up, mark as UNACCEPTABLE
4. Look for common hallucinations and unsupported claims
Is this response acceptable? Provide specific feedback about any issues."""

    def evaluate_structured(self, structured_reply: StructuredResponse, message: str, history: List[Dict]) -> Evaluation:
        """Evaluate a structured response with reasoning and evidence.

        Returns:
            The parsed ``Evaluation``. If anything in the evaluation pipeline
            raises, the error is logged and an *acceptable* ``Evaluation``
            carrying the error text is returned, so an evaluator outage does
            not block the reply.
        """
        try:
            is_job_matching = self._is_job_matching_context(structured_reply, message, history)
            # Check if GitHub tools were used
            logger.info(f"STRUCTURED REPLY TOOLS_USED: {structured_reply.tools_used}")
            github_tools_used = self._any_tool_used(structured_reply.tools_used, self._GITHUB_TOOLS)
            logger.info(f"GITHUB TOOLS USED: {github_tools_used}")

            evaluation_criteria = self._evaluation_criteria(is_job_matching, github_tools_used)
            history_str = "\n".join([f"{h['role']}: {h['content']}" for h in history[-3:]])
            user_prompt = f"""Here's the conversation context:
{history_str}
Latest User message: {message}
Agent's structured response:
Response: {structured_reply.response}
Reasoning: {structured_reply.reasoning}
Tools used: {structured_reply.tools_used}
Facts used: {structured_reply.facts_used}
{evaluation_criteria}"""

            # GitHub tool results found in the history are added to the system
            # prompt; the helper falls back to the plain prompt when there are none.
            github_context = self._extract_github_context_from_history(history)
            system_prompt = self._create_evaluator_prompt_with_github(github_context)
            messages = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ]
            response = self.evaluator_client.beta.chat.completions.parse(
                model=self.config.evaluator_model,
                messages=messages,
                response_format=Evaluation,
                temperature=0.0
            )
            system_fp = getattr(response, "system_fingerprint", None)
            logger.debug("EVAL: served_model=%s system_fp=%s", response.model, system_fp)
            evaluation = response.choices[0].message.parsed
            logger.info(f"EVALUATION RESULT: {'PASS' if evaluation.is_acceptable else 'FAIL'}")
            logger.info(f"AGENT RESPONSE: {structured_reply.response}")
            logger.info(f"AGENT REASONING: {structured_reply.reasoning}")
            logger.info(f"EVALUATOR FEEDBACK: {evaluation.feedback}")
            return evaluation
        except Exception as e:
            logger.error(f"Structured evaluation failed: {e}")
            return Evaluation(is_acceptable=True, feedback=f"Evaluation error: {str(e)}")

    def _external_tools_used(self, history: List[Dict]) -> bool:
        """Check if tools with external data (GitHub, job matching) were used in the conversation."""
        github_indicators = ('repos', 'languages_used', 'languages_found', 'total_repos', 'github.com')
        job_match_indicators = ('overall_match_level', 'skill_assessments', 'should_facilitate_contact')
        for message in history:
            if message.get('role') != 'tool':
                continue
            # ``content`` may be present but None; treat that as empty.
            content = message.get('content') or ''
            logger.debug(f"TOOL CONTENT DEBUG: {content}")
            if any(indicator in content for indicator in github_indicators):
                return True
            if any(indicator in content for indicator in job_match_indicators):
                logger.debug("JOB MATCHING TOOL DETECTED!")
                return True
        logger.debug("NO EXTERNAL TOOLS DETECTED")
        return False

    def _is_github_context(self, structured_reply: StructuredResponse) -> bool:
        """Check if GitHub tools were used."""
        return self._any_tool_used(structured_reply.tools_used, self._GITHUB_TOOLS)

    def _is_job_matching_context(self, structured_reply: StructuredResponse, message: str, history: List[Dict]) -> bool:
        """Check if this is a job matching context.

        True when the job matching tool was used, or when the response or the
        user message contains one of the keyword indicators below.
        ``history`` is accepted but not consulted.
        """
        if self._any_tool_used(structured_reply.tools_used, ('evaluate_job_match',)):
            return True
        # Check if response contains job matching indicators
        response_content = structured_reply.response.lower()
        if any(indicator in response_content for indicator in ['match level', 'skills breakdown', 'overall match', 'job fit']):
            return True
        # Check if message contains job posting indicators
        message_content = message.lower()
        return any(indicator in message_content for indicator in ['job description', 'role', 'position', 'hiring', 'candidate'])

    def _create_evaluator_prompt_with_github(self, github_context: str) -> str:
        """Create evaluator prompt including GitHub tool results as valid context.

        Falls back to the plain evaluator prompt when ``github_context`` is empty.
        """
        if not github_context:
            return self._create_evaluator_prompt()
        template_vars = {
            # Base evaluator content WITHOUT footer (empty string)
            "base_evaluator_prompt": self._create_evaluator_prompt(""),
            "github_context": github_context,
            "current_date": datetime.now().strftime("%B %d, %Y")
        }
        return render("prompts/evaluator_with_github_context.md", template_vars)

    def _extract_github_context_from_history(self, history: List[Dict]) -> str:
        """Extract GitHub tool results from conversation history.

        Returns the newline-joined content of every ``tool`` message that
        contains one of the GitHub indicator strings, or "" if there is none.
        """
        indicators = (
            'full_name', 'html_url', 'stargazers_count', 'watchers_count', 'forks_count',
            'open_issues_count', 'created_at', 'updated_at', 'topics', 'repos', 'github.com'
        )
        matched = []
        for message in history:
            if message.get('role') != 'tool':
                continue
            # ``content`` may be present but None; ``in`` on None would raise.
            content = message.get('content') or ''
            if any(indicator in content for indicator in indicators):
                matched.append(content)
        return "\n".join(matched).strip()
class ToolRegistry:
    """Manages AI agent tools and their execution.

    ``self.tools`` holds the function-tool definitions offered to the model;
    ``handle_tool_calls`` executes the calls the model makes and returns one
    ``tool`` message per call.
    """

    def __init__(self, notification_service: NotificationService, web_search_service: Optional[WebSearchService] = None,
                 openai_client: Optional[OpenAI] = None, context: Optional[Dict[str, str]] = None,
                 config: Optional[ChatbotConfig] = None):
        """Store collaborators and build the tool definitions.

        GitHub tools are only registered when ``web_search_service`` is given;
        job matching needs both ``openai_client`` and a non-empty ``context``.
        """
        self.notification_service = notification_service
        self.web_search_service = web_search_service
        self.openai_client = openai_client
        self.context = context or {}
        self.config = config
        self.tools = self._create_tool_definitions()

    @staticmethod
    def _function_tool(name: str, description: str, properties: Dict[str, Any], required: List[str]) -> Dict:
        """Build one function-tool definition with the shared boilerplate."""
        return {
            "type": "function",
            "function": {
                "name": name,
                "strict": True,
                "description": description,
                "parameters": {
                    "type": "object",
                    "strict": True,
                    "properties": properties,
                    "required": required,
                    "additionalProperties": False
                }
            }
        }

    def _create_tool_definitions(self) -> List[Dict]:
        """Create tool definitions for the AI agent."""
        tools = [
            self._function_tool(
                "record_user_details",
                (
                    "Use this tool ONLY AFTER a user has explicitly provided their email address in response to an offer to facilitate contact. "
                    "This tool records the user's contact details. "
                    "IMPORTANT: DO NOT use this tool unless the user has given you their email. Do not make up an email address."
                ),
                {
                    "email": {
                        "type": "string",
                        "description": "The email address explicitly provided by the user. Do not invent this."
                    },
                    "name": {
                        "type": "string",
                        "description": "The user's name if they provided it. If not, use 'Visitor'."
                    },
                    "notes": {
                        "type": "string",
                        "description": (
                            "Detailed notes about the conversation context and why the user wants to be contacted. "
                            "Include the original question or job match details."
                        )
                    }
                },
                ["email", "name", "notes"],
            ),
            self._function_tool(
                "evaluate_job_match",
                (
                    "Analyze how well the candidate matches a job posting. Use this when someone asks "
                    "about job fit, role suitability, or provides a job description to evaluate. "
                    "Returns detailed analysis with match levels and recommendations."
                ),
                {
                    "job_description": {
                        "type": "string",
                        "description": "The FULL, COMPLETE, UNEDITED job description text exactly as provided by the user. Do NOT summarize, extract, or truncate - include ALL details including company info, salary, responsibilities, requirements, and nice-to-haves."
                    },
                    "role_title": {
                        "type": "string",
                        "description": "The job title or role name"
                    }
                },
                ["job_description", "role_title"],
            ),
        ]
        # Add GitHub search tools if web search service is available
        if self.web_search_service:
            tools.append(self._function_tool(
                "search_github_repos",
                (
                    "Get ALL GitHub repositories with full details including languages, topics, stars, etc. "
                    "Call WITHOUT parameters to get everything, then analyze the returned data. "
                    "Returns list of all repos with language field showing what each is written in."
                ),
                {},
                [],
            ))
            tools.append(self._function_tool(
                "get_repo_details",
                "Get detailed information about a specific GitHub repository",
                {
                    "repo_name": {
                        "type": "string",
                        "description": "The name of the repository to get details for"
                    }
                },
                ["repo_name"],
            ))
        return tools

    def record_user_details(self, email: str, name: str = "Visitor", notes: str = "not provided") -> Dict:
        """Record user contact details and prepare notification.

        The notification text is returned under ``pending_notification``
        rather than sent here; ``handle_tool_calls`` collects it for the caller.
        """
        message = f"Recording interest from {name} with email {email} and notes: {notes}"
        logger.info(f"Recorded user details: {email}, {name}")
        return {
            "recorded": "ok",
            "pending_notification": message
        }

    def evaluate_job_match(self, job_description: str, role_title: str) -> Dict:
        """Evaluate how well the candidate matches a job using LLM analysis.

        Returns:
            The ``JobMatchResult`` as a dict (plus ``pending_notification``
            when the result asks to facilitate contact), or a dict with an
            ``error`` key when prerequisites are missing or the call fails.
        """
        if not self.openai_client or not self.context:
            return {"error": "Job matching requires OpenAI client and context"}
        logger.info(f"🎯 Evaluating job match for role: {role_title}")
        template_vars = {
            "role_title": role_title,
            "job_description": job_description,
            "config": self.config,
            "context": self.context,
        }
        # Create analysis prompt
        analysis_prompt = render("prompts/job_match_analysis.md", template_vars)
        try:
            response = self.openai_client.beta.chat.completions.parse(
                model=self.config.job_matching_model if self.config else "gpt-4o",
                messages=[
                    {"role": "system", "content": "You are a professional job matching analyst."},
                    {"role": "user", "content": analysis_prompt}
                ],
                response_format=JobMatchResult
            )
            system_fp = getattr(response, "system_fingerprint", None)
            logger.debug("MATCH: served_model=%s system_fp=%s", response.model, system_fp)
            result = response.choices[0].message.parsed
            logger.info(f"Job match analysis completed: {result.overall_match_level} match for {role_title}")
            result_dict = result.model_dump()
            # Add pending notification for high matches
            if result.should_facilitate_contact:
                result_dict["pending_notification"] = f"High job match found ({result.overall_match_level}): {role_title}"
            return result_dict
        except Exception as e:
            logger.error(f"Job matching analysis failed: {e}")
            return {"error": f"Analysis failed: {str(e)}"}

    def _dispatch_tool(self, tool_name: str, arguments: Dict[str, Any]) -> Dict:
        """Run the tool named ``tool_name`` with already-parsed ``arguments``."""
        if tool_name == "record_user_details":
            return self.record_user_details(**arguments)
        if tool_name == "evaluate_job_match":
            return self.evaluate_job_match(**arguments)
        if self.web_search_service:
            if tool_name == "search_github_repos":
                return self.web_search_service.search_github_repos(topic=arguments.get('topic'))
            if tool_name == "get_repo_details":
                return self.web_search_service.get_repo_details(arguments.get('repo_name'))
        logger.warning(f"Unknown tool called: {tool_name}")
        # Tell the model the call failed instead of handing back an empty object.
        return {"error": f"Unknown tool: {tool_name}"}

    def handle_tool_calls(self, tool_calls) -> tuple[List[Dict], List[str]]:
        """Execute tool calls from the AI agent and collect pending notifications.

        Every tool call gets exactly one ``tool`` message in the result, even
        when its arguments are malformed: the failure is reported to the model
        as an ``error`` payload instead of aborting the whole turn.

        Returns:
            ``(results, pending_notifications)`` where ``results`` is the list
            of ``tool`` messages and ``pending_notifications`` the texts pulled
            out of tool results for the caller to send later.
        """
        results = []
        pending_notifications = []
        for tool_call in tool_calls:
            tool_name = tool_call.function.name
            try:
                arguments = json.loads(tool_call.function.arguments or "{}")
                if not isinstance(arguments, dict):
                    raise TypeError("tool arguments must be a JSON object")
                logger.info(f"Tool called: {tool_name} with args: {arguments}")
                result = self._dispatch_tool(tool_name, arguments)
            except (json.JSONDecodeError, TypeError) as e:
                # Bad JSON, or keyword arguments the tool does not accept.
                logger.error(f"Invalid arguments for tool {tool_name}: {e}")
                result = {"error": f"Invalid arguments for {tool_name}: {e}"}

            # Extract pending notifications
            if isinstance(result, dict) and "pending_notification" in result:
                pending_notifications.append(result.pop("pending_notification"))
            results.append({
                "role": "tool",
                "content": json.dumps(result),
                "tool_call_id": tool_call.id
            })
        return results, pending_notifications
| class CareerChatbot: | |
| """Main chatbot class that orchestrates the AI assistant""" | |
def __init__(self, config: ChatbotConfig):
    """Build the chatbot: client, services, context, tools, evaluator, prompt.

    Order matters: the professional context is loaded before the tool
    registry and evaluator are created because both receive it, and the
    system prompt is rendered last.
    """
    self.config = config
    self.openai_client = OpenAI()

    # Supporting services; GitHub lookups exist only when a username is configured.
    self.notification_service = NotificationService()
    if config.github_username:
        self.web_search_service = WebSearchService(github_username=config.github_username)
    else:
        self.web_search_service = None
    self.document_loader = DocumentLoader()

    # Professional context shared by the tools and the evaluator.
    self.context = self._load_context()

    self.tool_registry = ToolRegistry(
        self.notification_service,
        self.web_search_service,
        self.openai_client,
        self.context,
        self.config,
    )
    self.evaluator = Evaluator(self.config, self.context)
    self.system_prompt = self._create_system_prompt()
    logger.info(f"CareerChatbot initialized for {config.name}")
| def _load_context(self) -> Dict[str, str]: | |
| """Load all professional context documents""" | |
| context = { | |
| "resume": self.document_loader.load_pdf(self.config.resume_path), | |
| "linkedin": self.document_loader.load_pdf(self.config.linkedin_path), | |
| "summary": self.document_loader.load_text(self.config.summary_path) | |
| } | |
| return context | |
def _create_system_prompt(self) -> str:
    """Render the chat system prompt from ``prompts/chat_init.md``.

    The GitHub tool instructions are included only when a web search
    service is configured; otherwise that template variable is empty.
    """
    if self.web_search_service:
        github_tools = (
            "You can use the `search_github_repos` tool to find open source projects and repositories. "
            "Use the `get_repo_details` tool to get detailed information about specific repositories."
        )
    else:
        github_tools = ""

    template_vars = {
        'config': self.config,          # template reads e.g. config.name, config.job_match_threshold
        'context': self.context,        # template reads e.g. context.summary, context.linkedin
        'github_tools': github_tools,   # empty string when GitHub tools are unavailable
    }
    return render('prompts/chat_init.md', template_vars)
def chat(self, message: str, history: List[Dict[str, str]], max_retries: int = 3) -> str:
    """Answer one user message, gating the reply behind the evaluator.

    A first reply is generated with tools enabled. It is then evaluated up to
    ``max_retries`` times; every rejected reply except the last one is
    regenerated through ``self.evaluator.rerun`` using the evaluator's
    feedback.

    Notifications queued by tool calls are sent only when a reply passes
    evaluation. They are discarded as soon as a reply is rejected and a rerun
    is attempted.

    Args:
        message: The user's latest message.
        history: Earlier conversation turns as role/content dicts.
        max_retries: Maximum number of evaluation attempts.

    Returns:
        The text of the accepted reply, or of the most recent reply when the
        attempts are exhausted or evaluation raises. A fixed apology is
        returned when no initial reply could be generated.
    """
    logger.info(f"🔄 PROCESSING message: '{message[:50]}...'")

    messages = [{"role": "system", "content": self.system_prompt}] + history + [{"role": "user", "content": message}]
    structured_reply, pending_notifications = self._generate_response_with_tools(messages)

    # Guard once here: from this point on structured_reply is always truthy
    # (it is only ever replaced by a truthy rerun result), so the returns
    # below need no further fallback text.
    if not structured_reply:
        logger.error("No structured reply received from _generate_response_with_tools")
        return "I apologize, but I'm experiencing technical difficulties. Please try again."

    for attempt in range(max_retries):
        try:
            # The evaluator receives the caller's history unchanged; tool
            # results are not appended to it here.
            evaluation = self.evaluator.evaluate_structured(structured_reply, message, history)

            if evaluation.is_acceptable:
                logger.info(f"✅ PASSED evaluation on attempt {attempt + 1}/{max_retries}\n")
                # Notifications are delivered only for a reply that passed.
                for notification in pending_notifications:
                    self.tool_registry.notification_service.send(notification)
                return structured_reply.response

            logger.warning(f"❌ FAILED evaluation on attempt {attempt + 1}/{max_retries}: {evaluation.feedback[:100]}...\n")

            if attempt >= max_retries - 1:
                logger.warning(f"⚠️ Max retries ({max_retries}) reached. Returning final attempt.")
                break

            logger.info("🔄 Regenerating response with feedback...")
            # Drop notifications that belonged to the rejected reply.
            # NOTE(review): nothing refills this list after a rerun, so a
            # reply accepted on a later attempt sends no notifications —
            # confirm this is intended.
            pending_notifications.clear()
            new_reply = self.evaluator.rerun(structured_reply.response, message, history, evaluation.feedback)
            if new_reply:
                structured_reply = new_reply
            else:
                logger.error("Rerun returned None, keeping original reply")
        except Exception as eval_error:
            # Best effort: an evaluator failure must not cost the user their
            # answer, so log it with its traceback and return what we have.
            logger.exception("Evaluation failed: %s", eval_error)
            break

    return structured_reply.response
def _generate_response_with_tools(self, messages: List[Dict]) -> tuple[Optional[StructuredResponse], List[str]]:
    """Run the model/tool loop until the model produces a final reply.

    The model is called with the registered tools and ``StructuredResponse``
    as the response format. While it finishes with ``"tool_calls"``, the
    calls are executed through the tool registry, their results are appended
    to ``messages`` (the list is mutated in place), and the model is called
    again.

    If anything in that loop raises, a plain chat completion is attempted as
    a fallback; if that raises too, a fixed apology is returned.

    Args:
        messages: Conversation so far, including the system prompt.

    Returns:
        A ``(reply, notifications)`` pair. ``reply`` is whatever the SDK put
        in ``message.parsed`` and may therefore be ``None``; callers should
        check for a falsy reply. ``notifications`` holds every pending
        notification collected from tool calls made before returning.
    """
    all_pending_notifications: List[str] = []

    try:
        while True:
            response = self.openai_client.beta.chat.completions.parse(
                model=self.config.model,
                messages=messages,
                tools=self.tool_registry.tools,
                tool_choice="auto",
                response_format=StructuredResponse,
            )
            system_fp = getattr(response, "system_fingerprint", None)
            # Use the module logger (not the root logger) like the rest of the file.
            logger.debug("CHAT: served_model=%s system_fp=%s", response.model, system_fp)

            choice = response.choices[0]
            if choice.finish_reason != "tool_calls":
                return choice.message.parsed, all_pending_notifications

            # Execute the requested tools and feed their results back in.
            message_obj = choice.message
            results, pending_notifications = self.tool_registry.handle_tool_calls(message_obj.tool_calls)
            all_pending_notifications.extend(pending_notifications)
            messages.append(message_obj)
            messages.extend(results)
    except Exception as e:
        # Any failure in the loop above lands here (API call, parsing or tool
        # handling); keep the traceback so the real cause is visible.
        logger.exception("Structured response parsing failed: %s", e)

    # Fallback: retry once without structured output.
    try:
        fallback_response = self.openai_client.chat.completions.create(
            model=self.config.model,
            messages=messages,
            tools=self.tool_registry.tools,
            # This path cannot execute tool calls, so forbid them; with
            # "auto" the model could answer with a tool call and no text.
            tool_choice="none",
        )
        content = (
            fallback_response.choices[0].message.content
            or "I apologize, but I encountered an error processing your request."
        )
        fallback_structured = StructuredResponse(
            response=content,
            reasoning="Fallback response due to parsing error",
            tools_used=[],
            facts_used=[],
        )
        return fallback_structured, all_pending_notifications
    except Exception as fallback_error:
        logger.exception("Fallback response also failed: %s", fallback_error)

    # Last resort: a fixed apology so the caller still gets a reply object.
    error_response = StructuredResponse(
        response="I apologize, but I'm experiencing technical difficulties. Please try again.",
        reasoning="Error handling response",
        tools_used=[],
        facts_used=[],
    )
    return error_response, all_pending_notifications
def create_initial_greeting(self) -> str:
    """Build the welcome text that introduces the assistant.

    Returns:
        A multi-line greeting that mentions ``self.config.name`` and ends
        with an invitation to ask a question.
    """
    name = self.config.name
    return f"""👋 Hello! I'm an AI assistant designed by {name} and representing them professionally.
I can answer questions about {name}'s career, experience, and professional background based on their resume and LinkedIn profile.
If you have questions I can't answer from the available information, I'll be happy to arrange for {name} to respond to you personally via email.
How can I help you today?"""
def launch_interface(self):
    """Build the Gradio chat UI around ``self.chat`` and launch it."""
    # Seed the transcript with the greeting as the first assistant turn.
    opening_turn = {"role": "assistant", "content": self.create_initial_greeting()}
    chat_window = gr.Chatbot(
        value=[opening_turn],
        type="messages",
        height=700,
        show_copy_button=True,
        show_copy_all_button=True,
    )

    # Example questions passed to the interface.
    sample_questions = [
        "What is the professional background?",
        "What companies has this person worked at?",
        "Where did they go to school?",
        "What are their main skills?",
    ]

    app = gr.ChatInterface(
        self.chat,
        type="messages",
        chatbot=chat_window,
        examples=sample_questions,
        title=f"{self.config.name}'s AI Assistant",
    )
    app.launch()
def main():
    """Application entry point: load settings, build the chatbot, launch the UI."""
    # Values from the .env file take precedence over existing environment
    # variables (override=True), so load it before reading anything.
    load_dotenv(override=True)

    # The GitHub username comes from the environment and is None when
    # unset; the name is fixed here.
    config = ChatbotConfig(
        name="Amir Nathoo",
        github_username=os.getenv("GITHUB_USERNAME"),
    )

    CareerChatbot(config).launch_interface()


if __name__ == "__main__":
    main()