Spaces:
Running
Running
File size: 45,514 Bytes
b02e301 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 |
"""Career Chatbot
AI assistant that represents professionals on their websites, answering
questions about their background while facilitating follow-up contact.
Data models have been refactored into the `models` package to keep this file
focused on orchestration, tool wiring, and runtime logic.
"""
import os
import json
import logging
from typing import List, Dict, Optional, Any
from datetime import datetime
import re
import gradio as gr
import requests
from dotenv import load_dotenv
from openai import OpenAI
from pypdf import PdfReader
from typer import prompt
from promptkit import render
# Import refactored data models
from models import (
ChatbotConfig,
Evaluation,
StructuredResponse,
JobMatchResult,
)
# Configure process-wide logging once at import time; everything in this
# module logs through the module-level `logger` created below.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
class NotificationService:
    """Send push notifications through the Pushover HTTP API.

    Credentials are taken from the constructor arguments or, when an argument
    is falsy, from the ``PUSHOVER_USER`` / ``PUSHOVER_TOKEN`` environment
    variables. When either credential is missing the service is disabled and
    :meth:`send` only logs the message.
    """

    # Seconds to wait for Pushover before giving up, so an unresponsive
    # endpoint cannot block the caller indefinitely.
    REQUEST_TIMEOUT_SECONDS = 10

    def __init__(self, user_token: Optional[str] = None, app_token: Optional[str] = None):
        """Resolve credentials and record whether notifications are enabled.

        Args:
            user_token: Pushover user key; falls back to ``PUSHOVER_USER``.
            app_token: Pushover application token; falls back to
                ``PUSHOVER_TOKEN``.
        """
        self.user_token = user_token or os.getenv("PUSHOVER_USER")
        self.app_token = app_token or os.getenv("PUSHOVER_TOKEN")
        self.api_url = "https://api.pushover.net/1/messages.json"
        self.enabled = bool(self.user_token and self.app_token)
        if self.enabled:
            logger.info("Pushover notification service initialized")
        else:
            logger.warning("Pushover credentials not found - notifications disabled")

    def send(self, message: str) -> bool:
        """Send ``message`` as a push notification.

        Returns:
            True when the HTTP request succeeded; False when the service is
            disabled or the request failed. Request failures are logged and
            never raised to the caller.
        """
        if not self.enabled:
            logger.info(f"Notification (disabled): {message}")
            return False

        payload = {
            "user": self.user_token,
            "token": self.app_token,
            "message": message,
        }
        try:
            response = requests.post(
                self.api_url,
                data=payload,
                timeout=self.REQUEST_TIMEOUT_SECONDS,
            )
            response.raise_for_status()
        except requests.RequestException as e:
            # Covers connection errors, timeouts and non-2xx responses.
            logger.error(f"Failed to send notification: {e}")
            return False

        logger.info(f"Notification sent: {message}")
        return True
class WebSearchService:
    """Look up GitHub repositories through the GitHub REST API.

    Requests go through a shared ``requests.Session``. If the
    ``GITHUB_TOKEN`` environment variable is set it is sent as an
    ``Authorization`` header.
    """

    # Seconds to wait for GitHub before giving up on a request.
    REQUEST_TIMEOUT_SECONDS = 10

    def __init__(self, github_username: Optional[str] = None):
        """Create the HTTP session.

        Args:
            github_username: Default account used when a lookup does not
                name one explicitly.
        """
        self.github_username = github_username
        self.github_api_base = "https://api.github.com"
        self.session = requests.Session()
        self.session.headers.update({
            'Accept': 'application/vnd.github.v3+json',
            'User-Agent': 'CareerChatbot/1.0'
        })
        # Check if GitHub token is available for higher rate limits
        github_token = os.getenv("GITHUB_TOKEN")
        if github_token:
            self.session.headers.update({'Authorization': f'token {github_token}'})
            logger.info("GitHub API configured with authentication")
        else:
            logger.info("GitHub API configured without authentication (rate limits apply)")

    @staticmethod
    def _matches_topic(repo: Dict[str, Any], topic_lower: str) -> bool:
        """Return True if a lower-cased search term matches the repository.

        The term matches as a substring of any topic, the description or the
        name, or as an exact (case-insensitive) match of the language.
        """
        repo_topics = repo.get('topics') or []
        if any(topic_lower in str(repo_topic).lower() for repo_topic in repo_topics):
            return True
        description = repo.get('description') or ''
        name = repo.get('name') or ''
        language = repo.get('language') or ''
        return (
            topic_lower in description.lower()
            or topic_lower in name.lower()
            or topic_lower == language.lower()
        )

    @staticmethod
    def _format_repo(repo: Dict[str, Any]) -> Dict[str, Any]:
        """Reduce a raw repository payload to the fields handed to the agent."""
        return {
            'name': repo.get('name'),
            # `or` rather than a .get() default: a key that is present with a
            # None value would otherwise bypass the fallback text.
            'description': repo.get('description') or 'No description',
            'url': repo.get('html_url'),
            'language': repo.get('language') or 'Not specified',
            'stars': repo.get('stargazers_count', 0),
            'forks': repo.get('forks_count', 0),
            'updated': repo.get('updated_at', ''),
            'created': repo.get('created_at', ''),
            'topics': repo.get('topics', []),
            'size': repo.get('size', 0),
            'is_fork': repo.get('fork', False),
            'archived': repo.get('archived', False)
        }

    def search_github_repos(self, username: Optional[str] = None, topic: Optional[str] = None) -> Dict[str, Any]:
        """List a user's non-fork repositories, optionally narrowed by topic.

        A single page of up to 100 repositories (sorted by last update) is
        requested; accounts with more repositories are not paginated.

        Args:
            username: Account to query; defaults to the configured username.
            topic: Optional search term. The filter is applied only when it
                matches at least one repository; otherwise every repository
                is returned.

        Returns:
            A dict with ``username``, ``total_repos``, ``languages_used``,
            ``topic_searched`` and ``repos``; or a dict with ``error`` and an
            empty ``repos`` list on failure. This method does not raise.
        """
        from urllib.parse import quote

        username = username or self.github_username
        if not username:
            return {"error": "No GitHub username provided", "repos": []}

        try:
            # Percent-encode the path segment so the value cannot alter the URL.
            url = f"{self.github_api_base}/users/{quote(str(username), safe='')}/repos"
            params = {'sort': 'updated', 'per_page': 100}
            response = self.session.get(url, params=params, timeout=self.REQUEST_TIMEOUT_SECONDS)
            response.raise_for_status()

            # Filter out forked repositories to show only original work
            repos = [repo for repo in response.json() if not repo.get('fork', False)]

            if topic and isinstance(topic, str):
                topic_lower = topic.lower()
                filtered = [repo for repo in repos if self._matches_topic(repo, topic_lower)]
                # Only use filtered results if we found matches
                if filtered:
                    repos = filtered

            formatted_repos = [self._format_repo(repo) for repo in repos]
            all_languages = {repo.get('language') for repo in repos if repo.get('language')}

            return {
                "username": username,
                "total_repos": len(formatted_repos),
                "languages_used": list(all_languages),
                "topic_searched": topic,
                "repos": formatted_repos
            }
        except requests.exceptions.HTTPError as e:
            status_code = getattr(e.response, 'status_code', None)
            if status_code == 404:
                return {"error": f"GitHub user '{username}' not found", "repos": []}
            logger.error(f"GitHub API error: {e}")
            return {"error": f"GitHub API error: {str(e)}", "repos": []}
        except Exception as e:
            # Boundary for the agent tool: report the failure instead of raising.
            logger.error(f"Error searching GitHub: {e}")
            return {"error": f"Error searching GitHub: {str(e)}", "repos": []}

    def get_repo_details(self, repo_name: str, username: Optional[str] = None) -> Dict[str, Any]:
        """Fetch metadata and a short README preview for one repository.

        Args:
            repo_name: Repository name under ``username``.
            username: Account that owns the repository; defaults to the
                configured username.

        Returns:
            A dict of repository fields plus ``readme_preview`` (first 500
            characters of the README, or None if it could not be fetched);
            or a dict with a single ``error`` key on failure. This method
            does not raise.
        """
        import base64
        from urllib.parse import quote

        username = username or self.github_username
        if not username:
            return {"error": "No GitHub username provided"}
        if not repo_name:
            return {"error": "No repository name provided"}

        try:
            # Percent-encode both path segments: repo_name is supplied by the
            # model's tool call and must not be able to change the endpoint.
            repo_url = (
                f"{self.github_api_base}/repos/"
                f"{quote(str(username), safe='')}/{quote(str(repo_name), safe='')}"
            )
            response = self.session.get(repo_url, timeout=self.REQUEST_TIMEOUT_SECONDS)
            response.raise_for_status()
            repo = response.json()

            # Get README content if available
            readme_content = None
            try:
                readme_response = self.session.get(
                    f"{repo_url}/readme", timeout=self.REQUEST_TIMEOUT_SECONDS
                )
                if readme_response.status_code == 200:
                    readme_data = readme_response.json()
                    if 'content' in readme_data:
                        decoded = base64.b64decode(readme_data['content'])
                        # First 500 chars; undecodable bytes are replaced
                        # rather than discarding the whole preview.
                        readme_content = decoded.decode('utf-8', errors='replace')[:500]
            except Exception as e:
                # Don't let README failure break the entire tool
                logger.debug(f"Could not retrieve README: {e}")

            return {
                'name': repo.get('name'),
                'full_name': repo.get('full_name'),
                'description': repo.get('description'),
                'url': repo.get('html_url'),
                'homepage': repo.get('homepage'),
                'language': repo.get('language'),
                'languages_url': repo.get('languages_url'),
                'created_at': repo.get('created_at'),
                'updated_at': repo.get('updated_at'),
                'pushed_at': repo.get('pushed_at'),
                'size': repo.get('size'),
                'stars': repo.get('stargazers_count'),
                'watchers': repo.get('watchers_count'),
                'forks': repo.get('forks_count'),
                'open_issues': repo.get('open_issues_count'),
                'topics': repo.get('topics', []),
                'readme_preview': readme_content
            }
        except Exception as e:
            logger.error(f"Error getting repo details: {e}")
            return {"error": f"Error getting repository details: {str(e)}"}
class DocumentLoader:
    """Loads professional documents (PDF and plain text) into strings."""

    @staticmethod
    def load_pdf(path: str) -> str:
        """Return the concatenated text of every page in the PDF at ``path``.

        Pages for which no text is extracted are skipped. Any failure is
        logged and an empty string is returned instead of raising.
        """
        try:
            reader = PdfReader(path)
            page_texts = (page.extract_text() for page in reader.pages)
            content = "".join(text for text in page_texts if text)
        except Exception as e:
            logger.error(f"Failed to load PDF {path}: {e}")
            return ""

        logger.info(f"Loaded PDF: {path} - Length: {len(content)} chars")

        # Keyword diagnostics are a debugging aid; keep them (and the document
        # snippet) at DEBUG so they stay out of normal INFO logs.
        webrtc_found = "WebRTC" in content
        websocket_found = "WebSocket" in content
        logger.debug(f"PDF Debug - WebRTC found: {webrtc_found}, WebSocket found: {websocket_found}")
        if webrtc_found:
            webrtc_index = content.find("WebRTC")
            snippet = content[max(0, webrtc_index-50):webrtc_index+50]
            logger.debug(f"WebRTC context: ...{snippet}...")
        return content

    @staticmethod
    def load_text(path: str) -> str:
        """Return the UTF-8 text content of the file at ``path``.

        Any failure is logged and an empty string is returned instead of
        raising.
        """
        try:
            with open(path, "r", encoding="utf-8") as f:
                content = f.read()
        except Exception as e:
            logger.error(f"Failed to load text file {path}: {e}")
            return ""
        logger.info(f"Loaded text file: {path}")
        return content
class Evaluator:
    """Evaluates AI responses for accuracy and hallucinations.

    A second model (reached through an OpenAI-compatible endpoint) judges
    each structured reply against the professional context, and can
    regenerate a reply from the judge's feedback.
    """

    # Tool names, with and without the "functions." prefix, that identify a
    # GitHub lookup in a reply's `tools_used` list.
    _GITHUB_TOOL_NAMES = (
        'search_github_repos',
        'get_repo_details',
        'functions.search_github_repos',
        'functions.get_repo_details',
    )

    # Evaluation instructions used when GitHub tools contributed to the reply.
    _GITHUB_CRITERIA = """Please evaluate this response with REASONABLE STANDARDS for GitHub tool usage:
1. GitHub tools (search_github_repos, get_repo_details) were used to gather additional information
2. Repository details like stars, forks, creation dates, programming languages, topics are LEGITIMATE from GitHub API
3. Technical project details obtained from GitHub tools are acceptable
4. Only reject if claims obviously contradict the professional background
5. The agent appropriately used tools to provide detailed project information
When GitHub tools are used, trust the additional technical details they provide.
Is this response acceptable? Provide specific feedback about any issues."""

    # Evaluation instructions used when no external tool data is involved.
    _STRICT_CRITERIA = """Please evaluate this response with STRICTNESS:
1. Check EVERY factual claim against the provided context
2. If the Agent mentions ANY specific detail not explicitly in the context, mark as UNACCEPTABLE
3. If the Agent should have said "I don't have that information", but instead made something up, mark as UNACCEPTABLE
4. Look for common hallucinations and unsupported claims
Is this response acceptable? Provide specific feedback about any issues."""

    def __init__(self, config: ChatbotConfig, context: Dict[str, str]):
        """Store configuration and context and build the evaluator client.

        Args:
            config: Chatbot configuration (read for model names and the job
                match threshold).
            context: Professional context documents keyed by name
                (``resume``, ``linkedin``, ``summary`` are read here).
        """
        self.config = config
        self.context = context
        # Use a different model for evaluation to avoid bias
        self.evaluator_client = OpenAI(
            api_key=os.getenv("GEMINI_API_KEY"),
            base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
        )
        # Initial system prompt without GitHub context, with the generic footer
        self.system_prompt = self._create_evaluator_prompt()

    @staticmethod
    def _format_history(history: List[Dict], limit: int) -> str:
        """Render the last ``limit`` messages as ``role: content`` lines."""
        return "\n".join(f"{h['role']}: {h['content']}" for h in history[-limit:])

    def _log_context_diagnostics(self) -> None:
        """Emit DEBUG-level size and keyword diagnostics for the context."""
        resume = self.context.get('resume', '')
        linkedin = self.context.get('linkedin', '')
        summary = self.context.get('summary', '')
        resume_has_webrtc = "WebRTC" in resume
        resume_has_websocket = "WebSocket" in resume
        logger.debug("EVALUATOR CONTEXT DEBUG:")
        logger.debug(f" Resume length: {len(resume)} chars, WebRTC: {resume_has_webrtc}, WebSocket: {resume_has_websocket}")
        logger.debug(f" LinkedIn length: {len(linkedin)} chars")
        logger.debug(f" Summary length: {len(summary)} chars")
        if resume_has_webrtc:
            webrtc_index = resume.find("WebRTC")
            snippet = resume[max(0, webrtc_index-50):webrtc_index+50]
            logger.debug(f" WebRTC context in resume: ...{snippet}...")

    def _create_evaluator_prompt(self, decision_criteria_footer=None) -> str:
        """Render the evaluator system prompt from ``prompts/evaluator.md``.

        Args:
            decision_criteria_footer: Footer text passed to the template.
                ``None`` selects the generic footer; an empty string passes
                an empty footer.
        """
        if decision_criteria_footer is None:
            decision_criteria_footer = "Mark UNACCEPTABLE only if: unsupported claims, missing tool usage when needed, or behavioral rules violated."
        # Get current date for evaluator context
        current_date = datetime.now().strftime("%B %d, %Y")
        self._log_context_diagnostics()
        template_vars = {
            "config": self.config,
            "context": self.context,
            "job_match_threshold": self.config.job_match_threshold if self.config else "Good",
            "decision_criteria_footer": decision_criteria_footer,
            "current_date": current_date
        }
        return render("prompts/evaluator.md", template_vars)

    def _create_user_prompt(self, reply: str, message: str, history: List[Dict]) -> str:
        """Create the strict-evaluation user prompt for a plain-text reply.

        The last six history messages are included for context.
        """
        history_str = self._format_history(history, 6)
        return f"""Here's the conversation context:
{history_str}
Latest User message: {message}
Latest Agent response: {reply}
Please evaluate this response with STRICTNESS:
1. Check EVERY factual claim against the provided context
2. If the Agent mentions ANY specific detail (skills, technologies, experiences, tools) not explicitly in the context, mark as UNACCEPTABLE
3. If the Agent should have said "I don't have that information", but instead made something up, mark as UNACCEPTABLE
4. Look for common hallucinations like claiming experience with technologies not mentioned in the resume/LinkedIn
Is this response acceptable? Provide specific feedback about any issues."""

    def rerun(self, reply: str, message: str, history: List[Dict], feedback: str) -> StructuredResponse:
        """Regenerate a structured response using feedback from a failed evaluation.

        Args:
            reply: The rejected response text.
            message: The user's latest message.
            history: Prior conversation messages.
            feedback: The evaluator's explanation of what was wrong.

        Returns:
            The parsed ``StructuredResponse`` from the evaluator client.
        """
        template_vars = {
            'base_system_prompt': self._create_base_system_prompt(),
            'reply': reply,
            'feedback': feedback
        }
        updated_system_prompt = render('prompts/chat_rerun.md', template_vars)
        messages = [{"role": "system", "content": updated_system_prompt}] + history + [{"role": "user", "content": message}]
        # Generate new structured response with parsed output
        response = self.evaluator_client.beta.chat.completions.parse(
            model=self.config.evaluator_model,
            messages=messages,
            response_format=StructuredResponse
        )
        system_fp = getattr(response, "system_fingerprint", None)
        logger.debug("EVAL: served_model=%s system_fp=%s", response.model, system_fp)
        return response.choices[0].message.parsed

    def _create_base_system_prompt(self) -> str:
        """Render the base chat system prompt (``prompts/chat_base.md``)."""
        template_vars = {
            'config': self.config,
            'context': self.context
        }
        return render('prompts/chat_base.md', template_vars)

    def _job_matching_criteria(self) -> str:
        """Build the evaluation instructions for job-matching replies."""
        threshold = self.config.job_match_threshold if self.config else 'Good'
        return f"""Please evaluate this job matching response with REASONABLE STANDARDS:
1. Is the reasoning sound for professional skill assessment?
2. Are technical inferences reasonable (e.g., ROS2 experience → DDS knowledge)?
3. Were appropriate tools used for job analysis?
4. Does the response provide useful insights for recruitment?
5. CRITICAL: Match level hierarchy is Very Strong > Strong > Good > Moderate > Weak > Very Weak
6. CRITICAL: If job match is "{threshold}" or HIGHER in the hierarchy (Strong, Very Strong), facilitating contact is CORRECT behavior
7. CRITICAL: If job match is LOWER in the hierarchy than "{threshold}" (Moderate, Weak, Very Weak), declining contact is CORRECT behavior
Job matching responses should be evaluated for practical utility, not pedantic precision.
Accept reasonable technical inferences and contact facilitation decisions based on match level."""

    def evaluate_structured(self, structured_reply: StructuredResponse, message: str, history: List[Dict]) -> Evaluation:
        """Evaluate a structured response with reasoning and evidence.

        The evaluation criteria depend on the situation: job-matching
        replies and replies backed by GitHub tools are judged by reasonable
        standards, everything else strictly against the provided context.

        Returns:
            The parsed ``Evaluation``. If anything fails while evaluating,
            the error is logged and an *acceptable* evaluation carrying the
            error text is returned, so evaluator outages do not block replies.
        """
        try:
            is_job_matching = self._is_job_matching_context(structured_reply, message, history)
            # Check if GitHub tools were used
            logger.info(f"STRUCTURED REPLY TOOLS_USED: {structured_reply.tools_used}")
            github_tools_used = self._is_github_context(structured_reply)
            logger.info(f"GITHUB TOOLS USED: {github_tools_used}")

            if is_job_matching:
                evaluation_criteria = self._job_matching_criteria()
            elif github_tools_used:
                evaluation_criteria = self._GITHUB_CRITERIA
            else:
                evaluation_criteria = self._STRICT_CRITERIA

            history_str = self._format_history(history, 3)
            user_prompt = f"""Here's the conversation context:
{history_str}
Latest User message: {message}
Agent's structured response:
Response: {structured_reply.response}
Reasoning: {structured_reply.reasoning}
Tools used: {structured_reply.tools_used}
Facts used: {structured_reply.facts_used}
{evaluation_criteria}"""

            # GitHub tool results found in the history are added to the system
            # prompt; with none, the plain evaluator prompt is used.
            github_context = self._extract_github_context_from_history(history)
            system_prompt = self._create_evaluator_prompt_with_github(github_context)
            messages = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ]
            response = self.evaluator_client.beta.chat.completions.parse(
                model=self.config.evaluator_model,
                messages=messages,
                response_format=Evaluation,
                temperature=0.0
            )
            system_fp = getattr(response, "system_fingerprint", None)
            logger.debug("EVAL: served_model=%s system_fp=%s", response.model, system_fp)
            evaluation = response.choices[0].message.parsed
            logger.info(f"EVALUATION RESULT: {'PASS' if evaluation.is_acceptable else 'FAIL'}")
            logger.info(f"AGENT RESPONSE: {structured_reply.response}")
            logger.info(f"AGENT REASONING: {structured_reply.reasoning}")
            logger.info(f"EVALUATOR FEEDBACK: {evaluation.feedback}")
            return evaluation
        except Exception as e:
            logger.error(f"Structured evaluation failed: {e}")
            return Evaluation(is_acceptable=True, feedback=f"Evaluation error: {str(e)}")

    def _external_tools_used(self, history: List[Dict]) -> bool:
        """Check if tools with external data (GitHub, job matching) were used in the conversation."""
        github_indicators = ['repos', 'languages_found', 'total_repos', 'github.com']
        job_match_indicators = ['overall_match_level', 'skill_assessments', 'should_facilitate_contact']
        for message in history:
            if message.get('role') != 'tool':
                continue
            # Tool messages may carry no content; treat that as empty text.
            content = message.get('content') or ''
            logger.debug(f"TOOL CONTENT DEBUG: {content}")
            # Check for GitHub tool results
            if any(indicator in content for indicator in github_indicators):
                return True
            # Check for job matching tool results
            if any(indicator in content for indicator in job_match_indicators):
                logger.debug("JOB MATCHING TOOL DETECTED!")
                return True
        logger.debug("NO EXTERNAL TOOLS DETECTED")
        return False

    def _is_github_context(self, structured_reply: StructuredResponse) -> bool:
        """Check if the reply lists a GitHub tool in ``tools_used``."""
        return any(tool in structured_reply.tools_used for tool in self._GITHUB_TOOL_NAMES)

    def _is_job_matching_context(self, structured_reply: StructuredResponse, message: str, history: List[Dict]) -> bool:
        """Check if this is a job matching context.

        True when the job matching tool was used, when the response text
        contains a job-matching phrase, or when the user's message contains
        a job-posting keyword. ``history`` is accepted but not consulted.
        """
        if 'evaluate_job_match' in structured_reply.tools_used:
            return True
        response_content = structured_reply.response.lower()
        response_indicators = ['match level', 'skills breakdown', 'overall match', 'job fit']
        if any(indicator in response_content for indicator in response_indicators):
            return True
        message_content = message.lower()
        message_indicators = ['job description', 'role', 'position', 'hiring', 'candidate']
        return any(indicator in message_content for indicator in message_indicators)

    def _create_evaluator_prompt_with_github(self, github_context: str) -> str:
        """Create evaluator prompt including GitHub tool results as valid context.

        Falls back to the plain evaluator prompt when ``github_context`` is
        empty.
        """
        if not github_context:
            return self._create_evaluator_prompt()
        template_vars = {
            # Base evaluator content WITHOUT footer (empty string)
            "base_evaluator_prompt": self._create_evaluator_prompt(""),
            "github_context": github_context,
            "current_date": datetime.now().strftime("%B %d, %Y")
        }
        return render("prompts/evaluator_with_github_context.md", template_vars)

    def _extract_github_context_from_history(self, history: List[Dict]) -> str:
        """Extract GitHub tool results from conversation history.

        Returns the content of every ``tool`` message that contains a
        GitHub-looking marker, joined by newlines; an empty string if none.
        """
        indicators = [
            'full_name', 'html_url', 'stargazers_count', 'watchers_count', 'forks_count',
            'open_issues_count', 'created_at', 'updated_at', 'topics', 'repos', 'github.com'
        ]
        github_context = ""
        for message in history:
            if message.get('role') != 'tool':
                continue
            # Tool messages may carry no content; treat that as empty text.
            content = message.get('content') or ''
            if any(indicator in content for indicator in indicators):
                github_context += f"\n{content}"
        return github_context.strip()
class ToolRegistry:
    """Manages AI agent tools and their execution.

    Holds the tool (function-calling) definitions offered to the model and
    dispatches the model's tool calls to the matching implementation.
    """

    def __init__(self, notification_service: NotificationService, web_search_service: Optional[WebSearchService] = None,
                 openai_client: Optional[OpenAI] = None, context: Optional[Dict[str, str]] = None,
                 config: Optional[ChatbotConfig] = None):
        """Store collaborators and build the tool definitions.

        Args:
            notification_service: Service used by callers to deliver the
                notifications collected here.
            web_search_service: GitHub lookup service; when absent the GitHub
                tools are not offered.
            openai_client: Client used for job-match analysis.
            context: Professional context documents passed to the analysis
                prompt; ``None`` becomes an empty dict.
            config: Chatbot configuration.
        """
        self.notification_service = notification_service
        self.web_search_service = web_search_service
        self.openai_client = openai_client
        self.context = context or {}
        self.config = config
        self.tools = self._create_tool_definitions()

    @staticmethod
    def _function_tool(name: str, description: str, properties: Dict[str, Any], required: List[str]) -> Dict:
        """Wrap one function schema in the ``{"type": "function", ...}`` envelope."""
        return {
            "type": "function",
            "function": {
                "name": name,
                "strict": True,
                "description": description,
                "parameters": {
                    "type": "object",
                    "strict": True,
                    "properties": properties,
                    "required": required,
                    "additionalProperties": False
                }
            }
        }

    def _create_tool_definitions(self) -> List[Dict]:
        """Create tool definitions for the AI agent.

        The contact-recording and job-match tools are always present; the
        two GitHub tools are appended only when a web search service exists.
        """
        tools = [
            self._function_tool(
                "record_user_details",
                (
                    "Use this tool ONLY AFTER a user has explicitly provided their email address in response to an offer to facilitate contact. "
                    "This tool records the user's contact details. "
                    "IMPORTANT: DO NOT use this tool unless the user has given you their email. Do not make up an email address."
                ),
                {
                    "email": {
                        "type": "string",
                        "description": "The email address explicitly provided by the user. Do not invent this."
                    },
                    "name": {
                        "type": "string",
                        "description": "The user's name if they provided it. If not, use 'Visitor'."
                    },
                    "notes": {
                        "type": "string",
                        "description": (
                            "Detailed notes about the conversation context and why the user wants to be contacted. "
                            "Include the original question or job match details."
                        )
                    }
                },
                ["email", "name", "notes"],
            ),
            self._function_tool(
                "evaluate_job_match",
                (
                    "Analyze how well the candidate matches a job posting. Use this when someone asks "
                    "about job fit, role suitability, or provides a job description to evaluate. "
                    "Returns detailed analysis with match levels and recommendations."
                ),
                {
                    "job_description": {
                        "type": "string",
                        "description": "The FULL, COMPLETE, UNEDITED job description text exactly as provided by the user. Do NOT summarize, extract, or truncate - include ALL details including company info, salary, responsibilities, requirements, and nice-to-haves."
                    },
                    "role_title": {
                        "type": "string",
                        "description": "The job title or role name"
                    }
                },
                ["job_description", "role_title"],
            ),
        ]
        # Add GitHub search tools if web search service is available
        if self.web_search_service:
            tools.extend([
                self._function_tool(
                    "search_github_repos",
                    (
                        "Get ALL GitHub repositories with full details including languages, topics, stars, etc. "
                        "Call WITHOUT parameters to get everything, then analyze the returned data. "
                        "Returns list of all repos with language field showing what each is written in."
                    ),
                    {},
                    [],
                ),
                self._function_tool(
                    "get_repo_details",
                    "Get detailed information about a specific GitHub repository",
                    {
                        "repo_name": {
                            "type": "string",
                            "description": "The name of the repository to get details for"
                        }
                    },
                    ["repo_name"],
                ),
            ])
        return tools

    def record_user_details(self, email: str, name: str = "Visitor", notes: str = "not provided") -> Dict:
        """Record user contact details and prepare notification.

        Nothing is sent here: the notification text is returned under
        ``pending_notification`` for the caller to deliver.
        """
        message = f"Recording interest from {name} with email {email} and notes: {notes}"
        logger.info(f"Recorded user details: {email}, {name}")
        return {
            "recorded": "ok",
            "pending_notification": message
        }

    def evaluate_job_match(self, job_description: str, role_title: str) -> Dict:
        """Evaluate how well the candidate matches a job using LLM analysis.

        Returns:
            The analysis result as a dict, with ``pending_notification``
            added when the result recommends facilitating contact; or a dict
            with a single ``error`` key when the client/context is missing
            or the analysis fails.
        """
        if not self.openai_client or not self.context:
            return {"error": "Job matching requires OpenAI client and context"}
        logger.info(f"🎯 Evaluating job match for role: {role_title}")
        template_vars = {
            "role_title": role_title,
            "job_description": job_description,
            "config": self.config,
            "context": self.context,
        }
        try:
            # Rendering is inside the try so a template problem is reported
            # to the model as a tool error instead of aborting the turn.
            analysis_prompt = render("prompts/job_match_analysis.md", template_vars)
            response = self.openai_client.beta.chat.completions.parse(
                model=self.config.job_matching_model if self.config else "gpt-4o",
                messages=[
                    {"role": "system", "content": "You are a professional job matching analyst."},
                    {"role": "user", "content": analysis_prompt}
                ],
                response_format=JobMatchResult
            )
            system_fp = getattr(response, "system_fingerprint", None)
            logger.debug("MATCH: served_model=%s system_fp=%s", response.model, system_fp)
            result = response.choices[0].message.parsed
            logger.info(f"Job match analysis completed: {result.overall_match_level} match for {role_title}")
            result_dict = result.model_dump()
            # Add pending notification for high matches
            if result.should_facilitate_contact:
                result_dict["pending_notification"] = f"High job match found ({result.overall_match_level}): {role_title}"
            return result_dict
        except Exception as e:
            logger.error(f"Job matching analysis failed: {e}")
            return {"error": f"Analysis failed: {str(e)}"}

    @staticmethod
    def _parse_arguments(raw: Any) -> Optional[Dict[str, Any]]:
        """Decode a tool call's JSON argument string.

        Returns an empty dict for empty input, the decoded dict for a valid
        JSON object, and ``None`` when the text is not valid JSON or does not
        decode to an object.
        """
        if not raw:
            return {}
        try:
            parsed = json.loads(raw)
        except (TypeError, ValueError):
            return None
        return parsed if isinstance(parsed, dict) else None

    def _dispatch(self, tool_name: str, arguments: Dict[str, Any]) -> Dict:
        """Run the tool named ``tool_name``; unknown names yield an empty dict."""
        if tool_name == "record_user_details":
            return self.record_user_details(**arguments)
        if tool_name == "search_github_repos" and self.web_search_service:
            return self.web_search_service.search_github_repos(topic=arguments.get('topic'))
        if tool_name == "get_repo_details" and self.web_search_service:
            return self.web_search_service.get_repo_details(arguments.get('repo_name'))
        if tool_name == "evaluate_job_match":
            return self.evaluate_job_match(**arguments)
        logger.warning(f"Unknown tool called: {tool_name}")
        return {}

    def handle_tool_calls(self, tool_calls) -> tuple[List[Dict], List[str]]:
        """Execute tool calls from the AI agent and collect pending notifications.

        A call with undecodable arguments, or with arguments that do not fit
        the tool's signature, produces an ``error`` result for that call
        only; the remaining calls still run.

        Returns:
            ``(results, pending_notifications)`` where ``results`` holds one
            ``tool`` role message per call (JSON-encoded result plus the
            call id) and ``pending_notifications`` holds the notification
            texts removed from those results.
        """
        results = []
        pending_notifications = []
        for tool_call in tool_calls:
            tool_name = tool_call.function.name
            arguments = self._parse_arguments(tool_call.function.arguments)
            if arguments is None:
                logger.warning(f"Invalid arguments for tool {tool_name}: {tool_call.function.arguments!r}")
                result = {"error": f"Invalid arguments for tool '{tool_name}'"}
            else:
                logger.info(f"Tool called: {tool_name} with args: {arguments}")
                try:
                    result = self._dispatch(tool_name, arguments)
                except TypeError as e:
                    # Missing or unexpected keyword arguments from the model.
                    logger.error(f"Tool {tool_name} rejected its arguments: {e}")
                    result = {"error": f"Invalid arguments for tool '{tool_name}': {str(e)}"}
            # Extract pending notifications
            if isinstance(result, dict) and "pending_notification" in result:
                pending_notifications.append(result.pop("pending_notification"))
            results.append({
                "role": "tool",
                "content": json.dumps(result),
                "tool_call_id": tool_call.id
            })
        return results, pending_notifications
class CareerChatbot:
"""Main chatbot class that orchestrates the AI assistant"""
def __init__(self, config: ChatbotConfig):
    """Wire up the clients, services, context, tools and prompts.

    Args:
        config: Chatbot configuration; ``github_username`` decides whether
            a GitHub lookup service is created.
    """
    self.config = config
    self.openai_client = OpenAI()

    # Collaborating services
    self.notification_service = NotificationService()
    if config.github_username:
        self.web_search_service = WebSearchService(github_username=config.github_username)
    else:
        self.web_search_service = None
    self.document_loader = DocumentLoader()

    # The professional context must exist before the tool registry, the
    # evaluator and the system prompt, all of which read it.
    self.context = self._load_context()
    self.tool_registry = ToolRegistry(
        self.notification_service,
        self.web_search_service,
        self.openai_client,
        self.context,
        self.config,
    )
    self.evaluator = Evaluator(self.config, self.context)
    self.system_prompt = self._create_system_prompt()

    logger.info(f"CareerChatbot initialized for {config.name}")
def _load_context(self) -> Dict[str, str]:
"""Load all professional context documents"""
context = {
"resume": self.document_loader.load_pdf(self.config.resume_path),
"linkedin": self.document_loader.load_pdf(self.config.linkedin_path),
"summary": self.document_loader.load_text(self.config.summary_path)
}
return context
def _create_system_prompt(self) -> str:
    """Render the assistant's system prompt from ``prompts/chat_init.md``.

    The GitHub tool instructions are included only when a web search
    service is configured; otherwise that template value is empty.
    """
    if self.web_search_service:
        github_tools = (
            "You can use the `search_github_repos` tool to find open source projects and repositories. "
            "Use the `get_repo_details` tool to get detailed information about specific repositories."
        )
    else:
        github_tools = ""

    template_values = {
        'config': self.config,         # Access as {config.name}, {config.job_match_threshold}
        'context': self.context,       # Access as {context.summary}, {context.linkedin}, etc.
        'github_tools': github_tools,  # Access as {github_tools} (for conditional content)
    }
    return render('prompts/chat_init.md', template_values)
def chat(self, message: str, history: List[Dict[str, str]], max_retries: int = 3) -> str:
"""Main chat function that processes user messages with evaluation and Lab 3 retry approach"""
logger.info(f"π PROCESSING message: '{message[:50]}...'")
# Generate initial response with tools
messages = [{"role": "system", "content": self.system_prompt}] + history + [{"role": "user", "content": message}]
structured_reply, pending_notifications = self._generate_response_with_tools(messages)
# Safety check - ensure we have a valid structured_reply
if not structured_reply:
logger.error("No structured reply received from _generate_response_with_tools")
return "I apologize, but I'm experiencing technical difficulties. Please try again."
# For evaluation, use the original history (tool results will be detected from tools_used field)
evaluation_history = history
# Systematic evaluation with Lab 3 approach
for attempt in range(max_retries):
try:
# Evaluate the current reply using history that includes tool results
evaluation = self.evaluator.evaluate_structured(structured_reply, message, evaluation_history)
if evaluation.is_acceptable:
logger.info(f"β
PASSED evaluation on attempt {attempt + 1}/{max_retries}\n")
# Send notifications only after successful evaluation
for notification in pending_notifications:
self.tool_registry.notification_service.send(notification)
return structured_reply.response if structured_reply else "I apologize, but I'm experiencing technical difficulties."
else:
logger.warning(f"β FAILED evaluation on attempt {attempt + 1}/{max_retries}: {evaluation.feedback[:100]}...\n")
# If we haven't exhausted retries, regenerate using Lab 3 rerun approach
if attempt < max_retries - 1:
logger.info("π Regenerating response with feedback...")
# Clear pending notifications from failed attempt
pending_notifications.clear()
new_reply = self.evaluator.rerun(structured_reply.response, message, history, evaluation.feedback)
if new_reply:
structured_reply = new_reply
else:
logger.error("Rerun returned None, keeping original reply")
else:
logger.warning(f"β οΈ Max retries ({max_retries}) reached. Returning final attempt.")
return structured_reply.response if structured_reply else "I apologize, but I'm experiencing technical difficulties."
except Exception as eval_error:
logger.error(f"Evaluation failed: {eval_error}")
# If evaluation fails, return the response we have
return structured_reply.response if structured_reply else "I apologize, but I'm experiencing technical difficulties."
return structured_reply.response if structured_reply else "I apologize, but I'm experiencing technical difficulties."
def _generate_response_with_tools(self, messages: List[Dict]) -> tuple[StructuredResponse, List[str]]:
"""Generate response handling tool calls and collect pending notifications"""
done = False
all_pending_notifications = []
while not done:
try:
# Call the LLM with tools and structured output
response = self.openai_client.beta.chat.completions.parse(
model=self.config.model,
messages=messages,
tools=self.tool_registry.tools,
tool_choice="auto",
response_format=StructuredResponse
)
system_fp = getattr(response, "system_fingerprint", None)
logging.debug("CHAT: served_model=%s system_fp=%s", response.model, system_fp)
finish_reason = response.choices[0].finish_reason
# Handle tool calls if needed
if finish_reason == "tool_calls":
message_obj = response.choices[0].message
tool_calls = message_obj.tool_calls
results, pending_notifications = self.tool_registry.handle_tool_calls(tool_calls)
all_pending_notifications.extend(pending_notifications)
messages.append(message_obj)
messages.extend(results)
else:
done = True
return response.choices[0].message.parsed, all_pending_notifications
except Exception as e:
logger.error(f"Structured response parsing failed: {e}")
# Fallback: try without structured output
try:
fallback_response = self.openai_client.chat.completions.create(
model=self.config.model,
messages=messages,
tools=self.tool_registry.tools,
tool_choice="auto"
)
# Create a basic structured response from the fallback
content = fallback_response.choices[0].message.content or "I apologize, but I encountered an error processing your request."
fallback_structured = StructuredResponse(
response=content,
reasoning="Fallback response due to parsing error",
tools_used=[],
facts_used=[]
)
return fallback_structured, all_pending_notifications
except Exception as fallback_error:
logger.error(f"Fallback response also failed: {fallback_error}")
# Ultimate fallback
error_response = StructuredResponse(
response="I apologize, but I'm experiencing technical difficulties. Please try again.",
reasoning="Error handling response",
tools_used=[],
facts_used=[]
)
return error_response, all_pending_notifications
def create_initial_greeting(self) -> str:
"""Create the initial greeting message"""
return f"""π Hello! I'm an AI assistant designed by {self.config.name} and representing them professionally.
I can answer questions about {self.config.name}'s career, experience, and professional background based on their resume and LinkedIn profile.
If you have questions I can't answer from the available information, I'll be happy to arrange for {self.config.name} to respond to you personally via email.
How can I help you today?"""
def launch_interface(self):
"""Launch the Gradio interface"""
# Create chatbot with initial message
chatbot = gr.Chatbot(
value=[
{"role": "assistant", "content": self.create_initial_greeting()}
],
type="messages",
height=700,
show_copy_button=True,
show_copy_all_button=True,
)
# Create and launch the interface
interface = gr.ChatInterface(
self.chat,
type="messages",
chatbot=chatbot,
examples=[
"What is the professional background?",
"What companies has this person worked at?",
"Where did they go to school?",
"What are their main skills?"
],
title=f"{self.config.name}'s AI Assistant"
)
interface.launch()
def main():
    """Application entry point: load the environment, configure the chatbot and serve it."""
    # Environment first: the GitHub username lookup below depends on it.
    load_dotenv(override=True)

    # GITHUB_USERNAME is optional; os.getenv yields None when it is unset.
    settings = ChatbotConfig(
        name="Amir Nathoo",
        github_username=os.getenv("GITHUB_USERNAME"),
    )

    # Build the assistant and hand control to the web interface.
    CareerChatbot(settings).launch_interface()
# Start the application only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    main()
|