Spaces:
Sleeping
Sleeping
Upload 46 files
Browse files
- config/skills.yaml +12 -0
- llm_analysis/__init__.py +0 -0
- llm_analysis/__pycache__/__init__.cpython-312.pyc +0 -0
- llm_analysis/__pycache__/langgraph_pipeline.cpython-312.pyc +0 -0
- llm_analysis/__pycache__/langsmith_logger.cpython-312.pyc +0 -0
- llm_analysis/__pycache__/llm_analyzer.cpython-312.pyc +0 -0
- llm_analysis/__pycache__/prompt_templates.cpython-312.pyc +0 -0
- llm_analysis/langgraph_pipeline.py +225 -0
- llm_analysis/langsmith_logger.py +265 -0
- llm_analysis/llm_analyzer.py +170 -0
- llm_analysis/prompt_templates.py +83 -0
- logs/langsmith_metrics.jsonl +57 -0
- logs/langsmith_traces.jsonl +120 -0
- matchers/__init__.py +0 -0
- matchers/__pycache__/__init__.cpython-312.pyc +0 -0
- matchers/__pycache__/final_scorer.cpython-312.pyc +0 -0
- matchers/__pycache__/hard_matcher.cpython-312.pyc +0 -0
- matchers/__pycache__/semantic_matcher.cpython-312.pyc +0 -0
- matchers/entity_extractor.py +160 -0
- matchers/final_scorer.py +73 -0
- matchers/fuzzy_matcher.py +117 -0
- matchers/hard_matcher.py +47 -0
- matchers/semantic_matcher.py +37 -0
- parsers/__iniy__.py +0 -0
- parsers/__pycache__/cleaner.cpython-312.pyc +0 -0
- parsers/__pycache__/docx_parser.cpython-312.pyc +0 -0
- parsers/__pycache__/jd_parser.cpython-312.pyc +0 -0
- parsers/__pycache__/job_requirement_parser.cpython-312.pyc +0 -0
- parsers/__pycache__/pdf_parser.cpython-312.pyc +0 -0
- parsers/__pycache__/section_splitter.cpython-312.pyc +0 -0
- parsers/__pycache__/skill_extractor.cpython-312.pyc +0 -0
- parsers/__pycache__/skills_list.cpython-312.pyc +0 -0
- parsers/__pycache__/smart_skill_extractor.cpython-312.pyc +0 -0
- parsers/cleaner.py +7 -0
- parsers/docx_parser.py +5 -0
- parsers/entity_extractor.py +33 -0
- parsers/jd_parser.py +20 -0
- parsers/job_requirement_parser.py +449 -0
- parsers/pdf_parser.py +25 -0
- parsers/section_splitter.py +71 -0
- parsers/skill_extractor.py +64 -0
- parsers/skills_list.py +35 -0
- parsers/smart_skill_extractor.py +244 -0
- parsers/universal_parser.py +144 -0
- scoring/__pycache__/relevance_scorer.cpython-312.pyc +0 -0
- scoring/relevance_scorer.py +314 -0
config/skills.yaml
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
skills:
|
| 2 |
+
- python
|
| 3 |
+
- java
|
| 4 |
+
- c++
|
| 5 |
+
- sql
|
| 6 |
+
- aws
|
| 7 |
+
- docker
|
| 8 |
+
- kubernetes
|
| 9 |
+
- tensorflow
|
| 10 |
+
- pytorch
|
| 11 |
+
- react
|
| 12 |
+
- node.js
|
llm_analysis/__init__.py
ADDED
|
File without changes
|
llm_analysis/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (154 Bytes). View file
|
|
|
llm_analysis/__pycache__/langgraph_pipeline.cpython-312.pyc
ADDED
|
Binary file (9.35 kB). View file
|
|
|
llm_analysis/__pycache__/langsmith_logger.cpython-312.pyc
ADDED
|
Binary file (11.6 kB). View file
|
|
|
llm_analysis/__pycache__/llm_analyzer.cpython-312.pyc
ADDED
|
Binary file (8.66 kB). View file
|
|
|
llm_analysis/__pycache__/prompt_templates.cpython-312.pyc
ADDED
|
Binary file (3.76 kB). View file
|
|
|
llm_analysis/langgraph_pipeline.py
ADDED
|
@@ -0,0 +1,225 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# llm_analysis/langgraph_pipeline.py - Structured Analysis Pipeline
|
| 2 |
+
from langgraph.graph import StateGraph, END
|
| 3 |
+
from typing import Dict, List, TypedDict
|
| 4 |
+
import json
|
| 5 |
+
from llm_analysis.llm_analyzer import LLMResumeAnalyzer
|
| 6 |
+
|
| 7 |
+
class AnalysisState(TypedDict):
    """State object for the analysis pipeline.

    One mutable mapping threaded through every LangGraph node: each node
    reads the fields it needs and writes its own results back into it.
    """
    resume_text: str            # raw resume text (pipeline input)
    jd_text: str                # raw job-description text (pipeline input)
    basic_scores: Dict          # keyword-match scores computed before the pipeline runs
    enhanced_skills: Dict       # written by the skills-extraction node
    llm_analysis: Dict          # written by the LLM-analysis node
    improvement_roadmap: Dict   # written by the roadmap-generation node
    final_result: Dict          # compiled success payload, or failure report
    current_step: str           # name of the step currently executing (for error reports)
    errors: List[str]           # accumulated error messages; non-empty routes flow to the error handler
|
| 18 |
+
|
| 19 |
+
class ResumeAnalysisPipeline:
    """LangGraph-powered structured analysis pipeline.

    Runs four stages in order -- skills extraction, LLM analysis, roadmap
    generation, final compilation -- with conditional routing to a terminal
    error handler whenever a stage records an error in ``state["errors"]``.
    """

    def __init__(self, model="x-ai/grok-4-fast:free"):
        """Create the underlying LLM analyzer and compile the workflow graph.

        Args:
            model: model identifier forwarded to LLMResumeAnalyzer.
        """
        self.llm_analyzer = LLMResumeAnalyzer(model=model)
        self.graph = self._create_pipeline()
        print("✅ LangGraph pipeline initialized")

    def _create_pipeline(self):
        """Create and compile the structured analysis workflow graph."""
        workflow = StateGraph(AnalysisState)

        # Nodes: one per analysis step, plus a terminal error handler.
        workflow.add_node("skills_extraction", self._extract_skills_node)
        workflow.add_node("llm_analysis", self._llm_analysis_node)
        workflow.add_node("roadmap_generation", self._roadmap_generation_node)
        workflow.add_node("final_compilation", self._final_compilation_node)
        workflow.add_node("error_handler", self._error_handler_node)

        workflow.set_entry_point("skills_extraction")

        # Unconditional edges. NOTE (fix): the outgoing edges from
        # "skills_extraction" and "llm_analysis" are defined ONLY by the
        # conditional edges below. The original code also registered plain
        # add_edge() calls from those same nodes; LangGraph traverses every
        # registered edge, so on an error BOTH the error handler and the
        # next stage would have run.
        workflow.add_edge("roadmap_generation", "final_compilation")
        workflow.add_edge("final_compilation", END)
        workflow.add_edge("error_handler", END)

        # Conditional edges: continue on success, divert on recorded errors.
        workflow.add_conditional_edges(
            "skills_extraction",
            self._should_continue,
            {
                "continue": "llm_analysis",
                "error": "error_handler",
            },
        )
        workflow.add_conditional_edges(
            "llm_analysis",
            self._should_continue,
            {
                "continue": "roadmap_generation",
                "error": "error_handler",
            },
        )

        return workflow.compile()

    def _should_continue(self, state: AnalysisState) -> str:
        """Router: return "error" if any step recorded an error, else "continue"."""
        if state.get("errors"):
            return "error"
        return "continue"

    def _extract_skills_node(self, state: AnalysisState) -> AnalysisState:
        """Node 1: enhanced skills extraction from the resume text."""
        try:
            state["current_step"] = "skills_extraction"
            print("🔍 LangGraph: Extracting skills...")

            enhanced_skills = self.llm_analyzer.enhance_skills_extraction(state["resume_text"])
            state["enhanced_skills"] = enhanced_skills

            print("✅ LangGraph: Skills extraction completed")
            return state

        except Exception as e:
            # Record the failure; the conditional router diverts to error_handler.
            state["errors"].append(f"Skills extraction failed: {str(e)}")
            return state

    def _llm_analysis_node(self, state: AnalysisState) -> AnalysisState:
        """Node 2: LLM-powered resume-vs-JD analysis."""
        try:
            state["current_step"] = "llm_analysis"
            print("🧠 LangGraph: Running LLM analysis...")

            llm_analysis = self.llm_analyzer.analyze_resume_vs_jd(
                state["resume_text"],
                state["jd_text"],
                state["basic_scores"]
            )
            state["llm_analysis"] = llm_analysis

            print("✅ LangGraph: LLM analysis completed")
            return state

        except Exception as e:
            state["errors"].append(f"LLM analysis failed: {str(e)}")
            return state

    def _roadmap_generation_node(self, state: AnalysisState) -> AnalysisState:
        """Node 3: improvement roadmap generation from the LLM analysis."""
        try:
            state["current_step"] = "roadmap_generation"
            print("🗺️ LangGraph: Generating improvement roadmap...")

            roadmap = self.llm_analyzer.generate_improvement_roadmap(state["llm_analysis"])
            state["improvement_roadmap"] = roadmap

            print("✅ LangGraph: Roadmap generation completed")
            return state

        except Exception as e:
            state["errors"].append(f"Roadmap generation failed: {str(e)}")
            return state

    def _final_compilation_node(self, state: AnalysisState) -> AnalysisState:
        """Node 4: compile every stage's output into the final result dict."""
        try:
            state["current_step"] = "final_compilation"
            print("📊 LangGraph: Compiling final results...")

            final_result = {
                "basic_scores": state["basic_scores"],
                "enhanced_skills": state["enhanced_skills"],
                "llm_analysis": state["llm_analysis"],
                "improvement_roadmap": state["improvement_roadmap"],
                "pipeline_status": "completed",
                "processing_steps": ["skills_extraction", "llm_analysis", "roadmap_generation", "compilation"]
            }

            state["final_result"] = final_result
            print("✅ LangGraph: Pipeline completed successfully")
            return state

        except Exception as e:
            state["errors"].append(f"Final compilation failed: {str(e)}")
            return state

    def _error_handler_node(self, state: AnalysisState) -> AnalysisState:
        """Terminal node: package recorded errors and any partial results."""
        print(f"❌ LangGraph: Handling errors - {len(state['errors'])} error(s)")

        state["final_result"] = {
            "pipeline_status": "failed",
            "errors": state["errors"],
            "last_successful_step": state.get("current_step", "unknown"),
            "partial_results": {
                "basic_scores": state.get("basic_scores", {}),
                "enhanced_skills": state.get("enhanced_skills", {}),
                "llm_analysis": state.get("llm_analysis", {}),
                "improvement_roadmap": state.get("improvement_roadmap", {})
            }
        }
        return state

    def run_structured_analysis(self, resume_text: str, jd_text: str, basic_scores: Dict) -> Dict:
        """Run the complete structured analysis pipeline.

        Args:
            resume_text: raw resume text.
            jd_text: raw job-description text.
            basic_scores: keyword-match scores computed upstream.

        Returns:
            The ``final_result`` dict produced by the graph, or a
            ``critical_failure`` payload if graph execution itself raised.
        """
        print("🚀 Starting LangGraph structured analysis pipeline...")

        initial_state = AnalysisState(
            resume_text=resume_text,
            jd_text=jd_text,
            basic_scores=basic_scores,
            enhanced_skills={},
            llm_analysis={},
            improvement_roadmap={},
            final_result={},
            current_step="initializing",
            errors=[]
        )

        try:
            final_state = self.graph.invoke(initial_state)

            print("✅ LangGraph pipeline execution completed")
            return final_state["final_result"]

        except Exception as e:
            print(f"❌ LangGraph pipeline failed: {e}")
            return {
                "pipeline_status": "critical_failure",
                "error": str(e),
                "basic_scores": basic_scores
            }
|
| 204 |
+
|
| 205 |
+
# Test function
def test_langgraph_pipeline():
    """Smoke-test the LangGraph pipeline end to end with toy inputs."""
    pipeline = ResumeAnalysisPipeline()

    resume = "Python developer with React experience"
    job_description = "Looking for Python developer with React skills"
    keyword_scores = {
        "score": 75,
        "matched_skills": ["python", "react"],
        "missing_skills": ["docker"],
        "matched_count": 2,
        "total_jd_skills": 3,
    }

    outcome = pipeline.run_structured_analysis(resume, job_description, keyword_scores)
    print(f"✅ LangGraph test completed: {outcome.get('pipeline_status', 'unknown')}")
    return outcome.get('pipeline_status') == 'completed'

if __name__ == "__main__":
    test_langgraph_pipeline()
|
llm_analysis/langsmith_logger.py
ADDED
|
@@ -0,0 +1,265 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# llm_analysis/langsmith_logger.py - LangSmith Observability & Debugging
|
| 2 |
+
import os
|
| 3 |
+
import json
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
from typing import Dict, Any, Optional
|
| 6 |
+
import uuid
|
| 7 |
+
|
| 8 |
+
# Note: LangSmith requires API key for full functionality
|
| 9 |
+
# For hackathon demo, we'll create a local logging system that mimics LangSmith
|
| 10 |
+
|
| 11 |
+
class LangSmithLogger:
    """LangSmith-style logging and observability for LLM chains.

    Writes JSON-lines events (traces, LLM calls, metrics, evaluations) to
    local log files, mimicking LangSmith for environments without an API key.
    """

    def __init__(self, project_name="resume-relevance-system", logs_dir="logs"):
        """Initialize the logger and its session.

        Args:
            project_name: label attached to every trace event.
            logs_dir: directory for the .jsonl log files (created if missing;
                defaults to "logs" for backward compatibility).
        """
        self.project_name = project_name
        self.session_id = str(uuid.uuid4())  # one session per logger instance
        self.logs_dir = logs_dir
        os.makedirs(self.logs_dir, exist_ok=True)

        # Initialize log files (append-only JSON lines).
        self.trace_log = f"{self.logs_dir}/langsmith_traces.jsonl"
        self.metrics_log = f"{self.logs_dir}/langsmith_metrics.jsonl"

        print(f"✅ LangSmith Logger initialized - Project: {project_name}")
        print(f"📊 Session ID: {self.session_id}")

    def start_trace(self, trace_name: str, inputs: Dict[str, Any]) -> str:
        """Start a new trace for an LLM chain and return its trace id."""
        trace_id = str(uuid.uuid4())

        trace_start = {
            "trace_id": trace_id,
            "session_id": self.session_id,
            "project_name": self.project_name,
            "trace_name": trace_name,
            # NOTE: utcnow() produces naive UTC timestamps; kept for log-format
            # stability, though datetime.now(timezone.utc) is preferred today.
            "start_time": datetime.utcnow().isoformat(),
            "inputs": inputs,
            "status": "started",
            "type": "trace_start"
        }

        self._log_event(trace_start, self.trace_log)
        print(f"🔍 LangSmith: Started trace '{trace_name}' - ID: {trace_id[:8]}...")
        return trace_id

    def end_trace(self, trace_id: str, outputs: Dict[str, Any],
                  status: str = "success", error: Optional[str] = None,
                  token_usage: Optional[Dict] = None):
        """End a trace with its results, status, and optional error/usage."""

        trace_end = {
            "trace_id": trace_id,
            "session_id": self.session_id,
            "end_time": datetime.utcnow().isoformat(),
            "outputs": outputs,
            "status": status,
            "error": error,
            "token_usage": token_usage or {},
            "type": "trace_end"
        }

        self._log_event(trace_end, self.trace_log)
        status_emoji = "✅" if status == "success" else "❌"
        print(f"{status_emoji} LangSmith: Ended trace {trace_id[:8]}... - Status: {status}")

    def log_llm_call(self, trace_id: str, step_name: str,
                     prompt: str, response: str, model: str,
                     latency_ms: float, token_usage: Optional[Dict] = None):
        """Log an individual LLM call within a trace."""

        llm_call = {
            "trace_id": trace_id,
            "step_name": step_name,
            "timestamp": datetime.utcnow().isoformat(),
            "model": model,
            # Truncate long prompts/responses to keep log lines bounded.
            "prompt": prompt[:500] + "..." if len(prompt) > 500 else prompt,
            "response": response[:500] + "..." if len(response) > 500 else response,
            "latency_ms": latency_ms,
            "token_usage": token_usage or {},
            "type": "llm_call"
        }

        self._log_event(llm_call, self.trace_log)
        print(f"🤖 LangSmith: LLM call logged - {step_name} ({latency_ms:.1f}ms)")

    def log_metrics(self, metrics: Dict[str, Any]):
        """Log a dict of performance metrics for this session."""

        metric_entry = {
            "session_id": self.session_id,
            "timestamp": datetime.utcnow().isoformat(),
            "metrics": metrics,
            "type": "metrics"
        }

        self._log_event(metric_entry, self.metrics_log)
        print(f"📊 LangSmith: Metrics logged - {list(metrics.keys())}")

    def log_evaluation(self, trace_id: str, evaluation_results: Dict[str, Any]):
        """Log evaluation results for testing and debugging."""

        evaluation = {
            "trace_id": trace_id,
            "timestamp": datetime.utcnow().isoformat(),
            "evaluation_results": evaluation_results,
            "type": "evaluation"
        }

        self._log_event(evaluation, self.trace_log)
        print(f"🧪 LangSmith: Evaluation logged for trace {trace_id[:8]}...")

    def _log_event(self, event: Dict[str, Any], log_file: str):
        """Append one event as a JSON line; log failures without raising."""
        try:
            with open(log_file, 'a', encoding='utf-8') as f:
                f.write(json.dumps(event) + '\n')
        except Exception as e:
            # Best-effort logging: never let observability break the pipeline.
            print(f"⚠️ LangSmith: Failed to write log - {e}")

    def get_session_summary(self) -> Dict[str, Any]:
        """Summarize this session's traces and metrics from the log files."""
        try:
            traces = []
            metrics = []

            # Read trace logs; keep only trace_start events from this session.
            if os.path.exists(self.trace_log):
                with open(self.trace_log, 'r', encoding='utf-8') as f:
                    for line in f:
                        if line.strip():
                            event = json.loads(line.strip())
                            if event.get("session_id") == self.session_id:
                                if event.get("type") == "trace_start":
                                    traces.append(event)

            # Read metrics logs for this session.
            if os.path.exists(self.metrics_log):
                with open(self.metrics_log, 'r', encoding='utf-8') as f:
                    for line in f:
                        if line.strip():
                            event = json.loads(line.strip())
                            if event.get("session_id") == self.session_id:
                                metrics.append(event)

            return {
                "session_id": self.session_id,
                "project_name": self.project_name,
                "total_traces": len(traces),
                "total_metrics": len(metrics),
                "traces": traces[-5:],   # last 5 traces
                "metrics": metrics[-5:]  # last 5 metrics
            }

        except Exception as e:
            print(f"⚠️ LangSmith: Failed to get session summary - {e}")
            return {"error": str(e)}

    def export_session_data(self, filename: Optional[str] = None) -> str:
        """Export the session summary to a JSON file; return its path ('' on failure)."""
        if not filename:
            filename = f"{self.logs_dir}/session_{self.session_id[:8]}_export.json"

        summary = self.get_session_summary()

        try:
            with open(filename, 'w', encoding='utf-8') as f:
                json.dump(summary, f, indent=2)

            # FIX: original printed a literal "(unknown)" instead of the path.
            print(f"📁 LangSmith: Session data exported to {filename}")
            return filename

        except Exception as e:
            print(f"❌ LangSmith: Export failed - {e}")
            return ""
|
| 175 |
+
|
| 176 |
+
# Global logger instance shared by the tracing decorator below.
logger = LangSmithLogger()

def trace_llm_analysis(func):
    """Decorator that traces an LLM analysis function.

    Opens a trace before each call, records latency and success/failure
    metrics, closes the trace, and re-raises any exception after logging.
    """
    from functools import wraps  # local import keeps this block self-contained

    @wraps(func)  # FIX: preserve __name__/__doc__ of the wrapped function
    def wrapper(*args, **kwargs):
        # Start trace (argument values are not logged, only their shape).
        trace_id = logger.start_trace(
            func.__name__,
            {"args_count": len(args), "kwargs": list(kwargs.keys())}
        )

        start_time = datetime.utcnow()

        try:
            result = func(*args, **kwargs)

            # Latency in milliseconds.
            end_time = datetime.utcnow()
            latency = (end_time - start_time).total_seconds() * 1000

            logger.end_trace(
                trace_id,
                {"result_type": type(result).__name__},
                "success"
            )

            logger.log_metrics({
                "function": func.__name__,
                "latency_ms": latency,
                "success": True
            })

            return result

        except Exception as e:
            # Record the failure, then propagate it unchanged.
            logger.end_trace(
                trace_id,
                {},
                "error",
                str(e)
            )

            logger.log_metrics({
                "function": func.__name__,
                "success": False,
                "error": str(e)
            })

            raise  # FIX: bare raise preserves the original traceback

    return wrapper
|
| 232 |
+
|
| 233 |
+
# Test function
def test_langsmith_logging():
    """Exercise the LangSmith-style logger end to end."""

    # Trace lifecycle: start, one LLM call, end.
    tid = logger.start_trace("test_analysis", {"test": True})

    logger.log_llm_call(
        tid,
        "test_llm_call",
        "Test prompt",
        "Test response",
        "grok-4-fast",
        150.5,
        {"tokens": 100},
    )

    logger.end_trace(tid, {"test_result": "success"}, "success")

    # Standalone metrics event.
    logger.log_metrics({"test_metric": 95.5, "accuracy": 0.85})

    # Summarize and report.
    session = logger.get_session_summary()
    print(f"✅ LangSmith test completed - {session['total_traces']} traces logged")

    return session['total_traces'] > 0

if __name__ == "__main__":
    test_langsmith_logging()
|
llm_analysis/llm_analyzer.py
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# llm_analysis/llm_analyzer.py
|
| 2 |
+
import os
|
| 3 |
+
import json
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
+
from langchain_openai import ChatOpenAI
|
| 6 |
+
from langchain.prompts import ChatPromptTemplate
|
| 7 |
+
from llm_analysis.prompt_templates import (
|
| 8 |
+
RESUME_ANALYSIS_PROMPT,
|
| 9 |
+
IMPROVEMENT_ROADMAP_PROMPT,
|
| 10 |
+
SKILLS_ENHANCEMENT_PROMPT
|
| 11 |
+
)
|
| 12 |
+
|
| 13 |
+
load_dotenv()
|
| 14 |
+
|
| 15 |
+
class LLMResumeAnalyzer:
    """LLM-backed resume/JD analysis built on LangChain chat models.

    Every public method degrades gracefully: if the LLM call or the JSON
    parsing of its reply fails, a deterministic fallback payload is returned
    instead of raising.
    """

    def __init__(self, model=None):
        """Initialize the chat model.

        Args:
            model: explicit model name; falls back to the OPENAI_MODEL env
                var and finally to "gpt-3.5-turbo".

        Raises:
            ValueError: if OPENAI_API_KEY is not set.
        """
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError("❌ OPENAI_API_KEY not found in .env file")

        # Use the provided model, or fall back to environment variable/default
        llm_model = model or os.getenv("OPENAI_MODEL", "gpt-3.5-turbo")

        self.llm = ChatOpenAI(
            model=llm_model,
            temperature=0.2,  # low temperature for consistent, repeatable output
            api_key=api_key
        )

        print(f"✅ LLM Analyzer initialized successfully with model: {llm_model}")

    def _invoke_json_chain(self, system_message, human_template, inputs):
        """Build a system+human prompt, invoke the LLM, and parse its JSON reply.

        Shared helper extracted from the three public analysis methods, which
        previously duplicated this prompt/chain/parse boilerplate.

        Raises:
            json.JSONDecodeError: if the model reply is not valid JSON.
            Exception: anything raised by the underlying LLM call.
        """
        prompt = ChatPromptTemplate.from_messages([
            ("system", system_message),
            ("human", human_template)
        ])
        chain = prompt | self.llm
        response = chain.invoke(inputs)
        return json.loads(response.content)

    def analyze_resume_vs_jd(self, resume_text, jd_text, keyword_match_data):
        """Comprehensive LLM-powered resume analysis against a job description."""
        print("🤖 Running LLM analysis...")

        try:
            analysis = self._invoke_json_chain(
                "You are an expert HR recruiter and technical hiring manager.",
                RESUME_ANALYSIS_PROMPT,
                {
                    "resume_text": resume_text[:3000],  # truncate to avoid token limits
                    "jd_text": jd_text[:2000],
                    "matched_count": keyword_match_data.get("matched_count", 0),
                    "total_skills": keyword_match_data.get("total_jd_skills", 0),
                    "matched_skills": ", ".join(keyword_match_data.get("matched_skills", [])),
                    "missing_skills": ", ".join(keyword_match_data.get("missing_skills", [])),
                    "coverage_percentage": keyword_match_data.get("score", 0)
                }
            )
            print("✅ LLM analysis completed successfully")
            return analysis

        except json.JSONDecodeError as e:
            print(f"⚠️ JSON parsing error: {e}")
            return self._create_fallback_analysis(keyword_match_data)
        except Exception as e:
            print(f"❌ LLM analysis error: {e}")
            return self._create_fallback_analysis(keyword_match_data)

    def generate_improvement_roadmap(self, analysis_results):
        """Generate a detailed improvement roadmap from prior analysis results."""
        print("🗺️ Generating improvement roadmap...")

        try:
            roadmap = self._invoke_json_chain(
                "You are a career coach specializing in tech careers.",
                IMPROVEMENT_ROADMAP_PROMPT,
                {"analysis_results": json.dumps(analysis_results, indent=2)}
            )
            print("✅ Improvement roadmap generated successfully")
            return roadmap

        except Exception as e:
            print(f"❌ Roadmap generation error: {e}")
            return self._create_fallback_roadmap()

    def enhance_skills_extraction(self, text):
        """Use the LLM to extract and categorize skills from free text."""
        print("🧠 Enhancing skills extraction with LLM...")

        try:
            skills_data = self._invoke_json_chain(
                "You are a technical skills extraction specialist.",
                SKILLS_ENHANCEMENT_PROMPT,
                {"text": text[:2000]}  # truncate to avoid token limits
            )
            print("✅ Skills enhancement completed")
            return skills_data

        except Exception as e:
            print(f"❌ Skills enhancement error: {e}")
            return {"all_technical_skills": [], "error": str(e)}

    def _create_fallback_analysis(self, keyword_data):
        """Deterministic analysis payload used when the LLM call fails."""
        return {
            # Map the 0-100 keyword score onto a 1-10 fit score.
            "overall_fit_score": max(1, int(keyword_data.get("score", 0) / 10)),
            "experience_alignment": "Unable to assess - manual review needed",
            "key_strengths": ["Technical skills present in resume"],
            "critical_gaps": keyword_data.get("missing_skills", [])[:3],
            "role_suitability": "Medium - based on keyword match only",
            "improvement_suggestions": ["Add missing technical skills", "Improve resume formatting"],
            "recommended_skills_to_learn": keyword_data.get("missing_skills", [])[:3],
            "project_recommendations": ["Build projects showcasing missing skills"],
            "certification_suggestions": ["Relevant industry certifications"],
            "interview_readiness": "Moderate preparation needed",
            "salary_expectations": "Market standard for skill level",
            "final_verdict": "Automated analysis only - requires manual review"
        }

    def _create_fallback_roadmap(self):
        """Deterministic roadmap payload used when the LLM call fails."""
        return {
            "immediate_actions": ["Update resume with missing skills", "Clean up resume formatting"],
            "week_1_plan": ["Research missing skills", "Start online tutorials"],
            "month_1_plan": ["Complete beginner courses", "Build first project"],
            "month_3_plan": ["Build portfolio", "Apply for relevant positions"],
            "priority_skills": ["As identified in job description"],
            "learning_resources": {
                "free_courses": ["freeCodeCamp", "Coursera free courses"],
                "paid_courses": ["Udemy", "Pluralsight"],
                "books": ["Technical books for identified skills"],
                "practice_platforms": ["LeetCode", "HackerRank"]
            },
            "portfolio_improvements": ["Build 2-3 projects showcasing skills"],
            "networking_suggestions": ["Join LinkedIn groups", "Attend tech meetups"],
            "quick_wins": ["Update LinkedIn profile", "Get recommendations"],
            "estimated_timeline": "3-6 months for significant improvement"
        }
|
| 150 |
+
|
| 151 |
+
# Test LLM connectivity
|
| 152 |
+
def test_llm_connection():
    """Smoke-test the configured LLM; return True on success, False otherwise."""
    # Ask for a trivial, fixed JSON reply so the response can be parsed.
    probe = "Say 'Hello, LLM is working!' in JSON format: {\"status\": \"working\", \"message\": \"Hello, LLM is working!\"}"
    try:
        analyzer = LLMResumeAnalyzer()
        print("🧪 Testing LLM connection...")

        reply = analyzer.llm.invoke(probe)
        test_response = json.loads(reply.content)
    except Exception as e:
        # Any failure (construction, network, parsing) means "not working".
        print(f"❌ LLM Test Failed: {e}")
        return False

    print(f"✅ LLM Test Result: {test_response}")
    return True
|
| 168 |
+
|
| 169 |
+
# Allow running this module directly as an LLM connectivity smoke test.
if __name__ == "__main__":
    test_llm_connection()
|
llm_analysis/prompt_templates.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# llm_analysis/prompt_templates.py
# Prompt templates for the LLM resume-analysis pipeline. All templates use
# str.format-style placeholders ({name}); literal braces in the JSON examples
# are escaped as {{ and }} so .format() leaves them intact.

# Full resume-vs-JD analysis. Placeholders: resume_text, jd_text, plus the
# keyword-match stats (matched_count, total_skills, matched_skills,
# missing_skills, coverage_percentage). The model is asked to reply with the
# JSON object sketched below.
RESUME_ANALYSIS_PROMPT = """You are an expert HR recruiter analyzing resumes against job descriptions.

RESUME:
{resume_text}

JOB DESCRIPTION:
{jd_text}

KEYWORD MATCH ANALYSIS:
- Matched Skills ({matched_count}/{total_skills}): {matched_skills}
- Missing Skills: {missing_skills}
- Coverage: {coverage_percentage}%

Please provide a comprehensive analysis in JSON format:
{{
"overall_fit_score": <0-10 integer>,
"experience_alignment": "<brief assessment of experience match>",
"key_strengths": ["<strength1>", "<strength2>", "<strength3>"],
"critical_gaps": ["<gap1>", "<gap2>", "<gap3>"],
"role_suitability": "<High/Medium/Low with reasoning>",
"improvement_suggestions": ["<actionable suggestion1>", "<actionable suggestion2>"],
"recommended_skills_to_learn": ["<skill1>", "<skill2>", "<skill3>"],
"project_recommendations": ["<project idea1>", "<project idea2>"],
"certification_suggestions": ["<cert1>", "<cert2>"],
"interview_readiness": "<assessment of interview preparation needed>",
"salary_expectations": "<realistic salary range assessment>",
"final_verdict": "<detailed reasoning for recommendation>"
}}

Focus on being practical, specific, and actionable in your recommendations."""

# Turns a completed analysis (placeholder: analysis_results) into a concrete
# learning roadmap. The expected reply schema matches _create_fallback_roadmap.
IMPROVEMENT_ROADMAP_PROMPT = """Based on this resume analysis, create a detailed improvement roadmap for the candidate.

ANALYSIS RESULTS:
{analysis_results}

Create a structured improvement plan in JSON format:
{{
"immediate_actions": ["<action that can be done today>", "<another immediate action>"],
"week_1_plan": ["<specific task for week 1>", "<another week 1 task>"],
"month_1_plan": ["<month 1 goal>", "<another month 1 goal>"],
"month_3_plan": ["<3 month goal>", "<another 3 month goal>"],
"priority_skills": ["<highest priority skill>", "<second priority>", "<third priority>"],
"learning_resources": {{
"free_courses": ["<course recommendation>", "<another course>"],
"paid_courses": ["<premium course>", "<another premium course>"],
"books": ["<book recommendation>", "<another book>"],
"practice_platforms": ["<platform>", "<another platform>"]
}},
"portfolio_improvements": ["<specific project to build>", "<another project>"],
"networking_suggestions": ["<networking advice>", "<another networking tip>"],
"quick_wins": ["<easy improvement>", "<another quick win>"],
"estimated_timeline": "<realistic timeline to become job-ready>"
}}

Be specific with course names, book titles, and platform recommendations."""

# Extracts and categorizes every technical skill from free text (placeholder:
# text). Used by enhance_skills_extraction; reply must be the JSON sketched
# below, including an "all_technical_skills" aggregate list.
SKILLS_ENHANCEMENT_PROMPT = """Analyze the following text and extract ALL technical skills, then categorize and enhance the skills list.

TEXT TO ANALYZE:
{text}

Extract and categorize skills comprehensively in JSON format:
{{
"programming_languages": ["<language1>", "<language2>"],
"web_frameworks": ["<framework1>", "<framework2>"],
"databases": ["<db1>", "<db2>"],
"cloud_platforms": ["<platform1>", "<platform2>"],
"devops_tools": ["<tool1>", "<tool2>"],
"testing_tools": ["<tool1>", "<tool2>"],
"development_tools": ["<tool1>", "<tool2>"],
"soft_skills": ["<skill1>", "<skill2>"],
"methodologies": ["<methodology1>", "<methodology2>"],
"all_technical_skills": ["<comprehensive list of all technical skills found>"],
"skill_proficiency_estimate": {{
"<skill>": "<Beginner/Intermediate/Advanced based on context>",
"<another_skill>": "<proficiency_level>"
}}
}}

Be thorough and include variations (e.g., JS and JavaScript, k8s and Kubernetes)."""
|
logs/langsmith_metrics.jsonl
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"session_id": "937ab2f4-55f4-416e-a752-940d0160ca92", "timestamp": "2025-09-20T10:28:09.968967", "metrics": {"analysis_success": true, "resume_length": 2238, "jd_length": 3149, "skills_found": 19, "pipeline_status": "completed", "enhanced_scoring": false}, "type": "metrics"}
|
| 2 |
+
{"session_id": "937ab2f4-55f4-416e-a752-940d0160ca92", "timestamp": "2025-09-20T10:28:09.971970", "metrics": {"function": "complete_ai_analysis", "latency_ms": 3343.7670000000003, "success": true}, "type": "metrics"}
|
| 3 |
+
{"session_id": "02aaade7-76fc-4af0-99b1-42d6894bd1c1", "timestamp": "2025-09-20T10:32:17.648118", "metrics": {"analysis_success": true, "resume_length": 2238, "jd_length": 3149, "skills_found": 19, "pipeline_status": "completed", "enhanced_scoring": false}, "type": "metrics"}
|
| 4 |
+
{"session_id": "02aaade7-76fc-4af0-99b1-42d6894bd1c1", "timestamp": "2025-09-20T10:32:17.654122", "metrics": {"function": "complete_ai_analysis", "latency_ms": 1884.951, "success": true}, "type": "metrics"}
|
| 5 |
+
{"session_id": "eb186856-10a3-4850-b5a2-aea5a5b70569", "timestamp": "2025-09-20T11:21:59.326340", "metrics": {"api_success": true, "final_score": 35.0, "pipeline_used": true}, "type": "metrics"}
|
| 6 |
+
{"session_id": "eb186856-10a3-4850-b5a2-aea5a5b70569", "timestamp": "2025-09-20T11:21:59.332369", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 3593.709, "success": true}, "type": "metrics"}
|
| 7 |
+
{"session_id": "eb186856-10a3-4850-b5a2-aea5a5b70569", "timestamp": "2025-09-20T11:27:16.659867", "metrics": {"api_success": true, "final_score": 35.0, "pipeline_used": true}, "type": "metrics"}
|
| 8 |
+
{"session_id": "eb186856-10a3-4850-b5a2-aea5a5b70569", "timestamp": "2025-09-20T11:27:16.662861", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 1167.566, "success": true}, "type": "metrics"}
|
| 9 |
+
{"session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "timestamp": "2025-09-20T15:38:04.483122", "metrics": {"api_success": true, "final_score": 35.0, "pipeline_used": true}, "type": "metrics"}
|
| 10 |
+
{"session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "timestamp": "2025-09-20T15:38:04.489226", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 3661.242, "success": true}, "type": "metrics"}
|
| 11 |
+
{"session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "timestamp": "2025-09-20T16:02:44.109780", "metrics": {"api_success": true, "final_score": 35.0, "pipeline_used": true}, "type": "metrics"}
|
| 12 |
+
{"session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "timestamp": "2025-09-20T16:02:44.111775", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 929.667, "success": true}, "type": "metrics"}
|
| 13 |
+
{"session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "timestamp": "2025-09-20T16:09:57.021715", "metrics": {"api_success": true, "final_score": 36.668, "pipeline_used": true}, "type": "metrics"}
|
| 14 |
+
{"session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "timestamp": "2025-09-20T16:09:57.026900", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 912.856, "success": true}, "type": "metrics"}
|
| 15 |
+
{"session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "timestamp": "2025-09-20T16:11:55.042808", "metrics": {"api_success": true, "final_score": 36.668, "pipeline_used": true}, "type": "metrics"}
|
| 16 |
+
{"session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "timestamp": "2025-09-20T16:11:55.047901", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 1488.064, "success": true}, "type": "metrics"}
|
| 17 |
+
{"session_id": "ecfa3ce6-e5a8-495f-a24b-5ecc327807c7", "timestamp": "2025-09-20T16:19:28.468185", "metrics": {"api_success": true, "final_score": 43.332, "pipeline_used": true}, "type": "metrics"}
|
| 18 |
+
{"session_id": "ecfa3ce6-e5a8-495f-a24b-5ecc327807c7", "timestamp": "2025-09-20T16:19:28.473178", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 3668.7690000000002, "success": true}, "type": "metrics"}
|
| 19 |
+
{"session_id": "22bf6f9e-528f-4efd-9195-cc2d2d0050e6", "timestamp": "2025-09-20T16:22:17.448927", "metrics": {"api_success": true, "final_score": 43.332, "pipeline_used": true}, "type": "metrics"}
|
| 20 |
+
{"session_id": "22bf6f9e-528f-4efd-9195-cc2d2d0050e6", "timestamp": "2025-09-20T16:22:17.453922", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 3874.583, "success": true}, "type": "metrics"}
|
| 21 |
+
{"session_id": "22bf6f9e-528f-4efd-9195-cc2d2d0050e6", "timestamp": "2025-09-20T16:26:09.359080", "metrics": {"api_success": true, "final_score": 43.332, "pipeline_used": true}, "type": "metrics"}
|
| 22 |
+
{"session_id": "22bf6f9e-528f-4efd-9195-cc2d2d0050e6", "timestamp": "2025-09-20T16:26:09.366978", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 1227.908, "success": true}, "type": "metrics"}
|
| 23 |
+
{"session_id": "ae346dba-3fc8-4656-8e11-224b5c42499a", "timestamp": "2025-09-20T16:28:36.803003", "metrics": {"api_success": true, "final_score": 33.752, "pipeline_used": true}, "type": "metrics"}
|
| 24 |
+
{"session_id": "ae346dba-3fc8-4656-8e11-224b5c42499a", "timestamp": "2025-09-20T16:28:36.808437", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 3730.636, "success": true}, "type": "metrics"}
|
| 25 |
+
{"session_id": "7b932b93-6d28-42b0-acf4-597ec7761d18", "timestamp": "2025-09-20T16:32:52.095638", "metrics": {"api_success": true, "final_score": 43.332, "pipeline_used": true}, "type": "metrics"}
|
| 26 |
+
{"session_id": "7b932b93-6d28-42b0-acf4-597ec7761d18", "timestamp": "2025-09-20T16:32:52.098635", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 3076.762, "success": true}, "type": "metrics"}
|
| 27 |
+
{"session_id": "2c6d421a-fa05-427b-98fe-d9eb9330d3cb", "timestamp": "2025-09-21T01:22:16.389240", "metrics": {"api_success": true, "final_score": 32.5, "pipeline_used": true}, "type": "metrics"}
|
| 28 |
+
{"session_id": "2c6d421a-fa05-427b-98fe-d9eb9330d3cb", "timestamp": "2025-09-21T01:22:16.394244", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 2296.625, "success": true}, "type": "metrics"}
|
| 29 |
+
{"session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "timestamp": "2025-09-21T01:24:37.998103", "metrics": {"api_success": true, "final_score": 56.668000000000006, "pipeline_used": true}, "type": "metrics"}
|
| 30 |
+
{"session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "timestamp": "2025-09-21T01:24:38.001115", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 1919.5310000000002, "success": true}, "type": "metrics"}
|
| 31 |
+
{"session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "timestamp": "2025-09-21T01:27:16.385405", "metrics": {"api_success": true, "final_score": 56.668000000000006, "pipeline_used": true}, "type": "metrics"}
|
| 32 |
+
{"session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "timestamp": "2025-09-21T01:27:16.388509", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 1454.168, "success": true}, "type": "metrics"}
|
| 33 |
+
{"session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "timestamp": "2025-09-21T01:27:51.527938", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 12.616, "success": true}, "type": "metrics"}
|
| 34 |
+
{"session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "timestamp": "2025-09-21T01:28:26.866106", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 133.30700000000002, "success": true}, "type": "metrics"}
|
| 35 |
+
{"session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "timestamp": "2025-09-21T01:29:02.073814", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 19.596, "success": true}, "type": "metrics"}
|
| 36 |
+
{"session_id": "ba23f5ea-5bed-49f9-8a51-664393d8166f", "timestamp": "2025-09-21T01:40:09.312013", "metrics": {"api_success": true, "final_score": 56.668000000000006, "pipeline_used": true}, "type": "metrics"}
|
| 37 |
+
{"session_id": "ba23f5ea-5bed-49f9-8a51-664393d8166f", "timestamp": "2025-09-21T01:40:09.314913", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 2356.451, "success": true}, "type": "metrics"}
|
| 38 |
+
{"session_id": "c4915a7d-32e4-489f-b21f-42992acb373d", "timestamp": "2025-09-21T01:55:07.375404", "metrics": {"api_success": true, "final_score": 32.5, "pipeline_used": true}, "type": "metrics"}
|
| 39 |
+
{"session_id": "c4915a7d-32e4-489f-b21f-42992acb373d", "timestamp": "2025-09-21T01:55:07.378410", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 2503.252, "success": true}, "type": "metrics"}
|
| 40 |
+
{"session_id": "0b301bb0-8da6-4ede-ad07-4e19e41fe76b", "timestamp": "2025-09-21T02:19:01.012120", "metrics": {"api_success": true, "final_score": 56.668000000000006, "pipeline_used": true}, "type": "metrics"}
|
| 41 |
+
{"session_id": "0b301bb0-8da6-4ede-ad07-4e19e41fe76b", "timestamp": "2025-09-21T02:19:01.016125", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 2274.592, "success": true}, "type": "metrics"}
|
| 42 |
+
{"session_id": "acba97a8-88e4-428b-9390-783700f0235f", "timestamp": "2025-09-21T03:13:19.055138", "metrics": {"api_success": true, "final_score": 35.0, "pipeline_used": true}, "type": "metrics"}
|
| 43 |
+
{"session_id": "acba97a8-88e4-428b-9390-783700f0235f", "timestamp": "2025-09-21T03:13:19.059234", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 2120.11, "success": true}, "type": "metrics"}
|
| 44 |
+
{"session_id": "1579b742-a3e6-4eab-8ad4-f0f16fabeb44", "timestamp": "2025-09-21T03:30:37.071608", "metrics": {"api_success": true, "final_score": 43.332, "pipeline_used": true}, "type": "metrics"}
|
| 45 |
+
{"session_id": "1579b742-a3e6-4eab-8ad4-f0f16fabeb44", "timestamp": "2025-09-21T03:30:37.075742", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 2324.2560000000003, "success": true}, "type": "metrics"}
|
| 46 |
+
{"session_id": "f87d6485-e1ab-4f69-bf3c-a0f2454901d8", "timestamp": "2025-09-21T03:32:54.777227", "metrics": {"api_success": true, "final_score": 40.0, "pipeline_used": true}, "type": "metrics"}
|
| 47 |
+
{"session_id": "f87d6485-e1ab-4f69-bf3c-a0f2454901d8", "timestamp": "2025-09-21T03:32:54.786216", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 2556.451, "success": true}, "type": "metrics"}
|
| 48 |
+
{"session_id": "2f5d5adb-b2dd-4064-9478-239eb49bfcea", "timestamp": "2025-09-21T03:39:27.003374", "metrics": {"api_success": true, "final_score": 56.668000000000006, "pipeline_used": true}, "type": "metrics"}
|
| 49 |
+
{"session_id": "2f5d5adb-b2dd-4064-9478-239eb49bfcea", "timestamp": "2025-09-21T03:39:27.014265", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 4611.081, "success": true}, "type": "metrics"}
|
| 50 |
+
{"session_id": "732b0964-05ef-4f60-85a3-cf459228de7a", "timestamp": "2025-09-21T03:44:56.950150", "metrics": {"api_success": true, "final_score": 36.668, "pipeline_used": true}, "type": "metrics"}
|
| 51 |
+
{"session_id": "732b0964-05ef-4f60-85a3-cf459228de7a", "timestamp": "2025-09-21T03:44:56.954924", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 3159.484, "success": true}, "type": "metrics"}
|
| 52 |
+
{"session_id": "697a6f96-fb30-4958-9e15-a6ceb5892c41", "timestamp": "2025-09-21T03:51:12.817358", "metrics": {"api_success": true, "final_score": 36.668, "pipeline_used": true}, "type": "metrics"}
|
| 53 |
+
{"session_id": "697a6f96-fb30-4958-9e15-a6ceb5892c41", "timestamp": "2025-09-21T03:51:12.821360", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 2815.247, "success": true}, "type": "metrics"}
|
| 54 |
+
{"session_id": "c192918b-d4d5-4598-8177-47bbd2d80408", "timestamp": "2025-09-21T03:55:12.022321", "metrics": {"api_success": true, "final_score": 50.0, "pipeline_used": true}, "type": "metrics"}
|
| 55 |
+
{"session_id": "c192918b-d4d5-4598-8177-47bbd2d80408", "timestamp": "2025-09-21T03:55:12.025331", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 3375.04, "success": true}, "type": "metrics"}
|
| 56 |
+
{"session_id": "80a681b0-9d8d-4651-893c-c7373c734dc0", "timestamp": "2025-09-21T04:07:46.220611", "metrics": {"api_success": true, "final_score": 36.668, "pipeline_used": true}, "type": "metrics"}
|
| 57 |
+
{"session_id": "80a681b0-9d8d-4651-893c-c7373c734dc0", "timestamp": "2025-09-21T04:07:46.225181", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 26514.756999999998, "success": true}, "type": "metrics"}
|
logs/langsmith_traces.jsonl
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"trace_id": "a9220664-0b26-4e3f-b34d-14939e97460a", "session_id": "937ab2f4-55f4-416e-a752-940d0160ca92", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis", "start_time": "2025-09-20T10:28:06.626205", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 2 |
+
{"trace_id": "9194d1ec-9db4-45ec-a0c4-3ebef618c208", "session_id": "937ab2f4-55f4-416e-a752-940d0160ca92", "project_name": "resume-relevance-system", "trace_name": "complete_resume_analysis", "start_time": "2025-09-20T10:28:06.628203", "inputs": {"resume_file": "input/sample_resume.pdf", "jd_file": "input/sample_jd.pdf"}, "status": "started", "type": "trace_start"}
|
| 3 |
+
{"trace_id": "9194d1ec-9db4-45ec-a0c4-3ebef618c208", "session_id": "937ab2f4-55f4-416e-a752-940d0160ca92", "end_time": "2025-09-20T10:28:09.969969", "outputs": {"pipeline_status": "completed", "final_score": 5}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 4 |
+
{"trace_id": "a9220664-0b26-4e3f-b34d-14939e97460a", "session_id": "937ab2f4-55f4-416e-a752-940d0160ca92", "end_time": "2025-09-20T10:28:09.971970", "outputs": {"result_type": "NoneType"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 5 |
+
{"trace_id": "78931f3c-8c2f-4cf0-88a3-75ffd391e133", "session_id": "02aaade7-76fc-4af0-99b1-42d6894bd1c1", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis", "start_time": "2025-09-20T10:32:15.765176", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 6 |
+
{"trace_id": "7b53b614-d774-41fc-86fe-155ad7326413", "session_id": "02aaade7-76fc-4af0-99b1-42d6894bd1c1", "project_name": "resume-relevance-system", "trace_name": "complete_resume_analysis", "start_time": "2025-09-20T10:32:15.766169", "inputs": {"resume_file": "input/sample_resume.pdf", "jd_file": "input/sample_jd.pdf"}, "status": "started", "type": "trace_start"}
|
| 7 |
+
{"trace_id": "7b53b614-d774-41fc-86fe-155ad7326413", "session_id": "02aaade7-76fc-4af0-99b1-42d6894bd1c1", "end_time": "2025-09-20T10:32:17.649124", "outputs": {"pipeline_status": "completed", "final_score": 5}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 8 |
+
{"trace_id": "78931f3c-8c2f-4cf0-88a3-75ffd391e133", "session_id": "02aaade7-76fc-4af0-99b1-42d6894bd1c1", "end_time": "2025-09-20T10:32:17.651120", "outputs": {"result_type": "NoneType"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 9 |
+
{"trace_id": "49f5c4ab-d238-4faa-a175-0adf0f3920df", "session_id": "eb186856-10a3-4850-b5a2-aea5a5b70569", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-20T11:21:55.733567", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 10 |
+
{"trace_id": "5393826b-554b-421b-8790-1218ad6016dc", "session_id": "eb186856-10a3-4850-b5a2-aea5a5b70569", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-20T11:21:55.736654", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmplvkk7m14.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp6rbiooty.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 11 |
+
{"trace_id": "5393826b-554b-421b-8790-1218ad6016dc", "session_id": "eb186856-10a3-4850-b5a2-aea5a5b70569", "end_time": "2025-09-20T11:21:59.323345", "outputs": {"final_score": 35.0, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 12 |
+
{"trace_id": "49f5c4ab-d238-4faa-a175-0adf0f3920df", "session_id": "eb186856-10a3-4850-b5a2-aea5a5b70569", "end_time": "2025-09-20T11:21:59.330363", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 13 |
+
{"trace_id": "e302ca41-6236-4f05-97cd-d17a0ad75b37", "session_id": "eb186856-10a3-4850-b5a2-aea5a5b70569", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-20T11:27:15.492297", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 14 |
+
{"trace_id": "378fa23b-0d68-42ab-b430-bda462fe4e14", "session_id": "eb186856-10a3-4850-b5a2-aea5a5b70569", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-20T11:27:15.493296", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp0424aaqj.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpnalp728z.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 15 |
+
{"trace_id": "378fa23b-0d68-42ab-b430-bda462fe4e14", "session_id": "eb186856-10a3-4850-b5a2-aea5a5b70569", "end_time": "2025-09-20T11:27:16.654803", "outputs": {"final_score": 35.0, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 16 |
+
{"trace_id": "e302ca41-6236-4f05-97cd-d17a0ad75b37", "session_id": "eb186856-10a3-4850-b5a2-aea5a5b70569", "end_time": "2025-09-20T11:27:16.660862", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 17 |
+
{"trace_id": "a25e723f-9809-42da-b369-8d60667993bf", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-20T15:38:00.822976", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 18 |
+
{"trace_id": "1634d9e3-2e84-40a8-9873-3738f7facb35", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-20T15:38:00.825975", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp6ikrxzk6.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpjkujpprp.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 19 |
+
{"trace_id": "1634d9e3-2e84-40a8-9873-3738f7facb35", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "end_time": "2025-09-20T15:38:04.478225", "outputs": {"final_score": 35.0, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 20 |
+
{"trace_id": "a25e723f-9809-42da-b369-8d60667993bf", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "end_time": "2025-09-20T15:38:04.487217", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 21 |
+
{"trace_id": "09528e66-9a21-4ea6-8297-deb8cf96bcc4", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-20T16:02:43.180112", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 22 |
+
{"trace_id": "31c100c7-ea89-4df2-85cf-bcc2b1cc1397", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-20T16:02:43.181109", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp02rmxegc.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpoy2ydj6o.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 23 |
+
{"trace_id": "31c100c7-ea89-4df2-85cf-bcc2b1cc1397", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "end_time": "2025-09-20T16:02:44.108699", "outputs": {"final_score": 35.0, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 24 |
+
{"trace_id": "09528e66-9a21-4ea6-8297-deb8cf96bcc4", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "end_time": "2025-09-20T16:02:44.110776", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 25 |
+
{"trace_id": "6a185789-6d47-44e4-8cca-e0dedd2b22d7", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-20T16:09:56.110011", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 26 |
+
{"trace_id": "7e1ab0e9-03b8-4099-9dd9-5d92f68f4798", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-20T16:09:56.113023", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp8d87hwhf.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpa77tf5fb.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 27 |
+
{"trace_id": "7e1ab0e9-03b8-4099-9dd9-5d92f68f4798", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "end_time": "2025-09-20T16:09:57.017743", "outputs": {"final_score": 36.668, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 28 |
+
{"trace_id": "6a185789-6d47-44e4-8cca-e0dedd2b22d7", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "end_time": "2025-09-20T16:09:57.024881", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 29 |
+
{"trace_id": "6f031d08-1458-4330-84c1-550cdf7d2cc0", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-20T16:11:53.555834", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 30 |
+
{"trace_id": "d0a35be8-5789-4346-94d1-9e8dd8b48b6e", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-20T16:11:53.557800", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpyzkfzvg1.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp6qku_qze.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 31 |
+
{"trace_id": "d0a35be8-5789-4346-94d1-9e8dd8b48b6e", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "end_time": "2025-09-20T16:11:55.040910", "outputs": {"final_score": 36.668, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 32 |
+
{"trace_id": "6f031d08-1458-4330-84c1-550cdf7d2cc0", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "end_time": "2025-09-20T16:11:55.045864", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 33 |
+
{"trace_id": "d0ea9e90-42ba-4e34-a8e4-fa21eaf1b931", "session_id": "ecfa3ce6-e5a8-495f-a24b-5ecc327807c7", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-20T16:19:24.799410", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 34 |
+
{"trace_id": "870c9f17-e25a-4f3b-abf1-5e85a949b313", "session_id": "ecfa3ce6-e5a8-495f-a24b-5ecc327807c7", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-20T16:19:24.801413", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpytdykoki.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpmmugnx60.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 35 |
+
{"trace_id": "870c9f17-e25a-4f3b-abf1-5e85a949b313", "session_id": "ecfa3ce6-e5a8-495f-a24b-5ecc327807c7", "end_time": "2025-09-20T16:19:28.465273", "outputs": {"final_score": 43.332, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 36 |
+
{"trace_id": "d0ea9e90-42ba-4e34-a8e4-fa21eaf1b931", "session_id": "ecfa3ce6-e5a8-495f-a24b-5ecc327807c7", "end_time": "2025-09-20T16:19:28.470182", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 37 |
+
{"trace_id": "10094869-bf87-4b42-adef-76bba3e94cfd", "session_id": "22bf6f9e-528f-4efd-9195-cc2d2d0050e6", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-20T16:22:13.573970", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 38 |
+
{"trace_id": "02739c4d-646b-4a07-aba7-e67815a8aa98", "session_id": "22bf6f9e-528f-4efd-9195-cc2d2d0050e6", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-20T16:22:13.577347", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpztf2hnqs.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp8znkrcrb.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 39 |
+
{"trace_id": "02739c4d-646b-4a07-aba7-e67815a8aa98", "session_id": "22bf6f9e-528f-4efd-9195-cc2d2d0050e6", "end_time": "2025-09-20T16:22:17.445918", "outputs": {"final_score": 43.332, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 40 |
+
{"trace_id": "10094869-bf87-4b42-adef-76bba3e94cfd", "session_id": "22bf6f9e-528f-4efd-9195-cc2d2d0050e6", "end_time": "2025-09-20T16:22:17.451930", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 41 |
+
{"trace_id": "b4b1e1c1-ce7b-4f41-a78a-6efa65f3df7b", "session_id": "22bf6f9e-528f-4efd-9195-cc2d2d0050e6", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-20T16:26:08.130125", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 42 |
+
{"trace_id": "e0fba981-e2b7-4892-a9a0-155cf402bcad", "session_id": "22bf6f9e-528f-4efd-9195-cc2d2d0050e6", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-20T16:26:08.133123", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpvtdar3no.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmptw1nuu6d.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 43 |
+
{"trace_id": "e0fba981-e2b7-4892-a9a0-155cf402bcad", "session_id": "22bf6f9e-528f-4efd-9195-cc2d2d0050e6", "end_time": "2025-09-20T16:26:09.355068", "outputs": {"final_score": 43.332, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 44 |
+
{"trace_id": "b4b1e1c1-ce7b-4f41-a78a-6efa65f3df7b", "session_id": "22bf6f9e-528f-4efd-9195-cc2d2d0050e6", "end_time": "2025-09-20T16:26:09.361031", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 45 |
+
{"trace_id": "c4c0578d-0611-4b90-b757-578779274efe", "session_id": "ae346dba-3fc8-4656-8e11-224b5c42499a", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-20T16:28:33.069844", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 46 |
+
{"trace_id": "65c0aa4f-4494-43a4-8934-70169b717582", "session_id": "ae346dba-3fc8-4656-8e11-224b5c42499a", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-20T16:28:33.074357", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp89dd34s7.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp116yeyi3.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 47 |
+
{"trace_id": "65c0aa4f-4494-43a4-8934-70169b717582", "session_id": "ae346dba-3fc8-4656-8e11-224b5c42499a", "end_time": "2025-09-20T16:28:36.800094", "outputs": {"final_score": 33.752, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 48 |
+
{"trace_id": "c4c0578d-0611-4b90-b757-578779274efe", "session_id": "ae346dba-3fc8-4656-8e11-224b5c42499a", "end_time": "2025-09-20T16:28:36.804993", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 49 |
+
{"trace_id": "33f9fce0-9c3e-492a-9524-51c34a5db94d", "session_id": "7b932b93-6d28-42b0-acf4-597ec7761d18", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-20T16:32:49.017877", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 50 |
+
{"trace_id": "6e392f12-a809-4478-af07-9dbf8c47e537", "session_id": "7b932b93-6d28-42b0-acf4-597ec7761d18", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-20T16:32:49.019874", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpioc13nbs.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpapjq22q7.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 51 |
+
{"trace_id": "6e392f12-a809-4478-af07-9dbf8c47e537", "session_id": "7b932b93-6d28-42b0-acf4-597ec7761d18", "end_time": "2025-09-20T16:32:52.093645", "outputs": {"final_score": 43.332, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 52 |
+
{"trace_id": "33f9fce0-9c3e-492a-9524-51c34a5db94d", "session_id": "7b932b93-6d28-42b0-acf4-597ec7761d18", "end_time": "2025-09-20T16:32:52.096636", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 53 |
+
{"trace_id": "d823ef9f-2df5-4806-9afa-4fec03cc59de", "session_id": "2c6d421a-fa05-427b-98fe-d9eb9330d3cb", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T01:22:14.093539", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 54 |
+
{"trace_id": "61dcda6f-7572-4e56-b810-9aa8c09db991", "session_id": "2c6d421a-fa05-427b-98fe-d9eb9330d3cb", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T01:22:14.094535", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpk34qnxoz.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpinedpti6.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 55 |
+
{"trace_id": "61dcda6f-7572-4e56-b810-9aa8c09db991", "session_id": "2c6d421a-fa05-427b-98fe-d9eb9330d3cb", "end_time": "2025-09-21T01:22:16.388242", "outputs": {"final_score": 32.5, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 56 |
+
{"trace_id": "d823ef9f-2df5-4806-9afa-4fec03cc59de", "session_id": "2c6d421a-fa05-427b-98fe-d9eb9330d3cb", "end_time": "2025-09-21T01:22:16.391160", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 57 |
+
{"trace_id": "24695c1d-aad7-4e83-b253-a52949c1ff1e", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T01:24:36.076482", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 58 |
+
{"trace_id": "8142abf7-8623-4bd8-a0ec-b035eeea49de", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T01:24:36.080584", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpmq0nsda_.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpfy6mq83d.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 59 |
+
{"trace_id": "8142abf7-8623-4bd8-a0ec-b035eeea49de", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "end_time": "2025-09-21T01:24:37.994412", "outputs": {"final_score": 56.668000000000006, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 60 |
+
{"trace_id": "24695c1d-aad7-4e83-b253-a52949c1ff1e", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "end_time": "2025-09-21T01:24:38.000115", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 61 |
+
{"trace_id": "4bfd280e-c0f4-4e29-91ca-1cd28ce98c0e", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T01:27:14.928024", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 62 |
+
{"trace_id": "f5e0624d-5361-4d2e-a2c2-70a37f6d7859", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T01:27:14.932244", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpd8i2w90l.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpkbs8mbvf.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 63 |
+
{"trace_id": "f5e0624d-5361-4d2e-a2c2-70a37f6d7859", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "end_time": "2025-09-21T01:27:16.383406", "outputs": {"final_score": 56.668000000000006, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 64 |
+
{"trace_id": "4bfd280e-c0f4-4e29-91ca-1cd28ce98c0e", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "end_time": "2025-09-21T01:27:16.386412", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 65 |
+
{"trace_id": "936e4d74-a342-40d2-a9ec-ed7d697bdb91", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T01:27:51.511294", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 66 |
+
{"trace_id": "e2c67c0c-fb9d-428e-9b21-44d61304fd1e", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T01:27:51.513311", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp16rci5h5.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp7dwr_wtp.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 67 |
+
{"trace_id": "e2c67c0c-fb9d-428e-9b21-44d61304fd1e", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "end_time": "2025-09-21T01:27:51.522926", "outputs": {}, "status": "error", "error": "Cannot open empty file: filename='C:\\\\Users\\\\kusha\\\\AppData\\\\Local\\\\Temp\\\\tmp16rci5h5.pdf'.", "token_usage": {}, "type": "trace_end"}
|
| 68 |
+
{"trace_id": "936e4d74-a342-40d2-a9ec-ed7d697bdb91", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "end_time": "2025-09-21T01:27:51.524923", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 69 |
+
{"trace_id": "0e5ac59b-bedd-4fc8-b46d-f8acb4a418ee", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T01:28:26.729537", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 70 |
+
{"trace_id": "5a35f592-d45c-4576-b550-14ee205df0da", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T01:28:26.730536", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp59t8_l1r.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp2_n11hm5.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 71 |
+
{"trace_id": "5a35f592-d45c-4576-b550-14ee205df0da", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "end_time": "2025-09-21T01:28:26.861845", "outputs": {}, "status": "error", "error": "Cannot open empty file: filename='C:\\\\Users\\\\kusha\\\\AppData\\\\Local\\\\Temp\\\\tmp2_n11hm5.pdf'.", "token_usage": {}, "type": "trace_end"}
|
| 72 |
+
{"trace_id": "0e5ac59b-bedd-4fc8-b46d-f8acb4a418ee", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "end_time": "2025-09-21T01:28:26.863843", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 73 |
+
{"trace_id": "2033f188-874b-4779-93ed-cd939e13dc02", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T01:29:02.050099", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 74 |
+
{"trace_id": "765dc4e3-c115-4468-a2a1-928b7a708f0a", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T01:29:02.053217", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpelpe_kq9.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpd1tg607v.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 75 |
+
{"trace_id": "765dc4e3-c115-4468-a2a1-928b7a708f0a", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "end_time": "2025-09-21T01:29:02.070814", "outputs": {}, "status": "error", "error": "Cannot open empty file: filename='C:\\\\Users\\\\kusha\\\\AppData\\\\Local\\\\Temp\\\\tmpelpe_kq9.pdf'.", "token_usage": {}, "type": "trace_end"}
|
| 76 |
+
{"trace_id": "2033f188-874b-4779-93ed-cd939e13dc02", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "end_time": "2025-09-21T01:29:02.072813", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 77 |
+
{"trace_id": "05aafbe3-91d6-4567-88f7-971d4bf1cfa3", "session_id": "ba23f5ea-5bed-49f9-8a51-664393d8166f", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T01:40:06.952453", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 78 |
+
{"trace_id": "fa4cd081-5a89-4311-8d6b-a8bee0f4f95b", "session_id": "ba23f5ea-5bed-49f9-8a51-664393d8166f", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T01:40:06.957463", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpfpgea8mx.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp0mp2_3rx.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 79 |
+
{"trace_id": "fa4cd081-5a89-4311-8d6b-a8bee0f4f95b", "session_id": "ba23f5ea-5bed-49f9-8a51-664393d8166f", "end_time": "2025-09-21T01:40:09.302898", "outputs": {"final_score": 56.668000000000006, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 80 |
+
{"trace_id": "05aafbe3-91d6-4567-88f7-971d4bf1cfa3", "session_id": "ba23f5ea-5bed-49f9-8a51-664393d8166f", "end_time": "2025-09-21T01:40:09.313914", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 81 |
+
{"trace_id": "4e7b4fc5-3c0a-42dc-99e4-d7a895db19a3", "session_id": "c4915a7d-32e4-489f-b21f-42992acb373d", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T01:55:04.866221", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 82 |
+
{"trace_id": "eba0b151-f45f-4497-a18c-97968ba558a3", "session_id": "c4915a7d-32e4-489f-b21f-42992acb373d", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T01:55:04.874152", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpilnra4ly.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpifbcxjwl.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 83 |
+
{"trace_id": "eba0b151-f45f-4497-a18c-97968ba558a3", "session_id": "c4915a7d-32e4-489f-b21f-42992acb373d", "end_time": "2025-09-21T01:55:07.366397", "outputs": {"final_score": 32.5, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 84 |
+
{"trace_id": "4e7b4fc5-3c0a-42dc-99e4-d7a895db19a3", "session_id": "c4915a7d-32e4-489f-b21f-42992acb373d", "end_time": "2025-09-21T01:55:07.377404", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 85 |
+
{"trace_id": "f0c61041-ed71-4df3-b1ba-272bb3a1a1ca", "session_id": "0b301bb0-8da6-4ede-ad07-4e19e41fe76b", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T02:18:58.734780", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 86 |
+
{"trace_id": "7bbeada3-6822-421f-a326-bfa56bb0a2cd", "session_id": "0b301bb0-8da6-4ede-ad07-4e19e41fe76b", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T02:18:58.739429", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpw_kcul8t.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpllsrq7v8.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 87 |
+
{"trace_id": "7bbeada3-6822-421f-a326-bfa56bb0a2cd", "session_id": "0b301bb0-8da6-4ede-ad07-4e19e41fe76b", "end_time": "2025-09-21T02:19:01.005118", "outputs": {"final_score": 56.668000000000006, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 88 |
+
{"trace_id": "f0c61041-ed71-4df3-b1ba-272bb3a1a1ca", "session_id": "0b301bb0-8da6-4ede-ad07-4e19e41fe76b", "end_time": "2025-09-21T02:19:01.014021", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 89 |
+
{"trace_id": "334643be-5298-4b6c-8567-347271832f6a", "session_id": "acba97a8-88e4-428b-9390-783700f0235f", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T03:13:16.934134", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 90 |
+
{"trace_id": "4819d93c-4a91-4d3d-90d9-277d7be3381d", "session_id": "acba97a8-88e4-428b-9390-783700f0235f", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T03:13:16.937126", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpxhu7fkn7.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp9mtoh73r.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 91 |
+
{"trace_id": "4819d93c-4a91-4d3d-90d9-277d7be3381d", "session_id": "acba97a8-88e4-428b-9390-783700f0235f", "end_time": "2025-09-21T03:13:19.052137", "outputs": {"final_score": 35.0, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 92 |
+
{"trace_id": "334643be-5298-4b6c-8567-347271832f6a", "session_id": "acba97a8-88e4-428b-9390-783700f0235f", "end_time": "2025-09-21T03:13:19.057236", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 93 |
+
{"trace_id": "d0d18179-5ddb-493b-8c62-22229f06484e", "session_id": "1579b742-a3e6-4eab-8ad4-f0f16fabeb44", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T03:30:34.747377", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 94 |
+
{"trace_id": "4c0b2ff9-aced-442e-bba1-bd1e00c18cab", "session_id": "1579b742-a3e6-4eab-8ad4-f0f16fabeb44", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T03:30:34.749366", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp6mve9qq5.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpv5vjd46t.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 95 |
+
{"trace_id": "4c0b2ff9-aced-442e-bba1-bd1e00c18cab", "session_id": "1579b742-a3e6-4eab-8ad4-f0f16fabeb44", "end_time": "2025-09-21T03:30:37.066611", "outputs": {"final_score": 43.332, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 96 |
+
{"trace_id": "d0d18179-5ddb-493b-8c62-22229f06484e", "session_id": "1579b742-a3e6-4eab-8ad4-f0f16fabeb44", "end_time": "2025-09-21T03:30:37.073622", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 97 |
+
{"trace_id": "270ffb7c-a36a-4799-92cc-74ba938e58c5", "session_id": "f87d6485-e1ab-4f69-bf3c-a0f2454901d8", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T03:32:52.223767", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 98 |
+
{"trace_id": "250619ff-9a1a-4e1d-ab99-528591c4dd07", "session_id": "f87d6485-e1ab-4f69-bf3c-a0f2454901d8", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T03:32:52.226771", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpw4_p93qm.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpziz23bn2.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 99 |
+
{"trace_id": "250619ff-9a1a-4e1d-ab99-528591c4dd07", "session_id": "f87d6485-e1ab-4f69-bf3c-a0f2454901d8", "end_time": "2025-09-21T03:32:54.770212", "outputs": {"final_score": 40.0, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 100 |
+
{"trace_id": "270ffb7c-a36a-4799-92cc-74ba938e58c5", "session_id": "f87d6485-e1ab-4f69-bf3c-a0f2454901d8", "end_time": "2025-09-21T03:32:54.783222", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 101 |
+
{"trace_id": "c1c18675-903d-4d06-9b6d-d162dc6697aa", "session_id": "2f5d5adb-b2dd-4064-9478-239eb49bfcea", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T03:39:22.397308", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 102 |
+
{"trace_id": "aadff1c2-9852-4726-960d-af92c369fd8b", "session_id": "2f5d5adb-b2dd-4064-9478-239eb49bfcea", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T03:39:22.400313", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpp_yxirpc.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp36aqmv7r.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 103 |
+
{"trace_id": "aadff1c2-9852-4726-960d-af92c369fd8b", "session_id": "2f5d5adb-b2dd-4064-9478-239eb49bfcea", "end_time": "2025-09-21T03:39:26.991284", "outputs": {"final_score": 56.668000000000006, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 104 |
+
{"trace_id": "c1c18675-903d-4d06-9b6d-d162dc6697aa", "session_id": "2f5d5adb-b2dd-4064-9478-239eb49bfcea", "end_time": "2025-09-21T03:39:27.010387", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 105 |
+
{"trace_id": "df6e7046-48cd-4b29-ac41-173d3a4fb5f9", "session_id": "732b0964-05ef-4f60-85a3-cf459228de7a", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T03:44:53.791298", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 106 |
+
{"trace_id": "03a87671-e3a6-40fa-a2f6-dbdad8e025c9", "session_id": "732b0964-05ef-4f60-85a3-cf459228de7a", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T03:44:53.793766", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp3esxeq0t.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpteop9bro.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 107 |
+
{"trace_id": "03a87671-e3a6-40fa-a2f6-dbdad8e025c9", "session_id": "732b0964-05ef-4f60-85a3-cf459228de7a", "end_time": "2025-09-21T03:44:56.947132", "outputs": {"final_score": 36.668, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 108 |
+
{"trace_id": "df6e7046-48cd-4b29-ac41-173d3a4fb5f9", "session_id": "732b0964-05ef-4f60-85a3-cf459228de7a", "end_time": "2025-09-21T03:44:56.953250", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 109 |
+
{"trace_id": "02d153d2-eb16-4784-abb5-42ab4509f10a", "session_id": "697a6f96-fb30-4958-9e15-a6ceb5892c41", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T03:51:10.001559", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 110 |
+
{"trace_id": "99341c01-5bb4-47fe-80e9-fe8dd1836ee9", "session_id": "697a6f96-fb30-4958-9e15-a6ceb5892c41", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T03:51:10.004099", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpynmwjkur.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpb85kyh_i.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 111 |
+
{"trace_id": "99341c01-5bb4-47fe-80e9-fe8dd1836ee9", "session_id": "697a6f96-fb30-4958-9e15-a6ceb5892c41", "end_time": "2025-09-21T03:51:12.815359", "outputs": {"final_score": 36.668, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 112 |
+
{"trace_id": "02d153d2-eb16-4784-abb5-42ab4509f10a", "session_id": "697a6f96-fb30-4958-9e15-a6ceb5892c41", "end_time": "2025-09-21T03:51:12.819346", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 113 |
+
{"trace_id": "334c866e-795d-4eb9-90dc-2f66b7128d5f", "session_id": "c192918b-d4d5-4598-8177-47bbd2d80408", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T03:55:08.646302", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 114 |
+
{"trace_id": "d86ec10f-f015-46b1-bf1b-f72b1a4613b3", "session_id": "c192918b-d4d5-4598-8177-47bbd2d80408", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T03:55:08.648291", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp1cfpxuyy.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp6s3sg7jw.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 115 |
+
{"trace_id": "d86ec10f-f015-46b1-bf1b-f72b1a4613b3", "session_id": "c192918b-d4d5-4598-8177-47bbd2d80408", "end_time": "2025-09-21T03:55:12.019303", "outputs": {"final_score": 50.0, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 116 |
+
{"trace_id": "334c866e-795d-4eb9-90dc-2f66b7128d5f", "session_id": "c192918b-d4d5-4598-8177-47bbd2d80408", "end_time": "2025-09-21T03:55:12.023331", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 117 |
+
{"trace_id": "8b172285-0bdc-468f-9524-c6fbff86ab8e", "session_id": "80a681b0-9d8d-4651-893c-c7373c734dc0", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T04:07:19.705410", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 118 |
+
{"trace_id": "d728dc29-eda1-42d5-ab33-aa4e99157fe1", "session_id": "80a681b0-9d8d-4651-893c-c7373c734dc0", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T04:07:19.708408", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpjg086yms.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp_u57r0c7.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 119 |
+
{"trace_id": "d728dc29-eda1-42d5-ab33-aa4e99157fe1", "session_id": "80a681b0-9d8d-4651-893c-c7373c734dc0", "end_time": "2025-09-21T04:07:46.216611", "outputs": {"final_score": 36.668, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 120 |
+
{"trace_id": "8b172285-0bdc-468f-9524-c6fbff86ab8e", "session_id": "80a681b0-9d8d-4651-893c-c7373c734dc0", "end_time": "2025-09-21T04:07:46.223165", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
matchers/__init__.py
ADDED
|
File without changes
|
matchers/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (150 Bytes). View file
|
|
|
matchers/__pycache__/final_scorer.cpython-312.pyc
ADDED
|
Binary file (2.79 kB). View file
|
|
|
matchers/__pycache__/hard_matcher.cpython-312.pyc
ADDED
|
Binary file (2 kB). View file
|
|
|
matchers/__pycache__/semantic_matcher.cpython-312.pyc
ADDED
|
Binary file (2.21 kB). View file
|
|
|
matchers/entity_extractor.py
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# parsers/entity_extractor.py - SPACY ENTITY EXTRACTION
|
| 2 |
+
import spacy
|
| 3 |
+
from collections import Counter
|
| 4 |
+
import re
|
| 5 |
+
|
| 6 |
+
class EntityExtractor:
    """spaCy-backed extractor for entities, experience years, and education.

    Degrades gracefully: when the ``en_core_web_sm`` model is not installed,
    ``self.nlp`` is None and every public method falls back to regex-only
    extraction.
    """

    def __init__(self):
        # The spaCy model may be missing on the deploy target; keep the
        # extractor usable either way.
        try:
            print("🧠 Loading spaCy model...")
            self.nlp = spacy.load("en_core_web_sm")
            print("✅ spaCy model loaded successfully")
        except OSError:
            print("⚠️ spaCy model not found. Run: python -m spacy download en_core_web_sm")
            self.nlp = None

    def extract_skills_with_nlp(self, text):
        """Extract entities from *text* using spaCy NLP.

        Returns a dict with ``persons``/``organizations``/``technologies``/
        ``skills``/``locations`` lists; ``skills`` holds deduplicated,
        lowercased noun phrases of at most three words.
        """
        if not self.nlp:
            return self._fallback_extraction(text)

        print("🔍 Extracting entities with spaCy...")

        doc = self.nlp(text)

        entities = {
            "persons": [],
            "organizations": [],
            "technologies": [],
            "skills": [],
            "locations": []
        }

        for ent in doc.ents:
            if ent.label_ == "PERSON":
                entities["persons"].append(ent.text)
            elif ent.label_ == "ORG":
                entities["organizations"].append(ent.text)
            elif ent.label_ == "GPE":  # Geopolitical entity (locations)
                entities["locations"].append(ent.text)

        # Short noun phrases (max 3 words) are treated as candidate skills.
        noun_phrases = [chunk.text.lower() for chunk in doc.noun_chunks
                        if len(chunk.text.split()) <= 3]

        # Regex pass for technical terms spaCy's NER does not label.
        tech_patterns = [
            r'\b\w+\.js\b', r'\b\w+script\b', r'\b\w+SQL\b',
            r'\bAPI\b', r'\bSDK\b', r'\bIDE\b', r'\bOS\b'
        ]

        tech_terms = []
        for pattern in tech_patterns:
            tech_terms.extend(re.findall(pattern, text, re.IGNORECASE))

        entities["technologies"] = list(set(tech_terms))
        entities["skills"] = list(set(noun_phrases))

        return entities

    def extract_experience_years(self, text):
        """Return the largest number of years of experience mentioned, or 0.

        FIX: the previous version ran the full spaCy pipeline here
        (``doc = self.nlp(text)``) but never used the result — the extraction
        below is purely regex-based, so that expensive call was dead work.
        """
        if not self.nlp:
            return self._extract_years_regex(text)

        experience_patterns = [
            r'(\d+)\+?\s*years?\s*(?:of\s*)?experience',
            r'(\d+)\+?\s*years?\s*in',
            r'experience.*?(\d+)\+?\s*years?',
            r'(\d+)\+?\s*year.*?experience'
        ]

        years = []
        for pattern in experience_patterns:
            matches = re.findall(pattern, text.lower())
            years.extend([int(match) for match in matches if match.isdigit()])

        return max(years) if years else 0

    def extract_education_info(self, text):
        """Return ``{"degrees": [...], "fields": [...]}`` via substring search."""
        degrees = [
            "bachelor", "master", "phd", "doctorate", "diploma",
            "b.tech", "m.tech", "bca", "mca", "bsc", "msc"
        ]

        fields = [
            "computer science", "engineering", "information technology",
            "software engineering", "data science", "mathematics"
        ]

        text_lower = text.lower()

        found_degrees = [degree for degree in degrees if degree in text_lower]
        found_fields = [field for field in fields if field in text_lower]

        return {
            "degrees": list(set(found_degrees)),
            "fields": list(set(found_fields))
        }

    def _fallback_extraction(self, text):
        """Regex-only extraction used when spaCy is unavailable."""
        print("⚠️ Using fallback extraction (spaCy not available)")

        entities = {
            "persons": [],
            "organizations": [],
            "technologies": [],
            "skills": [],
            "locations": []
        }

        # Email domains are the only organization signal available here.
        email_domains = re.findall(r'@([a-zA-Z0-9.-]+\.[a-zA-Z]{2,})', text)
        entities["organizations"] = [domain.split('.')[0] for domain in email_domains]

        return entities

    def _extract_years_regex(self, text):
        """Regex fallback for experience extraction."""
        pattern = r'(\d+)\+?\s*years?\s*(?:of\s*)?(?:experience|exp)'
        matches = re.findall(pattern, text.lower())
        years = [int(match) for match in matches if match.isdigit()]
        return max(years) if years else 0
|
| 137 |
+
|
| 138 |
+
# Test function
|
| 139 |
+
def test_entity_extractor():
    """Smoke-test entity, experience, and education extraction on a sample resume."""
    extractor = EntityExtractor()

    sample_text = """
    John Smith is a Python developer with 3+ years of experience at Google.
    He has worked with React.js, Node.js, and AWS in San Francisco.
    Bachelor's degree in Computer Science.
    """

    # The three extractors are independent; run them all on the same sample.
    nlp_entities = extractor.extract_skills_with_nlp(sample_text)
    exp_years = extractor.extract_experience_years(sample_text)
    edu_info = extractor.extract_education_info(sample_text)

    print(f"✅ Entities extracted: {len(nlp_entities['skills'])} skills found")
    print(f"✅ Experience: {exp_years} years")
    print(f"✅ Education: {edu_info}")

    return len(nlp_entities['skills']) > 0

if __name__ == "__main__":
    test_entity_extractor()
|
matchers/final_scorer.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# matchers/final_scorer.py
|
| 2 |
+
from matchers.hard_matcher import calculate_hard_match_score, calculate_fuzzy_match
|
| 3 |
+
from matchers.semantic_matcher import SemanticMatcher
|
| 4 |
+
|
| 5 |
+
class ResumeScorer:
    """Combine hard keyword, semantic, and fuzzy matching into one weighted score."""

    def __init__(self):
        self.semantic_matcher = SemanticMatcher()

    def calculate_final_score(self, resume_data, jd_data):
        """Calculate the weighted final score combining all factors.

        ``resume_data`` / ``jd_data`` are dicts with ``"skills"`` (list[str])
        and ``"raw_text"`` (str) keys. Returns a dict with ``final_score``,
        ``verdict``, ``breakdown`` and ``suggestions``.
        """
        # Step 1: Hard match (exact keyword overlap)
        hard_match = calculate_hard_match_score(
            resume_data["skills"],
            jd_data["skills"]
        )

        # Step 2: Semantic match (AI embeddings).
        # FIX: SemanticMatcher exposes calculate_semantic_similarity(), which
        # returns {"semantic_score": ...}; the previous call to a nonexistent
        # calculate_semantic_score() raised AttributeError at runtime.
        semantic_match = self.semantic_matcher.calculate_semantic_similarity(
            resume_data["raw_text"],
            jd_data["raw_text"]
        )
        semantic_score = semantic_match.get("semantic_score", 0.0)

        # Step 3: Fuzzy match (skill-name variations)
        fuzzy_skills = calculate_fuzzy_match(
            resume_data["raw_text"],
            jd_data["skills"]
        )
        fuzzy_bonus = len(fuzzy_skills) * 2  # 2 points per fuzzy match

        # Weighted scoring formula
        final_score = (
            0.4 * hard_match["score"] +     # 40% keyword match
            0.5 * semantic_score +          # 50% semantic similarity
            0.1 * min(fuzzy_bonus, 20)      # 10% fuzzy bonus (max 20)
        )

        return {
            "final_score": round(final_score, 2),
            "verdict": self.get_verdict(final_score),
            "breakdown": {
                "hard_match": hard_match,
                "semantic_match": semantic_match,
                "fuzzy_matches": fuzzy_skills
            },
            "suggestions": self.generate_suggestions(hard_match["missing_skills"])
        }

    def get_verdict(self, score):
        """Convert a 0-100 score to a suitability verdict string."""
        if score >= 80:
            return "High Suitability"
        elif score >= 60:
            return "Medium Suitability"
        else:
            return "Low Suitability"

    def generate_suggestions(self, missing_skills):
        """Generate improvement suggestions as a list of strings.

        FIX: always returns a list; previously the no-missing-skills branch
        returned a bare string while the other branches returned a list,
        forcing callers to type-check the result.
        """
        if not missing_skills:
            return ["Great match! No major skills missing."]

        suggestions = []
        if len(missing_skills) <= 3:
            suggestions.append(f"Consider adding skills: {', '.join(missing_skills[:3])}")
        else:
            suggestions.append(f"Focus on key skills: {', '.join(missing_skills[:3])}")
            suggestions.append("Consider relevant projects or certifications")

        return suggestions
|
matchers/fuzzy_matcher.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# matchers/fuzzy_matcher.py - FUZZY SKILL MATCHING
|
| 2 |
+
from rapidfuzz import fuzz, process
|
| 3 |
+
from collections import defaultdict
|
| 4 |
+
|
| 5 |
+
class FuzzyMatcher:
    """Match skill names tolerantly via edit distance plus a table of known aliases."""

    def __init__(self):
        # Canonical skill name -> accepted spellings/aliases (all lowercase).
        self.skill_variations = {
            'javascript': ['js', 'javascript', 'ecmascript', 'node.js', 'nodejs'],
            'python': ['python', 'py', 'python3'],
            'typescript': ['typescript', 'ts'],
            'kubernetes': ['kubernetes', 'k8s', 'kube'],
            'postgresql': ['postgresql', 'postgres', 'psql'],
            'ci/cd': ['ci/cd', 'cicd', 'continuous integration', 'continuous deployment'],
            'docker': ['docker', 'containerization', 'containers'],
            'aws': ['aws', 'amazon web services', 'amazon cloud'],
            'react': ['react', 'reactjs', 'react.js'],
            'angular': ['angular', 'angularjs', 'angular.js']
        }
        print("✅ Fuzzy matcher initialized with skill variations")

    def fuzzy_skill_match(self, resume_skills, jd_skills, threshold=80):
        """Find fuzzy matches between resume and JD skills."""
        print("🔍 Running fuzzy skill matching...")

        matched_jd_skills = []
        pair_details = []

        for jd_skill in jd_skills:
            # Prefer a direct edit-distance hit; fall back to the alias table.
            candidate, confidence = self._best_direct_match(jd_skill, resume_skills, threshold)
            if candidate is None:
                candidate, confidence = self._check_skill_variations(jd_skill, resume_skills)

            if candidate and confidence > threshold:
                matched_jd_skills.append(jd_skill)
                pair_details.append({
                    "jd_skill": jd_skill,
                    "resume_skill": candidate,
                    "confidence": round(confidence, 1)
                })

        return {
            "fuzzy_matched_skills": matched_jd_skills,
            "match_details": pair_details,
            "fuzzy_score": len(matched_jd_skills)
        }

    def _best_direct_match(self, jd_skill, resume_skills, threshold):
        """Return the highest-scoring resume skill above *threshold*, or (None, 0)."""
        best, best_score = None, 0
        for resume_skill in resume_skills:
            score = fuzz.ratio(jd_skill.lower(), resume_skill.lower())
            if score > threshold and score > best_score:
                best, best_score = resume_skill, score
        return best, best_score

    def _check_skill_variations(self, jd_skill, resume_skills):
        """Resolve *jd_skill* against the alias table.

        Returns ``(resume_skill, confidence)`` or ``(None, 0)``.
        """
        jd_lower = jd_skill.lower()

        # Forward pass: the JD skill is a known alias; look for any sibling
        # alias among the resume skills.
        for variations in self.skill_variations.values():
            if jd_lower in variations:
                for resume_skill in resume_skills:
                    if resume_skill.lower() in variations:
                        return resume_skill, 95  # High confidence for variation match

        # Reverse pass: any resume skill sharing an alias group with the JD skill.
        for resume_skill in resume_skills:
            resume_lower = resume_skill.lower()
            for variations in self.skill_variations.values():
                if resume_lower in variations and jd_lower in variations:
                    return resume_skill, 90

        return None, 0

    def suggest_skill_improvements(self, missing_skills):
        """Suggest known alias skills for up to five missing skills."""
        suggestions = []

        for skill in missing_skills[:5]:  # Top 5 missing skills
            skill_lower = skill.lower()

            # Find the alias group this skill belongs to, if any.
            for variations in self.skill_variations.values():
                if skill_lower not in variations:
                    continue
                alternatives = [v for v in variations if v != skill_lower]
                if alternatives:
                    suggestions.append({
                        "missing_skill": skill,
                        "alternatives": alternatives[:3],
                        "suggestion": f"Consider learning {alternatives[0]} as an alternative to {skill}"
                    })
                break

        return suggestions
|
| 99 |
+
|
| 100 |
+
# Test function
|
| 101 |
+
def test_fuzzy_matcher():
    """Exercise fuzzy matching on a small resume/JD skill pair."""
    matcher = FuzzyMatcher()

    resume_skills = ["javascript", "python", "react", "nodejs", "aws"]
    jd_skills = ["js", "python3", "reactjs", "node.js", "amazon web services", "docker"]

    outcome = matcher.fuzzy_skill_match(resume_skills, jd_skills)
    print(f"✅ Fuzzy matches found: {len(outcome['fuzzy_matched_skills'])}")

    for pair in outcome['match_details']:
        print(f"   {pair['jd_skill']} ↔ {pair['resume_skill']} ({pair['confidence']}%)")

    return len(outcome['fuzzy_matched_skills']) > 0

if __name__ == "__main__":
    test_fuzzy_matcher()
|
matchers/hard_matcher.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# matchers/hard_matcher.py
|
| 2 |
+
def calculate_hard_match_score(resume_skills, jd_skills):
    """Calculate keyword-overlap coverage of JD skills by resume skills.

    Returns a dict with ``score`` (0-100 float), ``matched_count``,
    ``total_jd_skills``, ``matched_skills`` and ``missing_skills``.
    """
    resume_set = set(resume_skills)
    jd_set = set(jd_skills)

    # FIX: this path previously returned a bare 0.0 float while every other
    # path returned a dict, so callers indexing hard_match["score"] crashed
    # with a TypeError on job descriptions with no extracted skills.
    if not jd_set:  # avoid division by zero
        return {
            "score": 0.0,
            "matched_count": 0,
            "total_jd_skills": 0,
            "matched_skills": [],
            "missing_skills": []
        }

    matched_skills = resume_set & jd_set
    total_jd_skills = len(jd_set)
    coverage_percentage = len(matched_skills) / total_jd_skills * 100

    return {
        "score": round(coverage_percentage, 2),
        "matched_count": len(matched_skills),
        "total_jd_skills": total_jd_skills,
        "matched_skills": list(matched_skills),
        "missing_skills": list(jd_set - resume_set)
    }
|
| 19 |
+
|
| 20 |
+
def calculate_fuzzy_match(resume_text, jd_skills):
    """Fuzzy matching for skill variations (JavaScript vs JS)"""
    # Install: pip install rapidfuzz
    from rapidfuzz import fuzz

    haystack = resume_text.lower()
    hits = []

    for skill in jd_skills:
        # Accept the JD skill if any known spelling of it appears
        # (approximately) anywhere in the resume text.
        if any(fuzz.partial_ratio(alias, haystack) > 80
               for alias in get_skill_variations(skill)):
            hits.append(skill)

    return list(set(hits))
|
| 37 |
+
|
| 38 |
+
def get_skill_variations(skill):
    """Return the known spellings/aliases for a skill (lowercase lookup)."""
    alias_table = {
        "javascript": ["js", "javascript", "node.js", "nodejs"],
        "python": ["python", "py"],
        "tensorflow": ["tensorflow", "tf"],
        "kubernetes": ["kubernetes", "k8s"],
        "postgresql": ["postgresql", "postgres", "psql"]
    }
    # Unknown skills fall back to themselves as the only variation.
    return alias_table.get(skill.lower(), [skill])
|
matchers/semantic_matcher.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# matchers/semantic_matcher.py - ENHANCED SEMANTIC MATCHER
|
| 2 |
+
from sentence_transformers import SentenceTransformer, util
|
| 3 |
+
import numpy as np
|
| 4 |
+
|
| 5 |
+
class SemanticMatcher:
    """Compute semantic similarity between two texts via sentence embeddings.

    ``self.model`` is None when sentence-transformers is unavailable; scoring
    methods then return 0.0 with an ``error`` key instead of raising.
    """

    def __init__(self):
        try:
            # Using a lightweight, high-performance model
            self.model = SentenceTransformer('all-MiniLM-L6-v2')
            print("✅ Semantic matcher initialized with SentenceTransformer model")
        except Exception as e:
            print(f"⚠️ Could not load SentenceTransformer model: {e}")
            print("   Install with: pip install sentence-transformers")
            self.model = None

    def calculate_semantic_similarity(self, text1: str, text2: str) -> dict:
        """Return ``{"semantic_score": 0-100}`` cosine similarity of the texts."""
        if not self.model:
            return {
                "semantic_score": 0.0,
                "error": "SentenceTransformer model not loaded"
            }

        try:
            # Generate embeddings for both texts
            embedding1 = self.model.encode(text1, convert_to_tensor=True)
            embedding2 = self.model.encode(text2, convert_to_tensor=True)

            # Cosine similarity is in [-1, 1]; scale to a 0-100-style score.
            cosine_score = util.pytorch_cos_sim(embedding1, embedding2)

            return {
                "semantic_score": round(float(cosine_score[0][0]) * 100, 2)
            }
        except Exception as e:
            print(f"❌ Error during semantic similarity calculation: {e}")
            return {"semantic_score": 0.0, "error": str(e)}

    def calculate_semantic_score(self, text1: str, text2: str) -> dict:
        """Compatibility wrapper used by matchers.final_scorer.ResumeScorer.

        FIX: final_scorer calls ``calculate_semantic_score(...)`` and reads
        ``result["score"]``, but this method did not exist, so scoring raised
        AttributeError. Delegates to calculate_semantic_similarity and also
        exposes the value under the ``"score"`` key (original keys are kept).
        """
        result = self.calculate_semantic_similarity(text1, text2)
        result["score"] = result.get("semantic_score", 0.0)
        return result
|
parsers/__iniy__.py
ADDED
|
File without changes
|
parsers/__pycache__/cleaner.cpython-312.pyc
ADDED
|
Binary file (520 Bytes). View file
|
|
|
parsers/__pycache__/docx_parser.cpython-312.pyc
ADDED
|
Binary file (384 Bytes). View file
|
|
|
parsers/__pycache__/jd_parser.cpython-312.pyc
ADDED
|
Binary file (902 Bytes). View file
|
|
|
parsers/__pycache__/job_requirement_parser.cpython-312.pyc
ADDED
|
Binary file (20.2 kB). View file
|
|
|
parsers/__pycache__/pdf_parser.cpython-312.pyc
ADDED
|
Binary file (1.54 kB). View file
|
|
|
parsers/__pycache__/section_splitter.cpython-312.pyc
ADDED
|
Binary file (2.69 kB). View file
|
|
|
parsers/__pycache__/skill_extractor.cpython-312.pyc
ADDED
|
Binary file (2.61 kB). View file
|
|
|
parsers/__pycache__/skills_list.cpython-312.pyc
ADDED
|
Binary file (937 Bytes). View file
|
|
|
parsers/__pycache__/smart_skill_extractor.cpython-312.pyc
ADDED
|
Binary file (12 kB). View file
|
|
|
parsers/cleaner.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
|
| 3 |
+
def clean_text(text):
    """Remove extra spaces, line breaks, normalize text"""
    # Net effect: every run of whitespace (newlines included) becomes a
    # single space, and the result is trimmed at both ends.
    collapsed = re.sub(r'\n+', '\n', text)
    collapsed = re.sub(r'\s+', ' ', collapsed)
    return collapsed.strip()
|
parsers/docx_parser.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import docx2txt
|
| 2 |
+
|
| 3 |
+
def extract_text_docx(file_path):
    """Extract text from DOCX"""
    # docx2txt returns the document body as a single plain-text string.
    extracted = docx2txt.process(file_path)
    return extracted
|
parsers/entity_extractor.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# parsers/entity_extractor.py - Basic version
|
| 2 |
+
import re
|
| 3 |
+
|
| 4 |
+
class EntityExtractor:
    """Regex-only entity extraction, used when no NLP model is available."""

    def __init__(self):
        print("✅ Entity extractor initialized (basic mode)")

    def extract_skills_with_nlp(self, text):
        """Basic entity extraction"""
        # Basic mode has no NLP model, so every category is empty.
        return {category: [] for category in
                ("persons", "organizations", "technologies", "skills", "locations")}

    def extract_experience_years(self, text):
        """Extract years of experience using regex"""
        hits = re.findall(r'(\d+)\+?\s*years?\s*(?:of\s*)?(?:experience|exp)',
                          text.lower())
        candidate_years = [int(hit) for hit in hits if hit.isdigit()]
        return max(candidate_years) if candidate_years else 0

    def extract_education_info(self, text):
        """Extract education info"""
        text_lower = text.lower()
        known_degrees = ["bachelor", "master", "phd", "b.tech", "m.tech"]
        return {
            "degrees": [degree for degree in known_degrees if degree in text_lower],
            "fields": []
        }
|
parsers/jd_parser.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
from parsers.cleaner import clean_text
|
| 3 |
+
from parsers.skill_extractor import extract_skills
|
| 4 |
+
|
| 5 |
+
def parse_jd(file_text):
    """Parse job description and extract role + skills"""
    cleaned = clean_text(file_text)

    # Job role: look for a "job role / job title / position: ..." header.
    role_match = re.search(r"(job role|job title|position)\s*[:\-]\s*(.*)", cleaned, re.I)
    if role_match:
        job_role = role_match.group(2).strip()
    else:
        job_role = "Unknown"

    return {
        "role": job_role,
        "skills": extract_skills(cleaned),
        "raw_text": cleaned
    }
|
parsers/job_requirement_parser.py
ADDED
|
@@ -0,0 +1,449 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# parsers/job_requirement_parser.py - Advanced Job Requirement Analysis
|
| 2 |
+
import re
|
| 3 |
+
import json
|
| 4 |
+
from typing import Dict, List, Tuple
|
| 5 |
+
from dataclasses import dataclass
|
| 6 |
+
from parsers.smart_skill_extractor import SmartSkillExtractor
|
| 7 |
+
|
| 8 |
+
@dataclass
class JobRequirement:
    """Structured job requirement data"""
    role_title: str                  # Job title as stated in the posting
    company: str                     # Hiring company/organization name
    experience_required: str         # Free-text experience phrase, e.g. "3+ years"
    education_required: List[str]    # Degree/qualification phrases found in the JD
    must_have_skills: List[str]      # Skills marked required/mandatory
    good_to_have_skills: List[str]   # Skills marked preferred/nice-to-have
    certifications: List[str]        # Certification phrases found in the JD
    responsibilities: List[str]      # Duty/responsibility phrases
    benefits: List[str]              # Currently always empty (reserved for later use)
    location: str                    # Work location or remote/hybrid note
    employment_type: str             # Label produced by _determine_employment_type
    salary_range: str                # Free-text salary/compensation phrase
    industry: str                    # Label produced by _determine_industry
    seniority_level: str             # Label derived from role title + experience
|
| 25 |
+
|
| 26 |
+
class JobRequirementParser:
|
| 27 |
+
"""Parse job descriptions to extract structured requirements"""
|
| 28 |
+
|
| 29 |
+
def __init__(self):
|
| 30 |
+
self.skill_extractor = SmartSkillExtractor()
|
| 31 |
+
self.patterns = self._initialize_patterns()
|
| 32 |
+
print("✅ Job Requirement Parser initialized")
|
| 33 |
+
|
| 34 |
+
    def _initialize_patterns(self):
        """Initialize regex patterns for job parsing.

        Returns a dict mapping a field name to an ordered list of regex
        alternatives; callers try each in order and use the first match.
        All patterns are applied case-insensitively by the extractors.
        """
        return {
            # Job title: explicit "Job Title:" labels, a leading title line,
            # or a "hiring for ..." phrase.
            'role_title': [
                r'(?:job\s+title|position|role)[\s:]*([^\n.]{5,80})',
                r'^([A-Z][\w\s,]+(?:engineer|developer|manager|analyst|specialist|coordinator))\b',
                r'hiring\s+for[\s:]*([^\n.]{5,80})',
            ],
            # Company: explicit label, or "at/@ CompanyName".
            'company': [
                r'(?:company|organization)[\s:]*([^\n]+)',
                r'(?:at|@)\s+([A-Z][a-zA-Z\s&,.-]+?)(?:\s|$)',
            ],
            # Experience: ranges ("2-4 years"), minimums, or plain "N years".
            'experience': [
                r'(?:experience|exp)[\s:]*(\d+[\+\-]*\s*(?:to|\-)\s*\d+\s*years?|\d+\+?\s*years?)',
                r'(\d+[\+\-]*)\s*(?:to|\-)\s*(\d+)\s*years?\s*(?:of\s+)?(?:experience|exp)',
                r'minimum\s+(\d+\+?)\s*years?',
                r'(\d+)\+?\s*years?\s+(?:of\s+)?(?:experience|exp)',
            ],
            # Education: labels plus common degree names/abbreviations.
            'education': [
                r'(?:education|degree|qualification)[\s:]*([^\n]+)',
                r'(?:bachelor|master|phd|doctorate|diploma|b\.tech|m\.tech|bca|mca|bsc|msc)[\s\.]*([^\n]*)',
                r'(?:degree\s+in|graduated\s+in)\s+([^\n]+)',
            ],
            # Mandatory-requirement sections (captures up to the next period).
            'must_have': [
                r'(?:must\s+have|required|mandatory|essential)[\s:]*([^.]+)',
                r'(?:requirements|qualifications)[\s:]*([^.]+)',
                r'(?:should\s+have|need\s+to\s+have)[\s:]*([^.]+)',
            ],
            # Preferred/optional-requirement sections.
            'good_to_have': [
                r'(?:good\s+to\s+have|nice\s+to\s+have|preferred|bonus|plus)[\s:]*([^.]+)',
                r'(?:additional|optional)[\s:]*([^.]+)',
            ],
            # Responsibility/duty sections.
            'responsibilities': [
                r'(?:responsibilities|duties|tasks)[\s:]*([^.]+)',
                r'(?:you\s+will|role\s+involves)[\s:]*([^.]+)',
            ],
            # Certifications: generic mentions plus common vendor-certified phrases.
            'certifications': [
                r'(?:certification|certified|certificate)[\s:]*([^.]+)',
                r'(?:aws|azure|google\s+cloud|oracle|cisco|microsoft)\s+certified[\s:]*([^.]*)',
            ],
            # Salary: labels, currency-prefixed amounts, or "X to Y per month/year".
            'salary': [
                r'(?:salary|compensation|package)[\s:]*([^.\n]+)',
                r'(?:\$|₹|€|£)\s*([0-9,.-]+(?:\s*(?:to|\-)\s*[0-9,.-]+)?)',
                r'([0-9,]+)\s*(?:to|\-)\s*([0-9,]+)\s*(?:per\s+)?(?:month|year|annum)',
            ],
            # Location: labels or remote/hybrid/onsite wording.
            'location': [
                r'(?:location|based\s+in|office)[\s:]*([^.\n]+)',
                r'(?:remote|hybrid|onsite|work\s+from)[\s:]*([^.\n]*)',
            ]
        }
|
| 84 |
+
|
| 85 |
+
def parse_job_description(self, jd_text: str) -> JobRequirement:
|
| 86 |
+
"""Parse job description into structured requirements"""
|
| 87 |
+
|
| 88 |
+
if not jd_text:
|
| 89 |
+
return self._create_empty_requirement()
|
| 90 |
+
|
| 91 |
+
print("🔍 Parsing job requirements...")
|
| 92 |
+
|
| 93 |
+
# Extract basic information
|
| 94 |
+
role_title = self._extract_role_title(jd_text)
|
| 95 |
+
company = self._extract_company(jd_text)
|
| 96 |
+
experience = self._extract_experience(jd_text)
|
| 97 |
+
education = self._extract_education(jd_text)
|
| 98 |
+
location = self._extract_location(jd_text)
|
| 99 |
+
salary = self._extract_salary(jd_text)
|
| 100 |
+
|
| 101 |
+
# Extract skills and requirements
|
| 102 |
+
must_have_skills, good_to_have_skills = self._extract_skills_by_priority(jd_text)
|
| 103 |
+
certifications = self._extract_certifications(jd_text)
|
| 104 |
+
responsibilities = self._extract_responsibilities(jd_text)
|
| 105 |
+
|
| 106 |
+
# Determine job characteristics
|
| 107 |
+
employment_type = self._determine_employment_type(jd_text)
|
| 108 |
+
industry = self._determine_industry(jd_text, role_title)
|
| 109 |
+
seniority_level = self._determine_seniority(role_title, experience)
|
| 110 |
+
|
| 111 |
+
job_req = JobRequirement(
|
| 112 |
+
role_title=role_title,
|
| 113 |
+
company=company,
|
| 114 |
+
experience_required=experience,
|
| 115 |
+
education_required=education,
|
| 116 |
+
must_have_skills=must_have_skills,
|
| 117 |
+
good_to_have_skills=good_to_have_skills,
|
| 118 |
+
certifications=certifications,
|
| 119 |
+
responsibilities=responsibilities,
|
| 120 |
+
benefits=[], # Can be enhanced later
|
| 121 |
+
location=location,
|
| 122 |
+
employment_type=employment_type,
|
| 123 |
+
salary_range=salary,
|
| 124 |
+
industry=industry,
|
| 125 |
+
seniority_level=seniority_level
|
| 126 |
+
)
|
| 127 |
+
|
| 128 |
+
print(f"✅ Parsed job: {role_title} at {company}")
|
| 129 |
+
print(f" 📍 Location: {location}")
|
| 130 |
+
print(f" 💼 Experience: {experience}")
|
| 131 |
+
print(f" 🎯 Must-have skills: {len(must_have_skills)}")
|
| 132 |
+
print(f" ⭐ Good-to-have skills: {len(good_to_have_skills)}")
|
| 133 |
+
|
| 134 |
+
return job_req
|
| 135 |
+
|
| 136 |
+
def _extract_role_title(self, text: str) -> str:
|
| 137 |
+
"""Extract job role title"""
|
| 138 |
+
for pattern in self.patterns['role_title']:
|
| 139 |
+
match = re.search(pattern, text, re.IGNORECASE)
|
| 140 |
+
if match:
|
| 141 |
+
return match.group(1).strip()
|
| 142 |
+
|
| 143 |
+
# Fallback: look for common job titles
|
| 144 |
+
lines = text.split('\n')
|
| 145 |
+
for line in lines[:5]: # Check first 5 lines
|
| 146 |
+
line = line.strip()
|
| 147 |
+
if any(title in line.lower() for title in
|
| 148 |
+
['engineer', 'developer', 'manager', 'analyst', 'specialist']):
|
| 149 |
+
return line
|
| 150 |
+
|
| 151 |
+
return "Unknown Role"
|
| 152 |
+
|
| 153 |
+
def _extract_company(self, text: str) -> str:
|
| 154 |
+
"""Extract company name"""
|
| 155 |
+
for pattern in self.patterns['company']:
|
| 156 |
+
match = re.search(pattern, text, re.IGNORECASE)
|
| 157 |
+
if match:
|
| 158 |
+
return match.group(1).strip()
|
| 159 |
+
|
| 160 |
+
return "Unknown Company"
|
| 161 |
+
|
| 162 |
+
def _extract_experience(self, text: str) -> str:
    """Extract the experience requirement as free text.

    Falls back to "0-1 years" for fresher/entry-level postings and
    "Not specified" when no requirement is stated.
    """
    for regex in self.patterns['experience']:
        hit = re.search(regex, text, re.IGNORECASE)
        if hit:
            return hit.group().strip()

    # Fresher postings rarely state an explicit year range.
    if re.search(r'\b(?:fresher|entry\s+level|0\s+years?)\b', text, re.IGNORECASE):
        return "0-1 years"

    return "Not specified"
|
| 174 |
+
|
| 175 |
+
def _extract_education(self, text: str) -> List[str]:
    """Extract education requirements.

    Combines hits from the configured ``education`` patterns with a
    built-in list of common degree abbreviations.  Returns unique
    matches in first-seen order — the original ``list(set(...))`` made
    the result order nondeterministic — or ``["Any Graduate"]`` when
    nothing is found.
    """
    education = []

    for pattern in self.patterns['education']:
        for match in re.finditer(pattern, text, re.IGNORECASE):
            education.append(match.group().strip())

    # Common degree abbreviations; capture the rest of the sentence for
    # context (e.g. "B.Tech in Computer Science").
    degree_patterns = [
        r'\bb\.?tech\b', r'\bm\.?tech\b', r'\bbca\b', r'\bmca\b',
        r'\bbsc\b', r'\bmsc\b', r'\bba\b', r'\bmba\b',
        r'\bbachelor', r'\bmaster', r'\bphd\b', r'\bdoctorate\b'
    ]
    for pattern in degree_patterns:
        # Single search per pattern (the original scanned the text twice:
        # once to test, once to capture).
        match = re.search(pattern + r'[^.\n]*', text, re.IGNORECASE)
        if match:
            education.append(match.group().strip())

    # Deduplicate while preserving first-seen order.
    unique = list(dict.fromkeys(education))
    return unique if unique else ["Any Graduate"]
|
| 198 |
+
|
| 199 |
+
def _extract_skills_by_priority(self, text: str) -> Tuple[List[str], List[str]]:
    """Split extracted skills into (must_have, good_to_have) lists.

    All skills are first pulled via the smart extractor; each skill is
    then assigned to the priority bucket whose captured section text
    mentions it, falling back to the `_is_core_skill` heuristic.

    Improvements over the original: the unused ``text_lower`` variable
    is removed, the section strings are lowercased once instead of once
    per skill, and deduplication preserves first-seen order (the
    original ``list(set(...))`` was nondeterministic).
    """
    # Use smart extractor to get all skills mentioned anywhere.
    all_skills = self.skill_extractor.extract_skills_comprehensive(text)

    # Collect the raw text captured by each priority pattern.
    must_have_section = ""
    for pattern in self.patterns['must_have']:
        for match in re.finditer(pattern, text, re.IGNORECASE):
            must_have_section += " " + match.group(1)

    good_to_have_section = ""
    for pattern in self.patterns['good_to_have']:
        for match in re.finditer(pattern, text, re.IGNORECASE):
            good_to_have_section += " " + match.group(1)

    must_lower = must_have_section.lower()
    good_lower = good_to_have_section.lower()

    must_have = []
    good_to_have = []
    for skill in all_skills:
        skill_lower = skill.lower()
        # NOTE(review): substring membership can over-match very short
        # skill names; kept for behavioral parity with the pipeline.
        if skill_lower in must_lower:
            must_have.append(skill)
        elif skill_lower in good_lower:
            good_to_have.append(skill)
        elif self._is_core_skill(skill, text):
            # No explicit section hit: fall back to the emphasis heuristic.
            must_have.append(skill)
        else:
            good_to_have.append(skill)

    # Deduplicate while preserving order; must-have wins on overlap.
    must_have = list(dict.fromkeys(must_have))
    must_set = set(must_have)
    good_to_have = [s for s in dict.fromkeys(good_to_have) if s not in must_set]

    return must_have, good_to_have
|
| 248 |
+
|
| 249 |
+
def _is_core_skill(self, skill: str, text: str) -> bool:
|
| 250 |
+
"""Determine if a skill is core based on frequency and context"""
|
| 251 |
+
skill_lower = skill.lower()
|
| 252 |
+
text_lower = text.lower()
|
| 253 |
+
|
| 254 |
+
# Count mentions
|
| 255 |
+
mentions = text_lower.count(skill_lower)
|
| 256 |
+
|
| 257 |
+
# Check for emphasis keywords around the skill
|
| 258 |
+
emphasis_patterns = [
|
| 259 |
+
rf'\b(?:required|must|essential|mandatory|need)\b[^.]*{re.escape(skill_lower)}',
|
| 260 |
+
rf'{re.escape(skill_lower)}[^.]*\b(?:required|must|essential|mandatory)\b',
|
| 261 |
+
rf'\b(?:experience|expertise|proficient)\b[^.]*{re.escape(skill_lower)}',
|
| 262 |
+
rf'{re.escape(skill_lower)}[^.]*\b(?:years?|experience)\b'
|
| 263 |
+
]
|
| 264 |
+
|
| 265 |
+
for pattern in emphasis_patterns:
|
| 266 |
+
if re.search(pattern, text_lower):
|
| 267 |
+
return True
|
| 268 |
+
|
| 269 |
+
# If mentioned multiple times, likely core
|
| 270 |
+
return mentions >= 2
|
| 271 |
+
|
| 272 |
+
def _extract_certifications(self, text: str) -> List[str]:
    """Extract certification mentions; very short hits are discarded."""
    found = set()
    for regex in self.patterns['certifications']:
        for hit in re.finditer(regex, text, re.IGNORECASE):
            candidate = hit.group().strip()
            # Fragments of 5 characters or fewer are too short to be real.
            if len(candidate) > 5:
                found.add(candidate)
    return list(found)
|
| 284 |
+
|
| 285 |
+
def _extract_responsibilities(self, text: str) -> List[str]:
    """Extract up to 10 responsibility bullet points from the posting."""
    collected = []
    for regex in self.patterns['responsibilities']:
        for hit in re.finditer(regex, text, re.IGNORECASE):
            section = hit.group(1).strip()
            # Bullets may be separated by markers or plain line breaks.
            for item in re.split(r'[•\-\*]\s*|\n', section):
                item = item.strip()
                # Skip fragments too short to be a meaningful duty.
                if len(item) > 10:
                    collected.append(item)
    return collected[:10]
|
| 301 |
+
|
| 302 |
+
def _extract_location(self, text: str) -> str:
    """Extract the job location, falling back to a known-city keyword scan."""
    for regex in self.patterns['location']:
        hit = re.search(regex, text, re.IGNORECASE)
        if hit:
            return hit.group(1).strip()

    # Fallback: major Indian metros plus remote/hybrid keywords.
    city_hit = re.search(
        r'\b(?:bangalore|mumbai|delhi|hyderabad|chennai|pune|kolkata|ahmedabad|remote|hybrid)\b',
        text,
        re.IGNORECASE,
    )
    if city_hit:
        return city_hit.group()

    return "Not specified"
|
| 316 |
+
|
| 317 |
+
def _extract_salary(self, text: str) -> str:
    """Extract the salary/package text, or 'Not specified' when absent."""
    for regex in self.patterns['salary']:
        hit = re.search(regex, text, re.IGNORECASE)
        if hit:
            return hit.group().strip()
    return "Not specified"
|
| 325 |
+
|
| 326 |
+
def _determine_employment_type(self, text: str) -> str:
|
| 327 |
+
"""Determine employment type"""
|
| 328 |
+
text_lower = text.lower()
|
| 329 |
+
|
| 330 |
+
if 'intern' in text_lower or 'internship' in text_lower:
|
| 331 |
+
return "Internship"
|
| 332 |
+
elif 'contract' in text_lower or 'freelance' in text_lower:
|
| 333 |
+
return "Contract"
|
| 334 |
+
elif 'part time' in text_lower or 'part-time' in text_lower:
|
| 335 |
+
return "Part-time"
|
| 336 |
+
else:
|
| 337 |
+
return "Full-time"
|
| 338 |
+
|
| 339 |
+
def _determine_industry(self, text: str, role_title: str) -> str:
|
| 340 |
+
"""Determine industry based on job content"""
|
| 341 |
+
text_lower = (text + " " + role_title).lower()
|
| 342 |
+
|
| 343 |
+
industry_keywords = {
|
| 344 |
+
'Technology': ['software', 'tech', 'it', 'developer', 'engineer', 'programmer'],
|
| 345 |
+
'Finance': ['finance', 'banking', 'fintech', 'investment', 'trading'],
|
| 346 |
+
'Healthcare': ['healthcare', 'medical', 'hospital', 'pharma', 'clinical'],
|
| 347 |
+
'Education': ['education', 'teaching', 'learning', 'university', 'academic'],
|
| 348 |
+
'E-commerce': ['ecommerce', 'e-commerce', 'retail', 'shopping', 'marketplace'],
|
| 349 |
+
'Marketing': ['marketing', 'advertising', 'promotion', 'brand', 'digital marketing'],
|
| 350 |
+
'Consulting': ['consulting', 'advisory', 'strategy', 'management consulting'],
|
| 351 |
+
'Manufacturing': ['manufacturing', 'production', 'industrial', 'automotive'],
|
| 352 |
+
}
|
| 353 |
+
|
| 354 |
+
for industry, keywords in industry_keywords.items():
|
| 355 |
+
if any(keyword in text_lower for keyword in keywords):
|
| 356 |
+
return industry
|
| 357 |
+
|
| 358 |
+
return "General"
|
| 359 |
+
|
| 360 |
+
def _determine_seniority(self, role_title: str, experience: str) -> str:
|
| 361 |
+
"""Determine seniority level"""
|
| 362 |
+
title_lower = role_title.lower()
|
| 363 |
+
|
| 364 |
+
if any(word in title_lower for word in ['senior', 'lead', 'principal', 'architect', 'manager']):
|
| 365 |
+
return "Senior"
|
| 366 |
+
elif any(word in title_lower for word in ['junior', 'associate', 'entry', 'trainee']):
|
| 367 |
+
return "Junior"
|
| 368 |
+
elif 'intern' in title_lower:
|
| 369 |
+
return "Intern"
|
| 370 |
+
else:
|
| 371 |
+
# Determine by experience
|
| 372 |
+
if '0' in experience or 'fresher' in experience.lower():
|
| 373 |
+
return "Entry Level"
|
| 374 |
+
elif any(num in experience for num in ['1', '2', '3']):
|
| 375 |
+
return "Mid Level"
|
| 376 |
+
else:
|
| 377 |
+
return "Senior"
|
| 378 |
+
|
| 379 |
+
def _create_empty_requirement(self) -> JobRequirement:
    """Build a placeholder JobRequirement used when parsing fails."""
    # Safe defaults mirroring the field-by-field placeholders used
    # throughout the parser.
    defaults = {
        'role_title': "Unknown Role",
        'company': "Unknown Company",
        'experience_required': "Not specified",
        'education_required': ["Any Graduate"],
        'must_have_skills': [],
        'good_to_have_skills': [],
        'certifications': [],
        'responsibilities': [],
        'benefits': [],
        'location': "Not specified",
        'employment_type': "Full-time",
        'salary_range': "Not specified",
        'industry': "General",
        'seniority_level': "Not specified",
    }
    return JobRequirement(**defaults)
|
| 397 |
+
|
| 398 |
+
def export_to_json(self, job_req: JobRequirement) -> str:
    """Serialize a JobRequirement to a pretty-printed JSON string."""
    # vars() is the idiomatic spelling of instance.__dict__.
    return json.dumps(vars(job_req), indent=2)
|
| 401 |
+
|
| 402 |
+
# Test function
|
| 403 |
+
def test_job_parser():
    """Smoke test: parse a sample JD and verify must-have skills were found.

    Returns True when at least one must-have skill was extracted.
    """
    parser = JobRequirementParser()

    # Representative job description exercising every extraction path:
    # title, location, experience, must-have / good-to-have sections,
    # responsibilities and salary.
    sample_jd = """
    Senior Full Stack Developer - TechCorp Inc.

    Location: Bangalore, India (Hybrid)
    Experience: 3-5 years

    Job Description:
    We are looking for a Senior Full Stack Developer to join our growing team.

    Must Have Requirements:
    - 3+ years of experience in React.js and Node.js
    - Proficiency in JavaScript, TypeScript
    - Experience with MySQL and MongoDB
    - Knowledge of AWS cloud services
    - Bachelor's degree in Computer Science or related field

    Good to Have:
    - Experience with Docker and Kubernetes
    - Knowledge of microservices architecture
    - AWS certification preferred
    - Experience with CI/CD pipelines

    Responsibilities:
    - Develop and maintain web applications
    - Collaborate with cross-functional teams
    - Write clean, maintainable code
    - Participate in code reviews

    Package: 8-12 LPA
    """

    job_req = parser.parse_job_description(sample_jd)

    # Print a short summary for manual inspection.
    print("\n📋 Parsed Job Requirements:")
    print(f"Role: {job_req.role_title}")
    print(f"Company: {job_req.company}")
    print(f"Must-have skills: {job_req.must_have_skills}")
    print(f"Good-to-have skills: {job_req.good_to_have_skills}")

    # Success criterion: at least one must-have skill extracted.
    return len(job_req.must_have_skills) > 0
|
| 447 |
+
|
| 448 |
+
# Allow running this module directly as a quick smoke test.
if __name__ == "__main__":
    test_job_parser()
|
parsers/pdf_parser.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import fitz # PyMuPDF
|
| 2 |
+
import pdfplumber
|
| 3 |
+
import docx
|
| 4 |
+
|
| 5 |
+
def extract_text_pymupdf(file_path):
    """Extract text from a PDF using PyMuPDF (fitz)."""
    with fitz.open(file_path) as doc:
        # Concatenate every page's text in document order.
        return "".join(page.get_text() for page in doc)
|
| 12 |
+
|
| 13 |
+
def extract_text_pdfplumber(file_path):
    """Extract text from a PDF using pdfplumber."""
    with pdfplumber.open(file_path) as pdf:
        # extract_text() may return None for image-only pages.
        return "".join(page.extract_text() or "" for page in pdf.pages)
|
| 20 |
+
|
| 21 |
+
def extract_text_docx(file_path):
    """Extract text from a DOCX file, one paragraph per line."""
    document = docx.Document(file_path)
    return "\n".join(paragraph.text for paragraph in document.paragraphs)
|
parsers/section_splitter.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# parsers/section_splitter.py - FIXED VERSION
|
| 2 |
+
import re
|
| 3 |
+
|
| 4 |
+
def split_sections(text):
    """Split resume text into sections (skills, experience, education, ...).

    Section headers are located with regexes on a whitespace-flattened
    copy of the text; the span between consecutive headers becomes that
    section's content.  When no header is found, the whole text goes
    under 'general' and a keyword fallback tries to recover a skills
    section.

    Fix over the original: the unused ``current_section`` variable has
    been removed.
    """
    sections = {}

    # Flatten newlines so headers are matched on one continuous line.
    text = text.replace('\n', ' ').strip()

    # Common section header spellings (matched case-insensitively).
    section_patterns = [
        r'(professional\s+summary|summary|objective)',
        r'(technical\s+skills|skills|core\s+competencies|technologies)',
        r'(work\s+experience|experience|employment|professional\s+experience)',
        r'(education|academic\s+background|qualifications)',
        r'(projects|personal\s+projects|key\s+projects)',
        r'(certifications|certificates|credentials)',
        r'(achievements|accomplishments|awards)'
    ]

    # Record every header occurrence with its position in the text.
    section_starts = []
    for pattern in section_patterns:
        for match in re.finditer(pattern, text, re.IGNORECASE):
            section_starts.append((match.start(), match.group().lower().strip()))
    section_starts.sort()

    if not section_starts:
        # No recognizable headers: keep everything and try a keyword fallback.
        sections["general"] = text
        sections["skills"] = extract_skills_section_fallback(text)
    else:
        for i, (start_pos, section_name) in enumerate(section_starts):
            # Each section runs until the next header (or end of text).
            end_pos = section_starts[i + 1][0] if i + 1 < len(section_starts) else len(text)
            # Strip punctuation from the header to build a clean key.
            clean_name = re.sub(r'[^\w\s]', '', section_name).strip()
            sections[clean_name] = text[start_pos:end_pos].strip()

    return sections
|
| 52 |
+
|
| 53 |
+
def extract_skills_section_fallback(text):
    """Best-effort skills extraction used when no section headers are found.

    Captures the text that follows common skills-related keywords and
    joins all captures into a single string.
    """
    indicator_patterns = (
        r'programming languages?:?\s*([^.]*)',
        r'technical skills?:?\s*([^.]*)',
        r'technologies?:?\s*([^.]*)',
        r'tools?:?\s*([^.]*)',
        r'frameworks?:?\s*([^.]*)',
        r'languages?:?\s*([^.]*)'
    )

    pieces = []
    for pattern in indicator_patterns:
        pieces.extend(re.findall(pattern, text, re.IGNORECASE))

    return " ".join(pieces).strip() if pieces else ""
|
parsers/skill_extractor.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# parsers/skill_extractor.py - ENHANCED VERSION
|
| 2 |
+
import re
|
| 3 |
+
from parsers.skills_list import skills
|
| 4 |
+
|
| 5 |
+
def extract_skills(text):
    """Extract known skills from *text* by dictionary matching.

    Each entry of the shared ``skills`` list is matched as a whole word,
    with a tolerance for ".ext"-style suffixes (so "node" also matches
    "node.js").  A small alias table maps common abbreviations back to
    the canonical skill name (js -> javascript, k8s -> kubernetes, ...).

    Fixes over the original: the fallback patterns had no word
    boundaries, so short skills matched inside longer words ("java"
    inside "javascript", "react" inside "reactive"), and the alias check
    used plain substring membership ("py" matched inside "happy").
    """
    if not text:
        return []

    text_lower = text.lower()
    found_skills = set()

    for skill in skills:
        skill_lower = skill.lower()
        escaped = re.escape(skill_lower)
        # \b only works next to word characters; skills like "c++" or
        # "c#" end in symbols, so boundaries are applied conditionally.
        prefix = r'\b' if skill_lower[0].isalnum() else ''
        suffix = r'\b' if skill_lower[-1].isalnum() else ''
        pattern = rf'{prefix}{escaped}(?:\s*\.\s*\w+)?{suffix}'
        if re.search(pattern, text_lower):
            found_skills.add(skill)

    # Map common aliases/variations back to the canonical skill name.
    skill_variations = {
        'javascript': ['js', 'javascript', 'ecmascript'],
        'python': ['python', 'py'],
        'node.js': ['nodejs', 'node.js', 'node js'],
        'postgresql': ['postgres', 'postgresql', 'psql'],
        'kubernetes': ['k8s', 'kubernetes'],
        'docker': ['docker', 'containerization'],
        'ci/cd': ['ci/cd', 'cicd', 'continuous integration', 'continuous deployment']
    }
    for main_skill, variations in skill_variations.items():
        # Only add skills that are part of the shared dictionary.
        if main_skill in found_skills or main_skill not in skills:
            continue
        for variation in variations:
            var_lower = variation.lower()
            v_prefix = r'\b' if var_lower[0].isalnum() else ''
            v_suffix = r'\b' if var_lower[-1].isalnum() else ''
            # Word-bounded so e.g. 'py' does not match inside 'happy'.
            if re.search(rf'{v_prefix}{re.escape(var_lower)}{v_suffix}', text_lower):
                found_skills.add(main_skill)
                break

    return list(found_skills)
|
| 49 |
+
|
| 50 |
+
def debug_skills_extraction(text):
    """Verbose wrapper around extract_skills for troubleshooting."""
    print(f"🔍 Text length: {len(text)}")
    print(f"🔍 First 300 chars: {text[:300]}")

    # Quick manual scan for a few well-known skills as a sanity check.
    lowered = text.lower()
    obvious_skills = ['python', 'javascript', 'react', 'node.js', 'aws', 'docker']
    found_obvious = [s for s in obvious_skills if s.lower() in lowered]
    print(f"🔍 Obvious skills found: {found_obvious}")

    skills_found = extract_skills(text)
    print(f"🔍 Total skills extracted: {len(skills_found)}")
    print(f"🔍 Skills: {skills_found}")

    return skills_found
|
parsers/skills_list.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# parsers/skills_list.py
# Flat dictionary of known skills used by the keyword matcher.
# Fix: deduplicated — "react", "angular" and "vue" were previously listed
# twice (under both Web Technologies and Frameworks & Libraries), which
# caused redundant matching work downstream.
skills = [
    # Programming Languages
    "python", "java", "javascript", "js", "typescript", "c++", "c#", "php", "go", "rust", "kotlin", "swift", "ruby",

    # Web Technologies
    "html", "css", "react", "angular", "vue", "nodejs", "node.js", "express", "django", "flask", "fastapi", "spring",

    # Databases
    "mysql", "postgresql", "postgres", "mongodb", "sqlite", "oracle", "redis", "cassandra", "dynamodb",

    # Cloud & DevOps
    "aws", "azure", "gcp", "google cloud", "docker", "kubernetes", "k8s", "terraform", "jenkins", "ci/cd", "cicd",

    # Data Science & AI
    "pandas", "numpy", "matplotlib", "seaborn", "scikit-learn", "sklearn", "tensorflow", "pytorch", "keras", "opencv",

    # Frameworks & Libraries (web frameworks listed above are not repeated)
    "jquery", "bootstrap", "tailwind", "material-ui", "redux",

    # Tools & Technologies
    "git", "github", "gitlab", "jira", "confluence", "slack", "trello", "figma", "photoshop",

    # Operating Systems
    "linux", "windows", "macos", "ubuntu", "centos",

    # API & Protocols
    "rest", "api", "graphql", "soap", "json", "xml", "http", "https",

    # Testing
    "junit", "pytest", "selenium", "cucumber", "postman", "jest",

    # Methodologies
    "agile", "scrum", "kanban", "devops", "microservices"
]
|
parsers/smart_skill_extractor.py
ADDED
|
@@ -0,0 +1,244 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# parsers/smart_skill_extractor.py - AI-Powered Skill Detection
|
| 2 |
+
import re
|
| 3 |
+
from collections import Counter
|
| 4 |
+
|
| 5 |
+
class SmartSkillExtractor:
|
| 6 |
+
"""AI-powered skill extraction that finds ANY skill mentioned in text"""
|
| 7 |
+
|
| 8 |
+
def __init__(self):
|
| 9 |
+
self.skill_database = self._load_comprehensive_skills()
|
| 10 |
+
self.patterns = self._create_extraction_patterns()
|
| 11 |
+
print(f"✅ Smart Skill Extractor loaded with {len(self.skill_database)} skills")
|
| 12 |
+
|
| 13 |
+
def _load_comprehensive_skills(self):
|
| 14 |
+
"""Load comprehensive skill database covering all domains"""
|
| 15 |
+
|
| 16 |
+
# Programming Languages
|
| 17 |
+
programming = [
|
| 18 |
+
'python', 'java', 'javascript', 'typescript', 'c++', 'c#', 'c', 'php', 'ruby', 'go', 'rust',
|
| 19 |
+
'kotlin', 'swift', 'scala', 'r', 'matlab', 'perl', 'bash', 'powershell', 'sql', 'html',
|
| 20 |
+
'css', 'sass', 'less', 'coffeescript', 'dart', 'elixir', 'erlang', 'f#', 'haskell',
|
| 21 |
+
'julia', 'lua', 'objective-c', 'vb.net', 'assembly', 'cobol', 'fortran'
|
| 22 |
+
]
|
| 23 |
+
|
| 24 |
+
# Frameworks & Libraries
|
| 25 |
+
frameworks = [
|
| 26 |
+
'react', 'angular', 'vue', 'svelte', 'ember', 'backbone', 'jquery', 'bootstrap', 'tailwind',
|
| 27 |
+
'django', 'flask', 'fastapi', 'express', 'nodejs', 'spring', 'hibernate', 'struts',
|
| 28 |
+
'rails', 'sinatra', 'laravel', 'symfony', 'codeigniter', 'asp.net', 'entity framework',
|
| 29 |
+
'xamarin', 'flutter', 'react native', 'ionic', 'cordova', 'electron', 'unity', 'unreal',
|
| 30 |
+
'tensorflow', 'pytorch', 'keras', 'scikit-learn', 'pandas', 'numpy', 'matplotlib',
|
| 31 |
+
'seaborn', 'plotly', 'opencv', 'nltk', 'spacy'
|
| 32 |
+
]
|
| 33 |
+
|
| 34 |
+
# Databases
|
| 35 |
+
databases = [
|
| 36 |
+
'mysql', 'postgresql', 'mongodb', 'redis', 'cassandra', 'elasticsearch', 'neo4j',
|
| 37 |
+
'couchdb', 'dynamodb', 'firestore', 'sqlite', 'oracle', 'sql server', 'mariadb',
|
| 38 |
+
'influxdb', 'clickhouse', 'bigquery', 'snowflake', 'redshift'
|
| 39 |
+
]
|
| 40 |
+
|
| 41 |
+
# Cloud & DevOps
|
| 42 |
+
cloud_devops = [
|
| 43 |
+
'aws', 'azure', 'gcp', 'docker', 'kubernetes', 'jenkins', 'gitlab ci', 'github actions',
|
| 44 |
+
'terraform', 'ansible', 'puppet', 'chef', 'vagrant', 'consul', 'vault', 'prometheus',
|
| 45 |
+
'grafana', 'elk stack', 'nginx', 'apache', 'tomcat',
|
| 46 |
+
'linux', 'ubuntu', 'centos', 'windows server', 'git', 'svn'
|
| 47 |
+
]
|
| 48 |
+
|
| 49 |
+
# Data Science & AI
|
| 50 |
+
data_ai = [
|
| 51 |
+
'machine learning', 'deep learning', 'artificial intelligence', 'data science',
|
| 52 |
+
'data analysis', 'data mining', 'big data', 'analytics', 'statistics', 'regression',
|
| 53 |
+
'classification', 'clustering', 'nlp', 'computer vision', 'neural networks'
|
| 54 |
+
]
|
| 55 |
+
|
| 56 |
+
# Business & Soft Skills
|
| 57 |
+
business_soft = [
|
| 58 |
+
'project management', 'agile', 'scrum', 'kanban', 'leadership', 'communication',
|
| 59 |
+
'teamwork', 'problem solving', 'time management', 'quality assurance',
|
| 60 |
+
'business analysis', 'user research', 'ux design', 'ui design'
|
| 61 |
+
]
|
| 62 |
+
|
| 63 |
+
# Tools & Platforms
|
| 64 |
+
tools = [
|
| 65 |
+
'jira', 'confluence', 'slack', 'figma', 'photoshop', 'excel', 'powerpoint',
|
| 66 |
+
'salesforce', 'google analytics', 'seo', 'automation', 'crm', 'erp'
|
| 67 |
+
]
|
| 68 |
+
|
| 69 |
+
# Combine all skills
|
| 70 |
+
all_skills = (programming + frameworks + databases + cloud_devops +
|
| 71 |
+
data_ai + business_soft + tools)
|
| 72 |
+
|
| 73 |
+
# Create variations mapping
|
| 74 |
+
skill_variations = {}
|
| 75 |
+
for skill in all_skills:
|
| 76 |
+
variations = [skill, skill.replace(' ', ''), skill.replace(' ', '_'),
|
| 77 |
+
skill.replace(' ', '-'), skill.upper(), skill.lower()]
|
| 78 |
+
|
| 79 |
+
# Add common abbreviations
|
| 80 |
+
abbreviations = {
|
| 81 |
+
'javascript': ['js', 'javascript'],
|
| 82 |
+
'typescript': ['ts', 'typescript'],
|
| 83 |
+
'artificial intelligence': ['ai', 'artificial intelligence'],
|
| 84 |
+
'machine learning': ['ml', 'machine learning'],
|
| 85 |
+
'amazon web services': ['aws', 'amazon web services'],
|
| 86 |
+
'google cloud platform': ['gcp', 'google cloud'],
|
| 87 |
+
'kubernetes': ['k8s', 'kubernetes'],
|
| 88 |
+
'user experience': ['ux', 'user experience'],
|
| 89 |
+
'user interface': ['ui', 'user interface'],
|
| 90 |
+
'structured query language': ['sql', 'structured query language'],
|
| 91 |
+
'cascading style sheets': ['css', 'cascading style sheets'],
|
| 92 |
+
'hypertext markup language': ['html', 'hypertext markup language']
|
| 93 |
+
}
|
| 94 |
+
|
| 95 |
+
skill_key = skill.lower()
|
| 96 |
+
if skill_key in abbreviations:
|
| 97 |
+
variations.extend(abbreviations[skill_key])
|
| 98 |
+
|
| 99 |
+
for var in variations:
|
| 100 |
+
if var and len(var) > 1:
|
| 101 |
+
skill_variations[var.lower()] = skill
|
| 102 |
+
|
| 103 |
+
return skill_variations
|
| 104 |
+
|
| 105 |
+
def _create_extraction_patterns(self):
|
| 106 |
+
"""Create regex patterns for skill extraction"""
|
| 107 |
+
return {
|
| 108 |
+
'experience_with': r'\b(?:experience|expertise|proficient|skilled)\s+(?:in|with|using)\s+([a-zA-Z+#.\s-]+)\b',
|
| 109 |
+
'years_exp': r'\b(\d+)\+?\s*(?:years?|yrs?)\s+(?:of\s+)?(?:experience|exp)\s+(?:in|with|using)\s+([a-zA-Z+#.\s-]+)\b',
|
| 110 |
+
'worked_with': r'\b(?:worked|working|used|using)\s+(?:with|on)?\s*([a-zA-Z+#.\s-]+)\b',
|
| 111 |
+
'technologies': r'\b(?:technologies|tools|frameworks|skills)[\s:]*([a-zA-Z+#.\s,-]+)\b',
|
| 112 |
+
'skills': r'\b(?:skills?|competencies)[\s:]*([a-zA-Z+#.\s,-]+)\b'
|
| 113 |
+
}
|
| 114 |
+
|
| 115 |
+
def extract_skills_comprehensive(self, text):
|
| 116 |
+
"""Extract skills using multiple techniques"""
|
| 117 |
+
if not text or len(text.strip()) < 10:
|
| 118 |
+
return []
|
| 119 |
+
|
| 120 |
+
found_skills = set()
|
| 121 |
+
text_lower = text.lower()
|
| 122 |
+
|
| 123 |
+
# Method 1: Direct skill matching
|
| 124 |
+
for skill_variant, canonical_skill in self.skill_database.items():
|
| 125 |
+
if skill_variant in text_lower:
|
| 126 |
+
# Verify it's a whole word match
|
| 127 |
+
pattern = r'\b' + re.escape(skill_variant) + r'\b'
|
| 128 |
+
if re.search(pattern, text_lower):
|
| 129 |
+
found_skills.add(canonical_skill)
|
| 130 |
+
|
| 131 |
+
# Method 2: Pattern-based extraction
|
| 132 |
+
for pattern_name, pattern in self.patterns.items():
|
| 133 |
+
matches = re.finditer(pattern, text_lower, re.IGNORECASE)
|
| 134 |
+
for match in matches:
|
| 135 |
+
if len(match.groups()) > 0 and match.group(1):
|
| 136 |
+
# Clean and process the captured group
|
| 137 |
+
skill_text = match.group(1).strip(' ,-')
|
| 138 |
+
extracted_skills = self._process_skill_text(skill_text)
|
| 139 |
+
found_skills.update(extracted_skills)
|
| 140 |
+
|
| 141 |
+
# Method 3: Context-based extraction
|
| 142 |
+
context_skills = self._extract_contextual_skills(text)
|
| 143 |
+
found_skills.update(context_skills)
|
| 144 |
+
|
| 145 |
+
return sorted(list(found_skills))
|
| 146 |
+
|
| 147 |
+
def _process_skill_text(self, skill_text):
|
| 148 |
+
"""Process extracted skill text to find valid skills"""
|
| 149 |
+
skills = set()
|
| 150 |
+
|
| 151 |
+
# Split by common separators
|
| 152 |
+
parts = re.split(r'[,;/\|\n]', skill_text)
|
| 153 |
+
|
| 154 |
+
for part in parts:
|
| 155 |
+
part = part.strip(' ,-()[]{}')
|
| 156 |
+
if len(part) > 1:
|
| 157 |
+
# Check if it's in our skill database
|
| 158 |
+
part_lower = part.lower()
|
| 159 |
+
if part_lower in self.skill_database:
|
| 160 |
+
skills.add(self.skill_database[part_lower])
|
| 161 |
+
|
| 162 |
+
# Check individual words
|
| 163 |
+
words = part.split()
|
| 164 |
+
for word in words:
|
| 165 |
+
word = word.strip(' ,-()[]{}').lower()
|
| 166 |
+
if word in self.skill_database:
|
| 167 |
+
skills.add(self.skill_database[word])
|
| 168 |
+
|
| 169 |
+
return skills
|
| 170 |
+
|
| 171 |
+
def _extract_contextual_skills(self, text):
|
| 172 |
+
"""Extract skills based on context clues"""
|
| 173 |
+
skills = set()
|
| 174 |
+
|
| 175 |
+
# Look for skills in specific sections
|
| 176 |
+
section_patterns = {
|
| 177 |
+
r'(?:technical\s+)?skills?[\s:]+([^.]+)': 'skills_section',
|
| 178 |
+
r'technologies?[\s:]+([^.]+)': 'tech_section',
|
| 179 |
+
r'tools?[\s:]+([^.]+)': 'tools_section'
|
| 180 |
+
}
|
| 181 |
+
|
| 182 |
+
for pattern, section_type in section_patterns.items():
|
| 183 |
+
matches = re.finditer(pattern, text, re.IGNORECASE)
|
| 184 |
+
for match in matches:
|
| 185 |
+
if len(match.groups()) > 0:
|
| 186 |
+
content = match.group(1)
|
| 187 |
+
# Extract skills from this section
|
| 188 |
+
section_skills = self._process_skill_text(content)
|
| 189 |
+
skills.update(section_skills)
|
| 190 |
+
|
| 191 |
+
return skills
|
| 192 |
+
|
| 193 |
+
def get_skill_categories(self, skills):
    """Group extracted skills into display categories.

    Each skill is assigned to the first category whose keyword list matches
    (case-insensitive substring test); anything unmatched falls into
    'Business & Soft Skills'.  Empty categories are dropped from the result.

    Fix: the old check used plain substring containment for 'ai', which
    misfiled any skill merely containing the letters "ai" (e.g. "Email
    Marketing") under Data Science & AI.  'ai' is now matched as a whole
    word; the multi-word AI keywords keep substring matching.
    """
    categories = {
        'Programming Languages': [],
        'Frameworks & Libraries': [],
        'Databases': [],
        'Cloud & DevOps': [],
        'Data Science & AI': [],
        'Business & Soft Skills': [],
        'Tools & Platforms': []
    }

    for skill in skills:
        skill_lower = skill.lower()

        if any(lang in skill_lower for lang in ['python', 'java', 'javascript', 'c++', 'php', 'ruby']):
            categories['Programming Languages'].append(skill)
        elif any(fw in skill_lower for fw in ['react', 'angular', 'django', 'spring', 'tensorflow']):
            categories['Frameworks & Libraries'].append(skill)
        elif any(db in skill_lower for db in ['mysql', 'mongodb', 'postgresql', 'redis']):
            categories['Databases'].append(skill)
        elif any(cloud in skill_lower for cloud in ['aws', 'azure', 'docker', 'kubernetes']):
            categories['Cloud & DevOps'].append(skill)
        elif (any(kw in skill_lower for kw in ['machine learning', 'data science', 'analytics'])
              or re.search(r'\bai\b', skill_lower)):
            categories['Data Science & AI'].append(skill)
        elif any(tool in skill_lower for tool in ['jira', 'figma', 'photoshop', 'excel']):
            categories['Tools & Platforms'].append(skill)
        else:
            categories['Business & Soft Skills'].append(skill)

    # Remove empty categories
    return {k: v for k, v in categories.items() if v}
|
| 226 |
+
|
| 227 |
+
# Test function
|
| 228 |
+
def test_smart_extractor():
    """Smoke-test the smart skill extractor against a tiny sample resume."""
    extractor = SmartSkillExtractor()

    test_text = """
    John Doe - Software Engineer
    Skills: Python, JavaScript, React, MySQL, AWS
    Experience: 3 years of experience in full-stack development
    """

    extracted = extractor.extract_skills_comprehensive(test_text)
    print(f"✅ Extracted {len(extracted)} skills: {extracted}")

    # Passes as long as at least one skill was recognized.
    return len(extracted) > 0
|
| 242 |
+
|
| 243 |
+
# Allow running this module directly as a quick smoke test.
if __name__ == "__main__":
    test_smart_extractor()
|
parsers/universal_parser.py
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# parsers/universal_parser.py - Universal Resume Parser
|
| 2 |
+
import os
|
| 3 |
+
import re
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
|
| 6 |
+
class UniversalResumeParser:
    """Universal parser that handles multiple resume formats.

    Supports .pdf, .docx, .txt and legacy .doc files.  Extraction never
    raises to the caller: missing optional dependencies and unreadable
    files fall back to lossy text reads, and as a last resort a
    descriptive error string is returned in place of the text.

    Fix: the two bare ``except:`` clauses are narrowed to
    ``except Exception:`` so KeyboardInterrupt/SystemExit are no longer
    swallowed by the fallbacks.
    """

    def __init__(self):
        # Dispatch table: file extension -> bound extractor method.
        self.supported_formats = {
            '.pdf': self._extract_from_pdf,
            '.docx': self._extract_from_docx,
            '.txt': self._extract_from_txt,
            '.doc': self._extract_from_doc
        }
        print("✅ Universal Resume Parser initialized")

    def extract_text(self, file_path):
        """Extract and clean text from any supported file format.

        Unknown extensions fall back to a plain UTF-8 read; a failed
        extraction falls back to a lossy text read; if everything fails,
        an error string is returned (this method never raises).
        """
        try:
            file_ext = Path(file_path).suffix.lower()

            if file_ext not in self.supported_formats:
                # Fallback to text reading
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        return f.read()
                except Exception:  # narrowed from a bare except
                    raise ValueError(f"Unsupported format: {file_ext}")

            print(f"🔍 Processing {file_ext} file...")

            # Use appropriate extractor
            extractor = self.supported_formats[file_ext]
            text = extractor(file_path)

            # Clean text
            enhanced_text = self._enhance_extracted_text(text)

            print(f"✅ Extracted {len(enhanced_text)} characters")
            return enhanced_text

        except Exception as e:
            print(f"❌ Extraction failed: {e}")
            # Try basic text reading as fallback
            try:
                with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                    return f.read()
            except Exception:  # narrowed from a bare except
                return f"Error extracting from {file_path}: {str(e)}"

    def _extract_from_pdf(self, file_path):
        """Extract PDF text via the project parser, falling back to PyMuPDF."""
        try:
            from parsers.pdf_parser import extract_text_pymupdf
            return extract_text_pymupdf(file_path)
        except ImportError:
            # Fallback if the project helper (PyMuPDF wrapper) is unavailable
            try:
                import fitz
                doc = fitz.open(file_path)
                text = ""
                for page in doc:
                    text += page.get_text()
                doc.close()
                return text
            except ImportError:
                return "PDF extraction requires PyMuPDF package"
            except Exception as e:
                return f"PDF extraction error: {str(e)}"

    def _extract_from_docx(self, file_path):
        """Extract DOCX text via the project parser, falling back to python-docx."""
        try:
            from parsers.docx_parser import extract_text_docx
            return extract_text_docx(file_path)
        except ImportError:
            try:
                import docx
                doc = docx.Document(file_path)
                text = ""
                for paragraph in doc.paragraphs:
                    text += paragraph.text + "\n"
                return text
            except ImportError:
                return "DOCX extraction requires python-docx package"
            except Exception as e:
                return f"DOCX extraction error: {str(e)}"

    def _extract_from_txt(self, file_path):
        """Extract from a text file, trying several encodings in order.

        NOTE(review): 'latin-1' decodes any byte sequence, so the binary
        re-read below is effectively unreachable — kept for safety.
        """
        encodings = ['utf-8', 'latin-1', 'cp1252', 'iso-8859-1']

        for encoding in encodings:
            try:
                with open(file_path, 'r', encoding=encoding) as f:
                    return f.read()
            except UnicodeDecodeError:
                continue

        # If all encodings fail, decode lossily from the raw bytes.
        try:
            with open(file_path, 'rb') as f:
                raw_data = f.read()
                return raw_data.decode('utf-8', errors='ignore')
        except Exception as e:
            return f"Text extraction error: {str(e)}"

    def _extract_from_doc(self, file_path):
        """Extract from legacy DOC format via docx2txt (optional dependency)."""
        try:
            import docx2txt
            text = docx2txt.process(file_path)
            return text
        except ImportError:
            return "DOC format requires docx2txt package (pip install docx2txt)"
        except Exception as e:
            return f"DOC extraction error: {str(e)}"

    def _enhance_extracted_text(self, text):
        """Clean and normalize extracted text.

        Collapses runs of blank lines and horizontal whitespace, then
        repairs e-mail addresses and US-style phone numbers that were
        split by stray spaces during extraction.  Empty or very short
        input is returned unchanged.
        """
        if not text or len(text.strip()) < 10:
            return text

        # Remove excessive whitespace
        text = re.sub(r'\n\s*\n', '\n\n', text)
        text = re.sub(r'[ \t]+', ' ', text)

        # Fix common extraction issues
        text = re.sub(r'([a-zA-Z0-9._%+-]+)\s*@\s*([a-zA-Z0-9.-]+\.[a-zA-Z]{2,})', r'\1@\2', text)
        text = re.sub(r'(\d{3})\s*-?\s*(\d{3})\s*-?\s*(\d{4})', r'\1-\2-\3', text)

        return text.strip()
|
| 134 |
+
|
| 135 |
+
def test_universal_parser():
    """Smoke-test the universal parser's text-cleanup path."""
    instance = UniversalResumeParser()
    sample = "Test resume text"
    _ = instance._enhance_extracted_text(sample)
    print("✅ Universal parser test completed")
    return True
|
| 142 |
+
|
| 143 |
+
# Allow running this module directly as a quick smoke test.
if __name__ == "__main__":
    test_universal_parser()
|
scoring/__pycache__/relevance_scorer.cpython-312.pyc
ADDED
|
Binary file (12.3 kB). View file
|
|
|
scoring/relevance_scorer.py
ADDED
|
@@ -0,0 +1,314 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# scoring/relevance_scorer.py - Job-Specific Resume Relevance Scoring
|
| 2 |
+
from dataclasses import dataclass
|
| 3 |
+
from typing import Dict, List, Tuple
|
| 4 |
+
import re
|
| 5 |
+
|
| 6 |
+
@dataclass
class RelevanceScore:
    """Structured relevance scoring result"""
    overall_score: float  # 0-100
    # Component scores, each 0-100 (see JobRelevanceScorer).
    skill_match_score: float
    experience_match_score: float
    education_match_score: float

    # Job skills found / not found in the resume, split by priority.
    matched_must_have: List[str]
    matched_good_to_have: List[str]
    missing_must_have: List[str]
    missing_good_to_have: List[str]

    # Human-readable experience gap summary.
    experience_gap: str
    # Required education entries the resume did not match.
    education_gap: List[str]

    fit_verdict: str  # High/Medium/Low
    # Confidence in the verdict, 0-100.
    confidence_score: float

    # Suggestion tiers produced by the scorer.
    improvement_suggestions: List[str]
    quick_wins: List[str]
    long_term_goals: List[str]
|
| 28 |
+
|
| 29 |
+
class JobRelevanceScorer:
|
| 30 |
+
"""Score resume relevance against specific job requirements"""
|
| 31 |
+
|
| 32 |
+
def __init__(self):
|
| 33 |
+
self.scoring_weights = {
|
| 34 |
+
'must_have_skills': 0.40, # 40% weight
|
| 35 |
+
'experience': 0.25, # 25% weight
|
| 36 |
+
'good_to_have_skills': 0.15, # 15% weight
|
| 37 |
+
'education': 0.20 # 20% weight
|
| 38 |
+
}
|
| 39 |
+
print("✅ Job Relevance Scorer initialized")
|
| 40 |
+
|
| 41 |
+
def calculate_relevance(self, resume_text: str, job_req) -> RelevanceScore:
|
| 42 |
+
"""Calculate comprehensive relevance score against job requirements"""
|
| 43 |
+
|
| 44 |
+
print(f"🎯 Scoring relevance for: {getattr(job_req, 'role_title', 'Unknown Role')}")
|
| 45 |
+
|
| 46 |
+
# Extract resume information
|
| 47 |
+
from parsers.smart_skill_extractor import SmartSkillExtractor
|
| 48 |
+
skill_extractor = SmartSkillExtractor()
|
| 49 |
+
resume_skills = skill_extractor.extract_skills_comprehensive(resume_text)
|
| 50 |
+
|
| 51 |
+
resume_experience = self._extract_experience_years(resume_text)
|
| 52 |
+
resume_education = self._extract_education_level(resume_text)
|
| 53 |
+
|
| 54 |
+
# Get job requirements
|
| 55 |
+
must_have_skills = getattr(job_req, 'must_have_skills', [])
|
| 56 |
+
good_to_have_skills = getattr(job_req, 'good_to_have_skills', [])
|
| 57 |
+
required_experience = getattr(job_req, 'experience_required', '')
|
| 58 |
+
required_education = getattr(job_req, 'education_required', [])
|
| 59 |
+
|
| 60 |
+
# Calculate component scores
|
| 61 |
+
skill_score, skill_matches = self._score_skills(
|
| 62 |
+
resume_skills, must_have_skills, good_to_have_skills
|
| 63 |
+
)
|
| 64 |
+
|
| 65 |
+
experience_score, exp_gap = self._score_experience(
|
| 66 |
+
resume_experience, required_experience
|
| 67 |
+
)
|
| 68 |
+
|
| 69 |
+
education_score, edu_gap = self._score_education(
|
| 70 |
+
resume_education, required_education
|
| 71 |
+
)
|
| 72 |
+
|
| 73 |
+
# Calculate weighted overall score
|
| 74 |
+
overall_score = (
|
| 75 |
+
skill_score * self.scoring_weights['must_have_skills'] +
|
| 76 |
+
experience_score * self.scoring_weights['experience'] +
|
| 77 |
+
education_score * self.scoring_weights['education']
|
| 78 |
+
)
|
| 79 |
+
|
| 80 |
+
# Add good-to-have bonus
|
| 81 |
+
good_to_have_bonus = len(skill_matches['matched_good_to_have']) * 2
|
| 82 |
+
overall_score = min(100, overall_score + good_to_have_bonus)
|
| 83 |
+
|
| 84 |
+
# Determine fit verdict
|
| 85 |
+
fit_verdict, confidence = self._determine_fit_verdict(
|
| 86 |
+
overall_score, skill_matches, experience_score
|
| 87 |
+
)
|
| 88 |
+
|
| 89 |
+
# Generate improvement suggestions
|
| 90 |
+
suggestions = self._generate_improvement_suggestions(
|
| 91 |
+
skill_matches, exp_gap, edu_gap, job_req
|
| 92 |
+
)
|
| 93 |
+
|
| 94 |
+
return RelevanceScore(
|
| 95 |
+
overall_score=round(overall_score, 1),
|
| 96 |
+
skill_match_score=round(skill_score, 1),
|
| 97 |
+
experience_match_score=round(experience_score, 1),
|
| 98 |
+
education_match_score=round(education_score, 1),
|
| 99 |
+
|
| 100 |
+
matched_must_have=skill_matches['matched_must_have'],
|
| 101 |
+
matched_good_to_have=skill_matches['matched_good_to_have'],
|
| 102 |
+
missing_must_have=skill_matches['missing_must_have'],
|
| 103 |
+
missing_good_to_have=skill_matches['missing_good_to_have'],
|
| 104 |
+
|
| 105 |
+
experience_gap=exp_gap,
|
| 106 |
+
education_gap=edu_gap,
|
| 107 |
+
|
| 108 |
+
fit_verdict=fit_verdict,
|
| 109 |
+
confidence_score=confidence,
|
| 110 |
+
|
| 111 |
+
improvement_suggestions=suggestions['main'],
|
| 112 |
+
quick_wins=suggestions['quick_wins'],
|
| 113 |
+
long_term_goals=suggestions['long_term']
|
| 114 |
+
)
|
| 115 |
+
|
| 116 |
+
def _score_skills(self, resume_skills: List[str], must_have: List[str],
|
| 117 |
+
good_to_have: List[str]) -> Tuple[float, Dict]:
|
| 118 |
+
"""Score skill matching against job requirements"""
|
| 119 |
+
|
| 120 |
+
resume_skills_lower = [skill.lower() for skill in resume_skills]
|
| 121 |
+
|
| 122 |
+
# Match must-have skills
|
| 123 |
+
matched_must_have = []
|
| 124 |
+
missing_must_have = []
|
| 125 |
+
|
| 126 |
+
for skill in must_have:
|
| 127 |
+
skill_lower = skill.lower()
|
| 128 |
+
if any(skill_lower in resume_skill for resume_skill in resume_skills_lower):
|
| 129 |
+
matched_must_have.append(skill)
|
| 130 |
+
else:
|
| 131 |
+
missing_must_have.append(skill)
|
| 132 |
+
|
| 133 |
+
# Match good-to-have skills
|
| 134 |
+
matched_good_to_have = []
|
| 135 |
+
missing_good_to_have = []
|
| 136 |
+
|
| 137 |
+
for skill in good_to_have:
|
| 138 |
+
skill_lower = skill.lower()
|
| 139 |
+
if any(skill_lower in resume_skill for resume_skill in resume_skills_lower):
|
| 140 |
+
matched_good_to_have.append(skill)
|
| 141 |
+
else:
|
| 142 |
+
missing_good_to_have.append(skill)
|
| 143 |
+
|
| 144 |
+
# Calculate skill score
|
| 145 |
+
if not must_have:
|
| 146 |
+
must_have_score = 100
|
| 147 |
+
else:
|
| 148 |
+
must_have_score = (len(matched_must_have) / len(must_have)) * 100
|
| 149 |
+
|
| 150 |
+
return must_have_score, {
|
| 151 |
+
'matched_must_have': matched_must_have,
|
| 152 |
+
'matched_good_to_have': matched_good_to_have,
|
| 153 |
+
'missing_must_have': missing_must_have,
|
| 154 |
+
'missing_good_to_have': missing_good_to_have
|
| 155 |
+
}
|
| 156 |
+
|
| 157 |
+
def _score_experience(self, resume_exp: int, required_exp: str) -> Tuple[float, str]:
|
| 158 |
+
"""Score experience matching"""
|
| 159 |
+
|
| 160 |
+
req_years = self._parse_experience_requirement(required_exp)
|
| 161 |
+
|
| 162 |
+
if req_years is None:
|
| 163 |
+
return 100, "Experience requirement not specified"
|
| 164 |
+
|
| 165 |
+
if resume_exp >= req_years:
|
| 166 |
+
if resume_exp <= req_years + 2:
|
| 167 |
+
score = 100
|
| 168 |
+
gap = f"Perfect match ({resume_exp} years vs {req_years} required)"
|
| 169 |
+
else:
|
| 170 |
+
score = 95
|
| 171 |
+
gap = f"Overqualified ({resume_exp} years vs {req_years} required)"
|
| 172 |
+
else:
|
| 173 |
+
gap_years = req_years - resume_exp
|
| 174 |
+
if gap_years == 1:
|
| 175 |
+
score = 75
|
| 176 |
+
gap = f"1 year short ({resume_exp} years vs {req_years} required)"
|
| 177 |
+
elif gap_years == 2:
|
| 178 |
+
score = 50
|
| 179 |
+
gap = f"2 years short ({resume_exp} years vs {req_years} required)"
|
| 180 |
+
else:
|
| 181 |
+
score = 25
|
| 182 |
+
gap = f"{gap_years} years short ({resume_exp} years vs {req_years} required)"
|
| 183 |
+
|
| 184 |
+
return score, gap
|
| 185 |
+
|
| 186 |
+
def _score_education(self, resume_edu: List[str], required_edu: List[str]) -> Tuple[float, List[str]]:
|
| 187 |
+
"""Score education matching"""
|
| 188 |
+
|
| 189 |
+
if not required_edu or "any graduate" in " ".join(required_edu).lower():
|
| 190 |
+
return 100, []
|
| 191 |
+
|
| 192 |
+
resume_edu_lower = [edu.lower() for edu in resume_edu]
|
| 193 |
+
|
| 194 |
+
matched = False
|
| 195 |
+
gaps = []
|
| 196 |
+
|
| 197 |
+
for req_edu in required_edu:
|
| 198 |
+
req_edu_lower = req_edu.lower()
|
| 199 |
+
found_match = False
|
| 200 |
+
for res_edu in resume_edu_lower:
|
| 201 |
+
if any(word in res_edu for word in req_edu_lower.split() if len(word) > 2):
|
| 202 |
+
matched = True
|
| 203 |
+
found_match = True
|
| 204 |
+
break
|
| 205 |
+
|
| 206 |
+
if not found_match:
|
| 207 |
+
gaps.append(req_edu)
|
| 208 |
+
|
| 209 |
+
score = 100 if matched and not gaps else (80 if matched else 30)
|
| 210 |
+
return score, gaps
|
| 211 |
+
|
| 212 |
+
def _extract_experience_years(self, resume_text: str) -> int:
|
| 213 |
+
"""Extract years of experience from resume"""
|
| 214 |
+
|
| 215 |
+
patterns = [
|
| 216 |
+
r'(\d+)[\+\s]*years?\s+(?:of\s+)?(?:experience|exp)',
|
| 217 |
+
r'(?:experience|exp)[\s:]*(\d+)[\+\s]*years?',
|
| 218 |
+
r'(\d+)[\+\s]*years?\s+(?:in|with)'
|
| 219 |
+
]
|
| 220 |
+
|
| 221 |
+
years = []
|
| 222 |
+
for pattern in patterns:
|
| 223 |
+
matches = re.findall(pattern, resume_text, re.IGNORECASE)
|
| 224 |
+
years.extend([int(match) for match in matches if match.isdigit()])
|
| 225 |
+
|
| 226 |
+
return max(years) if years else 0
|
| 227 |
+
|
| 228 |
+
def _extract_education_level(self, resume_text: str) -> List[str]:
|
| 229 |
+
"""Extract education from resume"""
|
| 230 |
+
|
| 231 |
+
patterns = [
|
| 232 |
+
r'bachelor[^.\n]*',
|
| 233 |
+
r'master[^.\n]*',
|
| 234 |
+
r'b\.?tech[^.\n]*',
|
| 235 |
+
r'm\.?tech[^.\n]*',
|
| 236 |
+
r'bca[^.\n]*',
|
| 237 |
+
r'mca[^.\n]*'
|
| 238 |
+
]
|
| 239 |
+
|
| 240 |
+
education = []
|
| 241 |
+
for pattern in patterns:
|
| 242 |
+
matches = re.findall(pattern, resume_text, re.IGNORECASE)
|
| 243 |
+
education.extend(matches)
|
| 244 |
+
|
| 245 |
+
return education
|
| 246 |
+
|
| 247 |
+
def _parse_experience_requirement(self, exp_req: str) -> int:
|
| 248 |
+
"""Parse experience requirement string to years"""
|
| 249 |
+
|
| 250 |
+
if not exp_req or exp_req.lower() == "not specified":
|
| 251 |
+
return None
|
| 252 |
+
|
| 253 |
+
numbers = re.findall(r'\d+', exp_req)
|
| 254 |
+
|
| 255 |
+
if not numbers:
|
| 256 |
+
return None
|
| 257 |
+
|
| 258 |
+
return int(numbers[0])
|
| 259 |
+
|
| 260 |
+
def _determine_fit_verdict(self, overall_score: float, skill_matches: Dict,
|
| 261 |
+
experience_score: float) -> Tuple[str, float]:
|
| 262 |
+
"""Determine fit verdict and confidence"""
|
| 263 |
+
|
| 264 |
+
must_have_count = len(skill_matches['matched_must_have']) + len(skill_matches['missing_must_have'])
|
| 265 |
+
must_have_ratio = len(skill_matches['matched_must_have']) / max(1, must_have_count)
|
| 266 |
+
|
| 267 |
+
confidence = min(100, (must_have_ratio * 50) + (experience_score * 0.3) + (overall_score * 0.2))
|
| 268 |
+
|
| 269 |
+
if overall_score >= 80 and must_have_ratio >= 0.8:
|
| 270 |
+
verdict = "High Suitability"
|
| 271 |
+
elif overall_score >= 60 and must_have_ratio >= 0.6:
|
| 272 |
+
verdict = "Medium Suitability"
|
| 273 |
+
elif overall_score >= 40:
|
| 274 |
+
verdict = "Low-Medium Suitability"
|
| 275 |
+
else:
|
| 276 |
+
verdict = "Low Suitability"
|
| 277 |
+
|
| 278 |
+
return verdict, round(confidence, 1)
|
| 279 |
+
|
| 280 |
+
def _generate_improvement_suggestions(self, skill_matches: Dict, exp_gap: str,
|
| 281 |
+
edu_gap: List[str], job_req) -> Dict[str, List[str]]:
|
| 282 |
+
"""Generate personalized improvement suggestions"""
|
| 283 |
+
|
| 284 |
+
main_suggestions = []
|
| 285 |
+
quick_wins = []
|
| 286 |
+
long_term_goals = []
|
| 287 |
+
|
| 288 |
+
# Skill suggestions
|
| 289 |
+
missing_must_have = skill_matches['missing_must_have']
|
| 290 |
+
if missing_must_have:
|
| 291 |
+
main_suggestions.append(f"Acquire critical skills: {', '.join(missing_must_have[:3])}")
|
| 292 |
+
quick_wins.append(f"Start learning: {', '.join(missing_must_have[:2])}")
|
| 293 |
+
|
| 294 |
+
# Experience suggestions
|
| 295 |
+
if "short" in exp_gap:
|
| 296 |
+
quick_wins.append("Gain experience through projects and internships")
|
| 297 |
+
|
| 298 |
+
# Education suggestions
|
| 299 |
+
if edu_gap:
|
| 300 |
+
long_term_goals.append("Consider relevant degree or certification")
|
| 301 |
+
|
| 302 |
+
return {
|
| 303 |
+
'main': main_suggestions[:5],
|
| 304 |
+
'quick_wins': quick_wins[:5],
|
| 305 |
+
'long_term': long_term_goals[:3]
|
| 306 |
+
}
|
| 307 |
+
|
| 308 |
+
def test_relevance_scorer():
    """Placeholder smoke test for the relevance scorer module."""
    print("✅ Relevance scorer test completed")
    return True
|
| 312 |
+
|
| 313 |
+
# Allow running this module directly as a quick smoke test.
if __name__ == "__main__":
    test_relevance_scorer()
|