kushal2006 committed on
Commit
5aab0ac
·
verified ·
1 Parent(s): 3083de5

Upload 46 files

Browse files
Files changed (46) hide show
  1. config/skills.yaml +12 -0
  2. llm_analysis/__init__.py +0 -0
  3. llm_analysis/__pycache__/__init__.cpython-312.pyc +0 -0
  4. llm_analysis/__pycache__/langgraph_pipeline.cpython-312.pyc +0 -0
  5. llm_analysis/__pycache__/langsmith_logger.cpython-312.pyc +0 -0
  6. llm_analysis/__pycache__/llm_analyzer.cpython-312.pyc +0 -0
  7. llm_analysis/__pycache__/prompt_templates.cpython-312.pyc +0 -0
  8. llm_analysis/langgraph_pipeline.py +225 -0
  9. llm_analysis/langsmith_logger.py +265 -0
  10. llm_analysis/llm_analyzer.py +170 -0
  11. llm_analysis/prompt_templates.py +83 -0
  12. logs/langsmith_metrics.jsonl +57 -0
  13. logs/langsmith_traces.jsonl +120 -0
  14. matchers/__init__.py +0 -0
  15. matchers/__pycache__/__init__.cpython-312.pyc +0 -0
  16. matchers/__pycache__/final_scorer.cpython-312.pyc +0 -0
  17. matchers/__pycache__/hard_matcher.cpython-312.pyc +0 -0
  18. matchers/__pycache__/semantic_matcher.cpython-312.pyc +0 -0
  19. matchers/entity_extractor.py +160 -0
  20. matchers/final_scorer.py +73 -0
  21. matchers/fuzzy_matcher.py +117 -0
  22. matchers/hard_matcher.py +47 -0
  23. matchers/semantic_matcher.py +37 -0
  24. parsers/__iniy__.py +0 -0
  25. parsers/__pycache__/cleaner.cpython-312.pyc +0 -0
  26. parsers/__pycache__/docx_parser.cpython-312.pyc +0 -0
  27. parsers/__pycache__/jd_parser.cpython-312.pyc +0 -0
  28. parsers/__pycache__/job_requirement_parser.cpython-312.pyc +0 -0
  29. parsers/__pycache__/pdf_parser.cpython-312.pyc +0 -0
  30. parsers/__pycache__/section_splitter.cpython-312.pyc +0 -0
  31. parsers/__pycache__/skill_extractor.cpython-312.pyc +0 -0
  32. parsers/__pycache__/skills_list.cpython-312.pyc +0 -0
  33. parsers/__pycache__/smart_skill_extractor.cpython-312.pyc +0 -0
  34. parsers/cleaner.py +7 -0
  35. parsers/docx_parser.py +5 -0
  36. parsers/entity_extractor.py +33 -0
  37. parsers/jd_parser.py +20 -0
  38. parsers/job_requirement_parser.py +449 -0
  39. parsers/pdf_parser.py +25 -0
  40. parsers/section_splitter.py +71 -0
  41. parsers/skill_extractor.py +64 -0
  42. parsers/skills_list.py +35 -0
  43. parsers/smart_skill_extractor.py +244 -0
  44. parsers/universal_parser.py +144 -0
  45. scoring/__pycache__/relevance_scorer.cpython-312.pyc +0 -0
  46. scoring/relevance_scorer.py +314 -0
config/skills.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ skills:
2
+ - python
3
+ - java
4
+ - c++
5
+ - sql
6
+ - aws
7
+ - docker
8
+ - kubernetes
9
+ - tensorflow
10
+ - pytorch
11
+ - react
12
+ - node.js
llm_analysis/__init__.py ADDED
File without changes
llm_analysis/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (154 Bytes). View file
 
llm_analysis/__pycache__/langgraph_pipeline.cpython-312.pyc ADDED
Binary file (9.35 kB). View file
 
llm_analysis/__pycache__/langsmith_logger.cpython-312.pyc ADDED
Binary file (11.6 kB). View file
 
llm_analysis/__pycache__/llm_analyzer.cpython-312.pyc ADDED
Binary file (8.66 kB). View file
 
llm_analysis/__pycache__/prompt_templates.cpython-312.pyc ADDED
Binary file (3.76 kB). View file
 
llm_analysis/langgraph_pipeline.py ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # llm_analysis/langgraph_pipeline.py - Structured Analysis Pipeline
2
+ from langgraph.graph import StateGraph, END
3
+ from typing import Dict, List, TypedDict
4
+ import json
5
+ from llm_analysis.llm_analyzer import LLMResumeAnalyzer
6
+
7
class AnalysisState(TypedDict):
    """State object shared by every node of the analysis pipeline."""

    resume_text: str           # resume text handed to run_structured_analysis
    jd_text: str               # job-description text handed to run_structured_analysis
    basic_scores: Dict         # keyword-match data supplied by the caller
    enhanced_skills: Dict      # written by the skills-extraction node
    llm_analysis: Dict         # written by the LLM-analysis node
    improvement_roadmap: Dict  # written by the roadmap node
    final_result: Dict         # written by the compilation or error-handler node
    current_step: str          # name of the step that most recently started
    errors: List[str]          # human-readable messages appended by failing nodes


class ResumeAnalysisPipeline:
    """LangGraph-powered structured analysis pipeline.

    The graph runs four stages in order (skills extraction, LLM analysis,
    roadmap generation, final compilation).  After every stage the state's
    ``errors`` list is inspected; as soon as it is non-empty the run is routed
    to the error handler, which packages the partial results instead.
    """

    def __init__(self, model="x-ai/grok-4-fast:free"):
        """Create the LLM analyzer for ``model`` and compile the graph."""
        self.llm_analyzer = LLMResumeAnalyzer(model=model)
        self.graph = self._create_pipeline()
        print("✅ LangGraph pipeline initialized")

    def _create_pipeline(self):
        """Build and compile the workflow graph.

        Returns:
            The compiled graph produced by ``StateGraph.compile()``.
        """
        workflow = StateGraph(AnalysisState)

        # LangGraph rejects a node whose name equals a state key, and
        # ``llm_analysis`` is a key of AnalysisState, so that node gets a
        # distinct graph name.  The ``current_step`` strings are unaffected.
        llm_node = "llm_analysis_step"

        workflow.add_node("skills_extraction", self._extract_skills_node)
        workflow.add_node(llm_node, self._llm_analysis_node)
        workflow.add_node("roadmap_generation", self._roadmap_generation_node)
        workflow.add_node("final_compilation", self._final_compilation_node)
        workflow.add_node("error_handler", self._error_handler_node)

        workflow.set_entry_point("skills_extraction")

        # Exactly one outgoing route per stage.  Mixing an unconditional edge
        # with a conditional branch on the same node would trigger BOTH
        # targets, so the happy path would keep running next to the error
        # handler.
        transitions = [
            ("skills_extraction", llm_node),
            (llm_node, "roadmap_generation"),
            ("roadmap_generation", "final_compilation"),
            ("final_compilation", END),
        ]
        for source, target in transitions:
            workflow.add_conditional_edges(
                source,
                self._should_continue,
                {
                    "continue": target,
                    "error": "error_handler",
                },
            )

        workflow.add_edge("error_handler", END)

        return workflow.compile()

    @staticmethod
    def _record_error(state, message: str) -> None:
        """Append ``message`` to ``state['errors']``, creating the list if absent."""
        state.setdefault("errors", []).append(message)

    def _should_continue(self, state: AnalysisState) -> str:
        """Return ``"error"`` when any error has been recorded, else ``"continue"``."""
        if state.get("errors"):
            return "error"
        return "continue"

    def _extract_skills_node(self, state: AnalysisState) -> AnalysisState:
        """Node 1: store the analyzer's enhanced skills extraction in the state."""
        try:
            state["current_step"] = "skills_extraction"
            print("🔍 LangGraph: Extracting skills...")

            state["enhanced_skills"] = self.llm_analyzer.enhance_skills_extraction(
                state["resume_text"]
            )

            print("✅ LangGraph: Skills extraction completed")
        except Exception as e:
            # Recorded rather than raised so the graph can route to error_handler.
            self._record_error(state, f"Skills extraction failed: {str(e)}")
        return state

    def _llm_analysis_node(self, state: AnalysisState) -> AnalysisState:
        """Node 2: store the analyzer's resume-vs-JD analysis in the state."""
        try:
            state["current_step"] = "llm_analysis"
            print("🧠 LangGraph: Running LLM analysis...")

            state["llm_analysis"] = self.llm_analyzer.analyze_resume_vs_jd(
                state["resume_text"],
                state["jd_text"],
                state["basic_scores"]
            )

            print("✅ LangGraph: LLM analysis completed")
        except Exception as e:
            self._record_error(state, f"LLM analysis failed: {str(e)}")
        return state

    def _roadmap_generation_node(self, state: AnalysisState) -> AnalysisState:
        """Node 3: store the improvement roadmap derived from the LLM analysis."""
        try:
            state["current_step"] = "roadmap_generation"
            print("🗺️ LangGraph: Generating improvement roadmap...")

            state["improvement_roadmap"] = self.llm_analyzer.generate_improvement_roadmap(
                state["llm_analysis"]
            )

            print("✅ LangGraph: Roadmap generation completed")
        except Exception as e:
            self._record_error(state, f"Roadmap generation failed: {str(e)}")
        return state

    def _final_compilation_node(self, state: AnalysisState) -> AnalysisState:
        """Node 4: bundle every stage's output into ``state['final_result']``."""
        try:
            state["current_step"] = "final_compilation"
            print("📊 LangGraph: Compiling final results...")

            state["final_result"] = {
                "basic_scores": state["basic_scores"],
                "enhanced_skills": state["enhanced_skills"],
                "llm_analysis": state["llm_analysis"],
                "improvement_roadmap": state["improvement_roadmap"],
                "pipeline_status": "completed",
                "processing_steps": ["skills_extraction", "llm_analysis", "roadmap_generation", "compilation"]
            }

            print("✅ LangGraph: Pipeline completed successfully")
        except Exception as e:
            self._record_error(state, f"Final compilation failed: {str(e)}")
        return state

    def _error_handler_node(self, state: AnalysisState) -> AnalysisState:
        """Package recorded errors and whatever partial results exist."""
        errors = state.get("errors", [])
        print(f"❌ LangGraph: Handling errors - {len(errors)} error(s)")

        state["final_result"] = {
            "pipeline_status": "failed",
            "errors": errors,
            # NOTE: current_step is set when a node starts, so this is the step
            # that was running when the failure happened.  The key name is kept
            # for compatibility with existing consumers.
            "last_successful_step": state.get("current_step", "unknown"),
            "partial_results": {
                "basic_scores": state.get("basic_scores", {}),
                "enhanced_skills": state.get("enhanced_skills", {}),
                "llm_analysis": state.get("llm_analysis", {}),
                "improvement_roadmap": state.get("improvement_roadmap", {})
            }
        }
        return state

    def run_structured_analysis(self, resume_text: str, jd_text: str, basic_scores: Dict) -> Dict:
        """Run the complete structured analysis pipeline.

        Args:
            resume_text: Resume text to analyze.
            jd_text: Job-description text to compare against.
            basic_scores: Keyword-match data forwarded to the LLM analysis.

        Returns:
            The ``final_result`` dict of the finished run.  Its
            ``pipeline_status`` is ``"completed"`` or ``"failed"``; if invoking
            the graph itself raises, a ``"critical_failure"`` dict carrying the
            error text and ``basic_scores`` is returned instead.
        """
        print("🚀 Starting LangGraph structured analysis pipeline...")

        initial_state = AnalysisState(
            resume_text=resume_text,
            jd_text=jd_text,
            basic_scores=basic_scores,
            enhanced_skills={},
            llm_analysis={},
            improvement_roadmap={},
            final_result={},
            current_step="initializing",
            errors=[]
        )

        try:
            final_state = self.graph.invoke(initial_state)

            print("✅ LangGraph pipeline execution completed")
            return final_state["final_result"]

        except Exception as e:
            print(f"❌ LangGraph pipeline failed: {e}")
            return {
                "pipeline_status": "critical_failure",
                "error": str(e),
                "basic_scores": basic_scores
            }
204
+
205
+ # Test function
206
def test_langgraph_pipeline():
    """Smoke-test the pipeline on a tiny resume/JD pair.

    Returns:
        True when the run reports ``pipeline_status == 'completed'``.
    """
    keyword_scores = dict(
        score=75,
        matched_skills=["python", "react"],
        missing_skills=["docker"],
        matched_count=2,
        total_jd_skills=3,
    )

    outcome = ResumeAnalysisPipeline().run_structured_analysis(
        "Python developer with React experience",
        "Looking for Python developer with React skills",
        keyword_scores,
    )

    reported_status = outcome.get('pipeline_status', 'unknown')
    print(f"✅ LangGraph test completed: {reported_status}")
    return outcome.get('pipeline_status') == 'completed'
223
+
224
+ if __name__ == "__main__":
225
+ test_langgraph_pipeline()
llm_analysis/langsmith_logger.py ADDED
@@ -0,0 +1,265 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # llm_analysis/langsmith_logger.py - LangSmith Observability & Debugging
2
+ import os
3
+ import json
4
+ from datetime import datetime
5
+ from typing import Dict, Any, Optional
6
+ import uuid
7
+
8
+ # Note: LangSmith requires API key for full functionality
9
+ # For hackathon demo, we'll create a local logging system that mimics LangSmith
10
+
11
class LangSmithLogger:
    """LangSmith-style local logging and observability for LLM chains.

    Events are appended as one JSON object per line to two files inside
    ``logs_dir``: a trace log (trace start/end, LLM calls, evaluations) and a
    metrics log.  Every instance gets its own random ``session_id``.
    """

    _MAX_LOGGED_CHARS = 500  # prompts/responses longer than this are truncated

    def __init__(self, project_name="resume-relevance-system"):
        """Create the log directory if needed and start a new session."""
        self.project_name = project_name
        self.session_id = str(uuid.uuid4())
        self.logs_dir = "logs"
        os.makedirs(self.logs_dir, exist_ok=True)

        # Initialize log files
        self.trace_log = f"{self.logs_dir}/langsmith_traces.jsonl"
        self.metrics_log = f"{self.logs_dir}/langsmith_metrics.jsonl"

        print(f"✅ LangSmith Logger initialized - Project: {project_name}")
        print(f"📊 Session ID: {self.session_id}")

    @staticmethod
    def _timestamp() -> str:
        """Return the current UTC time as a naive ISO-8601 string.

        ``datetime.utcnow()`` is deprecated since Python 3.12; the aware value
        is stripped of its tzinfo so the on-disk format stays unchanged.
        """
        from datetime import timezone

        return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    @classmethod
    def _truncate(cls, text: str) -> str:
        """Shorten ``text`` to the logging limit, marking the cut with ``...``."""
        if len(text) > cls._MAX_LOGGED_CHARS:
            return text[:cls._MAX_LOGGED_CHARS] + "..."
        return text

    def start_trace(self, trace_name: str, inputs: Dict[str, Any]) -> str:
        """Log the start of a trace and return its newly generated id."""
        trace_id = str(uuid.uuid4())

        trace_start = {
            "trace_id": trace_id,
            "session_id": self.session_id,
            "project_name": self.project_name,
            "trace_name": trace_name,
            "start_time": self._timestamp(),
            "inputs": inputs,
            "status": "started",
            "type": "trace_start"
        }

        self._log_event(trace_start, self.trace_log)
        print(f"🔍 LangSmith: Started trace '{trace_name}' - ID: {trace_id[:8]}...")
        return trace_id

    def end_trace(self, trace_id: str, outputs: Dict[str, Any],
                  status: str = "success", error: Optional[str] = None,
                  token_usage: Optional[Dict] = None):
        """Log the end of trace ``trace_id`` with its outputs and status."""
        trace_end = {
            "trace_id": trace_id,
            "session_id": self.session_id,
            "end_time": self._timestamp(),
            "outputs": outputs,
            "status": status,
            "error": error,
            "token_usage": token_usage or {},
            "type": "trace_end"
        }

        self._log_event(trace_end, self.trace_log)
        status_emoji = "✅" if status == "success" else "❌"
        print(f"{status_emoji} LangSmith: Ended trace {trace_id[:8]}... - Status: {status}")

    def log_llm_call(self, trace_id: str, step_name: str,
                     prompt: str, response: str, model: str,
                     latency_ms: float, token_usage: Optional[Dict] = None):
        """Log one LLM call within a trace; long prompt/response text is truncated."""
        llm_call = {
            "trace_id": trace_id,
            "step_name": step_name,
            "timestamp": self._timestamp(),
            "model": model,
            "prompt": self._truncate(prompt),
            "response": self._truncate(response),
            "latency_ms": latency_ms,
            "token_usage": token_usage or {},
            "type": "llm_call"
        }

        self._log_event(llm_call, self.trace_log)
        print(f"🤖 LangSmith: LLM call logged - {step_name} ({latency_ms:.1f}ms)")

    def log_metrics(self, metrics: Dict[str, Any]):
        """Append a metrics entry for the current session to the metrics log."""
        metric_entry = {
            "session_id": self.session_id,
            "timestamp": self._timestamp(),
            "metrics": metrics,
            "type": "metrics"
        }

        self._log_event(metric_entry, self.metrics_log)
        print(f"📊 LangSmith: Metrics logged - {list(metrics.keys())}")

    def log_evaluation(self, trace_id: str, evaluation_results: Dict[str, Any]):
        """Append evaluation results for ``trace_id`` to the trace log."""
        evaluation = {
            "trace_id": trace_id,
            "timestamp": self._timestamp(),
            "evaluation_results": evaluation_results,
            "type": "evaluation"
        }

        self._log_event(evaluation, self.trace_log)
        print(f"🧪 LangSmith: Evaluation logged for trace {trace_id[:8]}...")

    def _log_event(self, event: Dict[str, Any], log_file: str):
        """Append ``event`` as one JSON line to ``log_file`` (best effort).

        Values json cannot encode natively are written via ``str()`` so an
        unusual input never causes the whole event to be dropped.  Write
        failures are reported on stdout and otherwise ignored: logging must
        not break the caller.
        """
        try:
            with open(log_file, 'a', encoding='utf-8') as f:
                f.write(json.dumps(event, default=str) + '\n')
        except Exception as e:
            print(f"⚠️ LangSmith: Failed to write log - {e}")

    def _read_session_events(self, log_file: str) -> list:
        """Return this session's events from ``log_file``.

        A missing file yields an empty list.  Blank, malformed or non-object
        lines are skipped so one corrupt entry cannot hide the rest.
        """
        events = []
        if not os.path.exists(log_file):
            return events

        with open(log_file, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    event = json.loads(line)
                except json.JSONDecodeError:
                    continue
                if isinstance(event, dict) and event.get("session_id") == self.session_id:
                    events.append(event)
        return events

    def get_session_summary(self) -> Dict[str, Any]:
        """Summarize the current session from the log files.

        Returns:
            A dict with the session id, project name, the number of started
            traces and metric entries, and the last five of each.  If the log
            files cannot be read, ``{"error": <message>}`` is returned.
        """
        try:
            traces = [
                event for event in self._read_session_events(self.trace_log)
                if event.get("type") == "trace_start"
            ]
            metrics = self._read_session_events(self.metrics_log)

            return {
                "session_id": self.session_id,
                "project_name": self.project_name,
                "total_traces": len(traces),
                "total_metrics": len(metrics),
                "traces": traces[-5:],  # Last 5 traces
                "metrics": metrics[-5:]  # Last 5 metrics
            }

        except Exception as e:
            print(f"⚠️ LangSmith: Failed to get session summary - {e}")
            return {"error": str(e)}

    def export_session_data(self, filename: Optional[str] = None) -> str:
        """Write the session summary to ``filename`` as indented JSON.

        Args:
            filename: Target path; defaults to a session-specific file inside
                ``logs_dir``.

        Returns:
            The path written, or ``""`` when the export failed.
        """
        if not filename:
            filename = f"{self.logs_dir}/session_{self.session_id[:8]}_export.json"

        summary = self.get_session_summary()

        try:
            with open(filename, 'w', encoding='utf-8') as f:
                json.dump(summary, f, indent=2)

            print(f"📁 LangSmith: Session data exported to {filename}")
            return filename

        except Exception as e:
            print(f"❌ LangSmith: Export failed - {e}")
            return ""
175
+
176
+ # Global logger instance
177
+ logger = LangSmithLogger()
178
+
179
def trace_llm_analysis(func):
    """Decorator that traces a call to ``func`` through the module-level logger.

    Each call opens a trace named after the function, then records its outcome
    and latency.  Exceptions raised by ``func`` are logged and re-raised
    unchanged.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper with the same signature, name and docstring as ``func``.
    """
    # Function-scope imports: the module header does not provide these names.
    import functools
    import time

    @functools.wraps(func)  # keep __name__/__doc__ of the wrapped function
    def wrapper(*args, **kwargs):
        # Only argument counts and keyword names are logged, never the values.
        trace_id = logger.start_trace(
            func.__name__,
            {"args_count": len(args), "kwargs": list(kwargs.keys())}
        )

        # Monotonic clock: immune to wall-clock adjustments.
        started = time.perf_counter()

        try:
            result = func(*args, **kwargs)
        except Exception as e:
            latency = (time.perf_counter() - started) * 1000

            logger.end_trace(
                trace_id,
                {},
                "error",
                str(e)
            )

            logger.log_metrics({
                "function": func.__name__,
                "latency_ms": latency,
                "success": False,
                "error": str(e)
            })

            # Bare raise preserves the original traceback.
            raise
        else:
            latency = (time.perf_counter() - started) * 1000

            logger.end_trace(
                trace_id,
                {"result_type": type(result).__name__},
                "success"
            )

            logger.log_metrics({
                "function": func.__name__,
                "latency_ms": latency,
                "success": True
            })

            return result

    return wrapper
232
+
233
+ # Test function
234
def test_langsmith_logging():
    """Exercise the module-level logger end to end.

    Writes one trace (start, LLM call, end) and one metrics entry, then reads
    the session summary back.

    Returns:
        True when at least one trace of this session was found in the summary.
    """
    # Test trace
    trace_id = logger.start_trace("test_analysis", {"test": True})

    logger.log_llm_call(
        trace_id,
        "test_llm_call",
        "Test prompt",
        "Test response",
        "grok-4-fast",
        150.5,
        {"tokens": 100}
    )

    logger.end_trace(trace_id, {"test_result": "success"}, "success")

    # Test metrics
    logger.log_metrics({
        "test_metric": 95.5,
        "accuracy": 0.85
    })

    # The summary is {"error": ...} when the logs cannot be read, so the trace
    # count is looked up defensively instead of raising KeyError.
    summary = logger.get_session_summary()
    total_traces = summary.get("total_traces", 0)
    print(f"✅ LangSmith test completed - {total_traces} traces logged")

    return total_traces > 0
263
+
264
+ if __name__ == "__main__":
265
+ test_langsmith_logging()
llm_analysis/llm_analyzer.py ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # llm_analysis/llm_analyzer.py
2
+ import os
3
+ import json
4
+ from dotenv import load_dotenv
5
+ from langchain_openai import ChatOpenAI
6
+ from langchain.prompts import ChatPromptTemplate
7
+ from llm_analysis.prompt_templates import (
8
+ RESUME_ANALYSIS_PROMPT,
9
+ IMPROVEMENT_ROADMAP_PROMPT,
10
+ SKILLS_ENHANCEMENT_PROMPT
11
+ )
12
+
13
+ load_dotenv()
14
+
15
class LLMResumeAnalyzer:
    """LLM-backed resume analysis: fit analysis, improvement roadmap, skills.

    Every public method asks the chat model for a JSON document and returns
    the parsed result.  When the call or the parsing fails, a static fallback
    structure is returned instead of raising.
    """

    def __init__(self, model=None):
        """Create the chat model client.

        Args:
            model: Model name; when falsy, the ``OPENAI_MODEL`` environment
                variable is used, defaulting to ``"gpt-3.5-turbo"``.

        Raises:
            ValueError: If ``OPENAI_API_KEY`` is not set in the environment.
        """
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError("❌ OPENAI_API_KEY not found in .env file")

        # Use the provided model, or fall back to environment variable/default
        llm_model = model or os.getenv("OPENAI_MODEL", "gpt-3.5-turbo")

        self.llm = ChatOpenAI(
            model=llm_model,
            temperature=0.2,  # Low for consistency
            api_key=api_key
        )

        print(f"✅ LLM Analyzer initialized successfully with model: {llm_model}")

    @staticmethod
    def _parse_json_response(content):
        """Parse the JSON document contained in an LLM reply.

        Chat models frequently wrap JSON in Markdown code fences or surround it
        with a sentence of prose.  The reply is parsed as-is first; if that
        fails, the outermost ``{...}`` span is extracted and parsed.

        Raises:
            json.JSONDecodeError: If no parseable JSON object is present.
        """
        text = str(content).strip()
        try:
            return json.loads(text)
        except json.JSONDecodeError:
            pass

        start = text.find("{")
        end = text.rfind("}")
        if start == -1 or end <= start:
            raise json.JSONDecodeError("No JSON object found in LLM response", text, 0)
        return json.loads(text[start:end + 1])

    def _invoke_json(self, system_message, human_template, variables):
        """Run one system+human prompt through the model and parse its JSON reply."""
        prompt = ChatPromptTemplate.from_messages([
            ("system", system_message),
            ("human", human_template)
        ])
        chain = prompt | self.llm
        response = chain.invoke(variables)
        return self._parse_json_response(response.content)

    def analyze_resume_vs_jd(self, resume_text, jd_text, keyword_match_data):
        """Comprehensive LLM-powered resume analysis.

        Args:
            resume_text: Resume text; only the first 3000 characters are sent.
            jd_text: Job-description text; only the first 2000 characters are sent.
            keyword_match_data: Dict with ``matched_count``, ``total_jd_skills``,
                ``matched_skills``, ``missing_skills`` and ``score``; missing
                keys default to 0 or an empty list.

        Returns:
            The parsed JSON analysis, or the fallback analysis on any failure.
        """
        print("🤖 Running LLM analysis...")

        try:
            analysis = self._invoke_json(
                "You are an expert HR recruiter and technical hiring manager.",
                RESUME_ANALYSIS_PROMPT,
                {
                    # Truncate to avoid token limits; tolerate None input.
                    "resume_text": (resume_text or "")[:3000],
                    "jd_text": (jd_text or "")[:2000],
                    "matched_count": keyword_match_data.get("matched_count", 0),
                    "total_skills": keyword_match_data.get("total_jd_skills", 0),
                    "matched_skills": ", ".join(keyword_match_data.get("matched_skills", [])),
                    "missing_skills": ", ".join(keyword_match_data.get("missing_skills", [])),
                    "coverage_percentage": keyword_match_data.get("score", 0)
                },
            )
            print("✅ LLM analysis completed successfully")
            return analysis

        except json.JSONDecodeError as e:
            print(f"⚠️ JSON parsing error: {e}")
            return self._create_fallback_analysis(keyword_match_data)
        except Exception as e:
            print(f"❌ LLM analysis error: {e}")
            return self._create_fallback_analysis(keyword_match_data)

    def generate_improvement_roadmap(self, analysis_results):
        """Generate a detailed improvement roadmap from ``analysis_results``.

        Returns:
            The parsed JSON roadmap, or the fallback roadmap on any failure.
        """
        print("🗺️ Generating improvement roadmap...")

        try:
            roadmap = self._invoke_json(
                "You are a career coach specializing in tech careers.",
                IMPROVEMENT_ROADMAP_PROMPT,
                {"analysis_results": json.dumps(analysis_results, indent=2)},
            )
            print("✅ Improvement roadmap generated successfully")
            return roadmap

        except Exception as e:
            print(f"❌ Roadmap generation error: {e}")
            return self._create_fallback_roadmap()

    def enhance_skills_extraction(self, text):
        """Use the LLM to extract and categorize skills from ``text``.

        Only the first 2000 characters are sent.

        Returns:
            The parsed JSON skills data, or a dict with an empty
            ``all_technical_skills`` list and the ``error`` text on failure.
        """
        print("🧠 Enhancing skills extraction with LLM...")

        try:
            skills_data = self._invoke_json(
                "You are a technical skills extraction specialist.",
                SKILLS_ENHANCEMENT_PROMPT,
                {"text": (text or "")[:2000]},  # Truncate to avoid token limits
            )
            print("✅ Skills enhancement completed")
            return skills_data

        except Exception as e:
            print(f"❌ Skills enhancement error: {e}")
            return {"all_technical_skills": [], "error": str(e)}

    def _create_fallback_analysis(self, keyword_data):
        """Build a keyword-only analysis for when the LLM is unavailable.

        The fit score is the keyword score divided by ten, clamped to 1..10 so
        it stays on the 0-10 scale the prompt asks the model for.  A missing,
        ``None`` or non-numeric score counts as 0.
        """
        try:
            raw_score = float(keyword_data.get("score") or 0)
        except (TypeError, ValueError):
            raw_score = 0.0
        fit_score = min(10, max(1, int(raw_score / 10)))

        missing = list(keyword_data.get("missing_skills") or [])

        return {
            "overall_fit_score": fit_score,
            "experience_alignment": "Unable to assess - manual review needed",
            "key_strengths": ["Technical skills present in resume"],
            "critical_gaps": missing[:3],
            "role_suitability": "Medium - based on keyword match only",
            "improvement_suggestions": ["Add missing technical skills", "Improve resume formatting"],
            "recommended_skills_to_learn": missing[:3],
            "project_recommendations": ["Build projects showcasing missing skills"],
            "certification_suggestions": ["Relevant industry certifications"],
            "interview_readiness": "Moderate preparation needed",
            "salary_expectations": "Market standard for skill level",
            "final_verdict": "Automated analysis only - requires manual review"
        }

    def _create_fallback_roadmap(self):
        """Return a generic roadmap for when the LLM is unavailable."""
        return {
            "immediate_actions": ["Update resume with missing skills", "Clean up resume formatting"],
            "week_1_plan": ["Research missing skills", "Start online tutorials"],
            "month_1_plan": ["Complete beginner courses", "Build first project"],
            "month_3_plan": ["Build portfolio", "Apply for relevant positions"],
            "priority_skills": ["As identified in job description"],
            "learning_resources": {
                "free_courses": ["freeCodeCamp", "Coursera free courses"],
                "paid_courses": ["Udemy", "Pluralsight"],
                "books": ["Technical books for identified skills"],
                "practice_platforms": ["LeetCode", "HackerRank"]
            },
            "portfolio_improvements": ["Build 2-3 projects showcasing skills"],
            "networking_suggestions": ["Join LinkedIn groups", "Attend tech meetups"],
            "quick_wins": ["Update LinkedIn profile", "Get recommendations"],
            "estimated_timeline": "3-6 months for significant improvement"
        }
150
+
151
+ # Test LLM connectivity
152
def test_llm_connection():
    """Check that the configured LLM can be reached.

    Sends one small prompt through a freshly created analyzer.  Any reply
    counts as a working connection; the reply is shown parsed when it is bare
    JSON and raw otherwise.

    Returns:
        True if the model answered, False if creating the analyzer or calling
        the model raised.
    """
    try:
        analyzer = LLMResumeAnalyzer()
        print("🧪 Testing LLM connection...")

        # Simple test
        result = analyzer.llm.invoke("Say 'Hello, LLM is working!' in JSON format: {\"status\": \"working\", \"message\": \"Hello, LLM is working!\"}")

        try:
            test_response = json.loads(result.content)
        except (json.JSONDecodeError, TypeError):
            # The model answered but not with bare JSON (e.g. fenced output).
            # That is a formatting quirk, not a connectivity failure.
            test_response = result.content

        print(f"✅ LLM Test Result: {test_response}")
        return True

    except Exception as e:
        print(f"❌ LLM Test Failed: {e}")
        return False
168
+
169
+ if __name__ == "__main__":
170
+ test_llm_connection()
llm_analysis/prompt_templates.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # llm_analysis/prompt_templates.py
2
+
3
+ RESUME_ANALYSIS_PROMPT = """You are an expert HR recruiter analyzing resumes against job descriptions.
4
+
5
+ RESUME:
6
+ {resume_text}
7
+
8
+ JOB DESCRIPTION:
9
+ {jd_text}
10
+
11
+ KEYWORD MATCH ANALYSIS:
12
+ - Matched Skills ({matched_count}/{total_skills}): {matched_skills}
13
+ - Missing Skills: {missing_skills}
14
+ - Coverage: {coverage_percentage}%
15
+
16
+ Please provide a comprehensive analysis in JSON format:
17
+ {{
18
+ "overall_fit_score": <0-10 integer>,
19
+ "experience_alignment": "<brief assessment of experience match>",
20
+ "key_strengths": ["<strength1>", "<strength2>", "<strength3>"],
21
+ "critical_gaps": ["<gap1>", "<gap2>", "<gap3>"],
22
+ "role_suitability": "<High/Medium/Low with reasoning>",
23
+ "improvement_suggestions": ["<actionable suggestion1>", "<actionable suggestion2>"],
24
+ "recommended_skills_to_learn": ["<skill1>", "<skill2>", "<skill3>"],
25
+ "project_recommendations": ["<project idea1>", "<project idea2>"],
26
+ "certification_suggestions": ["<cert1>", "<cert2>"],
27
+ "interview_readiness": "<assessment of interview preparation needed>",
28
+ "salary_expectations": "<realistic salary range assessment>",
29
+ "final_verdict": "<detailed reasoning for recommendation>"
30
+ }}
31
+
32
+ Focus on being practical, specific, and actionable in your recommendations."""
33
+
34
+ IMPROVEMENT_ROADMAP_PROMPT = """Based on this resume analysis, create a detailed improvement roadmap for the candidate.
35
+
36
+ ANALYSIS RESULTS:
37
+ {analysis_results}
38
+
39
+ Create a structured improvement plan in JSON format:
40
+ {{
41
+ "immediate_actions": ["<action that can be done today>", "<another immediate action>"],
42
+ "week_1_plan": ["<specific task for week 1>", "<another week 1 task>"],
43
+ "month_1_plan": ["<month 1 goal>", "<another month 1 goal>"],
44
+ "month_3_plan": ["<3 month goal>", "<another 3 month goal>"],
45
+ "priority_skills": ["<highest priority skill>", "<second priority>", "<third priority>"],
46
+ "learning_resources": {{
47
+ "free_courses": ["<course recommendation>", "<another course>"],
48
+ "paid_courses": ["<premium course>", "<another premium course>"],
49
+ "books": ["<book recommendation>", "<another book>"],
50
+ "practice_platforms": ["<platform>", "<another platform>"]
51
+ }},
52
+ "portfolio_improvements": ["<specific project to build>", "<another project>"],
53
+ "networking_suggestions": ["<networking advice>", "<another networking tip>"],
54
+ "quick_wins": ["<easy improvement>", "<another quick win>"],
55
+ "estimated_timeline": "<realistic timeline to become job-ready>"
56
+ }}
57
+
58
+ Be specific with course names, book titles, and platform recommendations."""
59
+
60
+ SKILLS_ENHANCEMENT_PROMPT = """Analyze the following text and extract ALL technical skills, then categorize and enhance the skills list.
61
+
62
+ TEXT TO ANALYZE:
63
+ {text}
64
+
65
+ Extract and categorize skills comprehensively in JSON format:
66
+ {{
67
+ "programming_languages": ["<language1>", "<language2>"],
68
+ "web_frameworks": ["<framework1>", "<framework2>"],
69
+ "databases": ["<db1>", "<db2>"],
70
+ "cloud_platforms": ["<platform1>", "<platform2>"],
71
+ "devops_tools": ["<tool1>", "<tool2>"],
72
+ "testing_tools": ["<tool1>", "<tool2>"],
73
+ "development_tools": ["<tool1>", "<tool2>"],
74
+ "soft_skills": ["<skill1>", "<skill2>"],
75
+ "methodologies": ["<methodology1>", "<methodology2>"],
76
+ "all_technical_skills": ["<comprehensive list of all technical skills found>"],
77
+ "skill_proficiency_estimate": {{
78
+ "<skill>": "<Beginner/Intermediate/Advanced based on context>",
79
+ "<another_skill>": "<proficiency_level>"
80
+ }}
81
+ }}
82
+
83
+ Be thorough and include variations (e.g., JS and JavaScript, k8s and Kubernetes)."""
logs/langsmith_metrics.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"session_id": "937ab2f4-55f4-416e-a752-940d0160ca92", "timestamp": "2025-09-20T10:28:09.968967", "metrics": {"analysis_success": true, "resume_length": 2238, "jd_length": 3149, "skills_found": 19, "pipeline_status": "completed", "enhanced_scoring": false}, "type": "metrics"}
2
+ {"session_id": "937ab2f4-55f4-416e-a752-940d0160ca92", "timestamp": "2025-09-20T10:28:09.971970", "metrics": {"function": "complete_ai_analysis", "latency_ms": 3343.7670000000003, "success": true}, "type": "metrics"}
3
+ {"session_id": "02aaade7-76fc-4af0-99b1-42d6894bd1c1", "timestamp": "2025-09-20T10:32:17.648118", "metrics": {"analysis_success": true, "resume_length": 2238, "jd_length": 3149, "skills_found": 19, "pipeline_status": "completed", "enhanced_scoring": false}, "type": "metrics"}
4
+ {"session_id": "02aaade7-76fc-4af0-99b1-42d6894bd1c1", "timestamp": "2025-09-20T10:32:17.654122", "metrics": {"function": "complete_ai_analysis", "latency_ms": 1884.951, "success": true}, "type": "metrics"}
5
+ {"session_id": "eb186856-10a3-4850-b5a2-aea5a5b70569", "timestamp": "2025-09-20T11:21:59.326340", "metrics": {"api_success": true, "final_score": 35.0, "pipeline_used": true}, "type": "metrics"}
6
+ {"session_id": "eb186856-10a3-4850-b5a2-aea5a5b70569", "timestamp": "2025-09-20T11:21:59.332369", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 3593.709, "success": true}, "type": "metrics"}
7
+ {"session_id": "eb186856-10a3-4850-b5a2-aea5a5b70569", "timestamp": "2025-09-20T11:27:16.659867", "metrics": {"api_success": true, "final_score": 35.0, "pipeline_used": true}, "type": "metrics"}
8
+ {"session_id": "eb186856-10a3-4850-b5a2-aea5a5b70569", "timestamp": "2025-09-20T11:27:16.662861", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 1167.566, "success": true}, "type": "metrics"}
9
+ {"session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "timestamp": "2025-09-20T15:38:04.483122", "metrics": {"api_success": true, "final_score": 35.0, "pipeline_used": true}, "type": "metrics"}
10
+ {"session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "timestamp": "2025-09-20T15:38:04.489226", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 3661.242, "success": true}, "type": "metrics"}
11
+ {"session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "timestamp": "2025-09-20T16:02:44.109780", "metrics": {"api_success": true, "final_score": 35.0, "pipeline_used": true}, "type": "metrics"}
12
+ {"session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "timestamp": "2025-09-20T16:02:44.111775", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 929.667, "success": true}, "type": "metrics"}
13
+ {"session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "timestamp": "2025-09-20T16:09:57.021715", "metrics": {"api_success": true, "final_score": 36.668, "pipeline_used": true}, "type": "metrics"}
14
+ {"session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "timestamp": "2025-09-20T16:09:57.026900", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 912.856, "success": true}, "type": "metrics"}
15
+ {"session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "timestamp": "2025-09-20T16:11:55.042808", "metrics": {"api_success": true, "final_score": 36.668, "pipeline_used": true}, "type": "metrics"}
16
+ {"session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "timestamp": "2025-09-20T16:11:55.047901", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 1488.064, "success": true}, "type": "metrics"}
17
+ {"session_id": "ecfa3ce6-e5a8-495f-a24b-5ecc327807c7", "timestamp": "2025-09-20T16:19:28.468185", "metrics": {"api_success": true, "final_score": 43.332, "pipeline_used": true}, "type": "metrics"}
18
+ {"session_id": "ecfa3ce6-e5a8-495f-a24b-5ecc327807c7", "timestamp": "2025-09-20T16:19:28.473178", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 3668.7690000000002, "success": true}, "type": "metrics"}
19
+ {"session_id": "22bf6f9e-528f-4efd-9195-cc2d2d0050e6", "timestamp": "2025-09-20T16:22:17.448927", "metrics": {"api_success": true, "final_score": 43.332, "pipeline_used": true}, "type": "metrics"}
20
+ {"session_id": "22bf6f9e-528f-4efd-9195-cc2d2d0050e6", "timestamp": "2025-09-20T16:22:17.453922", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 3874.583, "success": true}, "type": "metrics"}
21
+ {"session_id": "22bf6f9e-528f-4efd-9195-cc2d2d0050e6", "timestamp": "2025-09-20T16:26:09.359080", "metrics": {"api_success": true, "final_score": 43.332, "pipeline_used": true}, "type": "metrics"}
22
+ {"session_id": "22bf6f9e-528f-4efd-9195-cc2d2d0050e6", "timestamp": "2025-09-20T16:26:09.366978", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 1227.908, "success": true}, "type": "metrics"}
23
+ {"session_id": "ae346dba-3fc8-4656-8e11-224b5c42499a", "timestamp": "2025-09-20T16:28:36.803003", "metrics": {"api_success": true, "final_score": 33.752, "pipeline_used": true}, "type": "metrics"}
24
+ {"session_id": "ae346dba-3fc8-4656-8e11-224b5c42499a", "timestamp": "2025-09-20T16:28:36.808437", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 3730.636, "success": true}, "type": "metrics"}
25
+ {"session_id": "7b932b93-6d28-42b0-acf4-597ec7761d18", "timestamp": "2025-09-20T16:32:52.095638", "metrics": {"api_success": true, "final_score": 43.332, "pipeline_used": true}, "type": "metrics"}
26
+ {"session_id": "7b932b93-6d28-42b0-acf4-597ec7761d18", "timestamp": "2025-09-20T16:32:52.098635", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 3076.762, "success": true}, "type": "metrics"}
27
+ {"session_id": "2c6d421a-fa05-427b-98fe-d9eb9330d3cb", "timestamp": "2025-09-21T01:22:16.389240", "metrics": {"api_success": true, "final_score": 32.5, "pipeline_used": true}, "type": "metrics"}
28
+ {"session_id": "2c6d421a-fa05-427b-98fe-d9eb9330d3cb", "timestamp": "2025-09-21T01:22:16.394244", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 2296.625, "success": true}, "type": "metrics"}
29
+ {"session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "timestamp": "2025-09-21T01:24:37.998103", "metrics": {"api_success": true, "final_score": 56.668000000000006, "pipeline_used": true}, "type": "metrics"}
30
+ {"session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "timestamp": "2025-09-21T01:24:38.001115", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 1919.5310000000002, "success": true}, "type": "metrics"}
31
+ {"session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "timestamp": "2025-09-21T01:27:16.385405", "metrics": {"api_success": true, "final_score": 56.668000000000006, "pipeline_used": true}, "type": "metrics"}
32
+ {"session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "timestamp": "2025-09-21T01:27:16.388509", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 1454.168, "success": true}, "type": "metrics"}
33
+ {"session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "timestamp": "2025-09-21T01:27:51.527938", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 12.616, "success": true}, "type": "metrics"}
34
+ {"session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "timestamp": "2025-09-21T01:28:26.866106", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 133.30700000000002, "success": true}, "type": "metrics"}
35
+ {"session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "timestamp": "2025-09-21T01:29:02.073814", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 19.596, "success": true}, "type": "metrics"}
36
+ {"session_id": "ba23f5ea-5bed-49f9-8a51-664393d8166f", "timestamp": "2025-09-21T01:40:09.312013", "metrics": {"api_success": true, "final_score": 56.668000000000006, "pipeline_used": true}, "type": "metrics"}
37
+ {"session_id": "ba23f5ea-5bed-49f9-8a51-664393d8166f", "timestamp": "2025-09-21T01:40:09.314913", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 2356.451, "success": true}, "type": "metrics"}
38
+ {"session_id": "c4915a7d-32e4-489f-b21f-42992acb373d", "timestamp": "2025-09-21T01:55:07.375404", "metrics": {"api_success": true, "final_score": 32.5, "pipeline_used": true}, "type": "metrics"}
39
+ {"session_id": "c4915a7d-32e4-489f-b21f-42992acb373d", "timestamp": "2025-09-21T01:55:07.378410", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 2503.252, "success": true}, "type": "metrics"}
40
+ {"session_id": "0b301bb0-8da6-4ede-ad07-4e19e41fe76b", "timestamp": "2025-09-21T02:19:01.012120", "metrics": {"api_success": true, "final_score": 56.668000000000006, "pipeline_used": true}, "type": "metrics"}
41
+ {"session_id": "0b301bb0-8da6-4ede-ad07-4e19e41fe76b", "timestamp": "2025-09-21T02:19:01.016125", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 2274.592, "success": true}, "type": "metrics"}
42
+ {"session_id": "acba97a8-88e4-428b-9390-783700f0235f", "timestamp": "2025-09-21T03:13:19.055138", "metrics": {"api_success": true, "final_score": 35.0, "pipeline_used": true}, "type": "metrics"}
43
+ {"session_id": "acba97a8-88e4-428b-9390-783700f0235f", "timestamp": "2025-09-21T03:13:19.059234", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 2120.11, "success": true}, "type": "metrics"}
44
+ {"session_id": "1579b742-a3e6-4eab-8ad4-f0f16fabeb44", "timestamp": "2025-09-21T03:30:37.071608", "metrics": {"api_success": true, "final_score": 43.332, "pipeline_used": true}, "type": "metrics"}
45
+ {"session_id": "1579b742-a3e6-4eab-8ad4-f0f16fabeb44", "timestamp": "2025-09-21T03:30:37.075742", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 2324.2560000000003, "success": true}, "type": "metrics"}
46
+ {"session_id": "f87d6485-e1ab-4f69-bf3c-a0f2454901d8", "timestamp": "2025-09-21T03:32:54.777227", "metrics": {"api_success": true, "final_score": 40.0, "pipeline_used": true}, "type": "metrics"}
47
+ {"session_id": "f87d6485-e1ab-4f69-bf3c-a0f2454901d8", "timestamp": "2025-09-21T03:32:54.786216", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 2556.451, "success": true}, "type": "metrics"}
48
+ {"session_id": "2f5d5adb-b2dd-4064-9478-239eb49bfcea", "timestamp": "2025-09-21T03:39:27.003374", "metrics": {"api_success": true, "final_score": 56.668000000000006, "pipeline_used": true}, "type": "metrics"}
49
+ {"session_id": "2f5d5adb-b2dd-4064-9478-239eb49bfcea", "timestamp": "2025-09-21T03:39:27.014265", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 4611.081, "success": true}, "type": "metrics"}
50
+ {"session_id": "732b0964-05ef-4f60-85a3-cf459228de7a", "timestamp": "2025-09-21T03:44:56.950150", "metrics": {"api_success": true, "final_score": 36.668, "pipeline_used": true}, "type": "metrics"}
51
+ {"session_id": "732b0964-05ef-4f60-85a3-cf459228de7a", "timestamp": "2025-09-21T03:44:56.954924", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 3159.484, "success": true}, "type": "metrics"}
52
+ {"session_id": "697a6f96-fb30-4958-9e15-a6ceb5892c41", "timestamp": "2025-09-21T03:51:12.817358", "metrics": {"api_success": true, "final_score": 36.668, "pipeline_used": true}, "type": "metrics"}
53
+ {"session_id": "697a6f96-fb30-4958-9e15-a6ceb5892c41", "timestamp": "2025-09-21T03:51:12.821360", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 2815.247, "success": true}, "type": "metrics"}
54
+ {"session_id": "c192918b-d4d5-4598-8177-47bbd2d80408", "timestamp": "2025-09-21T03:55:12.022321", "metrics": {"api_success": true, "final_score": 50.0, "pipeline_used": true}, "type": "metrics"}
55
+ {"session_id": "c192918b-d4d5-4598-8177-47bbd2d80408", "timestamp": "2025-09-21T03:55:12.025331", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 3375.04, "success": true}, "type": "metrics"}
56
+ {"session_id": "80a681b0-9d8d-4651-893c-c7373c734dc0", "timestamp": "2025-09-21T04:07:46.220611", "metrics": {"api_success": true, "final_score": 36.668, "pipeline_used": true}, "type": "metrics"}
57
+ {"session_id": "80a681b0-9d8d-4651-893c-c7373c734dc0", "timestamp": "2025-09-21T04:07:46.225181", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 26514.756999999998, "success": true}, "type": "metrics"}
logs/langsmith_traces.jsonl ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"trace_id": "a9220664-0b26-4e3f-b34d-14939e97460a", "session_id": "937ab2f4-55f4-416e-a752-940d0160ca92", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis", "start_time": "2025-09-20T10:28:06.626205", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
2
+ {"trace_id": "9194d1ec-9db4-45ec-a0c4-3ebef618c208", "session_id": "937ab2f4-55f4-416e-a752-940d0160ca92", "project_name": "resume-relevance-system", "trace_name": "complete_resume_analysis", "start_time": "2025-09-20T10:28:06.628203", "inputs": {"resume_file": "input/sample_resume.pdf", "jd_file": "input/sample_jd.pdf"}, "status": "started", "type": "trace_start"}
3
+ {"trace_id": "9194d1ec-9db4-45ec-a0c4-3ebef618c208", "session_id": "937ab2f4-55f4-416e-a752-940d0160ca92", "end_time": "2025-09-20T10:28:09.969969", "outputs": {"pipeline_status": "completed", "final_score": 5}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
4
+ {"trace_id": "a9220664-0b26-4e3f-b34d-14939e97460a", "session_id": "937ab2f4-55f4-416e-a752-940d0160ca92", "end_time": "2025-09-20T10:28:09.971970", "outputs": {"result_type": "NoneType"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
5
+ {"trace_id": "78931f3c-8c2f-4cf0-88a3-75ffd391e133", "session_id": "02aaade7-76fc-4af0-99b1-42d6894bd1c1", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis", "start_time": "2025-09-20T10:32:15.765176", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
6
+ {"trace_id": "7b53b614-d774-41fc-86fe-155ad7326413", "session_id": "02aaade7-76fc-4af0-99b1-42d6894bd1c1", "project_name": "resume-relevance-system", "trace_name": "complete_resume_analysis", "start_time": "2025-09-20T10:32:15.766169", "inputs": {"resume_file": "input/sample_resume.pdf", "jd_file": "input/sample_jd.pdf"}, "status": "started", "type": "trace_start"}
7
+ {"trace_id": "7b53b614-d774-41fc-86fe-155ad7326413", "session_id": "02aaade7-76fc-4af0-99b1-42d6894bd1c1", "end_time": "2025-09-20T10:32:17.649124", "outputs": {"pipeline_status": "completed", "final_score": 5}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
8
+ {"trace_id": "78931f3c-8c2f-4cf0-88a3-75ffd391e133", "session_id": "02aaade7-76fc-4af0-99b1-42d6894bd1c1", "end_time": "2025-09-20T10:32:17.651120", "outputs": {"result_type": "NoneType"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
9
+ {"trace_id": "49f5c4ab-d238-4faa-a175-0adf0f3920df", "session_id": "eb186856-10a3-4850-b5a2-aea5a5b70569", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-20T11:21:55.733567", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
10
+ {"trace_id": "5393826b-554b-421b-8790-1218ad6016dc", "session_id": "eb186856-10a3-4850-b5a2-aea5a5b70569", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-20T11:21:55.736654", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmplvkk7m14.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp6rbiooty.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
11
+ {"trace_id": "5393826b-554b-421b-8790-1218ad6016dc", "session_id": "eb186856-10a3-4850-b5a2-aea5a5b70569", "end_time": "2025-09-20T11:21:59.323345", "outputs": {"final_score": 35.0, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
12
+ {"trace_id": "49f5c4ab-d238-4faa-a175-0adf0f3920df", "session_id": "eb186856-10a3-4850-b5a2-aea5a5b70569", "end_time": "2025-09-20T11:21:59.330363", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
13
+ {"trace_id": "e302ca41-6236-4f05-97cd-d17a0ad75b37", "session_id": "eb186856-10a3-4850-b5a2-aea5a5b70569", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-20T11:27:15.492297", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
14
+ {"trace_id": "378fa23b-0d68-42ab-b430-bda462fe4e14", "session_id": "eb186856-10a3-4850-b5a2-aea5a5b70569", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-20T11:27:15.493296", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp0424aaqj.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpnalp728z.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
15
+ {"trace_id": "378fa23b-0d68-42ab-b430-bda462fe4e14", "session_id": "eb186856-10a3-4850-b5a2-aea5a5b70569", "end_time": "2025-09-20T11:27:16.654803", "outputs": {"final_score": 35.0, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
16
+ {"trace_id": "e302ca41-6236-4f05-97cd-d17a0ad75b37", "session_id": "eb186856-10a3-4850-b5a2-aea5a5b70569", "end_time": "2025-09-20T11:27:16.660862", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
17
+ {"trace_id": "a25e723f-9809-42da-b369-8d60667993bf", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-20T15:38:00.822976", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
18
+ {"trace_id": "1634d9e3-2e84-40a8-9873-3738f7facb35", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-20T15:38:00.825975", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp6ikrxzk6.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpjkujpprp.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
19
+ {"trace_id": "1634d9e3-2e84-40a8-9873-3738f7facb35", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "end_time": "2025-09-20T15:38:04.478225", "outputs": {"final_score": 35.0, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
20
+ {"trace_id": "a25e723f-9809-42da-b369-8d60667993bf", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "end_time": "2025-09-20T15:38:04.487217", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
21
+ {"trace_id": "09528e66-9a21-4ea6-8297-deb8cf96bcc4", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-20T16:02:43.180112", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
22
+ {"trace_id": "31c100c7-ea89-4df2-85cf-bcc2b1cc1397", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-20T16:02:43.181109", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp02rmxegc.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpoy2ydj6o.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
23
+ {"trace_id": "31c100c7-ea89-4df2-85cf-bcc2b1cc1397", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "end_time": "2025-09-20T16:02:44.108699", "outputs": {"final_score": 35.0, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
24
+ {"trace_id": "09528e66-9a21-4ea6-8297-deb8cf96bcc4", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "end_time": "2025-09-20T16:02:44.110776", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
25
+ {"trace_id": "6a185789-6d47-44e4-8cca-e0dedd2b22d7", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-20T16:09:56.110011", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
26
+ {"trace_id": "7e1ab0e9-03b8-4099-9dd9-5d92f68f4798", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-20T16:09:56.113023", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp8d87hwhf.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpa77tf5fb.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
27
+ {"trace_id": "7e1ab0e9-03b8-4099-9dd9-5d92f68f4798", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "end_time": "2025-09-20T16:09:57.017743", "outputs": {"final_score": 36.668, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
28
+ {"trace_id": "6a185789-6d47-44e4-8cca-e0dedd2b22d7", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "end_time": "2025-09-20T16:09:57.024881", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
29
+ {"trace_id": "6f031d08-1458-4330-84c1-550cdf7d2cc0", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-20T16:11:53.555834", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
30
+ {"trace_id": "d0a35be8-5789-4346-94d1-9e8dd8b48b6e", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-20T16:11:53.557800", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpyzkfzvg1.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp6qku_qze.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
31
+ {"trace_id": "d0a35be8-5789-4346-94d1-9e8dd8b48b6e", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "end_time": "2025-09-20T16:11:55.040910", "outputs": {"final_score": 36.668, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
32
+ {"trace_id": "6f031d08-1458-4330-84c1-550cdf7d2cc0", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "end_time": "2025-09-20T16:11:55.045864", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
33
+ {"trace_id": "d0ea9e90-42ba-4e34-a8e4-fa21eaf1b931", "session_id": "ecfa3ce6-e5a8-495f-a24b-5ecc327807c7", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-20T16:19:24.799410", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
34
+ {"trace_id": "870c9f17-e25a-4f3b-abf1-5e85a949b313", "session_id": "ecfa3ce6-e5a8-495f-a24b-5ecc327807c7", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-20T16:19:24.801413", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpytdykoki.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpmmugnx60.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
35
+ {"trace_id": "870c9f17-e25a-4f3b-abf1-5e85a949b313", "session_id": "ecfa3ce6-e5a8-495f-a24b-5ecc327807c7", "end_time": "2025-09-20T16:19:28.465273", "outputs": {"final_score": 43.332, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
36
+ {"trace_id": "d0ea9e90-42ba-4e34-a8e4-fa21eaf1b931", "session_id": "ecfa3ce6-e5a8-495f-a24b-5ecc327807c7", "end_time": "2025-09-20T16:19:28.470182", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
37
+ {"trace_id": "10094869-bf87-4b42-adef-76bba3e94cfd", "session_id": "22bf6f9e-528f-4efd-9195-cc2d2d0050e6", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-20T16:22:13.573970", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
38
+ {"trace_id": "02739c4d-646b-4a07-aba7-e67815a8aa98", "session_id": "22bf6f9e-528f-4efd-9195-cc2d2d0050e6", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-20T16:22:13.577347", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpztf2hnqs.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp8znkrcrb.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
39
+ {"trace_id": "02739c4d-646b-4a07-aba7-e67815a8aa98", "session_id": "22bf6f9e-528f-4efd-9195-cc2d2d0050e6", "end_time": "2025-09-20T16:22:17.445918", "outputs": {"final_score": 43.332, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
40
+ {"trace_id": "10094869-bf87-4b42-adef-76bba3e94cfd", "session_id": "22bf6f9e-528f-4efd-9195-cc2d2d0050e6", "end_time": "2025-09-20T16:22:17.451930", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
41
+ {"trace_id": "b4b1e1c1-ce7b-4f41-a78a-6efa65f3df7b", "session_id": "22bf6f9e-528f-4efd-9195-cc2d2d0050e6", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-20T16:26:08.130125", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
42
+ {"trace_id": "e0fba981-e2b7-4892-a9a0-155cf402bcad", "session_id": "22bf6f9e-528f-4efd-9195-cc2d2d0050e6", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-20T16:26:08.133123", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpvtdar3no.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmptw1nuu6d.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
43
+ {"trace_id": "e0fba981-e2b7-4892-a9a0-155cf402bcad", "session_id": "22bf6f9e-528f-4efd-9195-cc2d2d0050e6", "end_time": "2025-09-20T16:26:09.355068", "outputs": {"final_score": 43.332, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
44
+ {"trace_id": "b4b1e1c1-ce7b-4f41-a78a-6efa65f3df7b", "session_id": "22bf6f9e-528f-4efd-9195-cc2d2d0050e6", "end_time": "2025-09-20T16:26:09.361031", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
45
+ {"trace_id": "c4c0578d-0611-4b90-b757-578779274efe", "session_id": "ae346dba-3fc8-4656-8e11-224b5c42499a", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-20T16:28:33.069844", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
46
+ {"trace_id": "65c0aa4f-4494-43a4-8934-70169b717582", "session_id": "ae346dba-3fc8-4656-8e11-224b5c42499a", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-20T16:28:33.074357", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp89dd34s7.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp116yeyi3.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
47
+ {"trace_id": "65c0aa4f-4494-43a4-8934-70169b717582", "session_id": "ae346dba-3fc8-4656-8e11-224b5c42499a", "end_time": "2025-09-20T16:28:36.800094", "outputs": {"final_score": 33.752, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
48
+ {"trace_id": "c4c0578d-0611-4b90-b757-578779274efe", "session_id": "ae346dba-3fc8-4656-8e11-224b5c42499a", "end_time": "2025-09-20T16:28:36.804993", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
49
+ {"trace_id": "33f9fce0-9c3e-492a-9524-51c34a5db94d", "session_id": "7b932b93-6d28-42b0-acf4-597ec7761d18", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-20T16:32:49.017877", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
50
+ {"trace_id": "6e392f12-a809-4478-af07-9dbf8c47e537", "session_id": "7b932b93-6d28-42b0-acf4-597ec7761d18", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-20T16:32:49.019874", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpioc13nbs.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpapjq22q7.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
51
+ {"trace_id": "6e392f12-a809-4478-af07-9dbf8c47e537", "session_id": "7b932b93-6d28-42b0-acf4-597ec7761d18", "end_time": "2025-09-20T16:32:52.093645", "outputs": {"final_score": 43.332, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
52
+ {"trace_id": "33f9fce0-9c3e-492a-9524-51c34a5db94d", "session_id": "7b932b93-6d28-42b0-acf4-597ec7761d18", "end_time": "2025-09-20T16:32:52.096636", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
53
+ {"trace_id": "d823ef9f-2df5-4806-9afa-4fec03cc59de", "session_id": "2c6d421a-fa05-427b-98fe-d9eb9330d3cb", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T01:22:14.093539", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
54
+ {"trace_id": "61dcda6f-7572-4e56-b810-9aa8c09db991", "session_id": "2c6d421a-fa05-427b-98fe-d9eb9330d3cb", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T01:22:14.094535", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpk34qnxoz.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpinedpti6.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
55
+ {"trace_id": "61dcda6f-7572-4e56-b810-9aa8c09db991", "session_id": "2c6d421a-fa05-427b-98fe-d9eb9330d3cb", "end_time": "2025-09-21T01:22:16.388242", "outputs": {"final_score": 32.5, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
56
+ {"trace_id": "d823ef9f-2df5-4806-9afa-4fec03cc59de", "session_id": "2c6d421a-fa05-427b-98fe-d9eb9330d3cb", "end_time": "2025-09-21T01:22:16.391160", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
57
+ {"trace_id": "24695c1d-aad7-4e83-b253-a52949c1ff1e", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T01:24:36.076482", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
58
+ {"trace_id": "8142abf7-8623-4bd8-a0ec-b035eeea49de", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T01:24:36.080584", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpmq0nsda_.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpfy6mq83d.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
59
+ {"trace_id": "8142abf7-8623-4bd8-a0ec-b035eeea49de", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "end_time": "2025-09-21T01:24:37.994412", "outputs": {"final_score": 56.668000000000006, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
60
+ {"trace_id": "24695c1d-aad7-4e83-b253-a52949c1ff1e", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "end_time": "2025-09-21T01:24:38.000115", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
61
+ {"trace_id": "4bfd280e-c0f4-4e29-91ca-1cd28ce98c0e", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T01:27:14.928024", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
62
+ {"trace_id": "f5e0624d-5361-4d2e-a2c2-70a37f6d7859", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T01:27:14.932244", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpd8i2w90l.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpkbs8mbvf.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
63
+ {"trace_id": "f5e0624d-5361-4d2e-a2c2-70a37f6d7859", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "end_time": "2025-09-21T01:27:16.383406", "outputs": {"final_score": 56.668000000000006, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
64
+ {"trace_id": "4bfd280e-c0f4-4e29-91ca-1cd28ce98c0e", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "end_time": "2025-09-21T01:27:16.386412", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
65
+ {"trace_id": "936e4d74-a342-40d2-a9ec-ed7d697bdb91", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T01:27:51.511294", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
66
+ {"trace_id": "e2c67c0c-fb9d-428e-9b21-44d61304fd1e", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T01:27:51.513311", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp16rci5h5.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp7dwr_wtp.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
67
+ {"trace_id": "e2c67c0c-fb9d-428e-9b21-44d61304fd1e", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "end_time": "2025-09-21T01:27:51.522926", "outputs": {}, "status": "error", "error": "Cannot open empty file: filename='C:\\\\Users\\\\kusha\\\\AppData\\\\Local\\\\Temp\\\\tmp16rci5h5.pdf'.", "token_usage": {}, "type": "trace_end"}
68
+ {"trace_id": "936e4d74-a342-40d2-a9ec-ed7d697bdb91", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "end_time": "2025-09-21T01:27:51.524923", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
69
+ {"trace_id": "0e5ac59b-bedd-4fc8-b46d-f8acb4a418ee", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T01:28:26.729537", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
70
+ {"trace_id": "5a35f592-d45c-4576-b550-14ee205df0da", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T01:28:26.730536", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp59t8_l1r.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp2_n11hm5.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
71
+ {"trace_id": "5a35f592-d45c-4576-b550-14ee205df0da", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "end_time": "2025-09-21T01:28:26.861845", "outputs": {}, "status": "error", "error": "Cannot open empty file: filename='C:\\\\Users\\\\kusha\\\\AppData\\\\Local\\\\Temp\\\\tmp2_n11hm5.pdf'.", "token_usage": {}, "type": "trace_end"}
72
+ {"trace_id": "0e5ac59b-bedd-4fc8-b46d-f8acb4a418ee", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "end_time": "2025-09-21T01:28:26.863843", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
73
+ {"trace_id": "2033f188-874b-4779-93ed-cd939e13dc02", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T01:29:02.050099", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
74
+ {"trace_id": "765dc4e3-c115-4468-a2a1-928b7a708f0a", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T01:29:02.053217", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpelpe_kq9.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpd1tg607v.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
75
+ {"trace_id": "765dc4e3-c115-4468-a2a1-928b7a708f0a", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "end_time": "2025-09-21T01:29:02.070814", "outputs": {}, "status": "error", "error": "Cannot open empty file: filename='C:\\\\Users\\\\kusha\\\\AppData\\\\Local\\\\Temp\\\\tmpelpe_kq9.pdf'.", "token_usage": {}, "type": "trace_end"}
76
+ {"trace_id": "2033f188-874b-4779-93ed-cd939e13dc02", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "end_time": "2025-09-21T01:29:02.072813", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
77
+ {"trace_id": "05aafbe3-91d6-4567-88f7-971d4bf1cfa3", "session_id": "ba23f5ea-5bed-49f9-8a51-664393d8166f", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T01:40:06.952453", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
78
+ {"trace_id": "fa4cd081-5a89-4311-8d6b-a8bee0f4f95b", "session_id": "ba23f5ea-5bed-49f9-8a51-664393d8166f", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T01:40:06.957463", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpfpgea8mx.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp0mp2_3rx.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
79
+ {"trace_id": "fa4cd081-5a89-4311-8d6b-a8bee0f4f95b", "session_id": "ba23f5ea-5bed-49f9-8a51-664393d8166f", "end_time": "2025-09-21T01:40:09.302898", "outputs": {"final_score": 56.668000000000006, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
80
+ {"trace_id": "05aafbe3-91d6-4567-88f7-971d4bf1cfa3", "session_id": "ba23f5ea-5bed-49f9-8a51-664393d8166f", "end_time": "2025-09-21T01:40:09.313914", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
81
+ {"trace_id": "4e7b4fc5-3c0a-42dc-99e4-d7a895db19a3", "session_id": "c4915a7d-32e4-489f-b21f-42992acb373d", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T01:55:04.866221", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
82
+ {"trace_id": "eba0b151-f45f-4497-a18c-97968ba558a3", "session_id": "c4915a7d-32e4-489f-b21f-42992acb373d", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T01:55:04.874152", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpilnra4ly.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpifbcxjwl.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
83
+ {"trace_id": "eba0b151-f45f-4497-a18c-97968ba558a3", "session_id": "c4915a7d-32e4-489f-b21f-42992acb373d", "end_time": "2025-09-21T01:55:07.366397", "outputs": {"final_score": 32.5, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
84
+ {"trace_id": "4e7b4fc5-3c0a-42dc-99e4-d7a895db19a3", "session_id": "c4915a7d-32e4-489f-b21f-42992acb373d", "end_time": "2025-09-21T01:55:07.377404", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
85
+ {"trace_id": "f0c61041-ed71-4df3-b1ba-272bb3a1a1ca", "session_id": "0b301bb0-8da6-4ede-ad07-4e19e41fe76b", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T02:18:58.734780", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
86
+ {"trace_id": "7bbeada3-6822-421f-a326-bfa56bb0a2cd", "session_id": "0b301bb0-8da6-4ede-ad07-4e19e41fe76b", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T02:18:58.739429", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpw_kcul8t.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpllsrq7v8.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
87
+ {"trace_id": "7bbeada3-6822-421f-a326-bfa56bb0a2cd", "session_id": "0b301bb0-8da6-4ede-ad07-4e19e41fe76b", "end_time": "2025-09-21T02:19:01.005118", "outputs": {"final_score": 56.668000000000006, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
88
+ {"trace_id": "f0c61041-ed71-4df3-b1ba-272bb3a1a1ca", "session_id": "0b301bb0-8da6-4ede-ad07-4e19e41fe76b", "end_time": "2025-09-21T02:19:01.014021", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
89
+ {"trace_id": "334643be-5298-4b6c-8567-347271832f6a", "session_id": "acba97a8-88e4-428b-9390-783700f0235f", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T03:13:16.934134", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
90
+ {"trace_id": "4819d93c-4a91-4d3d-90d9-277d7be3381d", "session_id": "acba97a8-88e4-428b-9390-783700f0235f", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T03:13:16.937126", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpxhu7fkn7.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp9mtoh73r.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
91
+ {"trace_id": "4819d93c-4a91-4d3d-90d9-277d7be3381d", "session_id": "acba97a8-88e4-428b-9390-783700f0235f", "end_time": "2025-09-21T03:13:19.052137", "outputs": {"final_score": 35.0, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
92
+ {"trace_id": "334643be-5298-4b6c-8567-347271832f6a", "session_id": "acba97a8-88e4-428b-9390-783700f0235f", "end_time": "2025-09-21T03:13:19.057236", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
93
+ {"trace_id": "d0d18179-5ddb-493b-8c62-22229f06484e", "session_id": "1579b742-a3e6-4eab-8ad4-f0f16fabeb44", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T03:30:34.747377", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
94
+ {"trace_id": "4c0b2ff9-aced-442e-bba1-bd1e00c18cab", "session_id": "1579b742-a3e6-4eab-8ad4-f0f16fabeb44", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T03:30:34.749366", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp6mve9qq5.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpv5vjd46t.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
95
+ {"trace_id": "4c0b2ff9-aced-442e-bba1-bd1e00c18cab", "session_id": "1579b742-a3e6-4eab-8ad4-f0f16fabeb44", "end_time": "2025-09-21T03:30:37.066611", "outputs": {"final_score": 43.332, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
96
+ {"trace_id": "d0d18179-5ddb-493b-8c62-22229f06484e", "session_id": "1579b742-a3e6-4eab-8ad4-f0f16fabeb44", "end_time": "2025-09-21T03:30:37.073622", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
97
+ {"trace_id": "270ffb7c-a36a-4799-92cc-74ba938e58c5", "session_id": "f87d6485-e1ab-4f69-bf3c-a0f2454901d8", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T03:32:52.223767", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
98
+ {"trace_id": "250619ff-9a1a-4e1d-ab99-528591c4dd07", "session_id": "f87d6485-e1ab-4f69-bf3c-a0f2454901d8", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T03:32:52.226771", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpw4_p93qm.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpziz23bn2.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
99
+ {"trace_id": "250619ff-9a1a-4e1d-ab99-528591c4dd07", "session_id": "f87d6485-e1ab-4f69-bf3c-a0f2454901d8", "end_time": "2025-09-21T03:32:54.770212", "outputs": {"final_score": 40.0, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
100
+ {"trace_id": "270ffb7c-a36a-4799-92cc-74ba938e58c5", "session_id": "f87d6485-e1ab-4f69-bf3c-a0f2454901d8", "end_time": "2025-09-21T03:32:54.783222", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
101
+ {"trace_id": "c1c18675-903d-4d06-9b6d-d162dc6697aa", "session_id": "2f5d5adb-b2dd-4064-9478-239eb49bfcea", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T03:39:22.397308", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
102
+ {"trace_id": "aadff1c2-9852-4726-960d-af92c369fd8b", "session_id": "2f5d5adb-b2dd-4064-9478-239eb49bfcea", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T03:39:22.400313", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpp_yxirpc.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp36aqmv7r.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
103
+ {"trace_id": "aadff1c2-9852-4726-960d-af92c369fd8b", "session_id": "2f5d5adb-b2dd-4064-9478-239eb49bfcea", "end_time": "2025-09-21T03:39:26.991284", "outputs": {"final_score": 56.668000000000006, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
104
+ {"trace_id": "c1c18675-903d-4d06-9b6d-d162dc6697aa", "session_id": "2f5d5adb-b2dd-4064-9478-239eb49bfcea", "end_time": "2025-09-21T03:39:27.010387", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
105
+ {"trace_id": "df6e7046-48cd-4b29-ac41-173d3a4fb5f9", "session_id": "732b0964-05ef-4f60-85a3-cf459228de7a", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T03:44:53.791298", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
106
+ {"trace_id": "03a87671-e3a6-40fa-a2f6-dbdad8e025c9", "session_id": "732b0964-05ef-4f60-85a3-cf459228de7a", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T03:44:53.793766", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp3esxeq0t.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpteop9bro.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
107
+ {"trace_id": "03a87671-e3a6-40fa-a2f6-dbdad8e025c9", "session_id": "732b0964-05ef-4f60-85a3-cf459228de7a", "end_time": "2025-09-21T03:44:56.947132", "outputs": {"final_score": 36.668, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
108
+ {"trace_id": "df6e7046-48cd-4b29-ac41-173d3a4fb5f9", "session_id": "732b0964-05ef-4f60-85a3-cf459228de7a", "end_time": "2025-09-21T03:44:56.953250", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
109
+ {"trace_id": "02d153d2-eb16-4784-abb5-42ab4509f10a", "session_id": "697a6f96-fb30-4958-9e15-a6ceb5892c41", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T03:51:10.001559", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
110
+ {"trace_id": "99341c01-5bb4-47fe-80e9-fe8dd1836ee9", "session_id": "697a6f96-fb30-4958-9e15-a6ceb5892c41", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T03:51:10.004099", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpynmwjkur.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpb85kyh_i.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
111
+ {"trace_id": "99341c01-5bb4-47fe-80e9-fe8dd1836ee9", "session_id": "697a6f96-fb30-4958-9e15-a6ceb5892c41", "end_time": "2025-09-21T03:51:12.815359", "outputs": {"final_score": 36.668, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
112
+ {"trace_id": "02d153d2-eb16-4784-abb5-42ab4509f10a", "session_id": "697a6f96-fb30-4958-9e15-a6ceb5892c41", "end_time": "2025-09-21T03:51:12.819346", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
113
+ {"trace_id": "334c866e-795d-4eb9-90dc-2f66b7128d5f", "session_id": "c192918b-d4d5-4598-8177-47bbd2d80408", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T03:55:08.646302", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
114
+ {"trace_id": "d86ec10f-f015-46b1-bf1b-f72b1a4613b3", "session_id": "c192918b-d4d5-4598-8177-47bbd2d80408", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T03:55:08.648291", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp1cfpxuyy.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp6s3sg7jw.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
115
+ {"trace_id": "d86ec10f-f015-46b1-bf1b-f72b1a4613b3", "session_id": "c192918b-d4d5-4598-8177-47bbd2d80408", "end_time": "2025-09-21T03:55:12.019303", "outputs": {"final_score": 50.0, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
116
+ {"trace_id": "334c866e-795d-4eb9-90dc-2f66b7128d5f", "session_id": "c192918b-d4d5-4598-8177-47bbd2d80408", "end_time": "2025-09-21T03:55:12.023331", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
117
+ {"trace_id": "8b172285-0bdc-468f-9524-c6fbff86ab8e", "session_id": "80a681b0-9d8d-4651-893c-c7373c734dc0", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T04:07:19.705410", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
118
+ {"trace_id": "d728dc29-eda1-42d5-ab33-aa4e99157fe1", "session_id": "80a681b0-9d8d-4651-893c-c7373c734dc0", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T04:07:19.708408", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpjg086yms.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp_u57r0c7.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
119
+ {"trace_id": "d728dc29-eda1-42d5-ab33-aa4e99157fe1", "session_id": "80a681b0-9d8d-4651-893c-c7373c734dc0", "end_time": "2025-09-21T04:07:46.216611", "outputs": {"final_score": 36.668, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
120
+ {"trace_id": "8b172285-0bdc-468f-9524-c6fbff86ab8e", "session_id": "80a681b0-9d8d-4651-893c-c7373c734dc0", "end_time": "2025-09-21T04:07:46.223165", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
matchers/__init__.py ADDED
File without changes
matchers/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (150 Bytes). View file
 
matchers/__pycache__/final_scorer.cpython-312.pyc ADDED
Binary file (2.79 kB). View file
 
matchers/__pycache__/hard_matcher.cpython-312.pyc ADDED
Binary file (2 kB). View file
 
matchers/__pycache__/semantic_matcher.cpython-312.pyc ADDED
Binary file (2.21 kB). View file
 
matchers/entity_extractor.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # matchers/entity_extractor.py - SPACY ENTITY EXTRACTION
2
+ import spacy
3
+ from collections import Counter
4
+ import re
5
+
6
class EntityExtractor:
    """Extract entities, years of experience and education hints from text.

    The spaCy ``en_core_web_sm`` pipeline is loaded in ``__init__``.  When the
    model cannot be loaded ``self.nlp`` is ``None`` and the regex-only
    fallbacks are used instead.
    """

    # Degree keywords looked up (case-insensitively) by extract_education_info.
    _DEGREES = (
        "bachelor", "master", "phd", "doctorate", "diploma",
        "b.tech", "m.tech", "bca", "mca", "bsc", "msc",
    )

    # Fields of study looked up (case-insensitively) by extract_education_info.
    _FIELDS = (
        "computer science", "engineering", "information technology",
        "software engineering", "data science", "mathematics",
    )

    # Regexes (applied with re.IGNORECASE) that flag technology-like tokens.
    _TECH_PATTERNS = (
        r'\b\w+\.js\b', r'\b\w+script\b', r'\b\w+SQL\b',
        r'\bAPI\b', r'\bSDK\b', r'\bIDE\b', r'\bOS\b',
    )

    # Regexes (applied to lower-cased text) capturing a number of years.
    _EXPERIENCE_PATTERNS = (
        r'(\d+)\+?\s*years?\s*(?:of\s*)?experience',
        r'(\d+)\+?\s*years?\s*in',
        r'experience.*?(\d+)\+?\s*years?',
        r'(\d+)\+?\s*year.*?experience',
    )

    def __init__(self):
        try:
            print("🧠 Loading spaCy model...")
            self.nlp = spacy.load("en_core_web_sm")
            print("✅ spaCy model loaded successfully")
        except OSError:
            # Model package missing: keep working with the regex fallbacks.
            print("⚠️ spaCy model not found. Run: python -m spacy download en_core_web_sm")
            self.nlp = None

    @staticmethod
    def _empty_entities():
        """Return a fresh result dict with every entity category empty."""
        return {
            "persons": [],
            "organizations": [],
            "technologies": [],
            "skills": [],
            "locations": [],
        }

    @staticmethod
    def _find_terms(terms, text_lower, suffix=""):
        """Return the terms that occur in ``text_lower`` as whole tokens.

        A term only counts when it is not glued to other ASCII letters or
        digits, so "mastered" does not report "master".  ``suffix`` is an
        optional regex fragment allowed directly after the term.
        """
        found = []
        for term in terms:
            pattern = r"(?<![a-z0-9])" + re.escape(term) + suffix + r"(?![a-z0-9])"
            if re.search(pattern, text_lower):
                found.append(term)
        return found

    def extract_skills_with_nlp(self, text):
        """Extract named entities, technology tokens and candidate skills.

        Args:
            text: Raw resume or job-description text.

        Returns:
            dict with list values under the keys ``persons``,
            ``organizations``, ``technologies``, ``skills`` and ``locations``.
            Without a spaCy model the result of ``_fallback_extraction`` is
            returned instead.
        """
        if not self.nlp:
            return self._fallback_extraction(text)

        print("🔍 Extracting entities with spaCy...")

        doc = self.nlp(text)
        entities = self._empty_entities()

        # Route the spaCy entity labels we care about into their buckets.
        label_to_key = {
            "PERSON": "persons",
            "ORG": "organizations",
            "GPE": "locations",  # geopolitical entity
        }
        for ent in doc.ents:
            key = label_to_key.get(ent.label_)
            if key is not None:
                entities[key].append(ent.text)

        # Short noun phrases (at most 3 words) are treated as potential skills.
        noun_phrases = [
            chunk.text.lower()
            for chunk in doc.noun_chunks
            if len(chunk.text.split()) <= 3
        ]

        tech_terms = []
        for pattern in self._TECH_PATTERNS:
            tech_terms.extend(re.findall(pattern, text, re.IGNORECASE))

        entities["technologies"] = list(set(tech_terms))
        entities["skills"] = list(set(noun_phrases))

        return entities

    def extract_experience_years(self, text):
        """Return the largest "N years" figure mentioned near "experience".

        Args:
            text: Raw resume or job-description text.

        Returns:
            int: the maximum number of years found, or 0 when none is found.
        """
        if not self.nlp:
            # Kept so results stay unchanged when the spaCy model is missing.
            return self._extract_years_regex(text)

        # The extraction itself is purely regex based, so no spaCy parse is
        # run here (the previous parse result was never used).
        text_lower = text.lower()
        years = []
        for pattern in self._EXPERIENCE_PATTERNS:
            matches = re.findall(pattern, text_lower)
            years.extend(int(match) for match in matches if match.isdigit())

        return max(years) if years else 0

    def extract_education_info(self, text):
        """Find known degree keywords and fields of study in ``text``.

        Matching is case-insensitive and on whole tokens; degree keywords may
        carry a plural or possessive ending ("masters", "bachelor's").

        Returns:
            dict with ``degrees`` and ``fields`` lists holding the matched
            keywords (lower case, no duplicates).
        """
        text_lower = text.lower()
        return {
            "degrees": self._find_terms(self._DEGREES, text_lower, r"(?:'?s)?"),
            "fields": self._find_terms(self._FIELDS, text_lower),
        }

    def _fallback_extraction(self, text):
        """Build the entity dict without spaCy.

        Only ``organizations`` is populated: the first label of every e-mail
        domain found in ``text`` is used as an organization name.
        """
        print("⚠️ Using fallback extraction (spaCy not available)")

        entities = self._empty_entities()

        email_domains = re.findall(r'@([a-zA-Z0-9.-]+\.[a-zA-Z]{2,})', text)
        entities["organizations"] = [domain.split('.')[0] for domain in email_domains]

        return entities

    def _extract_years_regex(self, text):
        """Regex fallback for experience extraction; returns 0 when nothing matches."""
        pattern = r'(\d+)\+?\s*years?\s*(?:of\s*)?(?:experience|exp)'
        matches = re.findall(pattern, text.lower())
        years = [int(match) for match in matches if match.isdigit()]
        return max(years) if years else 0
137
+
138
+ # Test function
139
def test_entity_extractor():
    """Smoke-test EntityExtractor on a short sample text.

    Prints what was extracted and returns True when at least one skill
    candidate was found.
    """
    sample_text = """
    John Smith is a Python developer with 3+ years of experience at Google.
    He has worked with React.js, Node.js, and AWS in San Francisco.
    Bachelor's degree in Computer Science.
    """

    extractor = EntityExtractor()
    entities = extractor.extract_skills_with_nlp(sample_text)
    years = extractor.extract_experience_years(sample_text)
    education = extractor.extract_education_info(sample_text)

    skill_count = len(entities['skills'])
    print(f"✅ Entities extracted: {skill_count} skills found")
    print(f"✅ Experience: {years} years")
    print(f"✅ Education: {education}")

    return skill_count > 0


if __name__ == "__main__":
    test_entity_extractor()
matchers/final_scorer.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # matchers/final_scorer.py
2
+ from matchers.hard_matcher import calculate_hard_match_score, calculate_fuzzy_match
3
+ from matchers.semantic_matcher import SemanticMatcher
4
+
5
class ResumeScorer:
    """Combine keyword, semantic and fuzzy matching into one relevance score."""

    def __init__(self):
        self.semantic_matcher = SemanticMatcher()

    def calculate_final_score(self, resume_data, jd_data):
        """Calculate the weighted final score for a resume against a JD.

        Args:
            resume_data: Mapping providing ``"skills"`` and ``"raw_text"``.
            jd_data: Mapping providing ``"skills"`` and ``"raw_text"``.

        Returns:
            dict with ``final_score`` (rounded to 2 decimals), ``verdict``,
            ``breakdown`` (the three partial results) and ``suggestions``.
        """
        # Step 1: hard match (exact keyword overlap).
        hard_match = calculate_hard_match_score(
            resume_data["skills"],
            jd_data["skills"],
        )
        if not isinstance(hard_match, dict):
            # The hard matcher can hand back a bare number (e.g. 0.0 when the
            # JD lists no skills); normalise it so the lookups below work.
            hard_match = {
                "score": float(hard_match),
                "matched_count": 0,
                "total_jd_skills": 0,
                "matched_skills": [],
                "missing_skills": [],
            }

        # Step 2: semantic match on the raw texts.
        # presumably "score" is on a 0-100 scale like the hard match — confirm
        semantic_match = self.semantic_matcher.calculate_semantic_score(
            resume_data["raw_text"],
            jd_data["raw_text"],
        )

        # Step 3: fuzzy match of JD skills against the resume text.
        fuzzy_skills = calculate_fuzzy_match(
            resume_data["raw_text"],
            jd_data["skills"],
        )
        fuzzy_bonus = len(fuzzy_skills) * 2  # 2 points per fuzzy match

        # Weighted scoring formula.
        # NOTE(review): the bonus is capped at 20 before the 0.1 weight, so
        # this term contributes at most 2 points — confirm that is intended.
        final_score = (
            0.4 * hard_match["score"] +      # 40% keyword match
            0.5 * semantic_match["score"] +  # 50% semantic similarity
            0.1 * min(fuzzy_bonus, 20)       # fuzzy bonus (capped at 20)
        )

        return {
            "final_score": round(final_score, 2),
            "verdict": self.get_verdict(final_score),
            "breakdown": {
                "hard_match": hard_match,
                "semantic_match": semantic_match,
                "fuzzy_matches": fuzzy_skills,
            },
            "suggestions": self.generate_suggestions(hard_match["missing_skills"]),
        }

    def get_verdict(self, score):
        """Map a numeric score to a verdict label (>=80 high, >=60 medium, else low)."""
        if score >= 80:
            return "High Suitability"
        if score >= 60:
            return "Medium Suitability"
        return "Low Suitability"

    def generate_suggestions(self, missing_skills):
        """Build improvement suggestions from the missing JD skills.

        Always returns a list of strings, so callers can iterate the result
        regardless of whether any skill is missing.
        """
        if not missing_skills:
            return ["Great match! No major skills missing."]

        top_skills = ', '.join(missing_skills[:3])
        if len(missing_skills) <= 3:
            return [f"Consider adding skills: {top_skills}"]

        return [
            f"Focus on key skills: {top_skills}",
            "Consider relevant projects or certifications",
        ]
matchers/fuzzy_matcher.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # matchers/fuzzy_matcher.py - FUZZY SKILL MATCHING
2
+ from rapidfuzz import fuzz, process
3
+ from collections import defaultdict
4
+
5
class FuzzyMatcher:
    """Match JD skills to resume skills via string similarity and known aliases."""

    def __init__(self):
        # Groups of spellings/aliases that are treated as the same skill.
        self.skill_variations = {
            'javascript': ['js', 'javascript', 'ecmascript', 'node.js', 'nodejs'],
            'python': ['python', 'py', 'python3'],
            'typescript': ['typescript', 'ts'],
            'kubernetes': ['kubernetes', 'k8s', 'kube'],
            'postgresql': ['postgresql', 'postgres', 'psql'],
            'ci/cd': ['ci/cd', 'cicd', 'continuous integration', 'continuous deployment'],
            'docker': ['docker', 'containerization', 'containers'],
            'aws': ['aws', 'amazon web services', 'amazon cloud'],
            'react': ['react', 'reactjs', 'react.js'],
            'angular': ['angular', 'angularjs', 'angular.js'],
        }
        print("✅ Fuzzy matcher initialized with skill variations")

    def fuzzy_skill_match(self, resume_skills, jd_skills, threshold=80):
        """Find fuzzy matches between resume and JD skills.

        For every JD skill the best resume skill whose ``fuzz.ratio`` is
        strictly above ``threshold`` is taken; when there is none, the alias
        table is consulted.

        Args:
            resume_skills: Iterable of skill strings from the resume.
            jd_skills: Iterable of skill strings from the job description.
            threshold: Score a match has to exceed (strictly) to count.

        Returns:
            dict with ``fuzzy_matched_skills`` (matched JD skills),
            ``match_details`` (one dict per match with ``jd_skill``,
            ``resume_skill`` and ``confidence``) and ``fuzzy_score`` (the
            number of matched JD skills).
        """
        print("🔍 Running fuzzy skill matching...")

        fuzzy_matches = []
        matched_pairs = []

        for jd_skill in jd_skills:
            best_match = None
            best_score = 0
            jd_lower = jd_skill.lower()

            for resume_skill in resume_skills:
                # Direct similarity between the two skill names.
                score = fuzz.ratio(jd_lower, resume_skill.lower())
                if score > threshold and score > best_score:
                    best_match = resume_skill
                    best_score = score

            # No direct hit: fall back to the alias table.
            if not best_match:
                best_match, best_score = self._check_skill_variations(jd_skill, resume_skills)

            if best_match and best_score > threshold:
                fuzzy_matches.append(jd_skill)
                matched_pairs.append({
                    "jd_skill": jd_skill,
                    "resume_skill": best_match,
                    "confidence": round(best_score, 1),
                })

        return {
            "fuzzy_matched_skills": fuzzy_matches,
            "match_details": matched_pairs,
            "fuzzy_score": len(fuzzy_matches),
        }

    def _check_skill_variations(self, jd_skill, resume_skills):
        """Look for a resume skill that is a known alias of ``jd_skill``.

        Returns:
            ``(resume_skill, 95)`` for the first resume skill sharing an alias
            group with ``jd_skill``, otherwise ``(None, 0)``.
        """
        jd_lower = jd_skill.lower()

        for variations in self.skill_variations.values():
            if jd_lower not in variations:
                continue
            for resume_skill in resume_skills:
                if resume_skill.lower() in variations:
                    return resume_skill, 95  # high confidence for alias match

        # A separate "reverse" pass is unnecessary: it would test the very
        # same condition (both names in one alias group) as the loop above.
        return None, 0

    def suggest_skill_improvements(self, missing_skills):
        """Suggest aliases from the variation table for missing skills.

        Only the first five missing skills are considered, and only those
        found in the alias table produce a suggestion.

        Returns:
            list of dicts with ``missing_skill``, ``alternatives`` (up to
            three other spellings) and a ready-made ``suggestion`` sentence.
        """
        suggestions = []

        for skill in missing_skills[:5]:
            skill_lower = skill.lower()

            for variations in self.skill_variations.values():
                if skill_lower not in variations:
                    continue
                other_variations = [v for v in variations if v != skill_lower]
                if other_variations:
                    suggestions.append({
                        "missing_skill": skill,
                        "alternatives": other_variations[:3],
                        "suggestion": f"Consider learning {other_variations[0]} as an alternative to {skill}",
                    })
                # Only the first alias group containing the skill is used.
                break

        return suggestions
99
+
100
+ # Test function
101
def test_fuzzy_matcher():
    """Smoke-test FuzzyMatcher with a few aliased skill names.

    Prints every match and returns True when at least one JD skill matched.
    """
    resume_skills = ["javascript", "python", "react", "nodejs", "aws"]
    jd_skills = ["js", "python3", "reactjs", "node.js", "amazon web services", "docker"]

    result = FuzzyMatcher().fuzzy_skill_match(resume_skills, jd_skills)
    matched = result['fuzzy_matched_skills']
    print(f"✅ Fuzzy matches found: {len(matched)}")

    for match in result['match_details']:
        print(f" {match['jd_skill']} ↔ {match['resume_skill']} ({match['confidence']}%)")

    return len(matched) > 0


if __name__ == "__main__":
    test_fuzzy_matcher()
matchers/hard_matcher.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # matchers/hard_matcher.py
2
def calculate_hard_match_score(resume_skills, jd_skills):
    """Calculate the percentage of JD skills that appear verbatim in the resume.

    Matching is exact (case-sensitive) set intersection.

    Args:
        resume_skills: Iterable of skill names found in the resume.
        jd_skills: Iterable of skill names required by the job description.

    Returns:
        A dict with ``score`` (0-100, rounded to 2 decimals),
        ``matched_count``, ``total_jd_skills`` (unique JD skills),
        ``matched_skills`` and ``missing_skills`` (both sorted lists).
        An empty JD yields the same dict shape with zeroed values, so callers
        can always index the result.
    """
    required = set(jd_skills or ())
    offered = set(resume_skills or ())

    if not required:  # avoid division by zero, but keep the result shape
        return {
            "score": 0.0,
            "matched_count": 0,
            "total_jd_skills": 0,
            "matched_skills": [],
            "missing_skills": []
        }

    matched_skills = required & offered
    coverage_percentage = len(matched_skills) / len(required) * 100

    return {
        "score": round(coverage_percentage, 2),
        "matched_count": len(matched_skills),
        "total_jd_skills": len(required),
        # Sorted so repeated runs produce identical output.
        "matched_skills": sorted(matched_skills),
        "missing_skills": sorted(required - offered)
    }
19
+
20
def calculate_fuzzy_match(resume_text, jd_skills, threshold=80):
    """Find JD skills that fuzzily appear in the resume text (JavaScript vs JS).

    Each JD skill is expanded with get_skill_variations() and every variation
    is compared against the lowercased resume with rapidfuzz's partial_ratio.

    Args:
        resume_text: Full resume text.
        jd_skills: Iterable of skill names from the job description.
        threshold: A variation matches when its partial ratio is strictly
            greater than this value (defaults to 80).

    Returns:
        The matching JD skills, without duplicates, in their JD order.
    """
    if not resume_text or not jd_skills:
        return []

    # Install: pip install rapidfuzz
    from rapidfuzz import fuzz

    resume_lower = resume_text.lower()
    fuzzy_matches = []

    # dict.fromkeys drops duplicate JD skills while keeping their order.
    for skill in dict.fromkeys(jd_skills):
        # Check if skill or common variations exist
        for variation in get_skill_variations(skill):
            # The resume is lowercased, so the variation must be too;
            # unknown skills come back in their original casing.
            if fuzz.partial_ratio(variation.lower(), resume_lower) > threshold:
                fuzzy_matches.append(skill)
                break

    return fuzzy_matches
37
+
38
def get_skill_variations(skill):
    """Return the common spellings/aliases of a skill for fuzzy matching.

    The lookup is case-insensitive and works in both directions: passing a
    canonical name ("kubernetes") or one of its aliases ("k8s") returns the
    same group.

    Args:
        skill: Skill name to expand.

    Returns:
        The list of known variations, or ``[skill]`` (unchanged) when the
        skill is not in the table.
    """
    variations = {
        "javascript": ["js", "javascript", "node.js", "nodejs"],
        "python": ["python", "py"],
        "tensorflow": ["tensorflow", "tf"],
        "kubernetes": ["kubernetes", "k8s"],
        "postgresql": ["postgresql", "postgres", "psql"]
    }

    key = skill.strip().lower()
    if key in variations:
        return variations[key]

    # Reverse lookup so an alias such as "js" expands to the full group
    # instead of matching only itself.
    for aliases in variations.values():
        if key in aliases:
            return aliases

    return [skill]
matchers/semantic_matcher.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # matchers/semantic_matcher.py - ENHANCED SEMANTIC MATCHER
2
+ from sentence_transformers import SentenceTransformer, util
3
+ import numpy as np
4
+
5
class SemanticMatcher:
    """Score how semantically similar two texts are using sentence embeddings.

    The embedding model is loaded once in the constructor. If loading fails,
    the matcher stays usable and reports a score of 0.0 with an ``error`` key.
    """

    def __init__(self):
        try:
            # Using a lightweight, high-performance model
            self.model = SentenceTransformer('all-MiniLM-L6-v2')
            print("✅ Semantic matcher initialized with SentenceTransformer model")
        except Exception as e:
            # Deliberate best-effort: a missing model must not break the pipeline.
            print(f"⚠️ Could not load SentenceTransformer model: {e}")
            print(" Install with: pip install sentence-transformers")
            self.model = None

    def calculate_semantic_similarity(self, text1: str, text2: str) -> dict:
        """Calculate semantic similarity using sentence embeddings.

        Args:
            text1: First text (e.g. the resume).
            text2: Second text (e.g. the job description).

        Returns:
            ``{"semantic_score": float}`` with the cosine similarity expressed
            as a percentage in the range 0-100 (rounded to 2 decimals). When
            the model is unavailable, an input is empty, or encoding fails,
            the score is 0.0 and an ``error`` key explains why.
        """
        if self.model is None:
            return {
                "semantic_score": 0.0,
                "error": "SentenceTransformer model not loaded"
            }

        # Empty or whitespace-only text carries no meaning to compare.
        if not (text1 and text1.strip() and text2 and text2.strip()):
            return {
                "semantic_score": 0.0,
                "error": "Both texts must be non-empty"
            }

        try:
            # Generate embeddings for both texts
            embedding1 = self.model.encode(text1, convert_to_tensor=True)
            embedding2 = self.model.encode(text2, convert_to_tensor=True)

            # Calculate cosine similarity
            cosine_score = util.pytorch_cos_sim(embedding1, embedding2)

            # Cosine similarity can be negative (or overshoot 1.0 by rounding);
            # clamp it so the reported percentage stays within 0-100.
            similarity = max(0.0, min(1.0, float(cosine_score[0][0])))

            return {
                "semantic_score": round(similarity * 100, 2)
            }
        except Exception as e:
            print(f"❌ Error during semantic similarity calculation: {e}")
            return {"semantic_score": 0.0, "error": str(e)}
parsers/__iniy__.py ADDED
File without changes
parsers/__pycache__/cleaner.cpython-312.pyc ADDED
Binary file (520 Bytes). View file
 
parsers/__pycache__/docx_parser.cpython-312.pyc ADDED
Binary file (384 Bytes). View file
 
parsers/__pycache__/jd_parser.cpython-312.pyc ADDED
Binary file (902 Bytes). View file
 
parsers/__pycache__/job_requirement_parser.cpython-312.pyc ADDED
Binary file (20.2 kB). View file
 
parsers/__pycache__/pdf_parser.cpython-312.pyc ADDED
Binary file (1.54 kB). View file
 
parsers/__pycache__/section_splitter.cpython-312.pyc ADDED
Binary file (2.69 kB). View file
 
parsers/__pycache__/skill_extractor.cpython-312.pyc ADDED
Binary file (2.61 kB). View file
 
parsers/__pycache__/skills_list.cpython-312.pyc ADDED
Binary file (937 Bytes). View file
 
parsers/__pycache__/smart_skill_extractor.cpython-312.pyc ADDED
Binary file (12 kB). View file
 
parsers/cleaner.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
def clean_text(text):
    """Normalize whitespace in *text*.

    Every run of whitespace (spaces, tabs, newlines) is collapsed to a single
    space and the result is stripped, so the output is always one line.

    Args:
        text: Raw text; ``None`` or an empty string yields ``""``.

    Returns:
        The normalized single-line string.
    """
    if not text:
        return ""
    # A separate newline-collapsing pass is unnecessary: \s already covers "\n".
    return re.sub(r'\s+', ' ', text).strip()
parsers/docx_parser.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ import docx2txt
2
+
3
def extract_text_docx(file_path):
    """Return the text that docx2txt extracts from the DOCX file at *file_path*."""
    document_text = docx2txt.process(file_path)
    return document_text
parsers/entity_extractor.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parsers/entity_extractor.py - Basic version
2
+ import re
3
+
4
class EntityExtractor:
    """Basic, dependency-free entity extraction for resumes and job descriptions."""

    def __init__(self):
        print("✅ Entity extractor initialized (basic mode)")

    def extract_skills_with_nlp(self, text):
        """Return the entity buckets of the basic mode.

        The basic mode performs no NLP, so every bucket is an empty list; the
        keys are kept so callers can rely on the result shape.
        """
        return {
            "persons": [],
            "organizations": [],
            "technologies": [],
            "skills": [],
            "locations": []
        }

    def extract_experience_years(self, text):
        """Extract years of experience using regex.

        Args:
            text: Free text such as "5+ years of experience".

        Returns:
            The largest number of years mentioned, or 0 when none is found
            (including for empty or ``None`` input).
        """
        if not text:
            return 0
        pattern = r'(\d+)\+?\s*years?\s*(?:of\s*)?(?:experience|exp)'
        # The capture group is \d+, so every match converts with int().
        return max((int(years) for years in re.findall(pattern, text.lower())), default=0)

    def extract_education_info(self, text):
        """Extract education info.

        Args:
            text: Free text to scan; empty or ``None`` yields no degrees.

        Returns:
            ``{"degrees": [...], "fields": []}`` where ``degrees`` lists the
            known degree keywords that occur (as substrings) in the text.
        """
        degrees = ["bachelor", "master", "phd", "b.tech", "m.tech"]
        lowered = (text or "").lower()  # lowercase once, not once per degree

        return {
            "degrees": [degree for degree in degrees if degree in lowered],
            "fields": []
        }
parsers/jd_parser.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from parsers.cleaner import clean_text
3
+ from parsers.skill_extractor import extract_skills
4
+
5
def parse_jd(file_text):
    """Parse job description and extract role + skills.

    Args:
        file_text: Raw job-description text (may be ``None`` or empty).

    Returns:
        A dict with ``role`` (the text after a "Job Role"/"Job Title"/
        "Position" label, or "Unknown"), ``skills`` (the result of
        extract_skills on the cleaned text) and ``raw_text`` (the cleaned,
        single-line text).
    """
    raw_text = file_text or ""

    # Extract Job Role (look for keywords like "Job Role", "Job Title", "Position").
    # Search the *raw* text: clean_text() flattens everything to one line,
    # after which "(.*)" would swallow the whole rest of the document.
    role_match = re.search(r"(job role|job title|position)\s*[:\-]\s*(.*)", raw_text, re.I)
    job_role = clean_text(role_match.group(2)) if role_match else ""

    text = clean_text(raw_text)

    # Extract skills
    jd_skills = extract_skills(text)

    return {
        "role": job_role or "Unknown",
        "skills": jd_skills,
        "raw_text": text
    }
parsers/job_requirement_parser.py ADDED
@@ -0,0 +1,449 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parsers/job_requirement_parser.py - Advanced Job Requirement Analysis
2
+ import re
3
+ import json
4
+ from typing import Dict, List, Tuple
5
+ from dataclasses import dataclass
6
+ from parsers.smart_skill_extractor import SmartSkillExtractor
7
+
8
@dataclass
class JobRequirement:
    """Structured view of a single parsed job description."""

    # Identity of the opening
    role_title: str
    company: str

    # Candidate requirements
    experience_required: str
    education_required: List[str]
    must_have_skills: List[str]
    good_to_have_skills: List[str]
    certifications: List[str]

    # What the job involves and offers
    responsibilities: List[str]
    benefits: List[str]

    # Logistics
    location: str
    employment_type: str
    salary_range: str

    # Derived classification
    industry: str
    seniority_level: str
25
+
26
class JobRequirementParser:
    """Parse job descriptions to extract structured requirements.

    Extraction is regex/keyword based; skill detection is delegated to
    SmartSkillExtractor. All matching is heuristic.
    """

    def __init__(self):
        self.skill_extractor = SmartSkillExtractor()
        self.patterns = self._initialize_patterns()
        print("✅ Job Requirement Parser initialized")

    def _initialize_patterns(self):
        """Initialize regex patterns for job parsing.

        Returns:
            A dict mapping a field name to the list of patterns tried, in
            order, for that field. All patterns are applied case-insensitively.
        """
        return {
            'role_title': [
                r'(?:job\s+title|position|role)[\s:]*([^\n.]{5,80})',
                r'^([A-Z][\w\s,]+(?:engineer|developer|manager|analyst|specialist|coordinator))\b',
                r'hiring\s+for[\s:]*([^\n.]{5,80})',
            ],
            'company': [
                r'(?:company|organization)[\s:]*([^\n]+)',
                # "at" must be a whole word (not the tail of "that") and the
                # name must start with a real capital even under IGNORECASE.
                r'(?:\bat\b|@)\s+((?-i:[A-Z])[a-zA-Z\s&,.-]+?)(?:\s|$)',
            ],
            'experience': [
                r'(?:experience|exp)[\s:]*(\d+[\+\-]*\s*(?:to|\-)\s*\d+\s*years?|\d+\+?\s*years?)',
                r'(\d+[\+\-]*)\s*(?:to|\-)\s*(\d+)\s*years?\s*(?:of\s+)?(?:experience|exp)',
                r'minimum\s+(\d+\+?)\s*years?',
                r'(\d+)\+?\s*years?\s+(?:of\s+)?(?:experience|exp)',
            ],
            'education': [
                r'(?:education|degree|qualification)[\s:]*([^\n]+)',
                r'(?:bachelor|master|phd|doctorate|diploma|b\.tech|m\.tech|bca|mca|bsc|msc)[\s\.]*([^\n]*)',
                r'(?:degree\s+in|graduated\s+in)\s+([^\n]+)',
            ],
            'must_have': [
                r'(?:must\s+have|required|mandatory|essential)[\s:]*([^.]+)',
                r'(?:requirements|qualifications)[\s:]*([^.]+)',
                r'(?:should\s+have|need\s+to\s+have)[\s:]*([^.]+)',
            ],
            'good_to_have': [
                r'(?:good\s+to\s+have|nice\s+to\s+have|preferred|bonus|plus)[\s:]*([^.]+)',
                r'(?:additional|optional)[\s:]*([^.]+)',
            ],
            'responsibilities': [
                r'(?:responsibilities|duties|tasks)[\s:]*([^.]+)',
                r'(?:you\s+will|role\s+involves)[\s:]*([^.]+)',
            ],
            'certifications': [
                r'(?:certification|certified|certificate)[\s:]*([^.]+)',
                r'(?:aws|azure|google\s+cloud|oracle|cisco|microsoft)\s+certified[\s:]*([^.]*)',
            ],
            'salary': [
                r'(?:salary|compensation|package)[\s:]*([^.\n]+)',
                r'(?:\$|₹|€|£)\s*([0-9,.-]+(?:\s*(?:to|\-)\s*[0-9,.-]+)?)',
                r'([0-9,]+)\s*(?:to|\-)\s*([0-9,]+)\s*(?:per\s+)?(?:month|year|annum)',
            ],
            'location': [
                r'(?:location|based\s+in|office)[\s:]*([^.\n]+)',
                r'(?:remote|hybrid|onsite|work\s+from)[\s:]*([^.\n]*)',
            ]
        }

    @staticmethod
    def _skill_pattern(skill: str) -> str:
        """Build a regex matching *skill* as a whole token in lowercased text.

        Prevents short skills such as "r", "c" or "go" from matching inside
        other words ("required", "c++", "good").
        """
        return rf'(?<![a-z0-9_]){re.escape(skill.lower())}(?![a-z+#])'

    @staticmethod
    def _dedupe(items) -> list:
        """Remove duplicates while keeping first-seen order."""
        return list(dict.fromkeys(items))

    def _first_capture(self, field: str, text: str):
        """Return the first non-empty capture for *field*, or None.

        Falls back to the whole match when the capture group is empty, so an
        empty string is never reported as a found value.
        """
        for pattern in self.patterns[field]:
            match = re.search(pattern, text, re.IGNORECASE)
            if match:
                value = match.group(1).strip() or match.group(0).strip()
                if value:
                    return value
        return None

    def _first_match(self, field: str, text: str):
        """Return the whole text of the first pattern of *field* that matches, or None."""
        for pattern in self.patterns[field]:
            match = re.search(pattern, text, re.IGNORECASE)
            if match:
                return match.group().strip()
        return None

    def parse_job_description(self, jd_text: str) -> "JobRequirement":
        """Parse job description into structured requirements.

        Args:
            jd_text: Raw job-description text.

        Returns:
            A populated JobRequirement; an "empty" requirement with
            placeholder values when *jd_text* is empty or whitespace-only.
        """
        if not jd_text or not jd_text.strip():
            return self._create_empty_requirement()

        print("🔍 Parsing job requirements...")

        # Extract basic information
        role_title = self._extract_role_title(jd_text)
        company = self._extract_company(jd_text)
        experience = self._extract_experience(jd_text)
        education = self._extract_education(jd_text)
        location = self._extract_location(jd_text)
        salary = self._extract_salary(jd_text)

        # Extract skills and requirements
        must_have_skills, good_to_have_skills = self._extract_skills_by_priority(jd_text)
        certifications = self._extract_certifications(jd_text)
        responsibilities = self._extract_responsibilities(jd_text)

        # Determine job characteristics
        employment_type = self._determine_employment_type(jd_text)
        industry = self._determine_industry(jd_text, role_title)
        seniority_level = self._determine_seniority(role_title, experience)

        job_req = JobRequirement(
            role_title=role_title,
            company=company,
            experience_required=experience,
            education_required=education,
            must_have_skills=must_have_skills,
            good_to_have_skills=good_to_have_skills,
            certifications=certifications,
            responsibilities=responsibilities,
            benefits=[],  # Can be enhanced later
            location=location,
            employment_type=employment_type,
            salary_range=salary,
            industry=industry,
            seniority_level=seniority_level
        )

        print(f"✅ Parsed job: {role_title} at {company}")
        print(f" 📍 Location: {location}")
        print(f" 💼 Experience: {experience}")
        print(f" 🎯 Must-have skills: {len(must_have_skills)}")
        print(f" ⭐ Good-to-have skills: {len(good_to_have_skills)}")

        return job_req

    def _extract_role_title(self, text: str) -> str:
        """Extract job role title, or "Unknown Role" when none is found."""
        title = self._first_capture('role_title', text)
        if title:
            return title

        # Fallback: look for common job titles in the first 5 lines
        for line in text.split('\n')[:5]:
            line = line.strip()
            if any(word in line.lower() for word in
                   ['engineer', 'developer', 'manager', 'analyst', 'specialist']):
                return line

        return "Unknown Role"

    def _extract_company(self, text: str) -> str:
        """Extract company name, or "Unknown Company" when none is found."""
        return self._first_capture('company', text) or "Unknown Company"

    def _extract_experience(self, text: str) -> str:
        """Extract experience requirements.

        Returns the matched phrase (e.g. "Experience: 3-5 years"), "0-1 years"
        for fresher/entry-level postings, or "Not specified".
        """
        found = self._first_match('experience', text)
        if found:
            return found

        # Look for fresher/entry level
        if re.search(r'\b(?:fresher|entry\s+level|0\s+years?)\b', text, re.IGNORECASE):
            return "0-1 years"

        return "Not specified"

    def _extract_education(self, text: str) -> List[str]:
        """Extract education requirements.

        Returns the unique matched phrases in first-seen order, or
        ``["Any Graduate"]`` when nothing is found.
        """
        education = []

        for pattern in self.patterns['education']:
            for match in re.finditer(pattern, text, re.IGNORECASE):
                education.append(match.group().strip())

        # Common degree patterns
        degree_patterns = [
            r'\bb\.?tech\b', r'\bm\.?tech\b', r'\bbca\b', r'\bmca\b',
            r'\bbsc\b', r'\bmsc\b', r'\bba\b', r'\bmba\b',
            r'\bbachelor', r'\bmaster', r'\bphd\b', r'\bdoctorate\b'
        ]

        for pattern in degree_patterns:
            # The optional tail can match nothing, so one search is enough to
            # both detect the degree and capture the rest of its sentence.
            match = re.search(pattern + r'[^.\n]*', text, re.IGNORECASE)
            if match:
                education.append(match.group().strip())

        unique = self._dedupe(education)
        return unique if unique else ["Any Graduate"]

    def _extract_skills_by_priority(self, text: str) -> Tuple[List[str], List[str]]:
        """Extract skills categorized by priority.

        A skill is "must-have" when it is mentioned in a must-have passage or
        is judged core by _is_core_skill(); otherwise it is "good-to-have".

        Returns:
            ``(must_have, good_to_have)`` - two duplicate-free lists in the
            order the extractor reported the skills; no skill is in both.
        """
        # Use smart extractor to get all skills
        all_skills = self.skill_extractor.extract_skills_comprehensive(text)

        # Collect the passages that follow must-have / good-to-have cue phrases.
        must_have_section = " ".join(
            match.group(1)
            for pattern in self.patterns['must_have']
            for match in re.finditer(pattern, text, re.IGNORECASE)
        ).lower()
        good_to_have_section = " ".join(
            match.group(1)
            for pattern in self.patterns['good_to_have']
            for match in re.finditer(pattern, text, re.IGNORECASE)
        ).lower()

        must_have = []
        good_to_have = []

        # Categorize skills
        for skill in all_skills:
            token = re.compile(self._skill_pattern(skill))

            if token.search(must_have_section):
                must_have.append(skill)
            elif token.search(good_to_have_section):
                good_to_have.append(skill)
            # Default categorization based on job requirements context
            elif self._is_core_skill(skill, text):
                must_have.append(skill)
            else:
                good_to_have.append(skill)

        # Ensure no duplicates and no overlap between the two lists
        must_have = self._dedupe(must_have)
        must_have_set = set(must_have)
        good_to_have = [s for s in self._dedupe(good_to_have) if s not in must_have_set]

        return must_have, good_to_have

    def _is_core_skill(self, skill: str, text: str) -> bool:
        """Determine if a skill is core based on frequency and context.

        A skill is core when it appears in the same sentence as an emphasis
        word (required, must, experience, years, ...) or is mentioned at least
        twice. Mentions are whole-token matches.
        """
        token = self._skill_pattern(skill)
        text_lower = text.lower()

        # Check for emphasis keywords around the skill
        emphasis_patterns = [
            rf'\b(?:required|must|essential|mandatory|need)\b[^.]*{token}',
            rf'{token}[^.]*\b(?:required|must|essential|mandatory)\b',
            rf'\b(?:experience|expertise|proficient)\b[^.]*{token}',
            rf'{token}[^.]*\b(?:years?|experience)\b'
        ]

        if any(re.search(pattern, text_lower) for pattern in emphasis_patterns):
            return True

        # If mentioned multiple times, likely core
        return len(re.findall(token, text_lower)) >= 2

    def _extract_certifications(self, text: str) -> List[str]:
        """Extract certification requirements (unique, in first-seen order)."""
        certifications = []

        for pattern in self.patterns['certifications']:
            for match in re.finditer(pattern, text, re.IGNORECASE):
                cert = match.group().strip()
                if len(cert) > 5:  # Filter out too short matches
                    certifications.append(cert)

        return self._dedupe(certifications)

    def _extract_responsibilities(self, text: str) -> List[str]:
        """Extract job responsibilities (at most 10 items)."""
        responsibilities = []

        # Split on line breaks and bullet markers. A hyphen only counts as a
        # bullet when it stands alone (start of text or after whitespace, and
        # followed by whitespace), so "cross-functional" stays intact.
        bullet_split = re.compile(r'\n|[•*]\s*|(?:^|(?<=\s))-(?=\s)')

        for pattern in self.patterns['responsibilities']:
            for match in re.finditer(pattern, text, re.IGNORECASE):
                for item in bullet_split.split(match.group(1).strip()):
                    item = item.strip()
                    if len(item) > 10:  # Filter meaningful responsibilities
                        responsibilities.append(item)

        return responsibilities[:10]  # Limit to top 10

    def _extract_location(self, text: str) -> str:
        """Extract job location, or "Not specified" when none is found."""
        location = self._first_capture('location', text)
        if location:
            return location

        # Look for city names (basic patterns)
        city_pattern = r'\b(?:bangalore|mumbai|delhi|hyderabad|chennai|pune|kolkata|ahmedabad|remote|hybrid)\b'
        match = re.search(city_pattern, text, re.IGNORECASE)
        if match:
            return match.group()

        return "Not specified"

    def _extract_salary(self, text: str) -> str:
        """Extract salary information, or "Not specified" when none is found."""
        return self._first_match('salary', text) or "Not specified"

    def _determine_employment_type(self, text: str) -> str:
        """Determine employment type (Internship, Contract, Part-time or Full-time)."""
        text_lower = text.lower()

        # Whole-word match so "international" or "internal" is not an internship.
        if re.search(r'\bintern(?:ship)?s?\b', text_lower):
            return "Internship"
        if 'contract' in text_lower or 'freelance' in text_lower:
            return "Contract"
        if 'part time' in text_lower or 'part-time' in text_lower:
            return "Part-time"
        return "Full-time"

    def _determine_industry(self, text: str, role_title: str) -> str:
        """Determine industry based on job content.

        Keywords must start at a word boundary. The keyword "it" is matched
        only as the upper-case word "IT", because the lowercase substring
        occurs in almost every English text.

        Returns:
            The first industry (in table order) with a keyword hit, or
            "General".
        """
        combined = text + " " + role_title
        text_lower = combined.lower()

        industry_keywords = {
            'Technology': ['software', 'tech', 'it', 'developer', 'engineer', 'programmer'],
            'Finance': ['finance', 'banking', 'fintech', 'investment', 'trading'],
            'Healthcare': ['healthcare', 'medical', 'hospital', 'pharma', 'clinical'],
            'Education': ['education', 'teaching', 'learning', 'university', 'academic'],
            'E-commerce': ['ecommerce', 'e-commerce', 'retail', 'shopping', 'marketplace'],
            'Marketing': ['marketing', 'advertising', 'promotion', 'brand', 'digital marketing'],
            'Consulting': ['consulting', 'advisory', 'strategy', 'management consulting'],
            'Manufacturing': ['manufacturing', 'production', 'industrial', 'automotive'],
        }

        for industry, keywords in industry_keywords.items():
            for keyword in keywords:
                if keyword == 'it':
                    hit = re.search(r'\bIT\b', combined)
                else:
                    hit = re.search(rf'\b{re.escape(keyword)}', text_lower)
                if hit:
                    return industry

        return "General"

    def _determine_seniority(self, role_title: str, experience: str) -> str:
        """Determine seniority level.

        Title keywords win; otherwise the smallest number of years mentioned
        in *experience* decides: 0 -> "Entry Level", 1-3 -> "Mid Level",
        more -> "Senior". With no usable information the result is "Senior".
        """
        title_lower = role_title.lower()

        if any(word in title_lower for word in ['senior', 'lead', 'principal', 'architect', 'manager']):
            return "Senior"
        if any(word in title_lower for word in ['junior', 'associate', 'entry', 'trainee']):
            return "Junior"
        if re.search(r'\bintern(?:ship)?\b', title_lower):
            return "Intern"

        # Determine by experience
        if 'fresher' in experience.lower():
            return "Entry Level"

        # Compare whole numbers, not digit characters: "10 years" is not entry level.
        years = [int(number) for number in re.findall(r'\d+', experience)]
        if not years:
            return "Senior"

        minimum_years = min(years)
        if minimum_years == 0:
            return "Entry Level"
        if minimum_years <= 3:
            return "Mid Level"
        return "Senior"

    def _create_empty_requirement(self) -> "JobRequirement":
        """Create empty job requirement for error cases."""
        return JobRequirement(
            role_title="Unknown Role",
            company="Unknown Company",
            experience_required="Not specified",
            education_required=["Any Graduate"],
            must_have_skills=[],
            good_to_have_skills=[],
            certifications=[],
            responsibilities=[],
            benefits=[],
            location="Not specified",
            employment_type="Full-time",
            salary_range="Not specified",
            industry="General",
            seniority_level="Not specified"
        )

    def export_to_json(self, job_req: "JobRequirement") -> str:
        """Export job requirement to JSON (2-space indented)."""
        return json.dumps(job_req.__dict__, indent=2)
401
+
402
+ # Test function
403
def test_job_parser():
    """Smoke-test JobRequirementParser on a sample job description.

    Prints the parsed role, company and skill lists and returns True when at
    least one must-have skill was extracted.
    """
    sample_jd = """
    Senior Full Stack Developer - TechCorp Inc.

    Location: Bangalore, India (Hybrid)
    Experience: 3-5 years

    Job Description:
    We are looking for a Senior Full Stack Developer to join our growing team.

    Must Have Requirements:
    - 3+ years of experience in React.js and Node.js
    - Proficiency in JavaScript, TypeScript
    - Experience with MySQL and MongoDB
    - Knowledge of AWS cloud services
    - Bachelor's degree in Computer Science or related field

    Good to Have:
    - Experience with Docker and Kubernetes
    - Knowledge of microservices architecture
    - AWS certification preferred
    - Experience with CI/CD pipelines

    Responsibilities:
    - Develop and maintain web applications
    - Collaborate with cross-functional teams
    - Write clean, maintainable code
    - Participate in code reviews

    Package: 8-12 LPA
    """

    requirement = JobRequirementParser().parse_job_description(sample_jd)

    print("\n📋 Parsed Job Requirements:")
    print(f"Role: {requirement.role_title}")
    print(f"Company: {requirement.company}")
    print(f"Must-have skills: {requirement.must_have_skills}")
    print(f"Good-to-have skills: {requirement.good_to_have_skills}")

    return len(requirement.must_have_skills) > 0


if __name__ == "__main__":
    test_job_parser()
parsers/pdf_parser.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import fitz # PyMuPDF
2
+ import pdfplumber
3
+ import docx
4
+
5
def extract_text_pymupdf(file_path):
    """Return the concatenated text of every page of a PDF, read with PyMuPDF."""
    with fitz.open(file_path) as doc:
        return "".join(page.get_text() for page in doc)
12
+
13
def extract_text_pdfplumber(file_path):
    """Return the concatenated text of every page of a PDF, read with pdfplumber.

    Pages for which pdfplumber returns no text contribute an empty string.
    """
    with pdfplumber.open(file_path) as pdf:
        return "".join(page.extract_text() or "" for page in pdf.pages)
20
+
21
def extract_text_docx(file_path):
    """Return the paragraphs of a DOCX file (read with python-docx), one per line."""
    paragraphs = docx.Document(file_path).paragraphs
    return "\n".join(paragraph.text for paragraph in paragraphs)
parsers/section_splitter.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parsers/section_splitter.py - FIXED VERSION
2
+ import re
3
+
4
def split_sections(text):
    """Split resume into sections like education, skills, experience.

    Newlines are flattened to spaces, then every occurrence of a known section
    heading starts a new section that runs up to the next heading.

    Args:
        text: Resume text (``None`` is treated as empty).

    Returns:
        A dict mapping the lowercased heading (punctuation removed) to the
        section text, heading included. Text before the first heading is kept
        under ``"general"``; a heading that occurs more than once has its
        segments joined with a space instead of overwriting earlier content.
        When no heading is found, the dict holds ``"general"`` (the whole
        text) and ``"skills"`` (from extract_skills_section_fallback).
    """
    sections = {}

    # Clean text first
    text = (text or "").replace('\n', ' ').strip()

    # Split by common section headers (more comprehensive)
    section_patterns = [
        r'(professional\s+summary|summary|objective)',
        r'(technical\s+skills|skills|core\s+competencies|technologies)',
        r'(work\s+experience|experience|employment|professional\s+experience)',
        r'(education|academic\s+background|qualifications)',
        r'(projects|personal\s+projects|key\s+projects)',
        r'(certifications|certificates|credentials)',
        r'(achievements|accomplishments|awards)'
    ]

    # Find section boundaries, ordered by position in the text
    section_starts = sorted(
        (match.start(), match.group().lower().strip())
        for pattern in section_patterns
        for match in re.finditer(pattern, text, re.IGNORECASE)
    )

    if not section_starts:
        # Fallback: if no clear sections, try to extract skills manually
        sections["general"] = text
        sections["skills"] = extract_skills_section_fallback(text)
        return sections

    # Keep whatever precedes the first heading (typically name and contact).
    preamble = text[:section_starts[0][0]].strip()
    if preamble:
        sections["general"] = preamble

    # Each section ends where the next one starts; the last runs to the end.
    section_ends = [start for start, _ in section_starts[1:]] + [len(text)]

    for (start_pos, section_name), end_pos in zip(section_starts, section_ends):
        # Clean section name: drop punctuation, collapse inner whitespace
        clean_name = re.sub(r'\s+', ' ', re.sub(r'[^\w\s]', '', section_name)).strip()
        section_text = text[start_pos:end_pos].strip()

        if clean_name in sections:
            # A repeated heading word must not discard the earlier segment.
            sections[clean_name] = f"{sections[clean_name]} {section_text}"
        else:
            sections[clean_name] = section_text

    return sections
52
+
53
def extract_skills_section_fallback(text):
    """Fallback to extract skills when section detection fails.

    Collects the text that follows skill-related labels (up to the next
    period) and joins the fragments with single spaces.

    Args:
        text: Resume text; empty or ``None`` yields ``""``.

    Returns:
        The collected fragments as one string, without empty or repeated
        fragments, or ``""`` when nothing was found.
    """
    if not text:
        return ""

    # Look for skills-related keywords
    skills_indicators = [
        r'programming languages?:?\s*([^.]*)',
        r'technical skills?:?\s*([^.]*)',
        r'technologies?:?\s*([^.]*)',
        r'tools?:?\s*([^.]*)',
        r'frameworks?:?\s*([^.]*)',
        r'languages?:?\s*([^.]*)'
    ]

    fragments = []
    for pattern in skills_indicators:
        for captured in re.findall(pattern, text, re.IGNORECASE):
            fragment = captured.strip()
            # Overlapping labels ("programming languages" / "languages")
            # capture the same text twice; keep each fragment once.
            if fragment and fragment not in fragments:
                fragments.append(fragment)

    return " ".join(fragments)
parsers/skill_extractor.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parsers/skill_extractor.py - ENHANCED VERSION
2
+ import re
3
+ from parsers.skills_list import skills
4
+
5
def extract_skills(text):
    """Extract known skills from text using dictionary matching.

    A skill counts as present only when it appears as a whole token
    (optionally followed by a "js"/".js" suffix, e.g. "reactjs" for "react"),
    so "java" is not found inside "javascript" and "go" not inside "google".
    Trailing digits are allowed ("python3", "html5").

    Args:
        text: Text to scan; empty or ``None`` yields ``[]``.

    Returns:
        The found skills without duplicates: first the directly matched
        skills in skills-list order, then canonical skills that were found
        only through one of their variations.
    """
    if not text:
        return []

    # Convert to lowercase for matching
    text_lower = text.lower()

    def mentioned(term, allow_js_suffix=False):
        """Return True when *term* occurs in the text as a whole token."""
        suffix = r'(?:\.\s*js|js)?' if allow_js_suffix else ''
        pattern = rf'(?<![a-z0-9]){re.escape(term.lower())}{suffix}(?![a-z])'
        return re.search(pattern, text_lower) is not None

    known_skills = set(skills)

    # dict.fromkeys drops the duplicate entries of the skills list in order.
    found_skills = [
        skill for skill in dict.fromkeys(skills)
        if mentioned(skill, allow_js_suffix=True)
    ]
    found_set = set(found_skills)

    # Additional extraction for common variations
    skill_variations = {
        'javascript': ['js', 'javascript', 'ecmascript'],
        'python': ['python', 'py'],
        'node.js': ['nodejs', 'node.js', 'node js'],
        'postgresql': ['postgres', 'postgresql', 'psql'],
        'kubernetes': ['k8s', 'kubernetes'],
        'docker': ['docker', 'containerization'],
        'ci/cd': ['ci/cd', 'cicd', 'continuous integration', 'continuous deployment']
    }

    for main_skill, variations in skill_variations.items():
        # Only add if it's in our skills list and not already found
        if main_skill in found_set or main_skill not in known_skills:
            continue
        if any(mentioned(variation) for variation in variations):
            found_skills.append(main_skill)
            found_set.add(main_skill)

    return found_skills
49
+
50
def debug_skills_extraction(text):
    """Print diagnostics about skill extraction for *text* and return the skills.

    Shows the text length, its first 300 characters, which of a few obvious
    skills occur as plain substrings, and the result of extract_skills().
    """
    print(f"🔍 Text length: {len(text)}")
    print(f"🔍 First 300 chars: {text[:300]}")

    # Check for obvious skills manually
    lowered = text.lower()
    found_obvious = []
    for candidate in ('python', 'javascript', 'react', 'node.js', 'aws', 'docker'):
        if candidate.lower() in lowered:
            found_obvious.append(candidate)
    print(f"🔍 Obvious skills found: {found_obvious}")

    extracted = extract_skills(text)
    print(f"🔍 Total skills extracted: {len(extracted)}")
    print(f"🔍 Skills: {extracted}")

    return extracted
parsers/skills_list.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parsers/skills_list.py
2
# Known skills used for dictionary matching. All entries are lowercase and
# each skill is listed exactly once.
skills = [
    # Programming Languages
    "python", "java", "javascript", "js", "typescript", "c++", "c#", "php", "go", "rust", "kotlin", "swift", "ruby",

    # Web Technologies
    "html", "css", "react", "angular", "vue", "nodejs", "node.js", "express", "django", "flask", "fastapi", "spring",

    # Databases
    "mysql", "postgresql", "postgres", "mongodb", "sqlite", "oracle", "redis", "cassandra", "dynamodb",

    # Cloud & DevOps
    "aws", "azure", "gcp", "google cloud", "docker", "kubernetes", "k8s", "terraform", "jenkins", "ci/cd", "cicd",

    # Data Science & AI
    "pandas", "numpy", "matplotlib", "seaborn", "scikit-learn", "sklearn", "tensorflow", "pytorch", "keras", "opencv",

    # Frameworks & Libraries (react, angular and vue are listed under Web Technologies)
    "jquery", "bootstrap", "tailwind", "material-ui", "redux",

    # Tools & Technologies
    "git", "github", "gitlab", "jira", "confluence", "slack", "trello", "figma", "photoshop",

    # Operating Systems
    "linux", "windows", "macos", "ubuntu", "centos",

    # API & Protocols
    "rest", "api", "graphql", "soap", "json", "xml", "http", "https",

    # Testing
    "junit", "pytest", "selenium", "cucumber", "postman", "jest",

    # Methodologies
    "agile", "scrum", "kanban", "devops", "microservices"
]
parsers/smart_skill_extractor.py ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parsers/smart_skill_extractor.py - AI-Powered Skill Detection
2
+ import re
3
+ from collections import Counter
4
+
5
class SmartSkillExtractor:
    """Dictionary- and regex-based skill extractor for resume / job text.

    ``skill_database`` maps every lowercase spelling variant of a skill to
    its canonical name; ``patterns`` holds regexes that capture free text
    following phrases such as "experience with ..." or "skills: ...".
    Only skills present in the built-in vocabulary are ever returned.
    """

    # Canonical (lowercase) skill names grouped by display category.  The
    # category order is the order used by ``get_skill_categories``.
    _SKILL_GROUPS = {
        'Programming Languages': [
            'python', 'java', 'javascript', 'typescript', 'c++', 'c#', 'c', 'php', 'ruby', 'go', 'rust',
            'kotlin', 'swift', 'scala', 'r', 'matlab', 'perl', 'bash', 'powershell', 'sql', 'html',
            'css', 'sass', 'less', 'coffeescript', 'dart', 'elixir', 'erlang', 'f#', 'haskell',
            'julia', 'lua', 'objective-c', 'vb.net', 'assembly', 'cobol', 'fortran'
        ],
        'Frameworks & Libraries': [
            'react', 'angular', 'vue', 'svelte', 'ember', 'backbone', 'jquery', 'bootstrap', 'tailwind',
            'django', 'flask', 'fastapi', 'express', 'nodejs', 'spring', 'hibernate', 'struts',
            'rails', 'sinatra', 'laravel', 'symfony', 'codeigniter', 'asp.net', 'entity framework',
            'xamarin', 'flutter', 'react native', 'ionic', 'cordova', 'electron', 'unity', 'unreal',
            'tensorflow', 'pytorch', 'keras', 'scikit-learn', 'pandas', 'numpy', 'matplotlib',
            'seaborn', 'plotly', 'opencv', 'nltk', 'spacy'
        ],
        'Databases': [
            'mysql', 'postgresql', 'mongodb', 'redis', 'cassandra', 'elasticsearch', 'neo4j',
            'couchdb', 'dynamodb', 'firestore', 'sqlite', 'oracle', 'sql server', 'mariadb',
            'influxdb', 'clickhouse', 'bigquery', 'snowflake', 'redshift'
        ],
        'Cloud & DevOps': [
            'aws', 'azure', 'gcp', 'docker', 'kubernetes', 'jenkins', 'gitlab ci', 'github actions',
            'terraform', 'ansible', 'puppet', 'chef', 'vagrant', 'consul', 'vault', 'prometheus',
            'grafana', 'elk stack', 'nginx', 'apache', 'tomcat',
            'linux', 'ubuntu', 'centos', 'windows server', 'git', 'svn'
        ],
        'Data Science & AI': [
            'machine learning', 'deep learning', 'artificial intelligence', 'data science',
            'data analysis', 'data mining', 'big data', 'analytics', 'statistics', 'regression',
            'classification', 'clustering', 'nlp', 'computer vision', 'neural networks'
        ],
        'Business & Soft Skills': [
            'project management', 'agile', 'scrum', 'kanban', 'leadership', 'communication',
            'teamwork', 'problem solving', 'time management', 'quality assurance',
            'business analysis', 'user research', 'ux design', 'ui design'
        ],
        'Tools & Platforms': [
            'jira', 'confluence', 'slack', 'figma', 'photoshop', 'excel', 'powerpoint',
            'salesforce', 'google analytics', 'seo', 'automation', 'crm', 'erp'
        ],
    }

    # Extra spellings keyed by canonical skill name.  Entries whose key is
    # not a canonical skill above (e.g. 'amazon web services') never apply;
    # they are kept so the table stays a faithful record of the aliases.
    _ABBREVIATIONS = {
        'javascript': ['js', 'javascript'],
        'typescript': ['ts', 'typescript'],
        'artificial intelligence': ['ai', 'artificial intelligence'],
        'machine learning': ['ml', 'machine learning'],
        'amazon web services': ['aws', 'amazon web services'],
        'google cloud platform': ['gcp', 'google cloud'],
        'kubernetes': ['k8s', 'kubernetes'],
        'user experience': ['ux', 'user experience'],
        'user interface': ['ui', 'user interface'],
        'structured query language': ['sql', 'structured query language'],
        'cascading style sheets': ['css', 'cascading style sheets'],
        'hypertext markup language': ['html', 'hypertext markup language']
    }

    # Substring heuristics used only for skills that are NOT in the
    # vocabulary, checked in this order.
    _FALLBACK_KEYWORDS = (
        ('Programming Languages', ('python', 'java', 'javascript', 'c++', 'php', 'ruby')),
        ('Frameworks & Libraries', ('react', 'angular', 'django', 'spring', 'tensorflow')),
        ('Databases', ('mysql', 'mongodb', 'postgresql', 'redis')),
        ('Cloud & DevOps', ('aws', 'azure', 'docker', 'kubernetes')),
        ('Data Science & AI', ('machine learning', 'data science', 'analytics')),
        ('Tools & Platforms', ('jira', 'figma', 'photoshop', 'excel')),
    )

    def __init__(self):
        self.skill_database = self._load_comprehensive_skills()
        self.patterns = self._create_extraction_patterns()
        print(f"✅ Smart Skill Extractor loaded with {len(self.skill_database)} skills")

    def _load_comprehensive_skills(self):
        """Build the ``variant -> canonical skill`` lookup table.

        For each canonical skill the variants are: the name itself, the name
        with spaces removed / replaced by ``_`` / replaced by ``-``, and any
        abbreviations registered for it.  All keys are lowercase.

        Returns:
            dict mapping lowercase variant to canonical skill name.
        """
        skill_variations = {}
        for group in self._SKILL_GROUPS.values():
            for skill in group:
                variants = [
                    skill,
                    skill.replace(' ', ''),
                    skill.replace(' ', '_'),
                    skill.replace(' ', '-'),
                ]
                variants.extend(self._ABBREVIATIONS.get(skill.lower(), ()))

                for variant in variants:
                    # Single-character variants ('c', 'r') are not indexed,
                    # so those skills are never matched in free text.
                    if variant and len(variant) > 1:
                        skill_variations[variant.lower()] = skill

        return skill_variations

    def _create_extraction_patterns(self):
        """Return the named regexes used for phrase-based extraction.

        In every pattern the LAST capture group holds the candidate skill
        text (``years_exp`` additionally captures the year count first).
        """
        return {
            'experience_with': r'\b(?:experience|expertise|proficient|skilled)\s+(?:in|with|using)\s+([a-zA-Z+#.\s-]+)\b',
            'years_exp': r'\b(\d+)\+?\s*(?:years?|yrs?)\s+(?:of\s+)?(?:experience|exp)\s+(?:in|with|using)\s+([a-zA-Z+#.\s-]+)\b',
            'worked_with': r'\b(?:worked|working|used|using)\s+(?:with|on)?\s*([a-zA-Z+#.\s-]+)\b',
            'technologies': r'\b(?:technologies|tools|frameworks|skills)[\s:]*([a-zA-Z+#.\s,-]+)\b',
            'skills': r'\b(?:skills?|competencies)[\s:]*([a-zA-Z+#.\s,-]+)\b'
        }

    @staticmethod
    def _contains_term(haystack, term):
        """Return True if *term* occurs in *haystack* as a standalone token.

        ``\\b`` is not usable here: it requires a word character on one side,
        so a term ending in a symbol ("c++", "c#") would only match when
        immediately followed by a letter or digit.  Instead the term must not
        be preceded by a word character, and must not be followed by one
        unless the term itself already ends in a symbol.
        """
        pattern = r'(?<!\w)' + re.escape(term)
        if term[-1].isalnum() or term[-1] == '_':
            pattern += r'(?!\w)'
        return re.search(pattern, haystack) is not None

    def extract_skills_comprehensive(self, text):
        """Extract known skills from *text* using three passes.

        1. Whole-token lookup of every vocabulary variant.
        2. Phrase patterns ("experience with ...", "skills: ...").
        3. Section-style context ("Technical skills: ...").

        Args:
            text: Raw resume or job-description text.

        Returns:
            Sorted list of canonical skill names; empty when *text* is empty
            or shorter than 10 non-blank characters.
        """
        if not text or len(text.strip()) < 10:
            return []

        found_skills = set()
        text_lower = text.lower()

        # Pass 1: direct vocabulary lookup (cheap substring test first).
        for variant, canonical_skill in self.skill_database.items():
            if variant in text_lower and self._contains_term(text_lower, variant):
                found_skills.add(canonical_skill)

        # Pass 2: phrase patterns.  The skill text is always the last group;
        # reading group 1 would pick up the year count for 'years_exp'.
        for pattern in self.patterns.values():
            for match in re.finditer(pattern, text_lower, re.IGNORECASE):
                captured = match.group(match.lastindex) if match.lastindex else None
                if captured:
                    skill_text = captured.strip(' ,-')
                    found_skills.update(self._process_skill_text(skill_text))

        # Pass 3: section-style context on the original-case text.
        found_skills.update(self._extract_contextual_skills(text))

        return sorted(found_skills)

    def _process_skill_text(self, skill_text):
        """Return the set of canonical skills named inside *skill_text*.

        The text is split on ``, ; / |`` and newlines; each fragment is looked
        up as a whole and then word by word.
        """
        skills = set()

        for part in re.split(r'[,;/\|\n]', skill_text):
            part = part.strip(' ,-()[]{}')
            if len(part) <= 1:
                continue

            part_lower = part.lower()
            if part_lower in self.skill_database:
                skills.add(self.skill_database[part_lower])

            for word in part.split():
                word = word.strip(' ,-()[]{}').lower()
                if word in self.skill_database:
                    skills.add(self.skill_database[word])

        return skills

    def _extract_contextual_skills(self, text):
        """Extract skills from "skills:", "technologies:" and "tools:" runs.

        Each pattern captures everything up to the next period and hands it
        to ``_process_skill_text``.
        """
        section_patterns = (
            r'(?:technical\s+)?skills?[\s:]+([^.]+)',
            r'technologies?[\s:]+([^.]+)',
            r'tools?[\s:]+([^.]+)',
        )

        skills = set()
        for pattern in section_patterns:
            for match in re.finditer(pattern, text, re.IGNORECASE):
                skills.update(self._process_skill_text(match.group(1)))

        return skills

    def _categorize(self, skill):
        """Return the category name for a single skill string.

        Known skills (canonical names or any indexed variant) are categorized
        by the vocabulary group they belong to.  Unknown strings fall back to
        keyword heuristics, with "ai" matched only as a standalone token so
        that words merely containing "ai" are not treated as AI skills.
        """
        skill_lower = skill.lower()
        canonical = self.skill_database.get(skill_lower, skill_lower)
        for category, members in self._SKILL_GROUPS.items():
            if canonical in members:
                return category

        for category, keywords in self._FALLBACK_KEYWORDS:
            if any(keyword in skill_lower for keyword in keywords):
                return category
            if category == 'Data Science & AI' and self._contains_term(skill_lower, 'ai'):
                return category

        return 'Business & Soft Skills'

    def get_skill_categories(self, skills):
        """Group *skills* by category.

        Args:
            skills: Iterable of skill names (typically the output of
                ``extract_skills_comprehensive``).

        Returns:
            dict mapping category name to the list of skills in it, in input
            order; categories with no skills are omitted.
        """
        categories = {name: [] for name in self._SKILL_GROUPS}

        for skill in skills:
            categories[self._categorize(skill)].append(skill)

        # Remove empty categories
        return {name: members for name, members in categories.items() if members}
226
+
227
+ # Test function
228
def test_smart_extractor():
    """Smoke-test SmartSkillExtractor on a tiny sample resume.

    Returns:
        True when at least one skill was extracted from the sample.
    """
    sample_resume = """
    John Doe - Software Engineer
    Skills: Python, JavaScript, React, MySQL, AWS
    Experience: 3 years of experience in full-stack development
    """

    skills = SmartSkillExtractor().extract_skills_comprehensive(sample_resume)
    print(f"✅ Extracted {len(skills)} skills: {skills}")

    return len(skills) > 0


if __name__ == "__main__":
    test_smart_extractor()
parsers/universal_parser.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parsers/universal_parser.py - Universal Resume Parser
2
+ import os
3
+ import re
4
+ from pathlib import Path
5
+
6
class UniversalResumeParser:
    """Extract plain text from resumes stored as PDF, DOCX, DOC or TXT.

    ``supported_formats`` maps a lowercase file suffix to the bound method
    that reads that format.  Format-specific readers report problems by
    RETURNING a human-readable message rather than raising, so callers
    receive a string in every case.
    NOTE(review): because failures come back as ordinary text, a downstream
    scorer cannot tell an error message from resume content — confirm the
    callers are fine with that before relying on it.
    """

    def __init__(self):
        self.supported_formats = {
            '.pdf': self._extract_from_pdf,
            '.docx': self._extract_from_docx,
            '.txt': self._extract_from_txt,
            '.doc': self._extract_from_doc
        }
        print("✅ Universal Resume Parser initialized")

    def extract_text(self, file_path):
        """Extract and clean text from *file_path*.

        Files with an unknown suffix are read as UTF-8 text and returned
        uncleaned.  If anything fails, the file is re-read as UTF-8 with
        undecodable bytes dropped; if that also fails an error message is
        returned.

        Args:
            file_path: Path of the resume file.

        Returns:
            The extracted text, or an error message string.
        """
        try:
            file_ext = Path(file_path).suffix.lower()
            extractor = self.supported_formats.get(file_ext)

            if extractor is None:
                # Fallback to text reading
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        return f.read()
                except Exception as err:
                    # ``Exception`` (not a bare except) so Ctrl-C / SystemExit
                    # are never converted into "unsupported format".
                    raise ValueError(f"Unsupported format: {file_ext}") from err

            print(f"🔍 Processing {file_ext} file...")

            text = extractor(file_path)
            enhanced_text = self._enhance_extracted_text(text)

            print(f"✅ Extracted {len(enhanced_text)} characters")
            return enhanced_text

        except Exception as e:
            print(f"❌ Extraction failed: {e}")
            # Try basic text reading as fallback
            try:
                with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                    return f.read()
            except Exception:
                # Top-level boundary: always hand back a string.
                return f"Error extracting from {file_path}: {str(e)}"

    def _extract_from_pdf(self, file_path):
        """Read a PDF via the project's pdf_parser, else directly via PyMuPDF."""
        try:
            from parsers.pdf_parser import extract_text_pymupdf
            return extract_text_pymupdf(file_path)
        except ImportError:
            # Fallback if the project helper (or its dependency) is missing
            try:
                import fitz
            except ImportError:
                return "PDF extraction requires PyMuPDF package"
            doc = fitz.open(file_path)
            try:
                return "".join(page.get_text() for page in doc)
            finally:
                # Always release the document, even if a page fails to read.
                doc.close()
        except Exception as e:
            return f"PDF extraction error: {str(e)}"

    def _extract_from_docx(self, file_path):
        """Read a DOCX via the project's docx_parser, else via python-docx."""
        try:
            from parsers.docx_parser import extract_text_docx
            return extract_text_docx(file_path)
        except ImportError:
            try:
                import docx
            except ImportError:
                return "DOCX extraction requires python-docx package"
            doc = docx.Document(file_path)
            return "".join(paragraph.text + "\n" for paragraph in doc.paragraphs)
        except Exception as e:
            return f"DOCX extraction error: {str(e)}"

    def _extract_from_txt(self, file_path):
        """Read a text file, trying encodings from strictest to most lenient.

        Order matters: ``utf-8-sig`` is UTF-8 that also strips a leading BOM;
        ``cp1252`` must come before ``latin-1`` because latin-1 accepts every
        byte sequence — placed earlier it would shadow cp1252 and turn
        Windows "smart" punctuation (bytes 0x80-0x9F) into control
        characters.  latin-1 is the guaranteed last resort.
        """
        for encoding in ('utf-8-sig', 'cp1252'):
            try:
                with open(file_path, 'r', encoding=encoding) as f:
                    return f.read()
            except UnicodeDecodeError:
                continue

        with open(file_path, 'r', encoding='latin-1') as f:
            return f.read()

    def _extract_from_doc(self, file_path):
        """Read a ``.doc`` file through docx2txt.

        NOTE(review): docx2txt targets the DOCX container; whether it can
        read genuine legacy binary .doc files should be verified — failures
        surface as the returned "DOC extraction error" message.
        """
        try:
            import docx2txt
            return docx2txt.process(file_path)
        except ImportError:
            return "DOC format requires docx2txt package (pip install docx2txt)"
        except Exception as e:
            return f"DOC extraction error: {str(e)}"

    def _enhance_extracted_text(self, text):
        """Normalize whitespace and repair split e-mail addresses / phone numbers.

        Args:
            text: Raw extracted text (may be empty or None).

        Returns:
            The cleaned, stripped text; *text* unchanged when it is empty or
            has fewer than 10 non-blank characters.
        """
        if not text or len(text.strip()) < 10:
            return text

        # Collapse runs of blank lines to one, and runs of spaces/tabs to one space
        text = re.sub(r'\n\s*\n', '\n\n', text)
        text = re.sub(r'[ \t]+', ' ', text)

        # Re-join e-mail addresses that extraction split around the "@"
        text = re.sub(r'([a-zA-Z0-9._%+-]+)\s*@\s*([a-zA-Z0-9.-]+\.[a-zA-Z]{2,})', r'\1@\2', text)
        # Normalize 3-3-4 phone numbers.  The digit guards keep the rewrite
        # from firing inside longer numbers (IDs, account numbers).
        text = re.sub(r'(?<!\d)(\d{3})\s*-?\s*(\d{3})\s*-?\s*(\d{4})(?!\d)', r'\1-\2-\3', text)

        return text.strip()
134
+
135
def test_universal_parser():
    """Smoke-test UniversalResumeParser's text clean-up step.

    Returns:
        Always True; the call only has to complete without raising.
    """
    sample = "Test resume text"
    _ = UniversalResumeParser()._enhance_extracted_text(sample)
    print("✅ Universal parser test completed")
    return True


if __name__ == "__main__":
    test_universal_parser()
scoring/__pycache__/relevance_scorer.cpython-312.pyc ADDED
Binary file (12.3 kB). View file
 
scoring/relevance_scorer.py ADDED
@@ -0,0 +1,314 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # scoring/relevance_scorer.py - Job-Specific Resume Relevance Scoring
2
+ from dataclasses import dataclass
3
+ from typing import Dict, List, Tuple
4
+ import re
5
+
6
@dataclass
class RelevanceScore:
    """Structured relevance scoring result"""
    # Component and combined scores; each is rounded to one decimal by
    # JobRelevanceScorer.calculate_relevance.
    overall_score: float  # 0-100
    skill_match_score: float
    experience_match_score: float
    education_match_score: float

    # Job skills split by whether they were found among the resume's skills.
    matched_must_have: List[str]
    matched_good_to_have: List[str]
    missing_must_have: List[str]
    missing_good_to_have: List[str]

    # Human-readable experience comparison, e.g. "1 year short (...)".
    experience_gap: str
    # Required education entries that no resume education line matched.
    education_gap: List[str]

    # Verdict label from _determine_fit_verdict, e.g. "High Suitability",
    # "Medium Suitability", "Low-Medium Suitability", "Low Suitability".
    fit_verdict: str  # High/Medium/Low
    confidence_score: float

    # Suggestion lists produced by _generate_improvement_suggestions.
    improvement_suggestions: List[str]
    quick_wins: List[str]
    long_term_goals: List[str]
28
+
29
class JobRelevanceScorer:
    """Score how well a resume matches one job's requirements.

    The overall score is a weighted sum of four 0-100 component scores
    (must-have skills, experience, good-to-have skills, education) using
    ``scoring_weights``, whose values sum to 1.0.
    """

    def __init__(self):
        self.scoring_weights = {
            'must_have_skills': 0.40,      # 40% weight
            'experience': 0.25,            # 25% weight
            'good_to_have_skills': 0.15,   # 15% weight
            'education': 0.20              # 20% weight
        }
        # Built lazily on first use; see _get_skill_extractor.
        self._skill_extractor = None
        print("✅ Job Relevance Scorer initialized")

    def _get_skill_extractor(self):
        """Return a cached SmartSkillExtractor, creating it on first use.

        The import stays local so this module can be imported without the
        parsers package; caching avoids rebuilding the skill vocabulary for
        every resume scored.
        """
        extractor = getattr(self, '_skill_extractor', None)
        if extractor is None:
            from parsers.smart_skill_extractor import SmartSkillExtractor
            extractor = self._skill_extractor = SmartSkillExtractor()
        return extractor

    def calculate_relevance(self, resume_text: str, job_req) -> "RelevanceScore":
        """Calculate the full relevance result for one resume / job pair.

        Args:
            resume_text: Plain text of the resume.
            job_req: Object exposing (all optional) ``role_title``,
                ``must_have_skills``, ``good_to_have_skills``,
                ``experience_required`` and ``education_required``.

        Returns:
            A populated ``RelevanceScore``.
        """
        print(f"🎯 Scoring relevance for: {getattr(job_req, 'role_title', 'Unknown Role')}")

        # Extract resume information
        resume_skills = self._get_skill_extractor().extract_skills_comprehensive(resume_text)
        resume_experience = self._extract_experience_years(resume_text)
        resume_education = self._extract_education_level(resume_text)

        # Get job requirements (missing or None attributes count as "none listed")
        must_have_skills = getattr(job_req, 'must_have_skills', None) or []
        good_to_have_skills = getattr(job_req, 'good_to_have_skills', None) or []
        required_experience = getattr(job_req, 'experience_required', '')
        required_education = getattr(job_req, 'education_required', None) or []

        # Calculate component scores
        skill_score, skill_matches = self._score_skills(
            resume_skills, must_have_skills, good_to_have_skills
        )
        experience_score, exp_gap = self._score_experience(
            resume_experience, required_experience
        )
        education_score, edu_gap = self._score_education(
            resume_education, required_education
        )

        # Good-to-have component: share of listed extras that were matched;
        # neutral (100) when the job lists none, mirroring must-have handling.
        if good_to_have_skills:
            good_to_have_score = (
                len(skill_matches['matched_good_to_have']) / len(good_to_have_skills)
            ) * 100
        else:
            good_to_have_score = 100

        # Weighted overall score.  All four declared weights are applied so
        # a perfect candidate reaches 100 rather than topping out at 85.
        weights = self.scoring_weights
        overall_score = min(100, (
            skill_score * weights['must_have_skills'] +
            experience_score * weights['experience'] +
            good_to_have_score * weights['good_to_have_skills'] +
            education_score * weights['education']
        ))

        # Determine fit verdict
        fit_verdict, confidence = self._determine_fit_verdict(
            overall_score, skill_matches, experience_score
        )

        # Generate improvement suggestions
        suggestions = self._generate_improvement_suggestions(
            skill_matches, exp_gap, edu_gap, job_req
        )

        return RelevanceScore(
            overall_score=round(overall_score, 1),
            skill_match_score=round(skill_score, 1),
            experience_match_score=round(experience_score, 1),
            education_match_score=round(education_score, 1),

            matched_must_have=skill_matches['matched_must_have'],
            matched_good_to_have=skill_matches['matched_good_to_have'],
            missing_must_have=skill_matches['missing_must_have'],
            missing_good_to_have=skill_matches['missing_good_to_have'],

            experience_gap=exp_gap,
            education_gap=edu_gap,

            fit_verdict=fit_verdict,
            confidence_score=confidence,

            improvement_suggestions=suggestions['main'],
            quick_wins=suggestions['quick_wins'],
            long_term_goals=suggestions['long_term']
        )

    @staticmethod
    def _partition_skills(required: List[str],
                          resume_skills_lower: List[str]) -> Tuple[List[str], List[str]]:
        """Split *required* into (matched, missing) against the resume skills.

        A required skill matches when it appears inside a resume skill as a
        standalone token (letters, digits, ``_``, ``+`` and ``#`` count as
        token characters), so "Java" does not match "javascript" and "C"
        does not match "c++", while "React" still matches "react native".
        """
        matched, missing = [], []
        for skill in required:
            token = re.compile(
                r'(?<![\w+#])' + re.escape(skill.lower()) + r'(?![\w+#])'
            )
            if any(token.search(candidate) for candidate in resume_skills_lower):
                matched.append(skill)
            else:
                missing.append(skill)
        return matched, missing

    def _score_skills(self, resume_skills: List[str], must_have: List[str],
                      good_to_have: List[str]) -> Tuple[float, Dict]:
        """Score must-have skill coverage and report matched / missing skills.

        Returns:
            ``(score, matches)`` where *score* is the percentage of must-have
            skills found (100 when none are required) and *matches* has the
            keys ``matched_must_have``, ``matched_good_to_have``,
            ``missing_must_have`` and ``missing_good_to_have``.
        """
        resume_skills_lower = [skill.lower() for skill in resume_skills]

        matched_must_have, missing_must_have = self._partition_skills(
            must_have, resume_skills_lower
        )
        matched_good_to_have, missing_good_to_have = self._partition_skills(
            good_to_have, resume_skills_lower
        )

        if not must_have:
            must_have_score = 100
        else:
            must_have_score = (len(matched_must_have) / len(must_have)) * 100

        return must_have_score, {
            'matched_must_have': matched_must_have,
            'matched_good_to_have': matched_good_to_have,
            'missing_must_have': missing_must_have,
            'missing_good_to_have': missing_good_to_have
        }

    def _score_experience(self, resume_exp: int, required_exp: str) -> Tuple[float, str]:
        """Score years of experience against the requirement.

        Returns:
            ``(score, description)``: 100 when no requirement can be parsed
            or the candidate has up to 2 years more than required, 95 when
            more than that, and 75 / 50 / 25 when 1 / 2 / 3+ years short.
        """
        req_years = self._parse_experience_requirement(required_exp)

        if req_years is None:
            return 100, "Experience requirement not specified"

        comparison = f"({resume_exp} years vs {req_years} required)"

        if resume_exp >= req_years:
            if resume_exp <= req_years + 2:
                return 100, f"Perfect match {comparison}"
            return 95, f"Overqualified {comparison}"

        gap_years = req_years - resume_exp
        if gap_years == 1:
            return 75, f"1 year short {comparison}"
        if gap_years == 2:
            return 50, f"2 years short {comparison}"
        return 25, f"{gap_years} years short {comparison}"

    def _score_education(self, resume_edu: List[str], required_edu: List[str]) -> Tuple[float, List[str]]:
        """Score education against the requirement.

        A requirement is met when any of its words longer than two
        characters appears in some resume education line.

        Returns:
            ``(score, gaps)``: 100 when nothing is required, "any graduate"
            is accepted, or every requirement is met; 80 when only some are
            met; 30 when none are.  *gaps* lists the unmet requirements.
        """
        if not required_edu or "any graduate" in " ".join(required_edu).lower():
            return 100, []

        resume_edu_lower = [edu.lower() for edu in resume_edu]

        matched = False
        gaps = []

        for req_edu in required_edu:
            keywords = [word for word in req_edu.lower().split() if len(word) > 2]
            if any(word in res_edu for res_edu in resume_edu_lower for word in keywords):
                matched = True
            else:
                gaps.append(req_edu)

        score = 100 if matched and not gaps else (80 if matched else 30)
        return score, gaps

    def _extract_experience_years(self, resume_text: str) -> int:
        """Return the largest "N years ..." figure found in the resume, or 0."""
        patterns = [
            r'(\d+)[\+\s]*years?\s+(?:of\s+)?(?:experience|exp)',
            r'(?:experience|exp)[\s:]*(\d+)[\+\s]*years?',
            r'(\d+)[\+\s]*years?\s+(?:in|with)'
        ]

        years = []
        for pattern in patterns:
            matches = re.findall(pattern, resume_text, re.IGNORECASE)
            years.extend(int(match) for match in matches if match.isdigit())

        return max(years) if years else 0

    def _extract_education_level(self, resume_text: str) -> List[str]:
        """Return the resume lines (up to a period/newline) that name a degree.

        Each degree keyword must start at a word boundary so that ordinary
        words such as "mastered" are not mistaken for a Master's degree.
        """
        patterns = [
            r"\bbachelor(?:['’]?s)?\b[^.\n]*",
            r"\bmaster(?:['’]?s)?\b[^.\n]*",
            r'\bb\.?tech[^.\n]*',
            r'\bm\.?tech[^.\n]*',
            r'\bbca\b[^.\n]*',
            r'\bmca\b[^.\n]*'
        ]

        education = []
        for pattern in patterns:
            education.extend(re.findall(pattern, resume_text, re.IGNORECASE))

        return education

    def _parse_experience_requirement(self, exp_req: str) -> "int | None":
        """Return the first number in *exp_req* as years, or None.

        None is returned when the requirement is empty, is the literal
        "not specified" (any case), or contains no digits.
        """
        if not exp_req:
            return None

        exp_text = str(exp_req)
        if exp_text.lower() == "not specified":
            return None

        numbers = re.findall(r'\d+', exp_text)
        if not numbers:
            return None

        return int(numbers[0])

    def _determine_fit_verdict(self, overall_score: float, skill_matches: Dict,
                               experience_score: float) -> Tuple[str, float]:
        """Derive the verdict label and a 0-100 confidence figure.

        Returns:
            ``(verdict, confidence)`` with confidence rounded to one decimal.
        """
        matched_count = len(skill_matches['matched_must_have'])
        must_have_count = matched_count + len(skill_matches['missing_must_have'])
        # With no must-have skills listed nothing is missing, so treat the
        # ratio as fully satisfied (consistent with _score_skills' 100);
        # otherwise such jobs could never be rated above "Low-Medium".
        must_have_ratio = matched_count / must_have_count if must_have_count else 1.0

        confidence = min(100, (must_have_ratio * 50) + (experience_score * 0.3) + (overall_score * 0.2))

        if overall_score >= 80 and must_have_ratio >= 0.8:
            verdict = "High Suitability"
        elif overall_score >= 60 and must_have_ratio >= 0.6:
            verdict = "Medium Suitability"
        elif overall_score >= 40:
            verdict = "Low-Medium Suitability"
        else:
            verdict = "Low Suitability"

        return verdict, round(confidence, 1)

    def _generate_improvement_suggestions(self, skill_matches: Dict, exp_gap: str,
                                          edu_gap: List[str], job_req) -> Dict[str, List[str]]:
        """Build suggestion lists from the detected gaps.

        ``job_req`` is accepted for interface compatibility and is not used.

        Returns:
            dict with keys ``main``, ``quick_wins`` and ``long_term``.
        """
        main_suggestions = []
        quick_wins = []
        long_term_goals = []

        # Skill suggestions
        missing_must_have = skill_matches['missing_must_have']
        if missing_must_have:
            main_suggestions.append(f"Acquire critical skills: {', '.join(missing_must_have[:3])}")
            quick_wins.append(f"Start learning: {', '.join(missing_must_have[:2])}")

        # Experience suggestions ("short" appears only in the under-qualified messages)
        if "short" in exp_gap:
            quick_wins.append("Gain experience through projects and internships")

        # Education suggestions
        if edu_gap:
            long_term_goals.append("Consider relevant degree or certification")

        return {
            'main': main_suggestions[:5],
            'quick_wins': quick_wins[:5],
            'long_term': long_term_goals[:3]
        }
307
+
308
def test_relevance_scorer():
    """Placeholder smoke test: print a completion message and report success.

    Returns:
        Always True.
    """
    message = "✅ Relevance scorer test completed"
    print(message)
    return True


if __name__ == "__main__":
    test_relevance_scorer()