Spaces:
Sleeping
Sleeping
Upload 46 files
Browse files
- config/skills.yaml +12 -0
- llm_analysis/__init__.py +0 -0
- llm_analysis/__pycache__/__init__.cpython-312.pyc +0 -0
- llm_analysis/__pycache__/langgraph_pipeline.cpython-312.pyc +0 -0
- llm_analysis/__pycache__/langsmith_logger.cpython-312.pyc +0 -0
- llm_analysis/__pycache__/llm_analyzer.cpython-312.pyc +0 -0
- llm_analysis/__pycache__/prompt_templates.cpython-312.pyc +0 -0
- llm_analysis/langgraph_pipeline.py +225 -0
- llm_analysis/langsmith_logger.py +265 -0
- llm_analysis/llm_analyzer.py +170 -0
- llm_analysis/prompt_templates.py +83 -0
- logs/langsmith_metrics.jsonl +57 -0
- logs/langsmith_traces.jsonl +120 -0
- matchers/__init__.py +0 -0
- matchers/__pycache__/__init__.cpython-312.pyc +0 -0
- matchers/__pycache__/final_scorer.cpython-312.pyc +0 -0
- matchers/__pycache__/hard_matcher.cpython-312.pyc +0 -0
- matchers/__pycache__/semantic_matcher.cpython-312.pyc +0 -0
- matchers/entity_extractor.py +160 -0
- matchers/final_scorer.py +73 -0
- matchers/fuzzy_matcher.py +117 -0
- matchers/hard_matcher.py +47 -0
- matchers/semantic_matcher.py +37 -0
- parsers/__iniy__.py +0 -0
- parsers/__pycache__/cleaner.cpython-312.pyc +0 -0
- parsers/__pycache__/docx_parser.cpython-312.pyc +0 -0
- parsers/__pycache__/jd_parser.cpython-312.pyc +0 -0
- parsers/__pycache__/job_requirement_parser.cpython-312.pyc +0 -0
- parsers/__pycache__/pdf_parser.cpython-312.pyc +0 -0
- parsers/__pycache__/section_splitter.cpython-312.pyc +0 -0
- parsers/__pycache__/skill_extractor.cpython-312.pyc +0 -0
- parsers/__pycache__/skills_list.cpython-312.pyc +0 -0
- parsers/__pycache__/smart_skill_extractor.cpython-312.pyc +0 -0
- parsers/cleaner.py +7 -0
- parsers/docx_parser.py +5 -0
- parsers/entity_extractor.py +33 -0
- parsers/jd_parser.py +20 -0
- parsers/job_requirement_parser.py +449 -0
- parsers/pdf_parser.py +25 -0
- parsers/section_splitter.py +71 -0
- parsers/skill_extractor.py +64 -0
- parsers/skills_list.py +35 -0
- parsers/smart_skill_extractor.py +244 -0
- parsers/universal_parser.py +144 -0
- scoring/__pycache__/relevance_scorer.cpython-312.pyc +0 -0
- scoring/relevance_scorer.py +314 -0
config/skills.yaml
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
skills:
|
| 2 |
+
- python
|
| 3 |
+
- java
|
| 4 |
+
- c++
|
| 5 |
+
- sql
|
| 6 |
+
- aws
|
| 7 |
+
- docker
|
| 8 |
+
- kubernetes
|
| 9 |
+
- tensorflow
|
| 10 |
+
- pytorch
|
| 11 |
+
- react
|
| 12 |
+
- node.js
|
llm_analysis/__init__.py
ADDED
|
File without changes
|
llm_analysis/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (154 Bytes). View file
|
|
|
llm_analysis/__pycache__/langgraph_pipeline.cpython-312.pyc
ADDED
|
Binary file (9.35 kB). View file
|
|
|
llm_analysis/__pycache__/langsmith_logger.cpython-312.pyc
ADDED
|
Binary file (11.6 kB). View file
|
|
|
llm_analysis/__pycache__/llm_analyzer.cpython-312.pyc
ADDED
|
Binary file (8.66 kB). View file
|
|
|
llm_analysis/__pycache__/prompt_templates.cpython-312.pyc
ADDED
|
Binary file (3.76 kB). View file
|
|
|
llm_analysis/langgraph_pipeline.py
ADDED
|
@@ -0,0 +1,225 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# llm_analysis/langgraph_pipeline.py - Structured Analysis Pipeline
|
| 2 |
+
from langgraph.graph import StateGraph, END
|
| 3 |
+
from typing import Dict, List, TypedDict
|
| 4 |
+
import json
|
| 5 |
+
from llm_analysis.llm_analyzer import LLMResumeAnalyzer
|
| 6 |
+
|
| 7 |
+
class AnalysisState(TypedDict):
    """State object for the analysis pipeline.

    One mutable mapping threaded through every LangGraph node: each node
    reads the fields it needs and writes its own results back into it.
    """
    resume_text: str            # raw resume text (pipeline input)
    jd_text: str                # raw job-description text (pipeline input)
    basic_scores: Dict          # keyword-match scores computed before the pipeline runs
    enhanced_skills: Dict       # written by the skills-extraction node
    llm_analysis: Dict          # written by the LLM-analysis node
    improvement_roadmap: Dict   # written by the roadmap-generation node
    final_result: Dict          # compiled success payload, or failure report
    current_step: str           # name of the step currently executing (for error reports)
    errors: List[str]           # accumulated error messages; non-empty routes flow to the error handler
|
| 18 |
+
|
| 19 |
+
class ResumeAnalysisPipeline:
    """LangGraph-powered structured analysis pipeline.

    Runs four stages in order -- skills extraction, LLM analysis, roadmap
    generation, final compilation -- with conditional routing to a terminal
    error handler whenever a stage records an error in ``state["errors"]``.
    """

    def __init__(self, model="x-ai/grok-4-fast:free"):
        """Create the underlying LLM analyzer and compile the workflow graph.

        Args:
            model: model identifier forwarded to LLMResumeAnalyzer.
        """
        self.llm_analyzer = LLMResumeAnalyzer(model=model)
        self.graph = self._create_pipeline()
        print("✅ LangGraph pipeline initialized")

    def _create_pipeline(self):
        """Create and compile the structured analysis workflow graph."""
        workflow = StateGraph(AnalysisState)

        # Nodes: one per analysis step, plus a terminal error handler.
        workflow.add_node("skills_extraction", self._extract_skills_node)
        workflow.add_node("llm_analysis", self._llm_analysis_node)
        workflow.add_node("roadmap_generation", self._roadmap_generation_node)
        workflow.add_node("final_compilation", self._final_compilation_node)
        workflow.add_node("error_handler", self._error_handler_node)

        workflow.set_entry_point("skills_extraction")

        # Unconditional edges. NOTE (fix): the outgoing edges from
        # "skills_extraction" and "llm_analysis" are defined ONLY by the
        # conditional edges below. The original code also registered plain
        # add_edge() calls from those same nodes; LangGraph traverses every
        # registered edge, so on an error BOTH the error handler and the
        # next stage would have run.
        workflow.add_edge("roadmap_generation", "final_compilation")
        workflow.add_edge("final_compilation", END)
        workflow.add_edge("error_handler", END)

        # Conditional edges: continue on success, divert on recorded errors.
        workflow.add_conditional_edges(
            "skills_extraction",
            self._should_continue,
            {
                "continue": "llm_analysis",
                "error": "error_handler",
            },
        )
        workflow.add_conditional_edges(
            "llm_analysis",
            self._should_continue,
            {
                "continue": "roadmap_generation",
                "error": "error_handler",
            },
        )

        return workflow.compile()

    def _should_continue(self, state: AnalysisState) -> str:
        """Router: return "error" if any step recorded an error, else "continue"."""
        if state.get("errors"):
            return "error"
        return "continue"

    def _extract_skills_node(self, state: AnalysisState) -> AnalysisState:
        """Node 1: enhanced skills extraction from the resume text."""
        try:
            state["current_step"] = "skills_extraction"
            print("🔍 LangGraph: Extracting skills...")

            enhanced_skills = self.llm_analyzer.enhance_skills_extraction(state["resume_text"])
            state["enhanced_skills"] = enhanced_skills

            print("✅ LangGraph: Skills extraction completed")
            return state

        except Exception as e:
            # Record the failure; the conditional router diverts to error_handler.
            state["errors"].append(f"Skills extraction failed: {str(e)}")
            return state

    def _llm_analysis_node(self, state: AnalysisState) -> AnalysisState:
        """Node 2: LLM-powered resume-vs-JD analysis."""
        try:
            state["current_step"] = "llm_analysis"
            print("🧠 LangGraph: Running LLM analysis...")

            llm_analysis = self.llm_analyzer.analyze_resume_vs_jd(
                state["resume_text"],
                state["jd_text"],
                state["basic_scores"]
            )
            state["llm_analysis"] = llm_analysis

            print("✅ LangGraph: LLM analysis completed")
            return state

        except Exception as e:
            state["errors"].append(f"LLM analysis failed: {str(e)}")
            return state

    def _roadmap_generation_node(self, state: AnalysisState) -> AnalysisState:
        """Node 3: improvement roadmap generation from the LLM analysis."""
        try:
            state["current_step"] = "roadmap_generation"
            print("🗺️ LangGraph: Generating improvement roadmap...")

            roadmap = self.llm_analyzer.generate_improvement_roadmap(state["llm_analysis"])
            state["improvement_roadmap"] = roadmap

            print("✅ LangGraph: Roadmap generation completed")
            return state

        except Exception as e:
            state["errors"].append(f"Roadmap generation failed: {str(e)}")
            return state

    def _final_compilation_node(self, state: AnalysisState) -> AnalysisState:
        """Node 4: compile every stage's output into the final result dict."""
        try:
            state["current_step"] = "final_compilation"
            print("📊 LangGraph: Compiling final results...")

            final_result = {
                "basic_scores": state["basic_scores"],
                "enhanced_skills": state["enhanced_skills"],
                "llm_analysis": state["llm_analysis"],
                "improvement_roadmap": state["improvement_roadmap"],
                "pipeline_status": "completed",
                "processing_steps": ["skills_extraction", "llm_analysis", "roadmap_generation", "compilation"]
            }

            state["final_result"] = final_result
            print("✅ LangGraph: Pipeline completed successfully")
            return state

        except Exception as e:
            state["errors"].append(f"Final compilation failed: {str(e)}")
            return state

    def _error_handler_node(self, state: AnalysisState) -> AnalysisState:
        """Terminal node: package recorded errors and any partial results."""
        print(f"❌ LangGraph: Handling errors - {len(state['errors'])} error(s)")

        state["final_result"] = {
            "pipeline_status": "failed",
            "errors": state["errors"],
            "last_successful_step": state.get("current_step", "unknown"),
            "partial_results": {
                "basic_scores": state.get("basic_scores", {}),
                "enhanced_skills": state.get("enhanced_skills", {}),
                "llm_analysis": state.get("llm_analysis", {}),
                "improvement_roadmap": state.get("improvement_roadmap", {})
            }
        }
        return state

    def run_structured_analysis(self, resume_text: str, jd_text: str, basic_scores: Dict) -> Dict:
        """Run the complete structured analysis pipeline.

        Args:
            resume_text: raw resume text.
            jd_text: raw job-description text.
            basic_scores: keyword-match scores computed upstream.

        Returns:
            The ``final_result`` dict produced by the graph, or a
            ``critical_failure`` payload if graph execution itself raised.
        """
        print("🚀 Starting LangGraph structured analysis pipeline...")

        initial_state = AnalysisState(
            resume_text=resume_text,
            jd_text=jd_text,
            basic_scores=basic_scores,
            enhanced_skills={},
            llm_analysis={},
            improvement_roadmap={},
            final_result={},
            current_step="initializing",
            errors=[]
        )

        try:
            final_state = self.graph.invoke(initial_state)

            print("✅ LangGraph pipeline execution completed")
            return final_state["final_result"]

        except Exception as e:
            print(f"❌ LangGraph pipeline failed: {e}")
            return {
                "pipeline_status": "critical_failure",
                "error": str(e),
                "basic_scores": basic_scores
            }
|
| 204 |
+
|
| 205 |
+
# Test function
def test_langgraph_pipeline():
    """Smoke-test the LangGraph pipeline end to end with toy inputs."""
    pipeline = ResumeAnalysisPipeline()

    resume = "Python developer with React experience"
    job_description = "Looking for Python developer with React skills"
    keyword_scores = {
        "score": 75,
        "matched_skills": ["python", "react"],
        "missing_skills": ["docker"],
        "matched_count": 2,
        "total_jd_skills": 3,
    }

    outcome = pipeline.run_structured_analysis(resume, job_description, keyword_scores)
    print(f"✅ LangGraph test completed: {outcome.get('pipeline_status', 'unknown')}")
    return outcome.get('pipeline_status') == 'completed'

if __name__ == "__main__":
    test_langgraph_pipeline()
|
llm_analysis/langsmith_logger.py
ADDED
|
@@ -0,0 +1,265 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# llm_analysis/langsmith_logger.py - LangSmith Observability & Debugging
|
| 2 |
+
import os
|
| 3 |
+
import json
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
from typing import Dict, Any, Optional
|
| 6 |
+
import uuid
|
| 7 |
+
|
| 8 |
+
# Note: LangSmith requires API key for full functionality
|
| 9 |
+
# For hackathon demo, we'll create a local logging system that mimics LangSmith
|
| 10 |
+
|
| 11 |
+
class LangSmithLogger:
    """LangSmith-style logging and observability for LLM chains.

    Writes JSON-lines events (traces, LLM calls, metrics, evaluations) to
    local log files, mimicking LangSmith for environments without an API key.
    """

    def __init__(self, project_name="resume-relevance-system", logs_dir="logs"):
        """Initialize the logger and its session.

        Args:
            project_name: label attached to every trace event.
            logs_dir: directory for the .jsonl log files (created if missing;
                defaults to "logs" for backward compatibility).
        """
        self.project_name = project_name
        self.session_id = str(uuid.uuid4())  # one session per logger instance
        self.logs_dir = logs_dir
        os.makedirs(self.logs_dir, exist_ok=True)

        # Initialize log files (append-only JSON lines).
        self.trace_log = f"{self.logs_dir}/langsmith_traces.jsonl"
        self.metrics_log = f"{self.logs_dir}/langsmith_metrics.jsonl"

        print(f"✅ LangSmith Logger initialized - Project: {project_name}")
        print(f"📊 Session ID: {self.session_id}")

    def start_trace(self, trace_name: str, inputs: Dict[str, Any]) -> str:
        """Start a new trace for an LLM chain and return its trace id."""
        trace_id = str(uuid.uuid4())

        trace_start = {
            "trace_id": trace_id,
            "session_id": self.session_id,
            "project_name": self.project_name,
            "trace_name": trace_name,
            # NOTE: utcnow() produces naive UTC timestamps; kept for log-format
            # stability, though datetime.now(timezone.utc) is preferred today.
            "start_time": datetime.utcnow().isoformat(),
            "inputs": inputs,
            "status": "started",
            "type": "trace_start"
        }

        self._log_event(trace_start, self.trace_log)
        print(f"🔍 LangSmith: Started trace '{trace_name}' - ID: {trace_id[:8]}...")
        return trace_id

    def end_trace(self, trace_id: str, outputs: Dict[str, Any],
                  status: str = "success", error: Optional[str] = None,
                  token_usage: Optional[Dict] = None):
        """End a trace with its results, status, and optional error/usage."""

        trace_end = {
            "trace_id": trace_id,
            "session_id": self.session_id,
            "end_time": datetime.utcnow().isoformat(),
            "outputs": outputs,
            "status": status,
            "error": error,
            "token_usage": token_usage or {},
            "type": "trace_end"
        }

        self._log_event(trace_end, self.trace_log)
        status_emoji = "✅" if status == "success" else "❌"
        print(f"{status_emoji} LangSmith: Ended trace {trace_id[:8]}... - Status: {status}")

    def log_llm_call(self, trace_id: str, step_name: str,
                     prompt: str, response: str, model: str,
                     latency_ms: float, token_usage: Optional[Dict] = None):
        """Log an individual LLM call within a trace."""

        llm_call = {
            "trace_id": trace_id,
            "step_name": step_name,
            "timestamp": datetime.utcnow().isoformat(),
            "model": model,
            # Truncate long prompts/responses to keep log lines bounded.
            "prompt": prompt[:500] + "..." if len(prompt) > 500 else prompt,
            "response": response[:500] + "..." if len(response) > 500 else response,
            "latency_ms": latency_ms,
            "token_usage": token_usage or {},
            "type": "llm_call"
        }

        self._log_event(llm_call, self.trace_log)
        print(f"🤖 LangSmith: LLM call logged - {step_name} ({latency_ms:.1f}ms)")

    def log_metrics(self, metrics: Dict[str, Any]):
        """Log a dict of performance metrics for this session."""

        metric_entry = {
            "session_id": self.session_id,
            "timestamp": datetime.utcnow().isoformat(),
            "metrics": metrics,
            "type": "metrics"
        }

        self._log_event(metric_entry, self.metrics_log)
        print(f"📊 LangSmith: Metrics logged - {list(metrics.keys())}")

    def log_evaluation(self, trace_id: str, evaluation_results: Dict[str, Any]):
        """Log evaluation results for testing and debugging."""

        evaluation = {
            "trace_id": trace_id,
            "timestamp": datetime.utcnow().isoformat(),
            "evaluation_results": evaluation_results,
            "type": "evaluation"
        }

        self._log_event(evaluation, self.trace_log)
        print(f"🧪 LangSmith: Evaluation logged for trace {trace_id[:8]}...")

    def _log_event(self, event: Dict[str, Any], log_file: str):
        """Append one event as a JSON line; log failures without raising."""
        try:
            with open(log_file, 'a', encoding='utf-8') as f:
                f.write(json.dumps(event) + '\n')
        except Exception as e:
            # Best-effort logging: never let observability break the pipeline.
            print(f"⚠️ LangSmith: Failed to write log - {e}")

    def get_session_summary(self) -> Dict[str, Any]:
        """Summarize this session's traces and metrics from the log files."""
        try:
            traces = []
            metrics = []

            # Read trace logs; keep only trace_start events from this session.
            if os.path.exists(self.trace_log):
                with open(self.trace_log, 'r', encoding='utf-8') as f:
                    for line in f:
                        if line.strip():
                            event = json.loads(line.strip())
                            if event.get("session_id") == self.session_id:
                                if event.get("type") == "trace_start":
                                    traces.append(event)

            # Read metrics logs for this session.
            if os.path.exists(self.metrics_log):
                with open(self.metrics_log, 'r', encoding='utf-8') as f:
                    for line in f:
                        if line.strip():
                            event = json.loads(line.strip())
                            if event.get("session_id") == self.session_id:
                                metrics.append(event)

            return {
                "session_id": self.session_id,
                "project_name": self.project_name,
                "total_traces": len(traces),
                "total_metrics": len(metrics),
                "traces": traces[-5:],   # last 5 traces
                "metrics": metrics[-5:]  # last 5 metrics
            }

        except Exception as e:
            print(f"⚠️ LangSmith: Failed to get session summary - {e}")
            return {"error": str(e)}

    def export_session_data(self, filename: Optional[str] = None) -> str:
        """Export the session summary to a JSON file; return its path ('' on failure)."""
        if not filename:
            filename = f"{self.logs_dir}/session_{self.session_id[:8]}_export.json"

        summary = self.get_session_summary()

        try:
            with open(filename, 'w', encoding='utf-8') as f:
                json.dump(summary, f, indent=2)

            # FIX: original printed a literal "(unknown)" instead of the path.
            print(f"📁 LangSmith: Session data exported to {filename}")
            return filename

        except Exception as e:
            print(f"❌ LangSmith: Export failed - {e}")
            return ""
|
| 175 |
+
|
| 176 |
+
# Global logger instance shared by the tracing decorator below.
logger = LangSmithLogger()

def trace_llm_analysis(func):
    """Decorator that traces an LLM analysis function.

    Opens a trace before each call, records latency and success/failure
    metrics, closes the trace, and re-raises any exception after logging.
    """
    from functools import wraps  # local import keeps this block self-contained

    @wraps(func)  # FIX: preserve __name__/__doc__ of the wrapped function
    def wrapper(*args, **kwargs):
        # Start trace (argument values are not logged, only their shape).
        trace_id = logger.start_trace(
            func.__name__,
            {"args_count": len(args), "kwargs": list(kwargs.keys())}
        )

        start_time = datetime.utcnow()

        try:
            result = func(*args, **kwargs)

            # Latency in milliseconds.
            end_time = datetime.utcnow()
            latency = (end_time - start_time).total_seconds() * 1000

            logger.end_trace(
                trace_id,
                {"result_type": type(result).__name__},
                "success"
            )

            logger.log_metrics({
                "function": func.__name__,
                "latency_ms": latency,
                "success": True
            })

            return result

        except Exception as e:
            # Record the failure, then propagate it unchanged.
            logger.end_trace(
                trace_id,
                {},
                "error",
                str(e)
            )

            logger.log_metrics({
                "function": func.__name__,
                "success": False,
                "error": str(e)
            })

            raise  # FIX: bare raise preserves the original traceback

    return wrapper
|
| 232 |
+
|
| 233 |
+
# Test function
def test_langsmith_logging():
    """Exercise the LangSmith-style logger end to end."""

    # Trace lifecycle: start, one LLM call, end.
    tid = logger.start_trace("test_analysis", {"test": True})

    logger.log_llm_call(
        tid,
        "test_llm_call",
        "Test prompt",
        "Test response",
        "grok-4-fast",
        150.5,
        {"tokens": 100},
    )

    logger.end_trace(tid, {"test_result": "success"}, "success")

    # Standalone metrics event.
    logger.log_metrics({"test_metric": 95.5, "accuracy": 0.85})

    # Summarize and report.
    session = logger.get_session_summary()
    print(f"✅ LangSmith test completed - {session['total_traces']} traces logged")

    return session['total_traces'] > 0

if __name__ == "__main__":
    test_langsmith_logging()
|
llm_analysis/llm_analyzer.py
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# llm_analysis/llm_analyzer.py
|
| 2 |
+
import os
|
| 3 |
+
import json
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
+
from langchain_openai import ChatOpenAI
|
| 6 |
+
from langchain.prompts import ChatPromptTemplate
|
| 7 |
+
from llm_analysis.prompt_templates import (
|
| 8 |
+
RESUME_ANALYSIS_PROMPT,
|
| 9 |
+
IMPROVEMENT_ROADMAP_PROMPT,
|
| 10 |
+
SKILLS_ENHANCEMENT_PROMPT
|
| 11 |
+
)
|
| 12 |
+
|
| 13 |
+
load_dotenv()
|
| 14 |
+
|
| 15 |
+
class LLMResumeAnalyzer:
    """LLM-backed resume/JD analysis built on LangChain chat models.

    Every public method degrades gracefully: if the LLM call or the JSON
    parsing of its reply fails, a deterministic fallback payload is returned
    instead of raising.
    """

    def __init__(self, model=None):
        """Initialize the chat model.

        Args:
            model: explicit model name; falls back to the OPENAI_MODEL env
                var and finally to "gpt-3.5-turbo".

        Raises:
            ValueError: if OPENAI_API_KEY is not set.
        """
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError("❌ OPENAI_API_KEY not found in .env file")

        # Use the provided model, or fall back to environment variable/default
        llm_model = model or os.getenv("OPENAI_MODEL", "gpt-3.5-turbo")

        self.llm = ChatOpenAI(
            model=llm_model,
            temperature=0.2,  # low temperature for consistent, repeatable output
            api_key=api_key
        )

        print(f"✅ LLM Analyzer initialized successfully with model: {llm_model}")

    def _invoke_json_chain(self, system_message, human_template, inputs):
        """Build a system+human prompt, invoke the LLM, and parse its JSON reply.

        Shared helper extracted from the three public analysis methods, which
        previously duplicated this prompt/chain/parse boilerplate.

        Raises:
            json.JSONDecodeError: if the model reply is not valid JSON.
            Exception: anything raised by the underlying LLM call.
        """
        prompt = ChatPromptTemplate.from_messages([
            ("system", system_message),
            ("human", human_template)
        ])
        chain = prompt | self.llm
        response = chain.invoke(inputs)
        return json.loads(response.content)

    def analyze_resume_vs_jd(self, resume_text, jd_text, keyword_match_data):
        """Comprehensive LLM-powered resume analysis against a job description."""
        print("🤖 Running LLM analysis...")

        try:
            analysis = self._invoke_json_chain(
                "You are an expert HR recruiter and technical hiring manager.",
                RESUME_ANALYSIS_PROMPT,
                {
                    "resume_text": resume_text[:3000],  # truncate to avoid token limits
                    "jd_text": jd_text[:2000],
                    "matched_count": keyword_match_data.get("matched_count", 0),
                    "total_skills": keyword_match_data.get("total_jd_skills", 0),
                    "matched_skills": ", ".join(keyword_match_data.get("matched_skills", [])),
                    "missing_skills": ", ".join(keyword_match_data.get("missing_skills", [])),
                    "coverage_percentage": keyword_match_data.get("score", 0)
                }
            )
            print("✅ LLM analysis completed successfully")
            return analysis

        except json.JSONDecodeError as e:
            print(f"⚠️ JSON parsing error: {e}")
            return self._create_fallback_analysis(keyword_match_data)
        except Exception as e:
            print(f"❌ LLM analysis error: {e}")
            return self._create_fallback_analysis(keyword_match_data)

    def generate_improvement_roadmap(self, analysis_results):
        """Generate a detailed improvement roadmap from prior analysis results."""
        print("🗺️ Generating improvement roadmap...")

        try:
            roadmap = self._invoke_json_chain(
                "You are a career coach specializing in tech careers.",
                IMPROVEMENT_ROADMAP_PROMPT,
                {"analysis_results": json.dumps(analysis_results, indent=2)}
            )
            print("✅ Improvement roadmap generated successfully")
            return roadmap

        except Exception as e:
            print(f"❌ Roadmap generation error: {e}")
            return self._create_fallback_roadmap()

    def enhance_skills_extraction(self, text):
        """Use the LLM to extract and categorize skills from free text."""
        print("🧠 Enhancing skills extraction with LLM...")

        try:
            skills_data = self._invoke_json_chain(
                "You are a technical skills extraction specialist.",
                SKILLS_ENHANCEMENT_PROMPT,
                {"text": text[:2000]}  # truncate to avoid token limits
            )
            print("✅ Skills enhancement completed")
            return skills_data

        except Exception as e:
            print(f"❌ Skills enhancement error: {e}")
            return {"all_technical_skills": [], "error": str(e)}

    def _create_fallback_analysis(self, keyword_data):
        """Deterministic analysis payload used when the LLM call fails."""
        return {
            # Map the 0-100 keyword score onto a 1-10 fit score.
            "overall_fit_score": max(1, int(keyword_data.get("score", 0) / 10)),
            "experience_alignment": "Unable to assess - manual review needed",
            "key_strengths": ["Technical skills present in resume"],
            "critical_gaps": keyword_data.get("missing_skills", [])[:3],
            "role_suitability": "Medium - based on keyword match only",
            "improvement_suggestions": ["Add missing technical skills", "Improve resume formatting"],
            "recommended_skills_to_learn": keyword_data.get("missing_skills", [])[:3],
            "project_recommendations": ["Build projects showcasing missing skills"],
            "certification_suggestions": ["Relevant industry certifications"],
            "interview_readiness": "Moderate preparation needed",
            "salary_expectations": "Market standard for skill level",
            "final_verdict": "Automated analysis only - requires manual review"
        }

    def _create_fallback_roadmap(self):
        """Deterministic roadmap payload used when the LLM call fails."""
        return {
            "immediate_actions": ["Update resume with missing skills", "Clean up resume formatting"],
            "week_1_plan": ["Research missing skills", "Start online tutorials"],
            "month_1_plan": ["Complete beginner courses", "Build first project"],
            "month_3_plan": ["Build portfolio", "Apply for relevant positions"],
            "priority_skills": ["As identified in job description"],
            "learning_resources": {
                "free_courses": ["freeCodeCamp", "Coursera free courses"],
                "paid_courses": ["Udemy", "Pluralsight"],
                "books": ["Technical books for identified skills"],
                "practice_platforms": ["LeetCode", "HackerRank"]
            },
            "portfolio_improvements": ["Build 2-3 projects showcasing skills"],
            "networking_suggestions": ["Join LinkedIn groups", "Attend tech meetups"],
            "quick_wins": ["Update LinkedIn profile", "Get recommendations"],
            "estimated_timeline": "3-6 months for significant improvement"
        }
|
| 150 |
+
|
| 151 |
+
# Test LLM connectivity
|
| 152 |
+
def test_llm_connection():
    """Smoke-test the configured LLM; return True on success, False otherwise."""
    # Ask for a trivial, fixed JSON reply so the response can be parsed.
    probe = "Say 'Hello, LLM is working!' in JSON format: {\"status\": \"working\", \"message\": \"Hello, LLM is working!\"}"
    try:
        analyzer = LLMResumeAnalyzer()
        print("🧪 Testing LLM connection...")

        reply = analyzer.llm.invoke(probe)
        test_response = json.loads(reply.content)
    except Exception as e:
        # Any failure (construction, network, parsing) means "not working".
        print(f"❌ LLM Test Failed: {e}")
        return False

    print(f"✅ LLM Test Result: {test_response}")
    return True
|
| 168 |
+
|
| 169 |
+
# Allow running this module directly as an LLM connectivity smoke test.
if __name__ == "__main__":
    test_llm_connection()
|
llm_analysis/prompt_templates.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# llm_analysis/prompt_templates.py
# Prompt templates for the LLM resume-analysis pipeline. All templates use
# str.format-style placeholders ({name}); literal braces in the JSON examples
# are escaped as {{ and }} so .format() leaves them intact.

# Full resume-vs-JD analysis. Placeholders: resume_text, jd_text, plus the
# keyword-match stats (matched_count, total_skills, matched_skills,
# missing_skills, coverage_percentage). The model is asked to reply with the
# JSON object sketched below.
RESUME_ANALYSIS_PROMPT = """You are an expert HR recruiter analyzing resumes against job descriptions.

RESUME:
{resume_text}

JOB DESCRIPTION:
{jd_text}

KEYWORD MATCH ANALYSIS:
- Matched Skills ({matched_count}/{total_skills}): {matched_skills}
- Missing Skills: {missing_skills}
- Coverage: {coverage_percentage}%

Please provide a comprehensive analysis in JSON format:
{{
"overall_fit_score": <0-10 integer>,
"experience_alignment": "<brief assessment of experience match>",
"key_strengths": ["<strength1>", "<strength2>", "<strength3>"],
"critical_gaps": ["<gap1>", "<gap2>", "<gap3>"],
"role_suitability": "<High/Medium/Low with reasoning>",
"improvement_suggestions": ["<actionable suggestion1>", "<actionable suggestion2>"],
"recommended_skills_to_learn": ["<skill1>", "<skill2>", "<skill3>"],
"project_recommendations": ["<project idea1>", "<project idea2>"],
"certification_suggestions": ["<cert1>", "<cert2>"],
"interview_readiness": "<assessment of interview preparation needed>",
"salary_expectations": "<realistic salary range assessment>",
"final_verdict": "<detailed reasoning for recommendation>"
}}

Focus on being practical, specific, and actionable in your recommendations."""

# Turns a completed analysis (placeholder: analysis_results) into a concrete
# learning roadmap. The expected reply schema matches _create_fallback_roadmap.
IMPROVEMENT_ROADMAP_PROMPT = """Based on this resume analysis, create a detailed improvement roadmap for the candidate.

ANALYSIS RESULTS:
{analysis_results}

Create a structured improvement plan in JSON format:
{{
"immediate_actions": ["<action that can be done today>", "<another immediate action>"],
"week_1_plan": ["<specific task for week 1>", "<another week 1 task>"],
"month_1_plan": ["<month 1 goal>", "<another month 1 goal>"],
"month_3_plan": ["<3 month goal>", "<another 3 month goal>"],
"priority_skills": ["<highest priority skill>", "<second priority>", "<third priority>"],
"learning_resources": {{
"free_courses": ["<course recommendation>", "<another course>"],
"paid_courses": ["<premium course>", "<another premium course>"],
"books": ["<book recommendation>", "<another book>"],
"practice_platforms": ["<platform>", "<another platform>"]
}},
"portfolio_improvements": ["<specific project to build>", "<another project>"],
"networking_suggestions": ["<networking advice>", "<another networking tip>"],
"quick_wins": ["<easy improvement>", "<another quick win>"],
"estimated_timeline": "<realistic timeline to become job-ready>"
}}

Be specific with course names, book titles, and platform recommendations."""

# Extracts and categorizes every technical skill from free text (placeholder:
# text). Used by enhance_skills_extraction; reply must be the JSON sketched
# below, including an "all_technical_skills" aggregate list.
SKILLS_ENHANCEMENT_PROMPT = """Analyze the following text and extract ALL technical skills, then categorize and enhance the skills list.

TEXT TO ANALYZE:
{text}

Extract and categorize skills comprehensively in JSON format:
{{
"programming_languages": ["<language1>", "<language2>"],
"web_frameworks": ["<framework1>", "<framework2>"],
"databases": ["<db1>", "<db2>"],
"cloud_platforms": ["<platform1>", "<platform2>"],
"devops_tools": ["<tool1>", "<tool2>"],
"testing_tools": ["<tool1>", "<tool2>"],
"development_tools": ["<tool1>", "<tool2>"],
"soft_skills": ["<skill1>", "<skill2>"],
"methodologies": ["<methodology1>", "<methodology2>"],
"all_technical_skills": ["<comprehensive list of all technical skills found>"],
"skill_proficiency_estimate": {{
"<skill>": "<Beginner/Intermediate/Advanced based on context>",
"<another_skill>": "<proficiency_level>"
}}
}}

Be thorough and include variations (e.g., JS and JavaScript, k8s and Kubernetes)."""
|
logs/langsmith_metrics.jsonl
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"session_id": "937ab2f4-55f4-416e-a752-940d0160ca92", "timestamp": "2025-09-20T10:28:09.968967", "metrics": {"analysis_success": true, "resume_length": 2238, "jd_length": 3149, "skills_found": 19, "pipeline_status": "completed", "enhanced_scoring": false}, "type": "metrics"}
|
| 2 |
+
{"session_id": "937ab2f4-55f4-416e-a752-940d0160ca92", "timestamp": "2025-09-20T10:28:09.971970", "metrics": {"function": "complete_ai_analysis", "latency_ms": 3343.7670000000003, "success": true}, "type": "metrics"}
|
| 3 |
+
{"session_id": "02aaade7-76fc-4af0-99b1-42d6894bd1c1", "timestamp": "2025-09-20T10:32:17.648118", "metrics": {"analysis_success": true, "resume_length": 2238, "jd_length": 3149, "skills_found": 19, "pipeline_status": "completed", "enhanced_scoring": false}, "type": "metrics"}
|
| 4 |
+
{"session_id": "02aaade7-76fc-4af0-99b1-42d6894bd1c1", "timestamp": "2025-09-20T10:32:17.654122", "metrics": {"function": "complete_ai_analysis", "latency_ms": 1884.951, "success": true}, "type": "metrics"}
|
| 5 |
+
{"session_id": "eb186856-10a3-4850-b5a2-aea5a5b70569", "timestamp": "2025-09-20T11:21:59.326340", "metrics": {"api_success": true, "final_score": 35.0, "pipeline_used": true}, "type": "metrics"}
|
| 6 |
+
{"session_id": "eb186856-10a3-4850-b5a2-aea5a5b70569", "timestamp": "2025-09-20T11:21:59.332369", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 3593.709, "success": true}, "type": "metrics"}
|
| 7 |
+
{"session_id": "eb186856-10a3-4850-b5a2-aea5a5b70569", "timestamp": "2025-09-20T11:27:16.659867", "metrics": {"api_success": true, "final_score": 35.0, "pipeline_used": true}, "type": "metrics"}
|
| 8 |
+
{"session_id": "eb186856-10a3-4850-b5a2-aea5a5b70569", "timestamp": "2025-09-20T11:27:16.662861", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 1167.566, "success": true}, "type": "metrics"}
|
| 9 |
+
{"session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "timestamp": "2025-09-20T15:38:04.483122", "metrics": {"api_success": true, "final_score": 35.0, "pipeline_used": true}, "type": "metrics"}
|
| 10 |
+
{"session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "timestamp": "2025-09-20T15:38:04.489226", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 3661.242, "success": true}, "type": "metrics"}
|
| 11 |
+
{"session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "timestamp": "2025-09-20T16:02:44.109780", "metrics": {"api_success": true, "final_score": 35.0, "pipeline_used": true}, "type": "metrics"}
|
| 12 |
+
{"session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "timestamp": "2025-09-20T16:02:44.111775", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 929.667, "success": true}, "type": "metrics"}
|
| 13 |
+
{"session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "timestamp": "2025-09-20T16:09:57.021715", "metrics": {"api_success": true, "final_score": 36.668, "pipeline_used": true}, "type": "metrics"}
|
| 14 |
+
{"session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "timestamp": "2025-09-20T16:09:57.026900", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 912.856, "success": true}, "type": "metrics"}
|
| 15 |
+
{"session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "timestamp": "2025-09-20T16:11:55.042808", "metrics": {"api_success": true, "final_score": 36.668, "pipeline_used": true}, "type": "metrics"}
|
| 16 |
+
{"session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "timestamp": "2025-09-20T16:11:55.047901", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 1488.064, "success": true}, "type": "metrics"}
|
| 17 |
+
{"session_id": "ecfa3ce6-e5a8-495f-a24b-5ecc327807c7", "timestamp": "2025-09-20T16:19:28.468185", "metrics": {"api_success": true, "final_score": 43.332, "pipeline_used": true}, "type": "metrics"}
|
| 18 |
+
{"session_id": "ecfa3ce6-e5a8-495f-a24b-5ecc327807c7", "timestamp": "2025-09-20T16:19:28.473178", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 3668.7690000000002, "success": true}, "type": "metrics"}
|
| 19 |
+
{"session_id": "22bf6f9e-528f-4efd-9195-cc2d2d0050e6", "timestamp": "2025-09-20T16:22:17.448927", "metrics": {"api_success": true, "final_score": 43.332, "pipeline_used": true}, "type": "metrics"}
|
| 20 |
+
{"session_id": "22bf6f9e-528f-4efd-9195-cc2d2d0050e6", "timestamp": "2025-09-20T16:22:17.453922", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 3874.583, "success": true}, "type": "metrics"}
|
| 21 |
+
{"session_id": "22bf6f9e-528f-4efd-9195-cc2d2d0050e6", "timestamp": "2025-09-20T16:26:09.359080", "metrics": {"api_success": true, "final_score": 43.332, "pipeline_used": true}, "type": "metrics"}
|
| 22 |
+
{"session_id": "22bf6f9e-528f-4efd-9195-cc2d2d0050e6", "timestamp": "2025-09-20T16:26:09.366978", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 1227.908, "success": true}, "type": "metrics"}
|
| 23 |
+
{"session_id": "ae346dba-3fc8-4656-8e11-224b5c42499a", "timestamp": "2025-09-20T16:28:36.803003", "metrics": {"api_success": true, "final_score": 33.752, "pipeline_used": true}, "type": "metrics"}
|
| 24 |
+
{"session_id": "ae346dba-3fc8-4656-8e11-224b5c42499a", "timestamp": "2025-09-20T16:28:36.808437", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 3730.636, "success": true}, "type": "metrics"}
|
| 25 |
+
{"session_id": "7b932b93-6d28-42b0-acf4-597ec7761d18", "timestamp": "2025-09-20T16:32:52.095638", "metrics": {"api_success": true, "final_score": 43.332, "pipeline_used": true}, "type": "metrics"}
|
| 26 |
+
{"session_id": "7b932b93-6d28-42b0-acf4-597ec7761d18", "timestamp": "2025-09-20T16:32:52.098635", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 3076.762, "success": true}, "type": "metrics"}
|
| 27 |
+
{"session_id": "2c6d421a-fa05-427b-98fe-d9eb9330d3cb", "timestamp": "2025-09-21T01:22:16.389240", "metrics": {"api_success": true, "final_score": 32.5, "pipeline_used": true}, "type": "metrics"}
|
| 28 |
+
{"session_id": "2c6d421a-fa05-427b-98fe-d9eb9330d3cb", "timestamp": "2025-09-21T01:22:16.394244", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 2296.625, "success": true}, "type": "metrics"}
|
| 29 |
+
{"session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "timestamp": "2025-09-21T01:24:37.998103", "metrics": {"api_success": true, "final_score": 56.668000000000006, "pipeline_used": true}, "type": "metrics"}
|
| 30 |
+
{"session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "timestamp": "2025-09-21T01:24:38.001115", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 1919.5310000000002, "success": true}, "type": "metrics"}
|
| 31 |
+
{"session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "timestamp": "2025-09-21T01:27:16.385405", "metrics": {"api_success": true, "final_score": 56.668000000000006, "pipeline_used": true}, "type": "metrics"}
|
| 32 |
+
{"session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "timestamp": "2025-09-21T01:27:16.388509", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 1454.168, "success": true}, "type": "metrics"}
|
| 33 |
+
{"session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "timestamp": "2025-09-21T01:27:51.527938", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 12.616, "success": true}, "type": "metrics"}
|
| 34 |
+
{"session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "timestamp": "2025-09-21T01:28:26.866106", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 133.30700000000002, "success": true}, "type": "metrics"}
|
| 35 |
+
{"session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "timestamp": "2025-09-21T01:29:02.073814", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 19.596, "success": true}, "type": "metrics"}
|
| 36 |
+
{"session_id": "ba23f5ea-5bed-49f9-8a51-664393d8166f", "timestamp": "2025-09-21T01:40:09.312013", "metrics": {"api_success": true, "final_score": 56.668000000000006, "pipeline_used": true}, "type": "metrics"}
|
| 37 |
+
{"session_id": "ba23f5ea-5bed-49f9-8a51-664393d8166f", "timestamp": "2025-09-21T01:40:09.314913", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 2356.451, "success": true}, "type": "metrics"}
|
| 38 |
+
{"session_id": "c4915a7d-32e4-489f-b21f-42992acb373d", "timestamp": "2025-09-21T01:55:07.375404", "metrics": {"api_success": true, "final_score": 32.5, "pipeline_used": true}, "type": "metrics"}
|
| 39 |
+
{"session_id": "c4915a7d-32e4-489f-b21f-42992acb373d", "timestamp": "2025-09-21T01:55:07.378410", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 2503.252, "success": true}, "type": "metrics"}
|
| 40 |
+
{"session_id": "0b301bb0-8da6-4ede-ad07-4e19e41fe76b", "timestamp": "2025-09-21T02:19:01.012120", "metrics": {"api_success": true, "final_score": 56.668000000000006, "pipeline_used": true}, "type": "metrics"}
|
| 41 |
+
{"session_id": "0b301bb0-8da6-4ede-ad07-4e19e41fe76b", "timestamp": "2025-09-21T02:19:01.016125", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 2274.592, "success": true}, "type": "metrics"}
|
| 42 |
+
{"session_id": "acba97a8-88e4-428b-9390-783700f0235f", "timestamp": "2025-09-21T03:13:19.055138", "metrics": {"api_success": true, "final_score": 35.0, "pipeline_used": true}, "type": "metrics"}
|
| 43 |
+
{"session_id": "acba97a8-88e4-428b-9390-783700f0235f", "timestamp": "2025-09-21T03:13:19.059234", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 2120.11, "success": true}, "type": "metrics"}
|
| 44 |
+
{"session_id": "1579b742-a3e6-4eab-8ad4-f0f16fabeb44", "timestamp": "2025-09-21T03:30:37.071608", "metrics": {"api_success": true, "final_score": 43.332, "pipeline_used": true}, "type": "metrics"}
|
| 45 |
+
{"session_id": "1579b742-a3e6-4eab-8ad4-f0f16fabeb44", "timestamp": "2025-09-21T03:30:37.075742", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 2324.2560000000003, "success": true}, "type": "metrics"}
|
| 46 |
+
{"session_id": "f87d6485-e1ab-4f69-bf3c-a0f2454901d8", "timestamp": "2025-09-21T03:32:54.777227", "metrics": {"api_success": true, "final_score": 40.0, "pipeline_used": true}, "type": "metrics"}
|
| 47 |
+
{"session_id": "f87d6485-e1ab-4f69-bf3c-a0f2454901d8", "timestamp": "2025-09-21T03:32:54.786216", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 2556.451, "success": true}, "type": "metrics"}
|
| 48 |
+
{"session_id": "2f5d5adb-b2dd-4064-9478-239eb49bfcea", "timestamp": "2025-09-21T03:39:27.003374", "metrics": {"api_success": true, "final_score": 56.668000000000006, "pipeline_used": true}, "type": "metrics"}
|
| 49 |
+
{"session_id": "2f5d5adb-b2dd-4064-9478-239eb49bfcea", "timestamp": "2025-09-21T03:39:27.014265", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 4611.081, "success": true}, "type": "metrics"}
|
| 50 |
+
{"session_id": "732b0964-05ef-4f60-85a3-cf459228de7a", "timestamp": "2025-09-21T03:44:56.950150", "metrics": {"api_success": true, "final_score": 36.668, "pipeline_used": true}, "type": "metrics"}
|
| 51 |
+
{"session_id": "732b0964-05ef-4f60-85a3-cf459228de7a", "timestamp": "2025-09-21T03:44:56.954924", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 3159.484, "success": true}, "type": "metrics"}
|
| 52 |
+
{"session_id": "697a6f96-fb30-4958-9e15-a6ceb5892c41", "timestamp": "2025-09-21T03:51:12.817358", "metrics": {"api_success": true, "final_score": 36.668, "pipeline_used": true}, "type": "metrics"}
|
| 53 |
+
{"session_id": "697a6f96-fb30-4958-9e15-a6ceb5892c41", "timestamp": "2025-09-21T03:51:12.821360", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 2815.247, "success": true}, "type": "metrics"}
|
| 54 |
+
{"session_id": "c192918b-d4d5-4598-8177-47bbd2d80408", "timestamp": "2025-09-21T03:55:12.022321", "metrics": {"api_success": true, "final_score": 50.0, "pipeline_used": true}, "type": "metrics"}
|
| 55 |
+
{"session_id": "c192918b-d4d5-4598-8177-47bbd2d80408", "timestamp": "2025-09-21T03:55:12.025331", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 3375.04, "success": true}, "type": "metrics"}
|
| 56 |
+
{"session_id": "80a681b0-9d8d-4651-893c-c7373c734dc0", "timestamp": "2025-09-21T04:07:46.220611", "metrics": {"api_success": true, "final_score": 36.668, "pipeline_used": true}, "type": "metrics"}
|
| 57 |
+
{"session_id": "80a681b0-9d8d-4651-893c-c7373c734dc0", "timestamp": "2025-09-21T04:07:46.225181", "metrics": {"function": "complete_ai_analysis_api", "latency_ms": 26514.756999999998, "success": true}, "type": "metrics"}
|
logs/langsmith_traces.jsonl
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"trace_id": "a9220664-0b26-4e3f-b34d-14939e97460a", "session_id": "937ab2f4-55f4-416e-a752-940d0160ca92", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis", "start_time": "2025-09-20T10:28:06.626205", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 2 |
+
{"trace_id": "9194d1ec-9db4-45ec-a0c4-3ebef618c208", "session_id": "937ab2f4-55f4-416e-a752-940d0160ca92", "project_name": "resume-relevance-system", "trace_name": "complete_resume_analysis", "start_time": "2025-09-20T10:28:06.628203", "inputs": {"resume_file": "input/sample_resume.pdf", "jd_file": "input/sample_jd.pdf"}, "status": "started", "type": "trace_start"}
|
| 3 |
+
{"trace_id": "9194d1ec-9db4-45ec-a0c4-3ebef618c208", "session_id": "937ab2f4-55f4-416e-a752-940d0160ca92", "end_time": "2025-09-20T10:28:09.969969", "outputs": {"pipeline_status": "completed", "final_score": 5}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 4 |
+
{"trace_id": "a9220664-0b26-4e3f-b34d-14939e97460a", "session_id": "937ab2f4-55f4-416e-a752-940d0160ca92", "end_time": "2025-09-20T10:28:09.971970", "outputs": {"result_type": "NoneType"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 5 |
+
{"trace_id": "78931f3c-8c2f-4cf0-88a3-75ffd391e133", "session_id": "02aaade7-76fc-4af0-99b1-42d6894bd1c1", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis", "start_time": "2025-09-20T10:32:15.765176", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 6 |
+
{"trace_id": "7b53b614-d774-41fc-86fe-155ad7326413", "session_id": "02aaade7-76fc-4af0-99b1-42d6894bd1c1", "project_name": "resume-relevance-system", "trace_name": "complete_resume_analysis", "start_time": "2025-09-20T10:32:15.766169", "inputs": {"resume_file": "input/sample_resume.pdf", "jd_file": "input/sample_jd.pdf"}, "status": "started", "type": "trace_start"}
|
| 7 |
+
{"trace_id": "7b53b614-d774-41fc-86fe-155ad7326413", "session_id": "02aaade7-76fc-4af0-99b1-42d6894bd1c1", "end_time": "2025-09-20T10:32:17.649124", "outputs": {"pipeline_status": "completed", "final_score": 5}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 8 |
+
{"trace_id": "78931f3c-8c2f-4cf0-88a3-75ffd391e133", "session_id": "02aaade7-76fc-4af0-99b1-42d6894bd1c1", "end_time": "2025-09-20T10:32:17.651120", "outputs": {"result_type": "NoneType"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 9 |
+
{"trace_id": "49f5c4ab-d238-4faa-a175-0adf0f3920df", "session_id": "eb186856-10a3-4850-b5a2-aea5a5b70569", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-20T11:21:55.733567", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 10 |
+
{"trace_id": "5393826b-554b-421b-8790-1218ad6016dc", "session_id": "eb186856-10a3-4850-b5a2-aea5a5b70569", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-20T11:21:55.736654", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmplvkk7m14.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp6rbiooty.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 11 |
+
{"trace_id": "5393826b-554b-421b-8790-1218ad6016dc", "session_id": "eb186856-10a3-4850-b5a2-aea5a5b70569", "end_time": "2025-09-20T11:21:59.323345", "outputs": {"final_score": 35.0, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 12 |
+
{"trace_id": "49f5c4ab-d238-4faa-a175-0adf0f3920df", "session_id": "eb186856-10a3-4850-b5a2-aea5a5b70569", "end_time": "2025-09-20T11:21:59.330363", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 13 |
+
{"trace_id": "e302ca41-6236-4f05-97cd-d17a0ad75b37", "session_id": "eb186856-10a3-4850-b5a2-aea5a5b70569", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-20T11:27:15.492297", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 14 |
+
{"trace_id": "378fa23b-0d68-42ab-b430-bda462fe4e14", "session_id": "eb186856-10a3-4850-b5a2-aea5a5b70569", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-20T11:27:15.493296", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp0424aaqj.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpnalp728z.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 15 |
+
{"trace_id": "378fa23b-0d68-42ab-b430-bda462fe4e14", "session_id": "eb186856-10a3-4850-b5a2-aea5a5b70569", "end_time": "2025-09-20T11:27:16.654803", "outputs": {"final_score": 35.0, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 16 |
+
{"trace_id": "e302ca41-6236-4f05-97cd-d17a0ad75b37", "session_id": "eb186856-10a3-4850-b5a2-aea5a5b70569", "end_time": "2025-09-20T11:27:16.660862", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 17 |
+
{"trace_id": "a25e723f-9809-42da-b369-8d60667993bf", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-20T15:38:00.822976", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 18 |
+
{"trace_id": "1634d9e3-2e84-40a8-9873-3738f7facb35", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-20T15:38:00.825975", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp6ikrxzk6.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpjkujpprp.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 19 |
+
{"trace_id": "1634d9e3-2e84-40a8-9873-3738f7facb35", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "end_time": "2025-09-20T15:38:04.478225", "outputs": {"final_score": 35.0, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 20 |
+
{"trace_id": "a25e723f-9809-42da-b369-8d60667993bf", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "end_time": "2025-09-20T15:38:04.487217", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 21 |
+
{"trace_id": "09528e66-9a21-4ea6-8297-deb8cf96bcc4", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-20T16:02:43.180112", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 22 |
+
{"trace_id": "31c100c7-ea89-4df2-85cf-bcc2b1cc1397", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-20T16:02:43.181109", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp02rmxegc.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpoy2ydj6o.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 23 |
+
{"trace_id": "31c100c7-ea89-4df2-85cf-bcc2b1cc1397", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "end_time": "2025-09-20T16:02:44.108699", "outputs": {"final_score": 35.0, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 24 |
+
{"trace_id": "09528e66-9a21-4ea6-8297-deb8cf96bcc4", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "end_time": "2025-09-20T16:02:44.110776", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 25 |
+
{"trace_id": "6a185789-6d47-44e4-8cca-e0dedd2b22d7", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-20T16:09:56.110011", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 26 |
+
{"trace_id": "7e1ab0e9-03b8-4099-9dd9-5d92f68f4798", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-20T16:09:56.113023", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp8d87hwhf.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpa77tf5fb.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 27 |
+
{"trace_id": "7e1ab0e9-03b8-4099-9dd9-5d92f68f4798", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "end_time": "2025-09-20T16:09:57.017743", "outputs": {"final_score": 36.668, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 28 |
+
{"trace_id": "6a185789-6d47-44e4-8cca-e0dedd2b22d7", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "end_time": "2025-09-20T16:09:57.024881", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 29 |
+
{"trace_id": "6f031d08-1458-4330-84c1-550cdf7d2cc0", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-20T16:11:53.555834", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 30 |
+
{"trace_id": "d0a35be8-5789-4346-94d1-9e8dd8b48b6e", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-20T16:11:53.557800", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpyzkfzvg1.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp6qku_qze.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 31 |
+
{"trace_id": "d0a35be8-5789-4346-94d1-9e8dd8b48b6e", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "end_time": "2025-09-20T16:11:55.040910", "outputs": {"final_score": 36.668, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 32 |
+
{"trace_id": "6f031d08-1458-4330-84c1-550cdf7d2cc0", "session_id": "10662543-8b23-4079-83bb-5ff45338c2f1", "end_time": "2025-09-20T16:11:55.045864", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 33 |
+
{"trace_id": "d0ea9e90-42ba-4e34-a8e4-fa21eaf1b931", "session_id": "ecfa3ce6-e5a8-495f-a24b-5ecc327807c7", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-20T16:19:24.799410", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 34 |
+
{"trace_id": "870c9f17-e25a-4f3b-abf1-5e85a949b313", "session_id": "ecfa3ce6-e5a8-495f-a24b-5ecc327807c7", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-20T16:19:24.801413", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpytdykoki.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpmmugnx60.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 35 |
+
{"trace_id": "870c9f17-e25a-4f3b-abf1-5e85a949b313", "session_id": "ecfa3ce6-e5a8-495f-a24b-5ecc327807c7", "end_time": "2025-09-20T16:19:28.465273", "outputs": {"final_score": 43.332, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 36 |
+
{"trace_id": "d0ea9e90-42ba-4e34-a8e4-fa21eaf1b931", "session_id": "ecfa3ce6-e5a8-495f-a24b-5ecc327807c7", "end_time": "2025-09-20T16:19:28.470182", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 37 |
+
{"trace_id": "10094869-bf87-4b42-adef-76bba3e94cfd", "session_id": "22bf6f9e-528f-4efd-9195-cc2d2d0050e6", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-20T16:22:13.573970", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 38 |
+
{"trace_id": "02739c4d-646b-4a07-aba7-e67815a8aa98", "session_id": "22bf6f9e-528f-4efd-9195-cc2d2d0050e6", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-20T16:22:13.577347", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpztf2hnqs.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp8znkrcrb.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 39 |
+
{"trace_id": "02739c4d-646b-4a07-aba7-e67815a8aa98", "session_id": "22bf6f9e-528f-4efd-9195-cc2d2d0050e6", "end_time": "2025-09-20T16:22:17.445918", "outputs": {"final_score": 43.332, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 40 |
+
{"trace_id": "10094869-bf87-4b42-adef-76bba3e94cfd", "session_id": "22bf6f9e-528f-4efd-9195-cc2d2d0050e6", "end_time": "2025-09-20T16:22:17.451930", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 41 |
+
{"trace_id": "b4b1e1c1-ce7b-4f41-a78a-6efa65f3df7b", "session_id": "22bf6f9e-528f-4efd-9195-cc2d2d0050e6", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-20T16:26:08.130125", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 42 |
+
{"trace_id": "e0fba981-e2b7-4892-a9a0-155cf402bcad", "session_id": "22bf6f9e-528f-4efd-9195-cc2d2d0050e6", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-20T16:26:08.133123", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpvtdar3no.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmptw1nuu6d.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 43 |
+
{"trace_id": "e0fba981-e2b7-4892-a9a0-155cf402bcad", "session_id": "22bf6f9e-528f-4efd-9195-cc2d2d0050e6", "end_time": "2025-09-20T16:26:09.355068", "outputs": {"final_score": 43.332, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 44 |
+
{"trace_id": "b4b1e1c1-ce7b-4f41-a78a-6efa65f3df7b", "session_id": "22bf6f9e-528f-4efd-9195-cc2d2d0050e6", "end_time": "2025-09-20T16:26:09.361031", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 45 |
+
{"trace_id": "c4c0578d-0611-4b90-b757-578779274efe", "session_id": "ae346dba-3fc8-4656-8e11-224b5c42499a", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-20T16:28:33.069844", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 46 |
+
{"trace_id": "65c0aa4f-4494-43a4-8934-70169b717582", "session_id": "ae346dba-3fc8-4656-8e11-224b5c42499a", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-20T16:28:33.074357", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp89dd34s7.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp116yeyi3.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 47 |
+
{"trace_id": "65c0aa4f-4494-43a4-8934-70169b717582", "session_id": "ae346dba-3fc8-4656-8e11-224b5c42499a", "end_time": "2025-09-20T16:28:36.800094", "outputs": {"final_score": 33.752, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 48 |
+
{"trace_id": "c4c0578d-0611-4b90-b757-578779274efe", "session_id": "ae346dba-3fc8-4656-8e11-224b5c42499a", "end_time": "2025-09-20T16:28:36.804993", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 49 |
+
{"trace_id": "33f9fce0-9c3e-492a-9524-51c34a5db94d", "session_id": "7b932b93-6d28-42b0-acf4-597ec7761d18", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-20T16:32:49.017877", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 50 |
+
{"trace_id": "6e392f12-a809-4478-af07-9dbf8c47e537", "session_id": "7b932b93-6d28-42b0-acf4-597ec7761d18", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-20T16:32:49.019874", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpioc13nbs.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpapjq22q7.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 51 |
+
{"trace_id": "6e392f12-a809-4478-af07-9dbf8c47e537", "session_id": "7b932b93-6d28-42b0-acf4-597ec7761d18", "end_time": "2025-09-20T16:32:52.093645", "outputs": {"final_score": 43.332, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 52 |
+
{"trace_id": "33f9fce0-9c3e-492a-9524-51c34a5db94d", "session_id": "7b932b93-6d28-42b0-acf4-597ec7761d18", "end_time": "2025-09-20T16:32:52.096636", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 53 |
+
{"trace_id": "d823ef9f-2df5-4806-9afa-4fec03cc59de", "session_id": "2c6d421a-fa05-427b-98fe-d9eb9330d3cb", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T01:22:14.093539", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 54 |
+
{"trace_id": "61dcda6f-7572-4e56-b810-9aa8c09db991", "session_id": "2c6d421a-fa05-427b-98fe-d9eb9330d3cb", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T01:22:14.094535", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpk34qnxoz.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpinedpti6.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 55 |
+
{"trace_id": "61dcda6f-7572-4e56-b810-9aa8c09db991", "session_id": "2c6d421a-fa05-427b-98fe-d9eb9330d3cb", "end_time": "2025-09-21T01:22:16.388242", "outputs": {"final_score": 32.5, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 56 |
+
{"trace_id": "d823ef9f-2df5-4806-9afa-4fec03cc59de", "session_id": "2c6d421a-fa05-427b-98fe-d9eb9330d3cb", "end_time": "2025-09-21T01:22:16.391160", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 57 |
+
{"trace_id": "24695c1d-aad7-4e83-b253-a52949c1ff1e", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T01:24:36.076482", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 58 |
+
{"trace_id": "8142abf7-8623-4bd8-a0ec-b035eeea49de", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T01:24:36.080584", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpmq0nsda_.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpfy6mq83d.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 59 |
+
{"trace_id": "8142abf7-8623-4bd8-a0ec-b035eeea49de", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "end_time": "2025-09-21T01:24:37.994412", "outputs": {"final_score": 56.668000000000006, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 60 |
+
{"trace_id": "24695c1d-aad7-4e83-b253-a52949c1ff1e", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "end_time": "2025-09-21T01:24:38.000115", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 61 |
+
{"trace_id": "4bfd280e-c0f4-4e29-91ca-1cd28ce98c0e", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T01:27:14.928024", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 62 |
+
{"trace_id": "f5e0624d-5361-4d2e-a2c2-70a37f6d7859", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T01:27:14.932244", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpd8i2w90l.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpkbs8mbvf.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 63 |
+
{"trace_id": "f5e0624d-5361-4d2e-a2c2-70a37f6d7859", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "end_time": "2025-09-21T01:27:16.383406", "outputs": {"final_score": 56.668000000000006, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 64 |
+
{"trace_id": "4bfd280e-c0f4-4e29-91ca-1cd28ce98c0e", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "end_time": "2025-09-21T01:27:16.386412", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 65 |
+
{"trace_id": "936e4d74-a342-40d2-a9ec-ed7d697bdb91", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T01:27:51.511294", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 66 |
+
{"trace_id": "e2c67c0c-fb9d-428e-9b21-44d61304fd1e", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T01:27:51.513311", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp16rci5h5.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp7dwr_wtp.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 67 |
+
{"trace_id": "e2c67c0c-fb9d-428e-9b21-44d61304fd1e", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "end_time": "2025-09-21T01:27:51.522926", "outputs": {}, "status": "error", "error": "Cannot open empty file: filename='C:\\\\Users\\\\kusha\\\\AppData\\\\Local\\\\Temp\\\\tmp16rci5h5.pdf'.", "token_usage": {}, "type": "trace_end"}
|
| 68 |
+
{"trace_id": "936e4d74-a342-40d2-a9ec-ed7d697bdb91", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "end_time": "2025-09-21T01:27:51.524923", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 69 |
+
{"trace_id": "0e5ac59b-bedd-4fc8-b46d-f8acb4a418ee", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T01:28:26.729537", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 70 |
+
{"trace_id": "5a35f592-d45c-4576-b550-14ee205df0da", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T01:28:26.730536", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp59t8_l1r.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp2_n11hm5.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 71 |
+
{"trace_id": "5a35f592-d45c-4576-b550-14ee205df0da", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "end_time": "2025-09-21T01:28:26.861845", "outputs": {}, "status": "error", "error": "Cannot open empty file: filename='C:\\\\Users\\\\kusha\\\\AppData\\\\Local\\\\Temp\\\\tmp2_n11hm5.pdf'.", "token_usage": {}, "type": "trace_end"}
|
| 72 |
+
{"trace_id": "0e5ac59b-bedd-4fc8-b46d-f8acb4a418ee", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "end_time": "2025-09-21T01:28:26.863843", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 73 |
+
{"trace_id": "2033f188-874b-4779-93ed-cd939e13dc02", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T01:29:02.050099", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 74 |
+
{"trace_id": "765dc4e3-c115-4468-a2a1-928b7a708f0a", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T01:29:02.053217", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpelpe_kq9.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpd1tg607v.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 75 |
+
{"trace_id": "765dc4e3-c115-4468-a2a1-928b7a708f0a", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "end_time": "2025-09-21T01:29:02.070814", "outputs": {}, "status": "error", "error": "Cannot open empty file: filename='C:\\\\Users\\\\kusha\\\\AppData\\\\Local\\\\Temp\\\\tmpelpe_kq9.pdf'.", "token_usage": {}, "type": "trace_end"}
|
| 76 |
+
{"trace_id": "2033f188-874b-4779-93ed-cd939e13dc02", "session_id": "8f331c6b-ea70-46e0-b257-264af54e7834", "end_time": "2025-09-21T01:29:02.072813", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 77 |
+
{"trace_id": "05aafbe3-91d6-4567-88f7-971d4bf1cfa3", "session_id": "ba23f5ea-5bed-49f9-8a51-664393d8166f", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T01:40:06.952453", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 78 |
+
{"trace_id": "fa4cd081-5a89-4311-8d6b-a8bee0f4f95b", "session_id": "ba23f5ea-5bed-49f9-8a51-664393d8166f", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T01:40:06.957463", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpfpgea8mx.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp0mp2_3rx.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 79 |
+
{"trace_id": "fa4cd081-5a89-4311-8d6b-a8bee0f4f95b", "session_id": "ba23f5ea-5bed-49f9-8a51-664393d8166f", "end_time": "2025-09-21T01:40:09.302898", "outputs": {"final_score": 56.668000000000006, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 80 |
+
{"trace_id": "05aafbe3-91d6-4567-88f7-971d4bf1cfa3", "session_id": "ba23f5ea-5bed-49f9-8a51-664393d8166f", "end_time": "2025-09-21T01:40:09.313914", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 81 |
+
{"trace_id": "4e7b4fc5-3c0a-42dc-99e4-d7a895db19a3", "session_id": "c4915a7d-32e4-489f-b21f-42992acb373d", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T01:55:04.866221", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 82 |
+
{"trace_id": "eba0b151-f45f-4497-a18c-97968ba558a3", "session_id": "c4915a7d-32e4-489f-b21f-42992acb373d", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T01:55:04.874152", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpilnra4ly.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpifbcxjwl.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 83 |
+
{"trace_id": "eba0b151-f45f-4497-a18c-97968ba558a3", "session_id": "c4915a7d-32e4-489f-b21f-42992acb373d", "end_time": "2025-09-21T01:55:07.366397", "outputs": {"final_score": 32.5, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 84 |
+
{"trace_id": "4e7b4fc5-3c0a-42dc-99e4-d7a895db19a3", "session_id": "c4915a7d-32e4-489f-b21f-42992acb373d", "end_time": "2025-09-21T01:55:07.377404", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 85 |
+
{"trace_id": "f0c61041-ed71-4df3-b1ba-272bb3a1a1ca", "session_id": "0b301bb0-8da6-4ede-ad07-4e19e41fe76b", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T02:18:58.734780", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 86 |
+
{"trace_id": "7bbeada3-6822-421f-a326-bfa56bb0a2cd", "session_id": "0b301bb0-8da6-4ede-ad07-4e19e41fe76b", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T02:18:58.739429", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpw_kcul8t.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpllsrq7v8.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 87 |
+
{"trace_id": "7bbeada3-6822-421f-a326-bfa56bb0a2cd", "session_id": "0b301bb0-8da6-4ede-ad07-4e19e41fe76b", "end_time": "2025-09-21T02:19:01.005118", "outputs": {"final_score": 56.668000000000006, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 88 |
+
{"trace_id": "f0c61041-ed71-4df3-b1ba-272bb3a1a1ca", "session_id": "0b301bb0-8da6-4ede-ad07-4e19e41fe76b", "end_time": "2025-09-21T02:19:01.014021", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 89 |
+
{"trace_id": "334643be-5298-4b6c-8567-347271832f6a", "session_id": "acba97a8-88e4-428b-9390-783700f0235f", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T03:13:16.934134", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 90 |
+
{"trace_id": "4819d93c-4a91-4d3d-90d9-277d7be3381d", "session_id": "acba97a8-88e4-428b-9390-783700f0235f", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T03:13:16.937126", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpxhu7fkn7.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp9mtoh73r.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 91 |
+
{"trace_id": "4819d93c-4a91-4d3d-90d9-277d7be3381d", "session_id": "acba97a8-88e4-428b-9390-783700f0235f", "end_time": "2025-09-21T03:13:19.052137", "outputs": {"final_score": 35.0, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 92 |
+
{"trace_id": "334643be-5298-4b6c-8567-347271832f6a", "session_id": "acba97a8-88e4-428b-9390-783700f0235f", "end_time": "2025-09-21T03:13:19.057236", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 93 |
+
{"trace_id": "d0d18179-5ddb-493b-8c62-22229f06484e", "session_id": "1579b742-a3e6-4eab-8ad4-f0f16fabeb44", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T03:30:34.747377", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 94 |
+
{"trace_id": "4c0b2ff9-aced-442e-bba1-bd1e00c18cab", "session_id": "1579b742-a3e6-4eab-8ad4-f0f16fabeb44", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T03:30:34.749366", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp6mve9qq5.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpv5vjd46t.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 95 |
+
{"trace_id": "4c0b2ff9-aced-442e-bba1-bd1e00c18cab", "session_id": "1579b742-a3e6-4eab-8ad4-f0f16fabeb44", "end_time": "2025-09-21T03:30:37.066611", "outputs": {"final_score": 43.332, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 96 |
+
{"trace_id": "d0d18179-5ddb-493b-8c62-22229f06484e", "session_id": "1579b742-a3e6-4eab-8ad4-f0f16fabeb44", "end_time": "2025-09-21T03:30:37.073622", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 97 |
+
{"trace_id": "270ffb7c-a36a-4799-92cc-74ba938e58c5", "session_id": "f87d6485-e1ab-4f69-bf3c-a0f2454901d8", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T03:32:52.223767", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 98 |
+
{"trace_id": "250619ff-9a1a-4e1d-ab99-528591c4dd07", "session_id": "f87d6485-e1ab-4f69-bf3c-a0f2454901d8", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T03:32:52.226771", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpw4_p93qm.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpziz23bn2.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 99 |
+
{"trace_id": "250619ff-9a1a-4e1d-ab99-528591c4dd07", "session_id": "f87d6485-e1ab-4f69-bf3c-a0f2454901d8", "end_time": "2025-09-21T03:32:54.770212", "outputs": {"final_score": 40.0, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 100 |
+
{"trace_id": "270ffb7c-a36a-4799-92cc-74ba938e58c5", "session_id": "f87d6485-e1ab-4f69-bf3c-a0f2454901d8", "end_time": "2025-09-21T03:32:54.783222", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 101 |
+
{"trace_id": "c1c18675-903d-4d06-9b6d-d162dc6697aa", "session_id": "2f5d5adb-b2dd-4064-9478-239eb49bfcea", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T03:39:22.397308", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 102 |
+
{"trace_id": "aadff1c2-9852-4726-960d-af92c369fd8b", "session_id": "2f5d5adb-b2dd-4064-9478-239eb49bfcea", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T03:39:22.400313", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpp_yxirpc.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp36aqmv7r.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 103 |
+
{"trace_id": "aadff1c2-9852-4726-960d-af92c369fd8b", "session_id": "2f5d5adb-b2dd-4064-9478-239eb49bfcea", "end_time": "2025-09-21T03:39:26.991284", "outputs": {"final_score": 56.668000000000006, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 104 |
+
{"trace_id": "c1c18675-903d-4d06-9b6d-d162dc6697aa", "session_id": "2f5d5adb-b2dd-4064-9478-239eb49bfcea", "end_time": "2025-09-21T03:39:27.010387", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 105 |
+
{"trace_id": "df6e7046-48cd-4b29-ac41-173d3a4fb5f9", "session_id": "732b0964-05ef-4f60-85a3-cf459228de7a", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T03:44:53.791298", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 106 |
+
{"trace_id": "03a87671-e3a6-40fa-a2f6-dbdad8e025c9", "session_id": "732b0964-05ef-4f60-85a3-cf459228de7a", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T03:44:53.793766", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp3esxeq0t.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpteop9bro.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 107 |
+
{"trace_id": "03a87671-e3a6-40fa-a2f6-dbdad8e025c9", "session_id": "732b0964-05ef-4f60-85a3-cf459228de7a", "end_time": "2025-09-21T03:44:56.947132", "outputs": {"final_score": 36.668, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 108 |
+
{"trace_id": "df6e7046-48cd-4b29-ac41-173d3a4fb5f9", "session_id": "732b0964-05ef-4f60-85a3-cf459228de7a", "end_time": "2025-09-21T03:44:56.953250", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 109 |
+
{"trace_id": "02d153d2-eb16-4784-abb5-42ab4509f10a", "session_id": "697a6f96-fb30-4958-9e15-a6ceb5892c41", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T03:51:10.001559", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 110 |
+
{"trace_id": "99341c01-5bb4-47fe-80e9-fe8dd1836ee9", "session_id": "697a6f96-fb30-4958-9e15-a6ceb5892c41", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T03:51:10.004099", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpynmwjkur.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpb85kyh_i.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 111 |
+
{"trace_id": "99341c01-5bb4-47fe-80e9-fe8dd1836ee9", "session_id": "697a6f96-fb30-4958-9e15-a6ceb5892c41", "end_time": "2025-09-21T03:51:12.815359", "outputs": {"final_score": 36.668, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 112 |
+
{"trace_id": "02d153d2-eb16-4784-abb5-42ab4509f10a", "session_id": "697a6f96-fb30-4958-9e15-a6ceb5892c41", "end_time": "2025-09-21T03:51:12.819346", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 113 |
+
{"trace_id": "334c866e-795d-4eb9-90dc-2f66b7128d5f", "session_id": "c192918b-d4d5-4598-8177-47bbd2d80408", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T03:55:08.646302", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 114 |
+
{"trace_id": "d86ec10f-f015-46b1-bf1b-f72b1a4613b3", "session_id": "c192918b-d4d5-4598-8177-47bbd2d80408", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T03:55:08.648291", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp1cfpxuyy.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp6s3sg7jw.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 115 |
+
{"trace_id": "d86ec10f-f015-46b1-bf1b-f72b1a4613b3", "session_id": "c192918b-d4d5-4598-8177-47bbd2d80408", "end_time": "2025-09-21T03:55:12.019303", "outputs": {"final_score": 50.0, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 116 |
+
{"trace_id": "334c866e-795d-4eb9-90dc-2f66b7128d5f", "session_id": "c192918b-d4d5-4598-8177-47bbd2d80408", "end_time": "2025-09-21T03:55:12.023331", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 117 |
+
{"trace_id": "8b172285-0bdc-468f-9524-c6fbff86ab8e", "session_id": "80a681b0-9d8d-4651-893c-c7373c734dc0", "project_name": "resume-relevance-system", "trace_name": "complete_ai_analysis_api", "start_time": "2025-09-21T04:07:19.705410", "inputs": {"args_count": 2, "kwargs": []}, "status": "started", "type": "trace_start"}
|
| 118 |
+
{"trace_id": "d728dc29-eda1-42d5-ab33-aa4e99157fe1", "session_id": "80a681b0-9d8d-4651-893c-c7373c734dc0", "project_name": "resume-relevance-system", "trace_name": "api_resume_analysis", "start_time": "2025-09-21T04:07:19.708408", "inputs": {"resume_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmpjg086yms.pdf", "jd_file": "C:\\Users\\kusha\\AppData\\Local\\Temp\\tmp_u57r0c7.pdf", "api_call": true}, "status": "started", "type": "trace_start"}
|
| 119 |
+
{"trace_id": "d728dc29-eda1-42d5-ab33-aa4e99157fe1", "session_id": "80a681b0-9d8d-4651-893c-c7373c734dc0", "end_time": "2025-09-21T04:07:46.216611", "outputs": {"final_score": 36.668, "pipeline_used": true}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
| 120 |
+
{"trace_id": "8b172285-0bdc-468f-9524-c6fbff86ab8e", "session_id": "80a681b0-9d8d-4651-893c-c7373c734dc0", "end_time": "2025-09-21T04:07:46.223165", "outputs": {"result_type": "dict"}, "status": "success", "error": null, "token_usage": {}, "type": "trace_end"}
|
matchers/__init__.py
ADDED
|
File without changes
|
matchers/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (150 Bytes). View file
|
|
|
matchers/__pycache__/final_scorer.cpython-312.pyc
ADDED
|
Binary file (2.79 kB). View file
|
|
|
matchers/__pycache__/hard_matcher.cpython-312.pyc
ADDED
|
Binary file (2 kB). View file
|
|
|
matchers/__pycache__/semantic_matcher.cpython-312.pyc
ADDED
|
Binary file (2.21 kB). View file
|
|
|
matchers/entity_extractor.py
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# parsers/entity_extractor.py - SPACY ENTITY EXTRACTION
|
| 2 |
+
import spacy
|
| 3 |
+
from collections import Counter
|
| 4 |
+
import re
|
| 5 |
+
|
| 6 |
+
class EntityExtractor:
    """spaCy-backed extractor for entities, experience years, and education.

    Degrades gracefully: when the ``en_core_web_sm`` model is not installed,
    ``self.nlp`` is None and every public method falls back to regex-only
    extraction.
    """

    def __init__(self):
        # The spaCy model may be missing on the deploy target; keep the
        # extractor usable either way.
        try:
            print("🧠 Loading spaCy model...")
            self.nlp = spacy.load("en_core_web_sm")
            print("✅ spaCy model loaded successfully")
        except OSError:
            print("⚠️ spaCy model not found. Run: python -m spacy download en_core_web_sm")
            self.nlp = None

    def extract_skills_with_nlp(self, text):
        """Extract entities from *text* using spaCy NLP.

        Returns a dict with ``persons``/``organizations``/``technologies``/
        ``skills``/``locations`` lists; ``skills`` holds deduplicated,
        lowercased noun phrases of at most three words.
        """
        if not self.nlp:
            return self._fallback_extraction(text)

        print("🔍 Extracting entities with spaCy...")

        doc = self.nlp(text)

        entities = {
            "persons": [],
            "organizations": [],
            "technologies": [],
            "skills": [],
            "locations": []
        }

        for ent in doc.ents:
            if ent.label_ == "PERSON":
                entities["persons"].append(ent.text)
            elif ent.label_ == "ORG":
                entities["organizations"].append(ent.text)
            elif ent.label_ == "GPE":  # Geopolitical entity (locations)
                entities["locations"].append(ent.text)

        # Short noun phrases (max 3 words) are treated as candidate skills.
        noun_phrases = [chunk.text.lower() for chunk in doc.noun_chunks
                        if len(chunk.text.split()) <= 3]

        # Regex pass for technical terms spaCy's NER does not label.
        tech_patterns = [
            r'\b\w+\.js\b', r'\b\w+script\b', r'\b\w+SQL\b',
            r'\bAPI\b', r'\bSDK\b', r'\bIDE\b', r'\bOS\b'
        ]

        tech_terms = []
        for pattern in tech_patterns:
            tech_terms.extend(re.findall(pattern, text, re.IGNORECASE))

        entities["technologies"] = list(set(tech_terms))
        entities["skills"] = list(set(noun_phrases))

        return entities

    def extract_experience_years(self, text):
        """Return the largest number of years of experience mentioned, or 0.

        FIX: the previous version ran the full spaCy pipeline here
        (``doc = self.nlp(text)``) but never used the result — the extraction
        below is purely regex-based, so that expensive call was dead work.
        """
        if not self.nlp:
            return self._extract_years_regex(text)

        experience_patterns = [
            r'(\d+)\+?\s*years?\s*(?:of\s*)?experience',
            r'(\d+)\+?\s*years?\s*in',
            r'experience.*?(\d+)\+?\s*years?',
            r'(\d+)\+?\s*year.*?experience'
        ]

        years = []
        for pattern in experience_patterns:
            matches = re.findall(pattern, text.lower())
            years.extend([int(match) for match in matches if match.isdigit()])

        return max(years) if years else 0

    def extract_education_info(self, text):
        """Return ``{"degrees": [...], "fields": [...]}`` via substring search."""
        degrees = [
            "bachelor", "master", "phd", "doctorate", "diploma",
            "b.tech", "m.tech", "bca", "mca", "bsc", "msc"
        ]

        fields = [
            "computer science", "engineering", "information technology",
            "software engineering", "data science", "mathematics"
        ]

        text_lower = text.lower()

        found_degrees = [degree for degree in degrees if degree in text_lower]
        found_fields = [field for field in fields if field in text_lower]

        return {
            "degrees": list(set(found_degrees)),
            "fields": list(set(found_fields))
        }

    def _fallback_extraction(self, text):
        """Regex-only extraction used when spaCy is unavailable."""
        print("⚠️ Using fallback extraction (spaCy not available)")

        entities = {
            "persons": [],
            "organizations": [],
            "technologies": [],
            "skills": [],
            "locations": []
        }

        # Email domains are the only organization signal available here.
        email_domains = re.findall(r'@([a-zA-Z0-9.-]+\.[a-zA-Z]{2,})', text)
        entities["organizations"] = [domain.split('.')[0] for domain in email_domains]

        return entities

    def _extract_years_regex(self, text):
        """Regex fallback for experience extraction."""
        pattern = r'(\d+)\+?\s*years?\s*(?:of\s*)?(?:experience|exp)'
        matches = re.findall(pattern, text.lower())
        years = [int(match) for match in matches if match.isdigit()]
        return max(years) if years else 0
|
| 137 |
+
|
| 138 |
+
# Test function
|
| 139 |
+
def test_entity_extractor():
    """Smoke-test entity, experience, and education extraction on a sample resume."""
    extractor = EntityExtractor()

    sample_text = """
    John Smith is a Python developer with 3+ years of experience at Google.
    He has worked with React.js, Node.js, and AWS in San Francisco.
    Bachelor's degree in Computer Science.
    """

    # The three extractors are independent; run them all on the same sample.
    nlp_entities = extractor.extract_skills_with_nlp(sample_text)
    exp_years = extractor.extract_experience_years(sample_text)
    edu_info = extractor.extract_education_info(sample_text)

    print(f"✅ Entities extracted: {len(nlp_entities['skills'])} skills found")
    print(f"✅ Experience: {exp_years} years")
    print(f"✅ Education: {edu_info}")

    return len(nlp_entities['skills']) > 0

if __name__ == "__main__":
    test_entity_extractor()
|
matchers/final_scorer.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# matchers/final_scorer.py
|
| 2 |
+
from matchers.hard_matcher import calculate_hard_match_score, calculate_fuzzy_match
|
| 3 |
+
from matchers.semantic_matcher import SemanticMatcher
|
| 4 |
+
|
| 5 |
+
class ResumeScorer:
    """Combine hard keyword, semantic, and fuzzy matching into one weighted score."""

    def __init__(self):
        self.semantic_matcher = SemanticMatcher()

    def calculate_final_score(self, resume_data, jd_data):
        """Calculate the weighted final score combining all factors.

        ``resume_data`` / ``jd_data`` are dicts with ``"skills"`` (list[str])
        and ``"raw_text"`` (str) keys. Returns a dict with ``final_score``,
        ``verdict``, ``breakdown`` and ``suggestions``.
        """
        # Step 1: Hard match (exact keyword overlap)
        hard_match = calculate_hard_match_score(
            resume_data["skills"],
            jd_data["skills"]
        )

        # Step 2: Semantic match (AI embeddings).
        # FIX: SemanticMatcher exposes calculate_semantic_similarity(), which
        # returns {"semantic_score": ...}; the previous call to a nonexistent
        # calculate_semantic_score() raised AttributeError at runtime.
        semantic_match = self.semantic_matcher.calculate_semantic_similarity(
            resume_data["raw_text"],
            jd_data["raw_text"]
        )
        semantic_score = semantic_match.get("semantic_score", 0.0)

        # Step 3: Fuzzy match (skill-name variations)
        fuzzy_skills = calculate_fuzzy_match(
            resume_data["raw_text"],
            jd_data["skills"]
        )
        fuzzy_bonus = len(fuzzy_skills) * 2  # 2 points per fuzzy match

        # Weighted scoring formula
        final_score = (
            0.4 * hard_match["score"] +     # 40% keyword match
            0.5 * semantic_score +          # 50% semantic similarity
            0.1 * min(fuzzy_bonus, 20)      # 10% fuzzy bonus (max 20)
        )

        return {
            "final_score": round(final_score, 2),
            "verdict": self.get_verdict(final_score),
            "breakdown": {
                "hard_match": hard_match,
                "semantic_match": semantic_match,
                "fuzzy_matches": fuzzy_skills
            },
            "suggestions": self.generate_suggestions(hard_match["missing_skills"])
        }

    def get_verdict(self, score):
        """Convert a 0-100 score to a suitability verdict string."""
        if score >= 80:
            return "High Suitability"
        elif score >= 60:
            return "Medium Suitability"
        else:
            return "Low Suitability"

    def generate_suggestions(self, missing_skills):
        """Generate improvement suggestions as a list of strings.

        FIX: always returns a list; previously the no-missing-skills branch
        returned a bare string while the other branches returned a list,
        forcing callers to type-check the result.
        """
        if not missing_skills:
            return ["Great match! No major skills missing."]

        suggestions = []
        if len(missing_skills) <= 3:
            suggestions.append(f"Consider adding skills: {', '.join(missing_skills[:3])}")
        else:
            suggestions.append(f"Focus on key skills: {', '.join(missing_skills[:3])}")
            suggestions.append("Consider relevant projects or certifications")

        return suggestions
|
matchers/fuzzy_matcher.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# matchers/fuzzy_matcher.py - FUZZY SKILL MATCHING
|
| 2 |
+
from rapidfuzz import fuzz, process
|
| 3 |
+
from collections import defaultdict
|
| 4 |
+
|
| 5 |
+
class FuzzyMatcher:
    """Match skill names tolerantly via edit distance plus a table of known aliases."""

    def __init__(self):
        # Canonical skill name -> accepted spellings/aliases (all lowercase).
        self.skill_variations = {
            'javascript': ['js', 'javascript', 'ecmascript', 'node.js', 'nodejs'],
            'python': ['python', 'py', 'python3'],
            'typescript': ['typescript', 'ts'],
            'kubernetes': ['kubernetes', 'k8s', 'kube'],
            'postgresql': ['postgresql', 'postgres', 'psql'],
            'ci/cd': ['ci/cd', 'cicd', 'continuous integration', 'continuous deployment'],
            'docker': ['docker', 'containerization', 'containers'],
            'aws': ['aws', 'amazon web services', 'amazon cloud'],
            'react': ['react', 'reactjs', 'react.js'],
            'angular': ['angular', 'angularjs', 'angular.js']
        }
        print("✅ Fuzzy matcher initialized with skill variations")

    def fuzzy_skill_match(self, resume_skills, jd_skills, threshold=80):
        """Find fuzzy matches between resume and JD skills."""
        print("🔍 Running fuzzy skill matching...")

        matched_jd_skills = []
        pair_details = []

        for jd_skill in jd_skills:
            # Prefer a direct edit-distance hit; fall back to the alias table.
            candidate, confidence = self._best_direct_match(jd_skill, resume_skills, threshold)
            if candidate is None:
                candidate, confidence = self._check_skill_variations(jd_skill, resume_skills)

            if candidate and confidence > threshold:
                matched_jd_skills.append(jd_skill)
                pair_details.append({
                    "jd_skill": jd_skill,
                    "resume_skill": candidate,
                    "confidence": round(confidence, 1)
                })

        return {
            "fuzzy_matched_skills": matched_jd_skills,
            "match_details": pair_details,
            "fuzzy_score": len(matched_jd_skills)
        }

    def _best_direct_match(self, jd_skill, resume_skills, threshold):
        """Return the highest-scoring resume skill above *threshold*, or (None, 0)."""
        best, best_score = None, 0
        for resume_skill in resume_skills:
            score = fuzz.ratio(jd_skill.lower(), resume_skill.lower())
            if score > threshold and score > best_score:
                best, best_score = resume_skill, score
        return best, best_score

    def _check_skill_variations(self, jd_skill, resume_skills):
        """Resolve *jd_skill* against the alias table.

        Returns ``(resume_skill, confidence)`` or ``(None, 0)``.
        """
        jd_lower = jd_skill.lower()

        # Forward pass: the JD skill is a known alias; look for any sibling
        # alias among the resume skills.
        for variations in self.skill_variations.values():
            if jd_lower in variations:
                for resume_skill in resume_skills:
                    if resume_skill.lower() in variations:
                        return resume_skill, 95  # High confidence for variation match

        # Reverse pass: any resume skill sharing an alias group with the JD skill.
        for resume_skill in resume_skills:
            resume_lower = resume_skill.lower()
            for variations in self.skill_variations.values():
                if resume_lower in variations and jd_lower in variations:
                    return resume_skill, 90

        return None, 0

    def suggest_skill_improvements(self, missing_skills):
        """Suggest known alias skills for up to five missing skills."""
        suggestions = []

        for skill in missing_skills[:5]:  # Top 5 missing skills
            skill_lower = skill.lower()

            # Find the alias group this skill belongs to, if any.
            for variations in self.skill_variations.values():
                if skill_lower not in variations:
                    continue
                alternatives = [v for v in variations if v != skill_lower]
                if alternatives:
                    suggestions.append({
                        "missing_skill": skill,
                        "alternatives": alternatives[:3],
                        "suggestion": f"Consider learning {alternatives[0]} as an alternative to {skill}"
                    })
                break

        return suggestions
|
| 99 |
+
|
| 100 |
+
# Test function
|
| 101 |
+
def test_fuzzy_matcher():
    """Exercise fuzzy matching on a small resume/JD skill pair."""
    matcher = FuzzyMatcher()

    resume_skills = ["javascript", "python", "react", "nodejs", "aws"]
    jd_skills = ["js", "python3", "reactjs", "node.js", "amazon web services", "docker"]

    outcome = matcher.fuzzy_skill_match(resume_skills, jd_skills)
    print(f"✅ Fuzzy matches found: {len(outcome['fuzzy_matched_skills'])}")

    for pair in outcome['match_details']:
        print(f"   {pair['jd_skill']} ↔ {pair['resume_skill']} ({pair['confidence']}%)")

    return len(outcome['fuzzy_matched_skills']) > 0

if __name__ == "__main__":
    test_fuzzy_matcher()
|
matchers/hard_matcher.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# matchers/hard_matcher.py
|
| 2 |
+
def calculate_hard_match_score(resume_skills, jd_skills):
    """Calculate keyword-overlap coverage of JD skills by resume skills.

    Returns a dict with ``score`` (0-100 float), ``matched_count``,
    ``total_jd_skills``, ``matched_skills`` and ``missing_skills``.
    """
    resume_set = set(resume_skills)
    jd_set = set(jd_skills)

    # FIX: this path previously returned a bare 0.0 float while every other
    # path returned a dict, so callers indexing hard_match["score"] crashed
    # with a TypeError on job descriptions with no extracted skills.
    if not jd_set:  # avoid division by zero
        return {
            "score": 0.0,
            "matched_count": 0,
            "total_jd_skills": 0,
            "matched_skills": [],
            "missing_skills": []
        }

    matched_skills = resume_set & jd_set
    total_jd_skills = len(jd_set)
    coverage_percentage = len(matched_skills) / total_jd_skills * 100

    return {
        "score": round(coverage_percentage, 2),
        "matched_count": len(matched_skills),
        "total_jd_skills": total_jd_skills,
        "matched_skills": list(matched_skills),
        "missing_skills": list(jd_set - resume_set)
    }
|
| 19 |
+
|
| 20 |
+
def calculate_fuzzy_match(resume_text, jd_skills):
    """Fuzzy matching for skill variations (JavaScript vs JS)"""
    # Install: pip install rapidfuzz
    from rapidfuzz import fuzz

    haystack = resume_text.lower()
    hits = []

    for skill in jd_skills:
        # Accept the JD skill if any known spelling of it appears
        # (approximately) anywhere in the resume text.
        if any(fuzz.partial_ratio(alias, haystack) > 80
               for alias in get_skill_variations(skill)):
            hits.append(skill)

    return list(set(hits))
|
| 37 |
+
|
| 38 |
+
def get_skill_variations(skill):
    """Return the known spellings/aliases for a skill (lowercase lookup)."""
    alias_table = {
        "javascript": ["js", "javascript", "node.js", "nodejs"],
        "python": ["python", "py"],
        "tensorflow": ["tensorflow", "tf"],
        "kubernetes": ["kubernetes", "k8s"],
        "postgresql": ["postgresql", "postgres", "psql"]
    }
    # Unknown skills fall back to themselves as the only variation.
    return alias_table.get(skill.lower(), [skill])
|
matchers/semantic_matcher.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# matchers/semantic_matcher.py - ENHANCED SEMANTIC MATCHER
|
| 2 |
+
from sentence_transformers import SentenceTransformer, util
|
| 3 |
+
import numpy as np
|
| 4 |
+
|
| 5 |
+
class SemanticMatcher:
    """Compute semantic similarity between two texts via sentence embeddings.

    ``self.model`` is None when sentence-transformers is unavailable; scoring
    methods then return 0.0 with an ``error`` key instead of raising.
    """

    def __init__(self):
        try:
            # Using a lightweight, high-performance model
            self.model = SentenceTransformer('all-MiniLM-L6-v2')
            print("✅ Semantic matcher initialized with SentenceTransformer model")
        except Exception as e:
            print(f"⚠️ Could not load SentenceTransformer model: {e}")
            print("   Install with: pip install sentence-transformers")
            self.model = None

    def calculate_semantic_similarity(self, text1: str, text2: str) -> dict:
        """Return ``{"semantic_score": 0-100}`` cosine similarity of the texts."""
        if not self.model:
            return {
                "semantic_score": 0.0,
                "error": "SentenceTransformer model not loaded"
            }

        try:
            # Generate embeddings for both texts
            embedding1 = self.model.encode(text1, convert_to_tensor=True)
            embedding2 = self.model.encode(text2, convert_to_tensor=True)

            # Cosine similarity is in [-1, 1]; scale to a 0-100-style score.
            cosine_score = util.pytorch_cos_sim(embedding1, embedding2)

            return {
                "semantic_score": round(float(cosine_score[0][0]) * 100, 2)
            }
        except Exception as e:
            print(f"❌ Error during semantic similarity calculation: {e}")
            return {"semantic_score": 0.0, "error": str(e)}

    def calculate_semantic_score(self, text1: str, text2: str) -> dict:
        """Compatibility wrapper used by matchers.final_scorer.ResumeScorer.

        FIX: final_scorer calls ``calculate_semantic_score(...)`` and reads
        ``result["score"]``, but this method did not exist, so scoring raised
        AttributeError. Delegates to calculate_semantic_similarity and also
        exposes the value under the ``"score"`` key (original keys are kept).
        """
        result = self.calculate_semantic_similarity(text1, text2)
        result["score"] = result.get("semantic_score", 0.0)
        return result
|
parsers/__iniy__.py
ADDED
|
File without changes
|
parsers/__pycache__/cleaner.cpython-312.pyc
ADDED
|
Binary file (520 Bytes). View file
|
|
|
parsers/__pycache__/docx_parser.cpython-312.pyc
ADDED
|
Binary file (384 Bytes). View file
|
|
|
parsers/__pycache__/jd_parser.cpython-312.pyc
ADDED
|
Binary file (902 Bytes). View file
|
|
|
parsers/__pycache__/job_requirement_parser.cpython-312.pyc
ADDED
|
Binary file (20.2 kB). View file
|
|
|
parsers/__pycache__/pdf_parser.cpython-312.pyc
ADDED
|
Binary file (1.54 kB). View file
|
|
|
parsers/__pycache__/section_splitter.cpython-312.pyc
ADDED
|
Binary file (2.69 kB). View file
|
|
|
parsers/__pycache__/skill_extractor.cpython-312.pyc
ADDED
|
Binary file (2.61 kB). View file
|
|
|
parsers/__pycache__/skills_list.cpython-312.pyc
ADDED
|
Binary file (937 Bytes). View file
|
|
|
parsers/__pycache__/smart_skill_extractor.cpython-312.pyc
ADDED
|
Binary file (12 kB). View file
|
|
|
parsers/cleaner.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
|
| 3 |
+
def clean_text(text):
    """Remove extra spaces, line breaks, normalize text"""
    # Net effect: every run of whitespace (newlines included) becomes a
    # single space, and the result is trimmed at both ends.
    collapsed = re.sub(r'\n+', '\n', text)
    collapsed = re.sub(r'\s+', ' ', collapsed)
    return collapsed.strip()
|
parsers/docx_parser.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import docx2txt
|
| 2 |
+
|
| 3 |
+
def extract_text_docx(file_path):
    """Extract text from DOCX"""
    # docx2txt returns the document body as a single plain-text string.
    extracted = docx2txt.process(file_path)
    return extracted
|
parsers/entity_extractor.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# parsers/entity_extractor.py - Basic version
|
| 2 |
+
import re
|
| 3 |
+
|
| 4 |
+
class EntityExtractor:
    """Regex-only entity extraction, used when no NLP model is available."""

    def __init__(self):
        print("✅ Entity extractor initialized (basic mode)")

    def extract_skills_with_nlp(self, text):
        """Basic entity extraction"""
        # Basic mode has no NLP model, so every category is empty.
        return {category: [] for category in
                ("persons", "organizations", "technologies", "skills", "locations")}

    def extract_experience_years(self, text):
        """Extract years of experience using regex"""
        hits = re.findall(r'(\d+)\+?\s*years?\s*(?:of\s*)?(?:experience|exp)',
                          text.lower())
        candidate_years = [int(hit) for hit in hits if hit.isdigit()]
        return max(candidate_years) if candidate_years else 0

    def extract_education_info(self, text):
        """Extract education info"""
        text_lower = text.lower()
        known_degrees = ["bachelor", "master", "phd", "b.tech", "m.tech"]
        return {
            "degrees": [degree for degree in known_degrees if degree in text_lower],
            "fields": []
        }
|
parsers/jd_parser.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
from parsers.cleaner import clean_text
|
| 3 |
+
from parsers.skill_extractor import extract_skills
|
| 4 |
+
|
| 5 |
+
def parse_jd(file_text):
    """Parse job description and extract role + skills"""
    cleaned = clean_text(file_text)

    # Job role: look for a "job role / job title / position: ..." header.
    role_match = re.search(r"(job role|job title|position)\s*[:\-]\s*(.*)", cleaned, re.I)
    if role_match:
        job_role = role_match.group(2).strip()
    else:
        job_role = "Unknown"

    return {
        "role": job_role,
        "skills": extract_skills(cleaned),
        "raw_text": cleaned
    }
|
parsers/job_requirement_parser.py
ADDED
|
@@ -0,0 +1,449 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# parsers/job_requirement_parser.py - Advanced Job Requirement Analysis
|
| 2 |
+
import re
|
| 3 |
+
import json
|
| 4 |
+
from typing import Dict, List, Tuple
|
| 5 |
+
from dataclasses import dataclass
|
| 6 |
+
from parsers.smart_skill_extractor import SmartSkillExtractor
|
| 7 |
+
|
| 8 |
+
@dataclass
class JobRequirement:
    """Structured job requirement data"""
    role_title: str                  # Job title as stated in the posting
    company: str                     # Hiring company/organization name
    experience_required: str         # Free-text experience phrase, e.g. "3+ years"
    education_required: List[str]    # Degree/qualification phrases found in the JD
    must_have_skills: List[str]      # Skills marked required/mandatory
    good_to_have_skills: List[str]   # Skills marked preferred/nice-to-have
    certifications: List[str]        # Certification phrases found in the JD
    responsibilities: List[str]      # Duty/responsibility phrases
    benefits: List[str]              # Currently always empty (reserved for later use)
    location: str                    # Work location or remote/hybrid note
    employment_type: str             # Label produced by _determine_employment_type
    salary_range: str                # Free-text salary/compensation phrase
    industry: str                    # Label produced by _determine_industry
    seniority_level: str             # Label derived from role title + experience
|
| 25 |
+
|
| 26 |
+
class JobRequirementParser:
|
| 27 |
+
"""Parse job descriptions to extract structured requirements"""
|
| 28 |
+
|
| 29 |
+
def __init__(self):
|
| 30 |
+
self.skill_extractor = SmartSkillExtractor()
|
| 31 |
+
self.patterns = self._initialize_patterns()
|
| 32 |
+
print("✅ Job Requirement Parser initialized")
|
| 33 |
+
|
| 34 |
+
    def _initialize_patterns(self):
        """Initialize regex patterns for job parsing.

        Returns a dict mapping a field name to an ordered list of regex
        alternatives; callers try each in order and use the first match.
        All patterns are applied case-insensitively by the extractors.
        """
        return {
            # Job title: explicit "Job Title:" labels, a leading title line,
            # or a "hiring for ..." phrase.
            'role_title': [
                r'(?:job\s+title|position|role)[\s:]*([^\n.]{5,80})',
                r'^([A-Z][\w\s,]+(?:engineer|developer|manager|analyst|specialist|coordinator))\b',
                r'hiring\s+for[\s:]*([^\n.]{5,80})',
            ],
            # Company: explicit label, or "at/@ CompanyName".
            'company': [
                r'(?:company|organization)[\s:]*([^\n]+)',
                r'(?:at|@)\s+([A-Z][a-zA-Z\s&,.-]+?)(?:\s|$)',
            ],
            # Experience: ranges ("2-4 years"), minimums, or plain "N years".
            'experience': [
                r'(?:experience|exp)[\s:]*(\d+[\+\-]*\s*(?:to|\-)\s*\d+\s*years?|\d+\+?\s*years?)',
                r'(\d+[\+\-]*)\s*(?:to|\-)\s*(\d+)\s*years?\s*(?:of\s+)?(?:experience|exp)',
                r'minimum\s+(\d+\+?)\s*years?',
                r'(\d+)\+?\s*years?\s+(?:of\s+)?(?:experience|exp)',
            ],
            # Education: labels plus common degree names/abbreviations.
            'education': [
                r'(?:education|degree|qualification)[\s:]*([^\n]+)',
                r'(?:bachelor|master|phd|doctorate|diploma|b\.tech|m\.tech|bca|mca|bsc|msc)[\s\.]*([^\n]*)',
                r'(?:degree\s+in|graduated\s+in)\s+([^\n]+)',
            ],
            # Mandatory-requirement sections (captures up to the next period).
            'must_have': [
                r'(?:must\s+have|required|mandatory|essential)[\s:]*([^.]+)',
                r'(?:requirements|qualifications)[\s:]*([^.]+)',
                r'(?:should\s+have|need\s+to\s+have)[\s:]*([^.]+)',
            ],
            # Preferred/optional-requirement sections.
            'good_to_have': [
                r'(?:good\s+to\s+have|nice\s+to\s+have|preferred|bonus|plus)[\s:]*([^.]+)',
                r'(?:additional|optional)[\s:]*([^.]+)',
            ],
            # Responsibility/duty sections.
            'responsibilities': [
                r'(?:responsibilities|duties|tasks)[\s:]*([^.]+)',
                r'(?:you\s+will|role\s+involves)[\s:]*([^.]+)',
            ],
            # Certifications: generic mentions plus common vendor-certified phrases.
            'certifications': [
                r'(?:certification|certified|certificate)[\s:]*([^.]+)',
                r'(?:aws|azure|google\s+cloud|oracle|cisco|microsoft)\s+certified[\s:]*([^.]*)',
            ],
            # Salary: labels, currency-prefixed amounts, or "X to Y per month/year".
            'salary': [
                r'(?:salary|compensation|package)[\s:]*([^.\n]+)',
                r'(?:\$|₹|€|£)\s*([0-9,.-]+(?:\s*(?:to|\-)\s*[0-9,.-]+)?)',
                r'([0-9,]+)\s*(?:to|\-)\s*([0-9,]+)\s*(?:per\s+)?(?:month|year|annum)',
            ],
            # Location: labels or remote/hybrid/onsite wording.
            'location': [
                r'(?:location|based\s+in|office)[\s:]*([^.\n]+)',
                r'(?:remote|hybrid|onsite|work\s+from)[\s:]*([^.\n]*)',
            ]
        }
|
| 84 |
+
|
| 85 |
+
def parse_job_description(self, jd_text: str) -> JobRequirement:
|
| 86 |
+
"""Parse job description into structured requirements"""
|
| 87 |
+
|
| 88 |
+
if not jd_text:
|
| 89 |
+
return self._create_empty_requirement()
|
| 90 |
+
|
| 91 |
+
print("🔍 Parsing job requirements...")
|
| 92 |
+
|
| 93 |
+
# Extract basic information
|
| 94 |
+
role_title = self._extract_role_title(jd_text)
|
| 95 |
+
company = self._extract_company(jd_text)
|
| 96 |
+
experience = self._extract_experience(jd_text)
|
| 97 |
+
education = self._extract_education(jd_text)
|
| 98 |
+
location = self._extract_location(jd_text)
|
| 99 |
+
salary = self._extract_salary(jd_text)
|
| 100 |
+
|
| 101 |
+
# Extract skills and requirements
|
| 102 |
+
must_have_skills, good_to_have_skills = self._extract_skills_by_priority(jd_text)
|
| 103 |
+
certifications = self._extract_certifications(jd_text)
|
| 104 |
+
responsibilities = self._extract_responsibilities(jd_text)
|
| 105 |
+
|
| 106 |
+
# Determine job characteristics
|
| 107 |
+
employment_type = self._determine_employment_type(jd_text)
|
| 108 |
+
industry = self._determine_industry(jd_text, role_title)
|
| 109 |
+
seniority_level = self._determine_seniority(role_title, experience)
|
| 110 |
+
|
| 111 |
+
job_req = JobRequirement(
|
| 112 |
+
role_title=role_title,
|
| 113 |
+
company=company,
|
| 114 |
+
experience_required=experience,
|
| 115 |
+
education_required=education,
|
| 116 |
+
must_have_skills=must_have_skills,
|
| 117 |
+
good_to_have_skills=good_to_have_skills,
|
| 118 |
+
certifications=certifications,
|
| 119 |
+
responsibilities=responsibilities,
|
| 120 |
+
benefits=[], # Can be enhanced later
|
| 121 |
+
location=location,
|
| 122 |
+
employment_type=employment_type,
|
| 123 |
+
salary_range=salary,
|
| 124 |
+
industry=industry,
|
| 125 |
+
seniority_level=seniority_level
|
| 126 |
+
)
|
| 127 |
+
|
| 128 |
+
print(f"✅ Parsed job: {role_title} at {company}")
|
| 129 |
+
print(f" 📍 Location: {location}")
|
| 130 |
+
print(f" 💼 Experience: {experience}")
|
| 131 |
+
print(f" 🎯 Must-have skills: {len(must_have_skills)}")
|
| 132 |
+
print(f" ⭐ Good-to-have skills: {len(good_to_have_skills)}")
|
| 133 |
+
|
| 134 |
+
return job_req
|
| 135 |
+
|
| 136 |
+
def _extract_role_title(self, text: str) -> str:
|
| 137 |
+
"""Extract job role title"""
|
| 138 |
+
for pattern in self.patterns['role_title']:
|
| 139 |
+
match = re.search(pattern, text, re.IGNORECASE)
|
| 140 |
+
if match:
|
| 141 |
+
return match.group(1).strip()
|
| 142 |
+
|
| 143 |
+
# Fallback: look for common job titles
|
| 144 |
+
lines = text.split('\n')
|
| 145 |
+
for line in lines[:5]: # Check first 5 lines
|
| 146 |
+
line = line.strip()
|
| 147 |
+
if any(title in line.lower() for title in
|
| 148 |
+
['engineer', 'developer', 'manager', 'analyst', 'specialist']):
|
| 149 |
+
return line
|
| 150 |
+
|
| 151 |
+
return "Unknown Role"
|
| 152 |
+
|
| 153 |
+
def _extract_company(self, text: str) -> str:
|
| 154 |
+
"""Extract company name"""
|
| 155 |
+
for pattern in self.patterns['company']:
|
| 156 |
+
match = re.search(pattern, text, re.IGNORECASE)
|
| 157 |
+
if match:
|
| 158 |
+
return match.group(1).strip()
|
| 159 |
+
|
| 160 |
+
return "Unknown Company"
|
| 161 |
+
|
| 162 |
+
def _extract_experience(self, text: str) -> str:
    """Extract the experience requirement as free text.

    Falls back to "0-1 years" for fresher/entry-level postings and
    "Not specified" when no requirement is stated.
    """
    for regex in self.patterns['experience']:
        hit = re.search(regex, text, re.IGNORECASE)
        if hit:
            return hit.group().strip()

    # Fresher postings rarely state an explicit year range.
    if re.search(r'\b(?:fresher|entry\s+level|0\s+years?)\b', text, re.IGNORECASE):
        return "0-1 years"

    return "Not specified"
|
| 174 |
+
|
| 175 |
+
def _extract_education(self, text: str) -> List[str]:
    """Extract education requirements.

    Combines hits from the configured ``education`` patterns with a
    built-in list of common degree abbreviations.  Returns unique
    matches in first-seen order — the original ``list(set(...))`` made
    the result order nondeterministic — or ``["Any Graduate"]`` when
    nothing is found.
    """
    education = []

    for pattern in self.patterns['education']:
        for match in re.finditer(pattern, text, re.IGNORECASE):
            education.append(match.group().strip())

    # Common degree abbreviations; capture the rest of the sentence for
    # context (e.g. "B.Tech in Computer Science").
    degree_patterns = [
        r'\bb\.?tech\b', r'\bm\.?tech\b', r'\bbca\b', r'\bmca\b',
        r'\bbsc\b', r'\bmsc\b', r'\bba\b', r'\bmba\b',
        r'\bbachelor', r'\bmaster', r'\bphd\b', r'\bdoctorate\b'
    ]
    for pattern in degree_patterns:
        # Single search per pattern (the original scanned the text twice:
        # once to test, once to capture).
        match = re.search(pattern + r'[^.\n]*', text, re.IGNORECASE)
        if match:
            education.append(match.group().strip())

    # Deduplicate while preserving first-seen order.
    unique = list(dict.fromkeys(education))
    return unique if unique else ["Any Graduate"]
|
| 198 |
+
|
| 199 |
+
def _extract_skills_by_priority(self, text: str) -> Tuple[List[str], List[str]]:
    """Split extracted skills into (must_have, good_to_have) lists.

    All skills are first pulled via the smart extractor; each skill is
    then assigned to the priority bucket whose captured section text
    mentions it, falling back to the `_is_core_skill` heuristic.

    Improvements over the original: the unused ``text_lower`` variable
    is removed, the section strings are lowercased once instead of once
    per skill, and deduplication preserves first-seen order (the
    original ``list(set(...))`` was nondeterministic).
    """
    # Use smart extractor to get all skills mentioned anywhere.
    all_skills = self.skill_extractor.extract_skills_comprehensive(text)

    # Collect the raw text captured by each priority pattern.
    must_have_section = ""
    for pattern in self.patterns['must_have']:
        for match in re.finditer(pattern, text, re.IGNORECASE):
            must_have_section += " " + match.group(1)

    good_to_have_section = ""
    for pattern in self.patterns['good_to_have']:
        for match in re.finditer(pattern, text, re.IGNORECASE):
            good_to_have_section += " " + match.group(1)

    must_lower = must_have_section.lower()
    good_lower = good_to_have_section.lower()

    must_have = []
    good_to_have = []
    for skill in all_skills:
        skill_lower = skill.lower()
        # NOTE(review): substring membership can over-match very short
        # skill names; kept for behavioral parity with the pipeline.
        if skill_lower in must_lower:
            must_have.append(skill)
        elif skill_lower in good_lower:
            good_to_have.append(skill)
        elif self._is_core_skill(skill, text):
            # No explicit section hit: fall back to the emphasis heuristic.
            must_have.append(skill)
        else:
            good_to_have.append(skill)

    # Deduplicate while preserving order; must-have wins on overlap.
    must_have = list(dict.fromkeys(must_have))
    must_set = set(must_have)
    good_to_have = [s for s in dict.fromkeys(good_to_have) if s not in must_set]

    return must_have, good_to_have
|
| 248 |
+
|
| 249 |
+
def _is_core_skill(self, skill: str, text: str) -> bool:
|
| 250 |
+
"""Determine if a skill is core based on frequency and context"""
|
| 251 |
+
skill_lower = skill.lower()
|
| 252 |
+
text_lower = text.lower()
|
| 253 |
+
|
| 254 |
+
# Count mentions
|
| 255 |
+
mentions = text_lower.count(skill_lower)
|
| 256 |
+
|
| 257 |
+
# Check for emphasis keywords around the skill
|
| 258 |
+
emphasis_patterns = [
|
| 259 |
+
rf'\b(?:required|must|essential|mandatory|need)\b[^.]*{re.escape(skill_lower)}',
|
| 260 |
+
rf'{re.escape(skill_lower)}[^.]*\b(?:required|must|essential|mandatory)\b',
|
| 261 |
+
rf'\b(?:experience|expertise|proficient)\b[^.]*{re.escape(skill_lower)}',
|
| 262 |
+
rf'{re.escape(skill_lower)}[^.]*\b(?:years?|experience)\b'
|
| 263 |
+
]
|
| 264 |
+
|
| 265 |
+
for pattern in emphasis_patterns:
|
| 266 |
+
if re.search(pattern, text_lower):
|
| 267 |
+
return True
|
| 268 |
+
|
| 269 |
+
# If mentioned multiple times, likely core
|
| 270 |
+
return mentions >= 2
|
| 271 |
+
|
| 272 |
+
def _extract_certifications(self, text: str) -> List[str]:
    """Extract certification mentions; very short hits are discarded."""
    found = set()
    for regex in self.patterns['certifications']:
        for hit in re.finditer(regex, text, re.IGNORECASE):
            candidate = hit.group().strip()
            # Fragments of 5 characters or fewer are too short to be real.
            if len(candidate) > 5:
                found.add(candidate)
    return list(found)
|
| 284 |
+
|
| 285 |
+
def _extract_responsibilities(self, text: str) -> List[str]:
    """Extract up to 10 responsibility bullet points from the posting."""
    collected = []
    for regex in self.patterns['responsibilities']:
        for hit in re.finditer(regex, text, re.IGNORECASE):
            section = hit.group(1).strip()
            # Bullets may be separated by markers or plain line breaks.
            for item in re.split(r'[•\-\*]\s*|\n', section):
                item = item.strip()
                # Skip fragments too short to be a meaningful duty.
                if len(item) > 10:
                    collected.append(item)
    return collected[:10]
|
| 301 |
+
|
| 302 |
+
def _extract_location(self, text: str) -> str:
    """Extract the job location, falling back to a known-city keyword scan."""
    for regex in self.patterns['location']:
        hit = re.search(regex, text, re.IGNORECASE)
        if hit:
            return hit.group(1).strip()

    # Fallback: major Indian metros plus remote/hybrid keywords.
    city_hit = re.search(
        r'\b(?:bangalore|mumbai|delhi|hyderabad|chennai|pune|kolkata|ahmedabad|remote|hybrid)\b',
        text,
        re.IGNORECASE,
    )
    if city_hit:
        return city_hit.group()

    return "Not specified"
|
| 316 |
+
|
| 317 |
+
def _extract_salary(self, text: str) -> str:
    """Extract the salary/package text, or 'Not specified' when absent."""
    for regex in self.patterns['salary']:
        hit = re.search(regex, text, re.IGNORECASE)
        if hit:
            return hit.group().strip()
    return "Not specified"
|
| 325 |
+
|
| 326 |
+
def _determine_employment_type(self, text: str) -> str:
|
| 327 |
+
"""Determine employment type"""
|
| 328 |
+
text_lower = text.lower()
|
| 329 |
+
|
| 330 |
+
if 'intern' in text_lower or 'internship' in text_lower:
|
| 331 |
+
return "Internship"
|
| 332 |
+
elif 'contract' in text_lower or 'freelance' in text_lower:
|
| 333 |
+
return "Contract"
|
| 334 |
+
elif 'part time' in text_lower or 'part-time' in text_lower:
|
| 335 |
+
return "Part-time"
|
| 336 |
+
else:
|
| 337 |
+
return "Full-time"
|
| 338 |
+
|
| 339 |
+
def _determine_industry(self, text: str, role_title: str) -> str:
|
| 340 |
+
"""Determine industry based on job content"""
|
| 341 |
+
text_lower = (text + " " + role_title).lower()
|
| 342 |
+
|
| 343 |
+
industry_keywords = {
|
| 344 |
+
'Technology': ['software', 'tech', 'it', 'developer', 'engineer', 'programmer'],
|
| 345 |
+
'Finance': ['finance', 'banking', 'fintech', 'investment', 'trading'],
|
| 346 |
+
'Healthcare': ['healthcare', 'medical', 'hospital', 'pharma', 'clinical'],
|
| 347 |
+
'Education': ['education', 'teaching', 'learning', 'university', 'academic'],
|
| 348 |
+
'E-commerce': ['ecommerce', 'e-commerce', 'retail', 'shopping', 'marketplace'],
|
| 349 |
+
'Marketing': ['marketing', 'advertising', 'promotion', 'brand', 'digital marketing'],
|
| 350 |
+
'Consulting': ['consulting', 'advisory', 'strategy', 'management consulting'],
|
| 351 |
+
'Manufacturing': ['manufacturing', 'production', 'industrial', 'automotive'],
|
| 352 |
+
}
|
| 353 |
+
|
| 354 |
+
for industry, keywords in industry_keywords.items():
|
| 355 |
+
if any(keyword in text_lower for keyword in keywords):
|
| 356 |
+
return industry
|
| 357 |
+
|
| 358 |
+
return "General"
|
| 359 |
+
|
| 360 |
+
def _determine_seniority(self, role_title: str, experience: str) -> str:
|
| 361 |
+
"""Determine seniority level"""
|
| 362 |
+
title_lower = role_title.lower()
|
| 363 |
+
|
| 364 |
+
if any(word in title_lower for word in ['senior', 'lead', 'principal', 'architect', 'manager']):
|
| 365 |
+
return "Senior"
|
| 366 |
+
elif any(word in title_lower for word in ['junior', 'associate', 'entry', 'trainee']):
|
| 367 |
+
return "Junior"
|
| 368 |
+
elif 'intern' in title_lower:
|
| 369 |
+
return "Intern"
|
| 370 |
+
else:
|
| 371 |
+
# Determine by experience
|
| 372 |
+
if '0' in experience or 'fresher' in experience.lower():
|
| 373 |
+
return "Entry Level"
|
| 374 |
+
elif any(num in experience for num in ['1', '2', '3']):
|
| 375 |
+
return "Mid Level"
|
| 376 |
+
else:
|
| 377 |
+
return "Senior"
|
| 378 |
+
|
| 379 |
+
def _create_empty_requirement(self) -> JobRequirement:
    """Build a placeholder JobRequirement used when parsing fails."""
    # Safe defaults mirroring the field-by-field placeholders used
    # throughout the parser.
    defaults = {
        'role_title': "Unknown Role",
        'company': "Unknown Company",
        'experience_required': "Not specified",
        'education_required': ["Any Graduate"],
        'must_have_skills': [],
        'good_to_have_skills': [],
        'certifications': [],
        'responsibilities': [],
        'benefits': [],
        'location': "Not specified",
        'employment_type': "Full-time",
        'salary_range': "Not specified",
        'industry': "General",
        'seniority_level': "Not specified",
    }
    return JobRequirement(**defaults)
|
| 397 |
+
|
| 398 |
+
def export_to_json(self, job_req: JobRequirement) -> str:
    """Serialize a JobRequirement to a pretty-printed JSON string."""
    # vars() is the idiomatic spelling of instance.__dict__.
    return json.dumps(vars(job_req), indent=2)
|
| 401 |
+
|
| 402 |
+
# Test function
|
| 403 |
+
def test_job_parser():
    """Smoke test: parse a sample JD and verify must-have skills were found.

    Returns True when at least one must-have skill was extracted.
    """
    parser = JobRequirementParser()

    # Representative job description exercising every extraction path:
    # title, location, experience, must-have / good-to-have sections,
    # responsibilities and salary.
    sample_jd = """
    Senior Full Stack Developer - TechCorp Inc.

    Location: Bangalore, India (Hybrid)
    Experience: 3-5 years

    Job Description:
    We are looking for a Senior Full Stack Developer to join our growing team.

    Must Have Requirements:
    - 3+ years of experience in React.js and Node.js
    - Proficiency in JavaScript, TypeScript
    - Experience with MySQL and MongoDB
    - Knowledge of AWS cloud services
    - Bachelor's degree in Computer Science or related field

    Good to Have:
    - Experience with Docker and Kubernetes
    - Knowledge of microservices architecture
    - AWS certification preferred
    - Experience with CI/CD pipelines

    Responsibilities:
    - Develop and maintain web applications
    - Collaborate with cross-functional teams
    - Write clean, maintainable code
    - Participate in code reviews

    Package: 8-12 LPA
    """

    job_req = parser.parse_job_description(sample_jd)

    # Print a short summary for manual inspection.
    print("\n📋 Parsed Job Requirements:")
    print(f"Role: {job_req.role_title}")
    print(f"Company: {job_req.company}")
    print(f"Must-have skills: {job_req.must_have_skills}")
    print(f"Good-to-have skills: {job_req.good_to_have_skills}")

    # Success criterion: at least one must-have skill extracted.
    return len(job_req.must_have_skills) > 0
|
| 447 |
+
|
| 448 |
+
# Allow running this module directly as a quick smoke test.
if __name__ == "__main__":
    test_job_parser()
|
parsers/pdf_parser.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import fitz # PyMuPDF
|
| 2 |
+
import pdfplumber
|
| 3 |
+
import docx
|
| 4 |
+
|
| 5 |
+
def extract_text_pymupdf(file_path):
    """Extract text from a PDF using PyMuPDF (fitz)."""
    with fitz.open(file_path) as doc:
        # Concatenate every page's text in document order.
        return "".join(page.get_text() for page in doc)
|
| 12 |
+
|
| 13 |
+
def extract_text_pdfplumber(file_path):
    """Extract text from a PDF using pdfplumber."""
    with pdfplumber.open(file_path) as pdf:
        # extract_text() may return None for image-only pages.
        return "".join(page.extract_text() or "" for page in pdf.pages)
|
| 20 |
+
|
| 21 |
+
def extract_text_docx(file_path):
    """Extract text from a DOCX file, one paragraph per line."""
    document = docx.Document(file_path)
    return "\n".join(paragraph.text for paragraph in document.paragraphs)
|
parsers/section_splitter.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# parsers/section_splitter.py - FIXED VERSION
|
| 2 |
+
import re
|
| 3 |
+
|
| 4 |
+
def split_sections(text):
    """Split resume text into sections (skills, experience, education, ...).

    Section headers are located with regexes on a whitespace-flattened
    copy of the text; the span between consecutive headers becomes that
    section's content.  When no header is found, the whole text goes
    under 'general' and a keyword fallback tries to recover a skills
    section.

    Fix over the original: the unused ``current_section`` variable has
    been removed.
    """
    sections = {}

    # Flatten newlines so headers are matched on one continuous line.
    text = text.replace('\n', ' ').strip()

    # Common section header spellings (matched case-insensitively).
    section_patterns = [
        r'(professional\s+summary|summary|objective)',
        r'(technical\s+skills|skills|core\s+competencies|technologies)',
        r'(work\s+experience|experience|employment|professional\s+experience)',
        r'(education|academic\s+background|qualifications)',
        r'(projects|personal\s+projects|key\s+projects)',
        r'(certifications|certificates|credentials)',
        r'(achievements|accomplishments|awards)'
    ]

    # Record every header occurrence with its position in the text.
    section_starts = []
    for pattern in section_patterns:
        for match in re.finditer(pattern, text, re.IGNORECASE):
            section_starts.append((match.start(), match.group().lower().strip()))
    section_starts.sort()

    if not section_starts:
        # No recognizable headers: keep everything and try a keyword fallback.
        sections["general"] = text
        sections["skills"] = extract_skills_section_fallback(text)
    else:
        for i, (start_pos, section_name) in enumerate(section_starts):
            # Each section runs until the next header (or end of text).
            end_pos = section_starts[i + 1][0] if i + 1 < len(section_starts) else len(text)
            # Strip punctuation from the header to build a clean key.
            clean_name = re.sub(r'[^\w\s]', '', section_name).strip()
            sections[clean_name] = text[start_pos:end_pos].strip()

    return sections
|
| 52 |
+
|
| 53 |
+
def extract_skills_section_fallback(text):
    """Best-effort skills extraction used when no section headers are found.

    Captures the text that follows common skills-related keywords and
    joins all captures into a single string.
    """
    indicator_patterns = (
        r'programming languages?:?\s*([^.]*)',
        r'technical skills?:?\s*([^.]*)',
        r'technologies?:?\s*([^.]*)',
        r'tools?:?\s*([^.]*)',
        r'frameworks?:?\s*([^.]*)',
        r'languages?:?\s*([^.]*)'
    )

    pieces = []
    for pattern in indicator_patterns:
        pieces.extend(re.findall(pattern, text, re.IGNORECASE))

    return " ".join(pieces).strip() if pieces else ""
|
parsers/skill_extractor.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# parsers/skill_extractor.py - ENHANCED VERSION
|
| 2 |
+
import re
|
| 3 |
+
from parsers.skills_list import skills
|
| 4 |
+
|
| 5 |
+
def extract_skills(text):
    """Extract known skills from *text* by dictionary matching.

    Each entry of the shared ``skills`` list is matched as a whole word,
    with a tolerance for ".ext"-style suffixes (so "node" also matches
    "node.js").  A small alias table maps common abbreviations back to
    the canonical skill name (js -> javascript, k8s -> kubernetes, ...).

    Fixes over the original: the fallback patterns had no word
    boundaries, so short skills matched inside longer words ("java"
    inside "javascript", "react" inside "reactive"), and the alias check
    used plain substring membership ("py" matched inside "happy").
    """
    if not text:
        return []

    text_lower = text.lower()
    found_skills = set()

    for skill in skills:
        skill_lower = skill.lower()
        escaped = re.escape(skill_lower)
        # \b only works next to word characters; skills like "c++" or
        # "c#" end in symbols, so boundaries are applied conditionally.
        prefix = r'\b' if skill_lower[0].isalnum() else ''
        suffix = r'\b' if skill_lower[-1].isalnum() else ''
        pattern = rf'{prefix}{escaped}(?:\s*\.\s*\w+)?{suffix}'
        if re.search(pattern, text_lower):
            found_skills.add(skill)

    # Map common aliases/variations back to the canonical skill name.
    skill_variations = {
        'javascript': ['js', 'javascript', 'ecmascript'],
        'python': ['python', 'py'],
        'node.js': ['nodejs', 'node.js', 'node js'],
        'postgresql': ['postgres', 'postgresql', 'psql'],
        'kubernetes': ['k8s', 'kubernetes'],
        'docker': ['docker', 'containerization'],
        'ci/cd': ['ci/cd', 'cicd', 'continuous integration', 'continuous deployment']
    }
    for main_skill, variations in skill_variations.items():
        # Only add skills that are part of the shared dictionary.
        if main_skill in found_skills or main_skill not in skills:
            continue
        for variation in variations:
            var_lower = variation.lower()
            v_prefix = r'\b' if var_lower[0].isalnum() else ''
            v_suffix = r'\b' if var_lower[-1].isalnum() else ''
            # Word-bounded so e.g. 'py' does not match inside 'happy'.
            if re.search(rf'{v_prefix}{re.escape(var_lower)}{v_suffix}', text_lower):
                found_skills.add(main_skill)
                break

    return list(found_skills)
|
| 49 |
+
|
| 50 |
+
def debug_skills_extraction(text):
    """Verbose wrapper around extract_skills for troubleshooting."""
    print(f"🔍 Text length: {len(text)}")
    print(f"🔍 First 300 chars: {text[:300]}")

    # Quick manual scan for a few well-known skills as a sanity check.
    lowered = text.lower()
    obvious_skills = ['python', 'javascript', 'react', 'node.js', 'aws', 'docker']
    found_obvious = [s for s in obvious_skills if s.lower() in lowered]
    print(f"🔍 Obvious skills found: {found_obvious}")

    skills_found = extract_skills(text)
    print(f"🔍 Total skills extracted: {len(skills_found)}")
    print(f"🔍 Skills: {skills_found}")

    return skills_found
|
parsers/skills_list.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# parsers/skills_list.py
# Flat dictionary of known skills used by the keyword matcher.
# Fix: deduplicated — "react", "angular" and "vue" were previously listed
# twice (under both Web Technologies and Frameworks & Libraries), which
# caused redundant matching work downstream.
skills = [
    # Programming Languages
    "python", "java", "javascript", "js", "typescript", "c++", "c#", "php", "go", "rust", "kotlin", "swift", "ruby",

    # Web Technologies
    "html", "css", "react", "angular", "vue", "nodejs", "node.js", "express", "django", "flask", "fastapi", "spring",

    # Databases
    "mysql", "postgresql", "postgres", "mongodb", "sqlite", "oracle", "redis", "cassandra", "dynamodb",

    # Cloud & DevOps
    "aws", "azure", "gcp", "google cloud", "docker", "kubernetes", "k8s", "terraform", "jenkins", "ci/cd", "cicd",

    # Data Science & AI
    "pandas", "numpy", "matplotlib", "seaborn", "scikit-learn", "sklearn", "tensorflow", "pytorch", "keras", "opencv",

    # Frameworks & Libraries (web frameworks listed above are not repeated)
    "jquery", "bootstrap", "tailwind", "material-ui", "redux",

    # Tools & Technologies
    "git", "github", "gitlab", "jira", "confluence", "slack", "trello", "figma", "photoshop",

    # Operating Systems
    "linux", "windows", "macos", "ubuntu", "centos",

    # API & Protocols
    "rest", "api", "graphql", "soap", "json", "xml", "http", "https",

    # Testing
    "junit", "pytest", "selenium", "cucumber", "postman", "jest",

    # Methodologies
    "agile", "scrum", "kanban", "devops", "microservices"
]
|
parsers/smart_skill_extractor.py
ADDED
|
@@ -0,0 +1,244 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# parsers/smart_skill_extractor.py - AI-Powered Skill Detection
|
| 2 |
+
import re
|
| 3 |
+
from collections import Counter
|
| 4 |
+
|
| 5 |
+
class SmartSkillExtractor:
|
| 6 |
+
"""AI-powered skill extraction that finds ANY skill mentioned in text"""
|
| 7 |
+
|
| 8 |
+
def __init__(self):
|
| 9 |
+
self.skill_database = self._load_comprehensive_skills()
|
| 10 |
+
self.patterns = self._create_extraction_patterns()
|
| 11 |
+
print(f"✅ Smart Skill Extractor loaded with {len(self.skill_database)} skills")
|
| 12 |
+
|
| 13 |
+
def _load_comprehensive_skills(self):
|
| 14 |
+
"""Load comprehensive skill database covering all domains"""
|
| 15 |
+
|
| 16 |
+
# Programming Languages
|
| 17 |
+
programming = [
|
| 18 |
+
'python', 'java', 'javascript', 'typescript', 'c++', 'c#', 'c', 'php', 'ruby', 'go', 'rust',
|
| 19 |
+
'kotlin', 'swift', 'scala', 'r', 'matlab', 'perl', 'bash', 'powershell', 'sql', 'html',
|
| 20 |
+
'css', 'sass', 'less', 'coffeescript', 'dart', 'elixir', 'erlang', 'f#', 'haskell',
|
| 21 |
+
'julia', 'lua', 'objective-c', 'vb.net', 'assembly', 'cobol', 'fortran'
|
| 22 |
+
]
|
| 23 |
+
|
| 24 |
+
# Frameworks & Libraries
|
| 25 |
+
frameworks = [
|
| 26 |
+
'react', 'angular', 'vue', 'svelte', 'ember', 'backbone', 'jquery', 'bootstrap', 'tailwind',
|
| 27 |
+
'django', 'flask', 'fastapi', 'express', 'nodejs', 'spring', 'hibernate', 'struts',
|
| 28 |
+
'rails', 'sinatra', 'laravel', 'symfony', 'codeigniter', 'asp.net', 'entity framework',
|
| 29 |
+
'xamarin', 'flutter', 'react native', 'ionic', 'cordova', 'electron', 'unity', 'unreal',
|
| 30 |
+
'tensorflow', 'pytorch', 'keras', 'scikit-learn', 'pandas', 'numpy', 'matplotlib',
|
| 31 |
+
'seaborn', 'plotly', 'opencv', 'nltk', 'spacy'
|
| 32 |
+
]
|
| 33 |
+
|
| 34 |
+
# Databases
|
| 35 |
+
databases = [
|
| 36 |
+
'mysql', 'postgresql', 'mongodb', 'redis', 'cassandra', 'elasticsearch', 'neo4j',
|
| 37 |
+
'couchdb', 'dynamodb', 'firestore', 'sqlite', 'oracle', 'sql server', 'mariadb',
|
| 38 |
+
'influxdb', 'clickhouse', 'bigquery', 'snowflake', 'redshift'
|
| 39 |
+
]
|
| 40 |
+
|
| 41 |
+
# Cloud & DevOps
|
| 42 |
+
cloud_devops = [
|
| 43 |
+
'aws', 'azure', 'gcp', 'docker', 'kubernetes', 'jenkins', 'gitlab ci', 'github actions',
|
| 44 |
+
'terraform', 'ansible', 'puppet', 'chef', 'vagrant', 'consul', 'vault', 'prometheus',
|
| 45 |
+
'grafana', 'elk stack', 'nginx', 'apache', 'tomcat',
|
| 46 |
+
'linux', 'ubuntu', 'centos', 'windows server', 'git', 'svn'
|
| 47 |
+
]
|
| 48 |
+
|
| 49 |
+
# Data Science & AI
|
| 50 |
+
data_ai = [
|
| 51 |
+
'machine learning', 'deep learning', 'artificial intelligence', 'data science',
|
| 52 |
+
'data analysis', 'data mining', 'big data', 'analytics', 'statistics', 'regression',
|
| 53 |
+
'classification', 'clustering', 'nlp', 'computer vision', 'neural networks'
|
| 54 |
+
]
|
| 55 |
+
|
| 56 |
+
# Business & Soft Skills
|
| 57 |
+
business_soft = [
|
| 58 |
+
'project management', 'agile', 'scrum', 'kanban', 'leadership', 'communication',
|
| 59 |
+
'teamwork', 'problem solving', 'time management', 'quality assurance',
|
| 60 |
+
'business analysis', 'user research', 'ux design', 'ui design'
|
| 61 |
+
]
|
| 62 |
+
|
| 63 |
+
# Tools & Platforms
|
| 64 |
+
tools = [
|
| 65 |
+
'jira', 'confluence', 'slack', 'figma', 'photoshop', 'excel', 'powerpoint',
|
| 66 |
+
'salesforce', 'google analytics', 'seo', 'automation', 'crm', 'erp'
|
| 67 |
+
]
|
| 68 |
+
|
| 69 |
+
# Combine all skills
|
| 70 |
+
all_skills = (programming + frameworks + databases + cloud_devops +
|
| 71 |
+
data_ai + business_soft + tools)
|
| 72 |
+
|
| 73 |
+
# Create variations mapping
|
| 74 |
+
skill_variations = {}
|
| 75 |
+
for skill in all_skills:
|
| 76 |
+
variations = [skill, skill.replace(' ', ''), skill.replace(' ', '_'),
|
| 77 |
+
skill.replace(' ', '-'), skill.upper(), skill.lower()]
|
| 78 |
+
|
| 79 |
+
# Add common abbreviations
|
| 80 |
+
abbreviations = {
|
| 81 |
+
'javascript': ['js', 'javascript'],
|
| 82 |
+
'typescript': ['ts', 'typescript'],
|
| 83 |
+
'artificial intelligence': ['ai', 'artificial intelligence'],
|
| 84 |
+
'machine learning': ['ml', 'machine learning'],
|
| 85 |
+
'amazon web services': ['aws', 'amazon web services'],
|
| 86 |
+
'google cloud platform': ['gcp', 'google cloud'],
|
| 87 |
+
'kubernetes': ['k8s', 'kubernetes'],
|
| 88 |
+
'user experience': ['ux', 'user experience'],
|
| 89 |
+
'user interface': ['ui', 'user interface'],
|
| 90 |
+
'structured query language': ['sql', 'structured query language'],
|
| 91 |
+
'cascading style sheets': ['css', 'cascading style sheets'],
|
| 92 |
+
'hypertext markup language': ['html', 'hypertext markup language']
|
| 93 |
+
}
|
| 94 |
+
|
| 95 |
+
skill_key = skill.lower()
|
| 96 |
+
if skill_key in abbreviations:
|
| 97 |
+
variations.extend(abbreviations[skill_key])
|
| 98 |
+
|
| 99 |
+
for var in variations:
|
| 100 |
+
if var and len(var) > 1:
|
| 101 |
+
skill_variations[var.lower()] = skill
|
| 102 |
+
|
| 103 |
+
return skill_variations
|
| 104 |
+
|
| 105 |
+
def _create_extraction_patterns(self):
|
| 106 |
+
"""Create regex patterns for skill extraction"""
|
| 107 |
+
return {
|
| 108 |
+
'experience_with': r'\b(?:experience|expertise|proficient|skilled)\s+(?:in|with|using)\s+([a-zA-Z+#.\s-]+)\b',
|
| 109 |
+
'years_exp': r'\b(\d+)\+?\s*(?:years?|yrs?)\s+(?:of\s+)?(?:experience|exp)\s+(?:in|with|using)\s+([a-zA-Z+#.\s-]+)\b',
|
| 110 |
+
'worked_with': r'\b(?:worked|working|used|using)\s+(?:with|on)?\s*([a-zA-Z+#.\s-]+)\b',
|
| 111 |
+
'technologies': r'\b(?:technologies|tools|frameworks|skills)[\s:]*([a-zA-Z+#.\s,-]+)\b',
|
| 112 |
+
'skills': r'\b(?:skills?|competencies)[\s:]*([a-zA-Z+#.\s,-]+)\b'
|
| 113 |
+
}
|
| 114 |
+
|
| 115 |
+
def extract_skills_comprehensive(self, text):
|
| 116 |
+
"""Extract skills using multiple techniques"""
|
| 117 |
+
if not text or len(text.strip()) < 10:
|
| 118 |
+
return []
|
| 119 |
+
|
| 120 |
+
found_skills = set()
|
| 121 |
+
text_lower = text.lower()
|
| 122 |
+
|
| 123 |
+
# Method 1: Direct skill matching
|
| 124 |
+
for skill_variant, canonical_skill in self.skill_database.items():
|
| 125 |
+
if skill_variant in text_lower:
|
| 126 |
+
# Verify it's a whole word match
|
| 127 |
+
pattern = r'\b' + re.escape(skill_variant) + r'\b'
|
| 128 |
+
if re.search(pattern, text_lower):
|
| 129 |
+
found_skills.add(canonical_skill)
|
| 130 |
+
|
| 131 |
+
# Method 2: Pattern-based extraction
|
| 132 |
+
for pattern_name, pattern in self.patterns.items():
|
| 133 |
+
matches = re.finditer(pattern, text_lower, re.IGNORECASE)
|
| 134 |
+
for match in matches:
|
| 135 |
+
if len(match.groups()) > 0 and match.group(1):
|
| 136 |
+
# Clean and process the captured group
|
| 137 |
+
skill_text = match.group(1).strip(' ,-')
|
| 138 |
+
extracted_skills = self._process_skill_text(skill_text)
|
| 139 |
+
found_skills.update(extracted_skills)
|
| 140 |
+
|
| 141 |
+
# Method 3: Context-based extraction
|
| 142 |
+
context_skills = self._extract_contextual_skills(text)
|
| 143 |
+
found_skills.update(context_skills)
|
| 144 |
+
|
| 145 |
+
return sorted(list(found_skills))
|
| 146 |
+
|
| 147 |
+
def _process_skill_text(self, skill_text):
|
| 148 |
+
"""Process extracted skill text to find valid skills"""
|
| 149 |
+
skills = set()
|
| 150 |
+
|
| 151 |
+
# Split by common separators
|
| 152 |
+
parts = re.split(r'[,;/\|\n]', skill_text)
|
| 153 |
+
|
| 154 |
+
for part in parts:
|
| 155 |
+
part = part.strip(' ,-()[]{}')
|
| 156 |
+
if len(part) > 1:
|
| 157 |
+
# Check if it's in our skill database
|
| 158 |
+
part_lower = part.lower()
|
| 159 |
+
if part_lower in self.skill_database:
|
| 160 |
+
skills.add(self.skill_database[part_lower])
|
| 161 |
+
|
| 162 |
+
# Check individual words
|
| 163 |
+
words = part.split()
|
| 164 |
+
for word in words:
|
| 165 |
+
word = word.strip(' ,-()[]{}').lower()
|
| 166 |
+
if word in self.skill_database:
|
| 167 |
+
skills.add(self.skill_database[word])
|
| 168 |
+
|
| 169 |
+
return skills
|
| 170 |
+
|
| 171 |
+
def _extract_contextual_skills(self, text):
|
| 172 |
+
"""Extract skills based on context clues"""
|
| 173 |
+
skills = set()
|
| 174 |
+
|
| 175 |
+
# Look for skills in specific sections
|
| 176 |
+
section_patterns = {
|
| 177 |
+
r'(?:technical\s+)?skills?[\s:]+([^.]+)': 'skills_section',
|
| 178 |
+
r'technologies?[\s:]+([^.]+)': 'tech_section',
|
| 179 |
+
r'tools?[\s:]+([^.]+)': 'tools_section'
|
| 180 |
+
}
|
| 181 |
+
|
| 182 |
+
for pattern, section_type in section_patterns.items():
|
| 183 |
+
matches = re.finditer(pattern, text, re.IGNORECASE)
|
| 184 |
+
for match in matches:
|
| 185 |
+
if len(match.groups()) > 0:
|
| 186 |
+
content = match.group(1)
|
| 187 |
+
# Extract skills from this section
|
| 188 |
+
section_skills = self._process_skill_text(content)
|
| 189 |
+
skills.update(section_skills)
|
| 190 |
+
|
| 191 |
+
return skills
|
| 192 |
+
|
| 193 |
+
def get_skill_categories(self, skills):
    """Group extracted skills into display categories.

    Each skill is assigned to the first category whose keyword list matches
    (case-insensitive substring test); anything unmatched falls into
    'Business & Soft Skills'.  Empty categories are dropped from the result.

    Fix: the old check used plain substring containment for 'ai', which
    misfiled any skill merely containing the letters "ai" (e.g. "Email
    Marketing") under Data Science & AI.  'ai' is now matched as a whole
    word; the multi-word AI keywords keep substring matching.
    """
    categories = {
        'Programming Languages': [],
        'Frameworks & Libraries': [],
        'Databases': [],
        'Cloud & DevOps': [],
        'Data Science & AI': [],
        'Business & Soft Skills': [],
        'Tools & Platforms': []
    }

    for skill in skills:
        skill_lower = skill.lower()

        if any(lang in skill_lower for lang in ['python', 'java', 'javascript', 'c++', 'php', 'ruby']):
            categories['Programming Languages'].append(skill)
        elif any(fw in skill_lower for fw in ['react', 'angular', 'django', 'spring', 'tensorflow']):
            categories['Frameworks & Libraries'].append(skill)
        elif any(db in skill_lower for db in ['mysql', 'mongodb', 'postgresql', 'redis']):
            categories['Databases'].append(skill)
        elif any(cloud in skill_lower for cloud in ['aws', 'azure', 'docker', 'kubernetes']):
            categories['Cloud & DevOps'].append(skill)
        elif (any(kw in skill_lower for kw in ['machine learning', 'data science', 'analytics'])
              or re.search(r'\bai\b', skill_lower)):
            categories['Data Science & AI'].append(skill)
        elif any(tool in skill_lower for tool in ['jira', 'figma', 'photoshop', 'excel']):
            categories['Tools & Platforms'].append(skill)
        else:
            categories['Business & Soft Skills'].append(skill)

    # Remove empty categories
    return {k: v for k, v in categories.items() if v}
|
| 226 |
+
|
| 227 |
+
# Test function
|
| 228 |
+
def test_smart_extractor():
    """Smoke-test the smart skill extractor against a tiny sample resume."""
    extractor = SmartSkillExtractor()

    test_text = """
    John Doe - Software Engineer
    Skills: Python, JavaScript, React, MySQL, AWS
    Experience: 3 years of experience in full-stack development
    """

    extracted = extractor.extract_skills_comprehensive(test_text)
    print(f"✅ Extracted {len(extracted)} skills: {extracted}")

    # Passes as long as at least one skill was recognized.
    return len(extracted) > 0
|
| 242 |
+
|
| 243 |
+
# Allow running this module directly as a quick smoke test.
if __name__ == "__main__":
    test_smart_extractor()
|
parsers/universal_parser.py
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# parsers/universal_parser.py - Universal Resume Parser
|
| 2 |
+
import os
|
| 3 |
+
import re
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
|
| 6 |
+
class UniversalResumeParser:
    """Universal parser that handles multiple resume formats.

    Supports .pdf, .docx, .txt and legacy .doc files.  Extraction never
    raises to the caller: missing optional dependencies and unreadable
    files fall back to lossy text reads, and as a last resort a
    descriptive error string is returned in place of the text.

    Fix: the two bare ``except:`` clauses are narrowed to
    ``except Exception:`` so KeyboardInterrupt/SystemExit are no longer
    swallowed by the fallbacks.
    """

    def __init__(self):
        # Dispatch table: file extension -> bound extractor method.
        self.supported_formats = {
            '.pdf': self._extract_from_pdf,
            '.docx': self._extract_from_docx,
            '.txt': self._extract_from_txt,
            '.doc': self._extract_from_doc
        }
        print("✅ Universal Resume Parser initialized")

    def extract_text(self, file_path):
        """Extract and clean text from any supported file format.

        Unknown extensions fall back to a plain UTF-8 read; a failed
        extraction falls back to a lossy text read; if everything fails,
        an error string is returned (this method never raises).
        """
        try:
            file_ext = Path(file_path).suffix.lower()

            if file_ext not in self.supported_formats:
                # Fallback to text reading
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        return f.read()
                except Exception:  # narrowed from a bare except
                    raise ValueError(f"Unsupported format: {file_ext}")

            print(f"🔍 Processing {file_ext} file...")

            # Use appropriate extractor
            extractor = self.supported_formats[file_ext]
            text = extractor(file_path)

            # Clean text
            enhanced_text = self._enhance_extracted_text(text)

            print(f"✅ Extracted {len(enhanced_text)} characters")
            return enhanced_text

        except Exception as e:
            print(f"❌ Extraction failed: {e}")
            # Try basic text reading as fallback
            try:
                with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                    return f.read()
            except Exception:  # narrowed from a bare except
                return f"Error extracting from {file_path}: {str(e)}"

    def _extract_from_pdf(self, file_path):
        """Extract PDF text via the project parser, falling back to PyMuPDF."""
        try:
            from parsers.pdf_parser import extract_text_pymupdf
            return extract_text_pymupdf(file_path)
        except ImportError:
            # Fallback if the project helper (PyMuPDF wrapper) is unavailable
            try:
                import fitz
                doc = fitz.open(file_path)
                text = ""
                for page in doc:
                    text += page.get_text()
                doc.close()
                return text
            except ImportError:
                return "PDF extraction requires PyMuPDF package"
            except Exception as e:
                return f"PDF extraction error: {str(e)}"

    def _extract_from_docx(self, file_path):
        """Extract DOCX text via the project parser, falling back to python-docx."""
        try:
            from parsers.docx_parser import extract_text_docx
            return extract_text_docx(file_path)
        except ImportError:
            try:
                import docx
                doc = docx.Document(file_path)
                text = ""
                for paragraph in doc.paragraphs:
                    text += paragraph.text + "\n"
                return text
            except ImportError:
                return "DOCX extraction requires python-docx package"
            except Exception as e:
                return f"DOCX extraction error: {str(e)}"

    def _extract_from_txt(self, file_path):
        """Extract from a text file, trying several encodings in order.

        NOTE(review): 'latin-1' decodes any byte sequence, so the binary
        re-read below is effectively unreachable — kept for safety.
        """
        encodings = ['utf-8', 'latin-1', 'cp1252', 'iso-8859-1']

        for encoding in encodings:
            try:
                with open(file_path, 'r', encoding=encoding) as f:
                    return f.read()
            except UnicodeDecodeError:
                continue

        # If all encodings fail, decode lossily from the raw bytes.
        try:
            with open(file_path, 'rb') as f:
                raw_data = f.read()
                return raw_data.decode('utf-8', errors='ignore')
        except Exception as e:
            return f"Text extraction error: {str(e)}"

    def _extract_from_doc(self, file_path):
        """Extract from legacy DOC format via docx2txt (optional dependency)."""
        try:
            import docx2txt
            text = docx2txt.process(file_path)
            return text
        except ImportError:
            return "DOC format requires docx2txt package (pip install docx2txt)"
        except Exception as e:
            return f"DOC extraction error: {str(e)}"

    def _enhance_extracted_text(self, text):
        """Clean and normalize extracted text.

        Collapses runs of blank lines and horizontal whitespace, then
        repairs e-mail addresses and US-style phone numbers that were
        split by stray spaces during extraction.  Empty or very short
        input is returned unchanged.
        """
        if not text or len(text.strip()) < 10:
            return text

        # Remove excessive whitespace
        text = re.sub(r'\n\s*\n', '\n\n', text)
        text = re.sub(r'[ \t]+', ' ', text)

        # Fix common extraction issues
        text = re.sub(r'([a-zA-Z0-9._%+-]+)\s*@\s*([a-zA-Z0-9.-]+\.[a-zA-Z]{2,})', r'\1@\2', text)
        text = re.sub(r'(\d{3})\s*-?\s*(\d{3})\s*-?\s*(\d{4})', r'\1-\2-\3', text)

        return text.strip()
|
| 134 |
+
|
| 135 |
+
def test_universal_parser():
    """Smoke-test the universal parser's text-cleanup path."""
    instance = UniversalResumeParser()
    sample = "Test resume text"
    _ = instance._enhance_extracted_text(sample)
    print("✅ Universal parser test completed")
    return True
|
| 142 |
+
|
| 143 |
+
# Allow running this module directly as a quick smoke test.
if __name__ == "__main__":
    test_universal_parser()
|
scoring/__pycache__/relevance_scorer.cpython-312.pyc
ADDED
|
Binary file (12.3 kB). View file
|
|
|
scoring/relevance_scorer.py
ADDED
|
@@ -0,0 +1,314 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# scoring/relevance_scorer.py - Job-Specific Resume Relevance Scoring
|
| 2 |
+
from dataclasses import dataclass
|
| 3 |
+
from typing import Dict, List, Tuple
|
| 4 |
+
import re
|
| 5 |
+
|
| 6 |
+
@dataclass
class RelevanceScore:
    """Structured relevance scoring result"""
    overall_score: float  # 0-100
    # Component scores, each 0-100 (see JobRelevanceScorer).
    skill_match_score: float
    experience_match_score: float
    education_match_score: float

    # Job skills found / not found in the resume, split by priority.
    matched_must_have: List[str]
    matched_good_to_have: List[str]
    missing_must_have: List[str]
    missing_good_to_have: List[str]

    # Human-readable experience gap summary.
    experience_gap: str
    # Required education entries the resume did not match.
    education_gap: List[str]

    fit_verdict: str  # High/Medium/Low
    # Confidence in the verdict, 0-100.
    confidence_score: float

    # Suggestion tiers produced by the scorer.
    improvement_suggestions: List[str]
    quick_wins: List[str]
    long_term_goals: List[str]
|
| 28 |
+
|
| 29 |
+
class JobRelevanceScorer:
|
| 30 |
+
"""Score resume relevance against specific job requirements"""
|
| 31 |
+
|
| 32 |
+
def __init__(self):
|
| 33 |
+
self.scoring_weights = {
|
| 34 |
+
'must_have_skills': 0.40, # 40% weight
|
| 35 |
+
'experience': 0.25, # 25% weight
|
| 36 |
+
'good_to_have_skills': 0.15, # 15% weight
|
| 37 |
+
'education': 0.20 # 20% weight
|
| 38 |
+
}
|
| 39 |
+
print("✅ Job Relevance Scorer initialized")
|
| 40 |
+
|
| 41 |
+
def calculate_relevance(self, resume_text: str, job_req) -> RelevanceScore:
|
| 42 |
+
"""Calculate comprehensive relevance score against job requirements"""
|
| 43 |
+
|
| 44 |
+
print(f"🎯 Scoring relevance for: {getattr(job_req, 'role_title', 'Unknown Role')}")
|
| 45 |
+
|
| 46 |
+
# Extract resume information
|
| 47 |
+
from parsers.smart_skill_extractor import SmartSkillExtractor
|
| 48 |
+
skill_extractor = SmartSkillExtractor()
|
| 49 |
+
resume_skills = skill_extractor.extract_skills_comprehensive(resume_text)
|
| 50 |
+
|
| 51 |
+
resume_experience = self._extract_experience_years(resume_text)
|
| 52 |
+
resume_education = self._extract_education_level(resume_text)
|
| 53 |
+
|
| 54 |
+
# Get job requirements
|
| 55 |
+
must_have_skills = getattr(job_req, 'must_have_skills', [])
|
| 56 |
+
good_to_have_skills = getattr(job_req, 'good_to_have_skills', [])
|
| 57 |
+
required_experience = getattr(job_req, 'experience_required', '')
|
| 58 |
+
required_education = getattr(job_req, 'education_required', [])
|
| 59 |
+
|
| 60 |
+
# Calculate component scores
|
| 61 |
+
skill_score, skill_matches = self._score_skills(
|
| 62 |
+
resume_skills, must_have_skills, good_to_have_skills
|
| 63 |
+
)
|
| 64 |
+
|
| 65 |
+
experience_score, exp_gap = self._score_experience(
|
| 66 |
+
resume_experience, required_experience
|
| 67 |
+
)
|
| 68 |
+
|
| 69 |
+
education_score, edu_gap = self._score_education(
|
| 70 |
+
resume_education, required_education
|
| 71 |
+
)
|
| 72 |
+
|
| 73 |
+
# Calculate weighted overall score
|
| 74 |
+
overall_score = (
|
| 75 |
+
skill_score * self.scoring_weights['must_have_skills'] +
|
| 76 |
+
experience_score * self.scoring_weights['experience'] +
|
| 77 |
+
education_score * self.scoring_weights['education']
|
| 78 |
+
)
|
| 79 |
+
|
| 80 |
+
# Add good-to-have bonus
|
| 81 |
+
good_to_have_bonus = len(skill_matches['matched_good_to_have']) * 2
|
| 82 |
+
overall_score = min(100, overall_score + good_to_have_bonus)
|
| 83 |
+
|
| 84 |
+
# Determine fit verdict
|
| 85 |
+
fit_verdict, confidence = self._determine_fit_verdict(
|
| 86 |
+
overall_score, skill_matches, experience_score
|
| 87 |
+
)
|
| 88 |
+
|
| 89 |
+
# Generate improvement suggestions
|
| 90 |
+
suggestions = self._generate_improvement_suggestions(
|
| 91 |
+
skill_matches, exp_gap, edu_gap, job_req
|
| 92 |
+
)
|
| 93 |
+
|
| 94 |
+
return RelevanceScore(
|
| 95 |
+
overall_score=round(overall_score, 1),
|
| 96 |
+
skill_match_score=round(skill_score, 1),
|
| 97 |
+
experience_match_score=round(experience_score, 1),
|
| 98 |
+
education_match_score=round(education_score, 1),
|
| 99 |
+
|
| 100 |
+
matched_must_have=skill_matches['matched_must_have'],
|
| 101 |
+
matched_good_to_have=skill_matches['matched_good_to_have'],
|
| 102 |
+
missing_must_have=skill_matches['missing_must_have'],
|
| 103 |
+
missing_good_to_have=skill_matches['missing_good_to_have'],
|
| 104 |
+
|
| 105 |
+
experience_gap=exp_gap,
|
| 106 |
+
education_gap=edu_gap,
|
| 107 |
+
|
| 108 |
+
fit_verdict=fit_verdict,
|
| 109 |
+
confidence_score=confidence,
|
| 110 |
+
|
| 111 |
+
improvement_suggestions=suggestions['main'],
|
| 112 |
+
quick_wins=suggestions['quick_wins'],
|
| 113 |
+
long_term_goals=suggestions['long_term']
|
| 114 |
+
)
|
| 115 |
+
|
| 116 |
+
def _score_skills(self, resume_skills: List[str], must_have: List[str],
|
| 117 |
+
good_to_have: List[str]) -> Tuple[float, Dict]:
|
| 118 |
+
"""Score skill matching against job requirements"""
|
| 119 |
+
|
| 120 |
+
resume_skills_lower = [skill.lower() for skill in resume_skills]
|
| 121 |
+
|
| 122 |
+
# Match must-have skills
|
| 123 |
+
matched_must_have = []
|
| 124 |
+
missing_must_have = []
|
| 125 |
+
|
| 126 |
+
for skill in must_have:
|
| 127 |
+
skill_lower = skill.lower()
|
| 128 |
+
if any(skill_lower in resume_skill for resume_skill in resume_skills_lower):
|
| 129 |
+
matched_must_have.append(skill)
|
| 130 |
+
else:
|
| 131 |
+
missing_must_have.append(skill)
|
| 132 |
+
|
| 133 |
+
# Match good-to-have skills
|
| 134 |
+
matched_good_to_have = []
|
| 135 |
+
missing_good_to_have = []
|
| 136 |
+
|
| 137 |
+
for skill in good_to_have:
|
| 138 |
+
skill_lower = skill.lower()
|
| 139 |
+
if any(skill_lower in resume_skill for resume_skill in resume_skills_lower):
|
| 140 |
+
matched_good_to_have.append(skill)
|
| 141 |
+
else:
|
| 142 |
+
missing_good_to_have.append(skill)
|
| 143 |
+
|
| 144 |
+
# Calculate skill score
|
| 145 |
+
if not must_have:
|
| 146 |
+
must_have_score = 100
|
| 147 |
+
else:
|
| 148 |
+
must_have_score = (len(matched_must_have) / len(must_have)) * 100
|
| 149 |
+
|
| 150 |
+
return must_have_score, {
|
| 151 |
+
'matched_must_have': matched_must_have,
|
| 152 |
+
'matched_good_to_have': matched_good_to_have,
|
| 153 |
+
'missing_must_have': missing_must_have,
|
| 154 |
+
'missing_good_to_have': missing_good_to_have
|
| 155 |
+
}
|
| 156 |
+
|
| 157 |
+
def _score_experience(self, resume_exp: int, required_exp: str) -> Tuple[float, str]:
|
| 158 |
+
"""Score experience matching"""
|
| 159 |
+
|
| 160 |
+
req_years = self._parse_experience_requirement(required_exp)
|
| 161 |
+
|
| 162 |
+
if req_years is None:
|
| 163 |
+
return 100, "Experience requirement not specified"
|
| 164 |
+
|
| 165 |
+
if resume_exp >= req_years:
|
| 166 |
+
if resume_exp <= req_years + 2:
|
| 167 |
+
score = 100
|
| 168 |
+
gap = f"Perfect match ({resume_exp} years vs {req_years} required)"
|
| 169 |
+
else:
|
| 170 |
+
score = 95
|
| 171 |
+
gap = f"Overqualified ({resume_exp} years vs {req_years} required)"
|
| 172 |
+
else:
|
| 173 |
+
gap_years = req_years - resume_exp
|
| 174 |
+
if gap_years == 1:
|
| 175 |
+
score = 75
|
| 176 |
+
gap = f"1 year short ({resume_exp} years vs {req_years} required)"
|
| 177 |
+
elif gap_years == 2:
|
| 178 |
+
score = 50
|
| 179 |
+
gap = f"2 years short ({resume_exp} years vs {req_years} required)"
|
| 180 |
+
else:
|
| 181 |
+
score = 25
|
| 182 |
+
gap = f"{gap_years} years short ({resume_exp} years vs {req_years} required)"
|
| 183 |
+
|
| 184 |
+
return score, gap
|
| 185 |
+
|
| 186 |
+
def _score_education(self, resume_edu: List[str], required_edu: List[str]) -> Tuple[float, List[str]]:
|
| 187 |
+
"""Score education matching"""
|
| 188 |
+
|
| 189 |
+
if not required_edu or "any graduate" in " ".join(required_edu).lower():
|
| 190 |
+
return 100, []
|
| 191 |
+
|
| 192 |
+
resume_edu_lower = [edu.lower() for edu in resume_edu]
|
| 193 |
+
|
| 194 |
+
matched = False
|
| 195 |
+
gaps = []
|
| 196 |
+
|
| 197 |
+
for req_edu in required_edu:
|
| 198 |
+
req_edu_lower = req_edu.lower()
|
| 199 |
+
found_match = False
|
| 200 |
+
for res_edu in resume_edu_lower:
|
| 201 |
+
if any(word in res_edu for word in req_edu_lower.split() if len(word) > 2):
|
| 202 |
+
matched = True
|
| 203 |
+
found_match = True
|
| 204 |
+
break
|
| 205 |
+
|
| 206 |
+
if not found_match:
|
| 207 |
+
gaps.append(req_edu)
|
| 208 |
+
|
| 209 |
+
score = 100 if matched and not gaps else (80 if matched else 30)
|
| 210 |
+
return score, gaps
|
| 211 |
+
|
| 212 |
+
def _extract_experience_years(self, resume_text: str) -> int:
|
| 213 |
+
"""Extract years of experience from resume"""
|
| 214 |
+
|
| 215 |
+
patterns = [
|
| 216 |
+
r'(\d+)[\+\s]*years?\s+(?:of\s+)?(?:experience|exp)',
|
| 217 |
+
r'(?:experience|exp)[\s:]*(\d+)[\+\s]*years?',
|
| 218 |
+
r'(\d+)[\+\s]*years?\s+(?:in|with)'
|
| 219 |
+
]
|
| 220 |
+
|
| 221 |
+
years = []
|
| 222 |
+
for pattern in patterns:
|
| 223 |
+
matches = re.findall(pattern, resume_text, re.IGNORECASE)
|
| 224 |
+
years.extend([int(match) for match in matches if match.isdigit()])
|
| 225 |
+
|
| 226 |
+
return max(years) if years else 0
|
| 227 |
+
|
| 228 |
+
def _extract_education_level(self, resume_text: str) -> List[str]:
|
| 229 |
+
"""Extract education from resume"""
|
| 230 |
+
|
| 231 |
+
patterns = [
|
| 232 |
+
r'bachelor[^.\n]*',
|
| 233 |
+
r'master[^.\n]*',
|
| 234 |
+
r'b\.?tech[^.\n]*',
|
| 235 |
+
r'm\.?tech[^.\n]*',
|
| 236 |
+
r'bca[^.\n]*',
|
| 237 |
+
r'mca[^.\n]*'
|
| 238 |
+
]
|
| 239 |
+
|
| 240 |
+
education = []
|
| 241 |
+
for pattern in patterns:
|
| 242 |
+
matches = re.findall(pattern, resume_text, re.IGNORECASE)
|
| 243 |
+
education.extend(matches)
|
| 244 |
+
|
| 245 |
+
return education
|
| 246 |
+
|
| 247 |
+
def _parse_experience_requirement(self, exp_req: str) -> int:
|
| 248 |
+
"""Parse experience requirement string to years"""
|
| 249 |
+
|
| 250 |
+
if not exp_req or exp_req.lower() == "not specified":
|
| 251 |
+
return None
|
| 252 |
+
|
| 253 |
+
numbers = re.findall(r'\d+', exp_req)
|
| 254 |
+
|
| 255 |
+
if not numbers:
|
| 256 |
+
return None
|
| 257 |
+
|
| 258 |
+
return int(numbers[0])
|
| 259 |
+
|
| 260 |
+
def _determine_fit_verdict(self, overall_score: float, skill_matches: Dict,
|
| 261 |
+
experience_score: float) -> Tuple[str, float]:
|
| 262 |
+
"""Determine fit verdict and confidence"""
|
| 263 |
+
|
| 264 |
+
must_have_count = len(skill_matches['matched_must_have']) + len(skill_matches['missing_must_have'])
|
| 265 |
+
must_have_ratio = len(skill_matches['matched_must_have']) / max(1, must_have_count)
|
| 266 |
+
|
| 267 |
+
confidence = min(100, (must_have_ratio * 50) + (experience_score * 0.3) + (overall_score * 0.2))
|
| 268 |
+
|
| 269 |
+
if overall_score >= 80 and must_have_ratio >= 0.8:
|
| 270 |
+
verdict = "High Suitability"
|
| 271 |
+
elif overall_score >= 60 and must_have_ratio >= 0.6:
|
| 272 |
+
verdict = "Medium Suitability"
|
| 273 |
+
elif overall_score >= 40:
|
| 274 |
+
verdict = "Low-Medium Suitability"
|
| 275 |
+
else:
|
| 276 |
+
verdict = "Low Suitability"
|
| 277 |
+
|
| 278 |
+
return verdict, round(confidence, 1)
|
| 279 |
+
|
| 280 |
+
def _generate_improvement_suggestions(self, skill_matches: Dict, exp_gap: str,
|
| 281 |
+
edu_gap: List[str], job_req) -> Dict[str, List[str]]:
|
| 282 |
+
"""Generate personalized improvement suggestions"""
|
| 283 |
+
|
| 284 |
+
main_suggestions = []
|
| 285 |
+
quick_wins = []
|
| 286 |
+
long_term_goals = []
|
| 287 |
+
|
| 288 |
+
# Skill suggestions
|
| 289 |
+
missing_must_have = skill_matches['missing_must_have']
|
| 290 |
+
if missing_must_have:
|
| 291 |
+
main_suggestions.append(f"Acquire critical skills: {', '.join(missing_must_have[:3])}")
|
| 292 |
+
quick_wins.append(f"Start learning: {', '.join(missing_must_have[:2])}")
|
| 293 |
+
|
| 294 |
+
# Experience suggestions
|
| 295 |
+
if "short" in exp_gap:
|
| 296 |
+
quick_wins.append("Gain experience through projects and internships")
|
| 297 |
+
|
| 298 |
+
# Education suggestions
|
| 299 |
+
if edu_gap:
|
| 300 |
+
long_term_goals.append("Consider relevant degree or certification")
|
| 301 |
+
|
| 302 |
+
return {
|
| 303 |
+
'main': main_suggestions[:5],
|
| 304 |
+
'quick_wins': quick_wins[:5],
|
| 305 |
+
'long_term': long_term_goals[:3]
|
| 306 |
+
}
|
| 307 |
+
|
| 308 |
+
def test_relevance_scorer():
    """Placeholder smoke test for the relevance scorer module."""
    print("✅ Relevance scorer test completed")
    return True
|
| 312 |
+
|
| 313 |
+
# Allow running this module directly as a quick smoke test.
if __name__ == "__main__":
    test_relevance_scorer()
|