Gaykar committed on
Commit
73ee6c2
·
1 Parent(s): 2fc3031

Added to git

Browse files
app/agents/__init__.py ADDED
File without changes
app/agents/agents.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_groq import ChatGroq
2
+ from app.schemas.pydanticschema import ResumeExtract,JobDescriptionExtract,SkillGapAnalysis
3
+
4
# Model identifiers and the shared sampling temperature used by every agent.
_KIMI_MODEL = "moonshotai/kimi-k2-instruct-0905"
_LLAMA_SCOUT_MODEL = "meta-llama/llama-4-scout-17b-16e-instruct"
_TEMPERATURE = 0.2

# NOTE(review): no api_key is passed to ChatGroq, so the client presumably
# picks the Groq key up from the environment -- confirm it is exported before
# this module is imported.


def _chat_model(model_name):
    """Return a ChatGroq client for ``model_name`` with the shared temperature."""
    return ChatGroq(model=model_name, temperature=_TEMPERATURE)


def _structured_agent(model_name, schema):
    """Return a ChatGroq runnable whose output is constrained to ``schema``.

    ``include_raw=True`` is requested, so callers index the result with
    ``result["parsed"]`` rather than receiving the schema object directly.
    """
    return _chat_model(model_name).with_structured_output(
        schema=schema,
        method="json_schema",
        include_raw=True,
        strict=True,
    )


# Structured-output agents, one per extraction/analysis step.
resume_agent = _structured_agent(_KIMI_MODEL, ResumeExtract)
jd_agent = _structured_agent(_LLAMA_SCOUT_MODEL, JobDescriptionExtract)
gap_analysis_agent = _structured_agent(_KIMI_MODEL, SkillGapAnalysis)

# Plain chat model (no structured output).
# NOTE(review): the roadmap prompt refers to tools such as 'search_courses';
# none are bound here, so they are presumably bound elsewhere -- confirm.
roadmap_planner_agent = _chat_model(_KIMI_MODEL)
app/core/__init__.py ADDED
File without changes
app/core/config.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from pydantic_settings import BaseSettings, SettingsConfigDict
3
+
4
# Directory two levels above this file; the ".env" file is looked up here.
BASE_DIR = Path(__file__).resolve().parents[1]


class Settings(BaseSettings):
    # Application configuration. Fields without a default are required and
    # must be supplied by the settings sources (the .env file configured below
    # is one of them).
    PROJECT_NAME: str = "Adaptive Onboarding Engine"

    # Credentials for the external services used by the project.
    GROQ_API_KEY: str
    PINECONE_API_KEY: str
    CLOUDINARY_CLOUD_NAME: str
    CLOUDINARY_API_KEY: str
    CLOUDINARY_API_SECRET: str

    # extra="ignore": keys in the .env file that are not declared above are
    # skipped instead of being rejected.
    model_config = SettingsConfigDict(
        env_file=str(BASE_DIR / ".env"),
        env_file_encoding="utf-8",
        extra="ignore",
    )


# Module-level singleton, built once when this module is first imported.
settings = Settings()
app/graph.py ADDED
File without changes
app/main.py ADDED
File without changes
app/nodes/__init__.py ADDED
File without changes
app/nodes/graphnodes.py ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from app.state.state import OnboardingState
3
+ from langchain_core.messages import SystemMessage, HumanMessage,ToolMessage,AIMessage
4
+ from app.prompts.resume_agent_prompt import resume_agent_prompt
5
+ from app.prompts.jd_agent_prompt import jd_agent_prompt
6
+ from app.prompts.roadmap_planner_agent_prompt import roadmap_planner_agent_prompt
7
+ from app.agents.agents import resume_agent,jd_agent,roadmap_planner_agent,gap_analysis_agent
8
+ from app.prompts.gap_analysis_agent_prompt import gap_analysis_agent_prompt
9
+ from app.schemas.pydanticschema import ResumeExtract,JobDescriptionExtract,SkillGapAnalysis
10
+ import json
11
+
12
+
13
+ from langchain_community.document_loaders import PyMuPDFLoader
14
+
15
def input_node(state: OnboardingState):
    """Load the resume PDF named by ``state["file_path"]`` as plain text.

    Returns a partial state update:
      * ``{"extraction_error": ...}`` when ``file_path`` is missing/empty or
        loading raises;
      * ``{"resume_text": ..., "extraction_error": None}`` on success, where
        the text is every loaded document's ``page_content`` joined by
        newlines.
    """
    pdf_path = state.get("file_path")
    if not pdf_path:
        return {"extraction_error": "Missing file_path in state"}

    try:
        pages = PyMuPDFLoader(pdf_path).load()
        page_texts = [page.page_content for page in pages]
        outcome = {
            "resume_text": "\n".join(page_texts),
            "extraction_error": None,
        }
    except Exception as e:
        # Any loader failure is reported through the state, not raised.
        outcome = {"extraction_error": f"Failed to load resume: {str(e)}"}

    return outcome
38
+
39
+
40
def extractResumeDataNode(state: OnboardingState):
    """Run the resume agent over ``state["resume_text"]``.

    Returns a partial state update:
      * ``{"resume_data": <parsed object>}`` on success;
      * ``{"resume_data": None, "extraction_error": <message>}`` when there is
        no resume text to send, or when the agent result carries no parsed
        object.

    An ``extraction_error`` already present in the state (e.g. written by
    ``input_node``) is preserved rather than overwritten when the text is
    missing.
    """
    resume_text = state.get("resume_text")
    if not resume_text:
        # input_node returns no "resume_text" key when loading failed, so a
        # plain state["resume_text"] lookup would raise KeyError here.
        return {
            "resume_data": None,
            "extraction_error": state.get("extraction_error")
            or "Missing resume_text in state",
        }

    messages = [
        SystemMessage(content=resume_agent_prompt),
        HumanMessage(content=f"<resume_text>{resume_text}</resume_text>"),
    ]

    result = resume_agent.invoke(messages)

    # The agent is built with include_raw=True, so a failed parse shows up as
    # a missing "parsed" value rather than as an exception.
    parsed = result.get("parsed")
    if parsed is None:
        return {
            "resume_data": None,
            "extraction_error": (
                f"Failed to parse resume: {result.get('parsing_error')}"
            ),
        }

    return {"resume_data": parsed}
53
+
54
+
55
def extractJDDataNode(state: OnboardingState):
    """Extract structured job-description data from ``state["job_description"]``.

    Always returns ``{"JobDescriptionExtract_data": ...}``. The value is an
    empty ``JobDescriptionExtract()`` when the text is missing or has fewer
    than 5 non-blank characters, when the agent call raises, or when the agent
    result carries no parsed object; otherwise it is the parsed object.
    """
    # 1. Safety check: is the text even in the state?
    jd_text = state.get("job_description", "")

    if not jd_text or len(jd_text.strip()) < 5:
        print("DEBUGGER ERROR: job_description text is MISSING from state!")
        return {"JobDescriptionExtract_data": JobDescriptionExtract()}

    print(f"DEBUGGER: Sending {len(jd_text)} characters to JD Agent...")

    messages = [
        SystemMessage(content=jd_agent_prompt),
        HumanMessage(content=f"EXTRACT FROM THIS TEXT:\n\n{jd_text}"),
    ]

    try:
        # 2. Invoke the agent.
        result = jd_agent.invoke(messages)

        # 3. Accept either the include_raw dict or a bare parsed object.
        if isinstance(result, dict):
            parsed_data = result.get("parsed")
            parsing_error = result.get("parsing_error")
        else:
            parsed_data, parsing_error = result, None

        # A missing parsed object is reported explicitly instead of relying on
        # an AttributeError from the attribute checks below.
        if parsed_data is None:
            print(
                "DEBUGGER CRITICAL: JD output could not be parsed: "
                f"{parsing_error}"
            )
            return {"JobDescriptionExtract_data": JobDescriptionExtract()}

        # 4. Critical check: did it actually find anything?
        if parsed_data.job_title is None and parsed_data.tools_technologies is None:
            print("DEBUGGER WARNING: LLM returned empty schema! Checking prompt...")
        else:
            print(f"DEBUGGER SUCCESS: Extracted {parsed_data.job_title}")

        return {"JobDescriptionExtract_data": parsed_data}

    except Exception as e:
        # Best-effort node: failures degrade to an empty extract.
        print(f"DEBUGGER CRITICAL: Invoke failed: {str(e)}")
        return {"JobDescriptionExtract_data": JobDescriptionExtract()}
90
+
91
+
92
+
93
+
94
def skill_gap_node(state: OnboardingState):
    """Compare the extracted resume with the extracted JD via the gap agent.

    Reads ``state["resume_data"]`` and ``state["JobDescriptionExtract_data"]``,
    dumps both to trimmed JSON, sends them to ``gap_analysis_agent`` and
    returns ``{"skill_gap_analysis_data": result["parsed"]}``.

    NOTE(review): the previous version also read ``state["candidate_name"]``
    into a local that was never used; that lookup could raise KeyError for a
    value the node does not need. If the name was meant to reach the prompt,
    add it to the human message explicitly.
    """
    resume_data = state["resume_data"]
    raw_jd = state["JobDescriptionExtract_data"]

    # Trim the resume to reduce noise and prompt size.
    lean_resume_dict = resume_data.model_dump(
        exclude={
            "achievements": True,  # drop the whole achievements list
            "skills": {"__all__": {"category"}},  # drop 'category' per skill
            "experience": {"__all__": {"responsibilities"}},  # drop bullets
            "projects": {"__all__": {"what_was_built"}},  # drop descriptions
            "certifications": {"__all__": {"issuer"}},  # drop the issuer
        },
        exclude_none=True,  # also drop every field that is None
    )

    # Strip HR metadata and verbose text from the JD.
    lean_jd_dict = raw_jd.model_dump(
        exclude={
            "company_name": True,
            "location": True,
            "employment_type": True,
            "duration_months": True,
            "responsibilities": True,
            "requirements": True,
            "constraints": True,
        },
        exclude_none=True,
    )

    lean_resume_json = json.dumps(lean_resume_dict, indent=2)
    lean_jd_json = json.dumps(lean_jd_dict, indent=2)

    messages = [
        SystemMessage(content=gap_analysis_agent_prompt),
        HumanMessage(content=f"Users Resume:<lean_resume_json>{lean_resume_json}</lean_resume_json> Job Description:<lean_jd_json>{lean_jd_json}</lean_jd_json>"),
    ]

    result = gap_analysis_agent.invoke(messages)

    return {"skill_gap_analysis_data": result["parsed"]}
144
+
145
+
146
+
147
+
148
def roadmap_planning_node(state: OnboardingState):
    """Ask the roadmap planner model for its next message.

    Builds the conversation as: system prompt, the skill-gap analysis from
    ``state["skill_gap_analysis_data"]`` serialized as JSON, then every
    message already in ``state["messages"]``. The model's response is
    returned as ``{"messages": [response]}``.
    """
    skill_gap_data = state["skill_gap_analysis_data"].model_dump()

    # Send JSON, as skill_gap_node does, not a Python dict repr.
    skill_gap_json = json.dumps(skill_gap_data, indent=2)

    system_prompt = SystemMessage(content=roadmap_planner_agent_prompt)
    input_msg = HumanMessage(
        content=f"<skill_gap_data> {skill_gap_json} </skill_gap_data>"
    )

    # The state declares messages as a Sequence; copy into a list so the
    # concatenation works for tuples too and tolerates a missing key.
    history = list(state.get("messages") or [])

    response = roadmap_planner_agent.invoke([system_prompt, input_msg] + history)

    return {"messages": [response]}
164
+
165
+
166
def finalize_state_node(state: OnboardingState):
    """Copy the latest submitted roadmap and Mermaid code into state keys.

    Scans ``state["messages"]`` from newest to oldest, looking at each
    message's ``tool_calls`` (newest call first). The first
    ``submit_final_roadmap`` call found supplies ``final_roadmap`` (its
    ``args``); the first ``submit_mermaid_visualization`` call whose args
    contain ``mermaid_code`` supplies ``mermaid_code``. Older calls never
    overwrite a value that was already found.

    Returns ``{"final_roadmap": ..., "mermaid_code": ...}``; either value is
    ``None`` when no matching tool call exists.
    """
    final_roadmap = None
    mermaid_code = None

    for msg in reversed(state["messages"]):
        # Only messages that carry tool calls are of interest.
        tool_calls = getattr(msg, "tool_calls", None)
        if not tool_calls:
            continue

        for tool_call in reversed(tool_calls):
            name = tool_call["name"]
            if name == "submit_final_roadmap" and final_roadmap is None:
                final_roadmap = tool_call["args"]
            elif name == "submit_mermaid_visualization" and mermaid_code is None:
                mermaid_code = tool_call["args"].get("mermaid_code")

        # Compare against None so an empty roadmap dict still counts as found.
        if final_roadmap is not None and mermaid_code is not None:
            break

    return {
        "final_roadmap": final_roadmap,
        "mermaid_code": mermaid_code,
    }
app/prompts/__init__.py ADDED
File without changes
app/prompts/gap_analysis_agent_prompt.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# System prompt for the skill-gap analysis agent.
# Stray "[cite: N]" markers were removed, and the resume field is referred to
# as `skills`, the key the resume JSON actually uses.
gap_analysis_agent_prompt = """
<role>
You are an expert technical assessor and the core intelligence of an AI-driven, adaptive onboarding engine.
Your objective is to parse a new hire's current capabilities against a target job description and identify precise skill gaps to reach role-specific competency.
</role>

<context>
Current corporate onboarding utilizes static, "one-size-fits-all" curricula, resulting in significant inefficiencies.
Your ultimate goal is to solve this: you must ensure experienced hires do NOT waste time on known concepts, while ensuring beginners are NOT overwhelmed by advanced modules.
</context>

<rules>
- Cross-reference the JD's `skills_required` and `tools_technologies` against the candidate's `skills`, `experience.technologies`, and `projects.technologies`.
- Identify Hard Gaps: Technologies explicitly required by the JD that are completely absent from the candidate's profile.
- Apply Adaptive Logic (Proficiency Gaps):
  - For Experienced Hires: If they possess the skill, DO NOT flag it for basic training. Only flag a gap if they need an advanced, role-specific upgrade based on low duration of use.
  - For Beginners/Freshers: Flag foundational gaps and prerequisites heavily to ensure they are prepared before tackling complex JD requirements.
- Keep skills atomic and highly specific (e.g., output "FastAPI" or "PostgreSQL", do NOT output vague terms like "Backend Frameworks").
- Do NOT hallucinate requirements that are not explicitly stated in the JD data.
- Do NOT attempt to build the curriculum or suggest courses yet. Your sole focus is diagnosing the gaps.
- Provide a concise `reasoning` string for each identified gap. This reasoning MUST justify why the gap exists based on the user's experience level to prove the adaptive logic.
</rules>
<output_format>
Return a valid JSON object only.
</output_format>


"""
app/prompts/jd_agent_prompt.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# System prompt for the job-description extraction agent. It is sent as the
# SystemMessage content in extractJDDataNode; the text is part of runtime
# behavior and is left untouched.
jd_agent_prompt ="""
<role>
You are a precise job description parser.
Extract structured information from the given job description.
</role>

<rules>
- Extract ONLY explicitly mentioned information. Do NOT infer or hallucinate.

- Follow the provided schema strictly.

- If a field is not present, return null (not empty list unless schema default applies).

- Keep skills atomic (e.g., Python, SQL, React).

- Do NOT mix fields:
- skills = only required skills
- responsibilities = what the candidate will do
- constraints = restrictions like location, duration, eligibility

- Convert durations like "6 months" into integer months.

- is_fresher_allowed:
- True only if explicitly allowed
- False only if explicitly restricted

</rules>

<output_format>
Return a valid JSON object only.
</output_format> """
app/prompts/resume_agent_prompt.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
# System prompt for the resume extraction agent.
# Field names and the is_fresher rule follow the ResumeExtract schema
# (job_title, total_experience_months, is_fresher), so the prompt and the
# schema descriptions no longer give the model conflicting instructions.
resume_agent_prompt = """
<role>
You are a precise resume parser. Your only job is to extract structured information from a raw resume text.
</role>

<rules>
- Extract ONLY what is explicitly present in the resume. Do NOT infer or hallucinate missing fields.
- job_title: the job title stated at the top of the resume or most recent role. If the candidate is a student with no job, set it to "Student".
- is_fresher: set True if the candidate has no full-time professional employment. Internships, academic projects, and certifications do NOT make someone non-fresher.
- total_experience_months: total professional work experience in whole months, including internships. Set 0 if no experience is found.
- skills: extract from the explicit skills section only. Do NOT pull skills from project descriptions here.
- experience: each role is a SEPARATE entry. Ignore company name. Focus on job_title, technologies used, and what they did or learned.
- projects: extract each project separately. Capture technologies and one line on what was built.
- certifications: extract ONLY if present. Set null if none found. Include topics the certification covers.
- achievements: extract ONLY if present. Set null if none found. Include the domain (e.g. Hackathon, Quiz, Competitive Programming).

</rules>

<output_format>
Return a single valid JSON object matching the schema. No extra text, no markdown, no explanation.
</output_format>


"""
app/prompts/roadmap_planner_agent_prompt.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# System prompt for the roadmap planner agent.
# The Mermaid example writes "\\n" so the model sees a literal backslash-n
# inside node labels; a bare "\n" in this non-raw string would become a real
# newline and split each label across two lines.
# NOTE(review): step 3 refers to 'resume_data', but roadmap_planning_node only
# sends the skill-gap data -- confirm the resume reaches the model some other way.
roadmap_planner_agent_prompt = """
<role>
You are the "Architect of Growth," an expert technical roadmap planner.
Your objective is to transform a "Skill Gap Analysis" into a logically sequenced,
personalized learning journey that ensures "Role Competency" in the minimum time possible.
</role>

<logic_flow>
1. ANALYZE GAPS: Review the identified skill gaps, their priority, and the 'gap_type' (foundation vs upgrade).
2. INITIAL SEARCH (RAG): For every high/medium priority gap, call 'search_courses'.
   - Match the 'level' and 'category' strictly.
3. DEPENDENCY RESOLUTION (The "ID-Lookup" Step):
   - For every course retrieved, inspect the 'prerequisites' field (list of IDs).
   - CHECK: Does the 'resume_data' show the candidate already knows these prerequisites?
   - IF NOT: You MUST call 'get_course_by_id' for each missing prerequisite ID.
   - RECURSION: If the prerequisite itself has prerequisites, repeat this step until the path is complete.
4. ADAPTIVE SEQUENCING:
   - Always place Prerequisite modules BEFORE the target Skill Gap module.
   - If 'is_fresher_adaptation_needed' is True, start the entire roadmap with the 'SOFT-AGILE-101' or similar professional module.
5. JUSTIFY: For every course (including prerequisites), provide a unique 'reasoning' trace.
   - Example for Prereq: "Added 'SQL Basics' because 'PostgreSQL Mastery' requires it, and your resume shows no prior database experience."
6. SUBMIT: After you have a complete roadmap, call 'submit_final_roadmap' and 'submit_mermaid_visualization'.
</logic_flow>

<constraints>
- STRICT ID USAGE: Use ONLY the 'course_id' returned by tools. Never guess an ID.
- REDUNDANCY CHECK: Do not assign a course if the candidate's projects or experience already prove mastery of that specific topic.
- PATH LENGTH: Prioritize the most critical 5-6 modules total to ensure the onboarding is high-impact and achievable.
- DO NOT provide a conversational response at the end.
- DO NOT just print JSON.
- You MUST call the 'submit_final_roadmap' and 'submit_mermaid_visualization' tools with the final plan.
- Ensure 'sequence_order' is 1, 2, 3...
</constraints>

<example_mermaid>
flowchart TD
A([Start — Rahul's current skills]):::start
subgraph W1["Week 1 — Core gaps"]
B[CS-DOCKER-101\\nDocker & Containerization]:::gap
C[CS-PY-101\\nPython Fundamentals]:::known
end
subgraph W2["Week 2 — Role readiness"]
D[CS-CICD-201\\nCI/CD with GitHub Actions]:::gap
end
Z([Role-ready — DevOps Engineer]):::done
A --> B & C
B --> D
D --> Z
classDef gap fill:#EEEDFE,stroke:#534AB7,color:#26215C
classDef known fill:#E1F5EE,stroke:#0F6E56,color:#085041
classDef start fill:#1D9E75,stroke:#0F6E56,color:#E1F5EE
classDef done fill:#534AB7,stroke:#3C3489,color:#EEEDFE
</example_mermaid>


"""
app/schemas/__init__.py ADDED
File without changes
app/schemas/pydanticschema.py ADDED
@@ -0,0 +1,385 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field
2
+ from typing import List, Optional, Literal
3
+
4
+
5
+ class SkillRequirement(BaseModel):
6
+ name: str = Field(
7
+ ...,
8
+ description="Skill or technology required for the job (e.g., Python, SQL, React)"
9
+ )
10
+ level: Optional[str] = Field(
11
+ None,
12
+ description="Expected proficiency level: beginner | intermediate | strong"
13
+ )
14
+
15
+
16
+ class ResponsibilityItem(BaseModel):
17
+ description: str = Field(
18
+ ...,
19
+ description="Key responsibility or task expected from the candidate"
20
+ )
21
+
22
+
23
+ class RequirementItem(BaseModel):
24
+ description: str = Field(
25
+ ...,
26
+ description="Qualification or requirement such as education, availability, etc."
27
+ )
28
+
29
+
30
+ class ConstraintItem(BaseModel):
31
+ type: str = Field(
32
+ ...,
33
+ description="Constraint type such as location, duration, eligibility"
34
+ )
35
+ value: str = Field(
36
+ ...,
37
+ description="Constraint value (e.g., 'Pune only', '6 months', 'Fresher')"
38
+ )
39
+
40
+
41
+
42
+ class JobDescriptionExtract(BaseModel):
43
+ job_title: Optional[str] = Field(
44
+ None,
45
+ description="Job role/title (e.g., AI/ML Intern, Web Developer)"
46
+ )
47
+
48
+ company_name: Optional[str] = Field(
49
+ None,
50
+ description="Company offering the job"
51
+ )
52
+
53
+ location: Optional[str] = Field(
54
+ None,
55
+ description="Job location if specified"
56
+ )
57
+
58
+ employment_type: Optional[str] = Field(
59
+ None,
60
+ description="Type of job: internship, full-time, contract"
61
+ )
62
+
63
+ duration_months: Optional[int] = Field(
64
+ None,
65
+ description="Duration of role in months (for internships/contracts)"
66
+ )
67
+
68
+ is_fresher_allowed: Optional[bool] = Field(
69
+ None,
70
+ description="Whether freshers are eligible for this role"
71
+ )
72
+
73
+ skills_required: Optional[List[SkillRequirement]] = Field(
74
+ None,
75
+ description="List of required skills and expected levels"
76
+ )
77
+
78
+ tools_technologies: Optional[List[str]] = Field(
79
+ None,
80
+ description="Specific tools/frameworks mentioned (e.g., Pandas, WordPress)"
81
+ )
82
+
83
+ responsibilities: Optional[List[ResponsibilityItem]] = Field(
84
+ None,
85
+ description="Key job responsibilities"
86
+ )
87
+
88
+ requirements: Optional[List[RequirementItem]] = Field(
89
+ None,
90
+ description="General requirements like availability, qualifications"
91
+ )
92
+
93
+ constraints: Optional[List[ConstraintItem]] = Field(
94
+ None,
95
+ description="Special constraints like location restriction, duration, etc."
96
+ )
97
+
98
+
99
+
100
+ class Skill(BaseModel):
101
+ name: str = Field(..., description="Skill name e.g. Python, Docker")
102
+ category: Optional[str] = Field(
103
+ None, description="Category: Backend | ML | DevOps | Frontend | Other"
104
+ )
105
+
106
+
107
+ class ExperienceItem(BaseModel):
108
+ job_title: str = Field(
109
+ ...,
110
+ description="Role title of the candidate. Example: 'Backend Intern', 'Software Engineer'"
111
+ )
112
+
113
+ experience_type: Optional[Literal['internship', 'full_time', 'contract', 'freelance']] = Field(
114
+ None,
115
+ description="Type of experience: internship, full_time, contract, or freelance"
116
+ )
117
+
118
+ duration_months: Optional[int] = Field(
119
+ None,
120
+ description="Duration of this role in months. Null if not explicitly mentioned"
121
+ )
122
+
123
+ technologies: Optional[List[str]] = Field(
124
+ default_factory=list,
125
+ description="Technologies, tools, or frameworks used in this role"
126
+ )
127
+
128
+ responsibilities: Optional[List[str]] = Field(
129
+ default_factory=list,
130
+ description="Key responsibilities, tasks, or learnings in concise bullet points"
131
+ )
132
+
133
+ class ProjectItem(BaseModel):
134
+ name: str = Field(..., description="Project name")
135
+ technologies: List[str] = Field(
136
+ default_factory=list,
137
+ description="Technologies used in this project"
138
+ )
139
+ what_was_built: Optional[str] = Field(
140
+ None,
141
+ description="One line — what problem it solved or what was built"
142
+ )
143
+
144
+
145
+ class CertificationItem(BaseModel):
146
+ name: str = Field(..., description="Certification name")
147
+ issuer: Optional[str] = Field(None, description="Issuing organization")
148
+ topics_covered: List[str] = Field(
149
+ default_factory=list,
150
+ description="Key topics or skills the certification covers"
151
+ )
152
+
153
+
154
+ class AchievementItem(BaseModel):
155
+ title: str = Field(..., description="Achievement title")
156
+ domain: Optional[str] = Field(
157
+ None,
158
+ description="Domain of achievement e.g. Competitive Programming, Hackathon, Quiz"
159
+ )
160
+
161
+
162
+
163
+
164
class ResumeExtract(BaseModel):
    # Structured view of a candidate's resume; used as the output schema of
    # the resume extraction agent. Field descriptions are part of that schema,
    # so they are written as instructions to the model.
    # (A '#' comment is used instead of a class docstring so the generated
    # schema is not given an extra description.)

    job_title: Optional[str] = Field(
        None,
        description=(
            "Primary job title or role of the candidate. "
            "Examples: 'AI Engineer', 'Data Scientist', "
            "'Construction Project Manager', 'Healthcare Representative'. "
            "Should reflect the most recent or current role."
        ),
    )

    total_experience_months: Optional[int] = Field(
        0,
        description=(
            "Total professional work experience in months. "
            "Includes internships and full-time roles. "
            "0 if fresher or no experience found."
        ),
    )

    skills: List[Skill] = Field(
        default_factory=list,
        description="Skills explicitly listed by the candidate",
    )
    experience: List[ExperienceItem] = Field(
        default_factory=list,
        description=(
            "Each role as a separate entry. "
            "No company name needed — focus on what was done and learned."
        ),
    )
    projects: List[ProjectItem] = Field(
        default_factory=list,
        description="Projects with technologies used and what was built",
    )
    certifications: Optional[List[CertificationItem]] = Field(
        None,
        description="Certifications with topics they cover. None if not present.",
    )
    achievements: Optional[List[AchievementItem]] = Field(
        None,
        description="Accomplishments that signal domain strength or soft skills. None if not present.",
    )

    # The previous description ended in a truncated fragment ("hence is_.");
    # the sentence is now complete.
    is_fresher: bool = Field(
        ...,
        description=(
            "Set to True if the candidate lacks full-time professional employment. "
            "Academic projects, certifications, and internships are considered "
            "part of the learning phase and do not qualify a candidate as 'non-fresher'."
        ),
    )
224
+
225
+
226
+
227
# NOTE: SkillRequirement, ResponsibilityItem, RequirementItem, ConstraintItem
# and JobDescriptionExtract are defined once, earlier in this module. A second,
# field-for-field identical copy of those five classes stood here and only
# rebound the same names; it was removed so each model has a single
# definition to maintain.
319
+
320
+
321
+ class SkillGap(BaseModel):
322
+ skill_name: str = Field(
323
+ ...,
324
+ description="The specific technology or tool missing or requiring an upgrade (e.g., 'PostgreSQL')"
325
+ )
326
+
327
+ gap_type: Literal["missing_foundation", "needs_advanced_upgrade"] = Field(
328
+ ...,
329
+ description=(
330
+ "missing_foundation: Candidate has no recorded experience in this core requirement. "
331
+ "needs_advanced_upgrade: Candidate knows the basics but needs role-specific advanced training."
332
+ )
333
+ )
334
+
335
+ priority: Literal["high", "medium", "low"] = Field(
336
+ ...,
337
+ description="How critical this skill is for the target job role."
338
+ )
339
+
340
+ reasoning: str = Field(
341
+ ...,
342
+ description=(
343
+ "The 'Reasoning Trace'. This MUST be provided for every skill gap identified. "
344
+ "Explain exactly WHY this gap was flagged based on the resume vs JD comparison. "
345
+ "Example: 'JD requires FastAPI; candidate has Python experience but no record of using FastAPI framework.'"
346
+ )
347
+ )
348
+
349
+ target_competency: str = Field(
350
+ ...,
351
+ description="The specific outcome the candidate needs to reach (e.g., 'Build asynchronous database endpoints')"
352
+ )
353
+
354
+ class SkillGapAnalysis(BaseModel):
355
+ job_title: str = Field(..., description="The target role from the JD")
356
+ candidate_name: Optional[str] = Field(None, description="Extracted name from resume")
357
+
358
+ analyzed_gaps: List[SkillGap] = Field(
359
+ default_factory=list,
360
+ description="List of specific technical gaps found between Resume and JD"
361
+ )
362
+
363
+ is_fresher_adaptation_needed: bool = Field(
364
+ default=False,
365
+ description="True if foundational corporate/soft-skill modules should be added to the path."
366
+ )
367
+
368
+ executive_summary: str = Field(
369
+ ...,
370
+ description="A 2-3 sentence overview of the candidate's readiness and the primary focus of the onboarding."
371
+ )
372
+
373
+
374
+ class RoadmapStep(BaseModel):
375
+ course_id: str
376
+ title: str
377
+ reasoning: str = Field(..., description="Why this specific course was chosen for this user")
378
+ is_foundation: bool
379
+ sequence_order: int = Field(..., description="The order in which the course should be taken")
380
+
381
+ class LearningRoadmap(BaseModel):
382
+ candidate_name: str
383
+ target_role: str
384
+ roadmap: List[RoadmapStep]
385
+ onboarding_summary: str
app/state/__init__.py ADDED
File without changes
app/state/state.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Dict, List, Optional, Tuple,TypedDict,Literal
2
+ from typing import Annotated, Sequence
3
+ import os
4
+ from langchain_core.messages import SystemMessage, HumanMessage,ToolMessage,AIMessage
5
+ from langchain_core.tools import Tool
6
+ from langgraph.graph import StateGraph,END,START
7
+ from langgraph.types import interrupt
8
+ from langchain_core.prompts import ChatPromptTemplate,MessagesPlaceholder
9
+ from langchain_community.document_loaders import PyMuPDFLoader
10
+ from pydantic import BaseModel, Field
11
+ from typing import List, Optional
12
+ from pprint import pprint
13
+ from langchain_core.messages import BaseMessage
14
+ from langgraph.graph import add_messages
15
+ from app.schemas.pydanticschema import ResumeExtract,JobDescriptionExtract,SkillGapAnalysis
16
+
17
+
18
+
19
+
20
+
21
+
22
+
23
class OnboardingState(TypedDict):
    # Shared state dictionary passed between the onboarding graph nodes.
    # Each node returns a partial dict whose keys are a subset of these.

    candidate_name: Optional[str]
    resume_text: str  # Written by input_node: text of the loaded resume
    file_path: str  # Read by input_node: path of the resume file to load
    job_description: str  # Read by extractJDDataNode: raw JD text
    # Conversation history; the add_messages annotation is the reducer applied
    # to updates of this key (roadmap_planning_node returns new messages here).
    messages: Annotated[Sequence[BaseMessage], add_messages]

    # Analysis & Extraction Data
    skill_gap_analysis_data: Optional[SkillGapAnalysis]  # Written by skill_gap_node
    resume_data: Optional[ResumeExtract]  # Written by extractResumeDataNode
    extraction_error: Optional[str]  # Error text from input_node, None on success
    JobDescriptionExtract_data: Optional[JobDescriptionExtract]  # Written by extractJDDataNode

    # --- NEW KEYS FOR OUTPUT ---
    mermaid_code: Optional[str] # Stores the Mermaid visualization string
    final_roadmap: Optional[Dict] # Stores the final structured JSON roadmap
app/tools/__init__.py ADDED
File without changes
app/utils/__init__.py ADDED
File without changes
app/utils/vectordatabase.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pinecone import Pinecone, ServerlessSpec
2
+ from pinecone_text.sparse import BM25Encoder
3
+ import os
4
+ from dotenv import load_dotenv
5
+ from langchain_community.retrievers import PineconeHybridSearchRetriever
6
+ import torch
7
+ from langchain_community.embeddings import HuggingFaceEmbeddings
8
+ from langchain_community.schema import Document
9
+
10
+
11
+
12
# Run the embedding model on the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"device": device},
)

# Load variables from a .env file into the process environment.
load_dotenv()

PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
pc = Pinecone(api_key=PINECONE_API_KEY)

index_name = "catalog-embeddings"

# Create the index if it does not exist yet.
# NOTE(review): dimension must equal the embedding model's vector size --
# confirm 384 matches the model configured above.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=384,
        metric="dotproduct",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1",
        ),
    )
    print("Index created.")

index = pc.Index(index_name)
print("Index ready:", index.describe_index_stats())

# The previous code called bm25_encoder.fit(...) on a name `documents` that is
# never defined in this module, so importing the module raised NameError.
# BM25Encoder.default() returns an encoder with pre-fitted parameters instead.
# NOTE(review): the sparse encoder used at query time should match the one
# used when the catalog was indexed -- if the catalog was indexed with a
# corpus-fitted encoder, load those saved parameters here instead.
bm25_encoder = BM25Encoder.default()

retriever = PineconeHybridSearchRetriever(
    embeddings=embeddings,
    sparse_encoder=bm25_encoder,
    index=index,
)
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ langchain==1.2.10
2
+ pydantic==2.11.7
3
+ langchain_huggingface
4
+ langchain-groq==1.1.1
5
+ pinecone==8.0.0
6
+ langchain_community==0.4.1
7
+ fastapi==0.118.1
8
+ uvicorn
9
+ pinecone-text