Spaces:

Gaykar
/

AdaptiveEngineService

Sleeping

App Files Files Community

Gaykar committed on Mar 21

Commit

73ee6c2

1 Parent(s): 2fc3031

Added to git

Browse files

Files changed (21) hide show

app/agents/__init__.py +0 -0
app/agents/agents.py +50 -0
app/core/__init__.py +0 -0
app/core/config.py +21 -0
app/graph.py +0 -0
app/main.py +0 -0
app/nodes/__init__.py +0 -0
app/nodes/graphnodes.py +197 -0
app/prompts/__init__.py +0 -0
app/prompts/gap_analysis_agent_prompt.py +28 -0
app/prompts/jd_agent_prompt.py +31 -0
app/prompts/resume_agent_prompt.py +25 -0
app/prompts/roadmap_planner_agent_prompt.py +60 -0
app/schemas/__init__.py +0 -0
app/schemas/pydanticschema.py +385 -0
app/state/__init__.py +0 -0
app/state/state.py +38 -0
app/tools/__init__.py +0 -0
app/utils/__init__.py +0 -0
app/utils/vectordatabase.py +52 -0
requirements.txt +9 -0

app/agents/__init__.py ADDED Viewed

File without changes

app/agents/agents.py ADDED Viewed

	@@ -0,0 +1,50 @@

+from langchain_groq import ChatGroq
+from app.schemas.pydanticschema import ResumeExtract,JobDescriptionExtract,SkillGapAnalysis
+resume_agent=ChatGroq(
+    model="moonshotai/kimi-k2-instruct-0905",
+    temperature=0.2,
+)
+resume_agent=resume_agent.with_structured_output(
+    schema=ResumeExtract,
+    method="json_schema",
+    include_raw=True,
+    strict=True
+)
+jd_agent=ChatGroq(
+    model="meta-llama/llama-4-scout-17b-16e-instruct",
+    temperature=0.2,
+)
+jd_agent=jd_agent.with_structured_output(
+    schema=JobDescriptionExtract,
+    method="json_schema",
+    include_raw=True,
+    strict=True
+)
+gap_analysis_agent=ChatGroq(
+    model="moonshotai/kimi-k2-instruct-0905",
+    temperature=0.2,
+)
+gap_analysis_agent=gap_analysis_agent.with_structured_output(
+    schema=SkillGapAnalysis,
+    method="json_schema",
+    include_raw=True,
+    strict=True
+)
+roadmap_planner_agent=ChatGroq(
+    model="moonshotai/kimi-k2-instruct-0905",
+    temperature=0.2,
+)

app/core/__init__.py ADDED Viewed

File without changes

app/core/config.py ADDED Viewed

	@@ -0,0 +1,21 @@

+from pathlib import Path
+from pydantic_settings import BaseSettings, SettingsConfigDict
+# Directory two levels above this file; with this file at app/core/config.py that is the app/ package directory.
+BASE_DIR = Path(__file__).resolve().parent.parent
+# Environment-backed application settings (pydantic-settings).
+# Fields declared without a default have no fallback here and must come from the environment or the .env file.
+class Settings(BaseSettings):
+    PROJECT_NAME: str = "Adaptive Onboarding Engine"
+    GROQ_API_KEY: str
+    PINECONE_API_KEY: str
+    CLOUDINARY_CLOUD_NAME: str
+    CLOUDINARY_API_KEY: str
+    CLOUDINARY_API_SECRET: str
+    model_config = SettingsConfigDict(
+        # NOTE(review): the .env file is looked up inside BASE_DIR (app/), not the repository root — confirm that is intended.
+        env_file=str(BASE_DIR / ".env"),
+        env_file_encoding="utf-8",
+        extra="ignore"  # variables that match no declared field are ignored instead of rejected
+    )
+# Instantiated at import time, so importing this module is what triggers settings validation.
+settings = Settings()

app/graph.py ADDED Viewed

File without changes

app/main.py ADDED Viewed

File without changes

app/nodes/__init__.py ADDED Viewed

File without changes

app/nodes/graphnodes.py ADDED Viewed

	@@ -0,0 +1,197 @@

+from app.state.state import OnboardingState
+from langchain_core.messages import SystemMessage, HumanMessage,ToolMessage,AIMessage
+from app.prompts.resume_agent_prompt import resume_agent_prompt
+from app.prompts.jd_agent_prompt import jd_agent_prompt
+from app.prompts.roadmap_planner_agent_prompt import roadmap_planner_agent_prompt
+from app.agents.agents import resume_agent,jd_agent,roadmap_planner_agent,gap_analysis_agent
+from app.prompts.gap_analysis_agent_prompt import gap_analysis_agent_prompt
+from app.schemas.pydanticschema import ResumeExtract,JobDescriptionExtract,SkillGapAnalysis
+import json
+from langchain_community.document_loaders import PyMuPDFLoader
+def input_node(state: OnboardingState):
+    file_path = state.get("file_path")
+    if not file_path:
+        return {"extraction_error": "Missing file_path in state"}
+    try:
+        loader = PyMuPDFLoader(file_path)
+        docs = loader.load()
+        resume_text = "\n".join([doc.page_content for doc in docs])
+        return {
+            "resume_text": resume_text,
+            "extraction_error": None
+        }
+    except Exception as e:
+        return {
+            "extraction_error": f"Failed to load resume: {str(e)}"
+        }
+def extractResumeDataNode(state: OnboardingState):
+    resume_text = state["resume_text"]
+    messages = [
+        SystemMessage(content=resume_agent_prompt),
+        HumanMessage(content=f"<resume_text>{resume_text}</resume_text>")
+    ]
+    result = resume_agent.invoke(messages)
+    return {"resume_data": result["parsed"]}
+def extractJDDataNode(state: OnboardingState):
+    # 1. Safety Check: Is the text even in the state?
+    jd_text = state.get("job_description", "")
+    if not jd_text or len(jd_text.strip()) < 5:
+        print("DEBUGGER ERROR: job_description text is MISSING from state!")
+        return {"JobDescriptionExtract_data": JobDescriptionExtract()}
+    print(f"DEBUGGER: Sending {len(jd_text)} characters to JD Agent...")
+    messages = [
+        SystemMessage(content=jd_agent_prompt),
+        HumanMessage(content=f"EXTRACT FROM THIS TEXT:\n\n{jd_text}")
+    ]
+    try:
+        # 2. Invoke the agent
+        result = jd_agent.invoke(messages)
+        # 3. Handle the 'parsed' key (ensure your chain is configured correctly)
+        # If result is already the Pydantic object, use it directly.
+        # If result is a dict with 'parsed', use result['parsed'].
+        parsed_data = result.get("parsed") if isinstance(result, dict) else result
+        # 4. Critical Check: Did it actually find anything?
+        if parsed_data.job_title is None and parsed_data.tools_technologies is None:
+            print("DEBUGGER WARNING: LLM returned empty schema! Checking prompt...")
+        else:
+            print(f"DEBUGGER SUCCESS: Extracted {parsed_data.job_title}")
+        return {"JobDescriptionExtract_data": parsed_data}
+    except Exception as e:
+        print(f"DEBUGGER CRITICAL: Invoke failed: {str(e)}")
+        return {"JobDescriptionExtract_data": JobDescriptionExtract()}
+def skill_gap_node(state: OnboardingState):
+    resume_data = state["resume_data"]
+    candidate_name = state["candidate_name"]
+    # To remove noise and reduce size  of the prompt.
+    lean_resume_dict = resume_data.model_dump(
+        exclude={
+            "achievements": True, # Drops the entire achievements list
+            "skills": {"__all__": {"category"}}, # Drops 'category' from every skill
+            "experience": {"__all__": {"responsibilities"}}, # Drops bullet points
+            "projects": {"__all__": {"what_was_built"}}, # Drops project descriptions
+            "certifications": {"__all__": {"issuer"}} # Drops the issuer
+        },
+        exclude_none=True # Bonus: Automatically drops any fields that are None/null!
+    )
+    raw_jd = state["JobDescriptionExtract_data"]
+    # Strip the HR noise and text bloat
+    lean_jd_dict = raw_jd.model_dump(
+        exclude={
+            "company_name": True,
+            "location": True,
+            "employment_type": True,
+            "duration_months": True,
+            "responsibilities": True, # Dropping verbose bullet points
+            "requirements": True,
+            "constraints": True
+        },
+        exclude_none=True # Drops any null fields
+    )
+    lean_resume_json = json.dumps(lean_resume_dict, indent=2)
+    lean_jd_json = json.dumps(lean_jd_dict, indent=2)
+    messages = [
+        SystemMessage(content=gap_analysis_agent_prompt),
+        HumanMessage(content=f"Users Resume:<lean_resume_json>{lean_resume_json}</lean_resume_json> Job Description:<lean_jd_json>{lean_jd_json}</lean_jd_json>"),
+    ]
+    result = gap_analysis_agent.invoke(messages)
+    return {"skill_gap_analysis_data": result["parsed"]}
+def roadmap_planning_node(state: OnboardingState):
+    """
+    The agent's 'thinking' node. It looks at the Skill Gaps and
+    decides which tool to call next.
+    """
+    skill_gap_data = state["skill_gap_analysis_data"]
+    skill_gap_data= skill_gap_data.model_dump()
+    system_prompt = SystemMessage(content=roadmap_planner_agent_prompt)
+    input_msg = HumanMessage(content=f"<skill_gap_data> {skill_gap_data} </skill_gap_data>")
+    response = roadmap_planner_agent.invoke([system_prompt, input_msg] + state["messages"])
+    return {"messages": [response]}
+def finalize_state_node(state: OnboardingState):
+    """
+    Final node that extracts structured data from the message scratchpad
+    and populates the main state keys. No global variables needed!
+    """
+    final_roadmap = None
+    mermaid_code = None
+    # We search the messages in reverse to find the LATEST tool calls
+    for msg in reversed(state["messages"]):
+        # Check if the message has tool calls (this will be an AIMessage)
+        if hasattr(msg, "tool_calls") and msg.tool_calls:
+            for tool_call in msg.tool_calls:
+                # 1. Extract the Roadmap JSON
+                if tool_call["name"] == "submit_final_roadmap":
+                    final_roadmap = tool_call["args"]
+                # 2. Extract the Mermaid String
+                elif tool_call["name"] == "submit_mermaid_visualization":
+                    mermaid_code = tool_call["args"].get("mermaid_code")
+        # Once we have both, we can stop searching
+        if final_roadmap and mermaid_code:
+            break
+    return {
+        "final_roadmap": final_roadmap,
+        "mermaid_code": mermaid_code
+    }

app/prompts/__init__.py ADDED Viewed

File without changes

app/prompts/gap_analysis_agent_prompt.py ADDED Viewed

	@@ -0,0 +1,28 @@

+gap_analysis_agent_prompt="""
+<role>
+You are an expert technical assessor and the core intelligence of an AI-driven, adaptive onboarding engine[cite: 5].
+Your objective is to parse a new hire's current capabilities against a target job description and identify precise skill gaps to reach role-specific competency[cite: 5].
+</role>
+<context>
+Current corporate onboarding utilizes static, "one-size-fits-all" curricula, resulting in significant inefficiencies[cite: 3].
+Your ultimate goal is to solve this: you must ensure experienced hires do NOT waste time on known concepts, while ensuring beginners are NOT overwhelmed by advanced modules[cite: 3, 4].
+</context>
+<rules>
+- Cross-reference the JD's `skills_required` and `tools_technologies` against the candidate's `skills_list`, `experience.technologies`, and `projects.technologies`.
+- Identify Hard Gaps: Technologies explicitly required by the JD that are completely absent from the candidate's profile.
+- Apply Adaptive Logic (Proficiency Gaps):
+  - For Experienced Hires: If they possess the skill, DO NOT flag it for basic training. Only flag a gap if they need an advanced, role-specific upgrade based on low duration of use.
+  - For Beginners/Freshers: Flag foundational gaps and prerequisites heavily to ensure they are prepared before tackling complex JD requirements.
+- Keep skills atomic and highly specific (e.g., output "FastAPI" or "PostgreSQL", do NOT output vague terms like "Backend Frameworks").
+- Do NOT hallucinate requirements that are not explicitly stated in the JD data.
+- Do NOT attempt to build the curriculum or suggest courses yet. Your sole focus is diagnosing the gaps.
+- Provide a concise `reasoning` string for each identified gap. This reasoning MUST justify why the gap exists based on the user's experience level to prove the adaptive logic.
+</rules>
+<output_format>
+Return a valid JSON object only.
+</output_format>
+"""

app/prompts/jd_agent_prompt.py ADDED Viewed

	@@ -0,0 +1,31 @@

+# System prompt for the job-description extraction agent; it is sent as the SystemMessage ahead of the raw JD text.
+# The field names mentioned in <rules> (skills, responsibilities, constraints, is_fresher_allowed) refer to the
+# structured-output schema attached to the agent.
+jd_agent_prompt ="""
+<role>
+You are a precise job description parser.
+Extract structured information from the given job description.
+</role>
+<rules>
+- Extract ONLY explicitly mentioned information. Do NOT infer or hallucinate.
+- Follow the provided schema strictly.
+- If a field is not present, return null (not empty list unless schema default applies).
+- Keep skills atomic (e.g., Python, SQL, React).
+- Do NOT mix fields:
+  - skills = only required skills
+  - responsibilities = what the candidate will do
+  - constraints = restrictions like location, duration, eligibility
+- Convert durations like "6 months" into integer months.
+- is_fresher_allowed:
+  - True only if explicitly allowed
+  - False only if explicitly restricted
+</rules>
+<output_format>
+Return a valid JSON object only.
+</output_format> """

app/prompts/resume_agent_prompt.py ADDED Viewed

	@@ -0,0 +1,25 @@

+resume_agent_prompt = """
+<role>
+You are a precise resume parser. Your only job is to extract structured information from a raw resume text.
+</role>
+<rules>
+- Extract ONLY what is explicitly present in the resume. Do NOT infer or hallucinate missing fields.
+- current_role: the job title stated at the top of the resume or most recent role. If the candidate is a student with no job, set it to "Student".
+- is_fresher: set True ONLY if the candidate has zero professional work experience. Having projects or certifications does NOT make someone non-fresher.
+- total_experience_years: total years of professional work only. Set 0.0 for freshers.
+- skills: extract from the explicit skills section only. Do NOT pull skills from project descriptions here.
+- experience: each role is a SEPARATE entry. Ignore company name. Focus on job_title, technologies used, and what they did or learned.
+- projects: extract each project separately. Capture technologies and one line on what was built.
+- certifications: extract ONLY if present. Set null if none found. Include topics the certification covers.
+- achievements: extract ONLY if present. Set null if none found. Include the domain (e.g. Hackathon, Quiz, Competitive Programming).
+</rules>
+<output_format>
+Return a single valid JSON object matching the schema. No extra text, no markdown, no explanation.
+</output_format>
+"""

app/prompts/roadmap_planner_agent_prompt.py ADDED Viewed

	@@ -0,0 +1,60 @@

+roadmap_planner_agent_prompt="""
+<role>
+You are the "Architect of Growth," an expert technical roadmap planner.
+Your objective is to transform a "Skill Gap Analysis" into a logically sequenced,
+personalized learning journey that ensures "Role Competency" in the minimum time possible.
+</role>
+<logic_flow>
+1. ANALYZE GAPS: Review the identified skill gaps, their priority, and the 'gap_type' (foundation vs upgrade).
+2. INITIAL SEARCH (RAG): For every high/medium priority gap, call 'search_courses'.
+   - Match the 'level' and 'category' strictly.
+3. DEPENDENCY RESOLUTION (The "ID-Lookup" Step):
+   - For every course retrieved, inspect the 'prerequisites' field (list of IDs).
+   - CHECK: Does the 'resume_data' show the candidate already knows these prerequisites?
+   - IF NOT: You MUST call 'get_course_by_id' for each missing prerequisite ID.
+   - RECURSION: If the prerequisite itself has prerequisites, repeat this step until the path is complete.
+4. ADAPTIVE SEQUENCING:
+   - Always place Prerequisite modules BEFORE the target Skill Gap module.
+   - If 'is_fresher_adaptation_needed' is True, start the entire roadmap with the 'SOFT-AGILE-101' or similar professional module.
+5. JUSTIFY: For every course (including prerequisites), provide a unique 'reasoning' trace.
+   - Example for Prereq: "Added 'SQL Basics' because 'PostgreSQL Mastery' requires it, and your resume shows no prior database experience."
+6.after you have a complete roadmap, call 'submit_final_roadmap' and 'submit_mermaid_visualization'.
+</logic_flow>
+<constraints>
+- STRICT ID USAGE: Use ONLY the 'course_id' returned by tools. Never guess an ID.
+- REDUNDANCY CHECK: Do not assign a course if the candidate's projects or experience already prove mastery of that specific topic.
+- PATH LENGTH: Prioritize the most critical 5-6 modules total to ensure the onboarding is high-impact and achievable.
+</constraints>
+<constraints>
+- DO NOT provide a conversational response at the end.
+- DO NOT just print JSON.
+- You MUST call the 'submit_final_roadmap' and 'submit_mermaid_visualization' tool with the final plan.
+- Ensure 'sequence_order' is 1, 2, 3...
+</constraints>
+<example_mermaid>
+flowchart TD
+    A([Start — Rahul's current skills]):::start
+    subgraph W1["Week 1 — Core gaps"]
+      B[CS-DOCKER-101\nDocker & Containerization]:::gap
+      C[CS-PY-101\nPython Fundamentals]:::known
+    end
+    subgraph W2["Week 2 — Role readiness"]
+      D[CS-CICD-201\nCI/CD with GitHub Actions]:::gap
+    end
+    Z([Role-ready — DevOps Engineer]):::done
+    A --> B & C
+    B --> D
+    D --> Z
+    classDef gap   fill:#EEEDFE,stroke:#534AB7,color:#26215C
+    classDef known fill:#E1F5EE,stroke:#0F6E56,color:#085041
+    classDef start fill:#1D9E75,stroke:#0F6E56,color:#E1F5EE
+    classDef done  fill:#534AB7,stroke:#3C3489,color:#EEEDFE
+</example_mermaid>
+"""

app/schemas/__init__.py ADDED Viewed

File without changes

app/schemas/pydanticschema.py ADDED Viewed

	@@ -0,0 +1,385 @@

+from pydantic import BaseModel, Field
+from typing import List, Optional, Literal
+# One skill a job description asks for; used as the item type of JobDescriptionExtract.skills_required.
+# `level` is free text (not an enum), so values outside the three suggested in its description are accepted.
+class SkillRequirement(BaseModel):
+    name: str = Field(
+        ...,
+        description="Skill or technology required for the job (e.g., Python, SQL, React)"
+    )
+    level: Optional[str] = Field(
+        None,
+        description="Expected proficiency level: beginner | intermediate | strong"
+    )
+# A single responsibility line from a job description; item type of JobDescriptionExtract.responsibilities.
+class ResponsibilityItem(BaseModel):
+    description: str = Field(
+        ...,
+        description="Key responsibility or task expected from the candidate"
+    )
+# A single general requirement from a job description; item type of JobDescriptionExtract.requirements.
+class RequirementItem(BaseModel):
+    description: str = Field(
+        ...,
+        description="Qualification or requirement such as education, availability, etc."
+    )
+# A type/value pair describing one restriction on the role; item type of JobDescriptionExtract.constraints.
+# Both parts are free-text strings.
+class ConstraintItem(BaseModel):
+    type: str = Field(
+        ...,
+        description="Constraint type such as location, duration, eligibility"
+    )
+    value: str = Field(
+        ...,
+        description="Constraint value (e.g., 'Pune only', '6 months', 'Fresher')"
+    )
+# Structured view of a job description.
+# Every field is optional and defaults to None, so JobDescriptionExtract() with no arguments is a valid
+# "nothing extracted" value.
+class JobDescriptionExtract(BaseModel):
+    job_title: Optional[str] = Field(
+        None,
+        description="Job role/title (e.g., AI/ML Intern, Web Developer)"
+    )
+    company_name: Optional[str] = Field(
+        None,
+        description="Company offering the job"
+    )
+    location: Optional[str] = Field(
+        None,
+        description="Job location if specified"
+    )
+    employment_type: Optional[str] = Field(
+        None,
+        description="Type of job: internship, full-time, contract"
+    )
+    duration_months: Optional[int] = Field(
+        None,
+        description="Duration of role in months (for internships/contracts)"
+    )
+    is_fresher_allowed: Optional[bool] = Field(
+        None,
+        description="Whether freshers are eligible for this role"
+    )
+    skills_required: Optional[List[SkillRequirement]] = Field(
+        None,
+        description="List of required skills and expected levels"
+    )
+    tools_technologies: Optional[List[str]] = Field(
+        None,
+        description="Specific tools/frameworks mentioned (e.g., Pandas, WordPress)"
+    )
+    responsibilities: Optional[List[ResponsibilityItem]] = Field(
+        None,
+        description="Key job responsibilities"
+    )
+    requirements: Optional[List[RequirementItem]] = Field(
+        None,
+        description="General requirements like availability, qualifications"
+    )
+    constraints: Optional[List[ConstraintItem]] = Field(
+        None,
+        description="Special constraints like location restriction, duration, etc."
+    )
+# One skill listed on a resume; item type of ResumeExtract.skills. `category` is optional free text.
+class Skill(BaseModel):
+    name: str = Field(..., description="Skill name e.g. Python, Docker")
+    category: Optional[str] = Field(
+        None, description="Category: Backend | ML | DevOps | Frontend | Other"
+    )
+# One role from a candidate's work history; item type of ResumeExtract.experience.
+# `technologies` and `responsibilities` are typed Optional but default to an empty list, so they are only
+# None when None is passed explicitly.
+class ExperienceItem(BaseModel):
+    job_title: str = Field(
+        ...,
+        description="Role title of the candidate. Example: 'Backend Intern', 'Software Engineer'"
+    )
+    experience_type: Optional[Literal['internship', 'full_time', 'contract', 'freelance']] = Field(
+        None,
+        description="Type of experience: internship, full_time, contract, or freelance"
+    )
+    duration_months: Optional[int] = Field(
+        None,
+        description="Duration of this role in months. Null if not explicitly mentioned"
+    )
+    technologies: Optional[List[str]] = Field(
+        default_factory=list,
+        description="Technologies, tools, or frameworks used in this role"
+    )
+    responsibilities: Optional[List[str]] = Field(
+        default_factory=list,
+        description="Key responsibilities, tasks, or learnings in concise bullet points"
+    )
+# One project from a resume; item type of ResumeExtract.projects. `technologies` defaults to an empty list.
+class ProjectItem(BaseModel):
+    name: str = Field(..., description="Project name")
+    technologies: List[str] = Field(
+        default_factory=list,
+        description="Technologies used in this project"
+    )
+    what_was_built: Optional[str] = Field(
+        None,
+        description="One line — what problem it solved or what was built"
+    )
+# One certification from a resume; item type of ResumeExtract.certifications.
+class CertificationItem(BaseModel):
+    name: str = Field(..., description="Certification name")
+    issuer: Optional[str] = Field(None, description="Issuing organization")
+    topics_covered: List[str] = Field(
+        default_factory=list,
+        description="Key topics or skills the certification covers"
+    )
+# One achievement from a resume; item type of ResumeExtract.achievements.
+class AchievementItem(BaseModel):
+    title: str = Field(..., description="Achievement title")
+    domain: Optional[str] = Field(
+        None,
+        description="Domain of achievement e.g. Competitive Programming, Hackathon, Quiz"
+    )
+class ResumeExtract(BaseModel):
+    job_title: Optional[str] = Field(
+    None,
+    description=(
+        "Primary job title or role of the candidate. "
+        "Examples: 'AI Engineer', 'Data Scientist', "
+        "'Construction Project Manager', 'Healthcare Representative'. "
+        "Should reflect the most recent or current role."
+       )
+    )
+    total_experience_months: Optional[int] = Field(
+       0,
+      description=(
+        "Total professional work experience in months. "
+        "Includes internships and full-time roles. "
+        "0 if fresher or no experience found."
+      )
+    )
+    skills: List[Skill] = Field(
+        default_factory=list,
+        description="Skills explicitly listed by the candidate"
+    )
+    experience: List[ExperienceItem] = Field(
+        default_factory=list,
+        description=(
+            "Each role as a separate entry. "
+            "No company name needed — focus on what was done and learned."
+        )
+    )
+    projects: List[ProjectItem] = Field(
+        default_factory=list,
+        description="Projects with technologies used and what was built"
+    )
+    certifications: Optional[List[CertificationItem]] = Field(
+        None,
+        description="Certifications with topics they cover. None if not present."
+    )
+    achievements: Optional[List[AchievementItem]] = Field(
+        None,
+        description="Accomplishments that signal domain strength or soft skills. None if not present."
+    )
+    is_fresher: bool = Field(
+    ...,
+    description=(
+        "Set to True if the candidate lacks full-time professional employment. "
+        "Academic projects, certifications, and internships are considered "
+        "part of the learning phase and do not qualify a candidate as 'non-fresher' hence is_."
+    )
+)
+class SkillRequirement(BaseModel):
+    name: str = Field(
+        ...,
+        description="Skill or technology required for the job (e.g., Python, SQL, React)"
+    )
+    level: Optional[str] = Field(
+        None,
+        description="Expected proficiency level: beginner | intermediate | strong"
+    )
+class ResponsibilityItem(BaseModel):
+    description: str = Field(
+        ...,
+        description="Key responsibility or task expected from the candidate"
+    )
+class RequirementItem(BaseModel):
+    description: str = Field(
+        ...,
+        description="Qualification or requirement such as education, availability, etc."
+    )
+class ConstraintItem(BaseModel):
+    type: str = Field(
+        ...,
+        description="Constraint type such as location, duration, eligibility"
+    )
+    value: str = Field(
+        ...,
+        description="Constraint value (e.g., 'Pune only', '6 months', 'Fresher')"
+    )
+class JobDescriptionExtract(BaseModel):
+    job_title: Optional[str] = Field(
+        None,
+        description="Job role/title (e.g., AI/ML Intern, Web Developer)"
+    )
+    company_name: Optional[str] = Field(
+        None,
+        description="Company offering the job"
+    )
+    location: Optional[str] = Field(
+        None,
+        description="Job location if specified"
+    )
+    employment_type: Optional[str] = Field(
+        None,
+        description="Type of job: internship, full-time, contract"
+    )
+    duration_months: Optional[int] = Field(
+        None,
+        description="Duration of role in months (for internships/contracts)"
+    )
+    is_fresher_allowed: Optional[bool] = Field(
+        None,
+        description="Whether freshers are eligible for this role"
+    )
+    skills_required: Optional[List[SkillRequirement]] = Field(
+        None,
+        description="List of required skills and expected levels"
+    )
+    tools_technologies: Optional[List[str]] = Field(
+        None,
+        description="Specific tools/frameworks mentioned (e.g., Pandas, WordPress)"
+    )
+    responsibilities: Optional[List[ResponsibilityItem]] = Field(
+        None,
+        description="Key job responsibilities"
+    )
+    requirements: Optional[List[RequirementItem]] = Field(
+        None,
+        description="General requirements like availability, qualifications"
+    )
+    constraints: Optional[List[ConstraintItem]] = Field(
+        None,
+        description="Special constraints like location restriction, duration, etc."
+    )
+# One diagnosed gap between the resume and the job description; item type of SkillGapAnalysis.analyzed_gaps.
+# All five fields are required; gap_type and priority are restricted to the listed literal values.
+class SkillGap(BaseModel):
+    skill_name: str = Field(
+        ...,
+        description="The specific technology or tool missing or requiring an upgrade (e.g., 'PostgreSQL')"
+    )
+    gap_type: Literal["missing_foundation", "needs_advanced_upgrade"] = Field(
+        ...,
+        description=(
+            "missing_foundation: Candidate has no recorded experience in this core requirement. "
+            "needs_advanced_upgrade: Candidate knows the basics but needs role-specific advanced training."
+        )
+    )
+    priority: Literal["high", "medium", "low"] = Field(
+        ...,
+        description="How critical this skill is for the target job role."
+    )
+    reasoning: str = Field(
+        ...,
+        description=(
+            "The 'Reasoning Trace'. This MUST be provided for every skill gap identified. "
+            "Explain exactly WHY this gap was flagged based on the resume vs JD comparison. "
+            "Example: 'JD requires FastAPI; candidate has Python experience but no record of using FastAPI framework.'"
+        )
+    )
+    target_competency: str = Field(
+        ...,
+        description="The specific outcome the candidate needs to reach (e.g., 'Build asynchronous database endpoints')"
+    )
+# Full result of the gap-analysis step: the target role, the list of gaps, and a short summary.
+# job_title and executive_summary are required; the remaining fields have defaults.
+# NOTE(review): ResumeExtract has no name field, so it is unclear where candidate_name would come from — confirm.
+class SkillGapAnalysis(BaseModel):
+    job_title: str = Field(..., description="The target role from the JD")
+    candidate_name: Optional[str] = Field(None, description="Extracted name from resume")
+    analyzed_gaps: List[SkillGap] = Field(
+        default_factory=list,
+        description="List of specific technical gaps found between Resume and JD"
+    )
+    is_fresher_adaptation_needed: bool = Field(
+        default=False,
+        description="True if foundational corporate/soft-skill modules should be added to the path."
+    )
+    executive_summary: str = Field(
+        ...,
+        description="A 2-3 sentence overview of the candidate's readiness and the primary focus of the onboarding."
+    )
+# One course placed on a learning roadmap; item type of LearningRoadmap.roadmap. Every field is required.
+class RoadmapStep(BaseModel):
+    course_id: str
+    title: str
+    reasoning: str = Field(..., description="Why this specific course was chosen for this user")
+    is_foundation: bool
+    sequence_order: int = Field(..., description="The order in which the course should be taken")
+# A complete roadmap: who it is for, the target role, the ordered steps, and a summary. Every field is required.
+# NOTE(review): presumably the argument shape of the planner's final-roadmap submission — confirm against the tool definition.
+class LearningRoadmap(BaseModel):
+    candidate_name: str
+    target_role: str
+    roadmap: List[RoadmapStep]
+    onboarding_summary: str

app/state/__init__.py ADDED Viewed

File without changes

app/state/state.py ADDED Viewed

	@@ -0,0 +1,38 @@

+from typing import Any, Dict, List, Optional, Tuple,TypedDict,Literal
+from typing import Annotated, Sequence
+import os
+from langchain_core.messages import SystemMessage, HumanMessage,ToolMessage,AIMessage
+from langchain_core.tools import Tool
+from langgraph.graph import StateGraph,END,START
+from langgraph.types import interrupt
+from langchain_core.prompts import ChatPromptTemplate,MessagesPlaceholder
+from langchain_community.document_loaders import  PyMuPDFLoader
+from pydantic import BaseModel, Field
+from typing import List, Optional
+from pprint import pprint
+from langchain_core.messages import BaseMessage
+from langgraph.graph import add_messages
+from app.schemas.pydanticschema import ResumeExtract,JobDescriptionExtract,SkillGapAnalysis
+# Shared state dictionary passed between the onboarding graph nodes.
+class OnboardingState(TypedDict):
+    candidate_name: Optional[str]
+    resume_text: str  # text extracted from the resume file
+    file_path: str  # location of the resume file to load
+    job_description: str  # raw job-description text
+    # Message history; updates to this key are combined through the add_messages reducer.
+    messages: Annotated[Sequence[BaseMessage], add_messages]
+    # Analysis & Extraction Data
+    skill_gap_analysis_data: Optional[SkillGapAnalysis]
+    resume_data: Optional[ResumeExtract]
+    extraction_error: Optional[str]  # None when extraction succeeded, otherwise a human-readable message
+    JobDescriptionExtract_data: Optional[JobDescriptionExtract]
+    # --- NEW KEYS FOR OUTPUT ---
+    mermaid_code: Optional[str]        # Stores the Mermaid visualization string
+    final_roadmap: Optional[Dict]      # Stores the final structured JSON roadmap

app/tools/__init__.py ADDED Viewed

File without changes

app/utils/__init__.py ADDED Viewed

File without changes

app/utils/vectordatabase.py ADDED Viewed

	@@ -0,0 +1,52 @@

+from pinecone import Pinecone, ServerlessSpec
+from pinecone_text.sparse import BM25Encoder
+import os
+from dotenv import load_dotenv
+from langchain_community.retrievers import PineconeHybridSearchRetriever
+import torch
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain_community.schema import Document
+device=torch.device("cuda" if torch.cuda.is_available() else "cpu")
+embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={"device": device})
+load_dotenv()
+PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
+pc = Pinecone(api_key=PINECONE_API_KEY)
+index_name = "catalog-embeddings"
+# Create index if not exists
+if index_name not in pc.list_indexes().names():
+    pc.create_index(
+        name=index_name,
+        dimension=384,
+        metric="dotproduct",
+        spec=ServerlessSpec(
+            cloud="aws",
+            region="us-east-1"
+        )
+    )
+    print("Index created.")
+index = pc.Index(index_name)
+print("Index ready:", index.describe_index_stats())
+bm25_encoder = BM25Encoder()
+bm25_encoder.fit([doc.page_content for doc in documents])
+retriever = PineconeHybridSearchRetriever(
+    embeddings=embeddings,
+    sparse_encoder=bm25_encoder,
+    index=index
+)

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+langchain==1.2.10
+pydantic==2.11.7
+langchain_huggingface
+langchain-groq==1.1.1
+pinecone==8.0.0
+langchain_community==0.4.1
+fastapi==0.118.1
+uvicorn
+pinecone-text