diff --git a/.gitignore b/.gitignore index cb7e5a3e1d1982498148606f51d3e50dfdb582df..606d7417e97429fb62d2690f6bcb14cc036a6fba 100644 --- a/.gitignore +++ b/.gitignore @@ -67,4 +67,7 @@ src/database/cvs/tests/*.txt .lgcache/ .langgraph_api/ -.idea/ \ No newline at end of file +.idea/ + +# any .wav files +*.wav \ No newline at end of file diff --git a/README.md b/README.md index 2c655bb16410575dec4c3991a598118d0ba5d33c..63a820a01046ca1c0637dc721d8228f9afe40f44 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,3 @@ ---- -license: mit -title: HR Assistant -sdk: docker -emoji: ๐ข -colorFrom: green -colorTo: green -tags: - - mcp-in-action-track-enterprise ---- - # ***`Recruitment Agent`***
@@ -22,16 +11,14 @@ tags:
"
+ except Exception as e:
+ result = f"Error during execution: {repr(e)}"
+ new_keys = set(_locals.keys()) - original_keys
+ new_vars = {key: _locals[key] for key in new_keys}
+ return result, new_vars
+
+ @staticmethod
+ def _filter_serializable(d: dict[str, Any]) -> dict[str, Any]:
+ """Keep only JSON/msgpack-serializable values (basic Python types).
+ """
+ serializable_types = (
+ str, int, float, bool, list, dict, type(None)
+ )
+ return {
+ k: v for k, v in d.items() if isinstance(v, serializable_types)
+ }
+
+
+ def _create_codeact(
+ self,
+ model: BaseChatModel,
+ tools: Sequence[Union[StructuredTool, Callable]],
+ eval_fn: Union[EvalFunction, EvalCoroutine],
+ *,
+ state_schema: StateSchemaType = CodeActState,
+ ) -> StateGraph:
+ """Create a LangGraph state graph for the CodeAct agent.
+ """
+ tools = [
+ t if isinstance(t, StructuredTool) else create_tool(t)
+ for t in tools
+ ]
+ self.tools_context = {tool.name: tool.func for tool in tools}
+
+ def call_model_stream(state: StateSchema):
+ messages = [{"role": "system", "content": self.prompt}] + state["messages"]
+
+ # Accumulate into one combined chunk
+ accumulated: AIMessageChunk | None = None
+
+ # stream partial tokens as AIMessageChunk objects with .content = "Hel",
+ for delta in self.model.stream(messages):
+ if accumulated is None:
+ accumulated = delta
+ else:
+ accumulated = accumulated + delta # merge chunks
+
+ # yield partial update immediately (for streaming UI)
+ yield Command(update={"messages": [delta], "script": None})
+
+ # after streaming completes
+ if accumulated is None:
+ yield Command(update={"messages": [], "script": None})
+ return # nothing came back
+
+ # Convert merged chunks into a final message
+ full_text = accumulated.content or ""
+
+ # Check for code blocks
+ code = self._extract_and_combine_codeblocks(full_text)
+
+ if code:
+ # Create a fake tool call entry
+ tool_call_id = "sandbox"
+ fake_tool_call = {
+ "id": tool_call_id,
+ "type": "function",
+ "function": {
+ "name": "sandbox",
+ "arguments": code
+ }
+ }
+ # Patch the assistant message with tool_calls
+ accumulated.additional_kwargs = {"tool_calls": [fake_tool_call]}
+
+ # Pass both the patched assistant message and code to sandbox
+ yield Command(
+ goto="sandbox",
+ update={
+ "messages": [accumulated],
+ "script": code
+ }
+ )
+ else:
+ yield Command(
+ update={
+ "messages": [accumulated],
+ "script": None
+ }
+ )
+
+
+ if inspect.iscoroutinefunction(eval_fn):
+
+ async def sandbox(state: StateSchema):
+ """Run the code in the sandbox and return a proper OpenAI tool message.
+ """
+ existing_context = state.get("context", {})
+
+ # Combine persistent context with runtime-only tools
+ exec_context = {**existing_context, **self.tools_context}
+
+ # Get tool_call_id for traceability
+ prev_msgs = state.get("messages", [])
+ tool_call_id = "sandbox"
+ for msg in reversed(prev_msgs):
+ if hasattr(msg, "additional_kwargs") and msg.additional_kwargs.get("tool_calls"):
+ tool_call_id = msg.additional_kwargs["tool_calls"][0]["id"]
+ break
+
+ # Execute user code
+ output, new_vars = await eval_fn(state["script"], exec_context)
+
+ # Only persist serializable data
+ serializable_new_vars = self._filter_serializable(new_vars)
+ new_context = {**existing_context, **serializable_new_vars}
+
+ # Format output properly
+ content_str = (
+ f"Sandbox result of your executed code:\n{json.dumps(output, default=str)}"
+ if not isinstance(output, str)
+ else f"Sandbox result of your executed code:\n{output}"
+ )
+
+ # Return OpenAI-compliant tool result
+ return {
+ "messages": [
+ {
+ "role": "tool",
+ "tool_call_id": tool_call_id,
+ "name": "sandbox",
+ "content": content_str
+ }
+ ],
+ "context": new_context,
+ }
+
+
+ else:
+ def sandbox(state: StateSchema):
+ """Run the code in the sandbox and return a proper OpenAI tool message.
+ """
+ existing_context = state.get("context", {})
+
+ # Combine persistent context with runtime-only tools
+ exec_context = {**existing_context, **self.tools_context}
+
+ # Get tool_call_id for traceability
+ prev_msgs = state.get("messages", [])
+ tool_call_id = "sandbox"
+ for msg in reversed(prev_msgs):
+ if hasattr(msg, "additional_kwargs") and msg.additional_kwargs.get("tool_calls"):
+ tool_call_id = msg.additional_kwargs["tool_calls"][0]["id"]
+ break
+
+ # Execute user code
+ output, new_vars = eval_fn(state["script"], exec_context)
+
+ # Only persist serializable data
+ serializable_new_vars = self._filter_serializable(new_vars)
+ new_context = {**existing_context, **serializable_new_vars}
+
+ # Format output properly
+ content_str = ( # NOTE: before "json.dumps(output)"
+ f"Sandbox result of your executed code:\n{json.dumps(output, default=str)}"
+ if not isinstance(output, str)
+ else f"Sandbox result of your executed code:\n{output}"
+ )
+
+ # Return OpenAI-compliant tool result
+ return {
+ "messages": [
+ {
+ "role": "tool",
+ "tool_call_id": tool_call_id,
+ "name": "sandbox",
+ "content": content_str,
+ # Keep as string if already string else JSON serialize
+ }
+ ],
+ "context": new_context,
+ }
+
+ # --- Build the state graph ---
+ agent = StateGraph(state_schema)
+ agent.add_node(call_model_stream, destinations=(END, "sandbox"))
+ agent.add_node(sandbox)
+ agent.add_edge(START, "call_model_stream")
+ agent.add_edge("sandbox", "call_model_stream")
+ return agent
+
+
+ def stream(
+ self,
+ messages: list[dict],
+ thread_id: int = 1
+ ) -> Generator[
+ TokenStream,
+ None,
+ None
+ ]:
+ """
+ Generator yielding agent outputs during execution.
+
+ Yields
+ ------
+ tuple[str, Any]
+ - "messages": list of chat message objects (e.g. AIMessage)
+ - "values": dict of current agent state (messages, script, context)
+
+ Example
+ -------
+ messages [AIMessage(content="```python\nresult = 3*7+5\nprint(result)\n```")]
+ values {"messages": [...], "script": "result = 3*7+5\nprint(result)", "context": {}}
+ messages [AIMessage(content="26")]
+ values {"messages": [...], "script": None, "context": {"result": 26}}
+ """
+
+ config = {
+ "configurable": {
+ "thread_id": thread_id
+ }
+ }
+ for typ, chunk in self.compiled_agent.stream(
+ {"messages": messages},
+ stream_mode=["values", "messages"],
+ config=config,
+ ):
+ yield TokenStream(type=typ, data=chunk)
+
+ #------- BEFORE DB AGENT EXECUTOR -------#
+ #def generate(
+ # self,
+ # messages: list[dict],
+ # thread_id: int = 1
+ #) -> dict[str, Any]:
+ # """
+ # Run the agent to completion and return final state.#
+
+ # Returns
+ # -------
+ # dict
+ # Final agent state containing messages, script, context.
+ # """
+ # config = {
+ # "configurable": {
+ # "thread_id": thread_id
+ # }
+ # }
+ # final_state = self.compiled_agent.generate(
+ # {"messages": messages},
+ # config=config,
+ # )
+ # return final_state
+ #------- BEFORE DB AGENT EXECUTOR -------#
+ def generate(
+ self,
+ messages: list[dict],
+ thread_id: int = 1,
+ context: Optional[dict[str, Any]] = None,
+ ) -> dict[str, Any]:
+ """
+ *** Test method for db executor ***
+ """
+ config = {
+ "configurable": {"thread_id": thread_id}
+ }
+ state = {
+ "messages": messages, "context": context or {}
+ }
+ return self.compiled_agent.invoke( #TODO: note changed from generate to invoke, hope it works
+ state, config=config
+ )
+
+
+
+
+if __name__ == "__main__":
+ """
+ Run the CodeActAgent in different modes:
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ - python -m agent.core.codeact --mode chat
+ - python -m agent.core.codeact --mode debug
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ """
+ import argparse
+ import json
+ from rich.console import Console
+
+ # Validate environment (api key) before doing *anything* else
+ OpenAIApiKey.validate_environment()
+
+ # --- Parse args ---
+ parser = argparse.ArgumentParser(description="Run CodeActAgent in different modes")
+ parser.add_argument(
+ "--mode",
+ choices=["chat", "debug"],
+ default="chat",
+ help="Mode: 'chat' for normal conversation, 'debug' to also show state values."
+ )
+ args = parser.parse_args()
+
+ # --- Instantiate agent ---
+ agent = CodeActAgent(
+ model_name="gpt-4o",
+ model_provider="openai",
+ tools=[],
+ eval_fn=CodeActAgent.default_eval, # built-in evaluator
+ system_prompt="agent/prompts/local_archive/original.txt",
+ bind_tools=False,
+ memory=True
+ )
+ #~~~~~~~~~~~~~~~~~~~~~~~~~~#
+ # --- Conversation loop ---#
+ #~~~~~~~~~~~~~~~~~~~~~~~~~~#
+ # --- Rich console setup ---
+ console = Console(width=100, soft_wrap=False)
+
+ while True:
+ user_query = input("\n๐ USER:\nโบโบโบ ")
+ if user_query.lower() == "exit":
+ break
+
+ messages = [{"role": "user", "content": user_query}]
+
+ # --- Dynamic assistant header (chat only) ---
+ if args.mode == "chat":
+ console.print("\n๐ง [bold magenta]Assistant[/]:\nโบโบโบ ", end="")
+
+ # --- Stream agent responses ---
+ for typ, chunk in agent.stream(messages):
+ if args.mode == "chat" and typ == "messages":
+ print(chunk[0].content, end="", flush=True)
+
+ elif args.mode == "debug":
+ if typ == "values":
+ # Print only the nicely formatted message + optional context
+ pretty_print_state(chunk, show_context=False)
+
+ print("\n")
+
diff --git a/src/backend/agents/db_executor/codeact/prompts/local_archive/original.txt b/src/backend/agents/db_executor/codeact/prompts/local_archive/original.txt
new file mode 100644
index 0000000000000000000000000000000000000000..713893d69be7d0e95c89d351ec2cad6b59319532
--- /dev/null
+++ b/src/backend/agents/db_executor/codeact/prompts/local_archive/original.txt
@@ -0,0 +1,18 @@
+
+You are a helpful assistant. You are encouraged to generate Python code for calculations.
+
+You will be given a task to perform. You should output either
+- a Python code snippet that provides the solution to the task, or a step towards the solution. Any output you want
+to extract from the code should be printed to the console. Code should be output in a fenced code block.
+- text to be shown directly to the user, if you want to ask for more information or provide the final answer.
+
+In addition to the Python Standard Library, you can use the following functions:
+
+{tools}
+
+Variables defined at the top level of previous code snippets can be referenced in your code.
+
+When you include a code block, put a blank line after the closing triple backticks
+before any further text.
+
+Reminder: use Python code snippets to call tools.
\ No newline at end of file
diff --git a/src/backend/agents/db_executor/codeact/prompts/local_archive/test.txt b/src/backend/agents/db_executor/codeact/prompts/local_archive/test.txt
new file mode 100644
index 0000000000000000000000000000000000000000..860542653bdedc98c176c84142a6564275799f15
--- /dev/null
+++ b/src/backend/agents/db_executor/codeact/prompts/local_archive/test.txt
@@ -0,0 +1,25 @@
+You are a helpful assistant that can solve tasks using Python code and a set of predefined tools.
+
+=== RULES ===
+1. CODE BLOCKS:
+ - Always use triple backticks: ```python ... ```
+ - Never include natural language inside code blocks.
+ - Comments (#) are allowed but should be minimal.
+before any further text.
+
+2. OUTPUT EXPLANATION:
+ - After each code block, provide a brief natural language explanation.
+ - Use code outputs in your response.
+ - Keep explanations separate from code.
+
+Note:
+When you include a code block, put a blank line after the closing triple backticks
+before any further text.
+
+=== VALID EXAMPLE ===
+```python
+# Calculate the product
+result = multiply(15, 23)
+print(result)
+```
+The calculation shows that 15 multiplied by 23 equals 345.
\ No newline at end of file
diff --git a/src/backend/agents/db_executor/codeact/prompts/prompt_layer.py b/src/backend/agents/db_executor/codeact/prompts/prompt_layer.py
new file mode 100644
index 0000000000000000000000000000000000000000..c3a866754ec68d3a056e9456be066e3533e51bcb
--- /dev/null
+++ b/src/backend/agents/db_executor/codeact/prompts/prompt_layer.py
@@ -0,0 +1,162 @@
+#!/usr/bin/env python3
+"""
+PromptLayer Integration for Prompt Management
+==============================================
+
+This module provides a centralized way to manage prompts using PromptLayer platform.
+Allows for versioned, labeled prompts that can be easily updated without code changes.
+"""
+
+import promptlayer
+from promptlayer import PromptLayer
+from dotenv import load_dotenv
+import os
+from typing import Dict, Any, Optional
+from functools import lru_cache
+
+load_dotenv()
+
+
+class PromptManager:
+ """
+ Centralized prompt management using PromptLayer platform.
+ link:
+ - https://www.promptlayer.com
+
+ Features:
+ - Version control for prompts
+ - Environment-based prompt labels (dev, staging, production)
+ - Caching for performance
+ - Fallback to local files if PromptLayer unavailable
+ """
+
+ def __init__(self, api_key: Optional[str] = None, environment: str = "production"):
+ """
+ Initialize PromptManager.
+
+ Args:
+ api_key: PromptLayer API key (defaults to PROMPTLAYER_API_KEY env var)
+ environment: Environment label for prompts (dev, staging, production)
+ """
+ self.api_key = api_key or os.getenv("PROMPTLAYER_API_KEY")
+ self.environment = environment
+ self.client = None
+
+ # Initialize client if API key is available
+ if self.api_key:
+ try:
+ self.client = PromptLayer(api_key=self.api_key)
+ print(
+ f"PromptLayer connected (environment: {environment})")
+
+ except Exception as e:
+ print(f"โ ๏ธ PromptLayer connection failed: {e}")
+ self.client = None
+ else:
+ print("โ ๏ธ No PROMPTLAYER_API_KEY found, using local fallback")
+
+ @lru_cache(maxsize=128)
+ def get_prompt(
+ self,
+ template_name: str,
+ version: Optional[int] = None,
+ label: Optional[str] = None,
+ fallback_path: Optional[str] = None
+ ) -> str:
+ """
+ Get a prompt from PromptLayer with fallback to local file.
+
+ Args:
+ template_name: Name of the prompt template
+ version: Specific version number (defaults to latest)
+ label: Environment label (defaults to instance environment)
+ fallback_path: Local file path if PromptLayer unavailable
+
+ Returns:
+ Prompt content as string
+
+ Raises:
+ ValueError: If prompt cannot be found and no fallback provided
+ """
+ # Use provided label or instance default
+ label = label or self.environment
+
+ # Try PromptLayer first
+ if self.client:
+ try:
+ template_config = {
+ "label": label
+ }
+ if version:
+ template_config["version"] = version
+
+ prompttemplate = self.client.templates.get(
+ template_name,
+ template_config
+ )
+ # Extract prompt content from response
+ prompt_content = prompttemplate["llm_kwargs"]["messages"][0]["content"]
+ print(f"๐ Loaded prompt '{template_name}' from PromptLayer (v{prompttemplate.get('version', 'latest')}, {label})")
+ return prompt_content
+
+ except Exception as e:
+ print(f"โ ๏ธ PromptLayer failed: {e}, trying fallback...")
+ # Fall through to fallback instead of raising
+
+ # Fallback to local file
+ if fallback_path:
+ try:
+ with open(fallback_path, 'r') as f:
+ content = f.read()
+ print(f"๐ Loaded prompt '{template_name}' from local file: {fallback_path}")
+ return content
+ except Exception as e:
+ raise ValueError(
+ f"โ Failed to load fallback file '{fallback_path}': {e}"
+ )
+
+ # Only raise if both PromptLayer AND fallback fail
+ raise ValueError(
+ f"Could not load prompt '{template_name}' from any source"
+ )
+
+
+ def list_available_prompts(self) -> Dict[str, Any]:
+ """
+ List all available prompts from PromptLayer.
+
+ Returns:
+ Dictionary of available prompts with metadata
+ """
+ if not self.client:
+ return {"error": "PromptLayer client not available"}
+
+ try:
+ # This would depend on PromptLayer's API for listing templates
+ # Placeholder implementation
+ return {
+ "message": "PromptLayer template listing not implemented in this version",
+ "available_methods": [
+ "get_judge_prompt(simple=True/False)",
+ "get_agent_prompt(version=int)",
+ "get_prompt(template_name, version, label, fallback_path)"
+ ]
+ }
+ except Exception as e:
+ return {"error": f"Failed to list prompts: {e}"}
+
+ def clear_cache(self):
+ """Clear the prompt cache."""
+ self.get_prompt.cache_clear()
+ print("๐๏ธ Prompt cache cleared")
+
+ def set_environment(self, environment: str):
+ """
+ Change the environment label for subsequent prompt requests.
+
+ Args:
+ environment: New environment (dev, staging, production)
+ """
+ self.environment = environment
+ self.clear_cache() # Clear cache since environment changed
+ print(f"๐ Environment changed to: {environment}")
+
diff --git a/src/backend/agents/db_executor/codeact/schemas/__init__.py b/src/backend/agents/db_executor/codeact/schemas/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..19b98f8e8e1e260c3c80be6b16117af94b60e689
--- /dev/null
+++ b/src/backend/agents/db_executor/codeact/schemas/__init__.py
@@ -0,0 +1,10 @@
+"""Init file for pydantic schemas.
+"""
+
+from .openai_key import OpenAIApiKey
+from .stream import TokenStream
+
+__all__ = [
+ "OpenAIApiKey",
+ "TokenStream",
+]
diff --git a/src/backend/agents/db_executor/codeact/schemas/openai_key.py b/src/backend/agents/db_executor/codeact/schemas/openai_key.py
new file mode 100644
index 0000000000000000000000000000000000000000..d8e89528b9103b4926680349f9bd580b6bdf99e9
--- /dev/null
+++ b/src/backend/agents/db_executor/codeact/schemas/openai_key.py
@@ -0,0 +1,56 @@
+import os
+from pydantic import Field, ConfigDict, field_validator
+from pydantic_settings import BaseSettings
+from pathlib import Path
+from dotenv import load_dotenv
+from pydantic import ValidationError
+import sys
+
+# Load environment variables
+load_dotenv()
+
+
+class OpenAIApiKey(BaseSettings):
+ """Schema for validating and loading the OpenAI API key configuration.
+ """
+ model_config = ConfigDict(
+ title="OpenAI API Key Schema",
+ description="Validates and loads the OpenAI API key from environment variables.",
+ )
+ api_key: str = Field(
+ ..., # >>> required field
+ title="OpenAI API Key",
+ description="API key for OpenAI authentication.",
+ examples=["sk-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"],
+ alias="OPENAI_API_KEY",
+ )
+
+ @field_validator("api_key")
+ @classmethod
+ def validate_openai_api_key(cls, v: str) -> str:
+ """Validate that the API key is present and has the correct format.
+ """
+ if not v:
+ raise ValueError(
+ "๐ฅ Missing `OPENAI_API_KEY` environment variable."
+ )
+ if not v.startswith("sk-"):
+ raise ValueError(
+ "๐ฅ Invalid `OPENAI_API_KEY` โ must start with 'sk-'."
+ )
+ return v
+
+ @classmethod
+ def validate_environment(cls) -> "OpenAIApiKey":
+ """
+ Load .env from the root directory
+ and validate that the API key is present and valid.
+ """
+ try:
+ # Pydantic auto-loads .env and validates
+ config = cls()
+ os.environ["OPENAI_API_KEY"] = config.api_key # Set for runtime access
+ return config
+ except ValidationError as e:
+ print(f"๐ฅ OpenAI API key misconfiguration:\n{e}")
+ sys.exit(1)
diff --git a/src/backend/agents/db_executor/codeact/schemas/stream.py b/src/backend/agents/db_executor/codeact/schemas/stream.py
new file mode 100644
index 0000000000000000000000000000000000000000..4634438b163e0004c888e64a23de803e4e3ea012
--- /dev/null
+++ b/src/backend/agents/db_executor/codeact/schemas/stream.py
@@ -0,0 +1,8 @@
+from typing import NamedTuple, Literal, Union, Any
+from langchain_core.messages import AIMessage
+
+class TokenStream(NamedTuple):
+ """Represents a single streamed update emitted by the agent.
+ """
+ type: Literal["messages", "values"]
+ data: Union[list[AIMessage], dict[str, Any]]
\ No newline at end of file
diff --git a/src/backend/agents/db_executor/codeact/states/state.py b/src/backend/agents/db_executor/codeact/states/state.py
new file mode 100644
index 0000000000000000000000000000000000000000..11322aec1443a7e85ee73882924fafacbf49f7a3
--- /dev/null
+++ b/src/backend/agents/db_executor/codeact/states/state.py
@@ -0,0 +1,10 @@
+from langgraph.graph import END, START, MessagesState
+from typing import Optional, Any
+
+class CodeActState(MessagesState):
+ """State for CodeAct agent."""
+
+ script: Optional[str]
+ """The Python code script to be executed."""
+ context: dict[str, Any]
+ """Dictionary containing the execution context with available tools and variables."""
\ No newline at end of file
diff --git a/src/backend/agents/db_executor/codeact/tools/__init__.py b/src/backend/agents/db_executor/codeact/tools/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/src/backend/agents/db_executor/codeact/tools/tools.py b/src/backend/agents/db_executor/codeact/tools/tools.py
new file mode 100644
index 0000000000000000000000000000000000000000..bafc2c7ef699c9128709dc2f3c37338750d808d1
--- /dev/null
+++ b/src/backend/agents/db_executor/codeact/tools/tools.py
@@ -0,0 +1,53 @@
+import inspect
+from langchain_core.tools import StructuredTool
+from typing import Optional
+from pathlib import Path
+
+# Example tools
+def add(a: float, b: float) -> float:
+ """Add two numbers together."""
+ return a + b
+
+def multiply(a: float, b: float) -> float:
+ """Multiply two numbers together."""
+ return a * b
+
+def divide(a: float, b: float) -> float:
+ """Divide two numbers."""
+ return a / b
+
+def subtract(a: float, b: float) -> float:
+ """Subtract two numbers."""
+ return a - b
+
+# Prompt creation
+def create_default_prompt(
+ tools: list,
+ system_prompt: Optional[str] = None,
+ base_prompt: str = "original.txt",
+) -> str:
+ template_path = Path(__file__).parent.parent / "prompts" / base_prompt
+ template = template_path.read_text()
+
+ tool_strings = []
+ for t in tools:
+ func = t.func if isinstance(t, StructuredTool) else t
+ sig = inspect.signature(func)
+ doc = (func.__doc__ or "").strip()
+ tool_strings.append(
+ f"def {func.__name__}{sig}:\n \"\"\"{doc}\"\"\"\n ..."
+ )
+ tools_str = "\n\n".join(tool_strings)
+
+ prompt = template.replace("{tools}", tools_str)
+
+ if system_prompt:
+ prompt = f"{system_prompt}\n\n{prompt}"
+
+ return prompt
+
+
+
+if __name__ == "__main__":
+ tools = [multiply, divide, subtract]
+ print(create_default_prompt(tools, system_prompt="You are a coding agent."))
diff --git a/src/backend/agents/db_executor/codeact/utils/__init__.py b/src/backend/agents/db_executor/codeact/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..51ea01dbad8225be4033a4e0bb2b2417ec093510
--- /dev/null
+++ b/src/backend/agents/db_executor/codeact/utils/__init__.py
@@ -0,0 +1,5 @@
+"""Utility functions for the agent."""
+
+from .pretty_state import pretty_print_state
+
+__all__ = ["pretty_print_state"]
\ No newline at end of file
diff --git a/src/backend/agents/db_executor/codeact/utils/pretty_state.py b/src/backend/agents/db_executor/codeact/utils/pretty_state.py
new file mode 100644
index 0000000000000000000000000000000000000000..1157d59d967446aeaf52ca6bfb0421a9c49a3da3
--- /dev/null
+++ b/src/backend/agents/db_executor/codeact/utils/pretty_state.py
@@ -0,0 +1,73 @@
+import json
+from rich.console import Console
+from rich.syntax import Syntax
+from rich.panel import Panel
+from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
+
+console = Console(width=100, soft_wrap=False)
+
+_last_context_snapshot = None # used to suppress repeated context
+_last_message_ids = set() # track printed messages
+
+
+
+def serialize_message(msg) -> dict:
+ """Convert LangChain message objects into serializable dicts."""
+ if hasattr(msg, "dict"):
+ return msg.dict()
+ elif hasattr(msg, "__dict__"):
+ return {k: serialize_message(v) for k, v in msg.__dict__.items()}
+ elif isinstance(msg, list):
+ return [serialize_message(v) for v in msg]
+ elif isinstance(msg, dict):
+ return {k: serialize_message(v) for k, v in msg.items()}
+ else:
+ return msg
+
+
+def pretty_print_state(state: dict, show_context: bool = True) -> None:
+ """
+ Pretty-print the agent's state in a clean, color-coded way.
+
+ Parameters
+ ----------
+ state : dict
+ The LangGraph agent state chunk (from the stream).
+ show_context : bool, optional
+ Whether to display the context (default True).
+ If True, only shows context when it has changed since last call.
+ """
+ global _last_context_snapshot
+
+ # --- Display message chunks ---
+ for msg in state.get("messages", []):
+
+ msg_id = getattr(msg, "id", id(msg))
+ if msg_id in _last_message_ids:
+ continue # skip duplicates
+ _last_message_ids.add(msg_id)
+
+ msg_dict = serialize_message(msg)
+ msg_json = json.dumps(msg_dict, indent=2)
+
+ if isinstance(msg, HumanMessage):
+ color, title = "cyan", "๐ง HumanMessage"
+ elif isinstance(msg, ToolMessage):
+ color, title = "yellow", f"๐งฐ ToolMessage ({msg_dict.get('name','?')})"
+ elif isinstance(msg, AIMessage):
+ color, title = "magenta", "๐ค AIMessage"
+ else:
+ color, title = "white", "Other"
+
+ syntax = Syntax(msg_json, "json", theme="monokai", line_numbers=False)
+ console.print(Panel(syntax, title=title, border_style=color))
+
+ # --- Optional context view ---
+ #if show_context:
+ # context = state.get("context", {})
+ # if context and context != _last_context_snapshot:
+ # _last_context_snapshot = context.copy() # cache for next comparison
+
+ # context_json = json.dumps(context, indent=2, default=str)
+ # syntax = Syntax(context_json, "json", theme="monokai", line_numbers=False)
+ # console.print(Panel(syntax, title="๐ง Context (updated)", border_style="green"))
diff --git a/src/backend/agents/db_executor/db_executor.py b/src/backend/agents/db_executor/db_executor.py
new file mode 100644
index 0000000000000000000000000000000000000000..c4c8b7aef328a566d6b60aa06a4391ae24365bcd
--- /dev/null
+++ b/src/backend/agents/db_executor/db_executor.py
@@ -0,0 +1,99 @@
+from .codeact.core.codeact import CodeActAgent
+from src.backend.database.candidates.client import SessionLocal
+from src.backend.database.candidates.models import (
+ Candidate,
+ CVScreeningResult,
+ VoiceScreeningResult,
+ InterviewScheduling,
+ FinalDecision,
+)
+from src.backend.state.candidate import CandidateStatus, InterviewStatus, DecisionStatus
+from langchain_core.tools import tool
+from typing import Dict, Any
+from src.backend.database.candidates import evaluate_cv_screening_decision
+from src.backend.prompts import get_prompt
+
+
+SYSTEM_PROMPT = get_prompt(
+ template_name="DB_Executor",
+ local_prompt_path="db_executor/v2.txt",
+)
+
+
+@tool
+def db_executor(query: str) -> str:
+ """
+ Consumes a natural-language query as input which is being translated into
+ SQLAlchemy ORM code by the coding agent. Finally, the code is executed against
+ the database and the result is returned.
+
+ Args:
+ query (str): Natural-language database query.
+ Returns:
+ str: The natural language summary of the result or error.
+ """
+ # 1. Initialize DB session and ORM context
+ session = SessionLocal()
+ context = {
+ "session": session,
+ "Candidate": Candidate,
+ "CVScreeningResult": CVScreeningResult,
+ "VoiceScreeningResult": VoiceScreeningResult,
+ "InterviewScheduling": InterviewScheduling,
+ "FinalDecision": FinalDecision,
+ "CandidateStatus": CandidateStatus,
+ "InterviewStatus": InterviewStatus,
+ "DecisionStatus": DecisionStatus,
+ }
+
+ try:
+ # 2. Initialize CodeAct agent with system prompt
+ agent = CodeActAgent(
+ model_name="gpt-4o",
+ model_provider="openai",
+ tools=[evaluate_cv_screening_decision], # Passed as a tool
+ eval_fn=CodeActAgent.default_eval,
+ system_prompt=SYSTEM_PROMPT,
+ bind_tools=True, # Enable tool binding so agent sees signature
+ memory=False, # optional โ can enable if you want persistent thread context
+ )
+
+ # 3. Run natural-language query
+ messages = [{"role": "user", "content": query}]
+ final_state = agent.generate(messages, context=context)
+
+ # 4. Extract model output
+ # Return the final natural language response from the assistant
+ output_msg = final_state["messages"][-1].content if final_state.get("messages") else ""
+
+ return output_msg
+
+ except Exception as e:
+ import traceback
+ error_trace = traceback.format_exc()
+ print(f"\nโ Error in db_executor: {e}\n{error_trace}")
+
+ # Return a clear text error message
+ return f"The DB Executor encountered an internal error: {str(e)}"
+
+ finally:
+ session.close()
+
+
+
+if __name__ == "__main__":
+ from rich.console import Console
+ from rich.panel import Panel
+
+ console = Console()
+ query = "Fetch all candidates and their status."
+
+ console.rule("[bold magenta]DB Executor Test Run[/bold magenta]")
+ console.print(f"[cyan]Query:[/] {query}\n")
+
+ result = db_executor(query)
+
+ # ๐ง Show model result nicely
+ console.print(Panel.fit(result, title="๐ง Model Output", border_style="blue"))
+
+ console.rule("[bold green]End of Execution[/bold green]")
diff --git a/src/backend/agents/db_executor/info.md b/src/backend/agents/db_executor/info.md
new file mode 100644
index 0000000000000000000000000000000000000000..64c815fdee312948d517ed3ca7d818398980d3d4
--- /dev/null
+++ b/src/backend/agents/db_executor/info.md
@@ -0,0 +1,22 @@
+This agent is a coding agent based on the `CodeAct` agent pattern, see:
+https://github.com/langchain-ai/langgraph-codeact
+
+
+Test as follows:
+
+>>> cd /Users/sebastianwefers/Desktop/projects/recruitment-agent
+
+>>> docker compose -f docker/docker-compose.yml up --build candidates_db_init
+
+
+# Make sure your OpenAI key is available to the process
+>>> export OPENAI_API_KEY=sk-... # or however you normally set it
+
+# Override host so the Python code connects to localhost, not 'db' and run "db_executor"
+>>> POSTGRES_HOST=localhost POSTGRES_PORT=5433 python -m src.agents.db_executor.db_executor
+
+
+# DEBUG attempt
+------------------------------------------------------------------------------------
+- works:
+POSTGRES_HOST=localhost POSTGRES_PORT=5433 python src/agents/db_executor/debug_db_connection.py
\ No newline at end of file
diff --git a/src/backend/agents/example/info.md b/src/backend/agents/example/info.md
new file mode 100644
index 0000000000000000000000000000000000000000..2edd6f86c1ca02a83dce3a9d393a0c3da85b160d
--- /dev/null
+++ b/src/backend/agents/example/info.md
@@ -0,0 +1,66 @@
+### How to Run the LangGraph Reasoning Monitoring Demo Agent
+
+1. Make sure to have the following installed
+```bash
+pip install -r requriements/dev.txt
+```
+
+2. Set TAVILY_API_KEY:
+- link: https://www.tavily.com
+
+3. Run the following from repo root:
+```bash
+export PYTHONPATH=./src
+langgraph dev
+```
+This loads the root-level `langgraph.json` and makes all agents available in LangGraph Studio.
+
+4. Open the Studio UI
+After the server starts, open:
+```bash
+https://smith.langchain.com/studio/?baseUrl=http://127.0.0.1:2024
+```
+**NOTE:** Open it in any browser except Safari!
+
+Select the agent named react_agent (or whichever your config specifies).
+
+---
+
+### Demo Prompt to Use
+Paste the following into the Studio console:
+```txt
+First search for the current temperature in Fahrenheit in Cape Town, South Africa.
+Then convert that temperature to Celsius using the conversion tool.
+```
+
+***This triggers:***
+1. A Tavily search for the current Fahrenheit temperature
+2. A tool call to convert Fahrenheit โ Celsius
+3. Full ReAct reasoning + tool trace in the UI
+
+---
+
+### โ๏ธ Multiple Agents in langgraph.json
+You can expose multiple agents to LangGraph Studio by listing them under the graphs section of your root `langgraph.json`.
+
+Example:
+```json
+{
+ "dependencies": ["src"],
+ "graphs": {
+ "react_agent": "agents.example.react_agent:agent",
+ "cv_screener": "agents.cv_screening.screener:agent",
+ "supervisor": "agents.supervisor.supervisor:agent"
+ }
+}
+```
+Each entry maps:
+```bash
+"graph_name": "module.path:object_name"
+```
+
+Where:
+- `graph_name` โ appears in LangGraph Studio
+- `module.path` โ Python import path under `src/`
+- `object_name` โ the variable that contains the graph/agent
+This allows one project to host many agents simultaneously (e.g., supervisor, tools agent, CV-screening agent, etc.).
diff --git a/src/backend/agents/example/react_agent.py b/src/backend/agents/example/react_agent.py
new file mode 100644
index 0000000000000000000000000000000000000000..af674c94abfe1264b21806f2c86067e18d61ad80
--- /dev/null
+++ b/src/backend/agents/example/react_agent.py
@@ -0,0 +1,59 @@
+"""
+Simple React Agent implementation with monitoring capabilities.
+
+- React agent:
+ - https://docs.langchain.com/oss/python/langchain/agents
+
+
+install:
+ - langgraph-cli
+
+Run as follows:
+>>> cd src/agents/example/
+>>> langgraph dev
+
+"""
+from langchain.agents import create_agent
+from langchain_tavily import TavilySearch
+from langchain_core.tools import tool
+from dotenv import load_dotenv
+
+
+
+load_dotenv()
+
+
+
+# --- Tools ---
@tool
def convert_fahrenheit_celsius(fahrenheit: float) -> float:
    """Convert a temperature from Fahrenheit to Celsius.

    Args:
        fahrenheit (float): Temperature in degrees Fahrenheit.

    Returns:
        float: The equivalent temperature in degrees Celsius.
    """
    # Shift to the freezing point, then scale by the F→C ratio.
    offset = fahrenheit - 32
    return offset * (5.0 / 9.0)
+
+
+
# Tavily web-search tool; requires TAVILY_API_KEY in the environment
# (loaded via load_dotenv above).
web_search = TavilySearch(
    max_results = 5,
    topic = "general",
    # include_answer = False,
    # include_raw_content = False,
    # ...
)


# Tools exposed to the ReAct agent: web search plus the temperature converter.
tools = [
    web_search,
    convert_fahrenheit_celsius
]


# ReAct agent wired to "gpt-5"; exported as `agent` so langgraph.json /
# LangGraph Studio can import it by module path.
agent = create_agent(
    "gpt-5",
    tools=tools
)
\ No newline at end of file
diff --git a/src/backend/agents/gcalendar/__init__.py b/src/backend/agents/gcalendar/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..af440251541c569d239ee088ebe55df2191a393b
--- /dev/null
+++ b/src/backend/agents/gcalendar/__init__.py
@@ -0,0 +1,2 @@
+from .gcalendar_agent import gcalendar_agent
+
diff --git a/src/backend/agents/gcalendar/gcalendar_agent.py b/src/backend/agents/gcalendar/gcalendar_agent.py
new file mode 100644
index 0000000000000000000000000000000000000000..a8bba89d50718da86c970c748b5cec3d165cb42a
--- /dev/null
+++ b/src/backend/agents/gcalendar/gcalendar_agent.py
@@ -0,0 +1,94 @@
+import asyncio
+import sys
+from langchain_core.tools import tool
+from langchain_mcp_adapters.client import MultiServerMCPClient
+from langchain.agents import create_agent
+from langchain_openai import ChatOpenAI
+from src.mcp_servers.examples.gcalendar.settings import GoogleCalendarSettings
+from src.backend.prompts import get_prompt
+
+
# System prompt for the calendar agent, fetched from the prompt registry
# (latest version of the "GCalendar" template).
SYSTEM_PROMPT = get_prompt(
    template_name="GCalendar",
    latest_version=True
)
+
@tool
def gcalendar_agent(query: str) -> str:
    """
    A tool that acts as a Google Calendar agent.
    It can list, create, and analyze calendar events using the Google Calendar MCP server.

    Args:
        query (str): The natural language request for the calendar (e.g., "Schedule a meeting with X on Friday at 3pm").

    Returns:
        str: The natural language response from the agent confirming the action or
            providing the requested information, or an error string prefixed with
            "❌" on failure.

    Example output:
        "I have successfully scheduled the meeting with X for Friday at 3pm. The event ID is 1234567890."
    """
    try:
        async def _run_async():
            # Load settings (MCP server location and OAuth credential/token paths).
            settings = GoogleCalendarSettings()
            CALENDAR_MCP_DIR = settings.calendar_mcp_dir
            CREDS = settings.creds
            TOKEN = settings.token

            # Initialize model
            model = ChatOpenAI(model="gpt-4o", temperature=0)

            # Connect to MCP server.
            # Note: This spawns a new process for each call.
            # In a production environment, you might want to manage a persistent connection.
            client = MultiServerMCPClient({
                "calendar": {
                    "command": sys.executable,
                    "args": [
                        f"{CALENDAR_MCP_DIR}/run_server.py",
                        "--creds-file-path", str(CREDS),
                        "--token-path", str(TOKEN),
                    ],
                    "transport": "stdio",
                }
            })

            # Fetch the calendar tools exposed by the MCP server.
            try:
                tools = await client.get_tools()
            except Exception as e:
                return f"❌ Failed to connect to Calendar MCP server: {str(e)}"

            if not tools:
                return "❌ No tools available from Calendar MCP server."

            # Create a fresh agent wired to the MCP tools.
            agent = create_agent(model, tools)

            # Run the agent with a system + user message pair.
            result = await agent.ainvoke({
                "messages": [
                    {
                        "role": "system",
                        "content": SYSTEM_PROMPT,
                    },
                    {
                        "role": "user",
                        "content": query,
                    },
                ]
            })

            # The last message is the agent's final natural-language answer.
            output = result["messages"][-1].content
            return output

        # `asyncio` is imported at module level; run the coroutine to completion.
        return asyncio.run(_run_async())

    except Exception as e:
        import traceback
        return f"❌ Error in gcalendar_agent: {str(e)}\n{traceback.format_exc()}"
+
diff --git a/src/backend/agents/gcalendar/schemas/__init__.py b/src/backend/agents/gcalendar/schemas/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/src/backend/agents/gcalendar/tools/__init__.py b/src/backend/agents/gcalendar/tools/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/src/backend/agents/gmail/__init__.py b/src/backend/agents/gmail/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..f23865c6eb94070c2fc8d5b6fc53087f8f7a05e4
--- /dev/null
+++ b/src/backend/agents/gmail/__init__.py
@@ -0,0 +1,2 @@
+from .gmail_agent import gmail_agent
+
diff --git a/src/backend/agents/gmail/gmail_agent.py b/src/backend/agents/gmail/gmail_agent.py
new file mode 100644
index 0000000000000000000000000000000000000000..b73a6f7bc2111a7ae6b6b50bf37fdd1b57c86adf
--- /dev/null
+++ b/src/backend/agents/gmail/gmail_agent.py
@@ -0,0 +1,102 @@
+import asyncio
+import shutil
+from pathlib import Path
+from langchain_core.tools import tool
+from langchain_mcp_adapters.client import MultiServerMCPClient
+from langchain.agents import create_agent
+from langchain_openai import ChatOpenAI
+from src.mcp_servers.examples.gmail.settings import GMailSettings
+from src.backend.prompts import get_prompt
+
+
# Attempt to find uv executable
#-----------------------------------------------------------------------------
# `Dockerfile.supervisor` installs uv in the base image in `/usr/local/bin/uv`
# `which` attempts to find it in the system PATH and returns the full path to it.
# UV_PATH is None when uv is not installed; gmail_agent guards against that.
UV_PATH = shutil.which("uv")


# System prompt for the Gmail agent, fetched from the prompt registry
# (latest version of the "GMail" template).
SYSTEM_PROMPT = get_prompt(
    template_name="GMail",
    latest_version=True
)
+
@tool
def gmail_agent(query: str) -> str:
    """
    A tool that acts as a Gmail agent.
    It can read, search, label, and send emails using the Gmail MCP server.

    Args:
        query (str): The natural language request (e.g., "Send an email to X", "Check unread emails").

    Returns:
        str: The natural language response from the agent confirming the action or
            providing the requested information, or an error string prefixed with
            "❌" on failure.

    Example output:
        "I have successfully sent the email to X with the subject 'Interview Invitation'."
    """
    # The Gmail MCP server is launched via uv; bail out early if it is missing.
    if not UV_PATH:
        return "❌ Error: 'uv' executable not found. Please ensure uv is installed and in the system PATH."

    try:
        async def _run_async():
            # Load settings (MCP server directory and OAuth credential/token paths).
            settings = GMailSettings()

            # Initialize model
            model = ChatOpenAI(model="gpt-4o", temperature=0)

            # Connect to MCP server.
            # Note: This spawns a new process for each call.
            client = MultiServerMCPClient(
                {
                    "gmail": {
                        "command": UV_PATH,
                        "args": [
                            "--directory", str(settings.gmail_mcp_dir),
                            "run", "gmail",
                            "--creds-file-path", str(settings.creds),
                            "--token-path", str(settings.token),
                        ],
                        "transport": "stdio",
                    }
                }
            )

            # Fetch the Gmail tools exposed by the MCP server.
            try:
                tools = await client.get_tools()
            except Exception as e:
                return f"❌ Failed to connect to Gmail MCP server: {str(e)}"

            if not tools:
                return "❌ No tools available from Gmail MCP server."

            # Create a fresh agent wired to the MCP tools.
            agent = create_agent(model, tools)

            # Run the agent with a system + user message pair.
            result = await agent.ainvoke({
                "messages": [
                    {
                        "role": "system",
                        "content": SYSTEM_PROMPT,
                    },
                    {
                        "role": "user",
                        "content": query,
                    },
                ]
            })

            # The last message is the agent's final natural-language answer.
            output = result["messages"][-1].content
            return output

        # `asyncio` is imported at module level; run the coroutine to completion.
        return asyncio.run(_run_async())

    except Exception as e:
        import traceback
        return f"❌ Error in gmail_agent: {str(e)}\n{traceback.format_exc()}"
+
diff --git a/src/backend/agents/supervisor/__init__.py b/src/backend/agents/supervisor/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e5816d344407963fd580bdac4f9f0220965732a8
--- /dev/null
+++ b/src/backend/agents/supervisor/__init__.py
@@ -0,0 +1 @@
+"""Supervisor agent for HR recruitment workflow"""
diff --git a/src/backend/agents/supervisor/info.md b/src/backend/agents/supervisor/info.md
new file mode 100644
index 0000000000000000000000000000000000000000..2efc0d5ca4af2314d4e8c85de730af74330708a8
--- /dev/null
+++ b/src/backend/agents/supervisor/info.md
@@ -0,0 +1,123 @@
+# Supervisor Agent - Implementation Details
+
+## Architecture
+
+The Supervisor Agent (`src/agents/supervisor/supervisor_v2.py`) uses a **LangGraph** orchestration model to manage the recruitment workflow. It maintains state for each interaction using a `MemorySaver` checkpointer, which allows it to remember context across multiple turns of conversation.
+
+### Core Components
+
+1. **Agent Type**: OpenAI Functions Agent (powered by `gpt-4o`).
+2. **Orchestration Framework**: [LangGraph](https://github.com/langchain-ai/langgraph) for state management and tool execution.
+3. **State Persistence**: Uses `thread_id` to maintain conversation history per session.
+
+### System Overview
+
+```mermaid
+graph TD
+ User(User / HR Manager) -->|Chat Request| Supervisor[Supervisor Agent]
+
+ Supervisor -->|Delegate| DB_Exec[DB Executor]
+ Supervisor -->|Delegate| CV_Screen[CV Screening]
+ Supervisor -->|Delegate| Cal_Agent[Calendar Agent]
+ Supervisor -->|Delegate| Gmail_Agent[Gmail Agent]
+
+ DB_Exec -->|SQL| Database[(PostgreSQL)]
+ CV_Screen -->|Parse & Score| Database
+
+ Cal_Agent -->|MCP| Calendar_MCP[Google Calendar MCP]
+ Gmail_Agent -->|MCP| Gmail_MCP[Gmail MCP]
+
+ Calendar_MCP -->|API| GCalendar(Google Calendar)
+ Gmail_MCP -->|API| GMail(Google Mail)
+```
+
+## Tools & Sub-agents
+
+The supervisor has access to the following tools, which it delegates tasks to:
+
+1. **`db_executor`**:
+ * **Purpose**: Querying the database for candidate information, status updates, and aggregations.
+ * **Capabilities**: SQL generation and execution (read-only by default, with specific write actions allowed).
+2. **`cv_screening_workflow`**:
+ * **Purpose**: Running the CV analysis pipeline.
+ * **Behavior**: Takes a candidate name, finds their CV, parses it, compares it against the job description, and saves the score/decision to the DB.
+ * **Sync/Async**: Currently synchronous wrapper around the workflow.
+3. **`gcalendar_agent`**:
+ * **Purpose**: Interacting with Google Calendar.
+ * **Backend**: Connects to a local Model Context Protocol (MCP) server (`src/mcp_servers/calendar-mcp`).
+ * **Capabilities**: List events, create events, check availability.
+ * **Execution**: Spawns a subprocess to run the MCP server.
+4. **`gmail_agent`**:
+ * **Purpose**: Interacting with Gmail.
+ * **Backend**: Connects to a local Model Context Protocol (MCP) server (`src/mcp_servers/gmail-mcp`).
+ * **Capabilities**: Send emails, read emails, search threads.
+ * **Execution**: Spawns a subprocess to run the MCP server.
+
+## Development & Debugging
+
+### Running the Supervisor UI
+
+The supervisor is exposed via a Streamlit UI (`src/supervisor_ui/app.py`).
+
+```bash
+# Run with Docker (Recommended)
+docker compose -f docker/docker-compose.yml up --build supervisor_ui
+```
+
+### Debugging with LangSmith UI
+
+For deeper transparency and debugging of the agent's thought process, you can run it with LangSmith.
+
+**Note:** You may need to temporarily disable `memory` (checkpointer) in `src/agents/supervisor/supervisor_v2.py` for the LangSmith UI to work correctly with this specific setup.
+
+```bash
+# 1. Start the database service
+docker compose --env-file .env -f docker/docker-compose.yml up --build
+
+# 2. Run LangGraph dev server (pointing to local DB port)
+POSTGRES_HOST=localhost POSTGRES_PORT=5433 langgraph dev
+```
+
+### Troubleshooting MCP Connections
+
+If the supervisor fails to use Gmail or Calendar tools with errors like `Connection refused`, `Executable not found`, or `invalid_grant`:
+
+#### 1. `invalid_grant: Token has been expired or revoked`
+
+**Cause:**
+The Google OAuth2 tokens (`token.json` or `calendar_token.json`) copied into the Docker container are invalid, expired, or were revoked. This happens if tokens are old or the environment changed.
+
+**Solution:**
+You must regenerate the tokens **locally** and then rebuild the container to copy the fresh tokens inside.
+
+1. **Delete old tokens locally:**
+ ```bash
+ rm secrets/gmail-mcp/token.json
+ rm secrets/gcalendar-mcp/calendar_token.json
+ ```
+ *(Do NOT delete the `credentials.json` files!)*
+
+2. **Regenerate tokens:**
+ Run the local test scripts. **A browser window will open asking you to log in and authorize the app.**
+
+ *For Gmail:*
+ ```bash
+ python -m src.mcp_servers.examples.gmail.send_email
+ ```
+
+ *For Calendar:*
+ ```bash
+ python -m src.mcp_servers.examples.gcalendar.interact_calendar
+ ```
+
+3. **Rebuild and Restart Container:**
+ The `Dockerfile.supervisor` copies the `secrets/` folder at build time. You must rebuild to get the new files.
+ ```bash
+ docker compose -f docker/docker-compose.yml build supervisor_ui
+ docker compose -f docker/docker-compose.yml up -d
+ ```
+
+#### 2. Other Issues
+1. **Check `uv` installation**: The MCP servers use `uv` to run. In Docker, this is installed at `/usr/local/bin/uv`.
+2. **Check Subprocess**: The supervisor spawns new processes for MCP servers. Ensure the container has enough memory.
+
diff --git a/src/backend/agents/supervisor/supervisor_v2.py b/src/backend/agents/supervisor/supervisor_v2.py
new file mode 100644
index 0000000000000000000000000000000000000000..cb31ac2568229159bf01d1e29a3dbe62ce93c913
--- /dev/null
+++ b/src/backend/agents/supervisor/supervisor_v2.py
@@ -0,0 +1,58 @@
+"""
+Supervisor Agent that orchestrates sub-agents for recruitment tasks.
+
+For more transparency in langsmith UI disable memory, then run:
+----------------------------------------------------------------
+| >>> docker compose -f docker/docker-compose.yml up --build. |
+| >>> POSTGRES_HOST=localhost POSTGRES_PORT=5433 langgraph dev |
+----------------------------------------------------------------
+"""
+
+
+from langchain.agents import create_agent
+from langchain_openai import ChatOpenAI
+from langgraph.checkpoint.memory import MemorySaver
+from src.backend.prompts import get_prompt
+
+# โ
Correct import via src.agents package (which re-exports from src.backend.agents.db_executor)
+from src.backend.agents import (
+ db_executor,
+ cv_screening_workflow,
+ gcalendar_agent,
+ gmail_agent,
+ voice_judge,
+)
+
+SYSTEM_PROMPT = get_prompt(
+ template_name="Supervisor",
+ local_prompt_path="supervisor/v2.txt",
+)
+
+# --------- Subagents as tools ---------
+subagents = [
+ db_executor,
+ cv_screening_workflow,
+ gcalendar_agent,
+ gmail_agent,
+ voice_judge,
+]
+
+# --------------- Memory ----------------
+# **NOTE:**
+# >>> In UI make sure to use 'thread_id' as a configurable parameter to the agent.invoke() method.
+# >>> When willing to use langsmith UI, then you must remove the checkpointer=memory,
+# otherwise it will not work.
+memory = MemorySaver()
+
+# ------------- Supervisor --------------
+supervisor_model = ChatOpenAI(
+ model="gpt-4o",
+ temperature=0,
+)
+
+supervisor_agent = create_agent(
+ model=supervisor_model,
+ tools=subagents,
+ system_prompt=SYSTEM_PROMPT,
+ checkpointer=memory, # outcomment for langsmith UI
+)
diff --git a/src/backend/agents/supervisor/test_queries/owen.md b/src/backend/agents/supervisor/test_queries/owen.md
new file mode 100644
index 0000000000000000000000000000000000000000..fdc44bb1996f0b43e53fa5b1e61f7084c7358517
--- /dev/null
+++ b/src/backend/agents/supervisor/test_queries/owen.md
@@ -0,0 +1,171 @@
+# Queries
+These queries / tests are used to test how well the supervisor agent performs by evaluating its responses to various tasks.
+
+## 1. Run CV screening for a newly uploaded candidate
+### Queries
+- "Please screen the new applicant and update their status accordingly."
+- "Please check if there is any applicants. Please tell me who if so. Then send them to the cv screening and update their status accordingly"
+
+### Expected behavior
+Supervisor identifies that the candidate is in a state requiring CV screening.
+Supervisor delegates the work to the CV Screening agent.
+CV Screening agent parses the CV, scores it, determines pass or fail, and writes results into the database via the DB Executor.
+Supervisor waits for the tool output and then reports the updated status without performing the screening itself.
+### Notes / issues
+- The supervisor asks for the name of the applicant. Instead it should have automatically delegated it to the DB Executor. It needs to be less reliant on the user for something this trivial.
+- The DB agent keeps trying to get a file at src\database\cvs\parsed\1dd5c1f2-737e-430f-9747-8b77d60219f3_SWefers_CV.txt. That path doesn't exist. Something is confusing it on which path the CVs are at.
+
+***`Comments`: Note that the cv screening workflow currently only sets status to **applied**. Status quo is to let the db executor run the eval. Was not sure yesterday whether to already include a **decision node** in the cv screening workflow, but it would definitely make it more autonomous. For now I decided to attach the `cv screening decision tool` to the db agent, where the supervisor can set a threshold of what defines fail vs. pass***
+
+---
+
+## 2. Process multiple new candidates simultaneously
+### Query
+
+**Queries:**
+- "We have several new applicants. Process all of them and let me know how the screening went."
+
+- """We have two applicants in our database: one has just applied and the other has passed CV screening, correct?
+Please confirm that first and tell me what the actual statuses are.
+
+Then:
+- If one has status "applied", send him to the voice screening.
+- If the other candidate has successfully passed the CV screening, then prepare a congratulatory email.
+ - Before preparing the email, check our calendar for available time slots for a person-to-person interview.
+ - Include these available time slots in the email.
+
+At the end, summarize the actions you took.
+"""
+### Expected behavior
+Supervisor queries current candidate states via DB Executor and identifies all candidates in the new or cv_uploaded state.
+Supervisor routes each candidate to the CV Screening agent using isolated per candidate threads.
+Each CV Screening agent run updates the database through DB Executor.
+Supervisor receives aggregated outcomes and summarizes them for HR.
+### Notes / issues
+TODO
+
+---
+
+## 3. Notify a passed candidate and request time slots
+### Query
+"This candidate X passed screening. Notify them and ask for their availability."
+### Expected behavior
+Supervisor detects that the candidate is in a screened_passed state.
+Supervisor delegates email sending to the Gmail Agent.
+Gmail Agent contacts Gmail MCP to send the message.
+DB Executor updates the candidate status to awaiting_time_slots.
+Supervisor reports the next expected step.
+### Notes / issues
+- Works correctly. Asked DB Executor for email, then sent the email.
+
+---
+
+## 4. Notify a failed candidate
+### Query
+"The screening result is fail. Please notify the candidate and update the system."
+### Expected behavior
+Supervisor sees the screened_failed state.
+Supervisor calls the Gmail Agent.
+Gmail Agent sends a rejection email using Gmail MCP.
+DB Executor updates status to rejected.
+Supervisor returns a clean confirmation.
+### Notes / issues
+- Gmail has issue without using `--allow-blocking` when launching `langgraph dev`. But this also breaks the database.
+- Gmail agent kept asking multiple times whether info was correct, even after being told yes. It needs
+to just do what it is told.
+
+***`Comment:`*** sending emails works for me on mac without any issues. not sure whether windows thing?
+
+---
+
+## 5. Generate a system wide status report
+### Query
+"What is the current status of all candidates?"
+### Expected behavior
+Supervisor calls DB Executor to retrieve aggregated counts and per status numbers.
+Supervisor formats the report for HR.
+No state transitions occur.
+No subagent beyond DB Executor is involved.
+### Notes / issues
+- Worked correctly without issues.
+
+---
+
+## 6. Schedule an interview for a candidate with provided availability
+### Query
+"The candidate already provided availability. Please schedule their interview."
+### Expected behavior
+Supervisor determines the candidate is in awaiting_time_slots and that availability is present in the DB.
+Supervisor calls the Calendar Agent.
+Calendar Agent uses the Calendar MCP to match candidate availability with HR calendar and schedules a meeting.
+DB Executor updates the status to interview_scheduled.
+Supervisor reports the scheduled event.
+### Notes / issues
+- Works correctly. Asked DB Executor for email, then sent the email.
+
+---
+
+## 7. Process all candidates to their next required step
+### Query
+"Process all candidates and advance everyone to the next appropriate step."
+### Expected behavior
+Supervisor retrieves all candidates via DB Executor.
+Supervisor groups them by workflow state and delegates each group to the appropriate subagent (CV Screening, Gmail Agent, Calendar Agent).
+All work is executed per candidate thread.
+DB Executor performs all writes.
+Supervisor produces a summary of completed actions.
+### Notes / issues
+- Got lots of database errors and missing CVs (the CVs should be in the DB, they're in the files). The DB Executor needs to have clearer instructions for how to use it, and be more persistent. It cannot give up if it fails once.
+
+***`Comment`: we are still at single candidate mvp.*** `BUT`we should still try if already `possible`!
+
+---
+
+## 8. Parse a CV without screening
+### Query
+"Only parse this new CV and store the structured data. Do not run screening."
+### Expected behavior
+Supervisor identifies that parsing is needed but screening is not requested.
+Supervisor routes to the CV Screening agent or dedicated parser if available.
+Parser extracts structured data and writes it to the DB via DB Executor.
+Supervisor leaves candidate in the correct state without triggering screening logic.
+### Notes / issues
+- Worked without issues.
+
+---
+
+## 9. Follow up when no time slots were received
+### Query
+"The candidate has not replied with availability. Follow up with them."
+### Expected behavior
+Supervisor identifies the state awaiting_time_slots with no stored availability.
+Supervisor delegates a follow up email to the Gmail Agent.
+Gmail Agent sends the email through Gmail MCP.
+DB Executor records that a follow up was sent.
+Supervisor confirms the action.
+### Notes / issues
+- Works correctly. Asked DB Executor for email, then sent the email.
+
+---
+
+## 10. Resume a stuck candidate from checklist state
+### Query
+"This candidate is stuck. Resume from exactly where they left off."
+### Expected behavior
+Supervisor loads the stored checklist and candidate state via DB Executor.
+Supervisor identifies the next unchecked atomic step.
+Supervisor routes to the appropriate subagent for that specific step.
+Subagent performs the atomic action and DB Executor persists the update.
+Supervisor does not repeat completed steps or skip steps.
+### Notes / issues
+- It worked fine, but it took many attempts to set info in the database. Maybe more clear explanation is needed.
+
+## 11. High level summary of all candidates
+### Query
+"Tell me a high level summary about all candidates that have "applied" but not yet moved on."
+### Expected Behavior
+Supervisor does not provide just the names, emails, or phone numbers.
+Supervisor asks CV screener for info about the candidates.
+### Notes / issues
+- CV screener is having issues finding applicant CVs.
+- Supervisor tried just giving names or contact info which is insufficient.
\ No newline at end of file
diff --git a/src/backend/agents/supervisor/test_queries/seb.md b/src/backend/agents/supervisor/test_queries/seb.md
new file mode 100644
index 0000000000000000000000000000000000000000..90fb394784088daa662933a33508deb88876ab31
--- /dev/null
+++ b/src/backend/agents/supervisor/test_queries/seb.md
@@ -0,0 +1,47 @@
+## ***`Example Queries`***
+---
+
+### ***`Tool Availability`***
+
+Use the following prompt to see if it can see the tools attached to the agent without having to mention them in the system prompt, since passing the tools as a list in `create_agent` already takes care of that.
+```text
+hey what tools do you have avialable? please give comprehensive info and overview
+```
+**NOTE**
+In my last run it listed an additional tool called `Multi-Tool Use (multi_tool_use.parallel)`. This is NOT a real tool in the codebase but an internal OpenAI artifact representing the model's capability to call multiple tools in parallel. It can be ignored.
+
+### ***`Already working:`***
+```text
+>>> Hey, are there any candidates in our database?
+
+>>> Great tell me more about this person!
+
+>>> Ok so please send him an email and notify him that his cv has been screened!
+
+>>> Please update his status from cv screened to applied!
+
+>>> I checked his cv and it looks great by manual inspection by myself. hence can we set his interview status to scheduled?
+```
+
+
+### ***`Goals:`***
+```text
+>>> Since his cv was screened, can you update his interview status as completed and decision status as maybe?
+
+>>> Since his cv was screened, can you update his interview status as completed and decision status as maybe? Then also send him an email that we will soon schedule a person-to-person interview with him.
+
+>>> can you send him an email that we liked his cv and want to schedule a meeting with him for the following friday at 3pm? After sending the email please update interview scheduling status as 'scheduled'
+
+---
+
+>>> Please schedule an interview for that person for this friday 2pm and then notify the applicant that he has a personal interview at that time and shall mark it in his calendar.
+
+>>> Please schedule an interview in our hr calendar that candidate x will have a person-to-person interview. Also notify both hr and the applicant by email and send a ***calendar invitation*** to the candidate!
+
+
+>>> Please check in our HR calendar what days have available slots for a 1h interview. Once we find that out, we suggest the available time slots to the candidate. Once he agrees to one slot we can schedule that agreed slot.
+
+>>> Can you please send a calendar invite to that person for this friday 2pm and to HR as well?
+
+>>>
+```
diff --git a/src/backend/agents/voice_screening/__init__.py b/src/backend/agents/voice_screening/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..1a0b97ee54b01cd516d5545a0b690a940fcc8038
--- /dev/null
+++ b/src/backend/agents/voice_screening/__init__.py
@@ -0,0 +1,3 @@
+from .judge import voice_judge, evaluate_voice_screening
+
+__all__ = ["voice_judge", "evaluate_voice_screening"]
diff --git a/src/backend/agents/voice_screening/audio_processor.py b/src/backend/agents/voice_screening/audio_processor.py
new file mode 100644
index 0000000000000000000000000000000000000000..ab6247755e0dd1bad9957c3acf31ee68e2cbc31a
--- /dev/null
+++ b/src/backend/agents/voice_screening/audio_processor.py
@@ -0,0 +1,188 @@
+"""
+Audio processing utilities for voice screening.
+Handles audio combining, resampling, and WAV export.
+"""
+import io
+import wave
+import struct
+import logging
+from typing import List, Dict
+
+logger = logging.getLogger(__name__)
+
+
+def combine_and_export_audio(
+ user_chunks: List[Dict],
+ agent_chunks: List[Dict],
+ session_start_time: float,
+ session_id: str
+) -> bytes:
+ """
+ Combine user and agent audio chunks and export as WAV file.
+
+ Audio chunks are continuous streams - we concatenate them in order and mix
+ based on when each stream actually started relative to session start.
+
+ Args:
+ user_chunks: List of dicts with 'timestamp' and 'data' (bytes)
+ agent_chunks: List of dicts with 'timestamp' and 'data' (bytes)
+ session_start_time: Session start timestamp for relative positioning
+ session_id: The session ID for logging.
+
+ Returns:
+ bytes: WAV file data.
+ """
+ if not session_start_time:
+ raise ValueError("Session start time not found")
+
+ if not user_chunks and not agent_chunks:
+ logger.warning(f"No audio chunks found for session {session_id}")
+ # Return empty WAV file
+ wav_buffer = io.BytesIO()
+ with wave.open(wav_buffer, 'wb') as wav_file:
+ wav_file.setnchannels(1) # Mono
+ wav_file.setsampwidth(2) # 16-bit = 2 bytes
+ wav_file.setframerate(24000) # OpenAI uses 24kHz
+ wav_file.writeframes(b'')
+ return wav_buffer.getvalue()
+
+ # Sample rate: OpenAI Realtime API uses 24kHz PCM16
+ SAMPLE_RATE = 24000
+ BYTES_PER_SAMPLE = 2 # 16-bit = 2 bytes
+
+ # Detect user audio sample rate (browser typically captures at 48kHz)
+ # NOTE: Frontend now resamples to 24kHz before sending, so we can trust it matches.
+ user_sample_rate = SAMPLE_RATE # Always 24kHz
+ logger.info(f"Using standard sample rate: {user_sample_rate}Hz")
+
+ # Process and prepare all chunks with their timestamps
+ # We need to interleave user and agent chunks based on when they actually occurred
+ all_chunks = []
+
+ # Process user chunks (resample if needed)
+ for chunk in user_chunks:
+ chunk_data = chunk["data"]
+ chunk_samples = len(chunk_data) // BYTES_PER_SAMPLE
+
+ all_chunks.append({
+ "timestamp": chunk["timestamp"],
+ "type": "user",
+ "data": chunk_data,
+ "samples": chunk_samples
+ })
+
+ # Process agent chunks (already at 24kHz, no resampling needed)
+ for chunk in agent_chunks:
+ chunk_data = chunk["data"]
+ chunk_samples = len(chunk_data) // BYTES_PER_SAMPLE
+
+ all_chunks.append({
+ "timestamp": chunk["timestamp"],
+ "type": "agent",
+ "data": chunk_data,
+ "samples": chunk_samples
+ })
+
+ # Sort all chunks by timestamp to get chronological order
+ all_chunks.sort(key=lambda x: x["timestamp"])
+
+ # Now place chunks sequentially, maintaining continuity within each stream
+ # Track cumulative position for each stream type
+ user_cumulative = None
+ agent_cumulative = None
+
+ chunk_placements = []
+
+ for chunk in all_chunks:
+ chunk_timestamp = chunk["timestamp"]
+ chunk_offset_seconds = chunk_timestamp - session_start_time
+ chunk_start_sample = max(0, int(chunk_offset_seconds * SAMPLE_RATE))
+
+ if chunk["type"] == "user":
+ # For user audio, maintain continuity within user stream
+ if user_cumulative is None:
+ user_cumulative = chunk_start_sample
+
+ # Ensure no gaps - if there's a gap, start from where previous user chunk ended
+ if chunk_start_sample < user_cumulative:
+ chunk_start_sample = user_cumulative
+
+ chunk_placements.append({
+ "start_sample": chunk_start_sample,
+ "data": chunk["data"],
+ "samples": chunk["samples"],
+ "type": "user"
+ })
+
+ user_cumulative = chunk_start_sample + chunk["samples"]
+ else: # agent
+ # For agent audio, maintain continuity within agent stream
+ if agent_cumulative is None:
+ agent_cumulative = chunk_start_sample
+
+ # Ensure no gaps - if there's a gap, start from where previous agent chunk ended
+ if chunk_start_sample < agent_cumulative:
+ chunk_start_sample = agent_cumulative
+
+ chunk_placements.append({
+ "start_sample": chunk_start_sample,
+ "data": chunk["data"],
+ "samples": chunk["samples"],
+ "type": "agent"
+ })
+
+ agent_cumulative = chunk_start_sample + chunk["samples"]
+
+ # Calculate total duration needed
+ total_samples = 0
+ if chunk_placements:
+ for placement in chunk_placements:
+ total_samples = max(total_samples, placement["start_sample"] + placement["samples"])
+
+ if total_samples == 0:
+ logger.warning(f"No audio samples to export for session {session_id}")
+ wav_buffer = io.BytesIO()
+ with wave.open(wav_buffer, 'wb') as wav_file:
+ wav_file.setnchannels(1)
+ wav_file.setsampwidth(2)
+ wav_file.setframerate(SAMPLE_RATE)
+ wav_file.writeframes(b'')
+ return wav_buffer.getvalue()
+
+ # Initialize output buffer with zeros
+ output_buffer = bytearray(total_samples * BYTES_PER_SAMPLE)
+
+ # Place all chunks in chronological order
+ for placement in chunk_placements:
+ chunk_data = placement["data"]
+ chunk_start = placement["start_sample"]
+ chunk_samples = placement["samples"]
+
+ for i in range(chunk_samples):
+ sample_offset = chunk_start + i
+ if 0 <= sample_offset < total_samples:
+ # Read PCM16 sample from chunk
+ sample_value = struct.unpack(' str:
+ """
+ Evaluates a completed voice screening session for a candidate.
+
+ Args:
+ candidate_id (str): The UUID of the candidate to evaluate.
+
+ Returns:
+ str: A summary of the evaluation result.
+ """
+ try:
+ with SessionLocal() as session:
+ # 1. Fetch Candidate and VoiceScreeningResult
+ candidate = session.execute(
+ select(Candidate).where(Candidate.id == UUID(candidate_id))
+ ).scalar_one_or_none()
+
+ if not candidate:
+ return f"โ Candidate {candidate_id} not found."
+
+ # Fetch latest voice screening result
+ voice_result = session.execute(
+ select(VoiceScreeningResult)
+ .where(VoiceScreeningResult.candidate_id == UUID(candidate_id))
+ .order_by(VoiceScreeningResult.timestamp.desc())
+ ).scalar_one_or_none()
+
+ if not voice_result or not voice_result.transcript_text:
+ return f"โ No voice screening transcript found for candidate {candidate.full_name}."
+
+ # Fetch job title from CV screening result (for context)
+ cv_result = session.execute(
+ select(CVScreeningResult)
+ .where(CVScreeningResult.candidate_id == UUID(candidate_id))
+ .order_by(CVScreeningResult.timestamp.desc())
+ ).scalar_one_or_none()
+
+ job_title = cv_result.job_title if cv_result else "the position"
+
+ # 2. Prepare Input (Audio + Text)
+ messages = []
+
+ messages.append(SystemMessage(content=SYSTEM_PROMPT))
+
+ user_content = []
+ user_content.append({"type": "text", "text": f"Candidate: {candidate.full_name}\nPosition: {job_title}\n"})
+
+ # Try to load audio
+ audio_loaded = False
+ if voice_result.audio_url and os.path.exists(voice_result.audio_url):
+ try:
+ with open(voice_result.audio_url, "rb") as audio_file:
+ audio_data = base64.b64encode(audio_file.read()).decode("utf-8")
+ user_content.append({
+ "type": "input_audio",
+ "input_audio": {
+ "data": audio_data,
+ "format": "wav"
+ }
+ })
+ audio_loaded = True
+ except Exception as e:
+ print(f"โ ๏ธ Failed to load audio file: {e}")
+
+ # Always include transcript as text context
+ user_content.append({"type": "text", "text": f"Transcript:\n{voice_result.transcript_text}\n"})
+
+ messages.append(HumanMessage(content=user_content))
+
+ # 3. Call LLM
+ # Use audio-capable model if audio is loaded, otherwise standard model
+ model_name = "gpt-4o-audio-preview" if audio_loaded else "gpt-4o"
+ llm = ChatOpenAI(model=model_name, temperature=0)
+
+ # gpt-4o-audio-preview doesn't support 'json_schema' response format yet, use function calling
+ method = "function_calling" if audio_loaded else "function_calling"
+
+ structured_llm = llm.with_structured_output(VoiceScreeningOutput, method=method)
+ evaluation: VoiceScreeningOutput = structured_llm.invoke(messages)
+
+ # 4. Update Database
+ voice_result.sentiment_score = evaluation.sentiment_score
+ voice_result.confidence_score = evaluation.confidence_score
+ voice_result.communication_score = evaluation.communication_score
+ voice_result.proficiency_score = evaluation.proficiency_score
+ voice_result.llm_summary = evaluation.llm_summary
+ # voice_result.llm_judgment_json = evaluation.model_dump() # Removed from schema
+
+ # 5. Determine Pass/Fail
+ # Calculate average score (0-1 scale -> 0-100 scale for threshold comparison)
+ avg_score = (
+ evaluation.sentiment_score +
+ evaluation.confidence_score +
+ evaluation.communication_score +
+ evaluation.proficiency_score
+ ) / 4.0 * 100
+
+ if avg_score >= 75:
+ candidate.status = CandidateStatus.voice_passed
+ result_msg = "PASSED"
+ else:
+ candidate.status = CandidateStatus.voice_rejected
+ result_msg = "REJECTED"
+
+ candidate.updated_at = datetime.utcnow()
+ session.commit()
+
+ return (
+ f"โ
Evaluation complete for {candidate.full_name} using {model_name}.\n"
+ f"Result: {result_msg} (Score: {avg_score:.1f}/100)\n"
+ f"Summary: {evaluation.llm_summary}"
+ )
+
+ except Exception as e:
+ import traceback
+ return f"โ Error evaluating voice screening: {str(e)}\n{traceback.format_exc()}"
+
+# Alias for the tool to be used in supervisor
+voice_judge = evaluate_voice_screening
diff --git a/src/backend/agents/voice_screening/schemas/__init__.py b/src/backend/agents/voice_screening/schemas/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ad82fba24a155ba7ee7b7a8a619dba3e4c437a60
--- /dev/null
+++ b/src/backend/agents/voice_screening/schemas/__init__.py
@@ -0,0 +1,6 @@
+from src.backend.agents.voice_screening.schemas.output_schema import (
+ VoiceScreeningOutput
+)
+
+__all__ = ["VoiceScreeningOutput"]
+
diff --git a/src/backend/agents/voice_screening/schemas/output_schema.py b/src/backend/agents/voice_screening/schemas/output_schema.py
new file mode 100644
index 0000000000000000000000000000000000000000..8e13ee217ca7b060c0b991ed0f13dc18501c7f5a
--- /dev/null
+++ b/src/backend/agents/voice_screening/schemas/output_schema.py
@@ -0,0 +1,35 @@
+from pydantic import BaseModel, Field
+from typing import Optional, Dict, Any, List
+from datetime import datetime
+
+
class CallTranscript(BaseModel):
    """A single utterance captured in a screening-call transcript."""
    speaker: str = Field(..., description="Speaker identifier ('agent' or 'candidate')")
    text: str = Field(..., description="Transcribed text")
    # NOTE(review): datetime.utcnow produces a naive datetime and is deprecated
    # since Python 3.12 — consider datetime.now(timezone.utc); confirm DB columns
    # before changing, since other records here also use utcnow.
    timestamp: datetime = Field(default_factory=datetime.utcnow, description="When the utterance occurred")
+
+
class ConversationState(BaseModel):
    """State management during an active call.

    Tracks the transcript, question progress, and lifecycle timestamps for
    one in-flight screening call, keyed by the Twilio Call SID.
    """
    call_sid: str = Field(..., description="Twilio Call SID")
    candidate_id: str = Field(..., description="Candidate UUID")
    transcript: List[CallTranscript] = Field(default_factory=list, description="Full conversation transcript")
    # current_question_index indexes into interview_questions
    current_question_index: int = Field(default=0, description="Index of current interview question")
    interview_questions: List[str] = Field(default_factory=list, description="List of interview questions")
    is_active: bool = Field(default=True, description="Whether the call is currently active")
    started_at: Optional[datetime] = Field(default=None, description="Call start time")
    ended_at: Optional[datetime] = Field(default=None, description="Call end time")
+
+
class VoiceScreeningOutput(BaseModel):
    """Structured results from voice screening evaluation.

    All four scores are normalized to the [0, 1] range (enforced by ge/le);
    downstream pass/fail logic averages and rescales them.
    """
    sentiment_score: float = Field(..., ge=0, le=1, description="Overall sentiment score (0=negative, 1=positive)")
    confidence_score: float = Field(..., ge=0, le=1, description="Candidate's confidence level")
    communication_score: float = Field(..., ge=0, le=1, description="Communication clarity and effectiveness")
    proficiency_score: float = Field(..., ge=0, le=1, description="Candidate's technical proficiency")
    llm_summary: str = Field(..., description="LLM-generated summary of the interview")
    llm_judgment_json: Optional[Dict[str, Any]] = Field(default=None, description="Structured judgment data from LLM")
    key_traits: List[str] = Field(default_factory=list, description="Key personality/technical traits identified")
    recommendation: str = Field(..., description="Pass/fail or next-step recommendation")
+
diff --git a/src/backend/agents/voice_screening/session_service.py b/src/backend/agents/voice_screening/session_service.py
new file mode 100644
index 0000000000000000000000000000000000000000..39c7ddd3e2df5305d2791a90da7fed2d733ddcad
--- /dev/null
+++ b/src/backend/agents/voice_screening/session_service.py
@@ -0,0 +1,131 @@
+"""
+Session service for voice screening.
+Handles session configuration, screening questions, and database operations.
+"""
+import logging
+from typing import Optional, Dict
+from uuid import UUID
+from datetime import datetime
+
+from sqlalchemy import select, desc
+
+from src.backend.database.candidates.client import SessionLocal
+from src.backend.database.candidates.models import Candidate, CVScreeningResult, VoiceScreeningResult
+from src.backend.state.candidate import CandidateStatus
+from src.backend.agents.voice_screening.utils.questions import get_screening_questions
+
+logger = logging.getLogger(__name__)
+
+
def get_session_config(candidate_id: str) -> Dict:
    """
    Build the voice-session configuration for a candidate.

    Looks up the candidate and their latest CV screening result (for the job
    title), assembles the interviewer instructions, and returns them together
    with the realtime-audio session settings.

    Args:
        candidate_id: UUID of the candidate

    Returns:
        Dict with candidate name, job title, instructions, questions, and
        the realtime session config.

    Raises:
        ValueError: If no candidate exists for the given id.
    """
    with SessionLocal() as db:
        cand = db.execute(
            select(Candidate).where(Candidate.id == UUID(candidate_id))
        ).scalar_one_or_none()

        if cand is None:
            raise ValueError(f"Candidate {candidate_id} not found")

        # Most recent CV screening result supplies the job title, if any.
        latest_cv = db.execute(
            select(CVScreeningResult)
            .where(CVScreeningResult.candidate_id == UUID(candidate_id))
            .order_by(desc(CVScreeningResult.timestamp))
            .limit(1)
        ).scalar_one_or_none()

        role = latest_cv.job_title if latest_cv else "the position"
        question_list = get_screening_questions(role)

        # Numbered question lines, one per question.
        numbered = "".join(
            f"{idx}. {question}\n"
            for idx, question in enumerate(question_list, 1)
        )

        prompt = (
            f"You are a friendly HR assistant conducting a phone screening interview with {cand.full_name} "
            f"for the position of {role}. "
            f"Greet the candidate warmly by name. "
            f"Your goal is to ask the following main questions to assess their fit:\n\n"
            + numbered
            + "\nAsk one question at a time. Wait for their response before moving to the next. "
            "Keep the conversations brief and to the point, ask only one follow-up question per main question. "
            "If they ask clarifying questions, answer them briefly."
        )

        return {
            "candidate_name": cand.full_name,
            "job_title": role,
            "instructions": prompt,
            "questions": question_list,
            "config": {
                "modalities": ["audio", "text"],
                "instructions": prompt,
                "voice": "alloy",
                "input_audio_format": "pcm16",
                "output_audio_format": "pcm16",
                "turn_detection": {
                    "type": "server_vad",
                    "threshold": 0.5,
                    "prefix_padding_ms": 300,
                    "silence_duration_ms": 10000
                }
            }
        }
+
+
def save_voice_screening_session(
    candidate_id: str,
    session_id: str,
    transcript_text: str,
    audio_url: Optional[str] = None
) -> None:
    """
    Persist a completed voice screening session.

    Inserts a new VoiceScreeningResult row (scores left empty for the judge
    to fill in later) and advances the candidate's status to voice_done.

    Args:
        candidate_id: UUID of the candidate
        session_id: Session identifier (stored in call_sid)
        transcript_text: Full conversation transcript
        audio_url: Path to saved audio file

    Raises:
        ValueError: If no candidate exists for the given id.
    """
    with SessionLocal() as db:
        cand = db.execute(
            select(Candidate).where(Candidate.id == UUID(candidate_id))
        ).scalar_one_or_none()

        if cand is None:
            raise ValueError(f"Candidate {candidate_id} not found")

        # New screening row; evaluation scores stay None until judged.
        record = VoiceScreeningResult(
            candidate_id=cand.id,
            call_sid=session_id,  # Using session_id instead of Twilio call_sid
            transcript_text=transcript_text,
            audio_url=audio_url,
            timestamp=datetime.utcnow(),
            sentiment_score=None,
            confidence_score=None,
            communication_score=None,
            llm_summary=None,
        )

        db.add(record)
        cand.status = CandidateStatus.voice_done
        cand.updated_at = datetime.utcnow()
        db.commit()

    logger.info(f"Voice screening session saved for candidate {candidate_id}")
diff --git a/src/backend/agents/voice_screening/utils/__init__.py b/src/backend/agents/voice_screening/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..36432d6a6d27f101ebb84cd67aa20aa9cc187369
--- /dev/null
+++ b/src/backend/agents/voice_screening/utils/__init__.py
@@ -0,0 +1,2 @@
+# Utils module for voice screening agent
+
diff --git a/src/backend/agents/voice_screening/utils/questions.py b/src/backend/agents/voice_screening/utils/questions.py
new file mode 100644
index 0000000000000000000000000000000000000000..9bb0f1f748c399c077f7b3a46f7e720fd9c7d738
--- /dev/null
+++ b/src/backend/agents/voice_screening/utils/questions.py
@@ -0,0 +1,20 @@
+from typing import List
+
def get_screening_questions(job_title: str) -> List[str]:
    """
    Return the screening questions to ask for a given job title.

    The current set is role-independent; role-specific questions (e.g. a
    technical deep-dive for engineering titles) can be appended here later.
    """
    return [
        "Can you briefly walk me through your background and experience?",
        "What motivated you to apply for this position?",
        "What are your salary expectations?",
        "When would you be available to start?",
    ]
diff --git a/src/backend/api/__init__.py b/src/backend/api/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c00bdb0433a9c622d166c6f44048c640426dce56
--- /dev/null
+++ b/src/backend/api/__init__.py
@@ -0,0 +1,6 @@
+"""API package for Recruitment Agent."""
+
+from src.backend.api.app import app
+
+__all__ = ["app"]
+
diff --git a/src/backend/api/app.py b/src/backend/api/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..b399dd25143df8cf50c3051876e1dc79949812c5
--- /dev/null
+++ b/src/backend/api/app.py
@@ -0,0 +1,55 @@
"""
FastAPI Application for Recruitment Agent API.

Run with:
    uvicorn src.backend.api.app:app --reload --port 8000
"""

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

from src.backend.api.routers import supervisor, cv_upload, voice_screener, database
from src.backend.configs import get_openai_settings

# Validate OpenAI API key at startup (shows nice error if missing)
get_openai_settings()

app = FastAPI(
    title="Recruitment Agent API",
    description="API layer for the HR Supervisor Agent and recruitment tools",
    version="1.0.0",
)

# CORS middleware for frontend integration.
# NOTE(review): wildcard origins combined with allow_credentials=True is
# disallowed by browsers per the Fetch spec — pin the frontend origin(s)
# before production deploys.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Configure appropriately for production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Mount feature routers under versioned prefixes
app.include_router(supervisor.router, prefix="/api/v1/supervisor", tags=["Supervisor"])
app.include_router(cv_upload.router, prefix="/api/v1/cv", tags=["CV Upload"])
app.include_router(database.router, prefix="/api/v1/db", tags=["Database"])
app.include_router(voice_screener.router, prefix="/api/v1/voice-screener", tags=["Voice Screener"])
+
+
@app.get("/health")
async def health_check() -> dict[str, str]:
    """Report that the API process is up."""
    payload: dict[str, str] = {"status": "healthy"}
    return payload
+
+
@app.get("/")
async def root():
    """Describe the API and point at the docs and health endpoints."""
    info = dict(
        message="Recruitment Agent API",
        docs="/docs",
        health="/health",
    )
    return info
+
diff --git a/src/backend/api/info.md b/src/backend/api/info.md
new file mode 100644
index 0000000000000000000000000000000000000000..94fcc58c0c1e434f489d5c05462b94a4bd9ff10e
--- /dev/null
+++ b/src/backend/api/info.md
@@ -0,0 +1,173 @@
+# API Layer 🚀
+
+> FastAPI backend decoupling agents from frontends.
+
+## Quick Start
+
+```bash
+# Local
+uvicorn src.backend.api.app:app --reload --port 8080
+
+# Docker
+docker compose --env-file .env -f docker/docker-compose.yml up supervisor_api
+```
+
+**Docs:** http://localhost:8080/docs
+
+## Endpoints
+
+### Supervisor Agent `/api/v1/supervisor`
+
+| Method | Endpoint | Description |
+|--------|----------|-------------|
+| POST | `/chat` | Batch response with context compaction |
+| POST | `/chat/stream` | SSE streaming with context compaction ⚠️ |
+| POST | `/raw/chat` | Batch response, direct agent (no compaction) |
+| POST | `/raw/chat/stream` | SSE streaming, direct agent ⚠️ |
+| POST | `/new` | Create new chat session |
+| GET | `/health` | Health check |
+
+⚠️ **Note:** Streaming endpoints have known issues. Use batch endpoints (`/chat` or `/raw/chat`) for reliable operation.
+
+**With vs Raw endpoints:**
+- `/chat` and `/chat/stream` use `CompactingSupervisor` wrapper (auto context management)
+- `/raw/chat` and `/raw/chat/stream` bypass wrapper (direct agent access, useful for debugging)
+
+**Streaming (SSE) events:**
+```
+event: token → {"content": "Hello"}
+event: done → {"thread_id": "abc123", "token_count": 150}
+event: error → {"error": "Something went wrong"}
+```
+
+### CV Upload `/api/v1/cv`
+
+| Method | Endpoint | Description |
+|--------|----------|-------------|
+| POST | `/submit` | Submit application + CV |
+| GET | `/health` | Health check |
+
+**Submit flow:**
+1. Save CV file to disk
+2. Register candidate in DB
+3. Parse CV โ Markdown (GPT-4 Vision)
+4. Update parsed path in DB
+
+### Database `/api/v1/db`
+
+| Method | Endpoint | Description |
+|--------|----------|-------------|
+| POST | `/query` | Flexible query any table |
+| GET | `/candidates` | List candidates with filters |
+| GET | `/candidates/{id}` | Get full candidate profile by UUID |
+| GET | `/candidates/email/{email}` | Get full candidate profile by email |
+| GET | `/cv-screening` | List CV screening results |
+| GET | `/voice-screening` | List voice screening results |
+| GET | `/interviews` | List interview scheduling |
+| GET | `/decisions` | List final decisions |
+| GET | `/stats` | Database statistics |
+| GET | `/health` | Health check |
+
+**Full Candidate Profile** (`/candidates/{id}` and `/candidates/email/{email}`):
+
+Returns ALL data for a candidate including related records (by default `include_relations=true`):
+- **Base fields:** id, full_name, email, phone_number, cv_file_path, parsed_cv_file_path, status, created_at, updated_at
+- **cv_screening_results:** list of CV screening scores and feedback
+- **voice_screening_results:** list of voice screening transcripts and scores
+- **interview_scheduling:** list of scheduled interviews
+- **final_decision:** hiring decision with rationale (if any)
+
+Use `?include_relations=false` to fetch only base candidate fields.
+
+**Flexible Query Example:**
+```json
+POST /api/v1/db/query
+{
+ "table": "candidates",
+ "filters": {"status": "applied"},
+ "fields": ["id", "full_name", "email"],
+ "include_relations": true,
+ "limit": 10,
+ "offset": 0,
+ "sort_by": "created_at",
+ "sort_order": "desc"
+}
+```
+
+**Supported filter operators:**
+- `$eq`, `$ne`: equality/inequality
+- `$gt`, `$gte`, `$lt`, `$lte`: comparisons
+- `$in`, `$nin`: list membership
+- `$like`, `$ilike`: pattern matching
+
+## Structure
+
+```
+src/backend/api/
+├── app.py              → FastAPI app + CORS + router mounting
+├── routers/
+│   ├── supervisor.py   → Chat endpoints (regular + streaming)
+│   ├── cv_upload.py    → CV submission endpoint
+│   └── database.py     → Flexible database query endpoints
+└── schemas/
+    ├── supervisor_chat.py → ChatRequest, ChatResponse
+    ├── cv_upload.py       → SubmitResponse
+    └── database.py        → QueryRequest, QueryResponse, etc.
+```
+
+## SDK Clients
+
+Frontends use SDK clients instead of raw HTTP:
+
+```python
+# Supervisor
+from src.sdk import SupervisorClient
+client = SupervisorClient()
+for chunk in client.stream("Show candidates", thread_id):
+ print(chunk.content)
+
+# CV Upload
+from src.sdk import CVUploadClient
+client = CVUploadClient()
+response = client.submit(name, email, phone, cv_file, filename)
+
+# Database Queries
+from src.sdk import DatabaseClient
+db = DatabaseClient()
+
+# List candidates
+candidates = db.get_candidates(status="applied", include_relations=True)
+for c in candidates.data:
+ print(c["full_name"], c["status"])
+
+# Get full candidate profile by email
+profile = db.get_candidate_by_email("ada@example.com")
+print(profile.data["cv_screening_results"])
+
+# Flexible query with filters
+results = db.query(
+ table="cv_screening_results",
+ filters={"overall_fit_score": {"$gte": 0.8}},
+ sort_by="overall_fit_score",
+ sort_order="desc"
+)
+
+# Get database stats
+stats = db.get_stats()
+print(stats.stats["candidates"]["by_status"])
+```
+
+## Environment
+
+| Variable | Default | Used By |
+|----------|---------|---------|
+| `OPENAI_API_KEY` | required | Validated at startup |
+| `CV_UPLOAD_PATH` | `src/database/cvs/uploads` | CV router |
+| `CV_PARSED_PATH` | `src/database/cvs/parsed` | CV router |
+| `POSTGRES_*` | varies | Database connection |
+
+## TODO
+
+- [ ] Voice agent router
+- [x] Candidate database router
+
diff --git a/src/backend/api/routers/__init__.py b/src/backend/api/routers/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..9d9cd824fb94d99fb8dbac25ebf4a71f12f994ae
--- /dev/null
+++ b/src/backend/api/routers/__init__.py
@@ -0,0 +1,6 @@
+"""
+Router modules for the Recruitment Agent API.
+"""
+from . import supervisor, cv_upload, voice_screener, database
+
+__all__ = ["supervisor", "cv_upload", "voice_screener", "database"]
diff --git a/src/backend/api/routers/cv_upload.py b/src/backend/api/routers/cv_upload.py
new file mode 100644
index 0000000000000000000000000000000000000000..0ba8447d977f0355a88cb09b3fcdc86821413b02
--- /dev/null
+++ b/src/backend/api/routers/cv_upload.py
@@ -0,0 +1,102 @@
+"""
+CV Upload Router.
+
+Handles CV submission and candidate registration.
+"""
+
+from pathlib import Path
+from fastapi import APIRouter, HTTPException, UploadFile, File, Form
+
+from src.backend.api.schemas.cv_upload import SubmitResponse
+from src.backend.configs import get_cv_settings
+from src.backend.database.candidates import register_candidate, update_parsed_cv_path
+from src.backend.database.cvs import save_cv
+from src.backend.doc_parser import pdf_to_markdown
+
+
# Router instance; mounted under /api/v1/cv by the FastAPI app.
router = APIRouter()

# Load settings once at import time and ensure the upload/parsed
# directories exist before any request is handled.
settings = get_cv_settings()
settings.ensure_dirs()
+
+
+# ==================================================================================
+# ENDPOINTS
+# ==================================================================================
+
@router.post("/submit", response_model=SubmitResponse)
async def submit_application(
    full_name: str = Form(..., description="Candidate's full name"),
    email: str = Form(..., description="Candidate's email address"),
    phone: str = Form(default="", description="Candidate's phone number"),
    cv_file: UploadFile = File(..., description="CV file (PDF or DOCX)")
) -> SubmitResponse:
    """
    Submit a job application with CV.

    This endpoint:
    1. Saves the uploaded CV file
    2. Registers the candidate in the database
    3. Parses the CV to markdown for AI processing
    4. Updates the parsed CV path in the database

    Returns success status and details about the submission.

    Raises:
        HTTPException: 400 for an unsupported file extension, 500 if any
            processing step fails.
    """
    # Validate file type by extension only (file content is not sniffed)
    allowed_extensions = {".pdf", ".docx"}
    file_ext = Path(cv_file.filename or "").suffix.lower()
    if file_ext not in allowed_extensions:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid file type. Allowed: {', '.join(allowed_extensions)}"
        )

    try:
        # 1. Save CV locally
        file_path = save_cv(cv_file.file, cv_file.filename or "cv.pdf", candidate_name=full_name)
        file_path = Path(file_path)

        # 2. Register candidate in DB; False means this email already applied
        success = register_candidate(full_name, email, phone, str(file_path))

        if not success:
            return SubmitResponse(
                success=False,
                message=f"An application with email '{email}' already exists. You can only apply once.",
                candidate_name=full_name,
                email=email,
                already_exists=True,
            )

        # 3. Parse CV to markdown
        # NOTE(review): .docx uploads are also routed through pdf_to_markdown —
        # confirm the parser accepts DOCX input.
        pdf_to_markdown(
            input_path=file_path,
            output_path=settings.parsed_path,
            model="gpt-4.1-mini",
        )

        # 4. Update parsed CV path in DB
        # Assumes pdf_to_markdown writes <stem>.txt into parsed_path — TODO confirm
        parsed_path = settings.parsed_path / (file_path.stem + ".txt")
        update_parsed_cv_path(email, str(parsed_path))

        return SubmitResponse(
            success=True,
            message=f"Application submitted successfully for {full_name}!",
            candidate_name=full_name,
            email=email,
            cv_file_path=str(file_path),
        )

    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to process application: {str(e)}"
        )
+
+
@router.get("/health")
async def cv_upload_health():
    """Liveness probe for the CV upload router."""
    payload = {"status": "healthy", "service": "cv_upload"}
    return payload
+
diff --git a/src/backend/api/routers/database.py b/src/backend/api/routers/database.py
new file mode 100644
index 0000000000000000000000000000000000000000..4fff4e1082e3f10dbe16b96e7e8e00d9c866cc2c
--- /dev/null
+++ b/src/backend/api/routers/database.py
@@ -0,0 +1,505 @@
+"""
+Database Query Router.
+
+Flexible endpoints for querying any table in the recruitment database.
+"""
+
+from typing import Any, Optional
+from uuid import UUID
+
+from fastapi import APIRouter, HTTPException, Query
+from sqlalchemy import asc, desc
+from sqlalchemy.orm import joinedload
+
+from src.backend.api.schemas.database import (
+ TableName,
+ SortOrder,
+ QueryRequest,
+ QueryResponse,
+ SingleRecordResponse,
+)
+from src.backend.database.candidates.client import SessionLocal
+from src.backend.database.candidates.models import (
+ Candidate,
+ CVScreeningResult,
+ VoiceScreeningResult,
+ InterviewScheduling,
+ FinalDecision,
+)
+
+
+router = APIRouter()
+
+
+# ==================================================================================
+# TABLE MAPPING
+# ==================================================================================
+
# Maps each public TableName enum member to its backing SQLAlchemy model.
TABLE_MAP = {
    TableName.candidates: Candidate,
    TableName.cv_screening_results: CVScreeningResult,
    TableName.voice_screening_results: VoiceScreeningResult,
    TableName.interview_scheduling: InterviewScheduling,
    TableName.final_decision: FinalDecision,
}
+
+
+# ==================================================================================
+# HELPER FUNCTIONS
+# ==================================================================================
+
def model_to_dict(obj: Any, fields: Optional[list[str]] = None) -> Optional[dict[str, Any]]:
    """
    Convert a SQLAlchemy model instance to a dictionary.

    Args:
        obj: SQLAlchemy model instance (or None).
        fields: Optional list of column names to include. If None, includes all.

    Returns:
        Dictionary representation of the model, or None if obj is None.
    """
    from enum import Enum  # local import: only needed for value serialization

    if obj is None:
        return None

    result: dict[str, Any] = {}
    for column in obj.__table__.columns:
        key = column.name
        if fields is not None and key not in fields:
            continue
        value = getattr(obj, key)
        # Convert UUID and Enum columns to JSON-friendly primitives.
        # isinstance checks (rather than duck-typed hasattr('hex') /
        # hasattr('value')) avoid misclassifying bytes/memoryview (which have
        # a .hex method) or arbitrary objects exposing a .value attribute.
        if isinstance(value, UUID):
            value = str(value)
        elif isinstance(value, Enum):
            value = value.value
        result[key] = value
    return result
+
+
def serialize_relation(relation_data: Any, is_list: bool = True) -> Any:
    """Serialize an ORM relationship into plain dicts (list or scalar)."""
    if relation_data is None:
        return None
    if not is_list:
        return model_to_dict(relation_data)
    return [model_to_dict(entry) for entry in relation_data]
+
+
def apply_filters(query, model, filters: dict[str, Any]):
    """
    Apply filters to a SQLAlchemy query.

    Supports:
    - Simple equality: {"field": "value"}
    - Comparison operators: {"field": {"$gt": 5, "$lte": 10}}
    - List membership: {"field": {"$in": [1, 2, 3]}}

    Fields that don't exist on the model and unrecognized operators are
    silently ignored.
    """
    # Operator name -> builder of the SQLAlchemy filter criterion.
    operators = {
        "$eq": lambda col, v: col == v,
        "$ne": lambda col, v: col != v,
        "$gt": lambda col, v: col > v,
        "$gte": lambda col, v: col >= v,
        "$lt": lambda col, v: col < v,
        "$lte": lambda col, v: col <= v,
        "$in": lambda col, v: col.in_(v),
        "$nin": lambda col, v: ~col.in_(v),
        "$like": lambda col, v: col.like(v),
        "$ilike": lambda col, v: col.ilike(v),
    }

    for field_name, condition in filters.items():
        if not hasattr(model, field_name):
            continue

        column = getattr(model, field_name)

        if isinstance(condition, dict):
            # Operator form: apply each recognized operator in turn.
            for op_name, operand in condition.items():
                builder = operators.get(op_name)
                if builder is not None:
                    query = query.filter(builder(column, operand))
        else:
            # Plain value: simple equality.
            query = query.filter(column == condition)

    return query
+
+
+# ==================================================================================
+# ENDPOINTS
+# ==================================================================================
+
@router.post("/query", response_model=QueryResponse)
async def query_table(request: QueryRequest) -> QueryResponse:
    """
    Flexible query endpoint for any table.

    Supports filtering, field selection, pagination, and sorting.

    Example request body:
    ```json
    {
        "table": "candidates",
        "filters": {"status": "applied"},
        "fields": ["id", "full_name", "email"],
        "limit": 10,
        "offset": 0,
        "sort_by": "created_at",
        "sort_order": "desc"
    }
    ```

    Raises:
        HTTPException: 400 for an unknown table, 500 on query failure.
    """
    model = TABLE_MAP.get(request.table)
    if not model:
        raise HTTPException(status_code=400, detail=f"Unknown table: {request.table}")

    try:
        with SessionLocal() as session:
            # Base query
            query = session.query(model)

            # Apply eager loading for relations if requested (candidates only)
            if request.include_relations and request.table == TableName.candidates:
                query = query.options(
                    joinedload(Candidate.cv_screening_results),
                    joinedload(Candidate.voice_screening_results),
                    joinedload(Candidate.interview_scheduling),
                    joinedload(Candidate.final_decision),
                )

            # Apply filters
            if request.filters:
                query = apply_filters(query, model, request.filters)

            # Get total count before pagination.
            # NOTE(review): count() wraps the current statement in a subquery;
            # verify total_count is not inflated by eager-load joins when
            # include_relations=true and relations are one-to-many.
            total_count = query.count()

            # Apply sorting (silently skipped if sort_by is not a model column)
            if request.sort_by and hasattr(model, request.sort_by):
                sort_column = getattr(model, request.sort_by)
                if request.sort_order == SortOrder.asc:
                    query = query.order_by(asc(sort_column))
                else:
                    query = query.order_by(desc(sort_column))

            # Apply pagination
            # NOTE(review): relies on SQLAlchemy's subquery wrapping of joined
            # eager loads under LIMIT/OFFSET — confirm row counts with
            # relations enabled.
            query = query.offset(request.offset).limit(request.limit)

            # Execute query
            results = query.all()

            # Serialize results
            data = []
            for row in results:
                row_dict = model_to_dict(row, request.fields)

                # Include relations for candidates if requested
                if request.include_relations and request.table == TableName.candidates:
                    row_dict["cv_screening_results"] = serialize_relation(row.cv_screening_results)
                    row_dict["voice_screening_results"] = serialize_relation(row.voice_screening_results)
                    row_dict["interview_scheduling"] = serialize_relation(row.interview_scheduling)
                    row_dict["final_decision"] = serialize_relation(row.final_decision, is_list=False)

                data.append(row_dict)

            return QueryResponse(
                success=True,
                table=request.table.value,
                total_count=total_count,
                returned_count=len(data),
                offset=request.offset,
                data=data,
            )

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Query failed: {str(e)}")
+
+
@router.get("/candidates", response_model=QueryResponse)
async def list_candidates(
    status: Optional[str] = Query(default=None, description="Filter by status"),
    limit: int = Query(default=100, ge=1, le=1000, description="Max records"),
    offset: int = Query(default=0, ge=0, description="Offset for pagination"),
    include_relations: bool = Query(default=False, description="Include related screening data"),
) -> QueryResponse:
    """
    List candidates, newest first, optionally filtered by status.

    Thin convenience wrapper that delegates to the generic /query endpoint.
    """
    status_filter = {"status": status} if status else None
    return await query_table(
        QueryRequest(
            table=TableName.candidates,
            filters=status_filter,
            include_relations=include_relations,
            limit=limit,
            offset=offset,
            sort_by="created_at",
            sort_order=SortOrder.desc,
        )
    )
+
+
@router.get("/candidates/{candidate_id}", response_model=SingleRecordResponse)
async def get_candidate(
    candidate_id: UUID,
    include_relations: bool = Query(default=True, description="Include related screening data"),
) -> SingleRecordResponse:
    """
    Fetch one candidate by UUID, optionally with all related screening data.
    """
    try:
        with SessionLocal() as session:
            q = session.query(Candidate).filter(Candidate.id == candidate_id)

            if include_relations:
                # Eager-load every relation so serialization below is one round trip.
                q = q.options(
                    joinedload(Candidate.cv_screening_results),
                    joinedload(Candidate.voice_screening_results),
                    joinedload(Candidate.interview_scheduling),
                    joinedload(Candidate.final_decision),
                )

            record = q.first()

            if record is None:
                return SingleRecordResponse(
                    success=False,
                    table="candidates",
                    data=None,
                    message=f"Candidate with ID {candidate_id} not found",
                )

            payload = model_to_dict(record)

            if include_relations:
                payload["cv_screening_results"] = serialize_relation(record.cv_screening_results)
                payload["voice_screening_results"] = serialize_relation(record.voice_screening_results)
                payload["interview_scheduling"] = serialize_relation(record.interview_scheduling)
                payload["final_decision"] = serialize_relation(record.final_decision, is_list=False)

            return SingleRecordResponse(
                success=True,
                table="candidates",
                data=payload,
            )

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to fetch candidate: {str(e)}")
+
+
+@router.get("/candidates/email/{email}", response_model=SingleRecordResponse)
+async def get_candidate_by_email(
+ email: str,
+ include_relations: bool = Query(default=True, description="Include related screening data"),
+) -> SingleRecordResponse:
+ """
+ Get a candidate by email address.
+ """
+ try:
+ with SessionLocal() as session:
+ query = session.query(Candidate).filter(Candidate.email == email)
+
+ if include_relations:
+ query = query.options(
+ joinedload(Candidate.cv_screening_results),
+ joinedload(Candidate.voice_screening_results),
+ joinedload(Candidate.interview_scheduling),
+ joinedload(Candidate.final_decision),
+ )
+
+ candidate = query.first()
+
+ if not candidate:
+ return SingleRecordResponse(
+ success=False,
+ table="candidates",
+ data=None,
+ message=f"Candidate with email '{email}' not found",
+ )
+
+ data = model_to_dict(candidate)
+
+ if include_relations:
+ data["cv_screening_results"] = serialize_relation(candidate.cv_screening_results)
+ data["voice_screening_results"] = serialize_relation(candidate.voice_screening_results)
+ data["interview_scheduling"] = serialize_relation(candidate.interview_scheduling)
+ data["final_decision"] = serialize_relation(candidate.final_decision, is_list=False)
+
+ return SingleRecordResponse(
+ success=True,
+ table="candidates",
+ data=data,
+ )
+
+ except Exception as e:
+ raise HTTPException(status_code=500, detail=f"Failed to fetch candidate: {str(e)}")
+
+
+@router.get("/cv-screening", response_model=QueryResponse)
+async def list_cv_screenings(
+ candidate_id: Optional[UUID] = Query(default=None, description="Filter by candidate ID"),
+ min_score: Optional[float] = Query(default=None, ge=0, le=1, description="Minimum overall fit score"),
+ limit: int = Query(default=100, ge=1, le=1000),
+ offset: int = Query(default=0, ge=0),
+) -> QueryResponse:
+ """
+ List CV screening results with optional filtering.
+ """
+ filters = {}
+ if candidate_id:
+ filters["candidate_id"] = str(candidate_id)
+ if min_score is not None:
+ filters["overall_fit_score"] = {"$gte": min_score}
+
+ request = QueryRequest(
+ table=TableName.cv_screening_results,
+ filters=filters if filters else None,
+ limit=limit,
+ offset=offset,
+ sort_by="timestamp",
+ sort_order=SortOrder.desc,
+ )
+ return await query_table(request)
+
+
+@router.get("/voice-screening", response_model=QueryResponse)
+async def list_voice_screenings(
+ candidate_id: Optional[UUID] = Query(default=None, description="Filter by candidate ID"),
+ limit: int = Query(default=100, ge=1, le=1000),
+ offset: int = Query(default=0, ge=0),
+) -> QueryResponse:
+ """
+ List voice screening results with optional filtering.
+ """
+ filters = {}
+ if candidate_id:
+ filters["candidate_id"] = str(candidate_id)
+
+ request = QueryRequest(
+ table=TableName.voice_screening_results,
+ filters=filters if filters else None,
+ limit=limit,
+ offset=offset,
+ sort_by="timestamp",
+ sort_order=SortOrder.desc,
+ )
+ return await query_table(request)
+
+
+@router.get("/interviews", response_model=QueryResponse)
+async def list_interviews(
+ candidate_id: Optional[UUID] = Query(default=None, description="Filter by candidate ID"),
+ status: Optional[str] = Query(default=None, description="Filter by interview status"),
+ limit: int = Query(default=100, ge=1, le=1000),
+ offset: int = Query(default=0, ge=0),
+) -> QueryResponse:
+ """
+ List interview scheduling records with optional filtering.
+ """
+ filters = {}
+ if candidate_id:
+ filters["candidate_id"] = str(candidate_id)
+ if status:
+ filters["status"] = status
+
+ request = QueryRequest(
+ table=TableName.interview_scheduling,
+ filters=filters if filters else None,
+ limit=limit,
+ offset=offset,
+ sort_by="start_time",
+ sort_order=SortOrder.desc,
+ )
+ return await query_table(request)
+
+
+@router.get("/decisions", response_model=QueryResponse)
+async def list_decisions(
+ decision: Optional[str] = Query(default=None, description="Filter by decision (e.g., 'hired', 'rejected')"),
+ min_score: Optional[float] = Query(default=None, ge=0, le=1, description="Minimum overall score"),
+ limit: int = Query(default=100, ge=1, le=1000),
+ offset: int = Query(default=0, ge=0),
+) -> QueryResponse:
+ """
+ List final decisions with optional filtering.
+ """
+ filters = {}
+ if decision:
+ filters["decision"] = decision
+ if min_score is not None:
+ filters["overall_score"] = {"$gte": min_score}
+
+ request = QueryRequest(
+ table=TableName.final_decision,
+ filters=filters if filters else None,
+ limit=limit,
+ offset=offset,
+ sort_by="timestamp",
+ sort_order=SortOrder.desc,
+ )
+ return await query_table(request)
+
+
+@router.get("/stats")
+async def get_database_stats() -> dict:
+ """
+ Get summary statistics for all tables.
+ """
+ try:
+ with SessionLocal() as session:
+ stats = {
+ "candidates": {
+ "total": session.query(Candidate).count(),
+ },
+ "cv_screening_results": {
+ "total": session.query(CVScreeningResult).count(),
+ },
+ "voice_screening_results": {
+ "total": session.query(VoiceScreeningResult).count(),
+ },
+ "interview_scheduling": {
+ "total": session.query(InterviewScheduling).count(),
+ },
+ "final_decision": {
+ "total": session.query(FinalDecision).count(),
+ },
+ }
+
+ # Get candidate status breakdown
+ from sqlalchemy import func
+ status_counts = session.query(
+ Candidate.status, func.count(Candidate.id)
+ ).group_by(Candidate.status).all()
+
+ stats["candidates"]["by_status"] = {
+ str(status.value) if hasattr(status, 'value') else str(status): count
+ for status, count in status_counts
+ }
+
+ return {"success": True, "stats": stats}
+
+ except Exception as e:
+ raise HTTPException(status_code=500, detail=f"Failed to get stats: {str(e)}")
+
+
+@router.get("/health")
+async def database_health():
+ """Health check for database router."""
+ try:
+ with SessionLocal() as session:
+ # Simple connectivity check
+ from sqlalchemy import text
+ session.execute(text("SELECT 1"))
+ return {"status": "healthy", "service": "database", "connection": "ok"}
+ except Exception as e:
+ return {"status": "unhealthy", "service": "database", "error": str(e)}
+
diff --git a/src/backend/api/routers/supervisor.py b/src/backend/api/routers/supervisor.py
new file mode 100644
index 0000000000000000000000000000000000000000..ab865673e9df6fd603f8093fff43162d0bdbdc7b
--- /dev/null
+++ b/src/backend/api/routers/supervisor.py
@@ -0,0 +1,264 @@
+"""
+Supervisor Agent Router.
+
+Handles chat interactions with the HR Supervisor Agent.
+Supports both regular and streaming responses.
+
+=============================================================================
+ENDPOINTS:
+=============================================================================
+
+WITH CONTEXT ENGINEERING (CompactingSupervisor wrapper):
+ - POST /chat : Batch response with automatic context compaction
+ - POST /chat/stream : Streaming with context compaction [HAS ERRORS - TODO FIX]
+
+RAW SUPERVISOR (Direct agent access, no wrapper):
+ - POST /raw/chat : Batch response, direct supervisor agent
+ - POST /raw/chat/stream : Streaming, direct supervisor agent [HAS ERRORS - TODO FIX]
+
+UTILITY:
+ - POST /new : Create new chat session
+ - GET /health : Health check
+
+=============================================================================
+NOTE: Both streaming endpoints (/chat/stream and /raw/chat/stream) have
+known issues that need to be fixed. Use batch endpoints (/chat or /raw/chat)
+for reliable operation.
+=============================================================================
+"""
+
+import json
+import uuid
+from fastapi import APIRouter, HTTPException
+from fastapi.responses import StreamingResponse
+
+from langchain_core.messages import HumanMessage
+from src.backend.api.schemas.supervisor_chat import ChatRequest, ChatResponse, NewChatResponse
+from src.backend.context_eng import compacting_supervisor, count_tokens_for_messages
+from src.backend.agents.supervisor.supervisor_v2 import supervisor_agent
+
+
+router = APIRouter()
+
+@router.post("/chat", response_model=ChatResponse)
+async def chat(request: ChatRequest) -> ChatResponse:
+ """
+ Send a message to the HR Supervisor Agent and receive a response.
+
+ Uses CompactingSupervisor wrapper for automatic context management.
+ When token limit is exceeded, old messages are compacted/summarized.
+
+ The agent can:
+ - Query the candidate database
+ - Screen CVs
+ - Schedule calendar events
+ - Send emails via Gmail
+
+ Use the returned `thread_id` in subsequent requests to maintain conversation context.
+ """
+ # Generate or use provided thread_id
+ thread_id = request.thread_id or str(uuid.uuid4())[:8]
+
+ try:
+ # Config for stateful conversation
+ config = {"configurable": {"thread_id": thread_id}}
+
+ # Invoke the compacting supervisor wrapper
+ response = compacting_supervisor.invoke(
+ {"messages": [HumanMessage(content=request.message)]},
+ config=config
+ )
+
+ # Extract response and calculate tokens
+ final_message = response["messages"][-1]
+ all_messages = response["messages"]
+ token_count = count_tokens_for_messages(all_messages)
+
+ return ChatResponse(
+ response=final_message.content,
+ thread_id=thread_id,
+ token_count=token_count,
+ )
+
+ except Exception as e:
+ raise HTTPException(
+ status_code=500,
+ detail=f"Agent execution failed: {str(e)}"
+ )
+
+
+@router.post("/chat/stream")
+async def chat_stream(request: ChatRequest):
+    """
+    Stream a response from the HR Supervisor Agent using Server-Sent Events (SSE).
+
+    WARNING: This endpoint has known issues and needs to be fixed (see the
+    module header note: both streaming endpoints are currently broken).
+    Use /chat for reliable batch operation.
+
+    Uses CompactingSupervisor wrapper for automatic context management.
+
+    Yields chunks as SSE events:
+    - event: token - A content token from the AI response
+    - event: done - Final message with metadata (token_count, thread_id)
+    - event: error - Error occurred
+
+    Use the returned `thread_id` in subsequent requests to maintain conversation context.
+    """
+    # Reuse the caller's thread for continuity, or mint a short random id.
+    thread_id = request.thread_id or str(uuid.uuid4())[:8]
+
+    def generate():
+        # NOTE(review): assumes compacting_supervisor.stream yields dicts with
+        # a "type" key of "token" / "done" / "error" -- confirm against the
+        # CompactingSupervisor implementation.
+        try:
+            config = {"configurable": {"thread_id": thread_id}}
+
+            for chunk in compacting_supervisor.stream(
+                {"messages": [HumanMessage(content=request.message)]},
+                config=config
+            ):
+                if chunk["type"] == "token":
+                    # SSE format: event type + data
+                    yield f"event: token\ndata: {json.dumps({'content': chunk['content']})}\n\n"
+                elif chunk["type"] == "done":
+                    yield f"event: done\ndata: {json.dumps({'thread_id': thread_id, 'token_count': chunk['token_count']})}\n\n"
+                elif chunk["type"] == "error":
+                    yield f"event: error\ndata: {json.dumps({'error': chunk['content']})}\n\n"
+
+        except Exception as e:
+            # The HTTP response has already started streaming, so errors are
+            # reported in-band as an SSE "error" event.
+            yield f"event: error\ndata: {json.dumps({'error': str(e)})}\n\n"
+
+    return StreamingResponse(
+        generate(),
+        media_type="text/event-stream",
+        headers={
+            "Cache-Control": "no-cache",
+            "Connection": "keep-alive",
+            "X-Accel-Buffering": "no",  # Disable nginx buffering
+        }
+    )
+
+
+@router.post("/new", response_model=NewChatResponse)
+async def new_chat() -> NewChatResponse:
+ """
+ Create a new chat session with a fresh thread ID.
+
+ Returns a new thread_id to use for subsequent chat requests.
+ """
+ thread_id = str(uuid.uuid4())[:8]
+
+ return NewChatResponse(
+ thread_id=thread_id,
+ message="New chat session created. Use the thread_id for your conversations.",
+ )
+
+
+@router.get("/health")
+async def supervisor_health():
+ """Health check for supervisor router."""
+ return {"status": "healthy", "service": "supervisor"}
+
+
+# =============================================================================
+# RAW SUPERVISOR ENDPOINTS (No CompactingSupervisor wrapper)
+# =============================================================================
+
+@router.post("/raw/chat", response_model=ChatResponse)
+async def raw_chat(request: ChatRequest) -> ChatResponse:
+ """
+ Send a message to the raw HR Supervisor Agent (without context compaction).
+
+ This endpoint bypasses the CompactingSupervisor wrapper, giving direct access
+ to the underlying supervisor agent. Useful for debugging or when you want
+ full control over context management.
+
+ Use the returned `thread_id` in subsequent requests to maintain conversation context.
+ """
+ thread_id = request.thread_id or str(uuid.uuid4())[:8]
+
+ try:
+ config = {"configurable": {"thread_id": thread_id}}
+
+ # Invoke the raw supervisor agent directly
+ response = supervisor_agent.invoke(
+ {"messages": [HumanMessage(content=request.message)]},
+ config=config
+ )
+
+ # Extract response and calculate tokens
+ final_message = response["messages"][-1]
+ all_messages = response["messages"]
+ token_count = count_tokens_for_messages(all_messages)
+
+ return ChatResponse(
+ response=final_message.content,
+ thread_id=thread_id,
+ token_count=token_count,
+ )
+
+ except Exception as e:
+ raise HTTPException(
+ status_code=500,
+ detail=f"Raw agent execution failed: {str(e)}"
+ )
+
+
+@router.post("/raw/chat/stream")
+async def raw_chat_stream(request: ChatRequest):
+ """
+ Stream a response from the raw HR Supervisor Agent using Server-Sent Events (SSE).
+
+ โ ๏ธ WARNING: This endpoint has known issues and needs to be fixed.
+ Use /raw/chat for reliable batch requests.
+
+ This endpoint bypasses the CompactingSupervisor wrapper, giving direct access
+ to the underlying supervisor agent's streaming capabilities.
+
+ Yields chunks as SSE events:
+ - event: token - A content token from the AI response
+ - event: done - Final message with metadata (token_count, thread_id)
+ - event: error - Error occurred
+ """
+ thread_id = request.thread_id or str(uuid.uuid4())[:8]
+
+ def generate():
+ try:
+ config = {"configurable": {"thread_id": thread_id}}
+ full_response_content = ""
+
+ # Stream from the raw supervisor agent
+ for chunk in supervisor_agent.stream(
+ {"messages": [HumanMessage(content=request.message)]},
+ config=config,
+ stream_mode="messages"
+ ):
+ # chunk is a tuple: (message, metadata)
+ message, metadata = chunk
+
+ # Only yield content from AI messages that have content
+ if hasattr(message, 'content') and message.content:
+ msg_type = message.__class__.__name__
+ if 'AIMessage' in msg_type:
+ yield f"event: token\ndata: {json.dumps({'content': message.content})}\n\n"
+ full_response_content += message.content
+
+ # Get final state for token counting
+ final_state = supervisor_agent.get_state(config)
+ token_count = 0
+ if final_state and hasattr(final_state, 'values'):
+ final_messages = final_state.values.get("messages", [])
+ token_count = count_tokens_for_messages(final_messages)
+
+ yield f"event: done\ndata: {json.dumps({'thread_id': thread_id, 'token_count': token_count})}\n\n"
+
+ except Exception as e:
+ yield f"event: error\ndata: {json.dumps({'error': str(e)})}\n\n"
+
+ return StreamingResponse(
+ generate(),
+ media_type="text/event-stream",
+ headers={
+ "Cache-Control": "no-cache",
+ "Connection": "keep-alive",
+ "X-Accel-Buffering": "no",
+ }
+ )
+
diff --git a/src/backend/api/routers/voice_screener.py b/src/backend/api/routers/voice_screener.py
new file mode 100644
index 0000000000000000000000000000000000000000..08f32c52c239c9ff0e5767e501aebcc38c269853
--- /dev/null
+++ b/src/backend/api/routers/voice_screener.py
@@ -0,0 +1,265 @@
+"""
+Voice Screener API Router.
+Handles voice screening sessions, configuration, and audio/transcript saving.
+"""
+import logging
+import os
+from typing import Optional
+from pathlib import Path
+
+from fastapi import APIRouter, HTTPException, Query
+from pydantic import BaseModel
+
+from src.backend.agents.voice_screening.session_service import (
+ get_session_config,
+ save_voice_screening_session
+)
+from src.backend.agents.voice_screening.audio_processor import combine_and_export_audio
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter()
+
+
+# Request/Response Models
+class CreateSessionRequest(BaseModel):
+    """Body for POST /session/create."""
+    candidate_id: str  # candidate UUID string; existence is validated via get_session_config
+
+
+class CreateSessionResponse(BaseModel):
+    """Response for POST /session/create."""
+    session_id: str  # freshly generated UUID4 string identifying the session
+    candidate_name: str
+    job_title: str
+    message: str  # human-readable status message
+
+
+class SessionConfigResponse(BaseModel):
+    """Response for GET /session/{session_id}/config."""
+    candidate_name: str
+    job_title: str
+    instructions: str  # as returned by get_session_config
+    questions: list[str]  # as returned by get_session_config
+    config: dict  # raw config passthrough from session_service
+
+
+class SaveSessionRequest(BaseModel):
+    """Body for POST /session/{session_id}/save."""
+    session_id: str  # must match the path parameter (400 otherwise)
+    candidate_id: str
+    transcript_text: str  # client-side transcript; proxy transcript takes precedence if present
+    proxy_token: str  # Token to retrieve audio chunks from proxy
+
+
+class SaveSessionResponse(BaseModel):
+    """Response for POST /session/{session_id}/save."""
+    audio_file_path: Optional[str]  # None when no audio was saved
+    message: str
+
+
+@router.post("/session/create", response_model=CreateSessionResponse)
+async def create_session(request: CreateSessionRequest):
+ """
+ Create a new voice screening session for a candidate.
+
+ Args:
+ request: Contains candidate_id
+
+ Returns:
+ Session information including session_id
+ """
+ try:
+ import uuid
+
+ # Generate session ID
+ session_id = str(uuid.uuid4())
+
+ # Get session config (validates candidate exists)
+ config = get_session_config(request.candidate_id)
+
+ logger.info(f"Created session {session_id} for candidate {request.candidate_id}")
+
+ return CreateSessionResponse(
+ session_id=session_id,
+ candidate_name=config["candidate_name"],
+ job_title=config["job_title"],
+ message="Session created successfully"
+ )
+ except ValueError as e:
+ raise HTTPException(status_code=404, detail=str(e))
+ except Exception as e:
+ logger.error(f"Error creating session: {e}", exc_info=True)
+ raise HTTPException(status_code=500, detail=f"Failed to create session: {str(e)}")
+
+
+@router.get("/session/{session_id}/config", response_model=SessionConfigResponse)
+async def get_config(session_id: str, candidate_id: str = Query(...)):
+ """
+ Get session configuration for a candidate.
+
+ Args:
+ session_id: Session identifier (for logging)
+ candidate_id: Candidate UUID
+
+ Returns:
+ Session configuration including instructions and questions
+ """
+ try:
+ config = get_session_config(candidate_id)
+
+ logger.info(f"Retrieved config for session {session_id}, candidate {candidate_id}")
+
+ return SessionConfigResponse(
+ candidate_name=config["candidate_name"],
+ job_title=config["job_title"],
+ instructions=config["instructions"],
+ questions=config["questions"],
+ config=config["config"]
+ )
+ except ValueError as e:
+ raise HTTPException(status_code=404, detail=str(e))
+ except Exception as e:
+ logger.error(f"Error getting config: {e}", exc_info=True)
+ raise HTTPException(status_code=500, detail=f"Failed to get config: {str(e)}")
+
+
+@router.post("/session/{session_id}/save", response_model=SaveSessionResponse)
+async def save_session(session_id: str, request: SaveSessionRequest):
+    """
+    Save audio recording and transcript for a session.
+
+    This endpoint:
+    1. Retrieves audio chunks from the proxy using the token
+    2. Combines and saves the audio file
+    3. Saves transcript and audio path to database
+
+    Audio-processing failures are non-fatal: the transcript is still
+    persisted even when combining/writing the WAV fails.
+
+    Args:
+        session_id: Session identifier (must match request.session_id)
+        request: Contains candidate_id, transcript_text, and proxy_token
+
+    Returns:
+        Audio file path and success message
+
+    Raises:
+        HTTPException: 400 on session-id mismatch, 404 when the candidate is
+            unknown, 500 on proxy-retrieval or other unexpected failures.
+    """
+    if session_id != request.session_id:
+        raise HTTPException(status_code=400, detail="Session ID mismatch")
+
+    try:
+        # Import here to avoid circular dependency
+        import requests
+
+        # Get proxy URL from environment and derive the HTTP base URL by
+        # swapping the websocket scheme and dropping the /ws/realtime path.
+        proxy_url = os.getenv("WEBSOCKET_PROXY_URL", "ws://localhost:8000/ws/realtime")
+        proxy_base = proxy_url.replace("ws://", "http://").replace("wss://", "https://").replace("/ws/realtime", "")
+
+        # Retrieve audio chunks from proxy
+        try:
+            response = requests.post(
+                f"{proxy_base}/audio/retrieve",
+                params={"token": request.proxy_token},
+                json={"session_id": session_id},
+                timeout=30
+            )
+            response.raise_for_status()
+            audio_data = response.json()
+
+            import base64
+
+            user_chunks = audio_data.get("user_chunks", [])
+            # Decode Base64 audio data in place (proxy sends base64 strings).
+            for chunk in user_chunks:
+                if isinstance(chunk.get("data"), str):
+                    chunk["data"] = base64.b64decode(chunk["data"])
+
+            agent_chunks = audio_data.get("agent_chunks", [])
+            # Decode Base64 audio data
+            for chunk in agent_chunks:
+                if isinstance(chunk.get("data"), str):
+                    chunk["data"] = base64.b64decode(chunk["data"])
+
+            session_start_time = audio_data.get("session_start_time")
+
+            # Get transcript from proxy if available; it takes precedence
+            # over the client-supplied transcript_text.
+            proxy_transcript = audio_data.get("transcript", [])
+            transcript_text = request.transcript_text
+
+            if proxy_transcript:
+                logger.info(f"Using transcript from proxy ({len(proxy_transcript)} entries)")
+                transcript_text = "\n".join([
+                    f"{entry.get('speaker', 'unknown')}: {entry.get('text', '')}"
+                    for entry in proxy_transcript
+                ])
+
+            if not session_start_time:
+                # Caught by the enclosing except below and surfaced as a 500.
+                raise ValueError("Session start time not found in proxy response")
+
+            logger.info(f"Audio Debug: Retrieved {len(user_chunks)} user chunks and {len(agent_chunks)} agent chunks")
+            if user_chunks:
+                logger.info(f"Audio Debug: First user chunk size: {len(user_chunks[0].get('data', b''))} bytes")
+
+        except Exception as e:
+            logger.error(f"Error retrieving audio from proxy: {e}")
+            raise HTTPException(status_code=500, detail=f"Failed to retrieve audio from proxy: {str(e)}")
+
+        # Combine audio chunks into one WAV; failures here are logged but do
+        # not abort the request.
+        audio_file_path = None
+        if user_chunks or agent_chunks:
+            try:
+                logger.info("Audio Debug: Combining audio chunks...")
+                wav_data = combine_and_export_audio(
+                    user_chunks=user_chunks,
+                    agent_chunks=agent_chunks,
+                    session_start_time=session_start_time,
+                    session_id=session_id
+                )
+
+                logger.info(f"Audio Debug: Generated WAV data size: {len(wav_data)} bytes")
+
+                # Save WAV file
+                recordings_dir = Path("src/backend/database/voice_recordings")
+                recordings_dir.mkdir(parents=True, exist_ok=True)
+                audio_file_path = str(recordings_dir / f"{session_id}.wav")
+
+                with open(audio_file_path, "wb") as f:
+                    f.write(wav_data)
+
+                logger.info(f"Saved audio file: {audio_file_path}")
+
+                # Verify file exists and size
+                if os.path.exists(audio_file_path):
+                    size = os.path.getsize(audio_file_path)
+                    logger.info(f"Audio Debug: File verified on disk. Size: {size} bytes")
+                else:
+                    logger.error("Audio Debug: File NOT found on disk after writing!")
+
+            except Exception as e:
+                logger.error(f"Error processing audio: {e}", exc_info=True)
+                # Continue even if audio fails - we still want to save the transcript
+        else:
+            logger.warning("Audio Debug: No audio chunks found to process!")
+
+        # Save to database
+        try:
+            save_voice_screening_session(
+                candidate_id=request.candidate_id,
+                session_id=session_id,
+                transcript_text=transcript_text,
+                audio_url=audio_file_path
+            )
+        except ValueError as e:
+            # Candidate not found
+            logger.warning(f"Failed to save session: {e}")
+            raise HTTPException(status_code=404, detail=str(e))
+
+        logger.info(f"Saved session {session_id} for candidate {request.candidate_id}")
+
+        return SaveSessionResponse(
+            audio_file_path=audio_file_path,
+            message="Session saved successfully"
+        )
+    except HTTPException:
+        # Re-raise explicit HTTP errors (400/404/500 raised above) unchanged.
+        raise
+    except Exception as e:
+        logger.error(f"Error saving session: {e}", exc_info=True)
+        raise HTTPException(status_code=500, detail=f"Failed to save session: {str(e)}")
+
+
+@router.get("/health")
+async def health_check():
+ """Health check endpoint."""
+ return {"status": "healthy", "service": "voice-screener"}
diff --git a/src/backend/api/schemas/__init__.py b/src/backend/api/schemas/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..5398f4862c5c67e57fea1b4fd26b5f3c2bd3559a
--- /dev/null
+++ b/src/backend/api/schemas/__init__.py
@@ -0,0 +1,33 @@
+"""API schemas."""
+
+from .cv_upload import SubmitResponse
+from .supervisor_chat import ChatRequest, ChatResponse, NewChatResponse
+from .database import (
+ TableName,
+ QueryRequest,
+ QueryResponse,
+ SingleRecordResponse,
+ CandidateResponse,
+ CVScreeningResponse,
+ VoiceScreeningResponse,
+ InterviewSchedulingResponse,
+ FinalDecisionResponse,
+)
+
+__all__ = [
+ "SubmitResponse",
+ "ChatRequest",
+ "ChatResponse",
+ "NewChatResponse",
+ # Database schemas
+ "TableName",
+ "QueryRequest",
+ "QueryResponse",
+ "SingleRecordResponse",
+ "CandidateResponse",
+ "CVScreeningResponse",
+ "VoiceScreeningResponse",
+ "InterviewSchedulingResponse",
+ "FinalDecisionResponse",
+]
+
diff --git a/src/backend/api/schemas/cv_upload.py b/src/backend/api/schemas/cv_upload.py
new file mode 100644
index 0000000000000000000000000000000000000000..d9e2c4a30f0f6c30c608a696d1f5dc8213853cd5
--- /dev/null
+++ b/src/backend/api/schemas/cv_upload.py
@@ -0,0 +1,14 @@
+"""CV Upload API schemas."""
+
+from pydantic import BaseModel, Field
+
+
+class SubmitResponse(BaseModel):
+    """Response model for CV submission.
+
+    Returned by the CV upload endpoint; field-level documentation lives in
+    the Field descriptions below.
+    """
+    success: bool = Field(..., description="Whether the submission was successful")
+    message: str = Field(..., description="Status message")
+    candidate_name: str = Field(default="", description="Name of the candidate")
+    email: str = Field(default="", description="Email of the candidate")
+    cv_file_path: str = Field(default="", description="Path where CV was saved")
+    already_exists: bool = Field(default=False, description="True if candidate already applied")
+
diff --git a/src/backend/api/schemas/database.py b/src/backend/api/schemas/database.py
new file mode 100644
index 0000000000000000000000000000000000000000..b7acbe2031e83c6b9de9625ec4069c970d5de9c3
--- /dev/null
+++ b/src/backend/api/schemas/database.py
@@ -0,0 +1,191 @@
+"""
+Database Query Schemas.
+
+Flexible schemas for querying any table in the recruitment database.
+"""
+
+from datetime import datetime
+from typing import Any, Optional
+from uuid import UUID
+from enum import Enum
+
+from pydantic import BaseModel, Field
+
+
+# ==================================================================================
+# ENUMS
+# ==================================================================================
+
+class TableName(str, Enum):
+    """Available tables for querying.
+
+    The string values are used verbatim as the `table` label in query
+    responses.
+    """
+    candidates = "candidates"
+    cv_screening_results = "cv_screening_results"
+    voice_screening_results = "voice_screening_results"
+    interview_scheduling = "interview_scheduling"
+    final_decision = "final_decision"
+
+
+class SortOrder(str, Enum):
+    """Sort order options for QueryRequest.sort_order."""
+    asc = "asc"   # ascending
+    desc = "desc"  # descending (default in QueryRequest)
+
+
+# ==================================================================================
+# REQUEST SCHEMAS
+# ==================================================================================
+
+class QueryRequest(BaseModel):
+    """Flexible query request for any table.
+
+    NOTE(review): the examples use operator-style filters such as
+    {"$gte": 0.8}; whether operators beyond plain equality are supported
+    depends on the query endpoint's filter handling -- confirm there.
+    """
+
+    table: TableName = Field(..., description="Table to query")
+
+    # Filtering
+    filters: Optional[dict[str, Any]] = Field(
+        default=None,
+        description="Key-value filters (e.g., {'email': 'john@example.com', 'status': 'applied'})"
+    )
+
+    # Field selection
+    fields: Optional[list[str]] = Field(
+        default=None,
+        description="Specific fields to return. If None, returns all fields."
+    )
+
+    # Include related data
+    include_relations: Optional[bool] = Field(
+        default=False,
+        description="Include related tables (only for candidates table)"
+    )
+
+    # Pagination
+    limit: Optional[int] = Field(default=100, ge=1, le=1000, description="Max records to return")
+    offset: Optional[int] = Field(default=0, ge=0, description="Number of records to skip")
+
+    # Sorting
+    sort_by: Optional[str] = Field(default=None, description="Field to sort by")
+    sort_order: SortOrder = Field(default=SortOrder.desc, description="Sort order")
+
+    # OpenAPI examples shown in the generated docs.
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "table": "candidates",
+                    "filters": {"status": "applied"},
+                    "fields": ["id", "full_name", "email", "status"],
+                    "limit": 10
+                },
+                {
+                    "table": "cv_screening_results",
+                    "filters": {"overall_fit_score": {"$gte": 0.8}},
+                    "sort_by": "overall_fit_score",
+                    "sort_order": "desc"
+                }
+            ]
+        }
+    }
+
+
+# ==================================================================================
+# RESPONSE SCHEMAS
+# ==================================================================================
+
+class CandidateResponse(BaseModel):
+    """Candidate data response."""
+    id: UUID
+    full_name: str
+    email: str
+    phone_number: Optional[str] = None
+    cv_file_path: Optional[str] = None
+    parsed_cv_file_path: Optional[str] = None
+    status: str
+    created_at: datetime
+    updated_at: datetime
+
+    # Related data (populated when include_relations=True)
+    cv_screening_results: Optional[list[dict[str, Any]]] = None
+    voice_screening_results: Optional[list[dict[str, Any]]] = None
+    interview_scheduling: Optional[list[dict[str, Any]]] = None
+    final_decision: Optional[dict[str, Any]] = None
+
+    # Pydantic v2 ORM mode: allow construction from SQLAlchemy objects.
+    model_config = {"from_attributes": True}
+
+
+class CVScreeningResponse(BaseModel):
+    """CV Screening result response (one row of cv_screening_results)."""
+    id: UUID
+    candidate_id: UUID
+    job_title: Optional[str] = None
+    skills_match_score: Optional[float] = None
+    experience_match_score: Optional[float] = None
+    education_match_score: Optional[float] = None
+    overall_fit_score: Optional[float] = None
+    llm_feedback: Optional[str] = None
+    reasoning_trace: Optional[dict[str, Any]] = None
+    timestamp: datetime
+
+    # Pydantic v2 ORM mode: allow construction from SQLAlchemy objects.
+    model_config = {"from_attributes": True}
+
+
+class VoiceScreeningResponse(BaseModel):
+    """Voice Screening result response (one row of voice_screening_results)."""
+    id: UUID
+    candidate_id: UUID
+    call_sid: Optional[str] = None
+    transcript_text: Optional[str] = None
+    sentiment_score: Optional[float] = None
+    confidence_score: Optional[float] = None
+    communication_score: Optional[float] = None
+    llm_summary: Optional[str] = None
+    llm_judgment_json: Optional[dict[str, Any]] = None
+    audio_url: Optional[str] = None
+    timestamp: datetime
+
+    # Pydantic v2 ORM mode: allow construction from SQLAlchemy objects.
+    model_config = {"from_attributes": True}
+
+
+class InterviewSchedulingResponse(BaseModel):
+    """Interview scheduling response (one row of interview_scheduling)."""
+    id: UUID
+    candidate_id: UUID
+    calendar_event_id: Optional[str] = None
+    event_summary: Optional[str] = None
+    start_time: Optional[datetime] = None
+    end_time: Optional[datetime] = None
+    status: Optional[str] = None
+    timestamp: datetime
+
+    # Pydantic v2 ORM mode: allow construction from SQLAlchemy objects.
+    model_config = {"from_attributes": True}
+
+
+class FinalDecisionResponse(BaseModel):
+    """Final decision response (one row of final_decision)."""
+    id: UUID
+    candidate_id: UUID
+    overall_score: Optional[float] = None
+    decision: Optional[str] = None
+    llm_rationale: Optional[str] = None
+    human_notes: Optional[str] = None
+    timestamp: datetime
+
+    # Pydantic v2 ORM mode: allow construction from SQLAlchemy objects.
+    model_config = {"from_attributes": True}
+
+
+class QueryResponse(BaseModel):
+    """Generic query response wrapper for list endpoints."""
+    success: bool
+    table: str  # table name echoed back (TableName value)
+    total_count: int  # total matching rows before pagination
+    returned_count: int  # rows actually included in `data`
+    offset: int  # pagination offset echoed back
+    data: list[dict[str, Any]]  # serialized rows
+    message: Optional[str] = None  # optional human-readable note
+
+
+class SingleRecordResponse(BaseModel):
+    """Single record response; success=False with a message when not found."""
+    success: bool
+    table: str  # table name the record came from
+    data: Optional[dict[str, Any]] = None  # serialized row, or None when missing
+    message: Optional[str] = None  # e.g. not-found explanation
+
diff --git a/src/backend/api/schemas/supervisor_chat.py b/src/backend/api/schemas/supervisor_chat.py
new file mode 100644
index 0000000000000000000000000000000000000000..4dfc7b15925d61005c33101577fefa185de0bf4c
--- /dev/null
+++ b/src/backend/api/schemas/supervisor_chat.py
@@ -0,0 +1,23 @@
+from pydantic import BaseModel, Field
+from typing import Optional
+
class ChatRequest(BaseModel):
    """Request model for chat endpoint.

    Field descriptions feed the generated OpenAPI schema.
    """
    message: str = Field(..., description="User message to send to the supervisor agent")
    thread_id: Optional[str] = Field(
        default=None,
        description="Thread ID for conversation continuity. If not provided, a new thread is created."
    )
+
+
class ChatResponse(BaseModel):
    """Response model for chat endpoint.

    Echoes the thread id so the client can continue the conversation, and
    reports the current context-window usage.
    """
    response: str = Field(..., description="Agent's response message")
    thread_id: str = Field(..., description="Thread ID for conversation continuity")
    token_count: int = Field(..., description="Current token count in context window")
+
+
class NewChatResponse(BaseModel):
    """Response model for creating a new chat session.

    Returned when a fresh conversation thread is opened.
    """
    thread_id: str = Field(..., description="New thread ID for the conversation")
    message: str = Field(..., description="Welcome message")
\ No newline at end of file
diff --git a/src/backend/configs/__init__.py b/src/backend/configs/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..064d9378be296ceb7bdd38245bb453a76a615ad3
--- /dev/null
+++ b/src/backend/configs/__init__.py
@@ -0,0 +1,22 @@
+"""Application configuration."""
+
+from .cv import CVUploadSettings, get_cv_settings
+from .database import DatabaseSettings, get_database_settings
+from .openai import OpenAISettings, get_openai_settings, get_openai_api_key
+from .settings import Settings, get_settings
+
+__all__ = [
+ # CV Upload
+ "CVUploadSettings",
+ "get_cv_settings",
+ # Database
+ "DatabaseSettings",
+ "get_database_settings",
+ # OpenAI
+ "OpenAISettings",
+ "get_openai_settings",
+ "get_openai_api_key",
+ # Main settings
+ "Settings",
+ "get_settings",
+]
diff --git a/src/backend/configs/cv.py b/src/backend/configs/cv.py
new file mode 100644
index 0000000000000000000000000000000000000000..30a689e8bd634aa53e534fbc2244ebc465bd9de0
--- /dev/null
+++ b/src/backend/configs/cv.py
@@ -0,0 +1,36 @@
+"""CV Upload settings."""
+
+from functools import lru_cache
+from pathlib import Path
+from pydantic import Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
class CVUploadSettings(BaseSettings):
    """Settings for CV upload and parsing.

    Paths default to repository-relative directories and can be overridden
    via CV_-prefixed environment variables (CV_UPLOAD_PATH, CV_PARSED_PATH).
    """

    model_config = SettingsConfigDict(
        env_prefix="CV_",  # CV_UPLOAD_PATH, CV_PARSED_PATH
        extra="ignore",  # tolerate unrelated env vars instead of raising
    )

    upload_path: Path = Field(
        default=Path("src/backend/database/cvs/uploads"),
        description="Directory for uploaded CV files",
    )
    parsed_path: Path = Field(
        default=Path("src/backend/database/cvs/parsed"),
        description="Directory for parsed CV markdown files",
    )

    def ensure_dirs(self) -> None:
        """Create upload and parsed directories if they don't exist."""
        # exist_ok makes this idempotent; parents covers fresh checkouts.
        self.upload_path.mkdir(parents=True, exist_ok=True)
        self.parsed_path.mkdir(parents=True, exist_ok=True)
+
+
@lru_cache
def get_cv_settings() -> CVUploadSettings:
    """Return the process-wide CV upload settings (built once, then cached)."""
    settings = CVUploadSettings()
    return settings
+
diff --git a/src/backend/configs/database.py b/src/backend/configs/database.py
new file mode 100644
index 0000000000000000000000000000000000000000..49cbdfb58b9dcfc0e83aa6a9a81f62d00c19de9b
--- /dev/null
+++ b/src/backend/configs/database.py
@@ -0,0 +1,42 @@
+"""Database connection settings."""
+
+from functools import lru_cache
+from pydantic import Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
class DatabaseSettings(BaseSettings):
    """PostgreSQL database connection settings.

    Values are read from POSTGRES_-prefixed environment variables
    (POSTGRES_HOST, POSTGRES_PORT, POSTGRES_USER, POSTGRES_PASSWORD,
    POSTGRES_DB); unset variables fall back to the defaults below.
    """

    model_config = SettingsConfigDict(
        env_prefix="POSTGRES_",
        extra="ignore",  # tolerate unrelated env vars instead of raising
    )

    host: str = Field(default="localhost")
    port: int = Field(default=5432)
    user: str = Field(default="agentic_user")
    password: str = Field(default="")
    db: str = Field(default="agentic_hr")

    def _dsn(self, scheme: str) -> str:
        """Build a DSN for *scheme*, percent-encoding the credentials.

        Encoding user and password keeps the URL valid when credentials
        contain reserved characters such as '@', ':' or '/'; plain
        alphanumeric credentials produce exactly the same string as before.
        """
        from urllib.parse import quote

        user = quote(self.user, safe="")
        password = quote(self.password, safe="")
        return f"{scheme}://{user}:{password}@{self.host}:{self.port}/{self.db}"

    @property
    def url(self) -> str:
        """Plain PostgreSQL URL (driver chosen by the client library)."""
        return self._dsn("postgresql")

    @property
    def psycopg2_url(self) -> str:
        """SQLAlchemy URL pinned to the psycopg2 driver."""
        return self._dsn("postgresql+psycopg2")

    @property
    def async_url(self) -> str:
        """SQLAlchemy URL pinned to the asyncpg driver (async engines)."""
        return self._dsn("postgresql+asyncpg")
+
+
@lru_cache
def get_database_settings() -> DatabaseSettings:
    """Return the process-wide database settings (built once, then cached)."""
    settings = DatabaseSettings()
    return settings
+
diff --git a/src/backend/configs/openai.py b/src/backend/configs/openai.py
new file mode 100644
index 0000000000000000000000000000000000000000..5cae55e9ea50ccb1f809a1ddc7b9b8c61cf203b8
--- /dev/null
+++ b/src/backend/configs/openai.py
@@ -0,0 +1,89 @@
+"""OpenAI API settings."""
+
+import sys
+from functools import lru_cache
+from pydantic import Field, model_validator
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
class OpenAISettings(BaseSettings):
    """
    OpenAI API configuration.

    Validates that OPENAI_API_KEY is set and provides a helpful error message
    if missing, especially useful in Docker environments.
    """

    model_config = SettingsConfigDict(
        extra="ignore",  # tolerate unrelated env vars instead of raising
    )

    # Read from the un-prefixed OPENAI_API_KEY env var via the alias.
    api_key: str = Field(
        default="",
        alias="OPENAI_API_KEY",
        description="OpenAI API key for model access",
    )

    @model_validator(mode="after")
    def validate_api_key(self) -> "OpenAISettings":
        """Validate that API key is set and provide helpful error message."""
        if not self.api_key:
            # Banner is printed to stderr before raising so the guidance
            # survives even when the traceback is swallowed by a supervisor.
            error_message = """
โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
โ                        โ ๏ธ  OPENAI_API_KEY NOT SET  โ ๏ธ                          โ
โ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโฃ
โ                                                                              โ
โ  The OPENAI_API_KEY environment variable is required but not set.            โ
โ                                                                              โ
โ  To fix this:                                                                โ
โ                                                                              โ
โ  1. Create a .env file in the project root:                                  โ
โ     OPENAI_API_KEY=sk-your-api-key-here                                      โ
โ                                                                              โ
โ  2. Or set it directly in your shell:                                        โ
โ     export OPENAI_API_KEY=sk-your-api-key-here                               โ
โ                                                                              โ
โ  3. Or pass it to Docker:                                                    โ
โ     docker compose --env-file .env -f docker/docker-compose.yml up           โ
โ                                                                              โ
โ  Get your API key at: https://platform.openai.com/api-keys                   โ
โ                                                                              โ
โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
"""
            print(error_message, file=sys.stderr)
            raise ValueError("OPENAI_API_KEY environment variable is required")

        # Basic validation that it looks like an OpenAI key
        # NOTE(review): 'org-' is accepted here too — presumably an
        # organization identifier; confirm this prefix is intentional.
        if not (self.api_key.startswith("sk-") or self.api_key.startswith("org-")):
            print(
                "โ ๏ธ  Warning: OPENAI_API_KEY doesn't start with 'sk-' - "
                "make sure it's a valid OpenAI API key.",
                file=sys.stderr
            )

        return self

    def __repr__(self) -> str:
        """Safe representation without exposing the key."""
        # Show only a short prefix/suffix; keys at or under 11 chars are
        # fully masked since prefix+suffix would reveal most of the key.
        masked = f"{self.api_key[:7]}...{self.api_key[-4:]}" if len(self.api_key) > 11 else "***"
        return f"OpenAISettings(api_key={masked})"
+
+
@lru_cache
def get_openai_settings() -> OpenAISettings:
    """Return the cached OpenAI settings singleton.

    Raises ValueError (from the model validator) with a helpful message
    if OPENAI_API_KEY is not set.
    """
    settings = OpenAISettings()
    return settings
+
+
def get_openai_api_key() -> str:
    """Convenience accessor returning just the API key string.

    Raises ValueError with helpful message if OPENAI_API_KEY is not set.
    """
    settings = get_openai_settings()
    return settings.api_key
+
diff --git a/src/backend/configs/settings.py b/src/backend/configs/settings.py
new file mode 100644
index 0000000000000000000000000000000000000000..38a7d7bd5cd15b615d1f6f8ff3738546799b813d
--- /dev/null
+++ b/src/backend/configs/settings.py
@@ -0,0 +1,46 @@
+"""
+Main application settings.
+
+This module aggregates all settings into a single Settings class.
+For most use cases, import individual settings directly:
+
+ from src.backend.configs import get_cv_settings, get_openai_settings
+
+ cv = get_cv_settings()
+ openai = get_openai_settings()
+"""
+
+from functools import lru_cache
+from pydantic import Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+from dotenv import load_dotenv
+
+# Load environment variables
+load_dotenv()
+
+from .cv import CVUploadSettings
+from .database import DatabaseSettings
+from .openai import OpenAISettings
+
+
class Settings(BaseSettings):
    """
    Aggregated application settings.

    Combines all configuration in one place. Individual settings
    can also be accessed via their dedicated getter functions.

    Note: each sub-settings object is built by its own class and therefore
    reads its own env-prefixed variables at construction time.
    """

    model_config = SettingsConfigDict(
        extra="ignore",  # tolerate unrelated env vars instead of raising
    )

    # default_factory keeps construction lazy per instance (no shared state).
    cv: CVUploadSettings = Field(default_factory=CVUploadSettings)
    database: DatabaseSettings = Field(default_factory=DatabaseSettings)
    openai: OpenAISettings = Field(default_factory=OpenAISettings)
+
+
@lru_cache
def get_settings() -> Settings:
    """Return the lazily-built, cached aggregate Settings instance."""
    settings = Settings()
    return settings
diff --git a/src/backend/context_eng/__init__.py b/src/backend/context_eng/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e6c970611492b40d4eb3a0171fdcbab239dc1a6e
--- /dev/null
+++ b/src/backend/context_eng/__init__.py
@@ -0,0 +1,23 @@
+"""
+Context Engineering Module.
+
+Handles context window optimization and memory management for Long-Running Agents.
+Implements "Compactive Summarization" to prevent token overflow while preserving
+critical conversation history.
+"""
+
+from .token_counter import count_tokens_for_messages
+from .history_manager import HistoryManager
+from .compacting_supervisor import CompactingSupervisor, compacting_supervisor, history_manager
+
+__all__ = [
+ # Utilities
+ "count_tokens_for_messages",
+ # Classes
+ "HistoryManager",
+ "CompactingSupervisor",
+ # Singletons
+ "compacting_supervisor",
+ "history_manager",
+]
+
diff --git a/src/backend/context_eng/compacting_supervisor.py b/src/backend/context_eng/compacting_supervisor.py
new file mode 100644
index 0000000000000000000000000000000000000000..0be8cb5842f9c5b7e795839eb05dbe6e54702500
--- /dev/null
+++ b/src/backend/context_eng/compacting_supervisor.py
@@ -0,0 +1,141 @@
+"""
+Compacting Supervisor - Agent wrapper with automatic context management.
+
+Wraps an agent to enforce Context Window limits via 'Compaction'.
+Implements the Interceptor Pattern to transparently manage token usage.
+"""
+
+from typing import Dict, Any, Generator
+
+from src.backend.agents.supervisor.supervisor_v2 import supervisor_agent, memory
+
+from .token_counter import count_tokens_for_messages
+from .history_manager import HistoryManager
+
+
class CompactingSupervisor:
    """
    Wraps an agent to enforce Context Window limits via 'Compaction'.

    Technique (Interceptor Pattern):
    1. Intercepts the agent's execution flow.
    2. Runs the agent normally.
    3. Post-execution: Checks if the total context (tokens) exceeds the limit.
    4. If exceeded, triggers `HistoryManager` to compact old history and rewrite memory.

    This ensures the agent remains "forever young" regarding token usage,
    without losing long-term context.
    """

    def __init__(self, agent, history_manager: HistoryManager, token_limit: int = 3000, compaction_ratio: float = 0.5):
        """Store the wrapped agent and the compaction policy.

        Args:
            agent: Wrapped LangGraph agent; must expose invoke/stream/get_state.
            history_manager: Performs summarization and checkpoint rewriting.
            token_limit: Token count above which compaction is triggered.
            compaction_ratio: Fraction of history to summarize (0.0-1.0).
        """
        self.agent = agent
        self.history_manager = history_manager
        self.token_limit = token_limit
        self.compaction_ratio = compaction_ratio

    def invoke(self, input_data: Dict[str, Any], config: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute the agent and perform context maintenance if needed.

        Args:
            input_data: Agent input (expects a "messages" list in the result).
            config: LangGraph run config; compaction needs
                config["configurable"]["thread_id"] to locate the checkpoint.

        Returns:
            The agent's response dict; its "messages" are replaced with the
            compacted list when compaction ran.
        """
        thread_id = config.get("configurable", {}).get("thread_id")

        # 1. Invoke the agent
        response = self.agent.invoke(input_data, config)

        # 2. Check total tokens after response
        # Without a thread_id there is no checkpoint to rewrite, so skip.
        if thread_id and "messages" in response:
            all_messages = response["messages"]
            total_tokens = count_tokens_for_messages(all_messages)

            if total_tokens > self.token_limit:
                print(f"Tokens ({total_tokens}) exceeded limit ({self.token_limit}). Compacting...", flush=True)
                try:
                    # Delegate complex logic to HistoryManager
                    compacted_messages = self.history_manager.compact_messages(
                        all_messages,
                        compaction_ratio=self.compaction_ratio
                    )
                    self.history_manager.replace_thread_history(thread_id, compacted_messages)

                    # Update response to reflect compacted state so UI sees the change
                    response["messages"] = compacted_messages

                    # Verify reduction
                    new_tokens = count_tokens_for_messages(compacted_messages)
                    print(f"Compaction complete. {total_tokens} -> {new_tokens}", flush=True)
                except Exception as e:
                    # Best-effort: a failed compaction must not lose the response.
                    print(f"Compaction failed: {e}", flush=True)

        return response

    def stream(self, input_data: Dict[str, Any], config: Dict[str, Any]) -> Generator[Dict[str, Any], None, None]:
        """
        Stream the agent response token by token, then perform compaction if needed.

        Yields:
            dict: Streaming chunks with 'type' and 'content' keys.
            - type='token': A content token from the AI response
            - type='done': Final message with token count
            - type='error': Error occurred
        """
        thread_id = config.get("configurable", {}).get("thread_id")
        full_response_content = ""
        final_messages = []

        try:
            # Stream from the agent
            for chunk in self.agent.stream(input_data, config, stream_mode="messages"):
                # chunk is a tuple: (message, metadata)
                message, metadata = chunk

                # Only yield content from AI messages that have content
                if hasattr(message, 'content') and message.content:
                    # Check if this is an AIMessageChunk (streaming token)
                    # Substring match covers both AIMessage and AIMessageChunk.
                    msg_type = message.__class__.__name__
                    if 'AIMessage' in msg_type:
                        yield {"type": "token", "content": message.content}
                        full_response_content += message.content

            # After streaming completes, get the final state for compaction check
            # We need to get the current state from memory
            final_state = self.agent.get_state(config)
            if final_state and hasattr(final_state, 'values'):
                final_messages = final_state.values.get("messages", [])

            # Perform compaction if needed
            token_count = 0
            if thread_id and final_messages:
                token_count = count_tokens_for_messages(final_messages)

                if token_count > self.token_limit:
                    print(f"Tokens ({token_count}) exceeded limit ({self.token_limit}). Compacting...", flush=True)
                    try:
                        compacted_messages = self.history_manager.compact_messages(
                            final_messages,
                            compaction_ratio=self.compaction_ratio
                        )
                        self.history_manager.replace_thread_history(thread_id, compacted_messages)
                        # Report the post-compaction count to the client.
                        token_count = count_tokens_for_messages(compacted_messages)
                        print(f"Compaction complete. New token count: {token_count}", flush=True)
                    except Exception as e:
                        # Best-effort: failed compaction must not break the stream.
                        print(f"Compaction failed: {e}", flush=True)

            yield {"type": "done", "token_count": token_count}

        except Exception as e:
            # Surface streaming failures as a structured chunk instead of raising.
            yield {"type": "error", "content": str(e)}
+
+
# =============================================================================
# SINGLETON INSTANCES
# =============================================================================

# Module-level HistoryManager bound to the supervisor's MemorySaver store.
history_manager = HistoryManager(memory_saver=memory)

# Process-wide wrapped supervisor. token_limit=500 overrides the class
# default of 3000 — presumably a deliberately low threshold so compaction
# triggers often (demo/testing); confirm before production use.
compacting_supervisor = CompactingSupervisor(
    agent=supervisor_agent,
    history_manager=history_manager,
    token_limit=500,
    compaction_ratio=0.5
)
+
diff --git a/src/backend/context_eng/history_manager.py b/src/backend/context_eng/history_manager.py
new file mode 100644
index 0000000000000000000000000000000000000000..64b3d8d4ad6916ed6b4474083c5a10c1005e9a93
--- /dev/null
+++ b/src/backend/context_eng/history_manager.py
@@ -0,0 +1,175 @@
+"""
+History Manager for conversation memory and compaction.
+
+Handles persistent conversation state and implements "Compactive Summarization"
+to prevent token overflow while preserving critical conversation history.
+"""
+
+import time
+import random
+import uuid
+from datetime import datetime
+from typing import List
+
+from langchain_openai import ChatOpenAI
+from langchain_core.messages import BaseMessage, SystemMessage, HumanMessage, AIMessage
+
+from src.backend.prompts import get_prompt
+
+
class HistoryManager:
    """
    Manages persistent conversation state and implements compaction logic.

    Responsibilities:
    1. Compaction: Summarizing old messages to save tokens.
    2. Persistence: Safely updating the low-level checkpoint storage.
    """

    def __init__(self, memory_saver):
        # memory_saver: a LangGraph checkpointer exposing get_tuple()/put()
        # (e.g. MemorySaver); stored as-is, no copy.
        self.memory = memory_saver

    def _messages_to_text(self, messages: List[BaseMessage]) -> str:
        """Convert messages to a plain text transcript.

        Uses the message class name (HumanMessage/AIMessage/...) as the
        speaker label; non-string content is rendered via str().
        """
        text_parts = []
        for msg in messages:
            role = msg.__class__.__name__
            content = msg.content
            if isinstance(content, str):
                text_parts.append(f"{role}: {content}")
            else:
                text_parts.append(f"{role}: {str(content)}")
        return "\n\n".join(text_parts)

    def _is_tool_message(self, msg: BaseMessage) -> bool:
        """Check if a message is a ToolMessage or Tool output."""
        # Checks type attr, role attr, and class name to cover the different
        # message representations that may appear in the history.
        msg_type = getattr(msg, "type", None)
        role = getattr(msg, "role", None)
        return msg_type == "tool" or role == "tool" or msg.__class__.__name__ == "ToolMessage"

    def compact_messages(self, messages: List[BaseMessage], compaction_ratio: float = 0.5) -> List[BaseMessage]:
        """
        Apply "Compactive Summarization" to the conversation history.

        Technique:
        - Splits history into Old and Recent based on compaction_ratio.
        - Summarizes Old messages into a single narrative block using an LLM.
        - Preserves the System Prompt and Recent messages verbatim.

        Args:
            messages: Full list of conversation messages.
            compaction_ratio: Fraction of messages to compact (0.0 to 1.0).
                - 0.5 (Default): Summarizes 50% (Oldest half).
                - 0.8: Aggressive. Summarizes 80% (Keeps only very recent messages).
                - 0.2: Gentle. Summarizes only the oldest 20%.

        Returns:
            List[BaseMessage]: optimized list with summary replacing old history.
        """
        # Nothing meaningful to compact with fewer than 2 messages.
        if len(messages) < 2:
            return messages

        system_msg = None
        conversation_msgs = messages

        # Preserve system prompt
        if isinstance(messages[0], SystemMessage):
            system_msg = messages[0]
            conversation_msgs = messages[1:]

        if len(conversation_msgs) < 2:
            return messages

        # Calculate split point based on ratio
        split_idx = int(len(conversation_msgs) * compaction_ratio)

        # Ensure we compact at least something if ratio > 0, but keep at least one recent message
        split_idx = max(1, min(split_idx, len(conversation_msgs) - 1))

        # Slicing creates new lists, so the mutations below don't touch `messages`.
        first_half = conversation_msgs[:split_idx]
        second_half = conversation_msgs[split_idx:]

        # Ensure second_half does not start with orphaned tool message
        # (a tool result must stay adjacent to its originating tool call,
        # so pull messages back from the to-be-summarized half if needed).
        while second_half and self._is_tool_message(second_half[0]):
            if first_half:
                second_half.insert(0, first_half.pop())
            else:
                second_half.pop(0)

        # Generate summary
        compactor_prompt = get_prompt(template_name="Compactor", latest_version=True)
        conversation_text = self._messages_to_text(first_half)

        # temperature=0 keeps summaries deterministic for a given history.
        llm = ChatOpenAI(model="gpt-4o-mini", temperature=0, max_tokens=1000)
        messages_for_llm = [
            SystemMessage(content=compactor_prompt),
            HumanMessage(content=f"Conversation history to summarize:\n\n{conversation_text}")
        ]

        response = llm.invoke(messages_for_llm)
        summary_text = response.content

        print(f"\n{'='*80}\n๐ COMPACTION MESSAGE:\n{summary_text}\n{'='*80}\n", flush=True)

        # The summary replaces the old half as a single assistant message,
        # clearly tagged so later readers know it is synthetic.
        summary_message = AIMessage(content=f"[COMPACTED SUMMARY OF EARLIER CONVERSATION]\n\n{summary_text}")

        result = []
        if system_msg:
            result.append(system_msg)
        result.append(summary_message)
        result.extend(second_half)

        return result

    def replace_thread_history(self, thread_id: str, new_messages: List[BaseMessage]) -> bool:
        """
        Atomically overwrite the message history in the checkpoint storage.

        This bypasses the standard append-only reducer to force a history rewrite.
        Crucial for finalizing the compaction process.

        Returns:
            bool: True if a checkpoint existed and was rewritten, False otherwise.
        """
        config = {"configurable": {"thread_id": thread_id}}
        current_checkpoint = self.memory.get_tuple(config)

        # No existing checkpoint for this thread -> nothing to replace.
        if not current_checkpoint or not current_checkpoint.checkpoint:
            return False

        # Rebuild the configurable dict so required keys are always present.
        checkpoint_config = {
            "configurable": {**current_checkpoint.config.get("configurable", {})}
        }
        checkpoint_config["configurable"].setdefault("thread_id", thread_id)
        checkpoint_config["configurable"].setdefault("checkpoint_ns", "")

        # Zero-padded epoch seconds keep string versions lexicographically
        # increasing; the random suffix avoids collisions within one second.
        # NOTE(review): format mimics LangGraph's internal version strings —
        # confirm against the pinned langgraph version.
        current_versions = current_checkpoint.checkpoint.get('channel_versions', {})
        new_msg_version = f"{str(int(time.time())).zfill(32)}.0.{random.random()}"

        new_versions = current_versions.copy()
        new_versions['messages'] = new_msg_version

        # Hand-built checkpoint matching the structure returned by get_tuple;
        # only the 'messages' channel value is replaced.
        # NOTE(review): datetime.utcnow() is deprecated since Python 3.12 —
        # consider datetime.now(timezone.utc) (may change the 'ts' format).
        new_checkpoint = {
            'v': current_checkpoint.checkpoint.get('v', 1) + 1,
            'ts': datetime.utcnow().isoformat(),
            'id': str(uuid.uuid4()),
            'channel_versions': new_versions,
            'versions_seen': current_checkpoint.checkpoint.get('versions_seen', {}),
            'updated_channels': ['messages'],
            'channel_values': {'messages': new_messages}
        }

        # Tag the checkpoint so compaction rewrites are identifiable later.
        existing_metadata = current_checkpoint.metadata or {}
        new_metadata = {
            **existing_metadata,
            "source": "compaction",
            "compacted_at": datetime.utcnow().isoformat(),
        }
        if "step" not in new_metadata:
            new_metadata["step"] = existing_metadata.get("step", 0)

        self.memory.put(
            config=checkpoint_config,
            checkpoint=new_checkpoint,
            metadata=new_metadata,
            new_versions={'messages': new_msg_version}
        )
        return True
+
diff --git a/src/backend/context_eng/info.md b/src/backend/context_eng/info.md
new file mode 100644
index 0000000000000000000000000000000000000000..6f5bb13df61aa3206050c0e65bb794b3ab4d7ac9
--- /dev/null
+++ b/src/backend/context_eng/info.md
@@ -0,0 +1,186 @@
+# Context Engineering ๐ง
+
+> Keeping long-running agents "forever young" by managing their memory.
+
+## The Problem
+
+LLMs have finite context windows. As conversations grow, you eventually hit the token limit and the agent breaks. Simply truncating old messages loses valuable context.
+
+## The Solution: Compactive Summarization
+
+Instead of truncating, we **summarize** old conversation history into a compact narrative, preserving the essential context while freeing up tokens.
+
+```
+โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
+โ Before Compaction (500+ tokens) โ
+โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโค
+โ [System] You are an HR assistant... โ
+โ [Human] Show me all candidates โ
+โ [AI] Here are 5 candidates: Alice, Bob... โ
+โ [Human] Tell me about Alice โ
+โ [AI] Alice is a senior engineer with 5 years... โ
+โ [Human] Schedule an interview with her โ
+โ [Tool] Calendar event created... โ
+โ [AI] Done! Interview scheduled for Monday. โ
+โ [Human] Now check Bob's CV โ new โ
+โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
+ โ COMPACTION โ
+โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
+โ After Compaction (~200 tokens) โ
+โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโค
+โ [System] You are an HR assistant... โ
+โ [AI Summary] User reviewed candidates, focused on โ
+โ Alice (senior engineer), scheduled interview โ
+โ for Monday. โ
+โ [Human] Now check Bob's CV โ kept โ
+โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
+```
+
+## Architecture
+
+```
+โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
+โ CompactingSupervisor โ
+โ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ โ
+โ โ 1. Intercept agent execution โ โ
+โ โ 2. Run agent normally โ โ
+โ โ 3. Count tokens after response โ โ
+โ โ 4. If over limit โ trigger compaction โ โ
+โ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ โ
+โ โ โ
+โ โผ โ
+โ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ โ
+โ โ HistoryManager โ โ
+โ โ โข compact_messages() โ LLM summarization โ โ
+โ โ โข replace_thread_history() โ checkpoint update โ โ
+โ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ โ
+โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
+```
+
+## ๐ Subagents and Memory Safety
+
+Compaction affects **only the supervisor's `messages` channel** inside LangGraph's checkpoint.
+
+This includes:
+
+- User messages
+- Supervisor AI messages
+- **Tool call and Tool result messages** (because these are part of the supervisor's visible conversation history)
+
+This does **not** include:
+
+- Sub-agent internal reasoning
+- Sub-agent private memory
+- Hidden chain-of-thought
+- Any messages stored in sub-agent-specific channels
+
+Only the messages that the supervisor itself receives are ever compacted.
+No internal sub-agent state leaks into the compacted summary.
+
+
+## Key Parameters
+
+| Parameter | Default | Description |
+|-----------|---------|-------------|
+| `token_limit` | 500 | Trigger compaction when exceeded |
+| `compaction_ratio` | 0.5 | Fraction of messages to summarize |
+
+### Compaction Ratio Explained
+
+The `compaction_ratio` controls how aggressively we summarize:
+
+```
+compaction_ratio = 0.5 (Default)
+โโโ Summarizes: oldest 50% of messages
+โโโ Keeps verbatim: newest 50% of messages
+
+compaction_ratio = 0.8 (Aggressive)
+โโโ Summarizes: oldest 80% of messages
+โโโ Keeps verbatim: only newest 20%
+ โ Use when context is very tight
+
+compaction_ratio = 0.2 (Gentle)
+โโโ Summarizes: only oldest 20%
+โโโ Keeps verbatim: newest 80%
+ โ Use when you want more history preserved
+```
+
+**Example with 10 messages:**
+- `ratio=0.5` โ Summarize messages 1-5, keep 6-10 verbatim
+- `ratio=0.8` โ Summarize messages 1-8, keep 9-10 verbatim
+- `ratio=0.2` โ Summarize messages 1-2, keep 3-10 verbatim
+
+## Usage
+
+```python
+from src.backend.context_eng import compacting_supervisor
+
+# Just use it like a normal agent - compaction is automatic!
+response = compacting_supervisor.invoke(
+ {"messages": [HumanMessage(content="Hello")]},
+ config={"configurable": {"thread_id": "my-thread"}}
+)
+
+# Streaming works too
+for chunk in compacting_supervisor.stream(...):
+ if chunk["type"] == "token":
+ print(chunk["content"], end="")
+```
+
+## LangGraph Integration
+
+### How It Wraps the Agent
+
+The `CompactingSupervisor` uses the **Interceptor Pattern** - it wraps the existing LangGraph agent without modifying it:
+
+```python
+# In compacting_supervisor.py
+from src.backend.agents.supervisor.supervisor_v2 import supervisor_agent, memory
+
+compacting_supervisor = CompactingSupervisor(
+ agent=supervisor_agent, # โ Original LangGraph agent
+ history_manager=HistoryManager(memory_saver=memory), # โ LangGraph's MemorySaver
+ ...
+)
+```
+
+The agent itself is **unchanged**. We just intercept `invoke()` and `stream()` calls.
+
+### How It Manipulates LangGraph Memory
+
+LangGraph uses **checkpoints** to persist conversation state. Normally, messages are append-only. Our `HistoryManager.replace_thread_history()` bypasses this to force a rewrite:
+
+```
+Normal LangGraph flow:
+โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
+โ Checkpoint Storage (MemorySaver) โ
+โ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ โ
+โ โ messages: [m1, m2, m3, m4...] โ โ โ Append-only
+โ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ โ
+โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
+
+After compaction (we override):
+โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
+โ Checkpoint Storage (MemorySaver) โ
+โ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ โ
+โ โ messages: [sys, summary, m4] โ โ โ Force-replaced!
+โ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ โ
+โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
+```
+
+**Key mechanism in `replace_thread_history()`:**
+1. Get current checkpoint via `memory.get_tuple(config)`
+2. Build new checkpoint with compacted messages
+3. Increment version + update timestamps
+4. Write directly via `memory.put(...)` - bypassing normal reducers
+
+This is a **low-level override** of LangGraph's internal checkpoint format. It works because we maintain the expected checkpoint structure (`channel_versions`, `channel_values`, etc.).
+
+## Files
+
+| File | Purpose |
+|------|---------|
+| `token_counter.py` | Count tokens in message lists |
+| `history_manager.py` | Summarization + checkpoint manipulation |
+| `compacting_supervisor.py` | Agent wrapper (Interceptor Pattern) |
+
diff --git a/src/backend/context_eng/token_counter.py b/src/backend/context_eng/token_counter.py
new file mode 100644
index 0000000000000000000000000000000000000000..9b144d29f7db573113a37482100164944ccbc184
--- /dev/null
+++ b/src/backend/context_eng/token_counter.py
@@ -0,0 +1,47 @@
+"""Token counting utilities for context window management."""
+
+from typing import List, Any
+
+import tiktoken
+
+
def count_tokens_for_messages(messages: List[Any], model: str = "gpt-4o") -> int:
    """
    Calculate token usage for a list of messages using tiktoken.

    Handles plain string content, multi-part (list) content blocks, message
    names, and tool calls, with approximate per-message overhead for the
    ChatML format. Counts are estimates, not exact API billing numbers.

    Args:
        messages: List of LangChain message objects.
        model: Target model encoding to use.

    Returns:
        int: Total estimated token count.
    """
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        # Unknown model name: fall back to the GPT-4 family encoding.
        encoding = tiktoken.get_encoding("cl100k_base")

    num_tokens = 0
    for message in messages:
        # Every message follows {role/name}\n{content}\n
        num_tokens += 4

        # Content may be a plain string or a list of content blocks
        # (e.g. [{"type": "text", "text": ...}, ...]). The previous version
        # silently counted list content as zero tokens.
        content = getattr(message, "content", "")
        if isinstance(content, str):
            num_tokens += len(encoding.encode(content))
        elif isinstance(content, list):
            for part in content:
                if isinstance(part, str):
                    num_tokens += len(encoding.encode(part))
                elif isinstance(part, dict) and isinstance(part.get("text"), str):
                    num_tokens += len(encoding.encode(part["text"]))
                else:
                    # Unknown block shape: approximate via its repr.
                    num_tokens += len(encoding.encode(str(part)))

        # If there are additional keys (like name, function_call, etc.) we should add them
        if hasattr(message, "name") and message.name:
            num_tokens += len(encoding.encode(message.name))

        if hasattr(message, "tool_calls") and message.tool_calls:
            for tool_call in message.tool_calls:
                num_tokens += len(encoding.encode(str(tool_call)))

    num_tokens += 2  # every reply is primed with assistant
    return num_tokens
+
diff --git a/src/backend/core/__init__.py b/src/backend/core/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/src/backend/core/base_agent.py b/src/backend/core/base_agent.py
new file mode 100644
index 0000000000000000000000000000000000000000..fec76eb0d38e750be0412c62a69487a3e636e05a
--- /dev/null
+++ b/src/backend/core/base_agent.py
@@ -0,0 +1,186 @@
"""
Base class for LangGraph-based agents that serves as an interface for building,
compiling, and executing custom agent graphs.

Alternatively, you can use `create_agent`, which implements a ReAct agent by
default. It may be of particular interest since it enables middleware such as
context summarization, human-in-the-loop, and dynamic model selection out of
the box.
links:
    - create_agent: https://docs.langchain.com/oss/python/langchain/agents
    - middleware: https://docs.langchain.com/oss/python/langchain/middleware
"""
+
+from abc import ABC, abstractmethod
+from typing import Dict, List, Optional
+
+from langgraph.graph import StateGraph
+from langchain_core.tools import BaseTool
+from langchain_openai import ChatOpenAI
+from langchain_openrouter import ChatOpenRouter
+
+from src.backend.core.configs.agent import AgentConfig
+
+
class BaseAgent(ABC):
    """Abstract base class for all LangGraph-based agents.

    Subclasses implement ``build_graph``; this base handles model
    initialization, optional tool binding, graph compilation, and
    sync/async/streaming execution.
    """

    def __init__(self, config: AgentConfig) -> None:
        """Initialize the agent with configuration.

        Args:
            config: Validated agent configuration (name, description,
                model settings, tools).
        """
        self.config = config
        self.name = config.name
        self.description = config.description
        # Built lazily by compile(); holds the raw (uncompiled) graph.
        self._graph: Optional[StateGraph] = None
        # Cached result of self._graph.compile(); reused across executions.
        self._compiled_graph = None

        # Initialize model (tools are bound optionally via bind_tools)
        self.llm = self._init_model()

    # ~~~ ABSTRACT METHODS ~~~
    @abstractmethod
    def build_graph(self) -> StateGraph:
        """Build the agent's LangGraph structure.

        Returns:
            StateGraph: The uncompiled graph definition for this agent.
        """
        pass

    # ~~~ MODEL INITIALIZATION ~~~
    def _init_model(self) -> ChatOpenAI:
        """Initialize LLM engine based on model provider.

        Raises:
            NotImplementedError: If the configured provider is unsupported.

        NOTE(review): ModelConfig.provider is a Literal that does not include
        "openrouter", so the ChatOpenRouter branch below appears unreachable
        via validated configs — confirm the intended provider set.
        """
        model_cfg = self.config.model_config
        provider = model_cfg.provider.lower()

        if provider == "openai":
            return ChatOpenAI(
                model=model_cfg.model_name,
                api_key=model_cfg.get_api_key(),
                temperature=model_cfg.temperature,
                max_tokens=model_cfg.max_tokens,
                base_url=model_cfg.api_base,
            )
        elif provider == "openrouter":
            return ChatOpenRouter(
                model=model_cfg.model_name,
                api_key=model_cfg.get_api_key(),
                temperature=model_cfg.temperature,
                max_tokens=model_cfg.max_tokens,
                base_url=model_cfg.api_base,
            )
        else:
            raise NotImplementedError(
                f"Provider '{provider}' not supported yet."
            )


    def bind_tools(
        self,
        tools: Optional[List[BaseTool]] = None,
        strict: bool = True
    ) -> ChatOpenAI:
        """
        Optionally bind tools to the initialized model.

        Args:
            tools: List of tools to bind. Defaults to `self.config.tools` if not provided.
            strict: Enforce schema validation for tools.

        Returns:
            The (possibly tool-bound) chat model; ``self.llm`` is updated
            in place when tools are bound.
        """
        if not hasattr(self, "llm"):
            raise RuntimeError("Model must be initialized before binding tools.")

        tools_to_bind = tools or self.config.tools
        if not tools_to_bind:
            return self.llm  # no-op when there is nothing to bind

        self.llm = self.llm.bind_tools(tools_to_bind, strict=strict)
        return self.llm


    # ~~~ GRAPH MANAGEMENT~~~
    def compile(self, checkpointer=None, store=None) -> StateGraph:
        """Compile the agent graph for execution.

        Args:
            checkpointer: Optional LangGraph checkpointer for persistence.
            store: Optional LangGraph store for long-term memory.
        """
        if self._graph is None:
            self._graph = self.build_graph()

        self._compiled_graph = self._graph.compile(checkpointer=checkpointer, store=store)
        return self._compiled_graph


    def get_graph(self) -> StateGraph:
        """Return compiled graph (compile if needed).
        """
        if self._compiled_graph is None:
            self.compile()
        return self._compiled_graph


    def visualize(self, output_path: Optional[str] = None):
        """Render the graph as a Mermaid diagram (PNG).

        Args:
            output_path: Optional file path the PNG is written to.
        """
        if self._compiled_graph is None:
            self.compile()
        return self._compiled_graph.get_graph().draw_mermaid_png(output_file_path=output_path)

    # ~~~ EXECUTION ~~~
    def invoke(
        self,
        input_data: Dict[str, object],
        config: Optional[Dict[str, object]] = None
    ) -> Dict[str, object]:
        """Execute the compiled agent synchronously.
        """
        if self._compiled_graph is None:
            self.compile()
        return self._compiled_graph.invoke(input_data, config)


    async def ainvoke(
        self,
        input_data: Dict[str, object],
        config: Optional[Dict[str, object]] = None
    ) -> Dict[str, object]:
        """Execute the agent asynchronously.
        """
        if self._compiled_graph is None:
            self.compile()
        return await self._compiled_graph.ainvoke(input_data, config)


    def stream(
        self,
        input_data: Dict[str, object],
        config: Optional[Dict[str, object]] = None
    ) -> Dict[str, object]:
        """Stream agent execution results.

        NOTE(review): ``.stream()`` returns an iterator of state updates,
        so the ``Dict`` return annotation is inaccurate — consider
        ``Iterator[Dict[str, object]]``.
        """
        if self._compiled_graph is None:
            self.compile()
        return self._compiled_graph.stream(input_data, config)

    # ~~~ UTILITIES ~~~
    def get_tools(self) -> List[BaseTool]:
        """Return the tools this agent can use.
        """
        return list(self.config.tools or [])


    def get_capabilities(self) -> List[str]:
        """List of agent capabilities (override in subclasses).
        """
        return []


    @property
    def metadata(self) -> Dict[str, object]:
        """Return agent metadata for discovery and routing.
        """
        return {
            "name": self.name,
            "description": self.description,
            "tools": [tool.name for tool in self.get_tools()],
            "capabilities": self.get_capabilities(),
        }

    def __repr__(self) -> str:
        """Concise debug representation: class name + agent name."""
        return f"{self.__class__.__name__}(name='{self.name}')"
diff --git a/src/backend/core/configs/agent.py b/src/backend/core/configs/agent.py
new file mode 100644
index 0000000000000000000000000000000000000000..c10c6bd77e1e76d58757bd214bbff98ccb6ccd9e
--- /dev/null
+++ b/src/backend/core/configs/agent.py
@@ -0,0 +1,46 @@
+# src/core/configs/agent.py
+from typing import List, Optional
+from langchain_core.tools import BaseTool
+from pydantic import BaseModel, Field, ConfigDict
+from src.backend.core.configs.model import ModelConfig
+
+
class AgentConfig(BaseModel):
    """
    Configuration schema for initializing LangGraph agents.

    Notes:
        Pydantic setting ``model_config = ConfigDict(arbitrary_types_allowed=True)``
        allows this model to include arbitrary Python objects such as LangChain
        tools or runtime components that are not JSON-serializable or Pydantic
        models. These objects (e.g., `BaseTool` instances) are accepted as-is
        without validation or serialization, while all standard fields
        (strings, numbers, nested Pydantic models) remain fully validated.

    NOTE(review): ``model_config`` is a reserved attribute on Pydantic v2
        models — it is the ConfigDict assigned above. Declaring a *field*
        with the same name below both shadows that setting and collides with
        the reserved name; Pydantic v2 will reject or misinterpret this
        model. Consider renaming the field (e.g. ``llm_config``) together
        with its usages (``BaseAgent._init_model`` reads
        ``config.model_config``).
    """
    model_config = ConfigDict(
        arbitrary_types_allowed=True
    )
    name: str = Field(
        ...,
        description="Unique name of the agent."
    )
    description: str = Field(
        ...,
        description="Short description of what the agent does."
    )
    model_config: ModelConfig = Field(
        ...,
        description="Configuration of the underlying LLM model."
    )
    tools: Optional[List[BaseTool]] = Field(
        default_factory=list,
        description="List of tools available to the agent."
    )
    system_prompt: str = Field(
        default="",
        description="System prompt to condition the agent's behavior."
    )
    max_iterations: Optional[int] = Field(
        default=None,
        description="Optional limit on reasoning iterations."
    )
diff --git a/src/backend/core/configs/model.py b/src/backend/core/configs/model.py
new file mode 100644
index 0000000000000000000000000000000000000000..79906d3e2de1f7cff0c53e50f13b2935da602572
--- /dev/null
+++ b/src/backend/core/configs/model.py
@@ -0,0 +1,89 @@
+# src/core/configs/model.py
+from typing import Optional, Literal
+from pydantic import BaseModel, Field, SecretStr, field_validator, ConfigDict
+import os
+
+
class ModelConfig(BaseModel):
    """
    Configuration object for connecting to and parameterizing an LLM provider.

    Notes:
        The ``model_config = ConfigDict(arbitrary_types_allowed=True)`` setting
        is included for consistency with other configs. It has no effect here
        since all fields are natively supported types (e.g., str, float, int).
        Standard Pydantic validation applies to all fields in this model.
    """
    model_config = ConfigDict(
        arbitrary_types_allowed=True
    )

    # "openrouter" is accepted so configs can reach the ChatOpenRouter branch
    # in BaseAgent._init_model; the previous Literal made that branch
    # unreachable via validated configs.
    provider: Literal["openai", "openrouter", "anthropic", "bedrock", "azure"] = Field(
        ...,
        description="LLM provider identifier (e.g., openai, openrouter, anthropic, bedrock, azure)."
    )
    model_name: str = Field(
        ...,
        description="Model identifier (e.g., gpt-4o, claude-3, etc.)."
    )
    api_key: Optional[SecretStr] = Field(
        default=None,
        description="API key for the model provider. Fallbacks to env var if omitted."
    )
    temperature: float = Field(
        default=0.0,
        ge=0.0,
        le=2.0,
        description="Sampling temperature for model randomness."
    )
    max_tokens: Optional[int] = Field(
        default=None,
        gt=0,
        description="Optional token limit for completions."
    )
    api_base: Optional[str] = Field(
        default=None,
        description="Optional override for the model's base API URL."
    )

    # ~~~ VALIDATION ~~~
    @field_validator("api_key", mode="before")
    @classmethod
    def resolve_api_key(cls, v, info):
        """Resolve the API key from the provided value or the environment.

        Args:
            v: Explicitly supplied key; returned unchanged when present.
            info: Validation context; ``info.data`` carries already-validated
                fields (``provider`` is declared before ``api_key``, so it is
                available here).

        Raises:
            ValueError: If no key is supplied and none is found in the
                environment.
        """
        if v is not None:
            return v

        provider = info.data.get("provider")
        # Conventional environment variable per provider.
        env_vars = {
            "openai": "OPENAI_API_KEY",
            "openrouter": "OPENROUTER_API_KEY",
            "anthropic": "ANTHROPIC_API_KEY",
            "bedrock": "AWS_ACCESS_KEY_ID",
            "azure": "AZURE_OPENAI_API_KEY",
        }

        env_var = env_vars.get(provider)
        if env_var:
            api_key = os.getenv(env_var)
            if api_key:
                return SecretStr(api_key)

        # env_var may be None for an unmapped provider; avoid telling the
        # user to "set None in environment".
        raise ValueError(
            f"Missing API key: provide explicitly or set "
            f"{env_var or 'the provider-specific API key variable'} in environment."
        )

    # ~~~ UTILITIES ~~~
    def get_api_key(self) -> str:
        """Safely return the underlying API key string ("" when unset)."""
        return self.api_key.get_secret_value() if self.api_key else ""

    def __repr__(self) -> str:
        """Safe string representation (without exposing secret)."""
        return (
            f"ModelConfig(provider='{self.provider}', "
            f"model_name='{self.model_name}', "
            f"temperature={self.temperature}, "
            f"max_tokens={self.max_tokens})"
        )
diff --git a/src/backend/database/__init__.py b/src/backend/database/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/src/backend/database/candidates/__init__.py b/src/backend/database/candidates/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e9ad2299cbe6e3a76b56a9dcbc01c5601628008
--- /dev/null
+++ b/src/backend/database/candidates/__init__.py
@@ -0,0 +1,26 @@
+"""
+Candidates database module.
+
+All database operations are organized in the ops/ folder,
+with each operation in its own file for modularity.
+"""
+
+from .ops import (
+ register_candidate,
+ update_parsed_cv_path,
+ get_candidate_by_name,
+ update_application_status,
+ write_cv_results_to_db,
+ write_voice_results_to_db,
+ evaluate_cv_screening_decision,
+)
+
+__all__ = [
+ "register_candidate",
+ "update_parsed_cv_path",
+ "get_candidate_by_name",
+ "update_application_status",
+ "write_cv_results_to_db",
+ "write_voice_results_to_db",
+ "evaluate_cv_screening_decision",
+]
diff --git a/src/backend/database/candidates/client.py b/src/backend/database/candidates/client.py
new file mode 100644
index 0000000000000000000000000000000000000000..d9ff6e665fb6d9fda8bec2e061916be72cdb01fe
--- /dev/null
+++ b/src/backend/database/candidates/client.py
@@ -0,0 +1,44 @@
+import os
+import socket
+from sqlalchemy import create_engine
+from sqlalchemy.orm import sessionmaker
+from src.backend.database.candidates.models import Base
+from src.backend.configs import get_database_settings
+
+
def get_engine():
    """
    Builds a SQLAlchemy engine using validated environment variables.
    Works seamlessly in both local and Docker environments.

    Priority:
        1. Environment variables (e.g., POSTGRES_HOST from Docker)
        2. .env file defaults via Pydantic config
    """
    settings = get_database_settings()

    # POSTGRES_HOST may override the configured host; trim whitespace and
    # stray quotes so DNS resolution is not tripped up.
    postgres_host = os.getenv("POSTGRES_HOST", settings.host).strip().strip("\"'")

    # When the compose hostname (e.g. 'db') cannot be resolved — typical for
    # a single-container run — fall back to host.docker.internal.
    try:
        socket.gethostbyname(postgres_host)
    except Exception:
        fallback = os.getenv("POSTGRES_HOST_FALLBACK", "host.docker.internal")
        fallback = fallback.strip().strip("\"'")
        print(f"[db-client] Host '{postgres_host}' not resolvable; falling back to '{fallback}'")
        postgres_host = fallback

    database_url = (
        f"postgresql+psycopg2://{settings.user}:{settings.password}"
        f"@{postgres_host}:{settings.port}/{settings.db}"
    )

    print(f"[db-client] Connecting to database at host={postgres_host} port={settings.port} db={settings.db} user={settings.user}", flush=True)

    return create_engine(database_url, echo=False, future=True)
+
+
# --- SQLAlchemy session setup ---
# The engine is created once at import time; sessions are created per
# operation via SessionLocal() (autoflush/autocommit disabled so commits
# are always explicit).
engine = get_engine()
SessionLocal = sessionmaker(bind=engine, autoflush=False, autocommit=False)
diff --git a/src/backend/database/candidates/info.md b/src/backend/database/candidates/info.md
new file mode 100644
index 0000000000000000000000000000000000000000..ea5c11096320cc4bd6005754333ce4b0590dfca4
--- /dev/null
+++ b/src/backend/database/candidates/info.md
@@ -0,0 +1,234 @@
+# ๐งฉ Candidate Database Schema
+
+---
+
+## ๐ Overview
+
+The **Candidate Database** manages all structured data related to applicants throughout the HR screening pipeline.
+It is designed to support both **deterministic queries** (e.g., โTop 10 CV scoresโ) and **LLM-based reasoning** (e.g., context summaries, evaluation traces).
+
+The schema uses a **hybrid design**:
+- **Relational** structure for core entities and relationships.
+- **JSON fields** for flexible, semi-structured LLM outputs.
+
+---
+
+## ๐งฑ Entity Relationship Diagram
+
+```mermaid
+erDiagram
+
+ CANDIDATES ||--o{ CV_SCREENING_RESULTS : "has many"
+ CANDIDATES ||--o{ VOICE_SCREENING_RESULTS : "has many"
+ CANDIDATES ||--o{ INTERVIEW_SCHEDULING : "has many"
+ CANDIDATES ||--|| FINAL_DECISION : "has one"
+
+ CANDIDATES {
+ UUID id PK
+ string full_name
+ string email
+ string phone_number
+ string cv_file_path
+ string parsed_cv_file_path
+ string auth_code
+ enum status
+ datetime created_at
+ datetime updated_at
+ }
+
+ CV_SCREENING_RESULTS {
+ UUID id PK
+ UUID candidate_id FK
+ string job_title
+ float skills_match_score
+ float experience_match_score
+ float education_match_score
+ float overall_fit_score
+ text llm_feedback
+ json reasoning_trace
+ datetime timestamp
+ }
+
+ VOICE_SCREENING_RESULTS {
+ UUID id PK
+ UUID candidate_id FK
+ string call_sid
+ text transcript_text
+ float sentiment_score
+ float confidence_score
+ float communication_score
+ text llm_summary
+ json llm_judgment_json
+ string audio_url
+ datetime timestamp
+ }
+
+ INTERVIEW_SCHEDULING {
+ UUID id PK
+ UUID candidate_id FK
+ string calendar_event_id
+ string event_summary
+ datetime start_time
+ datetime end_time
+ enum status
+ datetime timestamp
+ }
+
+ FINAL_DECISION {
+ UUID id PK
+ UUID candidate_id FK
+ float overall_score
+ enum decision
+ text llm_rationale
+ text human_notes
+ datetime timestamp
+ }
+```
+
+---
+
+## ๐ Tables
+
+### 1๏ธโฃ `candidates`
+
+Stores base applicant information and application metadata.
+
+| Column | Type | Description |
+|--------|------|--------------|
+| `id` | UUID | Primary key |
+| `full_name` | string | Candidateโs full name |
+| `email` | string | Unique email address |
+| `phone_number` | string | Contact number |
+| `cv_file_path` | string | Path or cloud URL to the uploaded CV |
+| `parsed_cv_file_path` | string | Path to parsed pdf file (stored as md) |
+| `auth_code` | string | 6-digit authentication code |
+| `status` | enum | Candidate stage (`applied`, `cv_screened`, `voice_invitation_sent`, `voice_done`, `cv_passed`, `cv_rejected`, `voice_passed`, `voice_rejected`, `interview_scheduled`, `decision_made`) |
+| `created_at` | datetime | Application timestamp |
+| `updated_at` | datetime | Last update timestamp |
+
+---
+
+### 2๏ธโฃ `cv_screening_results`
+
+Captures **CV screening metrics** and qualitative model feedback.
+
+| Column | Type | Description |
+|--------|------|--------------|
+| `id` | UUID | Primary key |
+| `candidate_id` | FK | Reference to `candidates.id` |
+| `job_title` | string | Target role being screened for |
+| `skills_match_score` | float | Match ratio for required skills |
+| `experience_match_score` | float | Experience alignment score |
+| `education_match_score` | float | Degree/education compatibility |
+| `overall_fit_score` | float | Weighted total score |
+| `llm_feedback` | text | Qualitative reasoning summary |
+| `reasoning_trace` | JSON | Optional full LLM reasoning trace |
+| `timestamp` | datetime | When evaluation was generated |
+
+---
+
+### 3๏ธโฃ `voice_screening_results`
+
+Contains results from automated voice interviews.
+
+| Column | Type | Description |
+|--------|------|--------------|
+| `id` | UUID | Primary key |
+| `candidate_id` | FK | Reference to `candidates.id` |
+| `call_sid` | string | Twilio call identifier |
+| `transcript_text` | text | Full transcript from Whisper/STT |
+| `sentiment_score` | float | Tone or positivity measure |
+| `confidence_score` | float | LLM-assessed confidence or clarity |
+| `communication_score` | float | Fluency or articulation score |
+| `llm_summary` | text | High-level summary of the voice screen |
+| `llm_judgment_json` | JSON | Structured LLM evaluation (per-dimension) |
+| `audio_url` | string | URL to stored audio recording |
+| `timestamp` | datetime | Time of interview completion |
+
+---
+
+### 4๏ธโฃ `interview_scheduling`
+
+Tracks HR interview scheduling and status.
+
+| Column | Type | Description |
+|--------|------|--------------|
+| `id` | UUID | Primary key |
+| `candidate_id` | FK | Reference to `candidates.id` |
+| `calendar_event_id` | string | Google Calendar event ID |
+| `event_summary` | string | Event title |
+| `start_time` | datetime | Interview start time |
+| `end_time` | datetime | Interview end time |
+| `status` | enum | Scheduling status (`scheduled`, `completed`, `cancelled`, `passed`, `rejected`) |
+| `timestamp` | datetime | Last updated timestamp |
+
+---
+
+### 5๏ธโฃ `final_decision`
+
+Stores the overall hiring outcome after all screening stages.
+
+| Column | Type | Description |
+|--------|------|--------------|
+| `id` | UUID | Primary key |
+| `candidate_id` | FK | Reference to `candidates.id` |
+| `overall_score` | float | Aggregated weighted score |
+| `decision` | enum | `hired`, `rejected`, or `pending` |
+| `llm_rationale` | text | Model reasoning for decision |
+| `human_notes` | text | HR reviewer comments |
+| `timestamp` | datetime | Decision timestamp |
+
+---
+
+## ๐ง Design Principles
+
+- **Hybrid Schema:** Structured relational tables for clean querying; JSON for flexible LLM outputs.
+- **Traceability:** All records timestamped and linked to a single candidate.
+- **Extensibility:** New screening stages (e.g., technical test results) can be added as new tables with `candidate_id` foreign key.
+- **Cascade Relationships:** Deleting a candidate removes all dependent results automatically.
+- **Explainability-Ready:** LLM reasoning traces preserved for audit and context replay.
+
+
+---
+# ๐ณ Docker-Based Local Development Setup
+
+---
+
+ 1๏ธโฃ Clone and configure environment
+```bash
+cp .env.example .env
+```
+Your .env file should contain:
+```bash
+# Shared dev DB credentials
+POSTGRES_USER=agentic_user
+POSTGRES_PASSWORD=password123
+POSTGRES_DB=agentic_hr
+POSTGRES_HOST=db
+POSTGRES_PORT=5432
+```
+2๏ธโฃ Start the stack
+```bash
+docker compose up --build
+```
+This will:
+- Spin up PostgreSQL in a container (agentic_hr_db)
+- Build and run your app container
+- Auto-initialize all database tables via SQLAlchemy
+
+Expected logs:
+```bash
🚀 Connecting to database at db:5432 ...
✅ Database initialized successfully.
+```
+
+3๏ธโฃ Verify the setup
+Connect to the running DB container:
+```bash
+docker exec -it agentic_hr_db psql -U agentic_user -d agentic_hr
+```
+Then check tables:
+
+```sql
+\dt
+```
diff --git a/src/backend/database/candidates/init_db.py b/src/backend/database/candidates/init_db.py
new file mode 100644
index 0000000000000000000000000000000000000000..e7fa30aeeaea67466904a80099fee7a51495978e
--- /dev/null
+++ b/src/backend/database/candidates/init_db.py
@@ -0,0 +1,43 @@
+"""
+Database initialization script.
+
+This is a standalone script to initialize the database.
+Kept separate from client.py to avoid circular import issues
+when running with `python -m`.
+
+Usage:
    python -m src.backend.database.candidates.init_db
+"""
+
+from src.backend.database.candidates.client import engine
+from src.backend.database.candidates.models import Base
+from sqlalchemy import inspect
+
def init_db():
    """
    Create all database tables if they don't exist.

    Intended for dev setup / Docker initialization.

    Returns:
        bool: True when the 'candidates' table exists after creation,
        False when it is missing.

    Raises:
        Exception: Re-raises any error from table creation or inspection.
    """
    try:
        print("🚀 Starting database initialization...")
        Base.metadata.create_all(bind=engine)

        # Verify that the expected tables were actually created.
        inspector = inspect(engine)
        tables = inspector.get_table_names()
        print(f"📋 Found tables: {tables}")

        if "candidates" in tables:
            print("✅ Database initialized successfully.")
            return True
        else:
            # Previously this path fell through and returned None implicitly;
            # return False so callers always get a boolean.
            print("❌ Error: 'candidates' table was not created!")
            return False

    except Exception as e:
        print(f"❌ Failed to initialize database: {e}")
        raise


if __name__ == "__main__":
    init_db()
+
diff --git a/src/backend/database/candidates/models.py b/src/backend/database/candidates/models.py
new file mode 100644
index 0000000000000000000000000000000000000000..8bfd01abceabcc6f98c404e96a368be130271f9b
--- /dev/null
+++ b/src/backend/database/candidates/models.py
@@ -0,0 +1,132 @@
+from sqlalchemy import (
+ Column,
+ String,
+ Float,
+ Text,
+ DateTime,
+ Enum,
+ ForeignKey,
+ JSON,
+)
+from sqlalchemy.dialects.postgresql import UUID
+from sqlalchemy.orm import declarative_base, relationship
+from datetime import datetime
+import uuid
+import secrets
+import string
+
+from src.backend.state.candidate import CandidateStatus, InterviewStatus, DecisionStatus
+
+
+Base = declarative_base()
+
+
def generate_auth_code() -> str:
    """Return a cryptographically secure 6-digit authentication code."""
    digits = [secrets.choice(string.digits) for _ in range(6)]
    return "".join(digits)
+
+# --- TABLES ---
+
class Candidate(Base):
    """Core applicant record; all screening results hang off this row."""

    __tablename__ = "candidates"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    full_name = Column(String, nullable=False)
    # Unique constraint makes email the natural lookup key for candidates.
    email = Column(String, unique=True, nullable=False)
    phone_number = Column(String)
    cv_file_path = Column(String)
    parsed_cv_file_path = Column(String)
    status = Column(Enum(CandidateStatus), default=CandidateStatus.applied, nullable=False)
    # NOTE(review): datetime.utcnow yields naive timestamps and is deprecated
    # in Python 3.12+; consider timezone-aware defaults.
    created_at = Column(DateTime, default=datetime.utcnow)
    # Random 6-digit code generated per candidate (see generate_auth_code).
    auth_code = Column(String, default=generate_auth_code, nullable=True)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Relationships: child rows are removed with the candidate (delete-orphan).
    cv_screening_results = relationship(
        "CVScreeningResult",
        back_populates="candidate",
        cascade="all, delete-orphan",
    )
    voice_screening_results = relationship(
        "VoiceScreeningResult",
        back_populates="candidate",
        cascade="all, delete-orphan",
    )
    interview_scheduling = relationship(
        "InterviewScheduling",
        back_populates="candidate",
        cascade="all, delete-orphan",
    )
    # uselist=False: at most one final decision per candidate.
    final_decision = relationship(
        "FinalDecision",
        back_populates="candidate",
        uselist=False,
        cascade="all, delete-orphan",
    )
+
+
class CVScreeningResult(Base):
    """Per-run CV screening scores and LLM feedback for a candidate."""

    __tablename__ = "cv_screening_results"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    # DB-level cascade: rows vanish when the parent candidate is deleted.
    candidate_id = Column(UUID(as_uuid=True), ForeignKey("candidates.id", ondelete="CASCADE"), nullable=False)
    job_title = Column(String)
    skills_match_score = Column(Float)
    experience_match_score = Column(Float)
    education_match_score = Column(Float)
    overall_fit_score = Column(Float)
    llm_feedback = Column(Text)
    # Optional full LLM reasoning trace, stored as JSON for flexibility.
    reasoning_trace = Column(JSON)
    timestamp = Column(DateTime, default=datetime.utcnow)

    candidate = relationship("Candidate", back_populates="cv_screening_results")
+
+
class VoiceScreeningResult(Base):
    """Results of an automated voice-screening call for a candidate."""

    __tablename__ = "voice_screening_results"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    candidate_id = Column(UUID(as_uuid=True), ForeignKey("candidates.id", ondelete="CASCADE"), nullable=False)
    # presumably the Twilio call identifier — confirm against the caller.
    call_sid = Column(String)
    transcript_text = Column(Text)
    sentiment_score = Column(Float)
    confidence_score = Column(Float)
    communication_score = Column(Float)
    llm_summary = Column(Text)
    # Structured per-dimension LLM evaluation.
    llm_judgment_json = Column(JSON)
    audio_url = Column(String)
    timestamp = Column(DateTime, default=datetime.utcnow)

    candidate = relationship("Candidate", back_populates="voice_screening_results")
+
+
class InterviewScheduling(Base):
    """Calendar bookings for HR interviews linked to a candidate."""

    __tablename__ = "interview_scheduling"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    candidate_id = Column(UUID(as_uuid=True), ForeignKey("candidates.id", ondelete="CASCADE"), nullable=False)
    # External calendar reference (e.g. a Google Calendar event id).
    calendar_event_id = Column(String)
    event_summary = Column(String)
    start_time = Column(DateTime)
    end_time = Column(DateTime)
    status = Column(Enum(InterviewStatus))
    timestamp = Column(DateTime, default=datetime.utcnow)

    candidate = relationship("Candidate", back_populates="interview_scheduling")
+
+
class FinalDecision(Base):
    """Single final hiring outcome per candidate (one-to-one)."""

    __tablename__ = "final_decision"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    candidate_id = Column(UUID(as_uuid=True), ForeignKey("candidates.id", ondelete="CASCADE"), nullable=False)
    overall_score = Column(Float)
    decision = Column(Enum(DecisionStatus))
    # Model reasoning and human reviewer notes are kept side by side.
    llm_rationale = Column(Text)
    human_notes = Column(Text)
    timestamp = Column(DateTime, default=datetime.utcnow)

    candidate = relationship("Candidate", back_populates="final_decision")
diff --git a/src/backend/database/candidates/ops/__init__.py b/src/backend/database/candidates/ops/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..213d5e37aee0d4fa848c82bf54a2cdc93f188646
--- /dev/null
+++ b/src/backend/database/candidates/ops/__init__.py
@@ -0,0 +1,25 @@
+"""
+Candidate database operations module.
+
+This module exports all candidate-related database operations.
+Each operation is in its own file for modularity.
+"""
+
+from .register_candidate import register_candidate
+from .update_parsed_cv_path import update_parsed_cv_path
+from .get_by_name import get_candidate_by_name
+from .update_status import update_application_status
+from .write_cv_results import write_cv_results_to_db
+from .write_voice_results import write_voice_results_to_db
+from .evaluate_cv_screening import evaluate_cv_screening_decision
+
+__all__ = [
+ "register_candidate",
+ "update_parsed_cv_path",
+ "get_candidate_by_name",
+ "update_application_status",
+ "write_cv_results_to_db",
+ "write_voice_results_to_db",
+ "evaluate_cv_screening_decision",
+]
+
diff --git a/src/backend/database/candidates/ops/evaluate_cv_screening.py b/src/backend/database/candidates/ops/evaluate_cv_screening.py
new file mode 100644
index 0000000000000000000000000000000000000000..975f584009dd485b0e8bc4ece31321567a5d173e
--- /dev/null
+++ b/src/backend/database/candidates/ops/evaluate_cv_screening.py
@@ -0,0 +1,58 @@
+"""Evaluate CV screening decision based on score threshold."""
+
+from datetime import datetime
+
+from src.backend.database.candidates.client import SessionLocal
+from src.backend.database.candidates.models import Candidate, CVScreeningResult
+from src.backend.state.candidate import CandidateStatus
+
+
def evaluate_cv_screening_decision(
    candidate_full_name: str,
    min_overall_score: float = 7.0
) -> str:
    """
    Decide if a candidate passes CV screening based on a score threshold.

    Updates the candidate status to 'cv_passed' or 'cv_rejected'.

    Args:
        candidate_full_name: The candidate's full name.
        min_overall_score: Minimum score required to pass (default 7.0).

    Returns:
        Outcome message.
    """
    with SessionLocal() as session:
        candidate = session.query(Candidate).filter(
            Candidate.full_name == candidate_full_name
        ).first()

        if not candidate:
            return f"❌ Candidate '{candidate_full_name}' not found."

        # Use the most recent screening result for this candidate.
        latest_result = (
            session.query(CVScreeningResult)
            .filter(CVScreeningResult.candidate_id == candidate.id)
            .order_by(CVScreeningResult.timestamp.desc())
            .first()
        )

        if not latest_result:
            return f"❌ No screening results found for '{candidate_full_name}'. Run screening workflow first."

        score = latest_result.overall_fit_score
        # overall_fit_score is a nullable column; comparing None against a
        # float would raise TypeError, so guard explicitly.
        if score is None:
            return f"❌ Latest screening result for '{candidate_full_name}' has no overall score."

        if score >= min_overall_score:
            new_status = CandidateStatus.cv_passed
            decision = "PASSED"
        else:
            new_status = CandidateStatus.cv_rejected
            decision = "REJECTED"

        candidate.status = new_status
        candidate.updated_at = datetime.utcnow()
        session.commit()

        return f"✅ Decision: {decision} (Score: {score} vs Threshold: {min_overall_score}). Status updated to '{new_status.value}'."
+
diff --git a/src/backend/database/candidates/ops/get_by_name.py b/src/backend/database/candidates/ops/get_by_name.py
new file mode 100644
index 0000000000000000000000000000000000000000..8d3f7ba54b9f801b4ece5d877da0152fea5c5a66
--- /dev/null
+++ b/src/backend/database/candidates/ops/get_by_name.py
@@ -0,0 +1,34 @@
+"""Get a candidate by their full name."""
+
+from typing import Optional, Dict, Any
+
+from src.backend.database.candidates.client import SessionLocal
+from src.backend.database.candidates.models import Candidate
+
+
def get_candidate_by_name(full_name: str) -> Optional[Dict[str, Any]]:
    """
    Retrieve a candidate by their full name.

    Args:
        full_name: The full name of the candidate.

    Returns:
        A dictionary with candidate data, or None if not found.
        Contains: id, full_name, email, parsed_cv_file_path, status
    """
    with SessionLocal() as session:
        record = (
            session.query(Candidate)
            .filter(Candidate.full_name == full_name)
            .first()
        )

        # Guard clause: no matching row.
        if record is None:
            return None

        return {
            "id": record.id,
            "full_name": record.full_name,
            "email": record.email,
            "parsed_cv_file_path": record.parsed_cv_file_path,
            "status": record.status,
        }
+
diff --git a/src/backend/database/candidates/ops/register_candidate.py b/src/backend/database/candidates/ops/register_candidate.py
new file mode 100644
index 0000000000000000000000000000000000000000..fa27de85495e72738ab512c12285bc487f3f0014
--- /dev/null
+++ b/src/backend/database/candidates/ops/register_candidate.py
@@ -0,0 +1,45 @@
+"""Register a new candidate in the database."""
+
+from sqlalchemy.exc import IntegrityError
+
+from src.backend.database.candidates.client import SessionLocal
+from src.backend.database.candidates.models import Candidate
+from src.backend.state.candidate import CandidateStatus
+
+
def register_candidate(
    full_name: str,
    email: str,
    phone: str,
    cv_path: str
) -> bool:
    """
    Register a new candidate in the database.

    Args:
        full_name: Candidate's full name.
        email: Candidate's email address (unique).
        phone: Candidate's phone number.
        cv_path: Path to the uploaded CV file.

    Returns:
        True if successful, False if candidate already exists.
    """
    with SessionLocal() as session:
        candidate = Candidate(
            full_name=full_name,
            email=email,
            phone_number=phone,
            cv_file_path=cv_path,
            status=CandidateStatus.applied,
        )
        session.add(candidate)
        try:
            session.commit()
            # Repaired mis-encoded (mojibake) emoji in the success message.
            print(f"✅ Candidate '{full_name}' registered successfully.")
            return True
        except IntegrityError:
            # Unique constraint on email: treat a duplicate as a soft failure.
            session.rollback()
            print(f"⚠️ Candidate with email '{email}' already exists.")
            return False
+
diff --git a/src/backend/database/candidates/ops/update_parsed_cv_path.py b/src/backend/database/candidates/ops/update_parsed_cv_path.py
new file mode 100644
index 0000000000000000000000000000000000000000..0c4f13af082fa1d1ba05897e106255b6d8a74fc1
--- /dev/null
+++ b/src/backend/database/candidates/ops/update_parsed_cv_path.py
@@ -0,0 +1,24 @@
+"""Update the parsed CV file path for a candidate."""
+
+from src.backend.database.candidates.client import SessionLocal
+from src.backend.database.candidates.models import Candidate
+
+
def update_parsed_cv_path(email: str, parsed_path: str) -> None:
    """
    Update the parsed CV file path for a candidate identified by email.

    Args:
        email: Candidate's email (unique identifier).
        parsed_path: Path to the parsed markdown file.
    """
    with SessionLocal() as session:
        candidate = session.query(Candidate).filter_by(email=email).first()
        if not candidate:
            print(f"⚠️ No candidate found with email: {email}")
            return

        # NOTE(review): assumes the Candidate model declares a
        # parsed_cv_file_path column — the model in database/context/__init__.py
        # only has parsed_cv_json; confirm against the models module used here.
        candidate.parsed_cv_file_path = parsed_path
        session.commit()
        print(f"✅ Updated parsed CV path for {email}: {parsed_path}")
+
diff --git a/src/backend/database/candidates/ops/update_status.py b/src/backend/database/candidates/ops/update_status.py
new file mode 100644
index 0000000000000000000000000000000000000000..0bb4074bdb9fa0dbf71ecbd3c579ce802fe59401
--- /dev/null
+++ b/src/backend/database/candidates/ops/update_status.py
@@ -0,0 +1,27 @@
+"""Update the status of a candidate application."""
+
+from datetime import datetime
+
+from src.backend.database.candidates.client import SessionLocal
+from src.backend.database.candidates.models import Candidate
+from src.backend.state.candidate import CandidateStatus
+
+
def update_application_status(candidate_email: str, status: CandidateStatus) -> None:
    """
    Update the status of a candidate application.

    Args:
        candidate_email: The email of the candidate.
        status: The new status to set.
    """
    with SessionLocal() as session:
        candidate = session.query(Candidate).filter_by(email=candidate_email).first()
        if candidate:
            candidate.status = status
            # Keep updated_at in sync, matching the other ops modules.
            candidate.updated_at = datetime.utcnow()
            session.commit()
            print(f"✅ Updated status for {candidate_email} to {status.value}")
        else:
            print(f"⚠️ No candidate found with email: {candidate_email}")
+
diff --git a/src/backend/database/candidates/ops/write_cv_results.py b/src/backend/database/candidates/ops/write_cv_results.py
new file mode 100644
index 0000000000000000000000000000000000000000..9bb39ff534c7606d02fe2d0cf2edcbb43f596eaf
--- /dev/null
+++ b/src/backend/database/candidates/ops/write_cv_results.py
@@ -0,0 +1,57 @@
+"""Write CV screening results to the database."""
+
+from datetime import datetime
+from typing import TYPE_CHECKING
+
+from src.backend.database.candidates.client import SessionLocal
+from src.backend.database.candidates.models import Candidate, CVScreeningResult
+from src.backend.state.candidate import CandidateStatus
+
+if TYPE_CHECKING:
+ from src.backend.agents.cv_screening.schemas.output_schema import CVScreeningOutput
+
+
def write_cv_results_to_db(
    candidate_email: str,
    result: "CVScreeningOutput",
    job_title: str = "AI Engineer"
) -> None:
    """
    Store the CV screening results in the database and update candidate status.

    Args:
        candidate_email: Email of the candidate.
        result: The screening results from the LLM (CVScreeningOutput).
        job_title: The job title the candidate applied for.

    Returns:
        None
    """
    with SessionLocal() as session:
        candidate = session.query(Candidate).filter_by(email=candidate_email).first()

        if not candidate:
            print(f"⚠️ No candidate found with email: {candidate_email}")
            return

        # Create new CV screening result entry.
        screening_entry = CVScreeningResult(
            candidate_id=candidate.id,
            job_title=job_title,
            skills_match_score=result.skills_match_score,
            experience_match_score=result.experience_match_score,
            education_match_score=result.education_match_score,
            overall_fit_score=result.overall_fit_score,
            llm_feedback=result.llm_feedback,
            reasoning_trace=None,  # reasoning trace is not captured yet
            timestamp=datetime.utcnow(),
        )

        # Persist the result and advance the candidate in the pipeline.
        session.add(screening_entry)
        candidate.status = CandidateStatus.cv_screened
        candidate.updated_at = datetime.utcnow()
        session.commit()

        print(f"✅ Screening results saved and status updated for {candidate_email} -> {candidate.status}")
+
diff --git a/src/backend/database/candidates/ops/write_voice_results.py b/src/backend/database/candidates/ops/write_voice_results.py
new file mode 100644
index 0000000000000000000000000000000000000000..056ee8ebb8c5030560015bb262300cbd6e0bfc42
--- /dev/null
+++ b/src/backend/database/candidates/ops/write_voice_results.py
@@ -0,0 +1,64 @@
+"""Write voice screening results to the database."""
+
+import uuid
+from datetime import datetime
+from typing import Optional, TYPE_CHECKING
+
+from src.backend.database.candidates.client import SessionLocal
+from src.backend.database.candidates.models import Candidate, VoiceScreeningResult
+from src.backend.state.candidate import CandidateStatus
+
+if TYPE_CHECKING:
+ from src.backend.agents.voice_screening.schemas.output_schema import VoiceScreeningOutput
+
+
def write_voice_results_to_db(
    candidate_id: str,
    session_id: str,
    transcript_text: str,
    result: "VoiceScreeningOutput",
    audio_url: Optional[str] = None
) -> None:
    """
    Store the voice screening results in the database and update candidate status.

    Args:
        candidate_id: UUID (string form) of the candidate.
        session_id: Session identifier (call_sid for Twilio, session_id for web).
        transcript_text: Full conversation transcript.
        result: The screening results from the LLM (VoiceScreeningOutput).
        audio_url: URL to the call recording if available.

    Returns:
        None
    """
    with SessionLocal() as session:
        # NOTE(review): uuid.UUID raises ValueError for malformed ids —
        # callers are expected to pass a valid UUID string.
        candidate = session.query(Candidate).filter_by(
            id=uuid.UUID(candidate_id)
        ).first()

        if not candidate:
            print(f"⚠️ No candidate found with ID: {candidate_id}")
            return

        # Create new voice screening result entry.
        screening_entry = VoiceScreeningResult(
            candidate_id=candidate.id,
            call_sid=session_id,
            transcript_text=transcript_text,
            sentiment_score=result.sentiment_score,
            confidence_score=result.confidence_score,
            communication_score=result.communication_score,
            llm_summary=result.llm_summary,
            llm_judgment_json=result.llm_judgment_json,
            audio_url=audio_url,
            timestamp=datetime.utcnow(),
        )

        # Persist the result and advance the candidate in the pipeline.
        session.add(screening_entry)
        candidate.status = CandidateStatus.voice_done
        candidate.updated_at = datetime.utcnow()
        session.commit()

        print(f"✅ Voice screening results saved and status updated for candidate {candidate_id}")
diff --git a/src/backend/database/context/__init__.py b/src/backend/database/context/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..55fc2948a0d0910bf8d024b3995232c2d2b8c9a5
--- /dev/null
+++ b/src/backend/database/context/__init__.py
@@ -0,0 +1,153 @@
+from sqlalchemy import (
+ Column, String, Integer, Float, Enum, DateTime, Text, ForeignKey, JSON
+)
+from sqlalchemy.dialects.postgresql import UUID
+from sqlalchemy.orm import declarative_base, relationship
+from datetime import datetime
+import enum
+import uuid
+
+Base = declarative_base()
+
+
+# ==============================================================
+# ENUM DEFINITIONS
+# ==============================================================
+
class CandidateStatus(enum.Enum):
    """Pipeline stages a candidate moves through, from application to outcome."""

    # NOTE(review): the ops modules import a CandidateStatus from
    # src.backend.state.candidate and use lowercase members (e.g.
    # CandidateStatus.applied) — confirm this duplicate enum stays in sync.
    APPLIED = "applied"
    CV_SCREENED = "cv_screened"
    INVITED_VOICE = "invited_voice"
    VOICE_DONE = "voice_done"
    SCHEDULED_HR = "scheduled_hr"
    DECISION_PENDING = "decision_pending"
    REJECTED = "rejected"
    HIRED = "hired"
+
+
class InterviewStatus(enum.Enum):
    """Scheduling state of an HR interview slot."""

    SCHEDULED = "scheduled"
    COMPLETED = "completed"
    CANCELLED = "cancelled"
+
+
class Decision(enum.Enum):
    """Final hiring outcome stored on FinalDecision.decision."""

    HIRE = "hire"
    REJECT = "reject"
    MAYBE = "maybe"
+
+
+# ==============================================================
+# MAIN TABLES
+# ==============================================================
+
class Candidate(Base):
    """Core applicant record; one row per unique email address."""

    __tablename__ = "candidates"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    full_name = Column(String(255), nullable=False)
    email = Column(String(255), nullable=False, unique=True)  # natural lookup key
    phone_number = Column(String(50), nullable=True)
    cv_file_path = Column(String(500), nullable=True)  # path of the raw uploaded CV
    parsed_cv_json = Column(JSON, nullable=True)       # structured CV after parsing
    status = Column(Enum(CandidateStatus), default=CandidateStatus.APPLIED)
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Relationships — all dependent rows are removed with the candidate.
    cv_results = relationship("CVScreeningResult", back_populates="candidate", cascade="all, delete-orphan")
    voice_results = relationship("VoiceScreeningResult", back_populates="candidate", cascade="all, delete-orphan")
    interviews = relationship("InterviewScheduling", back_populates="candidate", cascade="all, delete-orphan")
    decision = relationship("FinalDecision", back_populates="candidate", uselist=False, cascade="all, delete-orphan")
+
+
+# ==============================================================
+# CV SCREENING RESULTS
+# ==============================================================
+
class CVScreeningResult(Base):
    """Per-job CV screening scores produced by the LLM for one candidate."""

    __tablename__ = "cv_screening_results"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    candidate_id = Column(UUID(as_uuid=True), ForeignKey("candidates.id"), nullable=False)
    job_title = Column(String(255), nullable=True)

    # Individual match scores plus an overall fit score (all optional).
    skills_match_score = Column(Float, nullable=True)
    experience_match_score = Column(Float, nullable=True)
    education_match_score = Column(Float, nullable=True)
    overall_fit_score = Column(Float, nullable=True)

    llm_feedback = Column(Text, nullable=True)     # free-text feedback from the LLM
    reasoning_trace = Column(JSON, nullable=True)  # optional structured reasoning

    timestamp = Column(DateTime, default=datetime.utcnow)

    candidate = relationship("Candidate", back_populates="cv_results")
+
+
+# ==============================================================
+# VOICE SCREENING RESULTS
+# ==============================================================
+
class VoiceScreeningResult(Base):
    """Outcome of an automated voice screening call for one candidate."""

    __tablename__ = "voice_screening_results"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    candidate_id = Column(UUID(as_uuid=True), ForeignKey("candidates.id"), nullable=False)

    call_sid = Column(String(255), nullable=True)  # Twilio call SID or web session id
    transcript_text = Column(Text, nullable=True)  # full conversation transcript

    # LLM-derived scores for the call (all optional).
    sentiment_score = Column(Float, nullable=True)
    confidence_score = Column(Float, nullable=True)
    communication_score = Column(Float, nullable=True)

    llm_summary = Column(Text, nullable=True)
    llm_judgment_json = Column(JSON, nullable=True)
    audio_url = Column(String(500), nullable=True)  # link to the call recording, if any

    timestamp = Column(DateTime, default=datetime.utcnow)

    candidate = relationship("Candidate", back_populates="voice_results")
+
+
+# ==============================================================
+# INTERVIEW SCHEDULING
+# ==============================================================
+
class InterviewScheduling(Base):
    """Calendar booking for a candidate's HR interview."""

    __tablename__ = "interview_scheduling"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    candidate_id = Column(UUID(as_uuid=True), ForeignKey("candidates.id"), nullable=False)

    calendar_event_id = Column(String(255), nullable=True)  # external calendar event reference
    event_summary = Column(String(255), nullable=True)

    start_time = Column(DateTime, nullable=True)
    end_time = Column(DateTime, nullable=True)
    status = Column(Enum(InterviewStatus), default=InterviewStatus.SCHEDULED)

    timestamp = Column(DateTime, default=datetime.utcnow)

    candidate = relationship("Candidate", back_populates="interviews")
+
+
+# ==============================================================
+# FINAL DECISION
+# ==============================================================
+
class FinalDecision(Base):
    """Final hiring decision; at most one row per candidate."""

    __tablename__ = "final_decision"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    # unique=True enforces at most one decision row per candidate.
    candidate_id = Column(UUID(as_uuid=True), ForeignKey("candidates.id"), nullable=False, unique=True)

    overall_score = Column(Float, nullable=True)
    decision = Column(Enum(Decision), default=Decision.MAYBE)
    llm_rationale = Column(Text, nullable=True)  # model-generated justification
    human_notes = Column(Text, nullable=True)    # reviewer's manual notes

    timestamp = Column(DateTime, default=datetime.utcnow)

    candidate = relationship("Candidate", back_populates="decision")
diff --git a/src/backend/database/conversations/info.md b/src/backend/database/conversations/info.md
new file mode 100644
index 0000000000000000000000000000000000000000..290c36de0177ca0a67286ba77bd7da7c84b0708f
--- /dev/null
+++ b/src/backend/database/conversations/info.md
@@ -0,0 +1 @@
+storage for audio files
\ No newline at end of file
diff --git a/src/backend/database/cvs/__init__.py b/src/backend/database/cvs/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e4eeae245315a1a41e5c1d8c8107cbf9f79c4404
--- /dev/null
+++ b/src/backend/database/cvs/__init__.py
@@ -0,0 +1,6 @@
+"""CV storage module."""
+
+from .storage import save_cv, ensure_upload_dir, UPLOAD_DIR
+
+__all__ = ["save_cv", "ensure_upload_dir", "UPLOAD_DIR"]
+
diff --git a/src/backend/database/cvs/parsed/info.md b/src/backend/database/cvs/parsed/info.md
new file mode 100644
index 0000000000000000000000000000000000000000..fb39a698a3995c534661aa7205c4b586ef274211
--- /dev/null
+++ b/src/backend/database/cvs/parsed/info.md
@@ -0,0 +1 @@
+Parsed CVs are stored here
\ No newline at end of file
diff --git a/src/backend/database/cvs/storage.py b/src/backend/database/cvs/storage.py
new file mode 100644
index 0000000000000000000000000000000000000000..b1754a09b362427dff788f2256ade697692e08f1
--- /dev/null
+++ b/src/backend/database/cvs/storage.py
@@ -0,0 +1,52 @@
+"""
+CV file storage operations.
+
+This module handles saving and managing CV files on disk.
+"""
+
+import os
+from typing import BinaryIO
+
# Default upload directory (can be overridden via env var)
UPLOAD_DIR = os.getenv("CV_UPLOAD_PATH", "src/backend/database/cvs/uploads")


def ensure_upload_dir() -> None:
    """Ensure the CV upload directory exists."""
    os.makedirs(UPLOAD_DIR, exist_ok=True)


def save_cv(file_obj: BinaryIO, original_filename: str, candidate_name: str = "") -> str:
    """
    Save an uploaded CV to the local uploads directory.

    Args:
        file_obj: The file-like object (from Streamlit upload or HTTP request).
        original_filename: The original name of the uploaded file.
        candidate_name: The full name of the candidate (optional).

    Returns:
        The full path where the file was saved (suffixed with a counter
        if a file with the same name already exists).
    """
    ensure_upload_dir()

    _, file_ext = os.path.splitext(original_filename)

    if candidate_name:
        # Sanitize candidate name: keep alphanumerics, spaces, hyphens and
        # underscores, then replace spaces with underscores.
        safe_candidate_name = "".join(
            c for c in candidate_name if c.isalnum() or c in (" ", "-", "_")
        )
        safe_candidate_name = safe_candidate_name.replace(" ", "_")
        safe_name = f"{safe_candidate_name}_CV{file_ext}"
    else:
        safe_name = os.path.basename(original_filename)

    file_path = os.path.join(UPLOAD_DIR, safe_name)

    # Avoid silently overwriting a CV that happens to sanitize to the same
    # filename (e.g. two candidates with the same name): append a counter.
    base, ext = os.path.splitext(file_path)
    counter = 1
    while os.path.exists(file_path):
        file_path = f"{base}_{counter}{ext}"
        counter += 1

    # Save binary content.
    with open(file_path, "wb") as f:
        f.write(file_obj.read())

    print(f"📁 Saved CV to {file_path}")
    return file_path
+
diff --git a/src/backend/database/cvs/uploads/info.md b/src/backend/database/cvs/uploads/info.md
new file mode 100644
index 0000000000000000000000000000000000000000..73ee06f9e2078e50b0984470897dbab88109d5b6
--- /dev/null
+++ b/src/backend/database/cvs/uploads/info.md
@@ -0,0 +1 @@
+Uploaded CVs are stored here
\ No newline at end of file
diff --git a/src/backend/database/job_postings/ai_engineer.txt b/src/backend/database/job_postings/ai_engineer.txt
new file mode 100644
index 0000000000000000000000000000000000000000..fbef19e185e55e5d06e63491fc545a4478f77ad1
--- /dev/null
+++ b/src/backend/database/job_postings/ai_engineer.txt
@@ -0,0 +1,11 @@
+### 🧠 Position: AI Engineer
+**Location:** Remote / Wiesbaden HQ
+**About the Role:**
+Join our AI R&D team to develop, fine-tune, and deploy ML models for production.
+You will work on projects involving LLMs, LangGraph agents, and context engineering.
+
+**Requirements:**
+ - Proficiency in Python & modern AI frameworks (PyTorch, LangChain, etc.)
+ - Solid understanding of NLP and ML pipelines
+ - Experience deploying models or building intelligent systems
+ - Strong communication and teamwork skills
\ No newline at end of file
diff --git a/src/backend/database/voice_recordings/info.md b/src/backend/database/voice_recordings/info.md
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/src/backend/doc_parser/__init__.py b/src/backend/doc_parser/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..830fdd3ed021d901b37ddc5a6a2d89697b693bc7
--- /dev/null
+++ b/src/backend/doc_parser/__init__.py
@@ -0,0 +1,6 @@
+"""Document parsing module."""
+
+from .pdf_to_markdown import pdf_to_markdown
+
+__all__ = ["pdf_to_markdown"]
+
diff --git a/src/backend/doc_parser/pdf_to_markdown.py b/src/backend/doc_parser/pdf_to_markdown.py
new file mode 100644
index 0000000000000000000000000000000000000000..fb29ccd8518f64fca39de892355e63623baba730
--- /dev/null
+++ b/src/backend/doc_parser/pdf_to_markdown.py
@@ -0,0 +1,265 @@
+"""
+PDF to Markdown converter using GPT-4 Vision.
+
+---------------------------------------------------------------------------
+------------------------------ How to Use It ------------------------------
+---------------------------------------------------------------------------
+Process a single file:
+>>> python pdf_to_markdown.py data_cv/max_mustermann_cv.pdf
+
+Process a folder:
+>>> python pdf_to_markdown.py data_cv/
+
+
+Customize model or rendering:
+>>> python pdf_to_markdown.py data_cv/ --model gpt-4.1 --target-width 1800 --batch-size 3
+
+
+Disable column splitting:
+>>> python pdf_to_markdown.py my_resume.pdf --no-halves
+
+
+Set a custom output folder:
+>>> python pdf_to_markdown.py data_cv/ --output processed/
+
+
+🔧 Summary of Configurable Options
+| Option | Description | Default |
+| --------------------- | ------------------------------- | ------------------ |
+| `path` | PDF file or folder path | required |
+| `--output` | Output directory | `results/` |
+| `--model` | OpenAI model | `gpt-4.1-mini` |
+| `--target-width` | Render width per page | `2000` |
+| `--batch-size` | Pages per API request | `2` |
+| `--max-output-tokens` | Max tokens returned | `8192` |
+| `--no-halves` | Disable left/right column crops | Enabled by default |
+"""
+
+import argparse
+import os
+from datetime import datetime
+from pathlib import Path
+from typing import Dict, List
+
+from dotenv import load_dotenv
+from openai import OpenAI
+from PIL import Image
+
+from .utils import (
+ render_pdf_to_images,
+ pil_to_png_data_uri,
+ split_halves,
+ parse_sections_from_json_text,
+ normalize_sections,
+ merge_duplicate_titles,
+ build_contact_section_from_filename,
+ process_section,
+ apply_postprocessing,
+)
+
+
def pdf_to_markdown(
    input_path: Path,
    output_path: Path,
    model: str = "gpt-4.1-mini",
    target_width: int = 2000,
    batch_size: int = 2,
    max_output_tokens: int = 8192,
    add_halves: bool = True,
) -> None:
    """
    Process a single PDF or all PDFs in a directory and export Markdown sections.

    1. Render PDF pages to images.
    2. Send images in batches to GPT-4 Vision for section parsing.
    3. Normalize and post-process the returned sections.
    4. Save the final sections as a Markdown text file.
    5. Repeat for all PDFs in the input path.
    6. Output files are saved in the specified output directory.

    Args:
        input_path: Path to a single PDF file or a directory of PDFs.
        output_path: Directory to save the output Markdown files.
        model: OpenAI model to use for processing.
        target_width: Target width for rendering PDF pages.
        batch_size: Number of pages to send per API request.
        max_output_tokens: Maximum tokens in model output.
        add_halves: Whether to add left/right column crops.

    Raises:
        RuntimeError: If OPENAI_API_KEY is missing, a PDF renders no pages,
            or the model returns no parsable sections.
        ValueError: If input_path is neither a PDF file nor a directory.
    """
    load_dotenv()

    def log_step(message: str) -> None:
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        print(f"[{timestamp}] {message}")

    log_step("Vision-based PDF → Markdown extraction started...")

    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError("OPENAI_API_KEY is not set. Add it to your environment or .env file.")

    # --- Determine which PDFs to process ---
    if input_path.is_file() and input_path.suffix.lower() == ".pdf":
        pdf_files = [input_path]
    elif input_path.is_dir():
        pdf_files = sorted(input_path.glob("*.pdf"))
    else:
        raise ValueError(f"Invalid input path: {input_path}")

    if not pdf_files:
        log_step(f"No PDF files found at {input_path}")
        return

    output_path.mkdir(parents=True, exist_ok=True)
    log_step(f"Found {len(pdf_files)} PDF file(s) in {input_path}.")
    log_step(f"Using model={model}, batch_size={batch_size}, target_width={target_width}px.")

    client = OpenAI()

    # -------------------------- Inner helper --------------------------
    def call_batch(imgs: List[Image.Image]) -> List[Dict[str, str]]:
        """Process a batch of page images → STRICT JSON sections."""
        image_contents = []
        for img in imgs:
            data_uri = pil_to_png_data_uri(img)
            image_contents.append({"type": "input_image", "image_url": data_uri})

            if add_halves:
                # Two-column CVs: also send left/right crops so narrow
                # sidebar content is not missed by the model.
                for half in split_halves(img):
                    image_contents.append(
                        {"type": "input_image", "image_url": pil_to_png_data_uri(half)}
                    )

        system = "You are a precise document structure parser. Output ONLY valid JSON."
        user = (
            "From these page images, return a STRICT JSON array where each item has 'title' and 'body'. "
            "Group human-meaningful sections, merge multi-line headings (two-column layouts), preserve reading order. "
            "Do NOT summarize or omit content. Include headers/footers if they contain contact data. "
            "Preserve bullet/numbered lists and render tables as Markdown where possible. "
            "Use proper UTF-8 German diacritics (ä, ö, ü, ß). "
            "Include small sidebar/column blocks and deduplicate content across full pages and crops."
        )

        response = client.responses.create(
            model=model,
            temperature=0,
            max_output_tokens=max_output_tokens,
            input=[
                {"role": "system", "content": [{"type": "input_text", "text": system}]},
                {"role": "user", "content": [{"type": "input_text", "text": user}] + image_contents},
            ],
        )

        text = getattr(response, "output_text", "") or ""
        return parse_sections_from_json_text(text)

    # -------------------------- Main processing --------------------------
    total_files = len(pdf_files)
    for index, pdf_file in enumerate(pdf_files, start=1):
        log_step(f"[{index}/{total_files}] Processing {pdf_file.name}...")
        pages = render_pdf_to_images(pdf_file, target_width=target_width)

        if not pages:
            raise RuntimeError(f"Failed to render any PDF pages for {pdf_file}.")

        log_step(f"Rendered {len(pages)} page(s).")

        all_sections: List[Dict[str, str]] = []
        for start in range(0, len(pages), batch_size):
            end = min(len(pages), start + batch_size)
            batch_num = (start // batch_size) + 1
            log_step(f"Batch {batch_num}: pages {start + 1}–{end}.")
            secs = call_batch(pages[start:end])
            if secs:
                all_sections.extend(secs)
                log_step(f"Batch {batch_num} returned {len(secs)} section(s).")
            else:
                log_step(f"Batch {batch_num} returned no sections.")

        if not all_sections:
            raise RuntimeError(f"No sections parsed from vision model output for {pdf_file}.")

        log_step(f"Received {len(all_sections)} raw section(s).")
        # Clean encoding, merge repeated headings, normalize bullets/contacts.
        normalized = normalize_sections(all_sections)
        merged = merge_duplicate_titles(normalized)
        final_sections = apply_postprocessing(merged)
        # Prepend a fallback contact section derived from the filename.
        contact_section = process_section(build_contact_section_from_filename(pdf_file))
        final_sections.insert(0, contact_section)

        out_txt = output_path / f"{pdf_file.stem}.txt"
        log_step(f"Writing output to {out_txt}...")

        lines: List[str] = []
        for sec in final_sections:
            title = (sec.get("title") or "").strip()
            body = (sec.get("body") or "").strip()
            if title:
                lines.append(f"## {title}")
            if body:
                lines.append(body)
            lines.append("")

        # Trim trailing blank lines before writing.
        while lines and lines[-1] == "":
            lines.pop()

        out_txt.write_text("\n".join(lines), encoding="utf-8")
        log_step(f"✅ Completed processing for {pdf_file.name}.")

    log_step("🎉 All PDF files processed successfully.")
    print(f"\nResults saved in: {output_path.resolve()}")
+
+
# ----------------------------- CLI entrypoint -----------------------------
if __name__ == "__main__":
    # Command-line front-end for pdf_to_markdown(); environment variables
    # (OPENAI_MODEL, VISION_TARGET_WIDTH, VISION_BATCH_PAGES, MAX_OUTPUT_TOKENS)
    # supply the defaults so deployments can reconfigure without flag changes.
    parser = argparse.ArgumentParser(
        description="Convert PDFs to structured Markdown using GPT-4 Vision."
    )
    parser.add_argument(
        "path",
        help="Path to a single PDF file or a directory containing PDF files.",
    )
    parser.add_argument(
        "-o", "--output",
        default="results",
        help="Output directory for the Markdown files (default: results/)",
    )
    parser.add_argument(
        "--model",
        default=os.getenv("OPENAI_MODEL", "gpt-4.1-mini"),
        help="OpenAI model to use (default: gpt-4.1-mini)",
    )
    parser.add_argument(
        "--target-width",
        type=int,
        default=int(os.getenv("VISION_TARGET_WIDTH", "2000")),
        help="Target width for rendering PDF pages (default: 2000 px)",
    )
    parser.add_argument(
        "--batch-size",
        type=int,
        default=int(os.getenv("VISION_BATCH_PAGES", "2")),
        help="Number of pages to send to the model per request (default: 2)",
    )
    parser.add_argument(
        "--max-output-tokens",
        type=int,
        default=int(os.getenv("MAX_OUTPUT_TOKENS", "8192")),
        help="Maximum tokens in model output (default: 8192)",
    )
    parser.add_argument(
        "--no-halves",
        action="store_true",
        help="Disable left/right column splitting (default: enabled)",
    )

    args = parser.parse_args()

    # The --no-halves flag inverts into the add_halves parameter.
    pdf_to_markdown(
        input_path=Path(args.path),
        output_path=Path(args.output),
        model=args.model,
        target_width=args.target_width,
        batch_size=args.batch_size,
        max_output_tokens=args.max_output_tokens,
        add_halves=not args.no_halves,
    )
diff --git a/src/backend/doc_parser/utils/__init__.py b/src/backend/doc_parser/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e0e84fff6c28a97d57effafa7e5951c8da9b2f8
--- /dev/null
+++ b/src/backend/doc_parser/utils/__init__.py
@@ -0,0 +1,33 @@
+"""Document parser utilities."""
+
+from .text import normalize_bullets, tag_contacts, EMAIL_RE, PHONE_RE, URL_RE
+from .image import render_pdf_to_images, pil_to_png_data_uri, split_halves
+from .sections import (
+ parse_sections_from_json_text,
+ normalize_sections,
+ merge_duplicate_titles,
+ build_contact_section_from_filename,
+ process_section,
+ apply_postprocessing,
+)
+
+__all__ = [
+ # Text
+ "normalize_bullets",
+ "tag_contacts",
+ "EMAIL_RE",
+ "PHONE_RE",
+ "URL_RE",
+ # Image
+ "render_pdf_to_images",
+ "pil_to_png_data_uri",
+ "split_halves",
+ # Sections
+ "parse_sections_from_json_text",
+ "normalize_sections",
+ "merge_duplicate_titles",
+ "build_contact_section_from_filename",
+ "process_section",
+ "apply_postprocessing",
+]
+
diff --git a/src/backend/doc_parser/utils/image.py b/src/backend/doc_parser/utils/image.py
new file mode 100644
index 0000000000000000000000000000000000000000..7d73ecd0150fa25b3b817423476cd04dae5c2097
--- /dev/null
+++ b/src/backend/doc_parser/utils/image.py
@@ -0,0 +1,65 @@
+"""Image and PDF rendering utilities."""
+
+import base64
+import io
+from pathlib import Path
+from typing import List
+
+import pypdfium2 as pdfium
+from PIL import Image
+
+
def render_pdf_to_images(pdf_path: Path, target_width: int = 2000) -> List[Image.Image]:
    """
    Render PDF pages to PIL images (layout-preserving).

    Args:
        pdf_path: Path to the PDF file.
        target_width: Target width for rendering (scales proportionally).

    Returns:
        List of PIL Image objects, one per page.
    """
    doc = pdfium.PdfDocument(str(pdf_path))
    images: List[Image.Image] = []
    try:
        for index in range(len(doc)):
            page = doc[index]
            try:
                width_pt, _height_pt = page.get_size()
                # Scale so the rendered width is ~target_width px; never
                # downscale below the native 1.0 scale.
                scale = max(1.0, float(target_width) / float(max(1.0, width_pt)))
                bitmap = page.render(scale=scale)
                images.append(bitmap.to_pil())
            finally:
                # Close each page's native resources (previously only the
                # last page was closed after the loop, leaking the rest).
                page.close()
    finally:
        doc.close()
    return images
+
+
def pil_to_png_data_uri(img: Image.Image) -> str:
    """Convert a PIL image to a PNG data URI (base64)."""
    buffer = io.BytesIO()
    img.save(buffer, format="PNG")
    encoded = base64.b64encode(buffer.getvalue()).decode("ascii")
    return "data:image/png;base64," + encoded
+
+
def split_halves(img: Image.Image, overlap_px: int = 40) -> List[Image.Image]:
    """
    Create left/right column crops with small overlap.

    Useful for two-column CV layouts where GPT-4 Vision might
    miss content in narrow columns.

    Args:
        img: PIL Image to split.
        overlap_px: Pixels of overlap in the middle.

    Returns:
        List of [left_half, right_half] images.
    """
    width, height = img.size
    middle = width // 2
    # Extend each half past the midline by overlap_px (clamped to the image).
    left_half = img.crop((0, 0, min(middle + overlap_px, width), height))
    right_half = img.crop((max(middle - overlap_px, 0), 0, width, height))
    return [left_half, right_half]
+
diff --git a/src/backend/doc_parser/utils/sections.py b/src/backend/doc_parser/utils/sections.py
new file mode 100644
index 0000000000000000000000000000000000000000..322bc5c4d8030307fe56e54bfb700f6f8b654ac7
--- /dev/null
+++ b/src/backend/doc_parser/utils/sections.py
@@ -0,0 +1,120 @@
+"""Section parsing and processing utilities."""
+
+import json
+import re
+from collections import OrderedDict
+from pathlib import Path
+from typing import Dict, List
+
+from ftfy import fix_text
+
+from .text import normalize_bullets, tag_contacts
+
+
def _coerce_sections(data: object) -> List[Dict[str, str]]:
    """Coerce a decoded JSON value into section dicts; [] if it is not a list."""
    if not isinstance(data, list):
        return []
    return [
        {
            "title": str(item.get("title", "")).strip(),
            "body": str(item.get("body", "")).strip(),
        }
        for item in data
        if isinstance(item, dict)
    ]


def parse_sections_from_json_text(text: str) -> List[Dict[str, str]]:
    """
    Parse STRICT JSON from the API response.

    Attempts direct JSON parsing first, then falls back to extracting a
    JSON array embedded in surrounding text.

    Args:
        text: Raw text that should contain a JSON array.

    Returns:
        List of section dicts with 'title' and 'body' keys ([] on failure).
    """
    # Try direct parse of the whole payload first.
    try:
        data = json.loads(text)
    except ValueError:
        data = None
    if isinstance(data, list):
        return _coerce_sections(data)

    # Fall back: extract the first JSON array of objects embedded in the text.
    match = re.search(r"\[\s*\{[\s\S]*\}\s*\]", text)
    if match:
        try:
            data = json.loads(match.group(0))
        except ValueError:
            return []
        if isinstance(data, list):
            return _coerce_sections(data)
    return []
+
+
def normalize_sections(sections: List[Dict[str, str]]) -> List[Dict[str, str]]:
    """Normalize text encoding with ftfy (fixes mojibake, etc.)."""
    return [
        {
            "title": fix_text((section.get("title") or "").strip()),
            "body": fix_text((section.get("body") or "").strip()),
        }
        for section in sections
    ]
+
+
def merge_duplicate_titles(sections: List[Dict[str, str]]) -> List[Dict[str, str]]:
    """Merge sections with duplicate titles while preserving order."""
    combined: "OrderedDict[str, str]" = OrderedDict()

    for section in sections:
        key = section.get("title", "").strip()
        text = (section.get("body", "") or "").strip()

        if key not in combined:
            combined[key] = text
        elif text:
            existing = combined[key]
            separator = "\n\n" if existing else ""
            combined[key] = (existing + separator + text).strip()

    return [{"title": key, "body": text} for key, text in combined.items()]
+
+
def build_contact_section_from_filename(pdf_file: Path) -> Dict[str, str]:
    """
    Create a simple 'Adresse' section based on the PDF filename.

    Useful as a fallback when contact info isn't parsed from the document.
    """
    cleaned = pdf_file.stem.replace("_", " ").strip()
    parts = cleaned.split(maxsplit=1)
    # Drop a leading single-letter token (e.g. an initial prefix).
    if parts and len(parts[0]) == 1 and parts[0].isalpha():
        cleaned = parts[1] if len(parts) > 1 else ""
    display_name = cleaned.strip() or pdf_file.name
    return {"title": "Adresse", "body": f"Name: {display_name}"}
+
+
def process_section(section: Dict[str, str]) -> Dict[str, str]:
    """Normalize bullets and tag contact info for a single section."""

    def _clean(value: str) -> str:
        return tag_contacts(normalize_bullets(value))

    return {
        "title": _clean(section.get("title", "")),
        "body": _clean(section.get("body", "")),
    }
+
+
def apply_postprocessing(sections: List[Dict[str, str]]) -> List[Dict[str, str]]:
    """Apply bullet normalization and contact tagging to all sections."""
    return list(map(process_section, sections))
+
diff --git a/src/backend/doc_parser/utils/text.py b/src/backend/doc_parser/utils/text.py
new file mode 100644
index 0000000000000000000000000000000000000000..84951075b241f792513ae077eb0056f022655974
--- /dev/null
+++ b/src/backend/doc_parser/utils/text.py
@@ -0,0 +1,50 @@
"""Text processing utilities for document parsing."""

import re
from typing import List


# Regex patterns for contact detection.
# EMAIL_RE: case-insensitive local-part@domain.tld, word-boundary anchored.
EMAIL_RE = re.compile(r"(?i)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b")
# PHONE_RE: optional +country prefix, then 7+ digits allowing common
# separators; anchored to whitespace/line edges so it won't match inside
# longer alphanumeric tokens.
PHONE_RE = re.compile(r"(?:(?<=\s)|^)(\+\d{1,3}[\s()./-]?)?(?:\d[\s()/.-]?){6,}\d(?=\s|$)")
# URL_RE: http(s):// or www.-prefixed URLs; stops at whitespace, quotes,
# or angle brackets, and requires at least one dot after the prefix.
URL_RE = re.compile(r"(?i)\b(?:https?://|www\.)[^\s<>'\"]+\.[^\s<>'\"]+")
+
+# Bullet characters to normalize
+_BULLET_CHARS = {"โข", "ยท", "-", "โ", "โ", "โช", "โฆ", "โฃ", "โ", "โ", ""}
+
+
def normalize_bullets(text: str) -> str:
    """Coerce common bullet characters to '- ' while keeping numbering."""
    out: List[str] = []

    for raw in text.splitlines():
        head = raw.lstrip()

        if not head:
            # Preserve blank/whitespace-only lines untouched.
            out.append(raw)
        elif re.match(r"^\d+[\.)]\s+", head):
            # Numbered list items ("1. " / "2) ") are left as-is.
            out.append(raw)
        elif head[0] in _BULLET_CHARS or head.startswith(("- ", "* ")):
            # Strip any run of bullet markers plus trailing whitespace,
            # then re-emit with the canonical "- " prefix.
            body = re.sub(
                r"^([\-\*\u2022\u2023\u2043\u2219\u25E6\u25AA\u25CB\u25CF\u25A0]+\s+)", "", head
            )
            out.append(f"- {body.strip()}")
        else:
            out.append(raw)

    return "\n".join(out)
+
+
def tag_contacts(text: str) -> str:
    """Wrap detected email/phone/URL values with simple tags."""
    result = text
    # Apply in the same order as before: email, then phone, then URL.
    for pattern, tag in ((EMAIL_RE, "EMAIL"), (PHONE_RE, "PHONE"), (URL_RE, "URL")):
        result = pattern.sub(lambda m, t=tag: f"[{t}]{m.group(0)}[/{t}]", result)
    return result
+
diff --git a/src/backend/prompts/__init__.py b/src/backend/prompts/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..83d2911fc44849bfe9378d0fd703c1e1d7b3e928
--- /dev/null
+++ b/src/backend/prompts/__init__.py
@@ -0,0 +1,66 @@
+"""
+Prompt Registry Module
+======================
+
+Centralized prompt management using PromptLayer with optional local files.
+Provides a singleton PromptManager instance for loading prompts from:
+1. Local prompt files (if local_prompt_path is provided)
+2. PromptLayer cloud service (if PROMPTLAYER_API_KEY is set)
+"""
+
import os
from .prompt_layer import PromptManager

# Path to the templates folder inside the package
TEMPLATES_DIR = os.path.join(os.path.dirname(__file__), "templates")

# Singleton PromptManager instance shared by all callers of get_prompt().
_prompt_manager = PromptManager(environment=os.getenv("PROMPT_ENVIRONMENT", "production"))
# Force local-only prompts; disable remote PromptLayer client if present.
# With client=None, PromptManager.get_prompt always uses the local-file path.
_prompt_manager.client = None
+
+
def get_prompt(
    template_name: str,
    version: int | None = None,
    label: str | None = None,
    local_prompt_path: str | None = None,
    latest_version: bool = False,
) -> str:
    """
    Load a prompt from local templates only (PromptLayer disabled here).

    Strategy:
    - If local_prompt_path is provided, use it (relative paths are anchored
      at the package templates directory).
    - Otherwise, use the default templates directory.

    Args:
        template_name: Template name; normalized to lowercase to match the
            template folder names on disk.
        version: Forwarded to PromptManager.get_prompt unchanged.
        label: Forwarded to PromptManager.get_prompt unchanged.
        local_prompt_path: File or directory to load the prompt from.
        latest_version: Forwarded flag; has no effect while the remote
            client is disabled.

    Returns:
        The prompt text.
    """
    # Normalize template name to match folder names (lowercase)
    if template_name:
        template_name = template_name.lower()

    if local_prompt_path:
        if not os.path.isabs(local_prompt_path):
            # Anchor relative paths at the packaged templates folder.
            local_prompt_path = os.path.join(TEMPLATES_DIR, local_prompt_path)
    else:
        local_prompt_path = TEMPLATES_DIR

    return _prompt_manager.get_prompt(
        template_name=template_name,
        version=version,
        label=label,
        local_prompt_path=local_prompt_path,
        latest_version=latest_version,
    )
+
+
def get_prompt_manager() -> PromptManager:
    """Return the module-level singleton PromptManager instance."""
    return _prompt_manager
+
+
+__all__ = [
+ "get_prompt",
+ "get_prompt_manager",
+ "PromptManager",
+ "TEMPLATES_DIR"
+]
diff --git a/src/backend/prompts/info.md b/src/backend/prompts/info.md
new file mode 100644
index 0000000000000000000000000000000000000000..1fd016bef9564f26e5ba382faca2074aa7ec908e
--- /dev/null
+++ b/src/backend/prompts/info.md
@@ -0,0 +1,69 @@
+# Prompt Management System
+
+This module provides a centralized way to manage prompts using **PromptLayer** with a local filesystem fallback. It allows you to version prompts, manage environments (dev/staging/prod), and easily switch between local development and cloud-managed prompts.
+
+## ๐ Usage
+
+Import the `get_prompt` function to load prompts anywhere in your application.
+
+```python
+from src.backend.prompts import get_prompt
+```
+
+### 1. Load from PromptLayer (Default)
+If `PROMPTLAYER_API_KEY` is set in your environment, `PromptManager` can fetch the prompt from PromptLayer using the configured environment label (default: `production`). Note: the package-level `get_prompt` wrapper in `__init__.py` currently disables the remote client, so it always loads from local templates.
+
+```python
+# Fetches 'DB_Executor' tagged with current environment (e.g., 'production')
+system_prompt = get_prompt("DB_Executor")
+```
+
+### 2. Load Latest Version (Ignore Environment)
+Useful for testing or when you want to ensure you have the absolute latest saved version from PromptLayer, ignoring any 'prod' or 'dev' tags.
+
+```python
+# Fetches the absolute latest version of the template
+system_prompt = get_prompt("DB_Executor", latest_version=True)
+```
+
+### 3. Force Load from Local File
+You can force loading from a local file, which is useful for local development without an internet connection or for testing new prompts before pushing to PromptLayer.
+
+```python
+# Loads from src/backend/prompts/templates/db_executor/v1.txt
+# (Assuming 'v1.txt' is the file name in that directory, or provide full path)
+system_prompt = get_prompt("db_executor/v1", local_prompt_path="src/backend/prompts/templates")
+```
+
+If you don't provide a `local_prompt_path` but also don't have a `PROMPTLAYER_API_KEY` set, it defaults to looking in `src/backend/prompts/templates`.
+
+## ๐ Directory Structure
+
+Store your local prompt backups in `src/backend/prompts/templates/`.
+
+```
+src/backend/prompts/
+โโโ __init__.py # Exposes get_prompt
+โโโ prompt_layer.py # Core logic
+โโโ templates/ # Local prompt storage
+โ โโโ db_executor/
+โ โ โโโ v1.txt
+โ โโโ supervisor/
+โ โ โโโ v1.txt
+โ โโโ ...
+โโโ info.md # This file
+```
+
+## โ๏ธ Configuration
+
+- **`PROMPTLAYER_API_KEY`**: Set this env var to enable PromptLayer.
+- **`PROMPT_ENVIRONMENT`**: Set to `dev`, `staging`, or `production` (default) to control which tagged version is loaded.
+
+## ๐ Debugging
+
+The system prints clear logs to stdout so you know where your prompt came from:
+
+- `๐ Loaded prompt '...' from PromptLayer (env=production)`
+- `๐ Loaded prompt '...' from PromptLayer (latest version)`
+- `๐ Loaded prompt '...' from local file: ...`
+
diff --git a/src/backend/prompts/prompt_layer.py b/src/backend/prompts/prompt_layer.py
new file mode 100644
index 0000000000000000000000000000000000000000..b8b478a02a26abeb0b17c221aaf09345fe034beb
--- /dev/null
+++ b/src/backend/prompts/prompt_layer.py
@@ -0,0 +1,245 @@
+#!/usr/bin/env python3
+"""
+PromptLayer Integration for Prompt Management
+==============================================
+
+This module provides a centralized way to manage prompts using PromptLayer platform.
+Allows for versioned, labeled prompts that can be easily updated without code changes.
+"""
+
+import promptlayer
+from promptlayer import PromptLayer
+from dotenv import load_dotenv
+import os
+from typing import Dict, Any, Optional
+from functools import lru_cache
+
+load_dotenv()
+
+
class PromptManager:
    """
    Centralized prompt management using the PromptLayer platform.
    link:
    - https://www.promptlayer.com

    Features:
    - Version control for prompts
    - Environment-based prompt labels (dev, staging, production)
    - Caching for performance (shared lru_cache on get_prompt)
    - Fallback to local files if PromptLayer is unavailable
    """

    def __init__(self, api_key: Optional[str] = None, environment: str = "production"):
        """
        Initialize PromptManager.

        Args:
            api_key: PromptLayer API key (defaults to PROMPTLAYER_API_KEY env var)
            environment: Environment label for prompts (dev, staging, production)
        """
        self.api_key = api_key or os.getenv("PROMPTLAYER_API_KEY")
        self.environment = environment
        # Remains None when no key is configured or connection fails;
        # get_prompt() then relies entirely on the local-file fallback.
        self.client = None

        # Initialize client if API key is available
        if self.api_key:
            try:
                self.client = PromptLayer(api_key=self.api_key)
                print(f"โ
 PromptLayer connected (environment: {environment})")

            except Exception as e:
                print(f"โ ๏ธ PromptLayer connection failed: {e}")
                self.client = None
        else:
            print("โ ๏ธ No PROMPTLAYER_API_KEY found, using local fallback")

    # NOTE(review): lru_cache on an instance method keys the cache on `self`,
    # shares one cache across all instances, and keeps instances alive for the
    # cache's lifetime (ruff B019). clear_cache() below clears it explicitly.
    @lru_cache(maxsize=128)
    def get_prompt(
        self,
        template_name: str,
        version: Optional[int] = None,
        label: Optional[str] = None,
        local_prompt_path: Optional[str] = None,
        latest_version: bool = False,
    ) -> str:
        """
        Resolve a prompt's text.

        Order of attempts (as implemented below):
        1. PromptLayer, when a remote client is configured.
        2. Local file fallback under local_prompt_path.

        Args:
            template_name: Name of the prompt template
            version: Version for PromptLayer.
                NOTE(review): accepted but currently unused by this body.
            label: Environment label (defaults to self.environment)
            local_prompt_path: Full path to local file OR directory containing prompt files
            latest_version: If True, explicitly fetch the latest version (ignoring label)

        Returns:
            str: Prompt content

        Raises:
            ValueError: When PromptLayer is unavailable/fails and the local
                fallback is missing or cannot be read.
        """

        # 1๏ธโฃ Try PromptLayer FIRST if client is available
        label = label or self.environment

        if self.client:
            try:
                if latest_version:
                    # Fetch the latest template definition directly without execution
                    response = self.client.templates.get(template_name)

                    # Extract the prompt text from llm_kwargs (preferred) or prompt_template
                    prompt_content = None

                    # Strategy 1: Try llm_kwargs (cleanest format)
                    if isinstance(response, dict) and "llm_kwargs" in response:
                        messages = response["llm_kwargs"].get("messages", [])
                        # Try to find system message
                        for msg in messages:
                            if msg.get("role") == "system":
                                prompt_content = msg.get("content")
                                break
                        # Fallback to first message
                        if prompt_content is None and messages:
                            prompt_content = messages[0].get("content")

                    # Strategy 2: Try prompt_template dictionary structure
                    if prompt_content is None and isinstance(response, dict) and "prompt_template" in response:
                        pt = response["prompt_template"]
                        if isinstance(pt, dict) and "messages" in pt:
                            messages = pt["messages"]
                            for msg in messages:
                                # Check role if available
                                if msg.get("role") == "system" and "content" in msg:
                                    content_list = msg["content"]
                                    if isinstance(content_list, list) and content_list:
                                        # Extract text from content list [{'type': 'text', 'text': '...'}]
                                        for item in content_list:
                                            if item.get("type") == "text":
                                                prompt_content = item.get("text")
                                                break
                                if prompt_content: break

                            # Fallback: first message content
                            if prompt_content is None and messages and "content" in messages[0]:
                                content_list = messages[0]["content"]
                                if isinstance(content_list, list) and content_list:
                                    for item in content_list:
                                        if item.get("type") == "text":
                                            prompt_content = item.get("text")
                                            break

                    # Fallback: Stringify if nothing else found
                    if prompt_content is None:
                        prompt_content = str(response)

                    # Try to extract version metadata if available
                    version_info = ""
                    if isinstance(response, dict) and "version" in response:
                        version_info = f" (v{response.get('version')})"
                    elif hasattr(response, "version"):  # Some client objects might have it
                        version_info = f" (v{response.version})"

                    print(
                        f"๐ Loaded prompt '{template_name}' from PromptLayer (latest version){version_info}",
                        flush=True
                    )
                    return prompt_content

                # Standard flow using labels (existing logic)
                response = self.client.run(
                    prompt_name=template_name,
                    input_variables={},
                    tags=[label],
                )

                if isinstance(response, dict):
                    prompt_content = response.get("output") or str(response)
                else:
                    prompt_content = str(response)

                print(
                    f"๐ Loaded prompt '{template_name}' from PromptLayer (env={label})",
                    flush=True  # force the output to the buffer immediately,
                    # ensuring it shows up in the docker compose log stream immediately.
                )
                return prompt_content

            except Exception as e:
                print(f"โ ๏ธ PromptLayer failed: {e}. Falling back to local templates...", flush=True)

        # 2๏ธโฃ Fall back to local files if PromptLayer failed or unavailable
        if local_prompt_path:
            try:
                # If a directory is passed, append template_name + .txt
                if os.path.isdir(local_prompt_path):
                    # Try exact match first: template_name.txt (case-sensitive)
                    file_path = os.path.join(local_prompt_path, f"{template_name}.txt")

                    # If not found, try subdirectory with lowercase template_name
                    if not os.path.exists(file_path):
                        lowercase_name = template_name.lower()
                        file_path = os.path.join(local_prompt_path, lowercase_name, "v1.txt")

                    # If still not found, try subdirectory with original template_name
                    if not os.path.exists(file_path):
                        file_path = os.path.join(local_prompt_path, template_name, "v1.txt")
                else:
                    file_path = local_prompt_path

                with open(file_path, "r", encoding="utf-8") as f:
                    print(f"๐ Loaded prompt '{template_name}' from local file: {file_path}", flush=True)
                    return f.read()

            except Exception as e:
                raise ValueError(
                    f"โ Failed to load '{template_name}' from local path '{local_prompt_path}': {e}"
                )

        raise ValueError(
            f"โ Failed to load '{template_name}': PromptLayer unavailable and no local_prompt_path provided."
        )



    def list_available_prompts(self) -> Dict[str, Any]:
        """
        List all available prompts from PromptLayer.

        Returns:
            Dictionary of available prompts with metadata
            (currently a placeholder payload; listing is not implemented).
        """
        if not self.client:
            return {"error": "PromptLayer client not available"}

        try:
            # This would depend on PromptLayer's API for listing templates
            # Placeholder implementation
            return {
                "message": "PromptLayer template listing not implemented in this version",
                "available_methods": [
                    "get_judge_prompt(simple=True/False)",
                    "get_agent_prompt(version=int)",
                    "get_prompt(template_name, version, label, fallback_path)"
                ]
            }
        except Exception as e:
            return {"error": f"Failed to list prompts: {e}"}

    def clear_cache(self) -> None:
        """Clear the shared get_prompt lru_cache.
        """
        self.get_prompt.cache_clear()
        print("๐๏ธ Prompt cache cleared")


    def set_environment(self, environment: str) -> None:
        """
        Change the environment label for subsequent prompt requests.

        Args:
            environment: New environment (dev, staging, production)
        """
        self.environment = environment
        self.clear_cache()  # Clear cache since environment changed
        print(f"๐ Environment changed to: {environment}")
diff --git a/src/backend/prompts/templates/compactor/v1.txt b/src/backend/prompts/templates/compactor/v1.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b929407f6c5e30a3d06d1430c6b87a519381a544
--- /dev/null
+++ b/src/backend/prompts/templates/compactor/v1.txt
@@ -0,0 +1,7 @@
+You are a summarizer. You take a long chain of messages between an assistant and a user, and compact them into a summary. You speak from a first person perspective as if you were the assistant.
+
+Your goal is to put high level summaries of the conversation to reduce the token count of the conversation. For important topics, use more tokens. For less important topics, summarize to few tokens. In addition, for newer conversation items, use more tokens. For older ones, use less.
+
+Always include emails, names, phone numbers, and other key info. You don't need to use them verbatim more than once in the compaction.
+
+You are not speaking to the user. You may NOT add ANY new message to ANYONE. Summarize and stop there.
\ No newline at end of file
diff --git a/src/backend/prompts/templates/cv_screener/v1.txt b/src/backend/prompts/templates/cv_screener/v1.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e8ce573eb06114ed16c0b3830359a7c989f6846a
--- /dev/null
+++ b/src/backend/prompts/templates/cv_screener/v1.txt
@@ -0,0 +1,10 @@
+You are an HR assistant evaluating how well a candidate's CV matches a given
+job description. Generate a concise assessment summary first to ground your
+reasoning. Then assign calibrated match scores between 0 and 1.
+
+The scores should be based on the following criteria:
+    1. Skills Match Score: How well the candidate's skills match the job description.
+ 2. Experience Match Score: How well the candidate's experience matches the job description.
+ 3. Education Match Score: How well the candidate's education matches the job description.
+ 4. Overall Fit Score: How well the candidate's CV fits the job description.
+
diff --git a/src/backend/prompts/templates/db_executor/v1.txt b/src/backend/prompts/templates/db_executor/v1.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9948549597b3372cd7fc824d816e5515eab838b4
--- /dev/null
+++ b/src/backend/prompts/templates/db_executor/v1.txt
@@ -0,0 +1,74 @@
+You are the **Database Executor Agent**, responsible for generating
+and executing **SQLAlchemy ORM-style** Python code on behalf of the HR Supervisor Agent.
+
+Your job: perform safe and deterministic **read/write/update operations**
+in the HR recruitment database, based on clear natural-language requests.
+
+---
+
+### โ
Rules
+1. Use SQLAlchemy ORM โ not raw SQL.
+2. Use `session` (provided) for all queries.
+3. Return clean Python dict or list results โ no ORM objects.
+4. Commit only when needed (`session.commit()`).
+5. Never alter schema, connection, or delete/drop tables.
+6. Validate record existence before updating or inserting.
+7. Briefly explain what was done in plain English.
+
+---
+
+### ๐งฉ Database Overview (ORM Models)
+**Note**: All these models are already imported and available in the global context.
+**DO NOT** try to import them again. Use them directly (e.g. `session.query(Candidate)...`).
+
+**Candidate**
+- id (UUID, PK)
+- full_name, email (unique), phone_number
+- cv_file_path, parsed_cv_file_path
+- status (Enum: `applied`, `cv_screened`, `cv_passed`, `cv_rejected`, `voice_passed`, `voice_rejected`, `interview_scheduled`, `decision_made`)
+- Relationships โ `cv_screening_results`, `voice_screening_results`, `interview_scheduling`, `final_decision`
+
+**CVScreeningResult**
+- candidate_id โ Candidate.id
+- skills_match_score, experience_match_score, education_match_score, overall_fit_score
+- llm_feedback, reasoning_trace (JSON), timestamp
+
+**VoiceScreeningResult**
+- candidate_id โ Candidate.id
+- transcript_text, sentiment_score, communication_score, confidence_score
+- llm_summary, llm_judgment_json, audio_url, timestamp
+
+**InterviewScheduling**
+- candidate_id โ Candidate.id
+- calendar_event_id, start_time, end_time
+- status (Enum: `scheduled`, `completed`, `cancelled`)
+
+**FinalDecision**
+- candidate_id โ Candidate.id
+- overall_score, decision (Enum: `hire`, `reject`, `maybe`)
+- llm_rationale, human_notes, timestamp
+
+---
+
+๐งพ Expected Execution Pattern
+When asked to perform a task, you must:
+1. Construct ORM-based Python code using session and the given models.
+2. Store final results in a variable named result.
+3. Print the results using:
+```python
+import json
+print(json.dumps(result, indent=2, default=str))
+```
+4. Optionally, include a short explanatory comment after the code.
+
+### ๐งพ Output Format
+1. **Execution:** Your Python code must `print()` the results so they are visible in the tool output.
+2. **Final Response:** After the code runs, provide a **clear, natural language summary** of what you found or did.
+ - *Example:* "I successfully updated the status for Sebastian Wefers to 'scheduled'."
+ - *Example:* "I retrieved 3 candidates: John, Jane, and Bob."
+
+### ๐จ Error Handling
+If you encounter errors:
+1. **Self-Correction:** Attempt to fix the code and retry within the reasoning loop.
+2. **Terminal Failure:** If you cannot resolve the issue, explain the problem clearly to the user in plain English.
+ - *Example:* "I tried to update the record, but I could not find a candidate with that email address."
\ No newline at end of file
diff --git a/src/backend/prompts/templates/db_executor/v2.txt b/src/backend/prompts/templates/db_executor/v2.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f971b402b5a0b5fce8fd734c54f075aa7ef62cff
--- /dev/null
+++ b/src/backend/prompts/templates/db_executor/v2.txt
@@ -0,0 +1,59 @@
+You are the **Database Executor Agent**, responsible for generating and executing **SQLAlchemy ORM-style** Python code on behalf of the HR Supervisor Agent.
+Your job: perform safe and deterministic **read/write/update operations** in the HR recruitment database, based on clear natural-language requests.
+---
+# Rules
+1. Use SQLAlchemy ORM not raw SQL.
+2. **MANDATORY**: Use the pre-provided `session` object from the global context.
+ - โ NEVER do `create_engine` or `sessionmaker`.
+ - โ NEVER import `sqlite3` or `psycopg2` directly.
+ - โ
ALWAYS use `session.query(...)`.
+3. Return clean Python dict or list results, no ORM objects.
+4. Commit only when needed (`session.commit()`).
+5. Never alter schema, connection, or delete/drop tables.
+6. Validate record existence before updating or inserting.
+7. Briefly explain what was done in plain English.
+---
+# Database Overview (ORM Models)
+**Note**: All these models AND Enums (`CandidateStatus`, `InterviewStatus`, `DecisionStatus`) are already imported and available in the global context.
+**DO NOT** try to import them again. Use them directly (e.g. `session.query(Candidate)...` or `status=CandidateStatus.hired`).
+**DATABASE TYPE**: PostgreSQL (managed by the system). DO NOT assume SQLite.
+**Candidate**
+- id (UUID, PK)
+- full_name, email (unique), phone_number
+- cv_file_path, parsed_cv_file_path, created_at, updated_at, auth_code
+- status (Enum `CandidateStatus`: `applied`, `cv_screened`, `cv_passed`, `cv_rejected`, `voice_invitation_sent`, `voice_done`, `voice_passed`, `voice_rejected`, `interview_scheduled`, `interview_passed`, `interview_rejected`, `decision_made`, `hired`, `rejected`)
+- Relationships โ `cv_screening_results`, `voice_screening_results`, `interview_scheduling`, `final_decision`
+**CVScreeningResult**
+- candidate_id โ Candidate.id
+- job_title, skills_match_score, experience_match_score, education_match_score, overall_fit_score
+- llm_feedback, reasoning_trace (JSON), timestamp
+**VoiceScreeningResult**
+- candidate_id โ Candidate.id
+- call_sid, transcript_text, sentiment_score, communication_score, confidence_score
+- llm_summary, llm_judgment_json, audio_url, timestamp
+**InterviewScheduling**
+- candidate_id โ Candidate.id
+- calendar_event_id, event_summary, start_time, end_time
+- status (Enum `InterviewStatus`: `scheduled`, `completed`, `cancelled`, `passed`, `rejected`)
+**FinalDecision**
+- candidate_id โ Candidate.id
+- overall_score, decision (Enum `DecisionStatus`: `hired`, `rejected`, `pending`)
+- llm_rationale, human_notes, timestamp
+---
+Expected Execution Pattern
+When asked to perform a task, you must:
+1. Construct ORM-based Python code using session and the given models.
+2. Store final results in a variable named result.
+3. Print the results using:
+```python
+import json
+print(json.dumps(result, indent=2, default=str))
+```
+4. Optionally, include a short explanatory comment after the code.
+# Output Format
+1. **Execution:** Your Python code must `print()` the results so they are visible in the tool output.
+2. **Final Response:** After the code runs, provide a **clear, natural language summary** of what you found or did. It should be clear enough that a random person would understand.
+# Error Handling
+If you encounter errors:
+1. **Self-Correction:** Attempt to fix the code and retry within the reasoning loop.
+2. **Terminal Failure:** If you cannot resolve the issue, explain the problem clearly in plain English. Provide verbatim snippets of the error.
\ No newline at end of file
diff --git a/src/backend/prompts/templates/gcalendar/v1.txt b/src/backend/prompts/templates/gcalendar/v1.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9ea82387205fd47a6d7485d0e31c3af5e9f0cc6b
--- /dev/null
+++ b/src/backend/prompts/templates/gcalendar/v1.txt
@@ -0,0 +1,10 @@
+You are a scheduling assistant authorized to use Google Calendar MCP tools.
+You can for instance list, create, and analyze events.
+
+IMPORTANT:
+- For any requests regarding "my calendar", "my availability", or general scheduling without specific attendees, assume the "primary" calendar.
+- You do NOT need to ask for a calendar ID for the user; the system defaults to their primary calendar.
+- Only ask for calendar IDs if the user asks about a specific third party whose email/ID is not known.
+
+Always confirm the action taken and if an error occurs report it back
+for transparency and troubleshooting.
\ No newline at end of file
diff --git a/src/backend/prompts/templates/gmail/v1.txt b/src/backend/prompts/templates/gmail/v1.txt
new file mode 100644
index 0000000000000000000000000000000000000000..815f72879758b76c945e0db69520daa26916800e
--- /dev/null
+++ b/src/backend/prompts/templates/gmail/v1.txt
@@ -0,0 +1,4 @@
+You are an agent authorized to use Gmail MCP tools.
+For instance you can read, search, create drafts, and send emails, etc.
+When asked to send an email, always confirm the details before sending if ambiguous,
+but if the instruction is clear, proceed.
\ No newline at end of file
diff --git a/src/backend/prompts/templates/supervisor/v1.txt b/src/backend/prompts/templates/supervisor/v1.txt
new file mode 100644
index 0000000000000000000000000000000000000000..401c2d8d6a248f1a86682adc101fe771c1849a6c
--- /dev/null
+++ b/src/backend/prompts/templates/supervisor/v1.txt
@@ -0,0 +1,50 @@
+You are the **Supervisor Agent** overseeing the entire recruitment workflow.
+You act on behalf of the HR manager **Casey Jordan** (`hr.cjordan.agent.hack.winter25@gmail.com`),
+who is the only person talking to you.
+
+Understand the candidate lifecycle status flow:
+`applied` โ `cv_screened` โ `interview_scheduled` โ `decision_made`.
+
+---
+
+### ๐ฏ Your Role
+You coordinate and supervise the hiring process from CV submission to final decision.
+You have access to specialized sub-agents that handle:
+- Database operations (querying, updating, reporting)
+- CV screening and evaluation
+- Email communication (for candidates and Casey)
+- Calendar scheduling (for HR meetings and interviews)
+
+You do **not** perform these actions yourself โ instead, you **delegate** to sub-agents when needed.
+---
+
+### โ๏ธ Recruitment Process Overview
+1. **Application submitted** โ Candidate starts with status `applied`.
+2. **CV screening** โ
+ - Run `cv_screening_workflow` (updates status to `cv_screened` automatically).
+ - Ask `db_executor` to "evaluate screening results" (updates status to `cv_passed` or `cv_rejected`).
+ Here you can optionally specify a minimum passing score (default is 7.0).
+3. **Notification** โ
+ - If `cv_rejected`, send a polite rejection email.
+ - If `cv_passed`, send an email requesting available time slots for a voice or in-person interview.
+4. **Scheduling** โ
+ - Use the calendar agent to check **our (HR)** availability (`primary` calendar).
+ - You CANNOT check the candidate's calendar. You must **ask** the candidate for their preferred times via email.
+ - Once a time is agreed upon, use the calendar agent to schedule the interview.
+5. **Decision** โ Once interviews are complete, record and communicate the final decision.
+
+Always notify Casey what a status was updated to.
+---
+
+### ๐ง Reasoning & Planning Strategy
+Before calling tools, **THINK**:
+1. **Sequential Dependencies (Action A โ Action B):** If Action B requires data (like an email address), perform Action A (fetch data) first.
+ - **Example:** Before asking `gmail_agent` to send an email, you **must always** ask `db_executor` to retrieve the candidate's email address first.
+2. **Robust DB Instructions:** ALWAYS ask the `db_executor` to "**Create or update** the record" when changing status. NEVER just ask to "Update", as the record might not exist yet.
+
+
+### ๐ง Your Behavior
+- Use the available sub-agents for all database queries, screenings, email sends, and calendar operations.
+- Respond clearly, professionally and comprehensively to Caseyโs requests.
+- Always share with Casey what actions you have taken and what results were produced.
+- If you or any sub-agent encounter an error, **notify Casey immediately** for troubleshooting.
diff --git a/src/backend/prompts/templates/supervisor/v2.txt b/src/backend/prompts/templates/supervisor/v2.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d9388eb1cc0aca745fe24faa55d1c1d54d53706e
--- /dev/null
+++ b/src/backend/prompts/templates/supervisor/v2.txt
@@ -0,0 +1,64 @@
+You are the **Supervisor Agent** overseeing the entire recruitment workflow. You act on behalf of the HR manager **Casey Jordan** (`hr.cjordan.agent.hack.winter25@gmail.com`), who is the only person talking to you.
+Understand the candidate lifecycle status flow:
+1. `applied` (Application received)
+2. `cv_screened` (CV Analyzed)
+3. `cv_passed` or `cv_rejected` (Outcome of CV Screening)
+4. `voice_invitation_sent` (If CV Passed)
+5. `voice_done` (Candidate completed AI Voice Interview)
+6. `voice_passed` or `voice_rejected` (Outcome of Voice Analysis)
+7. `interview_scheduled` (Final Human Interview)
+8. `decision_made` (Final Offer or Rejection)
+9. `hired` (If the decision is positive and confirmed)
+10. `rejected` (If the decision is negative)
+---
+# Your Role
+You coordinate and supervise the hiring process from CV submission to final decision.
+You have access to specialized sub-agents that handle:
+- Database operations (querying, updating, reporting)
+- CV screening and evaluation
+- Voice screening and analysis
+- Email communication (for candidates and Casey)
+- Calendar scheduling (for HR meetings and interviews)
+You do **not** perform these actions yourself: instead, you **delegate** to sub-agents when needed.
+---
+# Recruitment Process Overview
+1. **Application submitted** โ Candidate starts with status `applied`.
+2. **CV screening** โ
+ - Run `cv_screening_workflow` (updates status to `cv_screened` automatically).
+ - Ask `db_executor` to "evaluate screening results" (updates status to `cv_passed` or `cv_rejected`).
+ Here you can optionally specify a minimum passing score (default is 7.0).
+3. **Voice Screening Invitation** โ
+ - If `cv_rejected`, send a polite rejection email.
+ - If `cv_passed`, fetch `auth_code` from DB and email the candidate the voice screening invitation including this code for login.
+ - Update status to `voice_invitation_sent` via `db_executor`.
+4. **Voice Screening** โ
+ - Candidates complete the AI voice interview.
+ - The system updates status to `voice_done` automatically.
+ - Ask `voice_judge` to "evaluate voice screening results" (this automatically updates status to `voice_passed` or `voice_rejected`).
+5. **Interview Invitation (Person-to-Person)** โ
+ - If `voice_rejected`, send a polite rejection email.
+ - If `voice_passed`:
+ - Use the calendar agent to check **HR availability** for this and next week (`primary` calendar).
+ - Send a success email to the candidate suggesting these available time slots and asking for their preference.
+6. **Scheduling** โ
+ - Once the candidate replies with a preferred time, use the calendar agent to schedule the interview.
+ - Update status to `interview_scheduled`.
+7. **Final Decision** โ
+ - Once interviews are complete, record the final decision in the database.
+ - Update candidate status to `decision_made`.
+ - Create or update the `FinalDecision` record with the decision (`hired`, `rejected`, or `pending`).
+ - If `hired`, update candidate status to `hired`.
+ - If `rejected`, update candidate status to `rejected`.
+ - Communicate the result to the candidate via email.
+Always notify Casey what a status was updated to.
+---
+# Reasoning & Planning Strategy
+Before calling tools, **THINK**:
+1. **Sequential Dependencies (Action A โ Action B):** If Action B requires data (like an email address), perform Action A (fetch data) first.
+ - **Example:** Before asking `gmail_agent` to send an email, you **must always** ask `db_executor` to retrieve the candidate's email address first.
+2. **Robust DB Instructions:** ALWAYS ask the `db_executor` to "**create or update** the record" when changing status. NEVER just ask to "Update", as the record might not exist yet.
+# Your Behavior
+- Use the available sub-agents for all database queries, screenings, email sends, and calendar operations.
+- Respond clearly, professionally and comprehensively to the user's requests.
+- Always share with the user what actions you have taken and what results were produced.
+- If you or any sub-agent encounter an error, **notify the user immediately**.
\ No newline at end of file
diff --git a/src/backend/prompts/templates/voice_screening/v1.txt b/src/backend/prompts/templates/voice_screening/v1.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/src/backend/prompts/test.py b/src/backend/prompts/test.py
new file mode 100644
index 0000000000000000000000000000000000000000..32bd041add4a440448ede3ce66af4bc89f33e6f1
--- /dev/null
+++ b/src/backend/prompts/test.py
@@ -0,0 +1,26 @@
+import sys
+import os
+
+# Add project root to path to ensure we can import from src
+project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
+sys.path.append(project_root)
+
+from src.backend.prompts import get_prompt
+
+def test_fetch_prompt():
+ prompt_name = "DB_Executor"
+ print(f"Attempting to fetch prompt: {prompt_name}")
+
+ try:
+ # Using latest_version=True to test the new feature and bypass env labels
+ print(f"Fetching prompt '{prompt_name}' using latest_version=True...")
+ prompt = get_prompt(prompt_name, latest_version=True)
+ print(f"\nโ
Successfully fetched '{prompt_name}':")
+ print("-" * 40)
+ print(prompt)
+ print("-" * 40)
+ except Exception as e:
+ print(f"\nโ Error fetching prompt: {e}")
+
+if __name__ == "__main__":
+ test_fetch_prompt()
diff --git a/src/backend/state/__init__.py b/src/backend/state/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/src/backend/state/candidate.py b/src/backend/state/candidate.py
new file mode 100644
index 0000000000000000000000000000000000000000..3c80fc88b83ac2459422e1a9c983ce295e1a61f2
--- /dev/null
+++ b/src/backend/state/candidate.py
@@ -0,0 +1,79 @@
+import enum
+
+class CandidateStatus(str, enum.Enum):
+ """
+    Application process and status updates
+ -------------------------------------
+ 1) CV Upload
+ -> "applied"
+
+ 2) CV Screening
+ -> "cv_screened"
+ -> "cv_passed"
+ -> "cv_rejected"
+
+ 3) Voice Screening Invitation
+ -> "voice_invitation_sent"
+
+ 4) Voice Screening
+ -> "voice_done"
+ -> "voice_passed"
+ -> "voice_rejected"
+
+ 5) Interview Scheduling
+ -> "interview_scheduled"
+ -> "interview_passed"
+ -> "interview_rejected"
+
+ 6) Final Decision
+ -> "decision_made"
+ -> "hired"
+ -> "rejected"
+ """
+ applied = "applied"
+ cv_screened = "cv_screened"
+ cv_passed = "cv_passed"
+ cv_rejected = "cv_rejected"
+ voice_invitation_sent = "voice_invitation_sent"
+ voice_done = "voice_done"
+ voice_passed = "voice_passed"
+ voice_rejected = "voice_rejected"
+ interview_scheduled = "interview_scheduled"
+ interview_passed = "interview_passed"
+ interview_rejected = "interview_rejected"
+ decision_made = "decision_made"
+ hired = "hired"
+ rejected = "rejected"
+
+
+class InterviewStatus(str, enum.Enum):
+ """
+ Person-to-Person Interview
+ -------------------------------------
+ 5) Interview Scheduling
+ -> "interview_scheduled"
+ -> "interview_completed"
+ -> "interview_cancelled"
+
+ -> "interview_passed"
+ -> "interview_rejected"
+ """
+ scheduled = "scheduled"
+ completed = "completed"
+ cancelled = "cancelled"
+ passed = "passed"
+ rejected = "rejected"
+
+
+class DecisionStatus(str, enum.Enum):
+ """
+ Final Decision
+ -------------------------------------
+ 6) Decision Made
+ -> "hired"
+ -> "rejected"
+ -> "pending"
+ """
+ hired = "hired"
+ rejected = "rejected"
+ pending = "pending"
diff --git a/src/frontend/gradio/app.py b/src/frontend/gradio/app.py
index ac1e239fc83b1fc95f85d5ae755e7f16ae91e70b..5a5b421843ff4e3d1e3747bdd9c62917ba222a91 100644
--- a/src/frontend/gradio/app.py
+++ b/src/frontend/gradio/app.py
@@ -5,11 +5,10 @@ Requires Gradio 6.0+
import os
import gradio as gr
-from typing import Optional, Tuple
+from typing import Optional, Tuple, Dict, Any
import sys
from pathlib import Path
-import requests
-import uuid
+from uuid import uuid4
project_root = Path(__file__).resolve().parent.parent.parent.parent
sys.path.insert(0, str(project_root))
@@ -57,78 +56,40 @@ def get_api_url(service: str) -> str:
return f"https://{space_id}.hf.space/api/v1/{api_path}"
return f"http://localhost:8080/api/v1/{api_path}"
-def get_voice_screening_url() -> str:
- """Get the URL for the Streamlit voice screening page."""
- voice_url = os.getenv("VOICE_SCREENING_UI_URL", "http://localhost:8502")
- return voice_url
-
-def get_proxy_url(for_client: bool = False) -> str:
- """Get WebSocket proxy URL from environment or default."""
- proxy_url = os.getenv("WEBSOCKET_PROXY_URL", "ws://localhost:8000/ws/realtime")
- if for_client:
- if "websocket_proxy" in proxy_url:
- proxy_url = proxy_url.replace("websocket_proxy", "localhost")
- return proxy_url
-
-def get_proxy_base_url() -> str:
- """Get HTTP base URL for proxy API calls."""
- proxy_url = get_proxy_url(for_client=False)
- return proxy_url.replace("ws://", "http://").replace("wss://", "https://").replace("/ws/realtime", "")
-
-def authenticate_voice_screening(email: str, auth_code: str) -> Tuple[str, Optional[str], Optional[str]]:
- """Authenticate user for voice screening. Returns (status_message, session_token, candidate_id)."""
- if not email or not auth_code:
- return "โ Please enter both email and authentication code.", None, None
- try:
- proxy_base = get_proxy_base_url()
- response = requests.post(
- f"{proxy_base}/auth/verify",
- json={"email": email, "code": auth_code},
- timeout=5
- )
- if response.status_code == 200:
- data = response.json()
- session_token = data.get("session_token")
- candidate_id = data.get("candidate_id")
- return f"โ
Authentication successful! Session token: {session_token[:20]}...", session_token, candidate_id
- else:
- error_data = response.json() if response.content else {}
- return f"โ Authentication failed: {error_data.get('detail', response.text)}", None, None
- except Exception as e:
- return f"โ Error connecting to proxy: {str(e)}", None, None
-
# ============================================================================
# CANDIDATE APPLICATION PORTAL
# ============================================================================
-def submit_application(full_name: str, email: str, phone: str, cv_file) -> str:
+def submit_application(full_name: str, email: str, phone: str, cv_file, session_state: Optional[Dict[str, Any]] = None) -> Tuple[str, Dict[str, Any]]:
if not SDK_AVAILABLE:
- return "โ SDK not available. Please check backend connection."
+ return "โ SDK not available. Please check backend connection.", ensure_session(session_state)
+ session = ensure_session(session_state)
if not full_name or not email:
- return "โ Full name and email are required."
+ return "โ Full name and email are required.", session
if not cv_file:
- return "โ Please upload your CV (PDF or DOCX)."
+ return "โ Please upload your CV (PDF or DOCX).", session
try:
- client = CVUploadClient(base_url=get_api_url("cv"))
+ client = CVUploadClient(base_url=get_api_url("cv"), session_id=session["session_id"])
file_path = cv_file.name if hasattr(cv_file, 'name') else str(cv_file)
filename = Path(file_path).name
with open(file_path, 'rb') as f:
response = client.submit(full_name=full_name, email=email, phone=phone or "", cv_file=f, filename=filename)
if response.success:
- return f"โ
{response.message}\n\nYour application has been recorded."
+ return f"โ
{response.message}\n\nYour application has been recorded.", session
elif response.already_exists:
- return f"โ ๏ธ {response.message}\n\nPlease wait for review."
- return f"โ {response.message}"
+ return f"โ ๏ธ {response.message}\n\nPlease wait for review.", session
+ return f"โ {response.message}", session
except Exception as e:
- return f"โ Failed to submit application: {str(e)}"
+ return f"โ Failed to submit application: {str(e)}", session
-def check_application_status(email: str) -> str:
+def check_application_status(email: str, session_state: Optional[Dict[str, Any]] = None) -> Tuple[str, Dict[str, Any]]:
if not SDK_AVAILABLE:
- return "โ SDK not available."
+ return "โ SDK not available.", ensure_session(session_state)
+ session = ensure_session(session_state)
if not email:
- return "โ Please enter your email address."
+ return "โ Please enter your email address.", session
try:
- client = DatabaseClient(base_url=get_api_url("database"))
+ client = DatabaseClient(base_url=get_api_url("database"), session_id=session["session_id"])
response = client.get_candidate_by_email(email, include_relations=True)
if response.success and response.data:
c = response.data
@@ -143,25 +104,26 @@ def check_application_status(email: str) -> str:
info += f"**Interview:** {c['interview_scheduling'][0].get('status', 'Scheduled')}\n\n"
if c.get('final_decision'):
info += f"**Decision:** {c['final_decision'].get('decision', 'Pending')}"
- return info
- return f"โ No application found for {email}."
+ return info, session
+ return f"โ No application found for {email}.", session
except Exception as e:
- return f"โ Error: {str(e)}"
+ return f"โ Error: {str(e)}", session
# ============================================================================
# HR PORTAL
# ============================================================================
-def load_candidates(status_filter: Optional[str] = None) -> str:
+def load_candidates(status_filter: Optional[str] = None, session_state: Optional[Dict[str, Any]] = None) -> Tuple[str, Dict[str, Any]]:
if not SDK_AVAILABLE:
- return "โ SDK not available."
+ return "โ SDK not available.", ensure_session(session_state)
+ session = ensure_session(session_state)
try:
- client = DatabaseClient(base_url=get_api_url("database"))
+ client = DatabaseClient(base_url=get_api_url("database"), session_id=session["session_id"])
response = client.get_candidates(status=status_filter if status_filter != "All" else None, limit=100, include_relations=True)
if response.success and response.data:
candidates = response.data
if not candidates:
- return "No candidates found."
+ return "No candidates found.", session
table = "| Name | Email | Status | Applied | Voice |\n|------|-------|--------|---------|-------|\n"
for c in candidates:
name = c.get('full_name', 'Unknown')
@@ -170,142 +132,55 @@ def load_candidates(status_filter: Optional[str] = None) -> str:
applied = str(c.get('created_at', 'N/A'))[:10]
voice = "โ
" if c.get('voice_screening_results') else "โ"
table += f"| {name} | {email} | {status} | {applied} | {voice} |\n"
- return f"**Found {len(candidates)} candidate(s)**\n\n{table}"
- return "No candidates found."
+ return f"**Found {len(candidates)} candidate(s)**\n\n{table}", session
+ return "No candidates found.", session
except Exception as e:
- return f"โ Error: {str(e)}"
+ return f"โ Error: {str(e)}", session
-def trigger_voice_screening(candidate_email: str) -> str:
+def trigger_voice_screening(candidate_email: str, session_state: Optional[Dict[str, Any]] = None) -> Tuple[str, Dict[str, Any]]:
if not SDK_AVAILABLE:
- return "โ SDK not available."
+ return "โ SDK not available.", ensure_session(session_state)
+ session = ensure_session(session_state)
if not candidate_email:
- return "โ Please enter candidate email."
+ return "โ Please enter candidate email.", session
try:
- # First, get candidate info to retrieve candidate_id and auth_code
- db_client = DatabaseClient(base_url=get_api_url("database"))
- candidate_response = db_client.get_candidate_by_email(candidate_email, include_relations=False)
-
- if not candidate_response.success or not candidate_response.data:
- return f"โ Candidate not found with email: {candidate_email}"
-
- candidate = candidate_response.data
- candidate_id = candidate.get('id')
- candidate_name = candidate.get('full_name', 'Unknown')
- auth_code = candidate.get('auth_code')
-
- if not candidate_id:
- return f"โ Could not retrieve candidate ID for {candidate_email}"
-
- # Create voice screening session
- voice_api_url = get_api_url("voice-screener")
- session_response = requests.post(
- f"{voice_api_url}/session/create",
- json={"candidate_id": candidate_id},
- timeout=10
- )
-
- if session_response.status_code != 200:
- error_detail = session_response.json().get('detail', 'Unknown error') if session_response.content else 'Unknown error'
- return f"โ Failed to create voice screening session: {error_detail}"
-
- session_data = session_response.json()
- session_id = session_data.get('session_id')
-
- # Construct the Streamlit URL with candidate_id
- voice_screening_url = get_voice_screening_url()
- redirect_url = f"{voice_screening_url}?candidate_id={candidate_id}"
-
- # Build HTML response message
- result = f"""
-
- โ
Voice Screening Session Created
- Candidate: {candidate_name}
- Email: {candidate_email}
- Session ID: {session_id}
- """
-
- if auth_code:
- result += f"""
-
- ๐ Authentication Code:
- {auth_code}
- Share this code with the candidate to access voice screening
-
- """
-
- result += f"""
-
- ๐๏ธ Voice Screening URL:
-
-
-
-
-
- ๐ Instructions:
-
- - Share the authentication code with the candidate
- - Direct them to the Voice Screening URL above (or click the button)
- - They will enter their email ({candidate_email}) and the auth code to start the interview
-
-
-
- """
-
- # Also notify supervisor agent
- try:
- supervisor_client = SupervisorClient(base_url=get_api_url("supervisor"))
- thread_id = supervisor_client.new_chat()
- supervisor_response = supervisor_client.chat(
- message=f"Voice screening session created for candidate {candidate_name} ({candidate_email}). Session ID: {session_id}. Please proceed with the voice screening process.",
- thread_id=thread_id
- )
- if supervisor_response.token_count:
- result += f'๐ Supervisor tokens: {supervisor_response.token_count:,}
'
- except Exception as e:
- # Don't fail if supervisor notification fails
- pass
-
- return result
- except requests.exceptions.RequestException as e:
- return f"โ Network error: {str(e)}"
+ client = SupervisorClient(base_url=get_api_url("supervisor"), session_id=session["session_id"])
+ thread_id = client.new_chat()
+ response = client.chat(message=f"Please trigger voice screening for candidate with email {candidate_email}", thread_id=thread_id)
+ token_info = f"\n\n๐ Tokens: {response.token_count:,}" if response.token_count else ""
+ return f"โ
Voice screening triggered!\n\n{response.content}{token_info}", session
except Exception as e:
- return f"โ Failed: {str(e)}"
+ return f"โ Failed: {str(e)}", session
-def schedule_interview(candidate_email: str) -> str:
+def schedule_interview(candidate_email: str, session_state: Optional[Dict[str, Any]] = None) -> Tuple[str, Dict[str, Any]]:
if not SDK_AVAILABLE:
- return "โ SDK not available."
+ return "โ SDK not available.", ensure_session(session_state)
+ session = ensure_session(session_state)
if not candidate_email:
- return "โ Please enter candidate email."
+ return "โ Please enter candidate email.", session
try:
- client = SupervisorClient(base_url=get_api_url("supervisor"))
+ client = SupervisorClient(base_url=get_api_url("supervisor"), session_id=session["session_id"])
thread_id = client.new_chat()
response = client.chat(message=f"Please schedule an interview for candidate with email {candidate_email}", thread_id=thread_id)
token_info = f"\n\n๐ Tokens: {response.token_count:,}" if response.token_count else ""
- return f"โ
Interview scheduling initiated!\n\n{response.content}{token_info}"
+ return f"โ
Interview scheduling initiated!\n\n{response.content}{token_info}", session
except Exception as e:
- return f"โ Failed: {str(e)}"
+ return f"โ Failed: {str(e)}", session
# ============================================================================
-# SUPERVISOR AGENT CHAT
+# SUPERVISOR AGENT CHAT (per-user state via session dict)
# ============================================================================
-class ChatState:
- def __init__(self):
- self.thread_id: Optional[str] = None
- self.messages: list = []
- self.total_tokens: int = 0
- def reset(self):
- self.thread_id = None
- self.messages = []
- self.total_tokens = 0
-
-chat_state = ChatState()
+def ensure_session(state: Optional[Dict[str, Any]]) -> Dict[str, Any]:
+ """Ensure a per-user session dict exists with a unique session_id."""
+ if state is None:
+ state = {}
+ if not state.get("session_id"):
+ state["session_id"] = uuid4().hex
+ state.setdefault("thread_id", None)
+ state.setdefault("messages", [])
+ state.setdefault("total_tokens", 0)
+ return state
def format_chat_history(messages: list) -> str:
if not messages:
@@ -318,15 +193,16 @@ def format_chat_history(messages: list) -> str:
formatted.append(f"๐ค **Assistant**\n\n{content}")
return "\n\n---\n\n".join(formatted)
-def init_chat() -> Tuple[str, str]:
+def init_chat(session_state: Optional[Dict[str, Any]] = None) -> Tuple[str, str, Dict[str, Any]]:
if not SDK_AVAILABLE:
- return "โ SDK not available.", "๐ Tokens: 0"
+ return "โ SDK not available.", "๐ Tokens: 0", ensure_session(session_state)
+ session = ensure_session(session_state)
try:
- client = SupervisorClient(base_url=get_api_url("supervisor"))
+ client = SupervisorClient(base_url=get_api_url("supervisor"), session_id=session["session_id"])
thread_id = client.new_chat()
- chat_state.thread_id = thread_id
- chat_state.messages = []
- chat_state.total_tokens = 0
+ session["thread_id"] = thread_id
+ session["messages"] = []
+ session["total_tokens"] = 0
welcome = """Hello! I'm the HR Supervisor Agent. I can help you with:
- **Querying** candidate information
@@ -335,29 +211,30 @@ def init_chat() -> Tuple[str, str]:
- **Managing** the recruitment pipeline
What would you like to know?"""
- chat_state.messages.append(("assistant", welcome))
- return format_chat_history(chat_state.messages), "๐ Tokens: 0"
+ session["messages"].append(("assistant", welcome))
+ return format_chat_history(session["messages"]), "๐ Tokens: 0", session
except Exception as e:
- return f"โ Failed to initialize: {str(e)}", "๐ Tokens: 0"
+ return f"โ Failed to initialize: {str(e)}", "๐ Tokens: 0", session
-def chat_with_supervisor(message: str, history: str) -> Tuple[str, str, str]:
+def chat_with_supervisor(message: str, history: str, session_state: Optional[Dict[str, Any]]) -> Tuple[str, str, str, Dict[str, Any]]:
if not SDK_AVAILABLE:
- return history, "โ SDK not available.", ""
+ return history, "โ SDK not available.", "", ensure_session(session_state)
+ session = ensure_session(session_state)
if not message.strip():
- return history, f"๐ Tokens: {chat_state.total_tokens:,}", ""
- if not chat_state.thread_id:
- init_chat()
+ return history, f"๐ Tokens: {session['total_tokens']:,}", "", session
+ if not session.get("thread_id"):
+ _, _, session = init_chat(session)
try:
- client = SupervisorClient(base_url=get_api_url("supervisor"))
- chat_state.messages.append(("user", message))
- response = client.chat(message=message, thread_id=chat_state.thread_id)
- chat_state.messages.append(("assistant", response.content))
- chat_state.total_tokens += response.token_count or 0
- return format_chat_history(chat_state.messages), f"๐ Tokens: {chat_state.total_tokens:,}", ""
+ client = SupervisorClient(base_url=get_api_url("supervisor"), session_id=session["session_id"])
+ session["messages"].append(("user", message))
+ response = client.chat(message=message, thread_id=session["thread_id"])
+ session["messages"].append(("assistant", response.content))
+ session["total_tokens"] += response.token_count or 0
+ return format_chat_history(session["messages"]), f"๐ Tokens: {session['total_tokens']:,}", "", session
except Exception as e:
error_msg = f"โ Error: {str(e)}"
- chat_state.messages.append(("assistant", error_msg))
- return format_chat_history(chat_state.messages), f"๐ Tokens: {chat_state.total_tokens:,}", ""
+ session["messages"].append(("assistant", error_msg))
+ return format_chat_history(session["messages"]), f"๐ Tokens: {session['total_tokens']:,}", "", session
# ============================================================================
# CUSTOM CSS
@@ -831,6 +708,9 @@ def create_app():
AI-Powered Recruitment System
""")
+
+ # Per-user session state (persists across interactions)
+ session_state = gr.State(value=None)
with gr.Tabs():
# ============================================================
@@ -840,18 +720,6 @@ def create_app():
gr.Markdown("## ๐ Submit Your Application")
gr.HTML('Welcome! We\'re seeking talented engineers. Submit your CV below to start your application.')
- # Voice screening link for candidates
- voice_screening_url = get_voice_screening_url()
- gr.HTML(f"""
-
- ๐๏ธ Complete Your Voice Screening:
-
- Start Voice Screening Interview โ {voice_screening_url}
-
-
After submitting your application, you can complete your voice screening interview here.
-
- """)
-
with gr.Row():
with gr.Column():
full_name = gr.Textbox(label="Full Name", placeholder="Ada Lovelace")
@@ -866,9 +734,8 @@ def create_app():
submit_btn.click(
fn=submit_application,
- inputs=[full_name, email, phone, cv_file],
- outputs=application_output,
- show_progress="full"
+ inputs=[full_name, email, phone, cv_file, session_state],
+ outputs=[application_output, session_state]
)
gr.Markdown("---")
@@ -879,7 +746,7 @@ def create_app():
check_btn = gr.Button("๐ Check Status", variant="secondary", scale=1)
status_output = gr.Markdown()
- check_btn.click(fn=check_application_status, inputs=status_email, outputs=status_output)
+ check_btn.click(fn=check_application_status, inputs=[status_email, session_state], outputs=[status_output, session_state])
# ============================================================
# TAB 2: HR Portal
@@ -897,21 +764,17 @@ def create_app():
load_btn = gr.Button("๐ Load Candidates", variant="primary", scale=1)
candidates_output = gr.Markdown()
- load_btn.click(fn=load_candidates, inputs=status_filter, outputs=candidates_output)
+ load_btn.click(fn=load_candidates, inputs=[status_filter, session_state], outputs=[candidates_output, session_state])
gr.Markdown("---")
gr.Markdown("## ๐๏ธ Voice Screening")
- gr.HTML('''
- Note: Use the "๐๏ธ Voice Screening" tab to access the voice interview interface.
- Trigger voice screening for a candidate below to generate their authentication code.
- ''')
with gr.Row():
voice_email = gr.Textbox(label="Candidate Email", placeholder="candidate@example.com", scale=3)
voice_btn = gr.Button("๐๏ธ Trigger Screening", variant="secondary", scale=1)
- voice_output = gr.HTML(elem_classes=["no-scroll-output"])
- voice_btn.click(fn=trigger_voice_screening, inputs=voice_email, outputs=voice_output)
+ voice_output = gr.Markdown(elem_classes=["no-scroll-output"])
+ voice_btn.click(fn=trigger_voice_screening, inputs=[voice_email, session_state], outputs=[voice_output, session_state])
gr.Markdown("---")
gr.Markdown("## ๐
Interview Scheduling")
@@ -921,103 +784,10 @@ def create_app():
interview_btn = gr.Button("๐
Schedule Interview", variant="secondary", scale=1)
interview_output = gr.Markdown(elem_classes=["no-scroll-output"])
- interview_btn.click(fn=schedule_interview, inputs=interview_email, outputs=interview_output)
-
- # ============================================================
- # TAB 3: Voice Screening
- # ============================================================
- with gr.Tab("๐๏ธ Voice Screening"):
- gr.Markdown("## ๐๏ธ Voice Screening Interview")
- gr.HTML('''
- Instructions: Enter your email and authentication code to start the voice screening interview.
- You will receive the authentication code when HR triggers voice screening for you.
- ''')
-
- # Authentication section
- with gr.Row():
- with gr.Column():
- gr.Markdown("### ๐ Authentication")
- vs_email = gr.Textbox(label="Email", placeholder="your.email@example.com")
- vs_auth_code = gr.Textbox(label="Authentication Code", placeholder="Enter your 6-digit code", type="password")
- vs_auth_btn = gr.Button("โ
Authenticate", variant="primary")
- vs_auth_status = gr.Markdown()
-
- # Hidden components for session management
- vs_session_token = gr.State()
- vs_candidate_id = gr.State()
- vs_session_id = gr.State()
-
- # Voice interface (shown after authentication)
- voice_interface_row = gr.Row(visible=False)
- with voice_interface_row:
- with gr.Column():
- gr.Markdown("### ๐ค Voice Interview")
- voice_interface_html = gr.HTML()
- transcript_display = gr.Markdown("### ๐ Transcript\n\n*Transcript will appear here during the interview...*")
-
- with gr.Row():
- start_interview_btn = gr.Button("๐ Start Interview", variant="primary")
- end_interview_btn = gr.Button("โน๏ธ End Interview", variant="secondary")
-
- def handle_authentication(email: str, auth_code: str):
- """Handle voice screening authentication."""
- status_msg, session_token, candidate_id = authenticate_voice_screening(email, auth_code)
- if session_token:
- session_id = str(uuid.uuid4())
- return (
- status_msg,
- gr.update(visible=True), # Show voice interface
- session_token,
- candidate_id or "",
- session_id
- )
- return (
- status_msg,
- gr.update(visible=False), # Hide voice interface
- None,
- None,
- None
- )
-
- def start_interview(session_token, candidate_id, session_id):
- """Start the voice interview and load HTML component."""
- if not session_token:
- return "โ Please authenticate first.", gr.HTML()
-
- # Load the HTML component
- html_file_path = Path(__file__).parent.parent / "streamlit" / "voice_screening_ui" / "components" / "voice_interface.html"
-
- if not html_file_path.exists():
- return "โ Voice interface component not found.", gr.HTML("Voice interface HTML file not found.
")
-
- with open(html_file_path, 'r', encoding='utf-8') as f:
- html_content = f.read()
-
- # Get proxy URL
- proxy_url = get_proxy_url(for_client=True)
- ws_url = f"{proxy_url}?token={session_token}"
-
- # Replace placeholders
- html_content = html_content.replace("{{SESSION_ID}}", session_id or "")
- html_content = html_content.replace("{{SESSION_TOKEN}}", session_token)
- html_content = html_content.replace("{{PROXY_URL}}", ws_url)
-
- return "โ
Interview started! Use the microphone button to speak.", gr.HTML(html_content)
-
- vs_auth_btn.click(
- fn=handle_authentication,
- inputs=[vs_email, vs_auth_code],
- outputs=[vs_auth_status, voice_interface_row, vs_session_token, vs_candidate_id, vs_session_id]
- )
-
- start_interview_btn.click(
- fn=start_interview,
- inputs=[vs_session_token, vs_candidate_id, vs_session_id],
- outputs=[vs_auth_status, voice_interface_html]
- )
+ interview_btn.click(fn=schedule_interview, inputs=[interview_email, session_state], outputs=[interview_output, session_state])
# ============================================================
- # TAB 4: Supervisor Chat
+ # TAB 3: Supervisor Chat
# ============================================================
with gr.Tab("๐ค Supervisor Chat"):
gr.Markdown("## ๐ฌ Chat with HR Supervisor Agent")
@@ -1049,13 +819,14 @@ def create_app():
""")
# Initialize chat on load with auto-scroll
- def init_chat_with_scroll():
- hist, tokens = init_chat()
- return hist, tokens
+ def init_chat_with_scroll(state):
+ hist, tokens, new_state = init_chat(state)
+ return hist, tokens, new_state
app.load(
fn=init_chat_with_scroll,
- outputs=[chat_history, token_info]
+ inputs=[session_state],
+ outputs=[chat_history, token_info, session_state]
).then(
fn=None,
js="""
@@ -1073,8 +844,8 @@ def create_app():
# Send message with auto-scroll
send_btn.click(
fn=chat_with_supervisor,
- inputs=[chat_input, chat_history],
- outputs=[chat_history, token_info, chat_input]
+ inputs=[chat_input, chat_history, session_state],
+ outputs=[chat_history, token_info, chat_input, session_state]
).then(
fn=None,
js="""
@@ -1093,7 +864,8 @@ def create_app():
# New chat with auto-scroll
new_chat_btn.click(
fn=init_chat,
- outputs=[chat_history, token_info]
+ inputs=[session_state],
+ outputs=[chat_history, token_info, session_state]
).then(
fn=None,
js="""
@@ -1121,12 +893,17 @@ if __name__ == "__main__":
print(f"Gradio version: {gr.__version__}")
app = create_app()
+ # Honor PORT if provided by hosting platform (e.g., Hugging Face Spaces)
+ # Some platforms inject quotes around PORT (e.g., "\"7860\""); strip them.
+ raw_port = os.getenv("PORT", "7860").strip().strip("\"'")
+ port = int(raw_port)
+
# In Gradio 6, theme and css are passed to launch(), not Blocks()
app.launch(
server_name="0.0.0.0",
- server_port=7860,
+ server_port=port,
theme=THEME,
css=CUSTOM_CSS,
# Try to force light mode if available
# dark_mode=False, # Uncomment if supported in your version
- )
\ No newline at end of file
+ )
diff --git a/src/frontend/streamlit/voice_screening_ui/proxy.py b/src/frontend/streamlit/voice_screening_ui/proxy.py
index b767f2956538780c0973440a7db04e56fd239201..fa33dfdecc24b43751ae974c076a0a07b49fb4c3 100644
--- a/src/frontend/streamlit/voice_screening_ui/proxy.py
+++ b/src/frontend/streamlit/voice_screening_ui/proxy.py
@@ -22,8 +22,8 @@ from dotenv import load_dotenv
from sqlalchemy import select
# Import database client and models
-from src.database.candidates.client import SessionLocal
-from src.database.candidates.models import Candidate
+from src.backend.database.candidates.client import SessionLocal
+from src.backend.database.candidates.models import Candidate
load_dotenv()
diff --git a/src/mcp_servers/calendar-mcp/src/calendar_actions.py b/src/mcp_servers/calendar-mcp/src/calendar_actions.py
index 19c424a94690d9e82862263fbdf2de871fb9a9c2..fac944cf2939a6542260421639f97a23ad6255f3 100644
--- a/src/mcp_servers/calendar-mcp/src/calendar_actions.py
+++ b/src/mcp_servers/calendar-mcp/src/calendar_actions.py
@@ -1,6 +1,4 @@
import logging
-import os
-import tempfile
from typing import List, Optional, Dict, Any
from google.oauth2.credentials import Credentials
@@ -8,40 +6,6 @@ from googleapiclient.discovery import build
logger = logging.getLogger(__name__)
-_CALENDAR_CREDS_FILE: Optional[str] = None
-_CALENDAR_TOKEN_FILE: Optional[str] = None
-
-
-def _write_temp(prefix: str, content: str) -> str:
- with tempfile.NamedTemporaryFile(delete=False, prefix=f"{prefix}_", suffix=".json") as tmp:
- tmp.write(content.encode("utf-8"))
- return tmp.name
-
-
-def _get_calendar_paths() -> tuple[str, str]:
- """
- Resolve credential + token paths from environment.
- Expects raw JSON content in CALENDAR_CREDS_JSON and CALENDAR_TOKEN_JSON.
- """
- global _CALENDAR_CREDS_FILE, _CALENDAR_TOKEN_FILE
-
- if _CALENDAR_CREDS_FILE and _CALENDAR_TOKEN_FILE:
- return _CALENDAR_TOKEN_FILE, _CALENDAR_CREDS_FILE
-
- token_json = os.getenv("CALENDAR_TOKEN_JSON")
- creds_json = os.getenv("CALENDAR_CREDS_JSON")
-
- if not token_json or not creds_json:
- raise RuntimeError(
- "Missing calendar credential JSON. "
- "Set CALENDAR_TOKEN_JSON and CALENDAR_CREDS_JSON environment variables "
- "to the raw file contents."
- )
-
- _CALENDAR_TOKEN_FILE = _write_temp("calendar_token", token_json)
- _CALENDAR_CREDS_FILE = _write_temp("calendar_creds", creds_json)
- return _CALENDAR_TOKEN_FILE, _CALENDAR_CREDS_FILE
-
def _load_credentials(token_path: str, scopes: List[str], client_secret_path: Optional[str] = None) -> Credentials:
"""
@@ -75,11 +39,10 @@ def _calendar_service(creds: Credentials):
# ------------------------------
def list_calendars(min_access_role: Optional[str] = None) -> Dict[str, Any]:
- token_path, client_secret_path = _get_calendar_paths()
creds = _load_credentials(
- token_path=token_path,
+ token_path="./secrets/gcalendar-mcp/calendar_token.json",
scopes=["https://www.googleapis.com/auth/calendar"],
- client_secret_path=client_secret_path,
+ client_secret_path="./secrets/gcalendar-mcp/calendar_credentials.json",
)
service = _calendar_service(creds)
@@ -99,11 +62,10 @@ def find_events(
max_results: int,
) -> Dict[str, Any]:
- token_path, client_secret_path = _get_calendar_paths()
creds = _load_credentials(
- token_path=token_path,
+ token_path="./secrets/gcalendar-mcp/calendar_token.json",
scopes=["https://www.googleapis.com/auth/calendar"],
- client_secret_path=client_secret_path,
+ client_secret_path="./secrets/gcalendar-mcp/calendar_credentials.json",
)
service = _calendar_service(creds)
@@ -139,11 +101,10 @@ def create_event(
attendee_emails: Optional[List[str]],
) -> Dict[str, Any]:
- token_path, client_secret_path = _get_calendar_paths()
creds = _load_credentials(
- token_path=token_path,
+ token_path="./secrets/gcalendar-mcp/calendar_token.json",
scopes=["https://www.googleapis.com/auth/calendar"],
- client_secret_path=client_secret_path,
+ client_secret_path="./secrets/gcalendar-mcp/calendar_credentials.json",
)
service = _calendar_service(creds)
@@ -170,11 +131,10 @@ def create_event(
def quick_add_event(calendar_id: str, text: str) -> Dict[str, Any]:
- token_path, client_secret_path = _get_calendar_paths()
creds = _load_credentials(
- token_path=token_path,
+ token_path="./secrets/gcalendar-mcp/calendar_token.json",
scopes=["https://www.googleapis.com/auth/calendar"],
- client_secret_path=client_secret_path,
+ client_secret_path="./secrets/gcalendar-mcp/calendar_credentials.json",
)
service = _calendar_service(creds)
@@ -196,11 +156,10 @@ def update_event(
location: Optional[str],
) -> Dict[str, Any]:
- token_path, client_secret_path = _get_calendar_paths()
creds = _load_credentials(
- token_path=token_path,
+ token_path="./secrets/gcalendar-mcp/calendar_token.json",
scopes=["https://www.googleapis.com/auth/calendar"],
- client_secret_path=client_secret_path,
+ client_secret_path="./secrets/gcalendar-mcp/calendar_credentials.json",
)
service = _calendar_service(creds)
@@ -232,11 +191,10 @@ def update_event(
def delete_event(calendar_id: str, event_id: str) -> Dict[str, Any]:
- token_path, client_secret_path = _get_calendar_paths()
creds = _load_credentials(
- token_path=token_path,
+ token_path="./secrets/gcalendar-mcp/calendar_token.json",
scopes=["https://www.googleapis.com/auth/calendar"],
- client_secret_path=client_secret_path,
+ client_secret_path="./secrets/gcalendar-mcp/calendar_credentials.json",
)
service = _calendar_service(creds)
@@ -250,11 +208,10 @@ def delete_event(calendar_id: str, event_id: str) -> Dict[str, Any]:
def add_attendee(calendar_id: str, event_id: str, attendee_emails: List[str]) -> Dict[str, Any]:
- token_path, client_secret_path = _get_calendar_paths()
creds = _load_credentials(
- token_path=token_path,
+ token_path="./secrets/gcalendar-mcp/calendar_token.json",
scopes=["https://www.googleapis.com/auth/calendar"],
- client_secret_path=client_secret_path,
+ client_secret_path="./secrets/gcalendar-mcp/calendar_credentials.json",
)
service = _calendar_service(creds)
@@ -281,11 +238,10 @@ def add_attendee(calendar_id: str, event_id: str, attendee_emails: List[str]) ->
def check_attendee_status(event_id: str, calendar_id: str, attendee_emails: Optional[List[str]]) -> Dict[str, Any]:
- token_path, client_secret_path = _get_calendar_paths()
creds = _load_credentials(
- token_path=token_path,
+ token_path="./secrets/gcalendar-mcp/calendar_token.json",
scopes=["https://www.googleapis.com/auth/calendar"],
- client_secret_path=client_secret_path,
+ client_secret_path="./secrets/gcalendar-mcp/calendar_credentials.json",
)
service = _calendar_service(creds)
@@ -305,11 +261,10 @@ def check_attendee_status(event_id: str, calendar_id: str, attendee_emails: Opti
def query_free_busy(calendar_ids: List[str], time_min: str, time_max: str) -> Dict[str, Any]:
- token_path, client_secret_path = _get_calendar_paths()
creds = _load_credentials(
- token_path=token_path,
+ token_path="./secrets/gcalendar-mcp/calendar_token.json",
scopes=["https://www.googleapis.com/auth/calendar"],
- client_secret_path=client_secret_path,
+ client_secret_path="./secrets/gcalendar-mcp/calendar_credentials.json",
)
service = _calendar_service(creds)
@@ -348,11 +303,10 @@ def analyze_busyness(time_min: str, time_max: str, calendar_id: str) -> Dict[str
def create_calendar(summary: str) -> Dict[str, Any]:
- token_path, client_secret_path = _get_calendar_paths()
creds = _load_credentials(
- token_path=token_path,
+ token_path="./secrets/gcalendar-mcp/calendar_token.json",
scopes=["https://www.googleapis.com/auth/calendar"],
- client_secret_path=client_secret_path,
+ client_secret_path="./secrets/gcalendar-mcp/calendar_credentials.json",
)
service = _calendar_service(creds)
diff --git a/src/mcp_servers/examples/gcalendar/interact_calendar.py b/src/mcp_servers/examples/gcalendar/interact_calendar.py
index 22bed8d9e8582164820497a429384b92229d1c07..23b94291390e1556a0da359ca75a62c04d400e25 100644
--- a/src/mcp_servers/examples/gcalendar/interact_calendar.py
+++ b/src/mcp_servers/examples/gcalendar/interact_calendar.py
@@ -22,7 +22,8 @@ MODEL = ChatOpenAI(model="gpt-4o", temperature=0)
settings = GoogleCalendarSettings()
CALENDAR_MCP_DIR = settings.calendar_mcp_dir
-CREDS, TOKEN = settings.materialize_files()
+CREDS = settings.creds
+TOKEN = settings.token
async def main():
client = MultiServerMCPClient({
diff --git a/src/mcp_servers/examples/gcalendar/list_server_tools.py b/src/mcp_servers/examples/gcalendar/list_server_tools.py
index 9ee70f4e517bf70a249cadb30efa1a12043ba818..ceb4ff967a1dcbaaaf0e2b012d495af0d6d5ab9d 100644
--- a/src/mcp_servers/examples/gcalendar/list_server_tools.py
+++ b/src/mcp_servers/examples/gcalendar/list_server_tools.py
@@ -21,7 +21,8 @@ from .settings import GoogleCalendarSettings
settings = GoogleCalendarSettings()
CALENDAR_MCP_DIR = settings.calendar_mcp_dir
-CREDS, TOKEN = settings.materialize_files()
+CREDS = settings.creds
+TOKEN = settings.token
#settings = GoogleCalendarSettings()
diff --git a/src/mcp_servers/examples/gcalendar/settings.py b/src/mcp_servers/examples/gcalendar/settings.py
index b474c3cb7a2bdc323894f216b9ba908f17299aa7..7d95cd40a3b5ecabffbe33ad3535d1bdf3ef9037 100644
--- a/src/mcp_servers/examples/gcalendar/settings.py
+++ b/src/mcp_servers/examples/gcalendar/settings.py
@@ -1,48 +1,19 @@
from pathlib import Path
-from tempfile import NamedTemporaryFile
-from pydantic import Field, PrivateAttr
+from pydantic import Field
from pydantic_settings import BaseSettings
BASE_PATH = Path(__file__).resolve().parents[4] # goes up to project root
-
class GoogleCalendarSettings(BaseSettings):
- """Settings for Google Calendar MCP server."""
-
- creds_json: str = Field(..., env="CALENDAR_CREDS_JSON")
- token_json: str = Field(..., env="CALENDAR_TOKEN_JSON")
+ """Settings for Google Calendar MCP server."""
+ creds: Path = Field(default_factory=lambda: BASE_PATH / "secrets/gcalendar-mcp/calendar_credentials.json")
+ token: Path = Field(default_factory=lambda: BASE_PATH / "secrets/gcalendar-mcp/calendar_token.json")
calendar_mcp_dir: Path = Field(default=BASE_PATH / "src/mcp_servers/calendar-mcp")
- _creds_file: Path | None = PrivateAttr(default=None)
- _token_file: Path | None = PrivateAttr(default=None)
-
- model_config = {
- "env_file": ".env",
- "env_file_encoding": "utf-8",
- }
-
- @staticmethod
- def _write_temp(prefix: str, content: str) -> Path:
- with NamedTemporaryFile(delete=False, prefix=f"{prefix}_", suffix=".json") as tmp:
- tmp.write(content.encode("utf-8"))
- return Path(tmp.name)
-
- def materialize_files(self) -> tuple[Path, Path]:
- """
- Writes the credential and token JSON from env to temp files
- and returns their paths for downstream tooling that expects paths.
- """
- if self._creds_file is None:
- self._creds_file = self._write_temp("gcalendar_creds", self.creds_json)
- if self._token_file is None:
- self._token_file = self._write_temp("gcalendar_token", self.token_json)
- return self._creds_file, self._token_file
-
if __name__ == "__main__":
settings = GoogleCalendarSettings()
- creds_path, token_path = settings.materialize_files()
print(settings)
- print(creds_path)
- print(token_path)
+ print(settings.creds)
+ print(settings.token)
\ No newline at end of file
diff --git a/src/mcp_servers/examples/gmail/list_server_tools.py b/src/mcp_servers/examples/gmail/list_server_tools.py
index 290434b53472c06143c7d59588a679d3749a3a8b..821e436cc20f65ac59cc9405e4848d9ae8ba3d77 100644
--- a/src/mcp_servers/examples/gmail/list_server_tools.py
+++ b/src/mcp_servers/examples/gmail/list_server_tools.py
@@ -15,21 +15,20 @@ from .settings import GMailSettings
UV_PATH = "/Users/sebastianwefers/.local/bin/uv" # <= full path to uv (important)
settings = GMailSettings()
-CREDS_PATH, TOKEN_PATH = settings.materialize_files()
async def main():
client = MultiServerMCPClient({
"gmail": {
- "command": UV_PATH,
- "args": [
- "--directory", str(settings.gmail_mcp_dir),
- "run", "gmail",
- "--creds-file-path", str(CREDS_PATH),
- "--token-path", str(TOKEN_PATH)
- ],
- "transport": "stdio",
- }
-})
+ "command": UV_PATH,
+ "args": [
+ "--directory", str(settings.gmail_mcp_dir),
+ "run", "gmail",
+ "--creds-file-path", str(settings.creds),
+ "--token-path", str(settings.token)
+ ],
+ "transport": "stdio",
+ }
+ })
tools = await client.get_tools()
print("\n๐ฌ Tools exposed by Gmail MCP server:\n")
diff --git a/src/mcp_servers/examples/gmail/send_email.py b/src/mcp_servers/examples/gmail/send_email.py
index 5fbcfb7c1315b269ef5f6b83b8ab21d3e55477fd..672c36bad470ca0acb095688492a770cdfbb5bf2 100644
--- a/src/mcp_servers/examples/gmail/send_email.py
+++ b/src/mcp_servers/examples/gmail/send_email.py
@@ -44,7 +44,6 @@ UV_PATH = "/Users/sebastianwefers/.local/bin/uv" # <= full path to uv (importan
MODEL = ChatOpenAI(model="gpt-4o", temperature=0)
settings = GMailSettings()
-CREDS_PATH, TOKEN_PATH = settings.materialize_files()
async def main():
@@ -56,8 +55,8 @@ async def main():
"args": [
"--directory", str(settings.gmail_mcp_dir),
"run", "gmail",
- "--creds-file-path", str(CREDS_PATH),
- "--token-path", str(TOKEN_PATH),
+ "--creds-file-path", str(settings.creds),
+ "--token-path", str(settings.token),
],
"transport": "stdio",
}
@@ -94,4 +93,4 @@ async def main():
print("~~~ END RESULT ~~~")
if __name__ == "__main__":
- asyncio.run(main())
+ asyncio.run(main())
\ No newline at end of file
diff --git a/src/mcp_servers/examples/gmail/settings.py b/src/mcp_servers/examples/gmail/settings.py
index 6b4b24ddb0befd450b3a8ffb361ebd7be3347b20..6ad82e3aa7adb73f453db9f956d1b3cca471b3f7 100644
--- a/src/mcp_servers/examples/gmail/settings.py
+++ b/src/mcp_servers/examples/gmail/settings.py
@@ -1,48 +1,19 @@
from pathlib import Path
-from tempfile import NamedTemporaryFile
-from pydantic import Field, PrivateAttr
+from pydantic import Field
from pydantic_settings import BaseSettings
BASE_PATH = Path(__file__).resolve().parents[4] # goes up to project root
-
class GMailSettings(BaseSettings):
"""Settings for Gmail MCP server."""
-
- creds_json: str = Field(..., env="GMAIL_CREDS_JSON")
- token_json: str = Field(..., env="GMAIL_TOKEN_JSON")
+ creds: Path = Field(default_factory=lambda: BASE_PATH / "secrets/gmail-mcp/credentials.json")
+ token: Path = Field(default_factory=lambda: BASE_PATH / "secrets/gmail-mcp/token.json")
gmail_mcp_dir: Path = Field(default=BASE_PATH / "src/mcp_servers/gmail-mcp")
- _creds_file: Path | None = PrivateAttr(default=None)
- _token_file: Path | None = PrivateAttr(default=None)
-
- model_config = {
- "env_file": ".env",
- "env_file_encoding": "utf-8",
- }
-
- @staticmethod
- def _write_temp(prefix: str, content: str) -> Path:
- with NamedTemporaryFile(delete=False, prefix=f"{prefix}_", suffix=".json") as tmp:
- tmp.write(content.encode("utf-8"))
- return Path(tmp.name)
-
- def materialize_files(self) -> tuple[Path, Path]:
- """
- Writes the credential and token JSON from env to temp files
- and returns their paths for downstream tooling that expects paths.
- """
- if self._creds_file is None:
- self._creds_file = self._write_temp("gmail_creds", self.creds_json)
- if self._token_file is None:
- self._token_file = self._write_temp("gmail_token", self.token_json)
- return self._creds_file, self._token_file
-
if __name__ == "__main__":
settings = GMailSettings()
- creds_path, token_path = settings.materialize_files()
print(settings)
- print(creds_path)
- print(token_path)
+ print(settings.creds)
+ print(settings.token)
\ No newline at end of file
diff --git a/start.sh b/start.sh
index 389e6f34cef86335e4a14e265d7ebcf65be0b362..5f7c7a05bb50a882d9f07bff8bbfa9eed983e21c 100644
--- a/start.sh
+++ b/start.sh
@@ -24,10 +24,6 @@ echo "[start.sh] POSTGRES_HOST=${POSTGRES_HOST}"
echo "[start.sh] POSTGRES_PORT=${POSTGRES_PORT}"
echo "[start.sh] POSTGRES_USER=${POSTGRES_USER}"
echo "[start.sh] POSTGRES_DB=${POSTGRES_DB}"
-creds_len=${#GMAIL_CREDS_JSON}
-token_len=${#GMAIL_TOKEN_JSON}
-echo "[start.sh] GMAIL_CREDS_JSON length=${creds_len:-0}"
-echo "[start.sh] GMAIL_TOKEN_JSON length=${token_len:-0}"
# Start local Postgres if not already running
export PGDATA=/var/lib/postgresql/data
diff --git a/tests/create_dummy_candidate.py b/tests/create_dummy_candidate.py
index 5e5b048d0e196bc0e8165ad0e102a51af8a83274..5e0cacfca1c3e330039f449a602806c4018a3a11 100644
--- a/tests/create_dummy_candidate.py
+++ b/tests/create_dummy_candidate.py
@@ -1,8 +1,8 @@
import uuid
from datetime import datetime
-from src.database.candidates.client import SessionLocal
-from src.database.candidates.models import Candidate, CVScreeningResult
-from src.state.candidate import CandidateStatus
+from src.backend.database.candidates.client import SessionLocal
+from src.backend.database.candidates.models import Candidate, CVScreeningResult
+from src.backend.state.candidate import CandidateStatus
def create_dummy_candidate():
with SessionLocal() as db:
diff --git a/tests/verify_voice_integration.py b/tests/verify_voice_integration.py
index 519fa742d75af601a6bec3fd7ae1cd241e484987..54b3a09932055e6c29d3a8a6e9feec304393da66 100644
--- a/tests/verify_voice_integration.py
+++ b/tests/verify_voice_integration.py
@@ -10,9 +10,9 @@ load_dotenv()
# Add src to path
sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
-from src.database.candidates.models import Candidate, CVScreeningResult, Base
-from src.database.candidates.client import SessionLocal, engine
-from src.agents.voice_screening.utils.questions import get_screening_questions
+from src.backend.database.candidates.models import Candidate, CVScreeningResult, Base
+from src.backend.database.candidates.client import SessionLocal, engine
+from src.backend.agents.voice_screening.utils.questions import get_screening_questions
def verify_integration():
print("Verifying integration...")