ketannnn committed on
Commit
c1dbdc6
·
1 Parent(s): 4bc38b6

feat: scaffold FastAPI app with async Postgres and Qdrant connection

Browse files
backend/alembic.ini ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [alembic]
2
+ script_location = alembic
3
+ prepend_sys_path = .
4
+ version_path_separator = os
5
+ sqlalchemy.url = driver://user:pass@localhost/dbname
6
+
7
+ [post_write_hooks]
8
+
9
+ [loggers]
10
+ keys = root,sqlalchemy,alembic
11
+
12
+ [handlers]
13
+ keys = console
14
+
15
+ [formatters]
16
+ keys = generic
17
+
18
+ [logger_root]
19
+ level = WARN
20
+ handlers = console
21
+ qualname =
22
+
23
+ [logger_sqlalchemy]
24
+ level = WARN
25
+ handlers =
26
+ qualname = sqlalchemy.engine
27
+
28
+ [logger_alembic]
29
+ level = INFO
30
+ handlers =
31
+ qualname = alembic
32
+
33
+ [handler_console]
34
+ class = StreamHandler
35
+ args = (sys.stderr,)
36
+ level = NOTSET
37
+ formatter = generic
38
+
39
+ [formatter_generic]
40
+ format = %(levelname)-5.5s [%(name)s] %(message)s
41
+ datefmt = %H:%M:%S
backend/alembic/README ADDED
@@ -0,0 +1 @@
 
 
1
+ Generic single-database configuration.
backend/alembic/env.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import re
3
+ from logging.config import fileConfig
4
+ from sqlalchemy import pool
5
+ from sqlalchemy.ext.asyncio import create_async_engine
6
+ from alembic import context
7
+
8
# Alembic's config object for this run; its values come from alembic.ini.
config = context.config

# Set up Python logging from the ini file, when a config file name is available.
_ini_path = config.config_file_name
if _ini_path is not None:
    fileConfig(_ini_path)

import sys, os

# Put the directory above alembic/ on sys.path so the `src` imports below resolve.
_here = os.path.dirname(__file__)
sys.path.insert(0, os.path.join(_here, ".."))

from src.database import Base
# The model classes are not referenced in this file; they are imported so the
# module is loaded before Base.metadata is read (presumably this is what
# registers their tables on Base — confirm against src.models).
from src.models import JobDescription, Candidate, MatchResult
from src.config import get_settings

# Metadata handed to context.configure() by the migration runners below.
target_metadata = Base.metadata
20
+
21
+
22
+ def _make_async_url(url: str) -> str:
23
+ url = re.sub(r"^postgresql:", "postgresql+asyncpg:", url)
24
+ url = re.sub(r"[?&]channel_binding=require", "", url)
25
+ return url
26
+
27
+
28
def run_migrations_offline() -> None:
    """Run migrations in offline mode.

    The Alembic context is configured with a database URL (taken from the
    application settings and passed through ``_make_async_url``) rather than
    a live connection, with literal binds and the ``named`` paramstyle.
    """
    database_url = _make_async_url(get_settings().database_url)
    context.configure(
        url=database_url,
        target_metadata=target_metadata,
        literal_binds=True,
        dialect_opts={"paramstyle": "named"},
    )
    transaction = context.begin_transaction()
    with transaction:
        context.run_migrations()
38
+
39
+
40
def do_run_migrations(connection):
    """Configure the Alembic context with ``connection`` and run the migrations.

    Called through ``connection.run_sync`` from ``run_async_migrations``.
    """
    context.configure(
        connection=connection,
        target_metadata=target_metadata,
    )
    transaction = context.begin_transaction()
    with transaction:
        context.run_migrations()
44
+
45
+
46
async def run_async_migrations() -> None:
    """Create an async engine, run the migrations on one connection, then dispose the engine.

    The engine is built from the application settings' database URL (passed
    through ``_make_async_url``) with ``NullPool``. The engine is disposed in
    a ``finally`` block so it is released even when a migration raises.
    """
    settings = get_settings()
    connectable = create_async_engine(
        _make_async_url(settings.database_url),
        poolclass=pool.NullPool,
    )
    try:
        async with connectable.connect() as connection:
            # Alembic's migration API is synchronous; run_sync bridges it onto
            # the async connection.
            await connection.run_sync(do_run_migrations)
    finally:
        await connectable.dispose()
52
+
53
+
54
def run_migrations_online() -> None:
    """Run the async migration routine to completion on a new event loop."""
    migration_coro = run_async_migrations()
    asyncio.run(migration_coro)
56
+
57
+
58
# Pick the runner matching the mode Alembic was invoked in, then execute it.
_run = run_migrations_offline if context.is_offline_mode() else run_migrations_online
_run()
backend/alembic/script.py.mako ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """${message}
2
+
3
+ Revision ID: ${up_revision}
4
+ Revises: ${down_revision | comma,n}
5
+ Create Date: ${create_date}
6
+ """
7
+ from typing import Sequence, Union
8
+ from alembic import op
9
+ import sqlalchemy as sa
10
+ ${imports if imports else ""}
11
+
12
+ revision: str = ${repr(up_revision)}
13
+ down_revision: Union[str, None] = ${repr(down_revision)}
14
+ branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
15
+ depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
16
+
17
+
18
+ def upgrade() -> None:
19
+ ${upgrades if upgrades else "pass"}
20
+
21
+
22
+ def downgrade() -> None:
23
+ ${downgrades if downgrades else "pass"}
backend/alembic/versions/001_initial_schema.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """initial schema
2
+
3
+ Revision ID: 001
4
+ Revises:
5
+ Create Date: 2026-04-11 10:00:00.000000
6
+ """
7
+ from typing import Sequence, Union
8
+ from alembic import op
9
+ import sqlalchemy as sa
10
+ from sqlalchemy.dialects.postgresql import UUID, JSON
11
+
12
# Revision identifiers read by Alembic.
revision: str = "001"
# No parent revision: this is the first migration in the chain.
down_revision: Union[str, None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
16
+
17
+
18
def upgrade() -> None:
    """Create the job_descriptions, candidates and match_results tables and their indexes."""

    def _timestamp(name):
        # Timezone-aware timestamp column whose value defaults to now() on the server.
        return sa.Column(name, sa.DateTime(timezone=True), server_default=sa.func.now())

    # --- job_descriptions -------------------------------------------------
    op.create_table(
        "job_descriptions",
        sa.Column("id", UUID(as_uuid=True), primary_key=True),
        sa.Column("title", sa.String(255), nullable=False),
        sa.Column("raw_text", sa.Text, nullable=False),
        sa.Column("parsed_requirements", JSON, nullable=True),
        sa.Column("required_skills", JSON, nullable=True),
        sa.Column("min_yoe", sa.Float, nullable=True),
        sa.Column("max_yoe", sa.Float, nullable=True),
        sa.Column("role_type", sa.String(100), nullable=True),
        sa.Column("engineer_type", sa.String(100), nullable=True),
        sa.Column("location", sa.String(255), nullable=True),
        sa.Column("remote_allowed", sa.Boolean, nullable=True),
        sa.Column("jd_quality", JSON, nullable=True),
        sa.Column("embedding_text", sa.Text, nullable=True),
        sa.Column("qdrant_id", sa.String(64), nullable=True),
        sa.Column("status", sa.String(32), nullable=False, server_default="pending"),
        _timestamp("created_at"),
        _timestamp("updated_at"),
    )

    # --- candidates -------------------------------------------------------
    op.create_table(
        "candidates",
        sa.Column("id", UUID(as_uuid=True), primary_key=True),
        sa.Column("external_id", sa.String(128), nullable=True),
        sa.Column("name", sa.String(255), nullable=True),
        sa.Column("email", sa.String(255), nullable=True),
        sa.Column("looking_for", sa.String(100), nullable=True),
        sa.Column("currently_employed", sa.Boolean, nullable=True),
        sa.Column("notice_period", sa.String(100), nullable=True),
        sa.Column("open_to_working_at", sa.String(255), nullable=True),
        sa.Column("role_type", sa.String(100), nullable=True),
        sa.Column("engineer_type", sa.String(100), nullable=True),
        sa.Column("years_of_experience", sa.Float, nullable=True),
        sa.Column("programming_languages", JSON, nullable=True),
        sa.Column("backend_frameworks", JSON, nullable=True),
        sa.Column("frontend_technologies", JSON, nullable=True),
        sa.Column("gen_ai_experience", sa.Boolean, nullable=True),
        sa.Column("recent_experience_type", sa.String(100), nullable=True),
        sa.Column("education_status", sa.String(100), nullable=True),
        sa.Column("degree", sa.String(255), nullable=True),
        sa.Column("parsed_summary", sa.Text, nullable=True),
        sa.Column("parsed_skills", sa.Text, nullable=True),
        sa.Column("parsed_work_experience", JSON, nullable=True),
        sa.Column("most_recent_company", sa.String(255), nullable=True),
        sa.Column("most_recent_company_description", sa.Text, nullable=True),
        sa.Column("most_recent_company_is_funded", sa.Boolean, nullable=True),
        sa.Column("most_recent_company_is_product_company", sa.Boolean, nullable=True),
        sa.Column("most_recent_company_total_funding", sa.Float, nullable=True),
        sa.Column("most_recent_company_funding_status", sa.String(100), nullable=True),
        sa.Column("time_in_current_company", sa.Float, nullable=True),
        sa.Column("is_actively_or_passively_looking", sa.String(100), nullable=True),
        sa.Column("growth_velocity", sa.Float, nullable=False, server_default="0.5"),
        sa.Column("embedding_hash", sa.String(64), nullable=True),
        sa.Column("qdrant_id", sa.String(64), nullable=True),
        _timestamp("created_at"),
        _timestamp("updated_at"),
    )
    op.create_index("ix_candidates_external_id", "candidates", ["external_id"])
    op.create_index("ix_candidates_qdrant_id", "candidates", ["qdrant_id"])

    # --- match_results ----------------------------------------------------
    # Both foreign keys cascade on delete, so removing a job description or a
    # candidate removes its match rows as well.
    op.create_table(
        "match_results",
        sa.Column("id", UUID(as_uuid=True), primary_key=True),
        sa.Column(
            "jd_id",
            UUID(as_uuid=True),
            sa.ForeignKey("job_descriptions.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column(
            "candidate_id",
            UUID(as_uuid=True),
            sa.ForeignKey("candidates.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("rank", sa.Integer, nullable=True),
        sa.Column("stage1_score", sa.Float, nullable=False, server_default="0"),
        sa.Column("stage2_score", sa.Float, nullable=True),
        sa.Column("final_score", sa.Float, nullable=False, server_default="0"),
        sa.Column("component_scores", JSON, nullable=True),
        sa.Column("gaps", JSON, nullable=True),
        sa.Column("explanation", sa.Text, nullable=True),
        sa.Column("explanation_generated_at", sa.DateTime(timezone=True), nullable=True),
        sa.Column("weights_used", JSON, nullable=True),
        _timestamp("created_at"),
    )
    op.create_index("ix_match_results_jd_id", "match_results", ["jd_id"])
    op.create_index("ix_match_results_candidate_id", "match_results", ["candidate_id"])
96
+
97
+
98
def downgrade() -> None:
    """Drop the three tables created by ``upgrade``."""
    # match_results goes first because it holds foreign keys to the other two.
    for table_name in ("match_results", "candidates", "job_descriptions"):
        op.drop_table(table_name)
backend/main.py CHANGED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from contextlib import asynccontextmanager
3
+ from fastapi import FastAPI
4
+ from fastapi.middleware.cors import CORSMiddleware
5
+ from qdrant_client import QdrantClient
6
+ from qdrant_client.models import Distance, VectorParams
7
+
8
+ from src.config import get_settings
9
+ from src.database import engine
10
+ from src.models import JobDescription, Candidate, MatchResult
11
+ from src.routers import jds, candidates, matching
12
+
13
settings = get_settings()

# Module-wide Qdrant client. It is None until lifespan() assigns it at startup.
_qdrant_client: QdrantClient | None = None


def get_qdrant() -> QdrantClient:
    """Return the shared Qdrant client.

    Returns:
        The client stored in the module-level ``_qdrant_client``.

    Raises:
        RuntimeError: If no client has been assigned yet. This replaces the
            previous behaviour of returning ``None`` despite the
            ``QdrantClient`` return annotation.
    """
    if _qdrant_client is None:
        raise RuntimeError("Qdrant client is not initialized; the application lifespan has not started")
    return _qdrant_client
20
+
21
+
22
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Open the Qdrant client for the application's lifetime and close it afterwards.

    On entry the client is created from the settings, the configured
    collection is created if it does not exist yet, and the client is stored
    both in the module-level ``_qdrant_client`` and on ``app.state.qdrant``.
    The client is closed in a ``finally`` block, so it is released when
    startup fails part-way or shutdown is triggered by an exception.
    """
    global _qdrant_client
    client = QdrantClient(url=settings.qdrant_url, api_key=settings.qdrant_api_key)
    _qdrant_client = client
    try:
        # Ask for the one collection directly instead of listing every collection.
        if not client.collection_exists(settings.collection_name):
            client.create_collection(
                collection_name=settings.collection_name,
                vectors_config=VectorParams(size=settings.vector_size, distance=Distance.COSINE),
            )

        app.state.qdrant = client
        yield
    finally:
        client.close()
37
+
38
+
39
app = FastAPI(
    title="TalentPulse — AI Candidate Matching",
    description="Two-stage retrieval + reranking pipeline for matching JDs against 100K+ candidates",
    version="1.0.0",
    lifespan=lifespan,
)

# CORS: wildcard origins/methods/headers may not be combined with
# allow_credentials=True (see the FastAPI CORSMiddleware documentation), so
# credentials are disabled while every origin is allowed. To support
# cookie/credentialed requests, list the trusted origins explicitly and turn
# allow_credentials back on.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Mount each feature router under its own /api prefix.
app.include_router(jds.router, prefix="/api/jds", tags=["Job Descriptions"])
app.include_router(candidates.router, prefix="/api/candidates", tags=["Candidates"])
app.include_router(matching.router, prefix="/api/match", tags=["Matching"])
57
+
58
+
59
@app.get("/health")
async def health():
    """Return a static liveness payload with the application version.

    The version is read from ``app.version`` so it cannot drift from the
    value passed to ``FastAPI(version=...)``.
    """
    return {"status": "ok", "version": app.version}
backend/requirements.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.115.0
2
+ uvicorn[standard]==0.30.6
3
+ sqlalchemy[asyncio]==2.0.36
4
+ asyncpg==0.30.0
5
+ psycopg[binary]==3.2.3
6
+ alembic==1.13.3
7
+ qdrant-client==1.11.0
8
+ celery[redis]==5.4.0
9
+ redis==5.1.0
10
+ sentence-transformers==3.3.0
11
+ FlagEmbedding==1.2.15
12
+ groq==0.12.0
13
+ python-multipart==0.0.12
14
+ pydantic==2.9.2
15
+ pydantic-settings==2.5.2
16
+ pandas==2.2.3
17
+ numpy==1.26.4
18
+ python-dotenv==1.0.1
19
+ httpx==0.27.2
20
+ aiofiles==24.1.0
backend/src/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # src package
backend/src/config.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic_settings import BaseSettings, SettingsConfigDict
2
+ from functools import lru_cache
3
+
4
+
5
class Settings(BaseSettings):
    """Application configuration loaded through pydantic-settings."""

    # Values may come from a UTF-8 ".env" file; entries that do not match a
    # field below are ignored.
    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # Fields without a default must be supplied by the environment / .env file.
    database_url: str
    qdrant_url: str
    qdrant_api_key: str
    redis_url: str
    groq_api_key: str

    # Model and collection names, with defaults.
    groq_model: str = "llama-3.3-70b-versatile"
    embedding_model: str = "BAAI/bge-small-en-v1.5"
    reranker_model: str = "BAAI/bge-reranker-v2-m3"
    collection_name: str = "candidates_v1"
    # Vector size used when creating the Qdrant collection; presumably the
    # output dimension of embedding_model — confirm if the model changes.
    vector_size: int = 384
18
+
19
+
20
@lru_cache
def get_settings() -> Settings:
    """Build the ``Settings`` object on first call and return the cached instance afterwards."""
    loaded = Settings()
    return loaded
backend/src/database.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from typing import AsyncGenerator
3
+ from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker
4
+ from sqlalchemy.orm import DeclarativeBase
5
+ from .config import get_settings
6
+
7
+
8
+ def _make_async_url(url: str) -> str:
9
+ url = re.sub(r"^postgresql:", "postgresql+asyncpg:", url)
10
+ url = re.sub(r"[?&]channel_binding=require", "", url)
11
+ return url
12
+
13
+
14
settings = get_settings()

# Shared async engine for the application. pool_pre_ping is enabled and SQL
# echoing is off.
engine = create_async_engine(
    _make_async_url(settings.database_url),
    echo=False,
    pool_pre_ping=True,
)

# Session factory bound to the engine; expire_on_commit is disabled.
AsyncSessionLocal = async_sessionmaker(engine, expire_on_commit=False)
17
+
18
+
19
class Base(DeclarativeBase):
    """Declarative base class; its ``metadata`` is what the Alembic environment targets."""
21
+
22
+
23
async def get_db() -> AsyncGenerator[AsyncSession, None]:
    """Yield a database session created by ``AsyncSessionLocal``.

    The session is opened with ``async with``, so its context manager exits
    when the generator is resumed or closed after the ``yield``.
    """
    async with AsyncSessionLocal() as session:
        yield session