Doanh Van Vu commited on
Commit
1904012
·
1 Parent(s): 6dc87ae

Initial commit of the MentorMe AI Recommendation Server, including core application structure, environment configuration, Docker setup, and essential services for mentor recommendations and health checks.

Browse files
.env.example ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Pinecone Configuration
2
+ PINECONE_API_KEY=your_pinecone_api_key_here
3
+ PINECONE_INDEX=mentorme-mentors
4
+ PINECONE_ENVIRONMENT=us-east-1-aws
5
+ PINECONE_DIMENSION=1024
6
+
7
+ # Server Configuration
8
+ HOST=0.0.0.0
9
+ PORT=7860
10
+ DEBUG=False
11
+
12
+ # CORS Configuration
13
+ CORS_ORIGINS=http://localhost:8080
14
+
15
+ # Model Configuration
16
+ EMBEDDING_MODEL_NAME=AITeamVN/Vietnamese_Embedding
17
+ USE_FP16=True
18
+
19
+ # Recommendation Configuration
20
+ RECOMMENDATION_TOP_K=30
21
+ RECOMMENDATION_FINAL_COUNT=8
.gitignore ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ *.egg-info/
20
+ .installed.cfg
21
+ *.egg
22
+
23
+ # Virtual Environment
24
+ venv/
25
+ env/
26
+ ENV/
27
+
28
+ # Environment variables
29
+ .env
30
+
31
+ # IDE
32
+ .vscode/
33
+ .idea/
34
+ *.swp
35
+ *.swo
36
+ *~
37
+
38
+ # Model cache (optional - can be large)
39
+ # .cache/
40
+
41
+ # Logs
42
+ *.log
43
+
44
+ # OS
45
+ .DS_Store
46
+ Thumbs.db
47
+
48
+
49
+
Dockerfile ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.12-slim
2
+
3
+ WORKDIR /app
4
+
5
+ # Install system dependencies
6
+ RUN apt-get update && apt-get install -y \
7
+ build-essential \
8
+ && rm -rf /var/lib/apt/lists/*
9
+
10
+ # Copy requirements first for better caching
11
+ COPY requirements.txt .
12
+ RUN pip install --no-cache-dir -r requirements.txt
13
+
14
+ # Copy application code
15
+ COPY . .
16
+
17
+ # Expose port (Hugging Face Spaces will set PORT env var)
18
+ EXPOSE 7860
19
+
20
+ # Run the application (PORT will be set by Hugging Face Spaces)
21
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
22
+
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
- title: Mentorme
3
- emoji: 😻
4
- colorFrom: green
5
- colorTo: yellow
6
  sdk: docker
7
  pinned: false
8
  ---
 
1
  ---
2
+ title: Ai Service
3
+ emoji: 🤝
4
+ colorFrom: indigo
5
+ colorTo: indigo
6
  sdk: docker
7
  pinned: false
8
  ---
app.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, Request
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ from contextlib import asynccontextmanager
4
+ import os
5
+ import logging
6
+
7
+ from config.settings import get_settings
8
+ from routers import mentors, recommend, health
9
+ from services.embedding_service import EmbeddingService
10
+
11
+ logging.basicConfig(
12
+ level=logging.INFO,
13
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
14
+ )
15
+ logger = logging.getLogger(__name__)
16
+
17
+ settings = get_settings()
18
+
19
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook: load the embedding model on startup,
    drop the reference again on shutdown."""
    logger.info("Starting application...")
    try:
        logger.info("Loading embedding service...")
        # Singleton service; constructing it triggers the (slow) model load.
        app.state.embedding_service = EmbeddingService()
        logger.info("Application started successfully")
    except Exception as e:
        logger.error(f"Failed to start application: {str(e)}", exc_info=True)
        raise
    yield
    logger.info("Shutting down application...")
    if hasattr(app.state, 'embedding_service'):
        del app.state.embedding_service
34
+
35
# FastAPI application wired with the lifespan hook, CORS and the v1 routers.
app = FastAPI(
    title="MentorMe AI Recommendation Server",
    description="AI-powered mentor-mentee recommendation using Vietnamese_Embedding and Pinecone",
    version="1.0.0",
    lifespan=lifespan
)

# Fall back to allowing every origin when CORS_ORIGINS resolves to empty.
cors_origins = settings.CORS_ORIGINS
if not cors_origins:
    cors_origins = ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=cors_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Versioned API surface: health probe, mentor indexing, recommendations.
app.include_router(health.router, prefix="/api/v1", tags=["Health"])
app.include_router(mentors.router, prefix="/api/v1", tags=["Mentors"])
app.include_router(recommend.router, prefix="/api/v1", tags=["Recommendations"])
57
+
58
@app.middleware("http")
async def log_requests(request: Request, call_next):
    """Log every request line and the resulting status code."""
    method, url = request.method, request.url
    logger.info(f"Incoming request: {method} {url}")
    logger.debug(f"Headers: {dict(request.headers)}")
    response = await call_next(request)
    logger.info(f"Response status: {response.status_code}")
    return response
65
+
66
+ @app.get("/")
67
+ async def root():
68
+ return {
69
+ "message": "MentorMe AI Recommendation Server",
70
+ "version": "1.0.0",
71
+ "status": "running",
72
+ "endpoints": {
73
+ "health": "/api/v1/health",
74
+ "upsert_mentor": "/api/v1/mentors/upsert",
75
+ "recommend": "/api/v1/recommend"
76
+ }
77
+ }
78
+
79
+ @app.post("/test-post")
80
+ async def test_post(request: Request):
81
+ logger.info(f"Test POST received: {request.method} {request.url}")
82
+ logger.info(f"Headers: {dict(request.headers)}")
83
+ body = await request.body()
84
+ logger.info(f"Body: {body}")
85
+ return {
86
+ "message": "POST request received successfully",
87
+ "method": request.method,
88
+ "url": str(request.url),
89
+ "headers": dict(request.headers)
90
+ }
91
+
config/__init__.py ADDED
File without changes
config/settings.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic_settings import BaseSettings, SettingsConfigDict
2
+ from pydantic import field_validator
3
+ from typing import List, Union
4
+ import os
5
+ from dotenv import load_dotenv
6
+
7
+ load_dotenv()
8
+
9
class Settings(BaseSettings):
    """Application configuration, populated from environment variables / .env."""

    model_config = SettingsConfigDict(
        env_file=".env",
        case_sensitive=True,
        extra="ignore"  # tolerate unknown env vars instead of failing validation
    )

    # Pinecone vector database
    PINECONE_API_KEY: str = ""
    PINECONE_INDEX: str = "mentorme-mentors"
    PINECONE_ENVIRONMENT: str = "us-east-1-aws"
    PINECONE_DIMENSION: int = 1024

    # HTTP server; PORT default honors a platform-injected PORT env var
    # (evaluated once at class-definition time)
    HOST: str = "0.0.0.0"
    PORT: int = int(os.getenv("PORT", "7860"))
    DEBUG: bool = False

    # Embedding model
    EMBEDDING_MODEL_NAME: str = "AITeamVN/Vietnamese_Embedding"
    USE_FP16: bool = True

    # Recommendation pipeline tuning
    RECOMMENDATION_TOP_K: int = 30
    RECOMMENDATION_RERANK_K: int = 15
    RECOMMENDATION_FINAL_COUNT: int = 6
    SEMANTIC_WEIGHT: float = 0.7
    RULE_BASED_WEIGHT: float = 0.3

    @property
    def CORS_ORIGINS(self) -> List[str]:
        """Parse the CORS_ORIGINS env var; "*" (the default) allows all origins.

        Read directly from os.environ (not a pydantic field), so it reflects
        the environment at call time.
        """
        cors_str = os.getenv("CORS_ORIGINS", "*")
        if cors_str == "*":
            return ["*"]
        return [origin.strip() for origin in cors_str.split(",") if origin.strip()]

    @field_validator("DEBUG", "USE_FP16", mode="before")
    @classmethod
    def parse_bool(cls, v: Union[str, bool]) -> bool:
        # Accept common truthy spellings found in env files
        if isinstance(v, str):
            return v.lower() in ("true", "1", "yes", "on")
        return bool(v)

    @field_validator("PORT", "PINECONE_DIMENSION", "RECOMMENDATION_TOP_K", "RECOMMENDATION_RERANK_K", "RECOMMENDATION_FINAL_COUNT", mode="before")
    @classmethod
    def parse_int(cls, v: Union[str, int]) -> int:
        # Coerce string env values to int (raises ValueError on bad input)
        if isinstance(v, str):
            return int(v)
        return v

    @field_validator("SEMANTIC_WEIGHT", "RULE_BASED_WEIGHT", mode="before")
    @classmethod
    def parse_float(cls, v: Union[str, float]) -> float:
        # Coerce string env values to float (raises ValueError on bad input)
        if isinstance(v, str):
            return float(v)
        return v
61
+
62
# Fix: the annotation claimed `Settings` while holding None; make the
# optional-ness explicit (3.12 union syntax, matching the Docker base image).
_settings: Settings | None = None  # lazily-created process-wide singleton


def get_settings() -> Settings:
    """Return the shared Settings instance, creating it on first call.

    Not thread-safe; at worst two callers racing here build two equivalent
    Settings objects, one of which wins.
    """
    global _settings
    if _settings is None:
        _settings = Settings()
    return _settings
69
+
70
+
71
+
main.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ import uvicorn
4
+ from contextlib import asynccontextmanager
5
+
6
+ from config.settings import get_settings
7
+ from routers import mentors, recommend, health
8
+ from services.embedding_service import EmbeddingService
9
+
10
+ settings = get_settings()
11
+
12
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the embedding model at startup; drop the reference on shutdown.

    NOTE(review): unlike app.py's lifespan, this variant does not log or
    guard the model load — confirm which entry point is canonical.
    """
    embedding_service = EmbeddingService()
    app.state.embedding_service = embedding_service
    yield
    if hasattr(app.state, 'embedding_service'):
        del app.state.embedding_service
19
+
20
+ app = FastAPI(
21
+ title="MentorMe AI Recommendation Server",
22
+ description="AI-powered mentor-mentee recommendation using Vietnamese_Embedding and Pinecone",
23
+ version="1.0.0",
24
+ lifespan=lifespan
25
+ )
26
+
27
+ app.add_middleware(
28
+ CORSMiddleware,
29
+ allow_origins=settings.CORS_ORIGINS,
30
+ allow_credentials=True,
31
+ allow_methods=["*"],
32
+ allow_headers=["*"],
33
+ )
34
+
35
+ app.include_router(health.router, prefix="/api/v1", tags=["Health"])
36
+ app.include_router(mentors.router, prefix="/api/v1", tags=["Mentors"])
37
+ app.include_router(recommend.router, prefix="/api/v1", tags=["Recommendations"])
38
+
39
+ @app.get("/")
40
+ async def root():
41
+ return {
42
+ "message": "MentorMe AI Recommendation Server",
43
+ "version": "1.0.0",
44
+ "status": "running"
45
+ }
46
+
47
+ if __name__ == "__main__":
48
+ uvicorn.run(
49
+ "main:app",
50
+ host=settings.HOST,
51
+ port=settings.PORT,
52
+ reload=settings.DEBUG,
53
+ log_level="info"
54
+ )
55
+
56
+
57
+
models/__init__.py ADDED
File without changes
models/schemas.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field
2
+ from typing import List, Optional, Dict, Any, Union
3
+
4
+ class SkillDto(BaseModel):
5
+ id: Optional[int] = None
6
+ name: str
7
+
8
+ class DomainDto(BaseModel):
9
+ id: Optional[int] = None
10
+ name: str
11
+
12
+ class CareerDto(BaseModel):
13
+ id: Optional[int] = None
14
+ name: str
15
+
16
+ class ExperienceDto(BaseModel):
17
+ company: Optional[str] = None
18
+ position: Optional[str] = None
19
+ start_date: Optional[str] = None
20
+ end_date: Optional[str] = None
21
+ description: Optional[str] = None
22
+
23
+ class EducationDto(BaseModel):
24
+ school: Optional[str] = None
25
+ degree: Optional[str] = None
26
+ start_date: Optional[str] = None
27
+ end_date: Optional[str] = None
28
+ description: Optional[str] = None
29
+
30
+ class ScheduleDto(BaseModel):
31
+ day_of_week: int
32
+ start_time: str
33
+ end_time: str
34
+ is_active: int = 1
35
+
36
class MentorUpsertRequest(BaseModel):
    """Payload for indexing (or re-indexing) a mentor profile.

    Structured fields accept typed DTOs, raw dicts, or plain strings so
    upstream services with looser schemas can still post data.
    """
    mentor_id: int  # primary key; also becomes the Pinecone vector id
    full_name: Optional[str] = None
    bio: Optional[str] = None
    career: Optional[Union[CareerDto, Dict[str, Any], str]] = None
    skills: Optional[List[Union[SkillDto, Dict[str, Any], str]]] = None
    domains: Optional[List[Union[DomainDto, Dict[str, Any], str]]] = None
    experiences: Optional[List[Union[ExperienceDto, Dict[str, Any]]]] = None
    educations: Optional[List[Union[EducationDto, Dict[str, Any]]]] = None
    rating: Optional[float] = Field(None, ge=0.0, le=5.0)  # 0-5 star average
    total_ratings: Optional[int] = Field(None, ge=0)
    session_count: Optional[int] = Field(None, ge=0)
    available_slots: Optional[int] = Field(None, ge=0)
    schedules: Optional[List[Union[ScheduleDto, Dict[str, Any]]]] = None
    # Numeric ids mirrored into Pinecone metadata for filtered queries
    career_id: Optional[int] = None
    skill_ids: Optional[List[int]] = None
    domain_ids: Optional[List[int]] = None
    status: Optional[str] = "ACTIVATED"
    has_availability: Optional[bool] = False
55
+
56
+ class MentorUpsertResponse(BaseModel):
57
+ success: bool
58
+ message: str
59
+ mentor_id: int
60
+
61
class RecommendationRequest(BaseModel):
    """Mentee profile and constraints for a recommendation query.

    Free-text fields feed the semantic query; the *_ids / min_rating /
    require_availability fields become Pinecone metadata filters.
    """
    goals: Optional[str] = None
    desired_skills: Optional[List[Union[SkillDto, Dict[str, Any], str]]] = None
    current_skills: Optional[List[Union[SkillDto, Dict[str, Any], str]]] = None
    interests: Optional[List[Union[DomainDto, Dict[str, Any], str]]] = None
    domains: Optional[List[Union[DomainDto, Dict[str, Any], str]]] = None
    availability: Optional[str] = None
    preferred_availability: Optional[str] = None
    min_rating: Optional[float] = Field(None, ge=0.0, le=5.0)
    require_availability: Optional[bool] = False
    skill_ids: Optional[List[int]] = None
    domain_ids: Optional[List[int]] = None
    career_id: Optional[int] = None
    status: Optional[str] = "ACTIVATED"
    # Overrides for the configured candidate-pool / result-size defaults
    top_k: Optional[int] = Field(None, ge=1, le=100)
    final_count: Optional[int] = Field(None, ge=1, le=20)
77
+
78
+ class RecommendedMentor(BaseModel):
79
+ mentor_id: str
80
+ score: float
81
+ semantic_similarity: float
82
+ reason: str
83
+ metadata: Dict[str, Any]
84
+
85
+ class RecommendationResponse(BaseModel):
86
+ mentors: List[RecommendedMentor]
87
+ count: int
88
+ query_text: Optional[str] = None
89
+
90
+ class HealthResponse(BaseModel):
91
+ status: str
92
+ embedding_model: Dict[str, Any]
93
+ pinecone_index: Dict[str, Any]
94
+
95
+ class BatchUpsertRequest(BaseModel):
96
+ mentors: List[MentorUpsertRequest]
97
+
98
+ class BatchUpsertResponse(BaseModel):
99
+ success: bool
100
+ message: str
101
+ upserted_count: int
102
+ failed_count: int
103
+
104
+
105
+
requirements.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.104.1
2
+ uvicorn[standard]==0.24.0
3
+ pydantic==2.5.0
4
+ pydantic-settings==2.1.0
5
+ python-dotenv==1.0.0
6
+ pinecone-client>=3.2.0,<6.0.0
7
+ sentence-transformers>=2.2.0
8
+ torch>=2.0.0
9
+ numpy>=1.24.0
10
+ python-multipart==0.0.6
11
+ transformers>=4.30.0
12
+ accelerate>=0.20.0
13
+ sentencepiece>=0.1.99
14
+
15
+
16
+
routers/__init__.py ADDED
File without changes
routers/health.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, Request
2
+ from models.schemas import HealthResponse
3
+ from services.embedding_service import EmbeddingService
4
+ from services.pinecone_service import PineconeService
5
+ import logging
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+ router = APIRouter()
10
+
11
+ @router.get("/health", response_model=HealthResponse)
12
+ async def health_check(request: Request):
13
+ try:
14
+ embedding_service: EmbeddingService = request.app.state.embedding_service
15
+ embedding_info = embedding_service.get_model_info()
16
+
17
+ pinecone_service = PineconeService()
18
+ pinecone_stats = pinecone_service.get_index_stats()
19
+
20
+ return HealthResponse(
21
+ status="healthy",
22
+ embedding_model=embedding_info,
23
+ pinecone_index=pinecone_stats
24
+ )
25
+ except Exception as e:
26
+ logger.error(f"Health check failed: {str(e)}")
27
+ return HealthResponse(
28
+ status="unhealthy",
29
+ embedding_model={"error": str(e)},
30
+ pinecone_index={"error": str(e)}
31
+ )
32
+
33
+
34
+
routers/mentors.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, Request, HTTPException, status
2
+ from models.schemas import (
3
+ MentorUpsertRequest,
4
+ MentorUpsertResponse,
5
+ BatchUpsertRequest,
6
+ BatchUpsertResponse
7
+ )
8
+ from services.recommendation_service import RecommendationService
9
+ import logging
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+ router = APIRouter()
14
+
15
+ @router.post("/mentors/upsert", response_model=MentorUpsertResponse)
16
+ async def upsert_mentor(
17
+ request: Request,
18
+ mentor_data: MentorUpsertRequest
19
+ ):
20
+ try:
21
+ logger.info(f"Received upsert request for mentor ID: {mentor_data.mentor_id}")
22
+ logger.debug(f"Request headers: {dict(request.headers)}")
23
+ logger.debug(f"Request method: {request.method}")
24
+ logger.debug(f"Request URL: {request.url}")
25
+
26
+ recommendation_service = RecommendationService()
27
+
28
+ mentor_dict = mentor_data.model_dump()
29
+ success = recommendation_service.upsert_mentor(mentor_dict)
30
+
31
+ if success:
32
+ logger.info(f"Successfully upserted mentor ID: {mentor_data.mentor_id}")
33
+ return MentorUpsertResponse(
34
+ success=True,
35
+ message="Mentor upserted successfully",
36
+ mentor_id=mentor_data.mentor_id
37
+ )
38
+ else:
39
+ logger.error(f"Failed to upsert mentor ID: {mentor_data.mentor_id}")
40
+ raise HTTPException(
41
+ status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
42
+ detail="Failed to upsert mentor"
43
+ )
44
+ except HTTPException:
45
+ raise
46
+ except Exception as e:
47
+ logger.error(f"Error upserting mentor: {str(e)}", exc_info=True)
48
+ raise HTTPException(
49
+ status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
50
+ detail=f"Failed to upsert mentor: {str(e)}"
51
+ )
52
+
53
+ @router.post("/mentors/batch-upsert", response_model=BatchUpsertResponse)
54
+ async def batch_upsert_mentors(
55
+ request: Request,
56
+ batch_data: BatchUpsertRequest
57
+ ):
58
+ try:
59
+ recommendation_service = RecommendationService()
60
+ upserted_count = 0
61
+ failed_count = 0
62
+
63
+ for mentor_data in batch_data.mentors:
64
+ try:
65
+ mentor_dict = mentor_data.model_dump()
66
+ success = recommendation_service.upsert_mentor(mentor_dict)
67
+ if success:
68
+ upserted_count += 1
69
+ else:
70
+ failed_count += 1
71
+ except Exception as e:
72
+ logger.error(f"Error upserting mentor {mentor_data.mentor_id}: {str(e)}")
73
+ failed_count += 1
74
+
75
+ return BatchUpsertResponse(
76
+ success=True,
77
+ message=f"Batch upsert completed: {upserted_count} succeeded, {failed_count} failed",
78
+ upserted_count=upserted_count,
79
+ failed_count=failed_count
80
+ )
81
+ except Exception as e:
82
+ logger.error(f"Error in batch upsert: {str(e)}")
83
+ raise HTTPException(
84
+ status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
85
+ detail=f"Failed to batch upsert mentors: {str(e)}"
86
+ )
87
+
88
@router.delete("/mentors/{mentor_id}")
async def delete_mentor(
    request: Request,
    mentor_id: int
):
    """Remove a mentor's vector from the index by numeric id.

    Raises HTTP 500 when deletion reports failure or throws.
    """
    try:
        logger.info(f"Received delete request for mentor ID: {mentor_id}")

        recommendation_service = RecommendationService()
        # Pinecone ids are strings, so the int path param is stringified here
        success = recommendation_service.delete_mentor(str(mentor_id))

        if success:
            logger.info(f"Successfully deleted mentor ID: {mentor_id}")
            return {
                "success": True,
                "message": "Mentor deleted successfully",
                "mentor_id": mentor_id
            }
        else:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail="Failed to delete mentor"
            )
    except HTTPException:
        # Re-raise unchanged so the generic handler below doesn't re-wrap it
        raise
    except Exception as e:
        logger.error(f"Error deleting mentor: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to delete mentor: {str(e)}"
        )
119
+
routers/recommend.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, Request, HTTPException, status
2
+ from models.schemas import RecommendationRequest, RecommendationResponse, RecommendedMentor
3
+ from services.recommendation_service import RecommendationService
4
+ from utils.text_builder import build_mentee_query_text
5
+ import logging
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+ router = APIRouter()
10
+
11
+ @router.post("/recommend", response_model=RecommendationResponse)
12
+ async def recommend_mentors(
13
+ request: Request,
14
+ recommendation_request: RecommendationRequest
15
+ ):
16
+ try:
17
+ recommendation_service = RecommendationService()
18
+
19
+ mentee_dict = recommendation_request.model_dump(exclude_none=True)
20
+ query_text = build_mentee_query_text(mentee_dict)
21
+
22
+ recommended = recommendation_service.recommend_mentors(
23
+ mentee_dict,
24
+ top_k=recommendation_request.top_k,
25
+ final_count=recommendation_request.final_count
26
+ )
27
+
28
+ mentors = [
29
+ RecommendedMentor(
30
+ mentor_id=rec["mentor_id"],
31
+ score=rec["score"],
32
+ semantic_similarity=rec["semantic_similarity"],
33
+ reason=rec["reason"],
34
+ metadata=rec["metadata"]
35
+ )
36
+ for rec in recommended
37
+ ]
38
+
39
+ return RecommendationResponse(
40
+ mentors=mentors,
41
+ count=len(mentors),
42
+ query_text=query_text
43
+ )
44
+ except Exception as e:
45
+ logger.error(f"Error recommending mentors: {str(e)}")
46
+ raise HTTPException(
47
+ status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
48
+ detail=f"Failed to recommend mentors: {str(e)}"
49
+ )
50
+
51
+
52
+
services/__init__.py ADDED
File without changes
services/embedding_service.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from sentence_transformers import SentenceTransformer
3
+ import logging
4
+ from typing import List, Union
5
+ from config.settings import get_settings
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
class EmbeddingService:
    """Singleton wrapper around a SentenceTransformer embedding model.

    The model handle lives on the class (_model), so it is loaded at most
    once per process no matter how many times EmbeddingService() is built.
    """

    _instance = None  # the singleton instance
    _model = None  # shared SentenceTransformer; loaded lazily in __init__

    def __new__(cls):
        # Classic singleton: always hand back the same instance
        if cls._instance is None:
            cls._instance = super(EmbeddingService, cls).__new__(cls)
        return cls._instance

    def __init__(self):
        # Only the first construction pays the model-load cost
        if EmbeddingService._model is None:
            self._load_model()

    def _load_model(self):
        """Load the configured model onto GPU when available, else CPU."""
        settings = get_settings()
        try:
            logger.info(f"Loading embedding model: {settings.EMBEDDING_MODEL_NAME}")
            device = "cuda" if torch.cuda.is_available() else "cpu"
            logger.info(f"Using device: {device}")

            EmbeddingService._model = SentenceTransformer(
                settings.EMBEDDING_MODEL_NAME,
                device=device
            )

            # Raise the default sequence cap to fit long profiles/queries
            EmbeddingService._model.max_seq_length = 2048

            logger.info("Embedding model loaded successfully")
        except Exception as e:
            logger.error(f"Failed to load embedding model: {str(e)}")
            raise

    def encode(
        self,
        texts: Union[str, List[str]],
        is_query: bool = False,
        batch_size: int = 32,
        max_length: int = 2048
    ) -> Union[List[float], List[List[float]]]:
        """Embed one string or a list of strings.

        Args:
            texts: a single text (returns one vector) or a list of texts
                (returns a list of vectors).
            is_query: accepted for API symmetry; not used by the underlying
                encode call in this implementation.
            batch_size: batch size forwarded to the model.
            max_length: unused here; the sequence cap is fixed at load time.

        Returns:
            A float list (single input) or list of float lists; dimension
            is expected to be 1024 (a warning is logged on mismatch).

        Raises:
            RuntimeError: if the model has not been loaded.
            ValueError: if texts is empty.
        """
        if EmbeddingService._model is None:
            raise RuntimeError("Embedding model not loaded")

        if isinstance(texts, str):
            texts = [texts]
            single_text = True
        else:
            single_text = False

        if not texts:
            raise ValueError("Texts cannot be empty")

        try:
            embeddings = EmbeddingService._model.encode(
                texts,
                batch_size=batch_size,
                show_progress_bar=False,
                convert_to_numpy=True,
                normalize_embeddings=False
            )

            # Sanity-check against the Pinecone index dimension
            expected_dim = 1024
            if single_text:
                embedding_list = embeddings[0].tolist()
                if len(embedding_list) != expected_dim:
                    logger.warning(f"Embedding dimension mismatch: expected {expected_dim}, got {len(embedding_list)}")
                return embedding_list

            result = []
            for emb in embeddings:
                emb_list = emb.tolist()
                if len(emb_list) != expected_dim:
                    logger.warning(f"Embedding dimension mismatch: expected {expected_dim}, got {len(emb_list)}")
                result.append(emb_list)

            return result
        except Exception as e:
            logger.error(f"Error encoding texts: {str(e)}")
            raise

    def get_model_info(self) -> dict:
        """Return model name, measured output dimension, device and max length."""
        settings = get_settings()
        dimension = 1024  # fallback if probing the model fails

        if EmbeddingService._model is not None:
            try:
                # Probe with a dummy input to measure the real output dimension
                test_embedding = EmbeddingService._model.encode(["test"], convert_to_numpy=True)
                dimension = len(test_embedding[0])
            except Exception as e:
                logger.warning(f"Could not determine model dimension: {str(e)}")

        return {
            "model_name": settings.EMBEDDING_MODEL_NAME,
            "dimension": dimension,
            "device": "cuda" if torch.cuda.is_available() else "cpu",
            "max_seq_length": EmbeddingService._model.max_seq_length if EmbeddingService._model else 2048
        }
105
+
106
+
107
+
services/pinecone_service.py ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pinecone import Pinecone, ServerlessSpec
2
+ from typing import List, Dict, Optional, Any
3
+ import logging
4
+ from config.settings import get_settings
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
class PineconeService:
    """Singleton client for the Pinecone mentor index.

    Client and index handles live on the class so they are created once per
    process; the serverless index is created on first run if missing.
    """

    _instance = None  # the singleton instance
    _client = None  # shared Pinecone client
    _index = None  # handle to the mentor index

    def __new__(cls):
        # Classic singleton: always hand back the same instance
        if cls._instance is None:
            cls._instance = super(PineconeService, cls).__new__(cls)
        return cls._instance

    def __init__(self):
        # Only the first construction connects to Pinecone
        if PineconeService._client is None:
            self._initialize()

    def _initialize(self):
        """Connect to Pinecone and create the serverless index if absent.

        Raises:
            ValueError: if PINECONE_API_KEY is not configured.
        """
        settings = get_settings()

        if not settings.PINECONE_API_KEY:
            raise ValueError("PINECONE_API_KEY is required")

        try:
            PineconeService._client = Pinecone(api_key=settings.PINECONE_API_KEY)

            index_name = settings.PINECONE_INDEX

            existing_indexes = [idx.name for idx in PineconeService._client.list_indexes()]

            if index_name not in existing_indexes:
                logger.info(f"Creating Pinecone index: {index_name}")
                PineconeService._client.create_index(
                    name=index_name,
                    dimension=settings.PINECONE_DIMENSION,
                    metric="cosine",
                    spec=ServerlessSpec(
                        cloud="aws",
                        region=settings.PINECONE_ENVIRONMENT
                    )
                )
                logger.info(f"Index {index_name} created successfully")

            PineconeService._index = PineconeService._client.Index(index_name)
            logger.info(f"Connected to Pinecone index: {index_name}")
        except Exception as e:
            logger.error(f"Failed to initialize Pinecone: {str(e)}")
            raise

    def upsert_mentor(
        self,
        mentor_id: str,
        vector: List[float],
        metadata: Dict[str, Any]
    ) -> bool:
        """Upsert one mentor vector after validating its dimension.

        Raises:
            ValueError: when the vector length differs from the index dimension.
        """
        try:
            settings = get_settings()
            expected_dim = settings.PINECONE_DIMENSION

            if len(vector) != expected_dim:
                error_msg = f"Vector dimension mismatch: expected {expected_dim}, got {len(vector)}"
                logger.error(error_msg)
                raise ValueError(error_msg)

            PineconeService._index.upsert(
                vectors=[{
                    "id": str(mentor_id),
                    "values": vector,
                    "metadata": metadata
                }]
            )
            logger.info(f"Mentor {mentor_id} upserted successfully")
            return True
        except Exception as e:
            logger.error(f"Failed to upsert mentor {mentor_id}: {str(e)}")
            raise

    def upsert_mentors_batch(
        self,
        vectors: List[Dict[str, Any]]
    ) -> bool:
        """Upsert pre-built vector dicts ({id, values, metadata}) in one call.

        NOTE(review): unlike upsert_mentor, no dimension validation happens here.
        """
        try:
            PineconeService._index.upsert(vectors=vectors)
            logger.info(f"Batch upserted {len(vectors)} mentors")
            return True
        except Exception as e:
            logger.error(f"Failed to batch upsert mentors: {str(e)}")
            raise

    def query_similar(
        self,
        query_vector: List[float],
        top_k: int = 30,
        filter: Optional[Dict[str, Any]] = None,
        include_metadata: bool = True
    ) -> List[Dict[str, Any]]:
        """Return the top_k nearest mentors as [{mentor_id, score, metadata}].

        Raises:
            ValueError: on query-vector dimension mismatch.
        """
        try:
            settings = get_settings()
            expected_dim = settings.PINECONE_DIMENSION

            if len(query_vector) != expected_dim:
                error_msg = f"Query vector dimension mismatch: expected {expected_dim}, got {len(query_vector)}"
                logger.error(error_msg)
                raise ValueError(error_msg)

            query_response = PineconeService._index.query(
                vector=query_vector,
                top_k=top_k,
                filter=filter,
                include_metadata=include_metadata
            )

            results = []
            for match in query_response.matches:
                results.append({
                    "mentor_id": match.id,
                    "score": match.score,
                    "metadata": match.metadata if include_metadata else None
                })

            return results
        except Exception as e:
            logger.error(f"Failed to query similar mentors: {str(e)}")
            raise

    def delete_mentor(self, mentor_id: str) -> bool:
        """Delete one mentor vector by id."""
        try:
            PineconeService._index.delete(ids=[str(mentor_id)])
            logger.info(f"Mentor {mentor_id} deleted successfully")
            return True
        except Exception as e:
            logger.error(f"Failed to delete mentor {mentor_id}: {str(e)}")
            raise

    def delete_mentors_batch(self, mentor_ids: List[str]) -> bool:
        """Delete several mentor vectors in one call."""
        try:
            PineconeService._index.delete(ids=[str(id) for id in mentor_ids])
            logger.info(f"Batch deleted {len(mentor_ids)} mentors")
            return True
        except Exception as e:
            logger.error(f"Failed to batch delete mentors: {str(e)}")
            raise

    def get_index_stats(self) -> Dict[str, Any]:
        """Return vector count, dimension and fullness for the index."""
        try:
            stats = PineconeService._index.describe_index_stats()
            return {
                "total_vectors": stats.total_vector_count,
                "dimension": stats.dimension,
                "index_fullness": stats.index_fullness if hasattr(stats, 'index_fullness') else None
            }
        except Exception as e:
            logger.error(f"Failed to get index stats: {str(e)}")
            raise
+
160
+
161
+
services/recommendation_service.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Dict, Any, Optional
2
+ import logging
3
+ from services.embedding_service import EmbeddingService
4
+ from services.pinecone_service import PineconeService
5
+ from services.reranker_service import RerankerService
6
+ from utils.text_builder import build_mentor_text, build_mentee_query_text
7
+ from utils.scoring import rerank_mentors
8
+ from config.settings import get_settings
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
class RecommendationService:
    """High-level mentor recommendation pipeline.

    Combines the embedding model (text -> vector), Pinecone (vector store +
    metadata filters) and a cross-encoder reranker with rule-based rescoring.
    """

    def __init__(self):
        self.embedding_service = EmbeddingService()
        self.pinecone_service = PineconeService()
        self.settings = get_settings()

    @staticmethod
    def _safe_float(value, default=0.0):
        """Coerce value to float, returning default on None or bad input."""
        if value is None:
            return default
        try:
            return float(value)
        except (ValueError, TypeError):
            return default

    @staticmethod
    def _safe_int(value, default=0):
        """Coerce value to int, returning default on None or bad input."""
        if value is None:
            return default
        try:
            return int(value)
        except (ValueError, TypeError):
            return default

    def upsert_mentor(
        self,
        mentor_data: Dict[str, Any]
    ) -> bool:
        """Embed a mentor profile and upsert vector + metadata into Pinecone.

        Args:
            mentor_data: dict mirroring MentorUpsertRequest; must contain
                "mentor_id".

        Returns:
            True on success; re-raises any underlying failure.
        """
        try:
            mentor_text = build_mentor_text(mentor_data)
            embedding = self.embedding_service.encode(mentor_text, is_query=False)

            metadata = {
                "mentor_id": str(mentor_data["mentor_id"]),
                "rating": self._safe_float(mentor_data.get("rating"), 0.0),
                "total_ratings": self._safe_int(mentor_data.get("total_ratings"), 0),
                "session_count": self._safe_int(mentor_data.get("session_count"), 0),
                "available_slots": self._safe_int(mentor_data.get("available_slots"), 0),
                "has_availability": bool(mentor_data.get("has_availability", False)),
                "status": str(mentor_data.get("status") or "ACTIVATED"),
                # Stored so the reranker can score candidates without refetching
                "mentor_text": mentor_text
            }

            # Fix: only include career_id when it is set. The previous code
            # stored None for mentors without a career, and Pinecone rejects
            # null metadata values, failing the whole upsert.
            if mentor_data.get("career_id"):
                metadata["career_id"] = self._safe_int(mentor_data["career_id"])

            if mentor_data.get("skill_ids"):
                metadata["skill_ids"] = [str(int(id)) for id in mentor_data["skill_ids"]]
            if mentor_data.get("domain_ids"):
                metadata["domain_ids"] = [str(int(id)) for id in mentor_data["domain_ids"]]

            return self.pinecone_service.upsert_mentor(
                mentor_id=str(mentor_data["mentor_id"]),
                vector=embedding,
                metadata=metadata
            )
        except Exception as e:
            logger.error(f"Failed to upsert mentor: {str(e)}")
            raise

    def recommend_mentors(
        self,
        mentee_data: Dict[str, Any],
        top_k: Optional[int] = None,
        final_count: Optional[int] = None
    ) -> List[Dict[str, Any]]:
        """Return up to final_count mentors ranked for the given mentee.

        Pipeline: build query text -> embed -> Pinecone similarity search
        (with metadata filter) -> cross-encoder rerank (falls back to the
        cosine score on failure) -> rule-based rescoring via rerank_mentors.
        """
        try:
            top_k = top_k or self.settings.RECOMMENDATION_TOP_K
            final_count = final_count or self.settings.RECOMMENDATION_FINAL_COUNT

            query_text = build_mentee_query_text(mentee_data)
            query_embedding = self.embedding_service.encode(query_text, is_query=True)

            filter_dict = self._build_filter(mentee_data)

            similar_mentors = self.pinecone_service.query_similar(
                query_vector=query_embedding,
                top_k=top_k,
                filter=filter_dict if filter_dict else None,
                include_metadata=True
            )

            if not similar_mentors:
                return []

            # Surface the stored profile text so the reranker can use it
            for mentor in similar_mentors:
                metadata = mentor.get("metadata", {})
                mentor["mentor_text"] = metadata.get("mentor_text", "")
                if not mentor["mentor_text"]:
                    logger.warning(f"Mentor {mentor.get('mentor_id', 'unknown')} missing mentor_text in metadata")

            reranker = RerankerService()
            rerank_k = self.settings.RECOMMENDATION_RERANK_K

            try:
                reranked_mentors = reranker.rerank(
                    query_text=query_text,
                    candidates=similar_mentors,
                    top_k=rerank_k
                )
            except Exception as e:
                # Best-effort: degrade to the vector-similarity ordering
                logger.error(f"Reranker failed: {e}. Falling back to cosine similarity.", exc_info=True)
                for mentor in similar_mentors:
                    mentor["reranker_score"] = mentor.get("score", 0.0)
                reranked_mentors = sorted(similar_mentors, key=lambda x: x.get("reranker_score", 0.0), reverse=True)[:rerank_k]

            return rerank_mentors(
                reranked_mentors,
                mentee_data,
                final_count=final_count
            )
        except Exception as e:
            logger.error(f"Failed to recommend mentors: {str(e)}")
            raise

    def _build_filter(self, mentee_data: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """Translate mentee constraints into a Pinecone metadata filter.

        A status constraint (default "ACTIVATED") is always applied, so in
        practice this never returns None.
        """
        filter_dict = {}

        if mentee_data.get("min_rating"):
            filter_dict["rating"] = {"$gte": float(mentee_data["min_rating"])}

        if mentee_data.get("require_availability"):
            filter_dict["has_availability"] = True

        if mentee_data.get("skill_ids"):
            filter_dict["skill_ids"] = {"$in": [str(int(id)) for id in mentee_data["skill_ids"]]}

        if mentee_data.get("domain_ids"):
            filter_dict["domain_ids"] = {"$in": [str(int(id)) for id in mentee_data["domain_ids"]]}

        if mentee_data.get("career_id"):
            filter_dict["career_id"] = int(mentee_data["career_id"])

        if mentee_data.get("status"):
            filter_dict["status"] = str(mentee_data["status"])
        else:
            filter_dict["status"] = "ACTIVATED"

        return filter_dict if filter_dict else None

    def delete_mentor(self, mentor_id: str) -> bool:
        """Remove a mentor's vector from the Pinecone index."""
        try:
            return self.pinecone_service.delete_mentor(mentor_id)
        except Exception as e:
            logger.error(f"Failed to delete mentor: {str(e)}")
            raise
156
+
services/reranker_service.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
2
+ import torch
3
+ import logging
4
+
5
+ logger = logging.getLogger(__name__)
6
+
7
class RerankerService:
    """Singleton cross-encoder reranker.

    Loads the ``AITeamVN/Vietnamese_Reranker`` sequence-classification model
    once per process and scores (query, mentor_text) pairs for final ranking.
    The singleton avoids reloading the (large) model on every request.
    """

    _instance = None   # the single shared instance
    _initialized = False  # True once the model has been loaded successfully

    def __new__(cls):
        # Classic singleton: always hand back the same instance.
        if cls._instance is None:
            cls._instance = super(RerankerService, cls).__new__(cls)
        return cls._instance

    def __init__(self):
        # __init__ runs on every construction of the singleton; bail out
        # once the expensive model load has already succeeded.
        if RerankerService._initialized:
            return

        try:
            self.model_name = "AITeamVN/Vietnamese_Reranker"
            logger.info(f"Loading reranker model: {self.model_name}")

            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
            self.model = AutoModelForSequenceClassification.from_pretrained(self.model_name)
            self.model.eval()  # inference mode: disables dropout etc.

            self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            self.model.to(self.device)

            logger.info(f"Reranker model loaded successfully on device: {self.device}")
            RerankerService._initialized = True
        except Exception as e:
            # _initialized stays False, so a later construction retries the load.
            logger.error(f"Failed to load reranker model: {str(e)}", exc_info=True)
            raise

    def rerank(self, query_text: str, candidates: list, top_k: int = None) -> list:
        """Score candidates against the query and return them sorted by relevance.

        Args:
            query_text: The mentee query text.
            candidates: Dicts expected to carry ``mentor_text`` (candidates
                without it are skipped) and optionally ``score`` /
                ``mentor_id``.
            top_k: If truthy, truncate the sorted result to this many items.

        Returns:
            Candidates with ``reranker_score`` set, sorted descending.
            On model failure, falls back to the retrieval ``score``.
        """
        if not candidates:
            return []

        if not query_text:
            logger.warning("Empty query text provided to reranker")
            return candidates

        try:
            pairs = []
            valid_candidates = []

            for cand in candidates:
                mentor_text = cand.get("mentor_text", "")
                if mentor_text:
                    pairs.append((query_text, mentor_text))
                    valid_candidates.append(cand)
                else:
                    logger.warning(f"Mentor {cand.get('mentor_id', 'unknown')} missing mentor_text, skipping reranking")

            if not pairs:
                logger.warning("No valid candidate pairs for reranking, returning original candidates")
                return candidates

            inputs = self.tokenizer(
                pairs,
                padding=True,
                truncation=True,
                max_length=512,
                return_tensors="pt"
            ).to(self.device)

            with torch.no_grad():
                outputs = self.model(**inputs)
                # Sigmoid maps raw logits to [0, 1] relevance scores.
                scores = torch.sigmoid(outputs.logits).squeeze(-1).cpu().tolist()

            # A single pair squeezes to a 0-dim tensor, whose tolist() is a float.
            if isinstance(scores, float):
                scores = [scores]

            for cand, score in zip(valid_candidates, scores):
                cand["reranker_score"] = float(score)

            sorted_candidates = sorted(valid_candidates, key=lambda x: x.get("reranker_score", 0.0), reverse=True)

            return sorted_candidates[:top_k] if top_k else sorted_candidates
        except Exception as e:
            logger.error(f"Error during reranking: {str(e)}", exc_info=True)
            # Fallback: reuse the retrieval score so downstream sorting still works.
            for cand in candidates:
                if "reranker_score" not in cand:
                    cand["reranker_score"] = cand.get("score", 0.0)
            # FIX: the fallback previously returned candidates UNSORTED when
            # top_k was falsy; now it is always sorted, matching the happy path.
            ranked = sorted(candidates, key=lambda x: x.get("reranker_score", 0.0), reverse=True)
            return ranked[:top_k] if top_k else ranked
90
+
utils/__init__.py ADDED
File without changes
utils/scoring.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Dict, Any
2
+ import math
3
+ from config.settings import get_settings
4
+
5
def rerank_mentors(
    similar_mentors: List[Dict[str, Any]],
    mentee_data: Dict[str, Any],
    final_count: int = 8
) -> List[Dict[str, Any]]:
    """Blend reranker/semantic scores with rule-based signals and keep the top N.

    Args:
        similar_mentors: Candidates carrying ``mentor_id``, optional
            ``reranker_score`` / ``score`` and a ``metadata`` dict.
        mentee_data: Mentee payload, forwarded to reason generation.
        final_count: Number of mentors to return.

    Returns:
        Top ``final_count`` mentors with blended ``score`` and a textual
        ``reason``, sorted descending.
    """
    settings = get_settings()
    ranked: List[Dict[str, Any]] = []

    for mentor in similar_mentors:
        meta = mentor.get("metadata", {})

        # Prefer the cross-encoder score; fall back to retrieval similarity.
        base = mentor.get("reranker_score")
        if base is None:
            base = mentor.get("score", 0.0)

        rating_part = _calculate_rating_score(meta.get("rating", 0.0))
        slots_part = _calculate_availability_score(meta.get("available_slots", 0))

        # Weighted blend: semantic relevance + equally-weighted rule signals.
        blended = (
            base * settings.SEMANTIC_WEIGHT
            + (rating_part * 0.5 + slots_part * 0.5) * settings.RULE_BASED_WEIGHT
        )

        ranked.append({
            "mentor_id": mentor["mentor_id"],
            "score": blended,
            "semantic_similarity": mentor.get("score", 0.0),
            "reranker_score": base,
            "metadata": meta,
            "reason": _generate_reason(base, meta, mentee_data, rating_part, slots_part),
        })

    ranked.sort(key=lambda item: item["score"], reverse=True)
    return ranked[:final_count]
54
+
55
+ def _calculate_rating_score(rating: float) -> float:
56
+ if rating <= 0:
57
+ return 0.0
58
+ return min(rating / 5.0, 1.0)
59
+
60
+ def _calculate_availability_score(available_slots: int) -> float:
61
+ if available_slots <= 0:
62
+ return 0.0
63
+ if available_slots >= 10:
64
+ return 1.0
65
+ return min(available_slots / 10.0, 1.0)
66
+
67
+
68
+ def _generate_reason(
69
+ reranker_score: float,
70
+ metadata: Dict[str, Any],
71
+ mentee_data: Dict[str, Any],
72
+ rating_score: float,
73
+ availability_score: float
74
+ ) -> str:
75
+ reasons = []
76
+
77
+ if reranker_score >= 0.8:
78
+ reasons.append("Highly relevant expertise")
79
+ elif reranker_score >= 0.6:
80
+ reasons.append("Good match for your goals")
81
+
82
+ rating = metadata.get("rating", 0.0)
83
+ if rating >= 4.5:
84
+ reasons.append("Excellent ratings")
85
+ elif rating >= 4.0:
86
+ reasons.append("High ratings")
87
+
88
+ available_slots = metadata.get("available_slots", 0)
89
+ if available_slots > 0:
90
+ reasons.append("Has available slots")
91
+
92
+ if not reasons:
93
+ reasons.append("Good overall match")
94
+
95
+ return "; ".join(reasons[:3])
96
+
97
+
98
+
utils/text_builder.py ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Any, List, Optional
2
+ from datetime import datetime
3
+
4
def build_mentor_text(mentor_data: Dict[str, Any]) -> str:
    """Render a mentor profile dict into a plain-text block for embedding.

    Each section (name, bio, career, skills, domains, experience,
    education, rating, sessions, slots, availability) is included only
    when the corresponding data is present and non-empty.
    """

    def names_of(items):
        # Accept both dicts ({"name": ...}) and bare strings; ignore others.
        return [it.get("name", "") if isinstance(it, dict) else it
                for it in items if isinstance(it, (dict, str))]

    lines = []

    full_name = mentor_data.get("full_name", "")
    if full_name:
        lines.append(f"Mentor Profile: {full_name}")

    bio = mentor_data.get("bio", "")
    if bio:
        lines.append(f"Bio: {bio}")

    career = mentor_data.get("career", {})
    if isinstance(career, dict) and career.get("name"):
        lines.append(f"Career: {career['name']}")
    elif isinstance(career, str):
        lines.append(f"Career: {career}")

    skill_names = names_of(mentor_data.get("skills", []) or [])
    if skill_names:
        lines.append(f"Skills: {', '.join(skill_names)}")

    domain_names = names_of(mentor_data.get("domains", []) or [])
    if domain_names:
        lines.append(f"Domains: {', '.join(domain_names)}")

    exp_lines = []
    for exp in mentor_data.get("experiences", []) or []:
        if not isinstance(exp, dict):
            continue
        entry = f" - {exp.get('position', '')}"
        if exp.get("company", ""):
            entry += f" at {exp['company']}"
        if exp.get("start_date", ""):
            # Missing end date is rendered as an ongoing position.
            entry += f" ({exp['start_date']} - {exp.get('end_date', '') or 'Present'})"
        if exp.get("description", ""):
            entry += f": {exp['description']}"
        exp_lines.append(entry)
    if exp_lines:
        lines.append("Experience Details:")
        lines.extend(exp_lines)

    edu_lines = []
    for edu in mentor_data.get("educations", []) or []:
        if not isinstance(edu, dict):
            continue
        entry = " -"
        if edu.get("degree", ""):
            entry += f" {edu['degree']}"
        if edu.get("school", ""):
            entry += f" from {edu['school']}"
        if edu.get("start_date", ""):
            entry += f" ({edu['start_date']} - {edu.get('end_date', '')})"
        if edu.get("description", ""):
            entry += f": {edu['description']}"
        edu_lines.append(entry)
    if edu_lines:
        lines.append("Education:")
        lines.extend(edu_lines)

    rating = mentor_data.get("rating", 0.0)
    total_ratings = mentor_data.get("total_ratings", 0)
    if rating or total_ratings:
        lines.append(f"Rating: {rating:.1f}/5.0 ({total_ratings} reviews)")

    if mentor_data.get("session_count", 0):
        lines.append(f"Sessions Conducted: {mentor_data['session_count']}")

    if mentor_data.get("available_slots", 0):
        lines.append(f"Available Slots: {mentor_data['available_slots']}")

    schedules = mentor_data.get("schedules", [])
    if schedules:
        summary = _build_schedule_summary(schedules)
        if summary:
            lines.append(f"Availability: {summary}")

    return "\n".join(lines)
113
+
114
def build_mentee_query_text(mentee_data: Dict[str, Any]) -> str:
    """Render a mentee's preferences into a query text for retrieval/reranking.

    Always starts with a fixed header line; each preference section is
    appended only when present. ``interests`` takes precedence over
    ``domains``, and ``availability`` over ``preferred_availability``.
    """

    def names_of(items):
        # Accept both dicts ({"name": ...}) and bare strings; ignore others.
        return [it.get("name", "") if isinstance(it, dict) else it
                for it in items if isinstance(it, (dict, str))]

    lines = ["Looking for mentor to help with:"]

    goals = mentee_data.get("goals", "")
    if goals:
        lines.append(f"Goals: {goals}")

    desired = mentee_data.get("desired_skills", [])
    if desired:
        picked = names_of(desired)
        if picked:
            lines.append(f"Desired Skills: {', '.join(picked)}")

    current = mentee_data.get("current_skills", [])
    if current:
        picked = names_of(current)
        if picked:
            lines.append(f"Current Skills: {', '.join(picked)}")

    # interests wins over domains when both are present.
    interests = mentee_data.get("interests", [])
    domains = mentee_data.get("domains", [])
    source = interests if interests else domains
    if source:
        picked = names_of(source)
        if picked:
            lines.append(f"Interests: {', '.join(picked)}")

    availability = mentee_data.get("availability", "")
    preferred = mentee_data.get("preferred_availability", "")
    if availability or preferred:
        lines.append(f"Preferred Availability: {availability or preferred}")

    return "\n".join(lines)
170
+
171
+ def _build_schedule_summary(schedules: List[Dict[str, Any]]) -> str:
172
+ if not schedules:
173
+ return ""
174
+
175
+ day_names = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
176
+ active_schedules = [s for s in schedules if s.get("is_active", 1) == 1]
177
+
178
+ if not active_schedules:
179
+ return ""
180
+
181
+ schedule_groups = {}
182
+ for schedule in active_schedules:
183
+ day = schedule.get("day_of_week", 0)
184
+ start = schedule.get("start_time", "")
185
+ end = schedule.get("end_time", "")
186
+
187
+ if day < 7:
188
+ day_name = day_names[day]
189
+ time_str = f"{start}-{end}" if start and end else ""
190
+ if day_name not in schedule_groups:
191
+ schedule_groups[day_name] = []
192
+ if time_str:
193
+ schedule_groups[day_name].append(time_str)
194
+
195
+ if not schedule_groups:
196
+ return ""
197
+
198
+ summary_parts = []
199
+ for day, times in sorted(schedule_groups.items()):
200
+ if times:
201
+ summary_parts.append(f"{day} {', '.join(times)}")
202
+
203
+ return "; ".join(summary_parts)
204
+
205
+
206
+