GitHub Actions committed on
Commit
31e79c4
·
0 Parent(s):

Deploy FastAPI backend (backend.main:app) via GitHub Actions

Browse files
Dockerfile ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Slim Python image serving the FastAPI backend with uvicorn.
FROM python:3.10-slim

# No .pyc files; unbuffered stdout/stderr for container-friendly logging.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

WORKDIR /app

# Install dependencies first so this layer is cached across code-only changes.
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r /app/requirements.txt

# Copy the backend package under /app/backend
COPY backend /app/backend

# /app is on the Python path by virtue of WORKDIR, so `backend.main` resolves.
ENV PORT=7860
EXPOSE 7860

# Start FastAPI using the package path backend.main:app.
# Fix: the original exec-form CMD hardcoded 7860, making ENV PORT dead weight;
# shell form lets $PORT override while defaulting to 7860.
CMD ["sh", "-c", "uvicorn backend.main:app --host 0.0.0.0 --port ${PORT:-7860}"]
README.md ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: TalentScout AI Backend
3
+ emoji: 🤖
4
+ colorFrom: blue
5
+ colorTo: green
6
+ sdk: docker
7
+ pinned: false
8
+ ---
9
+ FastAPI backend (Docker Space) serving at port 7860.
10
+ Swagger: /docs
backend/__init__.py ADDED
File without changes
backend/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (165 Bytes). View file
 
backend/__pycache__/llm_client.cpython-313.pyc ADDED
Binary file (2.18 kB). View file
 
backend/__pycache__/main.cpython-313.pyc ADDED
Binary file (6.13 kB). View file
 
backend/__pycache__/models.cpython-313.pyc ADDED
Binary file (1.92 kB). View file
 
backend/__pycache__/pdf_parser.cpython-313.pyc ADDED
Binary file (4.59 kB). View file
 
backend/__pycache__/qdrant_client.cpython-313.pyc ADDED
Binary file (3.48 kB). View file
 
backend/__pycache__/sentiment_utils.cpython-313.pyc ADDED
Binary file (554 Bytes). View file
 
backend/__pycache__/session_utils.cpython-313.pyc ADDED
Binary file (744 Bytes). View file
 
backend/llm_client.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# llm_client.py — thin wrapper around Groq's OpenAI-compatible chat API.
import os
from openai import OpenAI
from dotenv import load_dotenv

# Pull GROQ_API_KEY (and any other settings) from a local .env file, if present.
load_dotenv()

api_key = os.getenv("GROQ_API_KEY")
# Groq exposes an OpenAI-compatible endpoint, so the stock OpenAI client works
# once pointed at the Groq base URL.
# NOTE(review): api_key is None if GROQ_API_KEY is unset; calls will then fail
# at request time — consider failing fast at startup with a clear error.
client = OpenAI(api_key=api_key, base_url="https://api.groq.com/openai/v1")
model_name = "llama-3.1-8b-instant"
11
def generate_technical_questions(tech_stack):
    """Ask the LLM for 3-5 screening questions covering *tech_stack*.

    Returns at most five non-empty lines from the model's reply, stripped.
    """
    stack = ", ".join(tech_stack)
    prompt = (
        f"You are an expert technical interviewer. Create 3-5 concise and relevant technical "
        f"questions to assess a candidate's proficiency in the following technologies: {stack}"
        f". Include conceptual, practical, and problem-solving questions."
    )
    response = client.chat.completions.create(
        model=model_name,
        stream=False,
        messages=[
            {"role": "system", "content": "You are a helpful, expert interviewer."},
            {"role": "user", "content": prompt},
        ],
    )
    reply = response.choices[0].message.content.strip()
    # One question per non-blank line; cap at five.
    stripped_lines = (line.strip() for line in reply.split("\n"))
    questions = [line for line in stripped_lines if line]
    return questions[:5]
31
def chat_with_llm(messages):
    """Send a complete message list to the LLM and return its stripped reply."""
    completion = client.chat.completions.create(
        model=model_name,
        messages=messages,
        stream=False,
    )
    return completion.choices[0].message.content.strip()
backend/main.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# main.py — FastAPI entry point for the TalentScout screening backend.
import uuid
from fastapi import FastAPI, HTTPException, Body, UploadFile, File
from fastapi.middleware.cors import CORSMiddleware
from tempfile import NamedTemporaryFile  # NOTE(review): unused — /parse-resume uses tempfile.mkstemp instead
from pydantic import BaseModel
from .sentiment_utils import analyze_sentiment
from .models import CandidateInfo, TechQuestionsRequest, TechQuestionsResponse, CandidateSessionId
from .llm_client import generate_technical_questions, chat_with_llm
from .qdrant_client import create_collection, store_candidate
from .session_utils import delete_session
from .pdf_parser import extract_text_from_pdf, parse_resume_text

app = FastAPI()

# Wide-open CORS so any frontend origin can reach the API.
# NOTE(review): allow_origins=["*"] together with allow_credentials=True is
# rejected by the CORS spec for credentialed requests — pin concrete origins
# if cookies or auth headers are ever needed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
23
class ChatRequest(BaseModel):
    """Request body for POST /chat."""
    session_id: str
    user_message: str
    conversation_history: list  # List of past messages (user/assistant)


class ChatResponse(BaseModel):
    """Response body for POST /chat."""
    reply: str
    # Fix: chat_endpoint constructs ChatResponse(reply=..., sentiment=...), but
    # this model had no `sentiment` field, so pydantic silently dropped the value
    # and clients never received it. Optional default keeps callers compatible.
    sentiment: str | None = None
31
@app.on_event("startup")
async def startup_event():
    # Ensure the Qdrant collection (and its payload indexes) exist before serving.
    create_collection()
35
@app.get("/")
def root():
    """Simple health-check endpoint."""
    return {
        "status": "ok",
        "service": "talentscout-backend",
    }
40
@app.get("/greet")
def greet():
    """Return the assistant's opening greeting shown before screening begins."""
    greeting = (
        "Hello! I'm TalentScout's AI Hiring Assistant. "
        "I will guide you through the initial screening process."
    )
    return {"message": greeting}
47
@app.post("/candidate-info")
def save_candidate(candidate: CandidateInfo):
    """Persist candidate details to Qdrant and hand back a fresh session id.

    Raises HTTP 500 if storage fails for any reason.
    """
    try:
        session_id = str(uuid.uuid4())
        record = candidate.dict()
        record["session_id"] = session_id
        store_candidate(record)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
    return {
        "status": "success",
        "message": "Candidate info stored.",
        "session_id": session_id,
    }
62
@app.post("/tech-questions", response_model=TechQuestionsResponse)
def get_technical_questions(req: TechQuestionsRequest):
    """Generate screening questions for the candidate's declared tech stack."""
    return TechQuestionsResponse(questions=generate_technical_questions(req.tech_stack))
67
@app.post("/parse-resume")
async def parse_resume(file: UploadFile = File(...)):
    """Extract structured candidate data from an uploaded PDF resume.

    The upload is written to a temporary .pdf file (always removed afterwards),
    then run through the PDF text extractor and heuristic parser.
    Raises HTTP 500 with the underlying error message on any failure.
    """
    import os
    import tempfile

    try:
        # mkstemp gives us a path PyMuPDF can open by name.
        fd, tmp_path = tempfile.mkstemp(suffix='.pdf')
        try:
            payload = await file.read()
            with os.fdopen(fd, 'wb') as handle:
                handle.write(payload)

            parsed = parse_resume_text(extract_text_from_pdf(tmp_path))
            return {"status": "success", "parsed_data": parsed}
        finally:
            # Always remove the temp file, even on parse failure.
            if os.path.exists(tmp_path):
                os.unlink(tmp_path)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to parse resume: {e}")
97
@app.post("/chat", response_model=ChatResponse)
def chat_endpoint(req: ChatRequest):
    """Run one turn of the screening conversation through the LLM.

    Sentiment of the user's message is analyzed, the (sanitized) history is
    replayed to the model, and the model's reply is returned.
    Raises HTTP 500 on any failure.
    """
    try:
        sentiment = analyze_sentiment(req.user_message)

        messages = [{"role": "system", "content": "You are a professional interviewer. Ask candidate questions based on context. Be polite and adaptive."}]

        # Keep only well-formed history entries: a dict with a known role and
        # non-blank string content. Anything else is silently dropped.
        allowed_roles = ("user", "assistant", "system")
        for entry in req.conversation_history:
            if not isinstance(entry, dict):
                continue
            role = entry.get("role")
            content = entry.get("content")
            if role in allowed_roles and isinstance(content, str) and content.strip():
                messages.append({"role": role, "content": content})

        messages.append({"role": "user", "content": req.user_message})

        return ChatResponse(reply=chat_with_llm(messages), sentiment=sentiment)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
121
@app.post("/clear-session")
def clear_session(session: CandidateSessionId = Body(...)):
    """Delete all stored data for the given session id; HTTP 500 on failure."""
    if not delete_session(session.session_id):
        raise HTTPException(status_code=500, detail="Failed to clear session data.")
    return {"status": "success", "message": "Session data cleared."}
backend/models.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# models.py — pydantic schemas shared by the API endpoints.
from pydantic import BaseModel, EmailStr
from typing import List, Optional

class CandidateInfo(BaseModel):
    """Candidate profile captured by POST /candidate-info and stored in Qdrant."""
    full_name: str
    email: EmailStr  # validated e-mail; requires the pydantic[email] extra
    phone: str
    years_experience: int
    desired_position: str
    current_location: str
    tech_stack: List[str]  # technologies used to generate screening questions
    # Optional enrichment fields (resume parsing / candidate-supplied links).
    education: Optional[str] = None
    current_role: Optional[str] = None
    linkedin: Optional[str] = None
    github: Optional[str] = None
    portfolio: Optional[str] = None

class TechQuestionsRequest(BaseModel):
    """Request body for POST /tech-questions."""
    tech_stack: List[str]

class TechQuestionsResponse(BaseModel):
    """Response body for POST /tech-questions."""
    questions: List[str]

class CandidateSessionId(BaseModel):
    """Request body carrying only a session id (POST /clear-session)."""
    session_id: str

class ChatResponse(BaseModel):
    """Chat reply plus detected sentiment of the user's last message."""
    # NOTE(review): main.py declares its own ChatResponse — keep the two in sync.
    reply: str
    sentiment: Optional[str] = None
backend/pdf_parser.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pymupdf as fitz
2
+ import re
3
+ from difflib import get_close_matches
4
+
5
def extract_text_from_pdf(file_path: str) -> str:
    """Concatenate the plain text of every page in the PDF at *file_path*."""
    document = fitz.open(file_path)
    pages = [page.get_text() for page in document]
    document.close()
    return "".join(pages)
13
def parse_resume_text(text: str) -> dict:
    """Heuristically parse candidate details out of raw resume text.

    Returns a dict with keys: name, email, phone, skills (list, up to 12),
    experience. Fields that cannot be detected are left as empty strings
    (experience falls back to "Fresher").
    """
    lines = [line.strip() for line in text.split('\n') if line.strip()]
    text_lower = text.lower()

    extracted = {
        "name": "",
        "email": "",
        "phone": "",
        "skills": [],
        "experience": ""
    }

    # Valid skills database for matching
    valid_skills = [
        'FastAPI', 'React', 'Next.js', 'Flask', 'MongoDB', 'Tailwind CSS',
        'Machine Learning', 'Python', 'JavaScript', 'HTML', 'CSS', 'Node.js',
        'Docker', 'Kubernetes', 'AWS', 'Git', 'GitHub', 'TensorFlow', 'PyTorch',
        'Streamlit', 'Qdrant', 'LangChain', 'Gemini API', 'OpenAI', 'Gradio',
        'Pandas', 'NumPy', 'Scikit-learn', 'OpenCV', 'Django', 'Vue.js',
        'Angular', 'TypeScript', 'PostgreSQL', 'MySQL', 'Redis', 'GraphQL',
        'RESTful API', 'Microservices', 'CI/CD', 'Linux', 'Ubuntu', 'Nginx',
        'Apache', 'Jenkins', 'Terraform', 'Ansible', 'Elasticsearch'
    ]

    # Extract email. Fix: the TLD class was [A-Z|a-z], which wrongly admitted
    # a literal '|' — alternation does not apply inside a character class.
    email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
    email_match = re.search(email_pattern, text)
    if email_match:
        extracted["email"] = email_match.group()

    # Extract phone (Indian mobile format, optional +91/91 prefix).
    phone_pattern = r'\b(?:\+91|91)?[6-9]\d{9}\b'
    phone_match = re.search(phone_pattern, text)
    if phone_match:
        extracted["phone"] = phone_match.group()

    # Extract name: first all-caps, multi-word line near the top that is not a
    # header/metadata line.
    for i, line in enumerate(lines[:10]):
        skip_keywords = ['course', 'email', 'mobile', 'cgpa', 'academic', 'details']
        if any(keyword in line.lower() for keyword in skip_keywords):
            continue

        if re.match(r'^[A-Z][A-Z\s]+$', line) and len(line.split()) >= 2:
            extracted["name"] = line.title()
            break

    # Extract and clean skills
    raw_skills = []

    # Pass 1: verbatim mentions of known skills.
    for skill in valid_skills:
        if skill.lower() in text_lower:
            raw_skills.append(skill)

    # Pass 2: phrases that typically introduce skill lists.
    skill_patterns = [
        r'built with (.*?)(?:\.|,|;|\n)',
        r'using (.*?)(?:\.|,|;|\n)',
        r'technologies?:?\s*(.*?)(?:\.|,|;|\n)',
        r'skills?:?\s*(.*?)(?:\.|,|;|\n)',
        r'stack:?\s*(.*?)(?:\.|,|;|\n)'
    ]

    # Fix: the original delimiter r'[,\.\sand\s&\s]+' was a CHARACTER CLASS
    # containing the letters a, n, d — it split tokens on those letters
    # (e.g. "Kubernets" -> "Kuber", "ets"), breaking fuzzy matching. Split on
    # punctuation, '&', the word "and", or whitespace instead.
    delimiter = re.compile(r'(?:[,.;&]|\band\b|\s)+')

    for pattern in skill_patterns:
        matches = re.findall(pattern, text, re.IGNORECASE | re.DOTALL)
        for match in matches:
            for word in delimiter.split(match.strip()):
                word = word.strip()
                if len(word) > 2:
                    # Fuzzy-match against the known-skills list.
                    close_matches = get_close_matches(word, valid_skills, n=1, cutoff=0.7)
                    if close_matches:
                        raw_skills.append(close_matches[0])

    # Remove duplicates and cap the list.
    extracted["skills"] = list(set(raw_skills))[:12]

    # Extract experience: "<N> years [of] experience" in either order.
    exp_patterns = [
        r'(\d+)\+?\s*years?\s*(?:of\s*)?experience',
        r'experience\s*:?\s*(\d+)\+?\s*years?'
    ]

    for pattern in exp_patterns:
        match = re.search(pattern, text_lower)
        if match:
            extracted["experience"] = f"{match.group(1)} years"
            break

    if not extracted["experience"]:
        # Heuristic fallback for students/freshers.
        if 'intern' in text_lower and 'b.tech' in text_lower:
            extracted["experience"] = "0-1 years (Student/Intern)"
        else:
            extracted["experience"] = "Fresher"

    return extracted
backend/qdrant_client.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# qdrant_client.py — Qdrant storage layer for candidate records.
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance, PayloadSchemaType
import os
from dotenv import load_dotenv

load_dotenv()

# NOTE(review): a specific cloud cluster URL is hardcoded as the fallback;
# prefer requiring QDRANT_HOST to be set and failing fast when it is not.
QDRANT_HOST = os.getenv(
    "QDRANT_HOST",
    "https://9485db48-8672-469a-a917-41a4ebbfd533.us-east4-0.gcp.cloud.qdrant.io"
)
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")

COLLECTION_NAME = "TalentScout"

# Module-level client shared by all functions below.
qdrant_client = QdrantClient(
    url=QDRANT_HOST,
    api_key=QDRANT_API_KEY,
    prefer_grpc=False,  # REST transport
    timeout=30,
    check_compatibility=False,
)
24
def create_collection():
    """Create the TalentScout collection plus payload indexes if not present.

    Only runs the creation path when the collection is missing; existing-index
    errors are ignored, other index errors are logged and swallowed.
    """
    existing = {col.name for col in qdrant_client.get_collections().collections}
    if COLLECTION_NAME in existing:
        return

    qdrant_client.create_collection(
        collection_name=COLLECTION_NAME,
        vectors_config=VectorParams(size=128, distance=Distance.COSINE),
    )

    # Payload indexes so session_id / email can be used in delete filters.
    for field in ("session_id", "email"):
        try:
            qdrant_client.create_payload_index(
                collection_name=COLLECTION_NAME,
                field_name=field,
                field_schema=PayloadSchemaType.KEYWORD,
            )
        except Exception as e:
            if "already exists" not in str(e).lower():
                print(f"Error creating index for {field}: {e}")
45
def store_candidate(candidate_dict):
    """Upsert one candidate payload into the TalentScout collection.

    Fixes two defects in the original:
    - `float(hash(name) % 1)` is always 0.0, so every point carried the
      all-zeros vector — degenerate under cosine distance.
    - builtin `hash()` is salted per process (PYTHONHASHSEED), so the point id
      for the same email changed across restarts, defeating upsert dedup.
    Both are now derived from SHA-256, which is stable and spreads values.
    """
    import hashlib

    # Deterministic pseudo-embedding in [0, 1]: repeat the 32 digest bytes
    # of the candidate's name out to the collection's 128 dimensions.
    name_digest = hashlib.sha256(
        candidate_dict.get("full_name", "").encode("utf-8")
    ).digest()
    vector = [name_digest[i % len(name_digest)] / 255.0 for i in range(128)]

    # Stable integer id derived from the email, same 10**8 range as before,
    # so re-submitting the same candidate overwrites rather than duplicates.
    email_digest = hashlib.sha256(
        candidate_dict.get("email", "").encode("utf-8")
    ).hexdigest()
    point_id = int(email_digest, 16) % (10 ** 8)

    qdrant_client.upsert(
        collection_name=COLLECTION_NAME,
        points=[
            {
                "id": point_id,
                "payload": candidate_dict,
                "vector": vector,
            }
        ]
    )
58
def delete_session_data(session_id: str) -> bool:
    """Remove every point whose payload session_id matches; True on success."""
    from qdrant_client.models import Filter, FieldCondition, MatchValue, FilterSelector

    selector = FilterSelector(
        filter=Filter(
            must=[
                FieldCondition(
                    key="session_id",
                    match=MatchValue(value=session_id),
                )
            ]
        )
    )

    try:
        qdrant_client.delete(
            collection_name=COLLECTION_NAME,
            points_selector=selector,
        )
        print(f"Deleted all points with session_id={session_id} from {COLLECTION_NAME}.")
        return True
    except Exception as e:
        print(f"Error deleting session data for session_id={session_id}: {e}")
        return False
backend/requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #requirements.txt
2
+ fastapi
3
+ uvicorn
4
+ qdrant-client
5
+ pydantic
6
+ python-dotenv
7
+ pydantic[email]
8
+ openai
9
+ PyMuPDF
10
+ python-multipart
11
+ textblob
backend/sentiment_utils.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from textblob import TextBlob
2
+
3
def analyze_sentiment(text):
    """Classify *text* as "positive", "negative", or "neutral" via TextBlob.

    Uses a +/-0.25 polarity dead-band so mildly-toned text reads as neutral.
    """
    polarity = TextBlob(text).sentiment.polarity
    if polarity > 0.25:
        return "positive"
    if polarity < -0.25:
        return "negative"
    return "neutral"
backend/session_utils.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from .qdrant_client import delete_session_data
2
+
3
def delete_session(session_id: str) -> bool:
    """Delete all Qdrant data for *session_id*, logging the outcome.

    Returns True when the underlying delete succeeded.
    """
    print(f"Initiating deletion for session_id={session_id}")
    success = delete_session_data(session_id)
    outcome = (
        f"Session data for {session_id} deleted successfully."
        if success
        else f"Failed to delete session data for {session_id}."
    )
    print(outcome)
    return success
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #requirements.txt
2
+ fastapi
3
+ uvicorn
4
+ qdrant-client
5
+ pydantic
6
+ python-dotenv
7
+ pydantic[email]
8
+ openai
9
+ PyMuPDF
10
+ python-multipart
11
+ textblob