Spaces:
Sleeping
Sleeping
GitHub Actions commited on
Commit ·
31e79c4
0
Parent(s):
Deploy FastAPI backend (backend.main:app) via GitHub Actions
Browse files- Dockerfile +19 -0
- README.md +10 -0
- backend/__init__.py +0 -0
- backend/__pycache__/__init__.cpython-313.pyc +0 -0
- backend/__pycache__/llm_client.cpython-313.pyc +0 -0
- backend/__pycache__/main.cpython-313.pyc +0 -0
- backend/__pycache__/models.cpython-313.pyc +0 -0
- backend/__pycache__/pdf_parser.cpython-313.pyc +0 -0
- backend/__pycache__/qdrant_client.cpython-313.pyc +0 -0
- backend/__pycache__/sentiment_utils.cpython-313.pyc +0 -0
- backend/__pycache__/session_utils.cpython-313.pyc +0 -0
- backend/llm_client.py +38 -0
- backend/main.py +126 -0
- backend/models.py +29 -0
- backend/pdf_parser.py +111 -0
- backend/qdrant_client.py +79 -0
- backend/requirements.txt +11 -0
- backend/sentiment_utils.py +11 -0
- backend/session_utils.py +10 -0
- requirements.txt +11 -0
Dockerfile
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Container image for the FastAPI backend (Hugging Face Docker Space).
FROM python:3.10-slim

# Don't write .pyc files; flush stdout/stderr immediately so container logs stream.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

WORKDIR /app

# Install dependencies first so the layer cache survives code-only changes.
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r /app/requirements.txt

# Copy the backend package under /app/backend
COPY backend /app/backend

# Ensure /app is on Python path (it is by default as WORKDIR)
ENV PORT=7860
EXPOSE 7860

# Start FastAPI using the package path backend.main:app
CMD ["uvicorn", "backend.main:app", "--host", "0.0.0.0", "--port", "7860"]
|
README.md
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: TalentScout AI Backend
|
| 3 |
+
emoji: 🤖
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: green
|
| 6 |
+
sdk: docker
|
| 7 |
+
pinned: false
|
| 8 |
+
---
|
| 9 |
+
FastAPI backend (Docker Space) serving at port 7860.
|
| 10 |
+
Swagger: /docs
|
backend/__init__.py
ADDED
|
File without changes
|
backend/__pycache__/__init__.cpython-313.pyc
ADDED
|
Binary file (165 Bytes). View file
|
|
|
backend/__pycache__/llm_client.cpython-313.pyc
ADDED
|
Binary file (2.18 kB). View file
|
|
|
backend/__pycache__/main.cpython-313.pyc
ADDED
|
Binary file (6.13 kB). View file
|
|
|
backend/__pycache__/models.cpython-313.pyc
ADDED
|
Binary file (1.92 kB). View file
|
|
|
backend/__pycache__/pdf_parser.cpython-313.pyc
ADDED
|
Binary file (4.59 kB). View file
|
|
|
backend/__pycache__/qdrant_client.cpython-313.pyc
ADDED
|
Binary file (3.48 kB). View file
|
|
|
backend/__pycache__/sentiment_utils.cpython-313.pyc
ADDED
|
Binary file (554 Bytes). View file
|
|
|
backend/__pycache__/session_utils.cpython-313.pyc
ADDED
|
Binary file (744 Bytes). View file
|
|
|
backend/llm_client.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from openai import OpenAI
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
|
| 5 |
+
load_dotenv()
|
| 6 |
+
|
| 7 |
+
# Groq serves an OpenAI-compatible API, so the stock OpenAI client is pointed
# at Groq's endpoint. NOTE(review): if GROQ_API_KEY is unset this is None and
# the first request will fail — consider failing fast at startup.
api_key = os.getenv("GROQ_API_KEY")
client = OpenAI(api_key=api_key, base_url="https://api.groq.com/openai/v1")
model_name = "llama-3.1-8b-instant"
|
| 10 |
+
|
| 11 |
+
def generate_technical_questions(tech_stack):
    """Ask the LLM for screening questions covering *tech_stack*.

    Args:
        tech_stack: list of technology names (strings).

    Returns:
        A list of at most 5 non-empty lines from the model's reply.
    """
    prompt = (
        "You are an expert technical interviewer. Create 3-5 concise and relevant technical "
        "questions to assess a candidate's proficiency in the following technologies: "
        + ", ".join(tech_stack) +
        ". Include conceptual, practical, and problem-solving questions."
    )
    messages = [
        {"role": "system", "content": "You are a helpful, expert interviewer."},
        {"role": "user", "content": prompt}
    ]
    response = client.chat.completions.create(
        model=model_name,
        messages=messages,
        stream=False
    )
    # message.content may be None (e.g. refusal/tool responses) — guard
    # before .strip() instead of raising AttributeError.
    content = (response.choices[0].message.content or "").strip()
    questions = [q.strip() for q in content.split("\n") if q.strip()]
    return questions[:5]
|
| 30 |
+
|
| 31 |
+
def chat_with_llm(messages):
    """Send a full chat transcript to the LLM and return its reply text.

    Args:
        messages: list of ``{"role", "content"}`` dicts in OpenAI chat format.

    Returns:
        The assistant reply, stripped; empty string if the model returned
        no text content.
    """
    response = client.chat.completions.create(
        model=model_name,
        messages=messages,
        stream=False
    )
    # message.content may be None — guard before .strip().
    return (response.choices[0].message.content or "").strip()
|
backend/main.py
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import uuid
|
| 2 |
+
from fastapi import FastAPI, HTTPException, Body, UploadFile, File
|
| 3 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 4 |
+
from tempfile import NamedTemporaryFile
|
| 5 |
+
from pydantic import BaseModel
|
| 6 |
+
from .sentiment_utils import analyze_sentiment
|
| 7 |
+
from .models import CandidateInfo, TechQuestionsRequest, TechQuestionsResponse, CandidateSessionId
|
| 8 |
+
from .llm_client import generate_technical_questions, chat_with_llm
|
| 9 |
+
from .qdrant_client import create_collection, store_candidate
|
| 10 |
+
from .session_utils import delete_session
|
| 11 |
+
from .pdf_parser import extract_text_from_pdf, parse_resume_text
|
| 12 |
+
|
| 13 |
+
app = FastAPI()

# CORS: wide open for the demo frontend.
# NOTE(review): browsers reject credentialed requests when allow_origins is
# "*" together with allow_credentials=True — confirm whether credentials are
# actually needed, otherwise drop allow_credentials or pin origins.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
| 22 |
+
|
| 23 |
+
class ChatRequest(BaseModel):
    """Request body for POST /chat."""
    session_id: str
    user_message: str
    conversation_history: list  # List of past messages (user/assistant)
|
| 27 |
+
|
| 28 |
+
class ChatResponse(BaseModel):
    """Response body for POST /chat.

    ``/chat`` constructs this with a ``sentiment`` keyword; without the
    field declared, pydantic silently drops it and the client never sees
    the sentiment label (models.py's ChatResponse declares it too).
    """
    reply: str
    sentiment: str | None = None
|
| 30 |
+
|
| 31 |
+
@app.on_event("startup")
async def startup_event():
    # Ensure the Qdrant collection (and its payload indexes) exist before
    # the first request. NOTE(review): @app.on_event is deprecated in newer
    # FastAPI in favour of lifespan handlers — fine on current versions.
    create_collection()
|
| 34 |
+
|
| 35 |
+
@app.get("/")
def root():
    """Liveness probe: report service status and name."""
    return dict(status="ok", service="talentscout-backend")
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
@app.get("/greet")
def greet():
    """Return the assistant's opening greeting."""
    greeting = (
        "Hello! I'm TalentScout's AI Hiring Assistant. "
        "I will guide you through the initial screening process."
    )
    return {"message": greeting}
|
| 46 |
+
|
| 47 |
+
@app.post("/candidate-info")
def save_candidate(candidate: CandidateInfo):
    """Persist a candidate profile and hand back a fresh session id."""
    try:
        session_id = str(uuid.uuid4())
        # Attach the generated session id to the stored payload.
        record = candidate.dict()
        record["session_id"] = session_id
        store_candidate(record)
        return {
            "status": "success",
            "message": "Candidate info stored.",
            "session_id": session_id
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
| 61 |
+
|
| 62 |
+
@app.post("/tech-questions", response_model=TechQuestionsResponse)
def get_technical_questions(req: TechQuestionsRequest):
    """Generate up to five screening questions for the given tech stack."""
    questions = generate_technical_questions(req.tech_stack)
    return TechQuestionsResponse(questions=questions)
|
| 66 |
+
|
| 67 |
+
@app.post("/parse-resume")
async def parse_resume(file: UploadFile = File(...)):
    """Extract text from an uploaded PDF resume and return parsed fields."""
    try:
        import os
        import tempfile

        # The PDF extractor wants a filesystem path, so spill the upload to
        # a temp file that outlives this write (mkstemp, not auto-delete).
        fd, tmp_path = tempfile.mkstemp(suffix='.pdf')

        try:
            raw = await file.read()
            with os.fdopen(fd, 'wb') as handle:
                handle.write(raw)

            parsed = parse_resume_text(extract_text_from_pdf(tmp_path))
            return {"status": "success", "parsed_data": parsed}

        finally:
            # Always drop the temp file, success or failure.
            if os.path.exists(tmp_path):
                os.unlink(tmp_path)

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to parse resume: {e}")
|
| 96 |
+
|
| 97 |
+
@app.post("/chat", response_model=ChatResponse)
def chat_endpoint(req: ChatRequest):
    """One chat turn: sanitize history, append the new message, ask the LLM."""
    try:
        sentiment = analyze_sentiment(req.user_message)

        messages = [{"role": "system", "content": "You are a professional interviewer. Ask candidate questions based on context. Be polite and adaptive."}]

        # Keep only well-formed history entries: a dict with a known role
        # and non-blank string content.
        valid_roles = ("user", "assistant", "system")
        for entry in req.conversation_history:
            if not isinstance(entry, dict):
                continue
            role, content = entry.get("role"), entry.get("content")
            if role in valid_roles and isinstance(content, str) and content.strip():
                messages.append({"role": role, "content": content})

        messages.append({"role": "user", "content": req.user_message})

        return ChatResponse(reply=chat_with_llm(messages), sentiment=sentiment)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
@app.post("/clear-session")
def clear_session(session: CandidateSessionId = Body(...)):
    """Remove all stored data tied to one session id."""
    # Guard clause: surface a 500 when deletion fails.
    if not delete_session(session.session_id):
        raise HTTPException(status_code=500, detail="Failed to clear session data.")
    return {"status": "success", "message": "Session data cleared."}
|
backend/models.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel, EmailStr
|
| 2 |
+
from typing import List, Optional
|
| 3 |
+
|
| 4 |
+
class CandidateInfo(BaseModel):
    """Candidate profile captured during the screening flow."""
    full_name: str
    email: EmailStr
    phone: str
    years_experience: int
    desired_position: str
    current_location: str
    tech_stack: List[str]
    # Optional enrichment fields — may be absent from the form.
    education: Optional[str] = None
    current_role: Optional[str] = None
    linkedin: Optional[str] = None
    github: Optional[str] = None
    portfolio: Optional[str] = None
|
| 17 |
+
|
| 18 |
+
class TechQuestionsRequest(BaseModel):
    """Request body for POST /tech-questions."""
    tech_stack: List[str]

class TechQuestionsResponse(BaseModel):
    """Response body for POST /tech-questions."""
    questions: List[str]

class CandidateSessionId(BaseModel):
    """Identifies a screening session (used by POST /clear-session)."""
    session_id: str

class ChatResponse(BaseModel):
    """Chat reply plus optional sentiment label for the user's message."""
    reply: str
    sentiment: Optional[str] = None
|
backend/pdf_parser.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pymupdf as fitz
|
| 2 |
+
import re
|
| 3 |
+
from difflib import get_close_matches
|
| 4 |
+
|
| 5 |
+
def extract_text_from_pdf(file_path: str) -> str:
    """Concatenate the text of every page of the PDF at *file_path*."""
    doc = fitz.open(file_path)
    try:
        return "".join(page.get_text() for page in doc)
    finally:
        doc.close()
|
| 12 |
+
|
| 13 |
+
def parse_resume_text(text: str) -> dict:
    """Heuristically parse raw resume text into structured fields.

    Uses regexes for email/phone/experience, an all-caps-line heuristic for
    the name, and substring + fuzzy matching against a fixed skill vocabulary.

    Args:
        text: full plain text of the resume.

    Returns:
        dict with keys "name", "email", "phone", "skills" (sorted list,
        max 12 entries) and "experience" (human-readable string).
    """
    lines = [line.strip() for line in text.split('\n') if line.strip()]
    text_lower = text.lower()

    extracted = {
        "name": "",
        "email": "",
        "phone": "",
        "skills": [],
        "experience": ""
    }

    # Valid skills database for matching
    valid_skills = [
        'FastAPI', 'React', 'Next.js', 'Flask', 'MongoDB', 'Tailwind CSS',
        'Machine Learning', 'Python', 'JavaScript', 'HTML', 'CSS', 'Node.js',
        'Docker', 'Kubernetes', 'AWS', 'Git', 'GitHub', 'TensorFlow', 'PyTorch',
        'Streamlit', 'Qdrant', 'LangChain', 'Gemini API', 'OpenAI', 'Gradio',
        'Pandas', 'NumPy', 'Scikit-learn', 'OpenCV', 'Django', 'Vue.js',
        'Angular', 'TypeScript', 'PostgreSQL', 'MySQL', 'Redis', 'GraphQL',
        'RESTful API', 'Microservices', 'CI/CD', 'Linux', 'Ubuntu', 'Nginx',
        'Apache', 'Jenkins', 'Terraform', 'Ansible', 'Elasticsearch'
    ]

    # Extract Email using regex.
    # (fixed: the TLD class was [A-Z|a-z], which also matched a literal '|')
    email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
    email_match = re.search(email_pattern, text)
    if email_match:
        extracted["email"] = email_match.group()

    # Extract Phone using regex — Indian mobile numbers with optional
    # +91/91 prefix. NOTE(review): international numbers are not covered.
    phone_pattern = r'\b(?:\+91|91)?[6-9]\d{9}\b'
    phone_match = re.search(phone_pattern, text)
    if phone_match:
        extracted["phone"] = phone_match.group()

    # Extract Name: first all-caps multi-word line near the top that is not
    # an obvious header/label line.
    for i, line in enumerate(lines[:10]):
        skip_keywords = ['course', 'email', 'mobile', 'cgpa', 'academic', 'details']
        if any(keyword in line.lower() for keyword in skip_keywords):
            continue

        if re.match(r'^[A-Z][A-Z\s]+$', line) and len(line.split()) >= 2:
            extracted["name"] = line.title()
            break

    # Extract and clean skills
    raw_skills = []

    # Look for explicit skill mentions
    for skill in valid_skills:
        if skill.lower() in text_lower:
            raw_skills.append(skill)

    # Extract from common skill patterns
    skill_patterns = [
        r'built with (.*?)(?:\.|,|;|\n)',
        r'using (.*?)(?:\.|,|;|\n)',
        r'technologies?:?\s*(.*?)(?:\.|,|;|\n)',
        r'skills?:?\s*(.*?)(?:\.|,|;|\n)',
        r'stack:?\s*(.*?)(?:\.|,|;|\n)'
    ]

    for pattern in skill_patterns:
        matches = re.findall(pattern, text, re.IGNORECASE | re.DOTALL)
        for match in matches:
            # Split on delimiters, whitespace, or the whole word "and".
            # (fixed: the old class [,\.\sand\s&\s]+ split on the *letters*
            # a/n/d individually, mangling tokens like "Pandas")
            words = re.split(r'(?:[,.;&]|\band\b|\s)+', match.strip())
            for word in words:
                word = word.strip()
                if len(word) > 2:
                    # Try to match with valid skills using fuzzy matching
                    close_matches = get_close_matches(word, valid_skills, n=1, cutoff=0.7)
                    if close_matches:
                        raw_skills.append(close_matches[0])

    # De-duplicate, sort for deterministic output (set order is salted per
    # process), and cap the list.
    extracted["skills"] = sorted(set(raw_skills))[:12]

    # Extract Experience
    exp_patterns = [
        r'(\d+)\+?\s*years?\s*(?:of\s*)?experience',
        r'experience\s*:?\s*(\d+)\+?\s*years?'
    ]

    for pattern in exp_patterns:
        match = re.search(pattern, text_lower)
        if match:
            extracted["experience"] = f"{match.group(1)} years"
            break

    if not extracted["experience"]:
        if 'intern' in text_lower and 'b.tech' in text_lower:
            extracted["experience"] = "0-1 years (Student/Intern)"
        else:
            extracted["experience"] = "Fresher"

    return extracted
|
backend/qdrant_client.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from qdrant_client import QdrantClient
|
| 2 |
+
from qdrant_client.models import VectorParams, Distance, PayloadSchemaType
|
| 3 |
+
import os
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
+
|
| 6 |
+
load_dotenv()
|
| 7 |
+
|
| 8 |
+
# Qdrant Cloud endpoint; overridable via env for local/dev clusters.
# NOTE(review): a live cluster URL is hard-coded as the fallback — consider
# failing fast when QDRANT_HOST is unset instead of baking it in.
QDRANT_HOST = os.getenv(
    "QDRANT_HOST",
    "https://9485db48-8672-469a-a917-41a4ebbfd533.us-east4-0.gcp.cloud.qdrant.io"
)
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")

COLLECTION_NAME = "TalentScout"

# Shared module-level client: REST transport (no gRPC), 30 s timeout,
# server/client version-compatibility check disabled.
qdrant_client = QdrantClient(
    url=QDRANT_HOST,
    api_key=QDRANT_API_KEY,
    prefer_grpc=False,
    timeout=30,
    check_compatibility=False,
)
|
| 23 |
+
|
| 24 |
+
def create_collection():
    """Create the TalentScout collection and its payload indexes if missing.

    Idempotent: skips collection creation when it already exists and
    swallows "already exists" errors from index creation.
    """
    collections = [col.name for col in qdrant_client.get_collections().collections]
    if COLLECTION_NAME not in collections:
        # 128-dim cosine space, matching the dummy vectors in store_candidate.
        qdrant_client.create_collection(
            collection_name=COLLECTION_NAME,
            vectors_config=VectorParams(size=128, distance=Distance.COSINE),
        )
    # Create payload indexes for filtering
    for field in ["session_id", "email"]:
        try:
            qdrant_client.create_payload_index(
                collection_name=COLLECTION_NAME,
                field_name=field,
                field_schema=PayloadSchemaType.KEYWORD,
            )
        except Exception as e:
            if "already exists" in str(e).lower():
                pass
            else:
                # Best-effort: log and continue so startup doesn't crash.
                print(f"Error creating index for {field}: {e}")
|
| 44 |
+
|
| 45 |
+
def store_candidate(candidate_dict):
    """Upsert one candidate payload into the TalentScout collection.

    The point id derives from the candidate's email, so re-submitting the
    same email overwrites the previous record instead of duplicating it.
    """
    import hashlib
    import random

    # Fixed two defects from the original:
    #  - built-in hash() is salted per process (PYTHONHASHSEED), so the point
    #    id changed across restarts, defeating upsert de-duplication; use a
    #    stable digest instead.
    #  - `hash(...) % 1` is always 0, producing an all-zero vector, which is
    #    degenerate under cosine distance.
    name_seed = int(hashlib.sha256(
        candidate_dict.get("full_name", "").encode("utf-8")
    ).hexdigest(), 16)
    email_digest = int(hashlib.sha256(
        candidate_dict.get("email", "").encode("utf-8")
    ).hexdigest(), 16)

    # Deterministic, non-zero placeholder vector (search quality is not a
    # goal here — the payload is what matters).
    rng = random.Random(name_seed)
    dummy_vector = [rng.uniform(-1.0, 1.0) for _ in range(128)]

    qdrant_client.upsert(
        collection_name=COLLECTION_NAME,
        points=[
            {
                "id": email_digest % (10 ** 8),
                "payload": candidate_dict,
                "vector": dummy_vector
            }
        ]
    )
|
| 57 |
+
|
| 58 |
+
def delete_session_data(session_id: str) -> bool:
    """Delete every point whose payload session_id matches; True on success."""
    from qdrant_client.models import Filter, FieldCondition, MatchValue, FilterSelector

    selector = FilterSelector(
        filter=Filter(
            must=[
                FieldCondition(
                    key="session_id",
                    match=MatchValue(value=session_id)
                )
            ]
        )
    )

    try:
        qdrant_client.delete(
            collection_name=COLLECTION_NAME,
            points_selector=selector
        )
    except Exception as e:
        print(f"Error deleting session data for session_id={session_id}: {e}")
        return False
    print(f"Deleted all points with session_id={session_id} from {COLLECTION_NAME}.")
    return True
|
backend/requirements.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#requirements.txt
|
| 2 |
+
fastapi
|
| 3 |
+
uvicorn
|
| 4 |
+
qdrant-client
|
| 5 |
+
pydantic
|
| 6 |
+
python-dotenv
|
| 7 |
+
pydantic[email]
|
| 8 |
+
openai
|
| 9 |
+
PyMuPDF
|
| 10 |
+
python-multipart
|
| 11 |
+
textblob
|
backend/sentiment_utils.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from textblob import TextBlob
|
| 2 |
+
|
| 3 |
+
def analyze_sentiment(text):
    """Classify *text* as "positive"/"negative"/"neutral" via TextBlob polarity."""
    score = TextBlob(text).sentiment.polarity
    # Thresholds at +/-0.25 leave a wide neutral band.
    if score > 0.25:
        return "positive"
    if score < -0.25:
        return "negative"
    return "neutral"
|
backend/session_utils.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .qdrant_client import delete_session_data
|
| 2 |
+
|
| 3 |
+
def delete_session(session_id: str) -> bool:
    """Delete all stored data for *session_id*; returns True on success."""
    print(f"Initiating deletion for session_id={session_id}")
    removed = delete_session_data(session_id)
    outcome = (
        f"Session data for {session_id} deleted successfully."
        if removed
        else f"Failed to delete session data for {session_id}."
    )
    print(outcome)
    return removed
|
requirements.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#requirements.txt
|
| 2 |
+
fastapi
|
| 3 |
+
uvicorn
|
| 4 |
+
qdrant-client
|
| 5 |
+
pydantic
|
| 6 |
+
python-dotenv
|
| 7 |
+
pydantic[email]
|
| 8 |
+
openai
|
| 9 |
+
PyMuPDF
|
| 10 |
+
python-multipart
|
| 11 |
+
textblob
|