File size: 4,807 Bytes
655c38a
 
 
 
 
 
 
 
 
 
 
57b5eed
 
 
 
 
 
 
 
 
655c38a
 
 
 
57b5eed
 
 
 
 
 
 
655c38a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57b5eed
655c38a
 
57b5eed
 
655c38a
 
 
57b5eed
655c38a
 
57b5eed
655c38a
57b5eed
655c38a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import os
import logging
import asyncio
from fastapi import FastAPI, HTTPException
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from typing import List
from query_processing import process_query, vector_search, extract_attributes
from scraper import is_url, scrape_job_description

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler()
    ]
)

# Module-wide logger used by the middleware and endpoint handlers below.
logger = logging.getLogger('shl-api')

# Initialize FastAPI app
app = FastAPI()

@app.middleware("http")
async def log_requests(request, call_next):
    """Log every HTTP request on entry and exit.

    Args:
        request: The incoming request object.
        call_next: Callable that forwards the request to the next handler.

    Returns:
        The response produced by the downstream handler.
    """
    # Lazy %-style args defer string formatting until the record is emitted.
    logger.info("Incoming %s request to %s", request.method, request.url)
    response = await call_next(request)
    logger.info(
        "Completed %s request to %s with status %s",
        request.method, request.url, response.status_code,
    )
    return response

# Pydantic models for request/response validation
class Query(BaseModel):
    # Raw user input: either a free-text query/job description or a URL
    # (URLs are detected and scraped in the /recommend handler).
    query: str

class Assessment(BaseModel):
    """One recommended assessment entry returned by /recommend."""
    url: str  # catalog URL of the assessment
    adaptive_support: str  # "Yes"/"No" string flag
    description: str
    duration: int  # numeric duration (presumably minutes — confirm upstream data)
    remote_support: str  # "Yes"/"No" string flag
    test_type: List[str]  # cleaned Key_Features entries

class RecommendationResponse(BaseModel):
    """Response envelope: the list of up to 10 recommended assessments."""
    recommended_assessments: List[Assessment]

@app.get("/health")
async def health_check():
    """Liveness probe: confirm the service is up and responding."""
    return dict(status="healthy")

def _parse_duration(raw_duration, default=60):
    """Pull the integer out of a duration string (e.g. '45 minutes' -> 45).

    Concatenates every digit in the string ('1 hour 30' -> 130), matching
    the original inline parsing. Falls back to *default* when the value is
    missing or contains no digits.
    """
    if raw_duration and any(c.isdigit() for c in raw_duration):
        return int(''.join(filter(str.isdigit, raw_duration)))
    return default


@app.post("/recommend", response_model=RecommendationResponse)
async def recommend(query: Query):
    """Endpoint to get assessment recommendations based on job description or query.

    If the query is a URL, the job description is scraped from it first.
    The text is then embedded, matched against the vector index, and the
    top results are shaped into the response model.

    Raises:
        HTTPException: 400 for empty input or scraping failures,
            500 for any other pipeline error.
    """
    try:
        query_text = query.query.strip()
        if not query_text:
            logger.error("Empty query received")
            raise HTTPException(status_code=400, detail="Query cannot be empty")

        logger.info("Processing query: %s...", query_text[:100])

        # Check if input is a URL and scrape if needed
        if is_url(query_text):
            # Keep the URL: query_text is overwritten with the scraped text,
            # so logging query_text after the scrape would print the wrong value.
            source_url = query_text
            try:
                logger.info("Scraping job description from URL: %s", source_url)
                query_text = await asyncio.to_thread(scrape_job_description, source_url)
                if not query_text:
                    logger.error("Failed to extract job description from URL: %s", source_url)
                    raise HTTPException(status_code=400, detail="Could not extract job description from URL")
                logger.info("Successfully scraped job description")
            except HTTPException:
                raise  # preserve the 400 raised above instead of re-wrapping it
            except Exception as e:
                logger.error("Error scraping URL %s: %s", source_url, e)
                raise HTTPException(status_code=400, detail="Failed to scrape job description from URL")

        # Embed the query in a worker thread (process_query is blocking)
        query_embedding = await asyncio.to_thread(process_query, query_text)

        # Perform vector search asynchronously
        distances, indices = await asyncio.to_thread(vector_search, query_embedding)

        # Normalize flat search output to the 2-D (1, k) shape used below
        if len(indices.shape) == 1:
            indices = indices.reshape(1, -1)
            distances = distances.reshape(1, -1)

        # Extract attributes from search results asynchronously
        raw_results = await asyncio.to_thread(extract_attributes, distances=distances, indices=indices)

        # Transform results to match required response format
        recommended_assessments = []
        for result in raw_results[:10]:  # Limit to 10 assessments
            key_features = result.get('Key_Features', [])
            recommended_assessments.append(Assessment(
                url=result['URL'],
                description=result.get('description', '').strip(),
                duration=_parse_duration(result.get('Duration')),
                remote_support="Yes" if result.get('Remote_Testing', False) else "No",
                adaptive_support="Yes" if any("adaptive" in feature.lower() for feature in key_features) else "No",
                test_type=[feature.strip() for feature in key_features if feature.strip()]
            ))

        return RecommendationResponse(recommended_assessments=recommended_assessments)

    except HTTPException:
        # Let deliberate 4xx responses pass through instead of becoming 500s
        raise
    except Exception as e:
        logger.exception("Error in recommendation pipeline: %s", e)
        raise HTTPException(status_code=500, detail="Internal server error")

if __name__ == "__main__":
    import uvicorn

    # Launch the ASGI server bound to all interfaces on port 8000.
    uvicorn.run("api:app", host="0.0.0.0", port=8000, workers=1, log_level="info")