Spaces:
Sleeping
Sleeping
| import os | |
| import re | |
| import json | |
| import google.generativeai as genai | |
# Authenticate the Gemini SDK from the environment. When GEMINI_API_KEY is
# unset the key falls back to an empty string, so configuration itself does
# not raise here.
genai.configure(
    api_key=os.getenv("GEMINI_API_KEY", ""),
)

# Module-level model handle shared by rerank() below.
model = genai.GenerativeModel("models/gemini-2.0-flash")
# Fields forwarded to Gemini for every candidate assessment.
_ASSESSMENT_FIELDS = (
    "url",
    "adaptive_support",
    "remote_support",
    "description",
    "duration",
    "test_type",
)

# Upper bound on returned recommendations; also interpolated into the prompt.
_MAX_RECOMMENDATIONS = 10

# Matches a markdown code fence (optionally tagged "json") and captures its body.
_CODE_FENCE_RE = re.compile(r'```(?:json)?\s*(.*?)```', re.DOTALL)


def _clean_candidate(candidate):
    """Return a copy of *candidate* limited to the fields sent to Gemini.

    Missing fields get placeholder defaults, and ``test_type`` is always a
    sequence so the model sees the same shape the prompt asks it to return.
    """
    cleaned = {}
    for field in _ASSESSMENT_FIELDS:
        if field in candidate:
            value = candidate[field]
            # A scalar test_type is wrapped so it serializes as a JSON array.
            if field == "test_type" and not isinstance(value, (list, tuple)):
                value = [value]
            cleaned[field] = value
        elif field == "test_type":
            cleaned[field] = ["Unknown"]
        elif field == "duration":
            cleaned[field] = None
        else:
            cleaned[field] = "Unknown"
    return cleaned


def _build_prompt(query, cleaned_candidates):
    """Render the ranking instructions, job description and candidate list."""
    candidates_json = json.dumps(cleaned_candidates, indent=2)
    return f"""
Given a job description, rank the most relevant SHL assessments based on how well they match the job requirements.
Job description: "{query}"
Candidate SHL assessments: {candidates_json}
Rank the most relevant assessments and return a JSON list in this format:
{{
"recommended_assessments": [
{{
"url": "...",
"adaptive_support": "Yes/No",
"remote_support": "Yes/No",
"description": "...",
"duration": integer or null,
"test_type": ["type1", "type2", ...]
}}
]
}}
CRITICAL INSTRUCTIONS:
1. Return ONLY valid JSON without any markdown code blocks or extra text
2. Preserve the exact URL values from the input - do not modify them
3. Include all fields from the original assessment data
4. Limit to the top {_MAX_RECOMMENDATIONS} most relevant assessments
5. Ensure the JSON is properly formatted with all fields
6. Keep all test_type values as arrays/lists, even if there's only one type
"""


def _parse_model_json(response_text):
    """Decode the model reply, tolerating code fences and surrounding prose."""
    fenced = _CODE_FENCE_RE.search(response_text)
    if fenced:
        response_text = fenced.group(1).strip()
    try:
        return json.loads(response_text)
    except json.JSONDecodeError:
        # Fall back to the outermost {...} span when the model wrapped the
        # JSON object in extra text despite the instructions.
        start = response_text.find("{")
        end = response_text.rfind("}")
        if start == -1 or end <= start:
            raise
        return json.loads(response_text[start:end + 1])


def _normalize_result(result):
    """Validate the decoded reply and fill in required assessment fields."""
    # The prompt asks for "a JSON list", so accept a bare list of assessments.
    if isinstance(result, list):
        result = {"recommended_assessments": result}
    if not isinstance(result, dict) or "recommended_assessments" not in result:
        return {"error": "Invalid response format: missing recommended_assessments key"}

    assessments = result["recommended_assessments"]
    if not isinstance(assessments, list):
        return {"error": "Invalid response format: recommended_assessments is not a list"}

    kept = []
    for assessment in assessments:
        if not isinstance(assessment, dict):
            # Drop entries that cannot carry the required fields instead of
            # failing the whole request.
            print(f"Skipping malformed assessment entry: {assessment!r}")
            continue
        if "url" not in assessment:
            assessment["url"] = "https://www.shl.com/missing-url"
        if "test_type" not in assessment:
            assessment["test_type"] = ["Unknown"]
        if not isinstance(assessment["test_type"], list):
            assessment["test_type"] = [assessment["test_type"]]
        kept.append(assessment)
        # Enforce the limit the prompt requests; the model may ignore it.
        if len(kept) == _MAX_RECOMMENDATIONS:
            break

    result["recommended_assessments"] = kept
    return result


def rerank(query, candidates):
    """
    Rerank the candidate assessments using Gemini.

    Args:
        query: The job description
        candidates: List of assessment dictionaries

    Returns:
        Dictionary containing the recommended assessments under the
        "recommended_assessments" key (at most 10 entries, each with a "url"
        and a list-valued "test_type"), or a dictionary with a single
        "error" key describing why reranking failed.
    """
    # Ensure we have candidates
    if not candidates:
        return {"error": "No candidate assessments to rerank"}

    # Everything below runs inside one boundary handler because this function
    # reports failures as an {"error": ...} dictionary, never as an exception.
    try:
        # Debugging output; default=str keeps the dump from failing on values
        # that are not JSON serializable.
        print(f"Reranking {len(candidates)} candidates")
        sample = json.dumps(candidates[0], indent=2, default=str)
        print(f"Sample candidate: {sample}")

        cleaned_candidates = [_clean_candidate(c) for c in candidates]
        prompt = _build_prompt(query, cleaned_candidates)

        response = model.generate_content(prompt)
        result = _parse_model_json(response.text)
        return _normalize_result(result)
    except Exception as e:
        error_msg = f"Error in reranking: {str(e)}"
        print(error_msg)
        return {"error": error_msg}