import json
import yaml
import logging

from .llms import LLM
from .resume_service import clean_json_response

# Configure logging
logger = logging.getLogger(__name__)


def build_interview_scoring_prompt(jd: str, rubric: dict, categorized_qa_pairs: dict) -> str:
    """Builds the single batch prompt used to score all categorized Q&A pairs against the JD and rubric."""
    qa_section = ""
    for category, pairs in categorized_qa_pairs.items():
        if pairs:
            qa_section += f"\n### Category: {category}\n"
            for pair in pairs:
                qa_section += f"**ID: {pair['qa_id']}**\n"
                qa_section += f"**Question:** {pair['question']}\n"
                qa_section += f"**Answer:** {pair['answer']}\n\n"

    json_example = """
{
    "results": {
        "technical_skills": [
            {
                "qa_id": 1,
                "analysis": {
                    "score": 8,
                    "reasoning": "Demonstrates experience with NLP tasks and relevant models."
                }
            },
            {
                "qa_id": 3,
                "analysis": {
                    "score": 8,
                    "reasoning": "Identifies a key challenge in Vietnamese NLP and describes a practical approach."
                }
            }
        ],
        "problem_solving": [
            {
                "qa_id": 3,
                "analysis": {
                    "score": 7,
                    "reasoning": "The solution is practical but could be more detailed."
                }
            }
        ]
    }
}
"""

    prompt_parts = [
        "You are a world-class technical recruiter and interview analyst.",
        "Your task is to score a candidate's answers for an entire interview based on a job description (JD) and a scoring rubric.\n",
        "**1. Job Description (What we're looking for):**\n",
        jd,
        "\n\n**2. Scoring Rubric (How to score):**\n",
        yaml.dump(rubric, allow_unicode=True),
        "\n\n**3. Interview Transcript (Questions and Answers):**\n",
        qa_section,
        "\n\n**INSTRUCTIONS:**\n",
        "Analyze EACH question and answer pair from the transcript in the context of the JD and rubric.\n",
        "For EACH Q&A pair, provide a score for each category explicitly listed in the 'Scoring Rubric' section. Do NOT create new categories. The score must be an integer from 1 (poor) to 10 (excellent).\n",
        "Provide a concise reasoning for each score.\n",
        "\n**OUTPUT FORMAT (MUST be a single valid JSON object):**\n",
        "Provide a JSON object with a key 'results'. The value should be a dictionary where keys are the categories (e.g., 'technical_skills'). For each category, the value is a list of analyses. Each analysis item MUST contain the 'qa_id' and an 'analysis' object with 'score' and 'reasoning'.\n",
        "Example:\n```json\n",
        json_example,
        "\n```"
    ]
    return "".join(prompt_parts)


def categorize_qa_pairs(qa_pairs: list, rubric: dict) -> dict:
    """Asks the LLM to group each Q&A pair under the most relevant rubric criterion."""
    llm = LLM()
    # Pre-seed every rubric criterion so the result always contains all categories.
    categorized_pairs = {"general": []}
    for criterion in rubric.get('criteria', []):
        categorized_pairs[criterion['name']] = []

    categorization_prompt = f"""
You are an expert in interview analysis. Your task is to categorize each question and answer pair based on the provided scoring criteria.

**Scoring Criteria:**
{yaml.dump(rubric.get('criteria', []), allow_unicode=True)}

**Instructions:**
For each Q&A pair, identify the MOST relevant criterion from the 'Scoring Criteria' list. If a Q&A pair does not clearly fit any specific criterion, categorize it as 'general'.
Return a JSON object where keys are the criterion names (or 'general') and values are lists of the original Q&A pairs that belong to that category.

**Example Output Format:**
{{
    "Criterion 1 Name": [
        {{"qa_id": 1, "question": "...", "answer": "..."}},
        {{"qa_id": 2, "question": "...", "answer": "..."}}
    ],
    "Criterion 2 Name": [
        {{"qa_id": 3, "question": "...", "answer": "..."}}
    ],
    "general": [
        {{"qa_id": 4, "question": "...", "answer": "..."}}
    ]
}}

**Q&A Pairs to Categorize:**
{yaml.dump(qa_pairs, allow_unicode=True)}
"""

    try:
        response_text = llm.generate(categorization_prompt)
        parsed_categories = clean_json_response(response_text)
        if parsed_categories is None:
            logger.warning("Failed to parse categorization response. Returning all as general.")
            return {"general": qa_pairs}
        # Merge into the pre-seeded dict so criteria missing from the
        # response still appear (empty) in the result.
        categorized_pairs.update(parsed_categories)
        return categorized_pairs
    except Exception as e:
        logger.error(f"Error during Q&A categorization: {e}. Returning all as general.")
        return {"general": qa_pairs}


def postprocess_transcript(raw_transcript: str) -> str:
    """
    Uses the LLM class to clean up and structure a raw interview transcript.

    Args:
        raw_transcript: The raw transcript text from the STT service.

    Returns:
        A cleaned and structured version of the transcript, formatted as Q&A.
    """
    try:
        logger.info("Starting transcript post-processing with LLM.")
        llm = LLM(model="gemini-2.0-flash", temperature=0.7)

        prompt = f"""Please process the following raw interview transcript. Your task is to:
1. Identify who is the 'Interviewer' and who is the 'Candidate'. The interviewer is the one asking questions and guiding the conversation. The candidate is the one answering.
2. Replace all occurrences of 'Speaker 0', 'Speaker 1', etc., with 'Interviewer:' or 'Candidate:'.
3. Correct any obvious spelling, grammatical, or transcription errors to make the text clear and readable.
4. Structure the entire conversation into a clear, turn-by-turn dialogue format.
5. Ensure the final output is only the cleaned, structured dialogue, without any extra commentary or explanation from you.

Here is the raw transcript:
---
{raw_transcript}
---

Cleaned Transcript:"""

        cleaned_transcript = llm.generate(prompt)
        logger.info("Transcript post-processing completed successfully.")
        return cleaned_transcript
    except Exception as e:
        logger.error(f"Error during transcript post-processing: {e}")
        return raw_transcript  # Return the original transcript if processing fails
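
# Illustrative example of the cleanup the prompt above asks the model for
# (hypothetical input/output; the exact wording depends on the model):
#   raw:     "Speaker 0: so tell me abuot yourself\nSpeaker 1: i am an ML enginer..."
#   cleaned: "Interviewer: So, tell me about yourself.\nCandidate: I am an ML engineer..."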


def score_interview(qa_pairs: list, job_description: str, rubric_content: str) -> dict:
    """
    Scores a list of Q&A pairs using the Gemini API via LangChain.
    """
    llm = LLM()
    try:
        rubric = yaml.safe_load(rubric_content)
    except yaml.YAMLError as e:
        raise ValueError(f"Invalid YAML format in rubric: {e}") from e

    logger.info("Categorizing Q&A pairs...")
    categorized_qa_pairs = categorize_qa_pairs(qa_pairs, rubric)

    logger.info("Scoring categorized Q&A pairs...")
    prompt = build_interview_scoring_prompt(job_description, rubric, categorized_qa_pairs)

    try:
        response_text = llm.generate(prompt)
        logger.debug(f"Raw Gemini response:\n{response_text}")

        # Clean the response so it parses as JSON: strip a surrounding
        # ```json ... ``` markdown fence if the model added one, without
        # touching backticks or 'json' substrings inside the payload itself.
        cleaned_response = response_text.strip()
        if cleaned_response.startswith("```json"):
            cleaned_response = cleaned_response[len("```json"):]
        elif cleaned_response.startswith("```"):
            cleaned_response = cleaned_response[3:]
        if cleaned_response.endswith("```"):
            cleaned_response = cleaned_response[:-3]
        cleaned_response = cleaned_response.strip()

        try:
            batch_results = json.loads(cleaned_response)
            if 'results' in batch_results and isinstance(batch_results['results'], dict):
                # Create a lookup map by qa_id for efficient updates
                qa_lookup = {p['qa_id']: p for p in qa_pairs}
                for category, scored_items in batch_results['results'].items():
                    for item in scored_items:
                        qa_id = item.get('qa_id')
                        if qa_id not in qa_lookup:
                            continue
                        target_pair = qa_lookup[qa_id]
                        # Initialize 'analysis' if it doesn't exist
                        if 'analysis' not in target_pair:
                            target_pair['analysis'] = {'scores': []}
                        # Extract score and reasoning from the simplified analysis
                        analysis = item.get('analysis', {})
                        score = analysis.get('score')
                        reasoning = analysis.get('reasoning')
                        if score is None:
                            continue
                        # Keep at most one score per category for each Q&A pair,
                        # in case the LLM repeats a qa_id within a category.
                        existing_categories = {s['category'] for s in target_pair['analysis']['scores']}
                        if category not in existing_categories:
                            # Append the new score, tagged with its category for internal processing
                            target_pair['analysis']['scores'].append({
                                'category': category,
                                'score': score,
                                'reasoning': reasoning
                            })
                        # Set the primary category for frontend grouping;
                        # the first category assigned wins.
                        if 'category' not in target_pair:
                            target_pair['category'] = category
                # The original qa_pairs list has been updated in place via the lookup.
            else:
                # If mapping fails, add a generic error to all pairs
                for pair in qa_pairs:
                    pair['analysis'] = {"error": "Failed to parse batch response from AI."}
                logger.error("AI response did not contain a valid 'results' dictionary.")
        except json.JSONDecodeError as e:
            logger.error(f"Error parsing batch Gemini response: {e}")
            logger.error(f"Problematic JSON string:\n{cleaned_response}")
            # If the whole batch fails, add an error to all pairs
            for pair in qa_pairs:
                pair['analysis'] = {"error": "Failed to get a valid score from AI for the batch."}
    except (AttributeError, ValueError, KeyError) as e:
        logger.error(f"Error generating or parsing batch Gemini response: {e}")
        # If the whole batch fails, add an error to all pairs
        for pair in qa_pairs:
            pair['analysis'] = {"error": "Failed to get a valid score from AI for the batch."}

    return {"scored_qa_pairs": qa_pairs}