Spaces:
Build error
Build error
| import { convertDocToPdf } from "./document-converter" | |
| import { convertPdfToImages } from "./pdf-to-image" | |
| import { extractTextFromFile } from "./simple-text-parser" | |
| import { performOcr } from "./ocr-service" | |
| import { parseResumeWithLLM } from "./llm-parser" | |
| import type { Resume, Experience } from "@/types/resume" | |
| import { existsSync } from "fs" | |
| import { v4 as uuidv4 } from "uuid" | |
/**
 * Metadata describing one uploaded resume file, as consumed by the parsing
 * functions in this module.
 */
interface FileData {
  /** Identifier copied onto the resulting resume record. */
  id: string;
  /** File name as uploaded; also used to derive a fallback display name. */
  originalName: string;
  /** Location of the stored file on disk (checked with `existsSync`). */
  filePath: string;
  /** File extension; compared against "doc" / "docx" to decide on conversion. */
  extension: string;
  /** Upload/processing status label (not read by the code in this module). */
  status: string;
  /** Upload timestamp, passed through unchanged to the resume record. */
  uploadedAt: string;
}
// Function to calculate total years of experience

/** A bare number at or above this value is not accepted as "years in one position". */
const MAX_PLAIN_NUMBER_YEARS = 100;

/** End-of-range markers meaning "still ongoing". */
const ONGOING_END = /^(?:present|current|now)$/i;

// The trailing (?![a-z]) stops the one-letter units from matching the first
// letter of a word: without it "2020 May" would be read as "2020 m(onths)".
// An optional "+" is allowed between number and unit so "5+ years" parses.
const EXPLICIT_YEARS = /(\d+(?:\.\d+)?)\s*\+?\s*(?:years?|yrs?|yoe|y)(?![a-z])/i;
const EXPLICIT_MONTHS = /(\d+(?:\.\d+)?)\s*\+?\s*(?:months?|mons?|mos?|mths?|m)(?![a-z])/i;

// Range separators are "one or more hyphens" or the word "to". (A character
// class such as [-to]+ would accept any mix of the characters '-', 't', 'o'.)
const NAMED_DATE_RANGE = /([a-z]+\.?\s+\d{4})\s*(?:-+|to)\s*([a-z]+\.?\s+\d{4}|present|current|now)/i;
const NUMERIC_DATE_RANGE = /\b(\d{1,2})[\/-](\d{4})\s*(?:-+|to)\s*(?:(\d{1,2})[\/-](\d{4})|present|current|now)/i;
const YEAR_RANGE = /(\d{4})\s*(?:-+|to)\s*(\d{4}|present|current|now)/i;
const SINGLE_NAMED_DATE = /^([a-z]+\.?\s+\d{4})$/i;
const SINGLE_YEAR = /^(\d{4})$/;
const YEARS_IN_TEXT = /\b(?:19|20)\d{2}\b/g;
const PLAIN_NUMBER = /^(?:\d+\.?\d*|\.\d+)$/;

/** Month names and common abbreviations mapped to their 0-based month index. */
const MONTH_INDEX_BY_NAME: ReadonlyMap<string, number> = new Map([
  ["january", 0], ["jan", 0],
  ["february", 1], ["feb", 1],
  ["march", 2], ["mar", 2],
  ["april", 3], ["apr", 3],
  ["may", 4],
  ["june", 5], ["jun", 5],
  ["july", 6], ["jul", 6],
  ["august", 7], ["aug", 7],
  ["september", 8], ["sep", 8], ["sept", 8],
  ["october", 9], ["oct", 9],
  ["november", 10], ["nov", 10],
  ["december", 11], ["dec", 11],
]);

/**
 * Sums the durations of all experience entries and formats the total as
 * "X year(s) Y month(s)".
 *
 * Each entry's `duration` text is normalised and then tried against a list of
 * formats in order (explicit "2 years 5 months", "Jan 2020 - Mar 2021",
 * "01/2020 - 06/2021", "2019 - 2021", a single date/year treated as ongoing,
 * any two years found in the text, a bare number of years). The first format
 * that yields a valid month count wins; a format that matches but cannot be
 * interpreted falls through to the next one.
 *
 * Durations are simply added together; overlapping positions are not merged.
 *
 * @param experiences Experience entries; only `title`, `company` and
 *   `duration` are read.
 * @returns "0 years" for an empty/missing list; "Unknown" when no entry has a
 *   duration at all; otherwise the formatted total. When durations exist but
 *   none could be parsed, 24 months per entry is assumed as an estimate.
 */
export function calculateTotalExperience(experiences: Experience[]): string {
  if (!experiences || experiences.length === 0) {
    console.log("No experiences provided, returning 0 years");
    return "0 years";
  }

  console.log(`Processing ${experiences.length} experience entries:`);
  experiences.forEach((exp, i) => {
    console.log(` [${i+1}] ${exp.title || 'No title'} at ${exp.company || 'No company'}: "${exp.duration || 'No duration'}"`);
  });

  let totalMonths = 0;
  let hasValidDurations = false;
  let attemptedParsing = false;

  for (const exp of experiences) {
    if (!exp.duration) {
      console.log(`Skipping experience with no duration: ${exp.title || 'Unknown position'}`);
      continue;
    }
    console.log(`Processing: "${exp.duration}" for ${exp.title || 'Unknown position'}`);
    attemptedParsing = true;

    try {
      // String(): the data comes from an LLM, so a numeric duration is possible
      // and must not crash the whole calculation.
      const cleanDuration = normalizeDuration(String(exp.duration));
      const months = parseDurationToMonths(cleanDuration);
      if (months !== null) {
        totalMonths += months;
        hasValidDurations = true;
      }
    } catch (error) {
      console.error(` Error processing duration "${exp.duration}":`, error);
    }
  }

  if (!hasValidDurations) {
    if (attemptedParsing) {
      // Durations were present but unreadable: assume 2 years per position.
      const estimatedMonths = experiences.length * 24;
      console.log(`No valid durations found, estimating based on ${experiences.length} positions (${estimatedMonths} months)`);
      totalMonths = estimatedMonths;
    } else {
      console.log(`No valid durations found, returning "Unknown"`);
      return "Unknown";
    }
  }

  // Fractional years (e.g. 2.3 * 12 = 27.599999...) would otherwise leak
  // floating-point noise into the formatted month count.
  totalMonths = Math.round(totalMonths);

  const years = Math.floor(totalMonths / 12);
  const months = totalMonths % 12;

  let result: string;
  if (years === 0) {
    result = `${months} month${months !== 1 ? 's' : ''}`;
  } else if (months === 0) {
    result = `${years} year${years !== 1 ? 's' : ''}`;
  } else {
    result = `${years} year${years !== 1 ? 's' : ''} ${months} month${months !== 1 ? 's' : ''}`;
  }
  console.log(`Total experience calculated: ${result} (${totalMonths} months)`);
  return result;
}

/** Replaces commas with spaces, collapses whitespace, and turns en/em dashes into hyphens. */
function normalizeDuration(raw: string): string {
  return raw
    .replace(/,/g, ' ')
    .replace(/\s+/g, ' ')
    .replace(/–|—/g, '-')
    .trim();
}

/** Tries each duration format in order; returns the month count or null if none applies. */
function parseDurationToMonths(duration: string): number | null {
  const strategies: ReadonlyArray<(text: string) => number | null> = [
    explicitDurationMonths,
    namedDateRangeMonths,
    numericDateRangeMonths, // before yearRangeMonths so "01/2020 - present" keeps its month
    yearRangeMonths,
    singleNamedDateMonths,
    singleYearMonths,
    yearsInTextMonths,
    plainNumberMonths,
  ];
  for (const strategy of strategies) {
    const months = strategy(duration);
    if (months !== null) {
      return months;
    }
  }
  console.log(` Could not parse duration format: "${duration}"`);
  return null;
}

/** Handles explicit amounts such as "2 years 5 months", "3 yrs", "5+ years", "2y 6m". */
function explicitDurationMonths(duration: string): number | null {
  const yearMatch = duration.match(EXPLICIT_YEARS);
  const monthMatch = duration.match(EXPLICIT_MONTHS);
  if (!yearMatch && !monthMatch) {
    return null;
  }
  console.log(` Found explicit year/month format`);
  let total = 0;
  if (yearMatch) {
    const years = parseFloat(yearMatch[1]);
    total += years * 12;
    console.log(` Added ${years} years (${years * 12} months)`);
  }
  if (monthMatch) {
    const months = parseFloat(monthMatch[1]);
    total += months;
    console.log(` Added ${months} months`);
  }
  return total;
}

/** Handles "Month Year - Month Year" and "Month Year - Present" (also with "to"). */
function namedDateRangeMonths(duration: string): number | null {
  const match = duration.match(NAMED_DATE_RANGE);
  if (!match) {
    return null;
  }
  console.log(` Matched date range: "${match[1]}" to "${match[2]}"`);
  const startDate = parseDate(match[1]);
  let endDate: Date | null;
  if (ONGOING_END.test(match[2])) {
    endDate = new Date(); // Current date
    console.log(` End date is present/current: ${endDate.toISOString().slice(0,10)}`);
  } else {
    endDate = parseDate(match[2]);
  }
  if (!startDate || !endDate) {
    return null;
  }
  const monthCount = calculateMonthsBetweenDates(startDate, endDate);
  if (monthCount < 0) {
    console.log(` Invalid negative duration: ${monthCount} months`);
    return null;
  }
  console.log(` Added ${monthCount} months (from ${startDate.toISOString().slice(0,10)} to ${endDate.toISOString().slice(0,10)})`);
  return monthCount;
}

/** Handles "MM/YYYY - MM/YYYY", "MM-YYYY - MM-YYYY" and "MM/YYYY - present". */
function numericDateRangeMonths(duration: string): number | null {
  const match = duration.match(NUMERIC_DATE_RANGE);
  if (!match) {
    return null;
  }
  console.log(` Matched numeric date range: ${match[0]}`);
  const startMonth = parseInt(match[1], 10) - 1; // Months are 0-indexed in JS
  const startYear = parseInt(match[2], 10);
  let endMonth: number;
  let endYear: number;
  if (match[3] === undefined || match[4] === undefined) {
    // The end-date groups are absent when the range ends in present/current/now.
    const now = new Date();
    endMonth = now.getMonth();
    endYear = now.getFullYear();
  } else {
    endMonth = parseInt(match[3], 10) - 1;
    endYear = parseInt(match[4], 10);
  }
  const monthsInRange = [startMonth, endMonth].every(m => m >= 0 && m <= 11);
  if (!monthsInRange) {
    // e.g. "13/2020": Date would silently roll this over into the next year.
    console.log(` Invalid numeric date components: ${startMonth}/${startYear} - ${endMonth}/${endYear}`);
    return null;
  }
  const startDate = new Date(startYear, startMonth, 1);
  const endDate = new Date(endYear, endMonth, 1);
  if (endDate < startDate) {
    console.log(` Invalid date range: end date before start date`);
    return null;
  }
  const monthCount = calculateMonthsBetweenDates(startDate, endDate);
  console.log(` Added ${monthCount} months from ${startDate.toISOString().slice(0,10)} to ${endDate.toISOString().slice(0,10)}`);
  return monthCount;
}

/** Handles "Year - Year" and "Year to Present"; counts whole years only. */
function yearRangeMonths(duration: string): number | null {
  const match = duration.match(YEAR_RANGE);
  if (!match) {
    return null;
  }
  console.log(` Matched year range: "${match[1]}" to "${match[2]}"`);
  const startYear = parseInt(match[1], 10);
  let endYear: number;
  if (ONGOING_END.test(match[2])) {
    endYear = new Date().getFullYear();
    console.log(` End year is current year: ${endYear}`);
  } else {
    endYear = parseInt(match[2], 10);
  }
  if (isNaN(startYear) || isNaN(endYear) || endYear < startYear) {
    console.log(` Invalid years or range: ${startYear} to ${endYear}`);
    return null;
  }
  const yearDiff = endYear - startYear;
  const monthCount = yearDiff * 12;
  console.log(` Added ${monthCount} months (${yearDiff} years) from ${startYear} to ${endYear}`);
  return monthCount;
}

/** Handles a lone "Month Year", treated as a start date that is still ongoing. */
function singleNamedDateMonths(duration: string): number | null {
  const match = duration.match(SINGLE_NAMED_DATE);
  if (!match) {
    return null;
  }
  console.log(` Matched single date: "${match[1]}"`);
  const startDate = parseDate(match[1]);
  return startDate ? monthsUntilNow(startDate) : null;
}

/** Handles a lone "Year", treated as January of that year until now. */
function singleYearMonths(duration: string): number | null {
  const match = duration.match(SINGLE_YEAR);
  if (!match) {
    return null;
  }
  console.log(` Matched single year: "${match[1]}"`);
  const startYear = parseInt(match[1], 10);
  if (isNaN(startYear) || startYear > new Date().getFullYear()) {
    return null;
  }
  return monthsUntilNow(new Date(startYear, 0, 1)); // January 1st
}

/** Months from `startDate` to the current date, or null when the start lies in the future. */
function monthsUntilNow(startDate: Date): number | null {
  const monthCount = calculateMonthsBetweenDates(startDate, new Date());
  if (monthCount < 0) {
    return null;
  }
  console.log(` Added ${monthCount} months (from ${startDate.toISOString().slice(0,10)} to present)`);
  return monthCount;
}

/** Last resort for free text: uses the first and last 19xx/20xx year mentioned. */
function yearsInTextMonths(duration: string): number | null {
  const yearsInText = duration.match(YEARS_IN_TEXT);
  if (!yearsInText || yearsInText.length < 2) {
    return null;
  }
  console.log(` Found years in text: ${yearsInText.join(', ')}`);
  const startYear = parseInt(yearsInText[0], 10);
  const endYear = parseInt(yearsInText[yearsInText.length - 1], 10);
  if (isNaN(startYear) || isNaN(endYear) || endYear < startYear) {
    return null;
  }
  const yearDiff = endYear - startYear;
  const monthCount = yearDiff * 12;
  console.log(` Added ${monthCount} months (${yearDiff} years) from extracted years ${startYear}-${endYear}`);
  return monthCount;
}

/** Handles a bare number such as "3" or "1.5", read as a number of years. */
function plainNumberMonths(duration: string): number | null {
  if (!PLAIN_NUMBER.test(duration)) {
    return null;
  }
  const years = parseFloat(duration);
  // The upper bound keeps an unusable calendar year (e.g. a future "2999")
  // from being counted as thousands of years of experience.
  if (isNaN(years) || years <= 0 || years >= MAX_PLAIN_NUMBER_YEARS) {
    return null;
  }
  const monthCount = Math.round(years * 12);
  console.log(` Found numerical value: ${years} years (${monthCount} months)`);
  return monthCount;
}

// Helper function to calculate months between two dates
/**
 * Whole calendar months from `start` to `end`, ignoring the day of month.
 * Negative when `end` falls in an earlier month than `start`.
 */
function calculateMonthsBetweenDates(start: Date, end: Date): number {
  const yearDiff = end.getFullYear() - start.getFullYear();
  const monthDiff = end.getMonth() - start.getMonth();
  return (yearDiff * 12) + monthDiff;
}

// Helper function to parse a date string
/**
 * Parses "Month Year" (e.g. "Jan 2020", "Sept. 2019") or "MM/YYYY" / "MM-YYYY"
 * into a Date on the first day of that month (local time).
 *
 * @returns The parsed date, or null (after logging a warning) when neither
 *   format applies.
 */
function parseDate(dateStr: string): Date | null {
  const parts = dateStr.trim().split(/\s+/);
  if (parts.length >= 2) {
    // Format: "Month Year" - first token is the month, last token the year
    const year = parseInt(parts[parts.length - 1], 10);
    if (!isNaN(year)) {
      const monthIndex = getMonthIndex(parts[0]);
      if (monthIndex !== -1) {
        return new Date(year, monthIndex, 1);
      }
    }
  }

  // Try MM/YYYY or MM-YYYY format if month-year parsing failed
  const numericDateMatch = dateStr.match(/(\d{1,2})[\/\-](\d{4})/);
  if (numericDateMatch) {
    const month = parseInt(numericDateMatch[1], 10) - 1; // 0-indexed months
    const year = parseInt(numericDateMatch[2], 10);
    if (!isNaN(month) && !isNaN(year) && month >= 0 && month < 12) {
      return new Date(year, month, 1);
    }
  }

  console.warn(` Failed to parse date string: "${dateStr}"`);
  return null;
}

// Helper function to convert month name to index
/**
 * Maps a month name or abbreviation (case-insensitive, trailing "." ignored)
 * to its 0-based index. Falls back to the first three letters, so longer
 * spellings that start with a known abbreviation still resolve.
 *
 * @returns 0-11, or -1 when the name is not recognised.
 */
function getMonthIndex(monthName: string): number {
  const normalized = monthName.trim().toLowerCase().replace(/\.+$/, "");

  // A Map lookup (unlike `key in object`) never matches inherited keys such
  // as "constructor".
  const exact = MONTH_INDEX_BY_NAME.get(normalized);
  if (exact !== undefined) {
    return exact;
  }
  return MONTH_INDEX_BY_NAME.get(normalized.substring(0, 3)) ?? -1;
}
/**
 * Runs the parsing pipeline for one uploaded resume and returns a resume record.
 *
 * Steps: verify the file exists, convert DOC/DOCX to PDF via `convertDocToPdf`,
 * extract text via `extractTextFromFile`, append file metadata as context,
 * parse the text with `parseResumeWithLLM`, and compute total experience.
 *
 * The function does not reject: a failed conversion falls back to the original
 * file, a failed extraction falls back to a metadata-only placeholder, a failed
 * LLM call falls back to a minimal record named after the file, and any other
 * error yields the record built by `createBasicResume` with status "Error".
 *
 * @param fileData Metadata of the uploaded file.
 * @returns The parsed resume, or an error placeholder resume.
 */
export async function parseResume(fileData: FileData): Promise<Resume> {
  // Captured before any work starts so it is not just a copy of the completion time.
  const processingStartedAt = new Date().toISOString()
  console.log("=== Starting resume parsing ===")
  try {
    console.log("1. Starting resume parsing for file:", fileData.id)

    // Verify file exists
    console.log("2. Checking if file exists:", fileData.filePath)
    if (!existsSync(fileData.filePath)) {
      console.error("File does not exist:", fileData.filePath)
      return createBasicResume(fileData, `File does not exist: ${fileData.filePath}. Cannot process this resume.`)
    }
    console.log("3. File exists")

    // Step 1: Convert DOC/DOCX to PDF if needed.
    // Normalised so "DOCX" or ".docx" are recognised as well.
    const extension = (fileData.extension ?? "").toLowerCase().replace(/^\./, "")
    let pdfPath = fileData.filePath
    try {
      if (extension === "doc" || extension === "docx") {
        console.log("4. Converting DOC/DOCX to PDF...")
        pdfPath = await convertDocToPdf(fileData.filePath)
        console.log("5. Conversion completed:", pdfPath)
      } else {
        console.log("4. File is already PDF, no conversion needed")
      }
    } catch (conversionError) {
      // Best effort: continue with the original file
      console.error("Error converting document to PDF:", conversionError)
      pdfPath = fileData.filePath
    }

    // Step 2: Extract text directly from PDF or file
    console.log("6. Extracting text from file...")
    let extractedText = ""
    try {
      extractedText = await extractTextFromFile(pdfPath)
      console.log("7. Extracted text length:", extractedText.length)
    } catch (extractionError) {
      console.error("Error extracting text:", extractionError)
      // Left empty on purpose: a long error message must not pass the length
      // check below and be handed to the LLM as if it were resume content.
      extractedText = ""
    }

    // Step 3: Add filename and metadata to the text to provide context
    const contextInfo = `\n\nFile Information:\nFilename: ${fileData.originalName}\nFile type: ${fileData.extension}\nUploaded: ${fileData.uploadedAt}\n`
    if (!extractedText || extractedText.length < 100) {
      console.log("8. Text extraction insufficient, using file metadata only")
      extractedText = `This appears to be a ${(fileData.extension ?? "").toUpperCase()} document that couldn't be fully parsed.` + contextInfo
    } else {
      console.log("8. Text extraction successful, adding file metadata")
      extractedText += contextInfo
    }

    // Step 4: Parse resume with LLM
    console.log("9. Parsing resume with LLM...")
    let parsedData = null
    try {
      parsedData = await parseResumeWithLLM(extractedText)
      if (!parsedData) {
        // Treat an empty result like a failed call so the fallback below applies
        throw new Error("LLM parser returned no data")
      }
      console.log("10. LLM parsing completed")
    } catch (llmError) {
      console.error("Error parsing with LLM:", llmError)
      // Create minimal parsed data with the file name as the person's name
      parsedData = {
        name: getNameFromFilename(fileData.originalName),
        email: "",
        phone: "",
        location: "",
        title: "",
        summary: "Resume parsing failed, but the document was saved. Please try re-uploading or processing manually.",
        skills: [],
        experience: [],
        education: [],
        educationDetails: [],
        certifications: [],
        languages: [],
        experienceLevel: "Not specified"
      }
    }

    // Step 5: Calculate total experience
    const totalExperience = calculateTotalExperience(parsedData.experience || [])
    console.log("Total experience calculated:", totalExperience)

    // Return the parsed resume
    console.log("11. Returning parsed resume")
    return {
      id: fileData.id,
      originalName: fileData.originalName,
      filePath: fileData.filePath,
      // Only set when a separate converted PDF was produced
      pdfPath: pdfPath !== fileData.filePath ? pdfPath : null,
      extractedText,
      // Same filename-derived fallback as the other failure paths in this module
      name: parsedData.name || getNameFromFilename(fileData.originalName),
      email: parsedData.email || "",
      phone: parsedData.phone || "",
      location: parsedData.location || "",
      title: parsedData.title || "",
      summary: parsedData.summary || "",
      skills: parsedData.skills || [],
      experience: parsedData.experience || [],
      education: parsedData.education || [],
      educationDetails: parsedData.educationDetails || [],
      certifications: parsedData.certifications || [],
      languages: parsedData.languages || [],
      experienceLevel: parsedData.experienceLevel || "Not specified",
      totalExperience,
      status: "Processed",
      matchScore: 0,
      matchedSkills: [],
      missingSkills: [],
      experienceMatch: 0,
      educationMatch: 0,
      overallAssessment: "",
      recommendations: [],
      uploadedAt: fileData.uploadedAt,
      processingStartedAt,
      processingCompletedAt: new Date().toISOString()
    }
  } catch (error) {
    console.error("Error parsing resume:", error)
    return createBasicResume(fileData, `Failed to parse resume: ${error instanceof Error ? error.message : "Unknown error"}`)
  }
}
// Helper function to create a basic resume object when parsing fails
/**
 * Builds the placeholder resume returned when a file cannot be processed.
 *
 * All parsed fields are empty, the display name is derived from the file
 * name, the error text is stored in both `extractedText` and `summary`, and
 * the status is "Error".
 *
 * @param fileData Metadata of the file that failed.
 * @param errorMessage Human-readable description of the failure.
 */
function createBasicResume(fileData: FileData, errorMessage: string): Resume {
  const { id, originalName, filePath, uploadedAt } = fileData
  const displayName = getNameFromFilename(originalName)
  const startedAt = new Date().toISOString()
  const completedAt = new Date().toISOString()

  const placeholder: Resume = {
    id,
    originalName,
    filePath,
    pdfPath: null,
    extractedText: errorMessage,
    name: displayName,
    email: "",
    phone: "",
    location: "",
    title: "",
    summary: `Error during resume processing. ${errorMessage}`,
    skills: [],
    experience: [],
    education: [],
    educationDetails: [],
    certifications: [],
    languages: [],
    experienceLevel: "Unknown",
    totalExperience: "Unknown",
    status: "Error",
    matchScore: 0,
    matchedSkills: [],
    missingSkills: [],
    experienceMatch: 0,
    educationMatch: 0,
    overallAssessment: "",
    recommendations: [],
    uploadedAt,
    processingStartedAt: startedAt,
    processingCompletedAt: completedAt
  }
  return placeholder
}
// Helper function to extract a name from a filename
/**
 * Derives a display name from a file name: drops the extension, treats
 * underscores, hyphens and whitespace as word separators, and capitalises
 * each word ("john_doe-resume.pdf" -> "John Doe Resume").
 *
 * Runs of separators and leading/trailing separators are collapsed, so the
 * result never contains doubled or surrounding spaces.
 *
 * @param filename File name, with or without an extension.
 * @returns The title-cased name; an empty string when no word characters remain.
 */
function getNameFromFilename(filename: string): string {
  return filename
    .replace(/\.[^/.]+$/, "") // Remove extension
    .split(/[\s_-]+/)
    .filter(word => word.length > 0) // Drop empties left by leading/trailing separators
    .map(word => word.charAt(0).toUpperCase() + word.slice(1).toLowerCase())
    .join(" ")
}