Spaces:
Sleeping
Sleeping
import re
# Skills the keyword matcher recognizes. Matches are reported with exactly
# this casing and in exactly this order.
_COMMON_SKILLS = (
    "Python", "Java", "JavaScript", "TypeScript", "C++", "C#", "SQL", "NoSQL",
    "React", "Angular", "Vue", "Node.js", "Django", "Flask", "FastAPI",
    "AWS", "Azure", "GCP", "Docker", "Kubernetes", "Git", "CI/CD",
    "Machine Learning", "Deep Learning", "NLP", "Pandas", "NumPy", "TensorFlow", "PyTorch",
)

# ``\b`` is not usable as the delimiter: it needs a word character on one
# side, so ``\bC\+\+\b`` never matches "C++ " and ``\bC\#\b`` never matches
# "C#,". Lookarounds that only forbid an adjacent word character behave the
# same as ``\b`` for alphanumeric skills and also work for these two.
_SKILL_PATTERNS = tuple(
    (skill, re.compile(r"(?<!\w)" + re.escape(skill) + r"(?!\w)", re.IGNORECASE))
    for skill in _COMMON_SKILLS
)

# Optional country code, then a mandatory 3-3-4 digit number (10-12 digits in
# total). The area code is required so that year ranges such as "2019-2023"
# are not reported as phone numbers, and the digit guards on both ends stop
# the pattern from matching a slice out of a longer run of digits.
_PHONE_PATTERN = re.compile(
    r"(?<!\d)(?:\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}(?!\d)"
)

_URL_PATTERN = re.compile(r"https?://[^\s]+")

# Punctuation that commonly trails a URL in running text ("... at <url>." or
# "(<url>)") but is not part of the URL itself.
_URL_TRAILING_PUNCTUATION = ".,;:!?)]}>\"'"


def extract_fallback(text: str) -> dict:
    """Extract basic profile fields from raw text using regexes only.

    This is the deliberately simple fallback used when the Gemini-based
    extraction fails. It finds a phone number, LinkedIn / GitHub / portfolio
    links and a keyword-matched skill list; every other field is returned
    empty.

    An e-mail address is intentionally not extracted: per the original note
    the backend normally takes the e-mail from auth, so no ``"email"`` key is
    emitted.

    Args:
        text: Raw text to scan. ``None`` or an empty string is treated as
            empty input.

    Returns:
        A dict with the keys ``headline``, ``summary``, ``skills``,
        ``technical_skills``, ``education``, ``work_experience``,
        ``certifications``, ``languages``, ``experience_years``, ``phone``,
        ``linkedin``, ``github`` and ``portfolio``. Fields that could not be
        determined are ``None`` (scalars) or ``[]`` (lists).
    """
    text = text or ""

    # 1. Phone: first 10-12 digit number in the text, if any.
    phone_match = _PHONE_PATTERN.search(text)
    phone = phone_match.group(0) if phone_match else None

    # 2. Links (LinkedIn / GitHub / Portfolio), with trailing sentence
    #    punctuation removed.
    links = [
        raw.rstrip(_URL_TRAILING_PUNCTUATION)
        for raw in _URL_PATTERN.findall(text)
    ]
    linkedin = next((url for url in links if "linkedin.com" in url), None)
    github = next((url for url in links if "github.com" in url), None)
    # The portfolio is the first link that is not one of the two picked above.
    portfolio = next((url for url in links if url not in (linkedin, github)), None)

    # 3. Keyword matching for skills against the static list.
    found_skills = [skill for skill, pattern in _SKILL_PATTERNS if pattern.search(text)]

    # 4. Fallback summary is just the first 500 characters of the text.
    summary = text[:500] + "..." if len(text) > 500 else text

    # 5. Construct the payload (same keys as the original schema).
    return {
        "headline": None,
        "summary": summary,
        "skills": found_skills,
        # Same content as "skills", but a separate list object so that a
        # consumer mutating one field does not silently change the other.
        "technical_skills": list(found_skills),
        "education": [],
        "work_experience": [],
        "certifications": [],
        "languages": [],
        "experience_years": None,
        # Extra fields specific to Supabase ingest (mapped later).
        "phone": phone,
        "linkedin": linkedin,
        "github": github,
        "portfolio": portfolio,
    }