# Residue from the scraped file-viewer page, kept as a comment:
# Spaces status "Sleeping"; file size 2,165 Bytes; ref ea9ca44.
import re
# Static list of skills that are searched for (case-insensitively) in the text.
_COMMON_SKILLS = (
    "Python", "Java", "JavaScript", "TypeScript", "C++", "C#", "SQL", "NoSQL",
    "React", "Angular", "Vue", "Node.js", "Django", "Flask", "FastAPI",
    "AWS", "Azure", "GCP", "Docker", "Kubernetes", "Git", "CI/CD",
    "Machine Learning", "Deep Learning", "NLP", "Pandas", "NumPy",
    "TensorFlow", "PyTorch",
)

# Punctuation that tends to trail a URL in running prose ("see https://x.dev, ...").
_URL_TRAILING_PUNCT = ".,;:!?)]}>\"'"


def _skill_pattern(skill: str):
    """Compile a case-insensitive whole-token pattern for one skill name."""
    # ``\b`` only works next to a word character. A trailing ``\b`` after
    # "C++" or "C#" would demand a following word character, so
    # "C++ developer" would never match. Anchor only the sides that start or
    # end with a word character.
    head = r"\b" if re.match(r"\w", skill[0]) else ""
    tail = r"\b" if re.match(r"\w", skill[-1]) else ""
    return re.compile(head + re.escape(skill) + tail, re.IGNORECASE)


# Compiled once at import time; order follows _COMMON_SKILLS.
_SKILL_PATTERNS = tuple((skill, _skill_pattern(skill)) for skill in _COMMON_SKILLS)


def extract_fallback(text: str) -> dict:
    """Extract basic profile fields from raw text using regexes only.

    Intended as a fallback when the primary (Gemini-based) extractor fails.
    It pulls out a phone number, LinkedIn/GitHub/portfolio links and any
    skills from a static keyword list. Structured sections are returned
    empty.

    Args:
        text: Raw text to scan. ``None`` or an empty string is treated as
            empty text instead of raising.

    Returns:
        A dict with the keys ``headline``, ``summary``, ``skills``,
        ``technical_skills``, ``education``, ``work_experience``,
        ``certifications``, ``languages``, ``experience_years``, ``phone``,
        ``linkedin``, ``github`` and ``portfolio``. Fields that cannot be
        derived are ``None`` or an empty list.
    """
    if not text:
        text = ""

    # Email is intentionally not extracted: the payload never included it
    # (per the original note, the backend usually takes it from auth).

    # Phone (very basic).
    # NOTE(review): this pattern also accepts digit runs that are not phone
    # numbers, e.g. a date range such as "2019-2023".
    phone_match = re.search(
        r"(\+?\d{1,3}[-.\s]?)?(\(?\d{3}\)?[-.\s]?)?\d{3}[-.\s]?\d{4}", text
    )
    phone = phone_match.group(0) if phone_match else None

    # Links: strip punctuation that belongs to the surrounding sentence.
    links = [
        url.rstrip(_URL_TRAILING_PUNCT)
        for url in re.findall(r"https?://[^\s]+", text)
    ]

    def _is_linkedin(url: str) -> bool:
        return "linkedin.com" in url.lower()

    def _is_github(url: str) -> bool:
        return "github.com" in url.lower()

    linkedin = next((url for url in links if _is_linkedin(url)), None)
    github = next((url for url in links if _is_github(url)), None)
    # Portfolio is the first link on neither domain, so a second GitHub or
    # LinkedIn URL is never reported as a portfolio.
    portfolio = next(
        (url for url in links if not _is_linkedin(url) and not _is_github(url)),
        None,
    )

    # Keyword matching against the static skill list (list order preserved).
    found_skills = [
        skill for skill, pattern in _SKILL_PATTERNS if pattern.search(text)
    ]

    # Fallback summary is just the first 500 characters of the input.
    summary = text[:500] + "..." if len(text) > 500 else text

    return {
        "headline": None,
        "summary": summary,
        "skills": found_skills,
        # Same content as "skills", as a separate list so that mutating one
        # does not silently change the other.
        "technical_skills": list(found_skills),
        "education": [],
        "work_experience": [],
        "certifications": [],
        "languages": [],
        "experience_years": None,
        # Extra fields specific to the Supabase ingest (mapped later).
        "phone": phone,
        "linkedin": linkedin,
        "github": github,
        "portfolio": portfolio,
    }
|