Shami96 committed on
Commit
6c66af4
·
verified ·
1 Parent(s): f345bcf

Update extract_resume_content.py

Browse files
Files changed (1) hide show
  1. extract_resume_content.py +5 -15
extract_resume_content.py CHANGED
@@ -1,14 +1,11 @@
1
- import pdfplumber
2
  from docx import Document
 
3
  from bs4 import BeautifulSoup
4
- import os
5
- import re
6
 
7
  def extract_text_from_pdf(file):
8
  with pdfplumber.open(file) as pdf:
9
- return "\n".join(
10
- page.extract_text() for page in pdf.pages if page.extract_text()
11
- )
12
 
13
  def extract_text_from_docx(file):
14
  doc = Document(file)
@@ -34,20 +31,13 @@ def extract_resume_text(file, file_type):
34
  return ""
35
 
36
  def extract_fields_from_text(text):
37
- # Extract email
38
  email_match = re.search(r'[\w\.-]+@[\w\.-]+', text)
39
- email = email_match.group() if email_match else "Not found"
40
-
41
- # Extract phone number
42
  phone_match = re.search(r'(\+?\d{1,3})?[\s\-]?\(?\d{2,4}\)?[\s\-]?\d{3,4}[\s\-]?\d{3,4}', text)
43
- phone = phone_match.group() if phone_match else "Not found"
44
-
45
- # Try to find a name (optional heuristic - first line of text)
46
  lines = text.strip().split("\n")
47
  name = lines[0] if lines and len(lines[0].split()) <= 4 else "Not found"
48
 
49
  return {
50
  "name": name,
51
- "email": email,
52
- "phone": phone,
53
  }
 
1
+ import re
2
  from docx import Document
3
+ import pdfplumber
4
  from bs4 import BeautifulSoup
 
 
5
 
6
  def extract_text_from_pdf(file):
7
  with pdfplumber.open(file) as pdf:
8
+ return "\n".join(page.extract_text() for page in pdf.pages if page.extract_text())
 
 
9
 
10
  def extract_text_from_docx(file):
11
  doc = Document(file)
 
31
  return ""
32
 
33
  def extract_fields_from_text(text):
 
34
  email_match = re.search(r'[\w\.-]+@[\w\.-]+', text)
 
 
 
35
  phone_match = re.search(r'(\+?\d{1,3})?[\s\-]?\(?\d{2,4}\)?[\s\-]?\d{3,4}[\s\-]?\d{3,4}', text)
 
 
 
36
  lines = text.strip().split("\n")
37
  name = lines[0] if lines and len(lines[0].split()) <= 4 else "Not found"
38
 
39
  return {
40
  "name": name,
41
+ "email": email_match.group() if email_match else "Not found",
42
+ "phone": phone_match.group() if phone_match else "Not found"
43
  }