DevNumb committed on
Commit
265e52e
·
verified ·
1 Parent(s): d9d5b32

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -9
app.py CHANGED
@@ -10,15 +10,15 @@ model = SentenceTransformer("all-MiniLM-L6-v2")
10
 
11
 
12
  # ---- Text extraction ----
13
- def extract_text(file_bytes, filename):
14
  if filename.endswith(".pdf"):
15
  text = ""
16
- with fitz.open(stream=file_bytes, filetype="pdf") as doc:
17
  for page in doc:
18
  text += page.get_text("text") + "\n"
19
  return text
20
  elif filename.endswith(".docx"):
21
- docf = docx.Document(file_bytes)
22
  return "\n".join(p.text for p in docf.paragraphs)
23
  return ""
24
 
@@ -38,12 +38,11 @@ def rank_cvs(job_description, files):
38
  job_emb = get_embedding(job_description)
39
  scores, names = [], []
40
 
41
- for f in files:
42
- # Extract filename and file bytes
43
- filename = f.name
44
- file_bytes = f.read()
45
 
46
- text = extract_text(file_bytes, filename)
47
  if not text.strip():
48
  continue
49
  cv_emb = get_embedding(text[:4000]) # limit text length
@@ -64,7 +63,7 @@ demo = gr.Interface(
64
  fn=rank_cvs,
65
  inputs=[
66
  gr.Textbox(label="💼 Job Description", lines=5),
67
- gr.File(label="📁 Upload CVs (PDF/DOCX)", file_count="multiple", type="file"),
68
  ],
69
  outputs=gr.Markdown(),
70
  title="📄 AI CV Ranker (Local Model)",
 
10
 
11
 
12
  # ---- Text extraction ----
13
+ def extract_text(file_path, filename):
14
  if filename.endswith(".pdf"):
15
  text = ""
16
+ with fitz.open(file_path) as doc:
17
  for page in doc:
18
  text += page.get_text("text") + "\n"
19
  return text
20
  elif filename.endswith(".docx"):
21
+ docf = docx.Document(file_path)
22
  return "\n".join(p.text for p in docf.paragraphs)
23
  return ""
24
 
 
38
  job_emb = get_embedding(job_description)
39
  scores, names = [], []
40
 
41
+ for file_path in files:
42
+ # Get filename from file path
43
+ filename = os.path.basename(file_path)
 
44
 
45
+ text = extract_text(file_path, filename)
46
  if not text.strip():
47
  continue
48
  cv_emb = get_embedding(text[:4000]) # limit text length
 
63
  fn=rank_cvs,
64
  inputs=[
65
  gr.Textbox(label="💼 Job Description", lines=5),
66
+ gr.File(label="📁 Upload CVs (PDF/DOCX)", file_count="multiple", type="filepath"),
67
  ],
68
  outputs=gr.Markdown(),
69
  title="📄 AI CV Ranker (Local Model)",