DevNumb committed on
Commit
d9d5b32
·
verified ·
1 Parent(s): cc93d27

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -9
app.py CHANGED
@@ -10,15 +10,15 @@ model = SentenceTransformer("all-MiniLM-L6-v2")
10
 
11
 
12
  # ---- Text extraction ----
13
- def extract_text(file):
14
- if file.name.endswith(".pdf"):
15
  text = ""
16
- with fitz.open(stream=file.read(), filetype="pdf") as doc:
17
  for page in doc:
18
  text += page.get_text("text") + "\n"
19
  return text
20
- elif file.name.endswith(".docx"):
21
- docf = docx.Document(file)
22
  return "\n".join(p.text for p in docf.paragraphs)
23
  return ""
24
 
@@ -39,7 +39,11 @@ def rank_cvs(job_description, files):
39
  scores, names = [], []
40
 
41
  for f in files:
42
- text = extract_text(f)
 
 
 
 
43
  if not text.strip():
44
  continue
45
  cv_emb = get_embedding(text[:4000]) # limit text length
@@ -47,7 +51,7 @@ def rank_cvs(job_description, files):
47
  np.linalg.norm(job_emb) * np.linalg.norm(cv_emb)
48
  )
49
  scores.append(sim)
50
- names.append(f.name)
51
 
52
  top = sorted(zip(names, scores), key=lambda x: x[1], reverse=True)[:10]
53
  return "\n\n".join(
@@ -60,8 +64,7 @@ demo = gr.Interface(
60
  fn=rank_cvs,
61
  inputs=[
62
  gr.Textbox(label="💼 Job Description", lines=5),
63
- # Option 2: Get raw bytes
64
- gr.File(label="📁 Upload CVs (PDF/DOCX)", file_count="multiple", type="binary"),
65
  ],
66
  outputs=gr.Markdown(),
67
  title="📄 AI CV Ranker (Local Model)",
 
10
 
11
 
12
  # ---- Text extraction ----
13
+ def extract_text(file_bytes, filename):
14
+ if filename.endswith(".pdf"):
15
  text = ""
16
+ with fitz.open(stream=file_bytes, filetype="pdf") as doc:
17
  for page in doc:
18
  text += page.get_text("text") + "\n"
19
  return text
20
+ elif filename.endswith(".docx"):
21
+ docf = docx.Document(file_bytes)
22
  return "\n".join(p.text for p in docf.paragraphs)
23
  return ""
24
 
 
39
  scores, names = [], []
40
 
41
  for f in files:
42
+ # Extract filename and file bytes
43
+ filename = f.name
44
+ file_bytes = f.read()
45
+
46
+ text = extract_text(file_bytes, filename)
47
  if not text.strip():
48
  continue
49
  cv_emb = get_embedding(text[:4000]) # limit text length
 
51
  np.linalg.norm(job_emb) * np.linalg.norm(cv_emb)
52
  )
53
  scores.append(sim)
54
+ names.append(filename)
55
 
56
  top = sorted(zip(names, scores), key=lambda x: x[1], reverse=True)[:10]
57
  return "\n\n".join(
 
64
  fn=rank_cvs,
65
  inputs=[
66
  gr.Textbox(label="💼 Job Description", lines=5),
67
+ gr.File(label="📁 Upload CVs (PDF/DOCX)", file_count="multiple", type="file"),
 
68
  ],
69
  outputs=gr.Markdown(),
70
  title="πŸ“„ AI CV Ranker (Local Model)",