Spaces:

TKM03
/

RESUME_FILTERING

Sleeping

App Files Files Community

TKM03 committed on Jul 2, 2025

Commit

59bc749

verified ·

1 Parent(s): 1d2993e

Update app.py

Browse files

Files changed (1) hide show

app.py +101 -84

app.py CHANGED Viewed

@@ -4,121 +4,138 @@ import shutil
 import PyPDF2
 import gradio as gr
 from transformers import pipeline
-from collections import defaultdict
-# Load job classification model
-text_classifier = pipeline("text-classification", model="serbog/distilbert-jobCategory_410k")
-# Expanded label map (you can update based on actual model labels returned)
 CATEGORY_MAP = {
-    "LABEL_0": "Information Technology / Software Engineering",
-    "LABEL_1": "Healthcare / Medical / Nursing",
-    "LABEL_2": "Finance / Accounting / Auditing",
-    "LABEL_3": "Engineering / Mechanical / Civil / Electrical",
-    "LABEL_4": "Education / Training / Teaching",
-    "LABEL_5": "Sales / Marketing / Business Development",
-    "LABEL_6": "Customer Service / Support",
-    "LABEL_7": "Human Resources / Recruitment / Talent Acquisition",
-    "LABEL_8": "Legal / Compliance",
-    "LABEL_9": "Administration / Clerical",
-    "LABEL_10": "Operations / Logistics / Supply Chain",
-    "LABEL_11": "Management / Strategy / Consulting",
-    "LABEL_12": "Science / Research / R&D",
-    "LABEL_13": "Design / UI-UX / Creative",
-    "LABEL_14": "Manufacturing / Production",
-    "LABEL_15": "Hospitality / Tourism / Travel",
-    "LABEL_16": "Construction / Architecture",
-    "LABEL_17": "Media / Communication / PR / Journalism",
-    "LABEL_18": "Procurement / Purchasing",
-    "LABEL_19": "Security / Safety",
-    "LABEL_20": "Real Estate / Property Management",
-    "LABEL_21": "Energy / Oil & Gas / Utilities",
-    "LABEL_22": "Agriculture / Environmental / Forestry",
-    "LABEL_23": "Transportation / Automotive / Aviation",
-    "LABEL_24": "Retail / Merchandising / E-commerce",
-    "LABEL_25": "Data Science / Machine Learning / AI",
-    "LABEL_26": "Product Management / Project Management",
-    "LABEL_27": "Quality Assurance / Control",
-    "LABEL_28": "Telecommunication / Network Engineering",
-    "LABEL_29": "Entrepreneurship / Startups / Freelancing",
-    "LABEL_30": "Other / Miscellaneous"
 }
-# Helper functions
-def clean_resume_text(text):
     text = re.sub(r'http\S+', ' ', text)
-    text = re.sub(r'#\S+', '', text)
-    text = re.sub(r'@\S+', ' ', text)
-    text = re.sub(r'[^\w\s]', ' ', text)
     text = re.sub(r'[^\x00-\x7f]', ' ', text)
-    return re.sub(r'\s+', ' ', text).strip()
-def extract_resume_text(file):
     try:
         reader = PyPDF2.PdfReader(file)
         text = ""
         for page in reader.pages:
-            page_text = page.extract_text()
-            if page_text:
-                text += page_text + " "
-        return text, None if text.strip() else "No text found in PDF"
     except Exception as e:
-        return None, f"Error reading PDF: {str(e)}"
 def classify_resumes(files):
-    categorized = defaultdict(list)
-    label_scores = {}
-    os.makedirs("classified_resumes", exist_ok=True)
     for file in files:
         file_name = os.path.basename(file.name)
-        resume_text, error = extract_resume_text(file)
         if error:
             continue
-        cleaned_text = clean_resume_text(resume_text)
-        result = text_classifier(cleaned_text[:512])[0]
-        label = result['label']
         score = round(result['score'], 4)
-        category = CATEGORY_MAP.get(label, label)
-        # Save to relevant folder
-        cat_folder = os.path.join("classified_resumes", category.replace(" ", "_"))
-        os.makedirs(cat_folder, exist_ok=True)
-        save_path = os.path.join(cat_folder, file_name)
-        with open(file.name, "rb") as f_in, open(save_path, "wb") as f_out:
             shutil.copyfileobj(f_in, f_out)
-        categorized[category].append(save_path)
-        label_scores[file_name] = {"Predicted Job Category": category, "Confidence Score": score}
-    return label_scores, categorized
-def show_category_files(selected_category):
-    category_path = os.path.join("classified_resumes", selected_category.replace(" ", "_"))
-    if not os.path.exists(category_path):
-        return []
-    return [os.path.join(category_path, f) for f in os.listdir(category_path) if f.endswith(".pdf")]
-# Gradio UI
-with gr.Blocks(title="🧠 Resume Screening & Categorization") as demo:
-    gr.Markdown("""## 📄 Resume Screening by Job Role/Industry
-Upload resumes below. The app classifies each into categories like IT, HR, Sales, etc. Then click on any category to view/download relevant resumes.""")
-    file_input = gr.File(file_types=[".pdf"], file_count="multiple", label="Upload Resume PDFs")
-    classify_button = gr.Button("📊 Classify Resumes")
-    output_json = gr.JSON(label="Classification Summary")
-    category_dropdown = gr.Dropdown(label="Select Category to View Files", choices=sorted(list(CATEGORY_MAP.values())))
-    resume_file_list = gr.File(label="Filtered Resumes in Selected Category", file_count="multiple")
-    def update_dropdown_options(files):
-        _, cat_data = classify_resumes(files)
-        return sorted(list(cat_data.keys()))
-    classify_button.click(fn=classify_resumes, inputs=[file_input], outputs=[output_json, category_dropdown])
-    category_dropdown.change(fn=show_category_files, inputs=[category_dropdown], outputs=[resume_file_list])
 if __name__ == "__main__":
-    demo.launch()

 import PyPDF2
 import gradio as gr
 from transformers import pipeline
# ------------------- Category Mapping -------------------
# Maps a short label code ("C0" .. "C30") to a human-readable job category.
# classify_resumes() looks up the classifier's predicted label in this table;
# the display names are also offered as the dropdown choices in the UI.
CATEGORY_MAP = dict(
    C0="Administration / Clerical",
    C1="Agriculture / Environmental / Forestry",
    C2="Information Technology / Software Engineering",
    C3="Data Science / Machine Learning / AI",
    C4="Finance / Accounting / Auditing",
    C5="Human Resources / Recruitment / Talent Acquisition",
    C6="Sales / Marketing / Business Development",
    C7="Engineering / Mechanical / Civil / Electrical",
    C8="Customer Service / Support",
    C9="Design / UI-UX / Creative",
    C10="Healthcare / Medical / Nursing",
    C11="Education / Training / Teaching",
    C12="Retail / Merchandising / E-commerce",
    C13="Telecommunication / Network Engineering",
    C14="Operations / Logistics / Supply Chain",
    C15="Entrepreneurship / Startups / Freelancing",
    C16="Product Management / Project Management",
    C17="Legal / Compliance",
    C18="Real Estate / Property Management",
    C19="Transportation / Automotive / Aviation",
    C20="Construction / Architecture",
    C21="Energy / Oil & Gas / Utilities",
    C22="Security / Safety",
    C23="Procurement / Purchasing",
    C24="Manufacturing / Production",
    C25="Media / Communication / PR / Journalism",
    C26="Science / Research / R&D",
    C27="Quality Assurance / Control",
    C28="Hospitality / Tourism / Travel",
    C29="Management / Strategy / Consulting",
    C30="Other / Miscellaneous",
)
# ------------------- Load Classification Model -------------------
# Built once at import time; classify_resumes() calls this same pipeline
# object for every resume instead of reloading the model per request.
classifier = pipeline(
    task="text-classification",
    model="CleveGreen/JobClassifier_v2",
)
# ------------------- Resume Utilities -------------------
def clean_text(text):
    """Normalise raw resume text before it is handed to the classifier.

    The substitutions run in order: tokens starting with ``http`` are
    blanked, then non-ASCII characters, then anything that is neither a word
    character nor whitespace; finally every whitespace run is collapsed to a
    single space and the ends are trimmed.

    Args:
        text: Raw text to clean.

    Returns:
        The cleaned, single-spaced string.
    """
    substitutions = (
        (r'http\S+', ' '),          # links
        (r'[^\x00-\x7f]', ' '),     # non-ASCII characters
        (r'[^\w\s]', ' '),          # punctuation / symbols (underscore is kept)
        (r'\s+', ' '),              # collapse whitespace runs
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    return text.strip()
def extract_text_from_pdf(file):
    """Extract the text of every page of a PDF as one string.

    Args:
        file: Passed unchanged to ``PyPDF2.PdfReader``.

    Returns:
        A ``(text, error)`` tuple:

        * ``(text, None)`` when at least some non-whitespace text was found;
        * ``("", "No text found in PDF.")`` when the pages yield no text or
          only whitespace;
        * ``(None, message)`` when reading or parsing the PDF raised.
    """
    try:
        reader = PyPDF2.PdfReader(file)
        page_texts = [page.extract_text() for page in reader.pages]
    except Exception as e:
        # Deliberate best-effort: one unreadable upload is reported to the
        # caller as an error string instead of aborting the whole batch.
        return None, str(e)

    # Pages without extractable text give a falsy value and are skipped.
    text = " ".join(content for content in page_texts if content).strip()

    # Check the *stripped* text so a PDF that yields only whitespace is
    # reported as empty rather than being passed on as a blank string.
    if not text:
        return "", "No text found in PDF."
    return text, None
# ------------------- Resume Classification & Organization -------------------
def _category_folder_name(category):
    """Return a single filesystem-safe folder name for a display category."""
    # Display names contain " / "; used verbatim, os.path.join would treat
    # the slash as a separator and create nested directories whose names
    # start or end with a space. Collapse slashes and whitespace to "_".
    return "_".join(category.replace("/", " ").split())


def classify_resumes(files):
    """Classify uploaded resume PDFs and copy each into a per-category folder.

    The ``classified_resumes`` directory is deleted and recreated on every
    call, so it only ever holds the results of the most recent batch.

    Args:
        files: Iterable of uploads, or ``None``/empty when nothing was
            uploaded. Each item is either a path string or an object whose
            ``name`` attribute is the path of the file on disk.

    Returns:
        A ``(predictions, classified_files)`` tuple.

        * ``predictions`` maps each file's base name to ``{"error": msg}``
          when no usable text could be read, otherwise to a dict holding the
          raw model label, the mapped category name and the rounded score.
        * ``classified_files`` maps a category name to the list of paths the
          resumes were copied to.
    """
    predictions = {}
    classified_files = {}

    # Start from an empty output tree so files from a previous run never
    # leak into this run's results.
    if os.path.exists("classified_resumes"):
        shutil.rmtree("classified_resumes")
    os.makedirs("classified_resumes")

    # ``files`` is None when the button is clicked without any upload.
    for file in files or []:
        # Accept both plain path strings and objects exposing ``.name``.
        source_path = getattr(file, "name", file)
        file_name = os.path.basename(source_path)

        resume_text, error = extract_text_from_pdf(file)
        if error:
            predictions[file_name] = {"error": error}
            continue

        cleaned_text = clean_text(resume_text)
        if not cleaned_text:
            # Nothing left after cleaning (e.g. whitespace or symbols only):
            # report it instead of classifying an empty string.
            predictions[file_name] = {"error": "No text found in PDF."}
            continue

        # Only the first 512 characters are sent, to stay within the model's
        # input limit.
        result = classifier(cleaned_text[:512])[0]
        label = result['label']  # e.g., C2
        score = round(result['score'], 4)
        category = CATEGORY_MAP.get(label, "Other / Miscellaneous")

        predictions[file_name] = {
            "Predicted Job Category": label,
            "Category Name": category,
            "Confidence Score": score,
        }

        category_folder = os.path.join(
            "classified_resumes", _category_folder_name(category)
        )
        os.makedirs(category_folder, exist_ok=True)
        dest_path = os.path.join(category_folder, file_name)
        shutil.copyfile(source_path, dest_path)

        classified_files.setdefault(category, []).append(dest_path)

    return predictions, classified_files
# ------------------- Gradio App -------------------
def filter_by_category(category, all_classified):
    """Return the file paths stored for one category.

    Args:
        category: Category name to look up.
        all_classified: Mapping of category name -> list of file paths.

    Returns:
        The list stored under ``category``, or an empty list when the
        category has no entry.
    """
    if category in all_classified:
        return all_classified[category]
    return []
with gr.Blocks(title="Resume Screening & Classification") as app:
    # Page heading and short usage instructions.
    gr.Markdown("""
    # 📄 Resume Screening Tool
    Upload resumes in PDF format. The system will classify them into job categories using a pretrained AI model.
    """)

    # Upload widget and trigger button sit side by side.
    with gr.Row():
        uploaded_files = gr.File(
            label="Upload Resumes",
            file_count="multiple",
            file_types=[".pdf"],
        )
        classify_button = gr.Button("Classify Resumes")

    # Per-file classification summary.
    classification_results = gr.JSON(label="Classification Output")

    # Category picker and the files that belong to the picked category.
    category_selector = gr.Dropdown(
        label="Filter by Job Category",
        choices=[*CATEGORY_MAP.values()],
    )
    filtered_resumes_output = gr.File(
        label="Filtered Resumes",
        file_count="multiple",
        file_types=[".pdf"],
    )

    # Holds the category -> file paths mapping produced by the last
    # classification so the dropdown handler can read it.
    all_classified_state = gr.State({})

    # Button click: classify the uploads, show the summary, store the mapping.
    classify_button.click(
        fn=classify_resumes,
        inputs=[uploaded_files],
        outputs=[classification_results, all_classified_state],
    )

    # Dropdown change: list the stored files for the chosen category.
    category_selector.change(
        fn=filter_by_category,
        inputs=[category_selector, all_classified_state],
        outputs=[filtered_resumes_output],
    )

if __name__ == "__main__":
    app.launch()