Spaces:

ma4389
/

Fake_Job_Prediction_

Sleeping

App Files Files Community

ma4389 committed on Jul 23, 2025

Commit

6873958

verified ·

1 Parent(s): d30f886

Upload 3 files

Browse files

Files changed (3) hide show

app.py +63 -0
job_model.pth +3 -0
requirements.txt +4 -0

app.py ADDED Viewed

	@@ -0,0 +1,63 @@

+import torch
+from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
+import gradio as gr
+import re
+import nltk
+from nltk.tokenize import word_tokenize
+from nltk.corpus import stopwords
+# Download required NLTK resources
+nltk.download('punkt')
+nltk.download('stopwords')
+# Load tokenizer and model
+tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
+model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=2)
+model.load_state_dict(torch.load("job_model.pth", map_location=torch.device("cpu")))
+model.eval()
+# ✅ Preprocess a single job description string (adapted from the DataFrame-based version)
+def preprocess_text(text):
+    # Lowercase
+    text = text.lower()
+    # Remove non-alphabetic characters
+    text = re.sub(r'[^a-z\s]', '', text)
+    # Tokenize
+    tokens = word_tokenize(text)
+    # Remove stopwords
+    stop_words = set(stopwords.words('english'))
+    tokens = [word for word in tokens if word not in stop_words]
+    # Join tokens back into string
+    return ' '.join(tokens)
+# ✅ Inference function
+def classify_job(description):
+    cleaned_text = preprocess_text(description)
+    inputs = tokenizer(cleaned_text, return_tensors="pt", truncation=True, padding=True, max_length=256)
+    with torch.no_grad():
+        outputs = model(**inputs)
+        probs = torch.softmax(outputs.logits, dim=1)
+        pred = torch.argmax(probs, dim=1).item()
+        label_map = {
+            0: "✅ Legitimate Job Post (Real)",
+            1: "🚨 FAKE Job Post (Fraudulent)"
+        }
+        label = label_map[pred]
+        confidence = probs[0][pred].item()
+    return f"{label}\nConfidence: {confidence:.2%}"
+# ✅ Gradio Interface
+interface = gr.Interface(
+    fn=classify_job,
+    inputs=gr.Textbox(lines=6, placeholder="Paste the job description here..."),
+    outputs=gr.Textbox(),
+    title="Job Description Fraud Detector",
+    description="Classifies job descriptions as real or fake using DistilBERT. Uses full text preprocessing."
+)
+if __name__ == "__main__":
+    interface.launch()

job_model.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f50b64d99a3531abd66d378cc1d8b10692feb29c9247d329dd62b7cadde12f7c
+size 267861754

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+torch
+transformers
+gradio
+nltk