Spaces:

ma4389
/

Fake_Job_Prediction_

Sleeping

App Files Files Community

ma4389 committed on Jul 23, 2025

Commit

017869d

verified ·

1 Parent(s): 6873958

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -63

app.py CHANGED Viewed

@@ -1,63 +1,63 @@
-import torch
-from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
-import gradio as gr
-import re
-import nltk
-from nltk.tokenize import word_tokenize
-from nltk.corpus import stopwords
-# Download required NLTK resources
-nltk.download('punkt')
-nltk.download('stopwords')
-# Load tokenizer and model
-tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
-model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=2)
-model.load_state_dict(torch.load("job_model.pth", map_location=torch.device("cpu")))
-model.eval()
-# ✅ Preprocess a single job description string (adapted from your DataFrame version)
-def preprocess_text(text):
-    # Lowercase
-    text = text.lower()
-    # Remove non-alphabetic characters
-    text = re.sub(r'[^a-z\s]', '', text)
-    # Tokenize
-    tokens = word_tokenize(text)
-    # Remove stopwords
-    stop_words = set(stopwords.words('english'))
-    tokens = [word for word in tokens if word not in stop_words]
-    # Join tokens back into string
-    return ' '.join(tokens)
-# ✅ Inference function
-def classify_job(description):
-    cleaned_text = preprocess_text(description)
-    inputs = tokenizer(cleaned_text, return_tensors="pt", truncation=True, padding=True, max_length=256)
-    with torch.no_grad():
-        outputs = model(**inputs)
-        probs = torch.softmax(outputs.logits, dim=1)
-        pred = torch.argmax(probs, dim=1).item()
-        label_map = {
-            0: "✅ Legitimate Job Post (Real)",
-            1: "🚨 FAKE Job Post (Fraudulent)"
-        }
-        label = label_map[pred]
-        confidence = probs[0][pred].item()
-    return f"{label}\nConfidence: {confidence:.2%}"
-# ✅ Gradio Interface
-interface = gr.Interface(
-    fn=classify_job,
-    inputs=gr.Textbox(lines=6, placeholder="Paste the job description here..."),
-    outputs=gr.Textbox(),
-    title="Job Description Fraud Detector",
-    description="Classifies job descriptions as real or fake using DistilBERT. Uses full text preprocessing."
-)
-if __name__ == "__main__":
-    interface.launch()

+import torch
+from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
+import gradio as gr
+import re
+import nltk
+from nltk.tokenize import word_tokenize
+from nltk.corpus import stopwords
+# Fetch the NLTK data used further down: 'punkt_tab' (presumably the tokenizer
+# data word_tokenize needs) and the 'stopwords' corpus read by preprocess_text.
+# These calls sit at module level, so they run every time the app starts.
+nltk.download('punkt_tab')
+nltk.download('stopwords')
+# Build the DistilBERT tokenizer and a two-label sequence classifier from the
+# "distilbert-base-uncased" checkpoint, then replace the classifier's weights
+# with the state dict stored in job_model.pth (tensors mapped onto the CPU).
+tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
+model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=2)
+model.load_state_dict(torch.load("job_model.pth", map_location=torch.device("cpu")))
+# Switch to evaluation mode; the model is only used for inference below.
+model.eval()
+# Built once at import time (the stopwords corpus is downloaded earlier in this
+# file) instead of being re-read from the corpus on every request.
+_STOP_WORDS = frozenset(stopwords.words('english'))
+# Matches every character that is neither a lowercase ASCII letter nor whitespace.
+_NON_ALPHA = re.compile(r'[^a-z\s]')
+# ✅ Preprocess a single job description string
+def preprocess_text(text):
+    """Normalise one job description before it is passed to the tokenizer.
+
+    The text is lowercased, every character other than a-z and whitespace is
+    removed, the remainder is split with NLTK's word_tokenize, English
+    stop words are dropped, and the surviving tokens are joined with single
+    spaces.
+
+    Args:
+        text: Raw job description. None or an empty string is accepted.
+
+    Returns:
+        The cleaned text as a single string; '' when there is nothing to clean.
+    """
+    # Guard against None / empty input instead of failing on .lower().
+    if not text:
+        return ''
+    letters_only = _NON_ALPHA.sub('', text.lower())
+    kept = [token for token in word_tokenize(letters_only) if token not in _STOP_WORDS]
+    return ' '.join(kept)
+# Class index -> user-facing label, built once rather than on every call.
+# NOTE(review): assumes job_model.pth was trained with 0 = real, 1 = fraudulent — confirm.
+_LABELS = {
+    0: "✅ Legitimate Job Post (Real)",
+    1: "🚨 FAKE Job Post (Fraudulent)"
+}
+_EMPTY_INPUT_MESSAGE = "⚠️ Please paste a job description containing some English text."
+# ✅ Inference function
+def classify_job(description):
+    """Classify a job description as real or fake and report the confidence.
+
+    Args:
+        description: Raw job description text as entered by the user.
+
+    Returns:
+        A two-line string: the predicted label, then "Confidence: <percent>".
+        When the description is empty, or nothing is left after preprocessing
+        (for example only digits or symbols), a short prompt asking for text
+        is returned instead, because a prediction on empty input would be
+        meaningless.
+    """
+    # Reject missing input up front so preprocessing never sees None.
+    if not description or not description.strip():
+        return _EMPTY_INPUT_MESSAGE
+    cleaned_text = preprocess_text(description)
+    # Preprocessing strips everything but a-z words that are not stop words;
+    # if that leaves nothing, do not report a confident verdict on an empty string.
+    if not cleaned_text.strip():
+        return _EMPTY_INPUT_MESSAGE
+    inputs = tokenizer(cleaned_text, return_tensors="pt", truncation=True, padding=True, max_length=256)
+    # Inference only: no gradients are needed.
+    with torch.no_grad():
+        logits = model(**inputs).logits
+        probs = torch.softmax(logits, dim=1)
+        pred = torch.argmax(probs, dim=1).item()
+        confidence = probs[0][pred].item()
+    label = _LABELS[pred]
+    return f"{label}\nConfidence: {confidence:.2%}"
+# ✅ Gradio Interface
+# One multi-line text box in, one text box out; classify_job does the work.
+description_input = gr.Textbox(lines=6, placeholder="Paste the job description here...")
+result_output = gr.Textbox()
+interface = gr.Interface(
+    fn=classify_job,
+    inputs=description_input,
+    outputs=result_output,
+    title="Job Description Fraud Detector",
+    description="Classifies job descriptions as real or fake using DistilBERT. Uses full text preprocessing.",
+)
+# Start the web UI only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    interface.launch()