Spaces:
Sleeping
Sleeping
| import torch | |
| from transformers import DistilBertTokenizer, DistilBertForSequenceClassification | |
| import gradio as gr | |
| import re | |
| import nltk | |
| from nltk.tokenize import word_tokenize | |
| from nltk.corpus import stopwords | |
# Fetch the NLTK resources the preprocessing step relies on (tokenizer data
# and the stopword corpus). nltk.download is invoked once per resource.
for _nltk_resource in ('punkt_tab', 'stopwords'):
    nltk.download(_nltk_resource)

# Build the tokenizer and a two-label DistilBERT classifier from the same
# base checkpoint, then replace the classifier's weights with the ones stored
# in job_model.pth (tensors are mapped onto the CPU while loading).
_BASE_CHECKPOINT = "distilbert-base-uncased"
tokenizer = DistilBertTokenizer.from_pretrained(_BASE_CHECKPOINT)
model = DistilBertForSequenceClassification.from_pretrained(_BASE_CHECKPOINT, num_labels=2)
# NOTE(review): torch.load unpickles the checkpoint file; if the installed
# torch version supports it, consider passing weights_only=True -- confirm.
_state_dict = torch.load("job_model.pth", map_location=torch.device("cpu"))
model.load_state_dict(_state_dict)
# Switch to evaluation mode before serving predictions.
model.eval()
| # ✅ Preprocess a single job description string (adapted from your DataFrame version) | |
# Lazily filled cache so the English stopword corpus is read once per process
# instead of on every call to preprocess_text.
_STOP_WORDS_CACHE = {}


def _english_stop_words():
    """Return the NLTK English stopwords as a frozenset, loading them once."""
    if 'english' not in _STOP_WORDS_CACHE:
        _STOP_WORDS_CACHE['english'] = frozenset(stopwords.words('english'))
    return _STOP_WORDS_CACHE['english']


def preprocess_text(text):
    """Normalize a single job description string.

    Steps, in order: lowercase the text, delete every character that is not
    a-z or whitespace, tokenize with NLTK's word_tokenize, drop English
    stopwords, and join the remaining tokens with single spaces.

    Args:
        text: The raw job description. None or an empty string is accepted.

    Returns:
        The cleaned, space-joined text; an empty string when ``text`` is
        empty or None, or when no tokens survive the filtering.
    """
    # Guard: None has no .lower(), and empty input has nothing to tokenize.
    if not text:
        return ''

    # Lowercase first so the a-z character class below covers all letters.
    text = text.lower()
    # Remove non-alphabetic characters (digits, punctuation, symbols).
    text = re.sub(r'[^a-z\s]', '', text)
    tokens = word_tokenize(text)
    stop_words = _english_stop_words()
    return ' '.join(word for word in tokens if word not in stop_words)
| # ✅ Inference function | |
def classify_job(description):
    """Classify a job description as legitimate or fraudulent.

    The description is cleaned with ``preprocess_text``, tokenized (truncated
    to at most 256 tokens) and passed through the module-level DistilBERT
    ``model``; the class with the highest softmax probability is reported.

    Args:
        description: Raw job description text as entered by the user. None,
            empty, or whitespace-only input is accepted.

    Returns:
        A string with the predicted label on the first line and
        ``Confidence: <percentage>`` on the second, or a short prompt when
        there is no text to classify.
    """
    # Guard: without this, None crashes in preprocessing and blank input
    # yields a confident-looking verdict for an empty token sequence.
    if description is None or not description.strip():
        return "Please enter a job description to classify."

    cleaned_text = preprocess_text(description)
    if not cleaned_text:
        # Everything was stripped (e.g. only digits, punctuation or stopwords),
        # so a prediction would not reflect the input.
        return "No analyzable text found in the job description."

    inputs = tokenizer(cleaned_text, return_tensors="pt", truncation=True, padding=True, max_length=256)
    with torch.no_grad():
        outputs = model(**inputs)
    probs = torch.softmax(outputs.logits, dim=1)
    pred = torch.argmax(probs, dim=1).item()

    # Index-to-label mapping; assumes class 0 = real and class 1 = fake, as in
    # the original mapping -- verify against the training labels.
    # The leading emoji were mis-encoded in the original strings and are
    # restored here.
    label_map = {
        0: "✅ Legitimate Job Post (Real)",
        1: "🚨 FAKE Job Post (Fraudulent)",
    }
    label = label_map[pred]
    confidence = probs[0][pred].item()
    return f"{label}\nConfidence: {confidence:.2%}"
| # ✅ Gradio Interface | |
# Input widget: a six-line text area for the pasted job description.
_description_input = gr.Textbox(lines=6, placeholder="Paste the job description here...")
# Output widget: a plain text box showing the label and confidence string.
_result_output = gr.Textbox()

# Wire classify_job to the two text boxes in a single-function Gradio app.
interface = gr.Interface(
    fn=classify_job,
    inputs=_description_input,
    outputs=_result_output,
    title="Job Description Fraud Detector",
    description="Classifies job descriptions as real or fake using DistilBERT. Uses full text preprocessing.",
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()