File size: 12,534 Bytes
2331762
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a8d7718
2331762
 
3d37190
 
2331762
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a8d7718
2331762
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a8d7718
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2331762
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a8d7718
2331762
 
 
 
 
 
 
 
 
 
a8d7718
2331762
 
 
 
 
 
 
 
 
a8d7718
2331762
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5294901
b8245c6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
import pandas as pd
import gradio as gr
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

def train_model(csv_path="mixed_linkedin_profiles.csv"):
    """Fit the fake-profile classification pipeline on a labelled CSV.

    The CSV must contain the text columns 'Skills', 'Education', 'Job Title'
    and 'Summary', the numeric columns 'Connections' and
    'Experience (Years)', and an integer-convertible 'Label' column.

    Args:
        csv_path: Path of the training CSV. Defaults to the file name the
            app has always used, so existing callers are unaffected.

    Returns:
        A fitted ``Pipeline`` (ColumnTransformer preprocessing followed by a
        RandomForestClassifier).
    """
    text_columns = ['Skills', 'Education', 'Job Title', 'Summary']
    numeric_columns = ['Connections', 'Experience (Years)']

    df = pd.read_csv(csv_path)
    # Fill per column type. A blanket fillna("") would put empty strings into
    # the numeric columns whenever a value is missing, turning them into
    # object columns that StandardScaler cannot convert to float.
    df[text_columns] = df[text_columns].fillna("")
    df[numeric_columns] = df[numeric_columns].fillna(0)

    X = df[text_columns + numeric_columns]
    y = df['Label'].astype(int)

    preprocessor = ColumnTransformer([
        ('skills_vec', CountVectorizer(max_features=30), 'Skills'),
        ('education_vec', CountVectorizer(max_features=10), 'Education'),
        ('jobtitle_vec', CountVectorizer(max_features=10), 'Job Title'),
        ('summary_tfidf', TfidfVectorizer(max_features=40), 'Summary'),
        ('num_features', StandardScaler(), numeric_columns),
    ])
    pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('classifier', RandomForestClassifier(n_estimators=120, random_state=42)),
    ])

    # Keep the same 82/18 split (and seed) so the model is trained on exactly
    # the same rows as before; the held-out part is not used in this function.
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.18, random_state=42)
    pipeline.fit(X_train, y_train)
    return pipeline

# Train once at import time; predict_profile() reads this module-level pipeline
# on every call instead of refitting.
model = train_model()

def predict_profile(skills, education, job_title, summary, connections, experience, company_name, job_posting):
    """Score a profile with the trained model and run company/job heuristics.

    Args:
        skills, education, job_title, summary: Profile text fields fed to the
            model. ``None`` is treated as an empty string.
        connections, experience: Numeric profile fields, converted with
            ``int()``. A missing value (``None``) is treated as 0.
        company_name: Company name checked for length and capitalization.
        job_posting: Job description checked for character and word count.

    Returns:
        A 7-tuple ``(profile_result, confidence, company_result,
        company_warnings_html, job_result, job_warnings_html, tips)`` in the
        order expected by the click handler's ``outputs`` list.
    """
    # Inputs may arrive as None (e.g. a cleared number field); int(None),
    # len(None) and None.split() would all raise, so normalise up front.
    connections = int(connections or 0)
    experience = int(experience or 0)
    skills = skills or ""
    education = education or ""
    job_title = job_title or ""
    summary = summary or ""
    company_name = company_name or ""
    job_posting = job_posting or ""

    input_data = pd.DataFrame([{
        'Skills': skills,
        'Education': education,
        'Job Title': job_title,
        'Summary': summary,
        'Connections': connections,
        'Experience (Years)': experience
    }])
    pred = model.predict(input_data)[0]
    # Column 1 of predict_proba is used as the "fake" probability.
    # NOTE(review): assumes the labels are 0/1 so class 1 sits at index 1.
    prob = model.predict_proba(input_data)[0][1]

    # Heuristic company checks: any warning marks the company as likely fake.
    company_warnings = []
    if len(company_name) < 3:
        company_warnings.append("⚠️ Company name is too short or generic")
    if not any(c.isupper() for c in company_name):
        company_warnings.append("⚠️ Company name lacks proper capitalization")
    is_fake_company = bool(company_warnings)

    # Heuristic job-posting checks: any warning marks the posting as likely fake.
    job_warnings = []
    if len(job_posting) < 30:
        job_warnings.append("⚠️ Job description is too short or generic")
    if len(job_posting.split()) < 10:
        job_warnings.append("⚠️ Job description is too brief")
    is_fake_job = bool(job_warnings)

    profile_result = f"⚠️ Likely FAKE profile ({prob*100:.1f}% confidence)" if pred == 1 else f"✅ Likely REAL profile ({(1-prob)*100:.1f}% confidence)"
    company_result = "⚠️ Likely FAKE company" if is_fake_company else "✅ Likely REAL company"
    job_result = "⚠️ Likely FAKE job posting" if is_fake_job else "✅ Likely REAL job posting"
    # Confidence is always reported for the predicted class.
    confidence = float(prob) if pred == 1 else float(1-prob)
    tips = """
    <div class="tips-card">
        <h4>How to Spot a Fake LinkedIn Profile or Company/Job Posting</h4>
        <ul>
            <li><strong>Too good to be true credentials:</strong> e.g., CEO at 22 with 5 PhDs.</li>
            <li><strong>Very few connections:</strong> Usually less than 50.</li>
            <li><strong>Generic or stolen profile photos:</strong> Search them on Google Images.</li>
            <li><strong>No activity/posts, endorsements, or interactions.</strong></li>
            <li><strong>Inconsistent info:</strong> Overlapping jobs, vague company names.</li>
            <li><strong>Strange grammar or unnatural English.</strong></li>
            <li><strong>Company/job posting checks:</strong> Short/odd company names, generic job descriptions, no company website or reviews.</li>
        </ul>
    </div>
    """
    company_warnings_html = "<br>".join(company_warnings) if company_warnings else "<span style='color:#22c55e;'>No warnings detected.</span>"
    job_warnings_html = "<br>".join(job_warnings) if job_warnings else "<span style='color:#22c55e;'>No warnings detected.</span>"
    return (profile_result, confidence, company_result, company_warnings_html, job_result, job_warnings_html, tips)

# Stylesheet handed to gr.Blocks(css=...). Besides restyling the Gradio
# containers, inputs and buttons, it defines the classes used by the raw HTML
# snippets in this file (.title-card, .features-list, .tips-card,
# .credits-footnote) and ends with a @media rule for viewports up to 768px.
custom_css = """
body, .gradio-container {
    background: url('https://tse3.mm.bing.net/th?id=OIP.DwukLU73pXKo7c68jGhN1AHaEo&pid=Api&P=0&h=220') no-repeat center center fixed !important;
    background-size: cover !important;
}
.gradio-container {
    min-height: 100vh;
}
.gradio-block, .gradio-row, .gradio-column {
    background: rgba(255,255,255,0.12) !important;
    border-radius: 32px !important;
    box-shadow: 0 8px 32px 0 rgba(31, 38, 135, 0.37) !important;
    backdrop-filter: blur(20px) !important;
    border: 1px solid rgba(255,255,255,0.18) !important;
    margin-bottom: 24px !important;
    animation: fadeInUp 1.2s cubic-bezier(.39,.575,.565,1.000) both;
}
@keyframes fadeInUp {
    0% {opacity:0;transform:translateY(40px);}
    100% {opacity:1;transform:translateY(0);}
}
.gradio-markdown, .gradio-html, .gradio-textbox, .gradio-number, .gradio-slider {
    background: rgba(255,255,255,0.85) !important;
    border-radius: 18px !important;
    box-shadow: 0 2px 12px rgba(0,0,0,0.08) !important;
    margin-bottom: 12px !important;
    font-size: 1.09em !important;
    animation: fadeIn 1.2s;
}
@keyframes fadeIn {
    from {opacity:0;}
    to {opacity:1;}
}
.gradio-button {
    background: linear-gradient(90deg, #4a6baf 0%, #6dd5ed 100%) !important;
    color: white !important;
    border: none !important;
    border-radius: 18px !important;
    padding: 16px 36px !important;
    font-size: 1.15em !important;
    font-weight: bold !important;
    transition: background 0.5s, transform 0.2s, box-shadow 0.3s;
    box-shadow: 0 0 16px 4px #6dd5ed66, 0 6px 30px 0 rgba(76, 201, 240, 0.19);
    animation: pulseGlow 2s infinite alternate;
}
@keyframes pulseGlow {
    0% {box-shadow: 0 0 16px 4px #6dd5ed66, 0 6px 30px 0 rgba(76, 201, 240, 0.19);}
    100% {box-shadow: 0 0 32px 8px #4a6baf66, 0 12px 40px 0 rgba(76, 201, 240, 0.25);}
}
.gradio-button:hover {
    background: linear-gradient(90deg, #4776e6 0%, #43cea2 100%) !important;
    transform: scale(1.07) rotate(-2deg);
}
.tips-card {
    background: rgb(128, 128, 128);
    border-radius: 16px;
    padding: 20px 24px;
    margin-top: 22px;
    box-shadow: 0 2px 12px rgba(0,0,0,0.10);
    animation: fadeIn 1.6s;
}
.title-card {
    background: #fffacd !important;
    border-radius: 24px;
    padding: 24px 32px;
    margin: 0 auto 32px;
    max-width: 800px;
    box-shadow: 0 8px 24px rgba(0,0,0,0.12);
    border: 1px solid rgba(255,255,255,0.3);
    display: flex;
    flex-direction: column;
    align-items: center;
    animation: textPop 1.3s cubic-bezier(.23,1.01,.32,1) both;
}
.title-card h1 {
    color: #e63946;
    margin: 0;
    font-size: 2.2em;
    font-weight: 700;
    text-align: center;
    text-shadow: 0 2px 12px #ff999977;
    letter-spacing: 0.05em;
}
.features-list {
    background: #fffacd !important;
    border-radius: 24px;
    padding: 24px 28px;
    margin: 0 auto 24px;
    max-width: 800px;
    color: #000 !important;
    font-size: 1.1em;
    line-height: 1.7;
    box-shadow: 0 2px 12px rgba(0,0,0,0.10);
}
.features-list h2,
.features-list ul,
.features-list li,
.features-list strong {
    color: #000 !important;
}
.tips-card h4 {
    color: #e63946;
    margin-bottom: 12px;
    letter-spacing: 0.03em;
    animation: slideInLeft 1.2s;
}
@keyframes slideInLeft {
    from {opacity:0;transform:translateX(-40px);}
    to {opacity:1;transform:translateX(0);}
}
.tips-card ul {
    padding-left: 20px;
    color: #222;
}
.gradio-textbox[readonly], .gradio-html {
    font-weight: bold;
    letter-spacing: 0.01em;
    color: #222 !important;
    border: 2px solid #4a6baf !important;
    background: rgba(255,255,255,0.93) !important;
    animation: fadeInUp 1s;
}
h1, h2, h3, h4, h5 {
    animation: textPop 1.3s cubic-bezier(.23,1.01,.32,1) both;
}
@keyframes textPop {
    0% {opacity:0;transform:scale(0.7);}
    100% {opacity:1;transform:scale(1);}
}
.gradio-slider .noUi-base {
    background: linear-gradient(90deg, #4a6baf 0%, #6dd5ed 100%) !important;
}
.credits-footnote {
    text-align: center;
    margin-top: 24px;
    font-size: 0.95em;
    color: #555;
    font-weight: 500;
    padding-bottom: 16px;
}
footer {visibility: hidden;}

/* Responsive adjustments for mobile */
@media (max-width: 768px) {
    .gradio-container {
        padding: 8px !important;
    }
    .gradio-block, .gradio-row, .gradio-column {
        width: 100% !important;
        margin-bottom: 12px !important;
    }
    .gradio-textbox, .gradio-number, .gradio-slider, .gradio-button {
        width: 100% !important;
        font-size: 1em !important;
    }
    .gradio-button {
        padding: 12px 24px !important;
    }
    .title-card h1 {
        font-size: 1.6em !important;
    }
    .features-list {
        font-size: 0.9em !important;
        padding: 12px !important;
    }
}
"""

# Static HTML for the feature overview card rendered near the top of the page;
# the "features-list" class is styled in custom_css.
features_html = """
<div class="features-list">
    <h2>Our App Features</h2>
    <ul>
        <li><strong>LinkedIn Profile Authenticity Check:</strong> Analyzes skills, education, job title, summary, connections, and experience to detect fake profiles.</li>
        <li><strong>Company and Job Posting Verification:</strong> Detects fake company names and suspicious job postings based on text analysis.</li>
        <li><strong>Confidence Score:</strong> Provides a confidence level for each prediction.</li>
        <li><strong>Tips for Spotting Fakes:</strong> Lists common warning signs for fake profiles and job postings.</li>
        <li><strong>User-Friendly Interface:</strong> Modern, animated UI with clear results and warnings.</li>
    </ul>
</div>
"""

# Build the Gradio page. Components render in the order they are created
# inside the Blocks context.
with gr.Blocks(css=custom_css, title="LinkShield | LinkedIn Fake Profile & Company Detector", fill_width=True) as demo:
    # Page header: title card, feature overview, and a short usage hint.
    gr.HTML("""
    <div class="title-card">
        <h1>LinkShield (LinkedIn Fake Profile and Company Detector)</h1>
    </div>
    """)
    gr.HTML(features_html)
    gr.Markdown(
        "<div style='text-align:center;font-size:1.13em;margin-bottom:18px;'>Enter LinkedIn profile or company/job posting details.<br>The model will predict if they are likely <b>Fake</b> or <b>Real</b>.</div>"
    )
    with gr.Row():
        # First column: user inputs, pre-filled with example values.
        with gr.Column(min_width=300):
            skills = gr.Textbox(label="Skills (comma-separated)", value="Python, SQL, Marketing")
            education = gr.Textbox(label="Education", value="MBA in Marketing")
            job_title = gr.Textbox(label="Job Title", value="Marketing Specialist")
            summary = gr.Textbox(label="Profile Summary", lines=3, value="Experienced professional with proven track record...")
            connections = gr.Number(label="Connections", value=500, precision=0)
            experience = gr.Number(label="Years of Experience", value=5, precision=0)
            company_name = gr.Textbox(label="Company Name", value="TechCorp Inc.")
            job_posting = gr.Textbox(label="Job Posting Description", lines=3, value="Seeking a motivated individual to join our team...")
            submit_btn = gr.Button("✨ Check Profile & Company")
        # Second column: read-only result widgets filled by predict_profile.
        with gr.Column(min_width=300):
            result = gr.Textbox(label="Profile Prediction", interactive=False)
            confidence = gr.Slider(label="Confidence", minimum=0, maximum=1, step=0.01, interactive=False)
            company_result = gr.Textbox(label="Company Prediction", interactive=False)
            company_warnings = gr.HTML(label="Company Warnings")
            job_result = gr.Textbox(label="Job Posting Prediction", interactive=False)
            job_warnings = gr.HTML(label="Job Posting Warnings")
            tips = gr.HTML(label="Tips for Spotting Fakes")
    # The inputs list must follow predict_profile's parameter order, and the
    # outputs list must follow the order of the tuple it returns.
    submit_btn.click(
        predict_profile,
        inputs=[skills, education, job_title, summary, connections, experience, company_name, job_posting],
        outputs=[result, confidence, company_result, company_warnings, job_result, job_warnings, tips]
    )
    # Page footer: explanatory note and credits.
    gr.Markdown("---")
    gr.Markdown("<div style='text-align:center;font-size:1.06em;'>The model uses profile features (skills, education, job title, summary, connections, experience) and text analysis to estimate the likelihood of a profile or company being fake.<br>For best results, provide as much detail as possible.</div>")
    gr.HTML("<div class='credits-footnote'>Created by Sreelekha Putta</div>")
# Start the Gradio server only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    demo.launch()  # Only for local testing