# Spaces: sreelekhaputta2 / LinkShield (Sleeping)
# File size: 12,534 Bytes

import pandas as pd
import gradio as gr
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

def train_model(csv_path="mixed_linkedin_profiles.csv"):
    """Fit the fake-profile classifier on a labelled CSV of LinkedIn profiles.

    The CSV must contain the text columns ``Skills``, ``Education``,
    ``Job Title`` and ``Summary``, the numeric columns ``Connections`` and
    ``Experience (Years)``, and an integer-convertible ``Label`` column.

    Args:
        csv_path: Path of the training CSV. Defaults to the file name the
            app has always loaded.

    Returns:
        A fitted ``Pipeline`` (column-wise vectorizing/scaling followed by a
        random forest) that accepts a DataFrame with the six feature columns.

    Raises:
        ValueError: If ``Label`` contains values that cannot be cast to int
            (including missing labels).
    """
    text_columns = ['Skills', 'Education', 'Job Title', 'Summary']
    numeric_columns = ['Connections', 'Experience (Years)']

    df = pd.read_csv(csv_path)

    # Fill gaps per column type. A blanket ``fillna("")`` would also write
    # empty strings into the numeric columns, turning them into object
    # columns that StandardScaler cannot convert to float.
    fill_values = {column: "" for column in text_columns}
    fill_values.update({column: 0 for column in numeric_columns})
    df = df.fillna(value=fill_values)

    X = df[text_columns + numeric_columns]
    y = df['Label'].astype(int)

    # Each text column gets its own small bag-of-words / TF-IDF vocabulary;
    # the two numeric columns are standardized together.
    preprocessor = ColumnTransformer([
        ('skills_vec', CountVectorizer(max_features=30), 'Skills'),
        ('education_vec', CountVectorizer(max_features=10), 'Education'),
        ('jobtitle_vec', CountVectorizer(max_features=10), 'Job Title'),
        ('summary_tfidf', TfidfVectorizer(max_features=40), 'Summary'),
        ('num_features', StandardScaler(), numeric_columns),
    ])
    pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('classifier', RandomForestClassifier(n_estimators=120, random_state=42)),
    ])

    # Only the training portion (82%) is used for fitting; the held-out part
    # is not evaluated here. The split and its seed are kept so the fitted
    # model does not change.
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.18, random_state=42)
    pipeline.fit(X_train, y_train)
    return pipeline

# Train once at import time; predict_profile() reads this module-level pipeline
# on every call instead of refitting.
model = train_model()

def predict_profile(skills, education, job_title, summary, connections, experience, company_name, job_posting):
    """Classify a profile with the trained model and run text heuristics on the company/job.

    Args:
        skills: Skills text for the profile.
        education: Education text for the profile.
        job_title: Job title text for the profile.
        summary: Free-text profile summary.
        connections: Number of connections; converted with ``int``.
        experience: Years of experience; converted with ``int``.
        company_name: Company name checked by the length/capitalization rules.
        job_posting: Job description checked by the length/word-count rules.

    Returns:
        A 7-tuple ``(profile_result, confidence, company_result,
        company_warnings_html, job_result, job_warnings_html, tips)`` where
        ``confidence`` is a float for the predicted class and the remaining
        items are display strings / HTML fragments.
    """
    # Normalize inputs up front: a missing (None) number is treated as 0
    # rather than letting ``int(None)`` raise TypeError, and missing text is
    # treated as empty. Surrounding whitespace is stripped so padding cannot
    # satisfy the length checks below.
    connections = int(connections or 0)
    experience = int(experience or 0)
    company_name = (company_name or "").strip()
    job_posting = (job_posting or "").strip()

    input_data = pd.DataFrame([{
        'Skills': skills or "",
        'Education': education or "",
        'Job Title': job_title or "",
        'Summary': summary or "",
        'Connections': connections,
        'Experience (Years)': experience
    }])
    pred = model.predict(input_data)[0]
    # Column 1 of predict_proba is read as the probability of label 1 (fake);
    # this relies on the training labels being 0 and 1.
    prob = model.predict_proba(input_data)[0][1]

    # Rule-based company checks; any triggered rule marks the company as fake.
    company_warnings = []
    if len(company_name) < 3:
        company_warnings.append("⚠️ Company name is too short or generic")
    if not any(c.isupper() for c in company_name):
        company_warnings.append("⚠️ Company name lacks proper capitalization")
    is_fake_company = bool(company_warnings)

    # Rule-based job-posting checks (character count and word count).
    job_warnings = []
    if len(job_posting) < 30:
        job_warnings.append("⚠️ Job description is too short or generic")
    if len(job_posting.split()) < 10:
        job_warnings.append("⚠️ Job description is too brief")
    is_fake_job = bool(job_warnings)

    profile_result = f"⚠️ Likely FAKE profile ({prob*100:.1f}% confidence)" if pred == 1 else f"✅ Likely REAL profile ({(1-prob)*100:.1f}% confidence)"
    company_result = "⚠️ Likely FAKE company" if is_fake_company else "✅ Likely REAL company"
    job_result = "⚠️ Likely FAKE job posting" if is_fake_job else "✅ Likely REAL job posting"
    # Confidence always refers to the class that was actually predicted.
    confidence = float(prob) if pred == 1 else float(1-prob)
    tips = """
    <div class="tips-card">
        <h4>How to Spot a Fake LinkedIn Profile or Company/Job Posting</h4>
        <ul>
            <li><strong>Too good to be true credentials:</strong> e.g., CEO at 22 with 5 PhDs.</li>
            <li><strong>Very few connections:</strong> Usually less than 50.</li>
            <li><strong>Generic or stolen profile photos:</strong> Search them on Google Images.</li>
            <li><strong>No activity/posts, endorsements, or interactions.</strong></li>
            <li><strong>Inconsistent info:</strong> Overlapping jobs, vague company names.</li>
            <li><strong>Strange grammar or unnatural English.</strong></li>
            <li><strong>Company/job posting checks:</strong> Short/odd company names, generic job descriptions, no company website or reviews.</li>
        </ul>
    </div>
    """
    # Warning strings are fixed literals (no user text), so joining them into
    # HTML needs no escaping.
    company_warnings_html = "<br>".join(company_warnings) if company_warnings else "<span style='color:#22c55e;'>No warnings detected.</span>"
    job_warnings_html = "<br>".join(job_warnings) if job_warnings else "<span style='color:#22c55e;'>No warnings detected.</span>"
    return (profile_result, confidence, company_result, company_warnings_html, job_result, job_warnings_html, tips)

# Stylesheet passed to gr.Blocks(css=...). Besides the page background and
# generic widget styling it defines the classes used by the app's own HTML
# snippets (.title-card, .features-list, .tips-card, .credits-footnote), the
# keyframe animations they reference, a rule hiding <footer>, and a
# max-width: 768px media query for narrow screens.
# NOTE(review): the `.gradio-*` selectors presumably target Gradio-generated
# elements; confirm they match the class names emitted by the installed
# Gradio version.
custom_css = """
body, .gradio-container {
    background: url('https://tse3.mm.bing.net/th?id=OIP.DwukLU73pXKo7c68jGhN1AHaEo&pid=Api&P=0&h=220') no-repeat center center fixed !important;
    background-size: cover !important;
}
.gradio-container {
    min-height: 100vh;
}
.gradio-block, .gradio-row, .gradio-column {
    background: rgba(255,255,255,0.12) !important;
    border-radius: 32px !important;
    box-shadow: 0 8px 32px 0 rgba(31, 38, 135, 0.37) !important;
    backdrop-filter: blur(20px) !important;
    border: 1px solid rgba(255,255,255,0.18) !important;
    margin-bottom: 24px !important;
    animation: fadeInUp 1.2s cubic-bezier(.39,.575,.565,1.000) both;
}
@keyframes fadeInUp {
    0% {opacity:0;transform:translateY(40px);}
    100% {opacity:1;transform:translateY(0);}
}
.gradio-markdown, .gradio-html, .gradio-textbox, .gradio-number, .gradio-slider {
    background: rgba(255,255,255,0.85) !important;
    border-radius: 18px !important;
    box-shadow: 0 2px 12px rgba(0,0,0,0.08) !important;
    margin-bottom: 12px !important;
    font-size: 1.09em !important;
    animation: fadeIn 1.2s;
}
@keyframes fadeIn {
    from {opacity:0;}
    to {opacity:1;}
}
.gradio-button {
    background: linear-gradient(90deg, #4a6baf 0%, #6dd5ed 100%) !important;
    color: white !important;
    border: none !important;
    border-radius: 18px !important;
    padding: 16px 36px !important;
    font-size: 1.15em !important;
    font-weight: bold !important;
    transition: background 0.5s, transform 0.2s, box-shadow 0.3s;
    box-shadow: 0 0 16px 4px #6dd5ed66, 0 6px 30px 0 rgba(76, 201, 240, 0.19);
    animation: pulseGlow 2s infinite alternate;
}
@keyframes pulseGlow {
    0% {box-shadow: 0 0 16px 4px #6dd5ed66, 0 6px 30px 0 rgba(76, 201, 240, 0.19);}
    100% {box-shadow: 0 0 32px 8px #4a6baf66, 0 12px 40px 0 rgba(76, 201, 240, 0.25);}
}
.gradio-button:hover {
    background: linear-gradient(90deg, #4776e6 0%, #43cea2 100%) !important;
    transform: scale(1.07) rotate(-2deg);
}
.tips-card {
    background: rgb(128, 128, 128);
    border-radius: 16px;
    padding: 20px 24px;
    margin-top: 22px;
    box-shadow: 0 2px 12px rgba(0,0,0,0.10);
    animation: fadeIn 1.6s;
}
.title-card {
    background: #fffacd !important;
    border-radius: 24px;
    padding: 24px 32px;
    margin: 0 auto 32px;
    max-width: 800px;
    box-shadow: 0 8px 24px rgba(0,0,0,0.12);
    border: 1px solid rgba(255,255,255,0.3);
    display: flex;
    flex-direction: column;
    align-items: center;
    animation: textPop 1.3s cubic-bezier(.23,1.01,.32,1) both;
}
.title-card h1 {
    color: #e63946;
    margin: 0;
    font-size: 2.2em;
    font-weight: 700;
    text-align: center;
    text-shadow: 0 2px 12px #ff999977;
    letter-spacing: 0.05em;
}
.features-list {
    background: #fffacd !important;
    border-radius: 24px;
    padding: 24px 28px;
    margin: 0 auto 24px;
    max-width: 800px;
    color: #000 !important;
    font-size: 1.1em;
    line-height: 1.7;
    box-shadow: 0 2px 12px rgba(0,0,0,0.10);
}
.features-list h2,
.features-list ul,
.features-list li,
.features-list strong {
    color: #000 !important;
}
.tips-card h4 {
    color: #e63946;
    margin-bottom: 12px;
    letter-spacing: 0.03em;
    animation: slideInLeft 1.2s;
}
@keyframes slideInLeft {
    from {opacity:0;transform:translateX(-40px);}
    to {opacity:1;transform:translateX(0);}
}
.tips-card ul {
    padding-left: 20px;
    color: #222;
}
.gradio-textbox[readonly], .gradio-html {
    font-weight: bold;
    letter-spacing: 0.01em;
    color: #222 !important;
    border: 2px solid #4a6baf !important;
    background: rgba(255,255,255,0.93) !important;
    animation: fadeInUp 1s;
}
h1, h2, h3, h4, h5 {
    animation: textPop 1.3s cubic-bezier(.23,1.01,.32,1) both;
}
@keyframes textPop {
    0% {opacity:0;transform:scale(0.7);}
    100% {opacity:1;transform:scale(1);}
}
.gradio-slider .noUi-base {
    background: linear-gradient(90deg, #4a6baf 0%, #6dd5ed 100%) !important;
}
.credits-footnote {
    text-align: center;
    margin-top: 24px;
    font-size: 0.95em;
    color: #555;
    font-weight: 500;
    padding-bottom: 16px;
}
footer {visibility: hidden;}

/* Responsive adjustments for mobile */
@media (max-width: 768px) {
    .gradio-container {
        padding: 8px !important;
    }
    .gradio-block, .gradio-row, .gradio-column {
        width: 100% !important;
        margin-bottom: 12px !important;
    }
    .gradio-textbox, .gradio-number, .gradio-slider, .gradio-button {
        width: 100% !important;
        font-size: 1em !important;
    }
    .gradio-button {
        padding: 12px 24px !important;
    }
    .title-card h1 {
        font-size: 1.6em !important;
    }
    .features-list {
        font-size: 0.9em !important;
        padding: 12px !important;
    }
}
"""

# Static HTML listing the app's features. It is rendered with gr.HTML() near
# the top of the page and styled by the `.features-list` rules in custom_css.
features_html = """
<div class="features-list">
    <h2>Our App Features</h2>
    <ul>
        <li><strong>LinkedIn Profile Authenticity Check:</strong> Analyzes skills, education, job title, summary, connections, and experience to detect fake profiles.</li>
        <li><strong>Company and Job Posting Verification:</strong> Detects fake company names and suspicious job postings based on text analysis.</li>
        <li><strong>Confidence Score:</strong> Provides a confidence level for each prediction.</li>
        <li><strong>Tips for Spotting Fakes:</strong> Lists common warning signs for fake profiles and job postings.</li>
        <li><strong>User-Friendly Interface:</strong> Modern, animated UI with clear results and warnings.</li>
    </ul>
</div>
"""

# Build the UI. Components appear on the page in the order they are created
# inside the Blocks context, so statement order here defines the layout.
with gr.Blocks(css=custom_css, title="LinkShield | LinkedIn Fake Profile & Company Detector", fill_width=True) as demo:
    # Page header: title card, feature overview, and a short instruction line.
    gr.HTML("""
    <div class="title-card">
        <h1>LinkShield (LinkedIn Fake Profile and Company Detector)</h1>
    </div>
    """)
    gr.HTML(features_html)
    gr.Markdown(
        "<div style='text-align:center;font-size:1.13em;margin-bottom:18px;'>Enter LinkedIn profile or company/job posting details.<br>The model will predict if they are likely <b>Fake</b> or <b>Real</b>.</div>"
    )
    with gr.Row():
        # Left column: the eight inputs, in the same order as
        # predict_profile's parameters, followed by the submit button.
        with gr.Column(min_width=300):
            skills = gr.Textbox(label="Skills (comma-separated)", value="Python, SQL, Marketing")
            education = gr.Textbox(label="Education", value="MBA in Marketing")
            job_title = gr.Textbox(label="Job Title", value="Marketing Specialist")
            summary = gr.Textbox(label="Profile Summary", lines=3, value="Experienced professional with proven track record...")
            connections = gr.Number(label="Connections", value=500, precision=0)
            experience = gr.Number(label="Years of Experience", value=5, precision=0)
            company_name = gr.Textbox(label="Company Name", value="TechCorp Inc.")
            # NOTE(review): this default text has fewer than 10 words, so
            # predict_profile's word-count check flags it as "too brief".
            job_posting = gr.Textbox(label="Job Posting Description", lines=3, value="Seeking a motivated individual to join our team...")
            submit_btn = gr.Button("✨ Check Profile & Company")
        # Right column: read-only outputs, in the same order as the tuple
        # returned by predict_profile.
        with gr.Column(min_width=300):
            result = gr.Textbox(label="Profile Prediction", interactive=False)
            confidence = gr.Slider(label="Confidence", minimum=0, maximum=1, step=0.01, interactive=False)
            company_result = gr.Textbox(label="Company Prediction", interactive=False)
            company_warnings = gr.HTML(label="Company Warnings")
            job_result = gr.Textbox(label="Job Posting Prediction", interactive=False)
            job_warnings = gr.HTML(label="Job Posting Warnings")
            tips = gr.HTML(label="Tips for Spotting Fakes")
    # Wire the button: inputs are passed positionally to predict_profile and
    # its 7-tuple result is distributed over the outputs list in order.
    submit_btn.click(
        predict_profile,
        inputs=[skills, education, job_title, summary, connections, experience, company_name, job_posting],
        outputs=[result, confidence, company_result, company_warnings, job_result, job_warnings, tips]
    )
    # Footer: divider, explanatory note, and credits.
    gr.Markdown("---")
    gr.Markdown("<div style='text-align:center;font-size:1.06em;'>The model uses profile features (skills, education, job title, summary, connections, experience) and text analysis to estimate the likelihood of a profile or company being fake.<br>For best results, provide as much detail as possible.</div>")
    gr.HTML("<div class='credits-footnote'>Created by Sreelekha Putta</div>")
# Start the server only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()  # Only for local testing