Spaces:

Adillega
/

AdIlleagal

Sleeping

File size: 4,528 Bytes

850e9c9
27bd413
850e9c9
 
afefed4
ba3f8f3
a3fed16
cb0139c
 
 
a83b920
27bd413
850e9c9
 
 
 
afefed4
 
4843d43
afefed4
850e9c9
 
 
 
4843d43
850e9c9
 
4843d43
 
850e9c9
4843d43
850e9c9
4843d43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93733b3
4843d43
 
 
 
 
 
 
850e9c9
 
cb0139c
afefed4
 
 
bfb7595
cb0139c
 
 
 
 
a83b920
cb0139c
3a838cd
a83b920
 
 
 
 
cb0139c
a83b920
 
afefed4
4843d43
afefed4
cb0139c
4843d43
 
cb0139c
 
 
 
4843d43
 
 
 
 
 
850e9c9
4843d43
 
27bd413
850e9c9
4843d43
850e9c9
 
afefed4
27bd413
850e9c9
 
27bd413
4843d43
850e9c9
4843d43
 
850e9c9
4843d43

import os
import streamlit as st
import pandas as pd
from transformers import pipeline
import joblib
from offensive_keywords import offensive_keywords

# Partial keywords that flag an ad on their own (e.g. "woman", "women",
# "nationality", "age"); matched as plain substrings of the ad text.
flag_keywords = ["امرأة", "نساء", "أنثى", "بنت", "للنساء", "للرجال", "سعودية", "جنسية", "العمر", "مظهر", "عقد", "ضغط العمل"]

# Page setup. Streamlit documents set_page_config as the first Streamlit
# command of a page; older releases raise StreamlitAPIException when any
# other st.* output precedes it. It therefore runs before the st.write
# below and before the cached loaders (which show a spinner by default).
st.set_page_config(page_title="نظام رصد الإعلانات", layout="wide")

st.write(f"🔎 تم تحميل {len(offensive_keywords)} عبارة من الملف.")

# Point the Hugging Face caches at /tmp.
# NOTE(review): transformers is already imported at the top of the file, so
# these variables may be read too late to take effect; the explicit
# cache_dir passed in load_gpt2 is what redirects the model download.
# Confirm, and move these above the imports if they are meant to apply.
os.environ["HF_HOME"] = "/tmp"
os.environ["TRANSFORMERS_CACHE"] = "/tmp"


@st.cache_resource
def load_classifier():
    """Load the job-ad classifier from ``src/job_classifier.pkl``.

    The path is relative to the process working directory. The result is
    kept by ``st.cache_resource`` so the file is read once, not on every
    script rerun.
    """
    # joblib.load unpickles the file: only ever load a trusted artifact.
    return joblib.load("src/job_classifier.pkl")


@st.cache_resource
def load_gpt2():
    """Build the GPT-2 text-generation pipeline, caching weights in /tmp.

    The pipeline object is kept by ``st.cache_resource`` across reruns.
    """
    return pipeline("text-generation", model="gpt2", model_kwargs={"cache_dir": "/tmp"})


classifier = load_classifier()
gpt2_pipeline = load_gpt2()

# Hackathon-inspired CSS theme, injected into the page as a raw <style> element.
_PAGE_CSS = """
<style>
body {
    background-color: #f6f9fc;
    font-family: 'Segoe UI', sans-serif;
}
h1, h2, h3 {
    color: #004c97;
}
.stButton > button {
    background: linear-gradient(90deg, #007bff, #00b8a9);
    color: white;
    font-weight: 600;
    border-radius: 8px;
    padding: 0.6em 1.4em;
    border: none;
}
.stButton > button:hover {
    transform: scale(1.03);
}
.stTextArea textarea {
    background-color: #ffffff;
    border-radius: 10px;
    border: 1px solid #d0d7de;
    padding: 12px;
}
.metric-container {
    background-color: white;
    padding: 20px;
    border-radius: 12px;
    box-shadow: 0 2px 8px rgba(0,0,0,0.05);
    margin-top: 20px;
}
</style>
"""
st.markdown(_PAGE_CSS, unsafe_allow_html=True)

# Page header: logo in a narrow left column, title and tagline in a wider
# right column (1:6 width ratio).
col1, col2 = st.columns([1, 6])
col1.image(
    "https://hackathon.hrsd.gov.sa/_next/image?url=%2Fhachathoonresourses%2Ficon%2Flogoc.png&w=384&q=75",
    width=120,
)
col2.markdown(
    """
        <h1>📢 نظام رصد الإعلانات المخالفة</h1>
        <p style="color:#5BA241; margin-top: -10px;">تحليل ذكي يستند إلى القوانين السعودية</p>
    """,
    unsafe_allow_html=True,
)

# Ad text input
uploaded_text = st.text_area("✍️ أدخل نص الإعلان هنا:", height=150)

# Classify the ad and scan it for flagged phrases
if uploaded_text:
    try:
        pred = classifier.predict([uploaded_text])[0]

        # A keyword counts as a violation when it occurs anywhere in the ad
        # text (substring match); the set union removes duplicates.
        violations = list(
            {kw for kw in offensive_keywords if kw in uploaded_text}
            | {kw for kw in flag_keywords if kw in uploaded_text}
        )

        # The ad is reported clean only when no keyword matched AND the
        # classifier predicted class 1; every other case is a violation.
        final_label = (
            "✅ إعلان سليم" if not violations and pred == 1 else "❌ إعلان مخالف"
        )

        # Show the verdict inside the styled metric card
        st.markdown(f'<div class="metric-container"><h4>{final_label}</h4></div>', unsafe_allow_html=True)

    except Exception as e:
        st.error(f"حدث خطأ في التنبؤ: {e}")

# "Analyze phrases" button: list every flagged phrase found in the ad text.
if st.button("🔍 تحليل العبارات"):
    if not uploaded_text:
        # A click with an empty text box used to do nothing; tell the user
        # that ad text is required first.
        st.warning("⚠️ الرجاء إدخال نص الإعلان أولاً.")
    else:
        # dict.fromkeys removes duplicates while keeping first-seen order, so
        # the phrases appear in the order of the keyword sources instead of
        # the run-to-run varying order of a set.
        violations = list(dict.fromkeys(
            kw for kw in (*offensive_keywords, *flag_keywords) if kw in uploaded_text
        ))
        if violations:
            st.error("❌ الإعلان يحتوي على عبارات مخالفة:")
            # Emit one well-formed <ul> rather than bare <li> elements in
            # separate markdown calls.
            items = "".join(f"<li style='color:#b00020'>{v}</li>" for v in violations)
            st.markdown(f"<ul>{items}</ul>", unsafe_allow_html=True)
        else:
            st.success("✅ لا توجد عبارات مخالفة.")

# GPT-2 recommendations
st.subheader("💡 توصيات الذكاء الاصطناعي")

if uploaded_text:
    # English instruction prompt with the ad text embedded in quotes; the
    # model is asked to continue after "Violations:".
    prompt = f"""
You are an AI that checks for violations in job advertisements based on Saudi labor laws.
Here is the ad:
\"{uploaded_text}\"

Please identify any potential violations and give recommendations to fix them.
Violations:
"""
    with st.spinner("📡 جاري التحليل..."):
        try:
            # By default the pipeline returns prompt + continuation, which
            # made the results box echo the whole instruction prompt.
            # return_full_text=False keeps only the newly generated text.
            outputs = gpt2_pipeline(prompt, max_new_tokens=150, return_full_text=False)
            result = outputs[0]["generated_text"].strip()
            st.text_area("📋 نتائج GPT-2:", result, height=300)
        except Exception as e:
            # Generation failures are reported in the UI rather than
            # crashing the page.
            st.error(f"خطأ في توليد النتائج: {str(e)}")