import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

# Load the model and tokenizer.

model_directory = "."  # Load the model from the current working directory.


@st.cache_resource
def _load_inference_pipeline(directory):
    """Build the text-classification pipeline for the checkpoint in *directory*.

    Streamlit re-executes this script from the top on every widget
    interaction. Caching the pipeline as a resource means the model and
    tokenizer are loaded once per server process instead of once per click.

    Args:
        directory: Path handed to ``from_pretrained`` for both the model and
            the tokenizer.

    Returns:
        A ``text-classification`` pipeline that reports a score for every
        label rather than only the best one.
    """
    loaded_model = AutoModelForSequenceClassification.from_pretrained(directory)
    loaded_tokenizer = AutoTokenizer.from_pretrained(directory)
    return pipeline(
        "text-classification",
        model=loaded_model,
        tokenizer=loaded_tokenizer,
        # ``top_k=None`` is the documented replacement for the deprecated
        # ``return_all_scores=True``. NOTE: entries may come back ordered by
        # score instead of by label id; consumers should key on "label".
        top_k=None,
    )


# Set up the inference pipeline (cached across Streamlit reruns).
inference_pipeline = _load_inference_pipeline(model_directory)

# Keep the original module-level names available for any later code.
model = inference_pipeline.model
tokenizer = inference_pipeline.tokenizer

# Build the Streamlit UI: page title followed by the usage hint and the
# list of the eight criteria, each written as its own element.
st.title("텍스트 윤리성 분석")
for notice in (
    " 아래에 텍스트를 입력하면 8가지 기준에 따라 윤리적 문제를 탐지해 줍니다(텍스트는 100자 이내 권장)",
    "기준: IMMORAL_NONE(문제없음), CRIME(범죄), SEXUAL(선정), HATE(혐오), DISCRIMINATION(차별), CENSURE(비난), ABUSE(욕설), VIOLENCE(폭력)",
):
    st.write(notice)

# Free-text input for the sentence to analyse; starts out empty.
example_sentence = st.text_area(label="텍스트를 입력하세요", value="")

# Display names for the eight criteria, keyed by the model's label index.
id2label = {
    0: 'IMMORAL_NONE(문제없음)', 1: 'CRIME(범죄)', 2: 'SEXUAL(선정)', 3: 'HATE(혐오)',
    4: 'DISCRIMINATION(차별)', 5: 'CENSURE(비난)', 6: 'ABUSE(욕설)', 7: 'VIOLENCE(폭력)'
}

# A label is reported as YES when its score is strictly above this value.
DECISION_THRESHOLD = 0.5


def _label_index(raw_label):
    """Return the trailing integer of a label such as ``LABEL_3``, else None."""
    try:
        return int(raw_label.rsplit('_', 1)[-1])
    except ValueError:
        return None


def _display_name(raw_label):
    """Translate a pipeline label into its display name.

    Falls back to the raw label when it has no numeric suffix or the index is
    not one of the known criteria, instead of raising.
    """
    return id2label.get(_label_index(raw_label), raw_label)


def _display_order(entry):
    """Sort key: known label indices ascending, unparseable labels last."""
    index = _label_index(entry['label'])
    return (index is None, 0 if index is None else index)


if st.button("분석"):
    if not example_sentence.strip():
        # Nothing to classify; do not send empty text to the model.
        st.warning("분석할 텍스트를 입력하세요.")
    else:
        # Run inference. Passing a one-element list makes the pipeline return
        # one result list per input regardless of how it was configured.
        # NOTE(review): the independent per-label 0.5 threshold below assumes a
        # multi-label checkpoint, so sigmoid is requested explicitly; without
        # it the pipeline applies softmax whenever config.problem_type is
        # unset. Confirm against the checkpoint's config.json.
        results = inference_pipeline(
            [example_sentence],
            function_to_apply="sigmoid",
            truncation=True,  # inputs longer than the model limit are cut, not fatal
        )

        # Interpret the results (predict each label with a 0.5 threshold).
        st.write("분석 결과:")
        # Sort by label index so rows appear in a stable order even when the
        # pipeline returns them ordered by score.
        for result in sorted(results[0], key=_display_order):
            label = _display_name(result['label'])
            score = result['score']
            prediction = score > DECISION_THRESHOLD
            st.write(f"{label}: {'YES' if prediction else 'NO'} (확률: {score:.4f})")