import pandas as pd
import streamlit as st

# Load the prediction spreadsheet into a DataFrame.
file_path = "book_introductions_with_predictions.xlsx"
df = pd.read_excel(file_path)

# Page heading for the title-search section.
st.title("책 소개글 기반 감정분석")

# Free-text box where the user types the book title to look up.
search_title = st.text_input("책 제목을 입력하세요:")

if search_title:
    # Keep only the rows whose title matches the input exactly.
    result = df[df['책 제목'] == search_title]

    if result.empty:
        st.write("해당 책 제목을 찾을 수 없습니다.")
    else:
        # Several rows may match; only the first one is reported.
        final_sentiment = result.iloc[0]['최종감정']
        st.write(f"**{search_title}**에 대한 최종 감정: **{final_sentiment}**")

# Heading for the per-sentiment top-10 section.
st.title("각 감정별 확률 상위 10개의 책")

# The probability column holds a bracketed, whitespace-separated string;
# strip the brackets, split on whitespace, and store the three pieces as
# float columns (positive, negative, neutral).
probability_parts = df['확률'].str.strip('[]').str.split(expand=True)
df[['긍정', '부정', '중립']] = probability_parts.astype(float)

def get_top_n_unique(df, column, n=10):
    """Return the ``n`` highest rows by ``column``, at most one per book title.

    Rows are sorted by ``column`` in descending order; when a title occurs
    more than once, only its highest-ranked row is kept.

    Args:
        df: DataFrame containing a '책 제목' (book title) column and ``column``.
        column: Name of the column to rank by.
        n: Maximum number of rows to return. Values of zero or below yield
            an empty result.

    Returns:
        A DataFrame with the two columns '책 제목' and ``column``, keeping the
        index labels of the selected rows. When nothing qualifies (empty
        input or ``n <= 0``) an empty frame with those two columns is
        returned instead of raising ``KeyError``.
    """
    ranked = df.sort_values(by=column, ascending=False)
    # keep='first' retains the best-ranked row for each title.
    one_per_title = ranked.drop_duplicates(subset='책 제목', keep='first')
    # Clamp n: a negative argument to head() would mean "all but the last
    # |n| rows", which is not what a top-n request should return.
    return one_per_title.head(max(n, 0))[['책 제목', column]]

# Top-10 distinct titles for each sentiment probability column
# (positive, negative, neutral).
top_positive = get_top_n_unique(df, '긍정', n=10)
top_negative = get_top_n_unique(df, '부정', n=10)
top_neutral = get_top_n_unique(df, '중립', n=10)

# Render each ranking under its own subheader: positive, negative, neutral.
for heading, table in (
    ("긍정 확률 상위 10개 책", top_positive),
    ("부정 확률 상위 10개 책", top_negative),
    ("중립 확률 상위 10개 책", top_neutral),
):
    st.subheader(heading)
    st.table(table)