Spaces:
Build error
Build error
File size: 5,169 Bytes
4744234 1c98539 f7c13b9 1b15a9d 4dea241 4744234 4dea241 4744234 0c80d89 4744234 4dea241 1c98539 4dea241 4744234 1c98539 f7c13b9 1c98539 f7c13b9 4dea241 f7c13b9 4dea241 4744234 4dea241 4744234 f7c13b9 4744234 4dea241 4744234 4dea241 4744234 4dea241 4744234 4dea241 4744234 4dea241 4744234 80d2a33 4dea241 4744234 4dea241 4744234 4dea241 4687603 4744234 4dea241 4744234 4dea241 4744234 4dea241 4744234 4dea241 4744234 4dea241 4744234 4dea241 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 | # ํ์ํ ๋ผ์ด๋ธ๋ฌ๋ฆฌ ๋ถ๋ฌ์ค๊ธฐ
import streamlit as st
import pandas as pd
from dotenv import load_dotenv
import os
import openpyxl
from soynlp.noun import LRNounExtractor_v2
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
import openai
# --- Application setup (runs once at import time) ---

# Load .env and hand the API key to the OpenAI client.
load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")
openai.api_key = openai_api_key

# Excel workbook holding the internal content catalogue.
file_path = 'contents.xlsx'
wb = openpyxl.load_workbook(file_path)

# The soynlp noun extractor is created a single time and shared by all
# similarity computations below.
noun_extractor = LRNounExtractor_v2()
@st.cache(allow_output_mutation=True)
def calculate_similarity_with_soynlp(text1, text2):
    """Return the TF-IDF cosine similarity between the nouns of two texts.

    Nouns are extracted from each text with the module-level soynlp
    ``noun_extractor``, joined into a space-separated pseudo-document,
    vectorised with TF-IDF, and compared with cosine similarity.

    Args:
        text1: First text (e.g. the search criteria).
        text2: Second text (e.g. a content description).

    Returns:
        float similarity in [0, 1]; 0.0 when no comparable vocabulary
        could be built from either text.
    """
    nouns1 = noun_extractor.train_extract([text1])
    nouns2 = noun_extractor.train_extract([text2])
    text1_nouns = ' '.join(nouns1)  # train_extract returns a dict keyed by noun
    text2_nouns = ' '.join(nouns2)
    # BUG FIX: if neither text yields any nouns, TfidfVectorizer raises
    # ValueError("empty vocabulary") and crashed the app. Treat that case
    # as "no similarity" instead.
    if not text1_nouns.strip() and not text2_nouns.strip():
        return 0.0
    vectorizer = TfidfVectorizer()
    try:
        tfidf_matrix = vectorizer.fit_transform([text1_nouns, text2_nouns])
    except ValueError:
        # Vocabulary still empty after tokenisation (e.g. only filtered tokens).
        return 0.0
    cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
    return cosine_sim[0][0]
@st.cache
def load_sheet_data(sheet_name):
    """Load one worksheet of the module-level workbook ``wb`` as a DataFrame.

    The first row of the sheet is used as the column header.

    Args:
        sheet_name: Name of the worksheet to read.

    Returns:
        pandas.DataFrame with the sheet's rows; empty DataFrame when the
        sheet has no rows at all.
    """
    ws = wb[sheet_name]
    data = list(ws.values)
    # BUG FIX: an empty worksheet used to raise IndexError on data[0].
    if not data:
        return pd.DataFrame()
    columns = data[0]
    df = pd.DataFrame(data[1:], columns=columns)
    return df
def filter_internal_contents(df, search_criteria, threshold=0.1):
    """Select catalogue rows whose description is similar to the criteria.

    Args:
        df: DataFrame produced by ``load_sheet_data``.
        search_criteria: Lower-cased free-text query (purpose + target).
        threshold: Minimum cosine similarity for a row to be kept.

    Returns:
        List of dicts with the matching rows' title and description columns.
    """
    filtered_contents = []
    for _, row in df.iterrows():
        intro_text = row['์๊ฐ']
        if intro_text:  # skip rows with an empty/NaN description cell
            similarity_score = calculate_similarity_with_soynlp(search_criteria, intro_text.lower())
            if similarity_score >= threshold:
                # BUG FIX: the original dict literal repeated the key '์๊ฐ'
                # twice with identical values (the later entry silently
                # overwrote the earlier one), so dropping the duplicate is
                # behavior-preserving. NOTE(review): the two keys were
                # probably distinct column names before a text-encoding
                # mishap collapsed them — confirm against the spreadsheet's
                # real headers.
                filtered_contents.append({'์ ๋ชฉ': row['์ ๋ชฉ'], '์๊ฐ': row['์๊ฐ']})
    return filtered_contents
# Keyword-extraction helper.
def extract_keywords(purpose, target):
    """Ask the chat model which curriculum keywords fit the purpose/audience.

    Args:
        purpose: Learning goal entered by the user.
        target: Learning audience entered by the user.

    Returns:
        List of keyword strings (the model's reply split on ', '), or an
        empty list when the API call fails; failures are surfaced via
        ``st.error`` rather than raised.
    """
    prompt = f"ํ์ต ๋ชฉ์ '{purpose}'๊ณผ ๋์ '{target}'๋ฅผ ์ขํฉํ์ฌ ์ด๋ค ์ปค๋ฆฌํ๋ผ์ ํ์ต ์ปจํ์ธ ๊ฐ ํ์ํ์ง๋ฅผ ์์ ํด์ฃผ์ธ์."
    try:
        completion = openai.ChatCompletion.create(
            model="gpt-4-0613",  # gpt-4 chat model
            messages=[{"role": "user", "content": prompt}]
        )
    except Exception as e:
        # Degrade gracefully: report the failure in the UI, return no keywords.
        st.error(f"OpenAI API error: {e}")
        return []
    reply = completion['choices'][0]['message']['content']
    return reply.strip().split(', ')
def recommend_with_gpt(sheet_name, purpose, target, time, internal_contents):
    """Ask GPT for content recommendations within the given constraints.

    Builds a prompt from the selected category, the user's purpose/target/
    time budget, and a summary of the already-filtered internal contents,
    and asks the model to recommend internal plus complementary external
    content.

    Args:
        sheet_name: Selected worksheet (content category).
        purpose: Learning goal entered by the user.
        target: Learning audience entered by the user.
        time: Time budget as a string (minutes).
        internal_contents: Output of ``filter_internal_contents``.

    Returns:
        The model's recommendation text, or "" when the API call fails
        (the error is surfaced via ``st.error``).
    """
    summary = ", ".join(f"{content['์ ๋ชฉ']}: {content['์๊ฐ']}" for content in internal_contents)
    prompt = f"""
'{sheet_name}' ์นดํ๊ณ ๋ฆฌ ๋ด์์ '{purpose}' ํ์ต ๋ชฉ์ ์ ๋ฌ์ฑํ๊ณ ์ ํ๋ '{target}' ๋์์ ์ํด,
์ฃผ์ด์ง '{time}' ์๊ฐ ๋ด์ ๋ค๋ฃฐ ์ ์๋ ์ปจํ์ธ ๋ฅผ ์ถ์ฒํด์ฃผ์ธ์.
ํ์ฌ ํํฐ๋ง๋ ๋ด๋ถ ์ปจํ์ธ ๋ ๋ค์๊ณผ ๊ฐ์ต๋๋ค: {summary}
์ถ๊ฐ๋ก, ์ด ๋ด๋ถ ์ปจํ์ธ ๋ฅผ ๋ณด์ํ๊ฑฐ๋ ํ์ฅํ ์ ์๋ ์ธ๋ถ ์ปจํ์ธ (์: ์ฑ, ์์, ์จ๋ผ์ธ ๊ฐ์ ๋ฑ)๋ ํจ๊ป ์ถ์ฒํด์ฃผ์ธ์.
"""
    # BUG FIX: the original called openai.Completion.create (the legacy
    # *completions* endpoint) with "gpt-4-0613". GPT-4 models are only served
    # by the *chat* completions endpoint, so every call failed. Use
    # ChatCompletion, as extract_keywords already does, and handle API
    # errors the same way for consistency.
    try:
        response = openai.ChatCompletion.create(
            model="gpt-4-0613",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.6,
            max_tokens=150,
            top_p=1.0,
            frequency_penalty=0.0,
            presence_penalty=0.0
        )
        return response['choices'][0]['message']['content'].strip()
    except Exception as e:
        st.error(f"OpenAI API error: {e}")
        return ""
# --- Streamlit UI ---
st.set_page_config(page_title="Curation", page_icon="๐ท")
st.title("์ปจํ์ธ  ํ๋ ์ด์")

# User inputs: learning purpose, audience, and time budget (minutes).
purpose = st.text_area("ํ์ต ๋ชฉ์ ์ ์๋ ฅํด์ฃผ์ธ์", placeholder="์: ํ๋ณต ์ดํด ๋ฐ ์ ์ฉ")
target = st.text_area("ํ์ต ๋์์ ์๋ ฅํ์ธ์", placeholder="์: ์ผ๋ฐ ๊ตฌ์ฑ์")
time = st.number_input("ํ์ต ์๊ฐ์ ์๋ ฅํ์ธ์", min_value=1, max_value=120, value=30, step=1)

# Content category = one worksheet of the catalogue workbook.
sheet_names = wb.sheetnames
sheet_name = st.selectbox("์ปจํ์ธ  ๋ฒ์๋ฅผ ์ ํํด์ฃผ์ธ์", sheet_names)

if st.button('์ปจํ์ธ ์ถ์ฒ๋ฐ๊ธฐ'):
    if not purpose or not target or not time:
        st.error("๋ชจ๋ ํ๋๋ฅผ ์ฌ๋ฐ๋ฅด๊ฒ ์ฑ์์ฃผ์ธ์.")
    else:
        # 1) Show the keywords GPT derived from purpose + target.
        keywords = extract_keywords(purpose, target)
        if keywords:
            st.markdown("### ์ถ์ถ๋ ํค์๋")
            st.write(", ".join(keywords))
        else:
            st.write("ํค์๋๋ฅผ ์ถ์ถํ์ง ๋ชปํ์ต๋๋ค.")

        # 2) Filter the internal catalogue by similarity to the query.
        sheet_df = load_sheet_data(sheet_name)
        search_criteria = f"{purpose} {target}".lower()
        internal_contents = filter_internal_contents(sheet_df, search_criteria, threshold=0.75)
        if internal_contents:
            st.subheader("๋ด๋ถ ์ปจํ์ธ  ํํฐ๋ง ๊ฒฐ๊ณผ")
            for item in internal_contents:
                with st.expander(f"{item['์ ๋ชฉ']} ({item['์๊ฐ']}๋ถ)"):
                    st.write(item['์๊ฐ'])
        else:
            st.write("๋ด๋ถ ์ปจํ์ธ ๊ฐ ์์ต๋๋ค.")

        # 3) GPT recommendation: internal picks + complementary external content.
        recommendation = recommend_with_gpt(sheet_name, purpose, target, str(time), internal_contents)
        st.subheader("์ถ์ฒ ์ปจํ์ธ  ๋ฆฌ์คํธ")
        st.write(recommendation)
|