# curation/app.py — Streamlit content-curation app (soojeongcrystal, rev 4dea241)
# ํ•„์š”ํ•œ ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ ๋ถˆ๋Ÿฌ์˜ค๊ธฐ
import streamlit as st
import pandas as pd
from dotenv import load_dotenv
import os
import openpyxl
from soynlp.noun import LRNounExtractor_v2
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
import openai
# Load environment variables from a local .env file
load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")
# Configure the OpenAI client (may be None if the env var is unset — API calls will then fail)
openai.api_key = openai_api_key
# Path to the Excel workbook holding the internal contents catalog
file_path = 'contents.xlsx'
# Load the workbook once at app startup (read by load_sheet_data and the UI below)
wb = openpyxl.load_workbook(file_path)
# The noun extractor is initialized only once at app start
noun_extractor = LRNounExtractor_v2()
@st.cache(allow_output_mutation=True)
def calculate_similarity_with_soynlp(text1, text2):
    """Return the TF-IDF cosine similarity between the nouns of two texts.

    Nouns are extracted from each text with soynlp's LRNounExtractor_v2,
    joined into a "bag of nouns" string, vectorized with TF-IDF, and
    compared with cosine similarity.

    Args:
        text1: First text (e.g. the search criteria).
        text2: Second text (e.g. a content introduction).

    Returns:
        float: Cosine similarity in [0, 1]. Returns 0.0 when no nouns
        could be extracted from either text — previously this case
        crashed TfidfVectorizer with "empty vocabulary" ValueError,
        which is common for short inputs.
    """
    # train_extract returns a {noun: score} mapping; short single texts
    # frequently yield an empty mapping.
    nouns1 = noun_extractor.train_extract([text1])
    nouns2 = noun_extractor.train_extract([text2])
    text1_nouns = ' '.join(list(nouns1.keys()))
    text2_nouns = ' '.join(list(nouns2.keys()))
    # Guard: TfidfVectorizer.fit_transform raises ValueError on an
    # empty vocabulary; treat "no nouns" as zero similarity instead.
    if not text1_nouns.strip() or not text2_nouns.strip():
        return 0.0
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform([text1_nouns, text2_nouns])
    cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
    return cosine_sim[0][0]
@st.cache
def load_sheet_data(sheet_name):
    """Load one worksheet of the pre-opened workbook into a DataFrame.

    The first row of the sheet is used as the column header; all
    remaining rows become the data.

    Args:
        sheet_name: Name of a worksheet in the module-level workbook ``wb``.

    Returns:
        pd.DataFrame: Sheet contents, or an empty DataFrame when the
        sheet has no rows at all (previously ``data[0]`` raised
        IndexError in that case).
    """
    ws = wb[sheet_name]
    data = list(ws.values)
    # Guard against a completely empty worksheet.
    if not data:
        return pd.DataFrame()
    columns = data[0]
    df = pd.DataFrame(data[1:], columns=columns)
    return df
def filter_internal_contents(df, search_criteria, threshold=0.1):
    """Select rows whose '소개' (introduction) text is similar to the query.

    Args:
        df: DataFrame with at least the columns '제목', '소개', '시간'.
        search_criteria: Query string (expected lowercased by the caller).
        threshold: Minimum cosine similarity for a row to be kept.

    Returns:
        list[dict]: One dict per matching row with keys '제목', '소개', '시간'.
    """
    filtered_contents = []
    for _, row in df.iterrows():
        intro_text = row['소개']
        # Skip empty/None cells; openpyxl may also yield non-string
        # values (numbers, dates), which previously crashed .lower() —
        # coerce to str before lowercasing.
        if intro_text:
            similarity_score = calculate_similarity_with_soynlp(
                search_criteria, str(intro_text).lower()
            )
            if similarity_score >= threshold:
                filtered_contents.append(
                    {'제목': row['제목'], '소개': row['소개'], '시간': row['시간']}
                )
    return filtered_contents
# ์ถ”๊ฐ€๋œ ํ‚ค์›Œ๋“œ ์ถ”์ถœ ํ•จ์ˆ˜
def extract_keywords(purpose, target):
    """Ask GPT for a comma-separated list of learning-content keywords.

    Bug fix: the previous prompt asked GPT to *describe* the needed
    curriculum in prose, yet the reply was parsed with ``split(', ')``
    as if it were a keyword list — the two never matched. The prompt now
    explicitly requests comma-separated keywords, and parsing tolerates
    commas with or without trailing spaces.

    Args:
        purpose: Learning purpose entered by the user.
        target: Learning audience entered by the user.

    Returns:
        list[str]: Extracted keywords; [] on API failure (the error is
        surfaced to the UI via st.error).
    """
    prompt = (
        f"학습 목적 '{purpose}'과 대상 '{target}'을 종합하여, "
        f"필요한 학습 컨텐츠 키워드를 쉼표로 구분하여 나열해주세요."
    )
    try:
        response = openai.ChatCompletion.create(
            model="gpt-4-0613",  # chat model via the ChatCompletion endpoint
            messages=[{"role": "user", "content": prompt}]
        )
        content = response['choices'][0]['message']['content'].strip()
        # Split on commas and drop empty fragments / stray whitespace.
        return [kw.strip() for kw in content.split(',') if kw.strip()]
    except Exception as e:
        # Best-effort: report the failure in the UI and continue with no keywords.
        st.error(f"OpenAI API error: {e}")
        return []
def recommend_with_gpt(sheet_name, purpose, target, time, internal_contents):
    """Ask GPT to recommend internal and external contents for the request.

    Bug fix: ``gpt-4-0613`` is a chat model and is not served by the
    legacy ``openai.Completion`` endpoint, so the previous call failed
    at runtime with an invalid-model error. Use
    ``openai.ChatCompletion.create`` with a messages list (consistent
    with extract_keywords) and read the reply from
    ``choices[0]['message']['content']``.

    Args:
        sheet_name: Selected content category (worksheet name).
        purpose: Learning purpose entered by the user.
        target: Learning audience entered by the user.
        time: Available study time, as a string.
        internal_contents: Output of filter_internal_contents (may be empty).

    Returns:
        str: GPT's recommendation text.
    """
    # Summarize the filtered internal contents for inclusion in the prompt.
    summary = ", ".join(
        f"{content['제목']}: {content['소개']}" for content in internal_contents
    )
    # NOTE: prompt lines are intentionally left-aligned inside the
    # triple-quoted f-string so the prompt text sent to the API is unchanged.
    prompt = f"""
'{sheet_name}' 카테고리 내에서 '{purpose}' 학습 목적을 달성하고자 하는 '{target}' 대상을 위해,
주어진 '{time}' 시간 내에 다룰 수 있는 컨텐츠를 추천해주세요.
현재 필터링된 내부 컨텐츠는 다음과 같습니다: {summary}
추가로, 이 내부 컨텐츠를 보완하거나 확장할 수 있는 외부 컨텐츠(예: 책, 영상, 온라인 강의 등)도 함께 추천해주세요.
"""
    response = openai.ChatCompletion.create(
        model="gpt-4-0613",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.6,
        max_tokens=150,
        top_p=1.0,
        frequency_penalty=0.0,
        presence_penalty=0.0
    )
    return response['choices'][0]['message']['content'].strip()
# ---------------------------------------------------------------------------
# Streamlit UI (top-level script body; nesting reconstructed — the pasted
# source had lost all indentation)
# ---------------------------------------------------------------------------
st.set_page_config(page_title="Curation", page_icon="🌷")
st.title("컨텐츠 큐레이션")

# User inputs: purpose, audience, available time, and content category.
purpose = st.text_area("학습 목적을 입력해주세요", placeholder="예: 행복 이해 및 적용")
target = st.text_area("학습 대상을 입력하세요", placeholder="예: 일반 구성원")
time = st.number_input("학습 시간을 입력하세요", min_value=1, max_value=120, value=30, step=1)

sheet_names = wb.sheetnames
sheet_name = st.selectbox("컨텐츠 범위를 선택해주세요", sheet_names)

if st.button('컨텐츠 추천받기'):
    if not purpose or not target or not time:
        st.error("모든 필드를 올바르게 채워주세요.")
    else:
        # Step 1: derive keywords from purpose/target and show them.
        keywords = extract_keywords(purpose, target)
        if keywords:
            st.markdown("### 추출된 키워드")
            st.write(", ".join(keywords))
        else:
            st.write("키워드를 추출하지 못했습니다.")

        # Step 2: filter the selected sheet's internal contents by similarity.
        df = load_sheet_data(sheet_name)
        search_criteria = f"{purpose} {target}".lower()
        internal_contents = filter_internal_contents(df, search_criteria, threshold=0.75)
        if internal_contents:
            st.subheader("내부 컨텐츠 필터링 결과")
            for content in internal_contents:
                with st.expander(f"{content['제목']} ({content['시간']}분)"):
                    st.write(content['소개'])
        else:
            st.write("내부 컨텐츠가 없습니다.")

        # Step 3: GPT recommendation combining internal + external contents.
        external_recommendation = recommend_with_gpt(
            sheet_name, purpose, target, str(time), internal_contents
        )
        st.subheader("추천 컨텐츠 리스트")
        st.write(external_recommendation)