Spaces:

ssboost
/

NOW_Product_Keyword

Sleeping

App Files Files Community

NOW_Product_Keyword / main.py

ssboost

Create main.py

3bd055b verified over 1 year ago

raw

history blame

3.25 kB

	import pandas as pd
	import re
	from datetime import datetime
	import pytz
	from openpyxl import load_workbook
	from openpyxl.drawing.image import Image
	from fastapi import FastAPI, UploadFile, File, HTTPException
	from fastapi.responses import FileResponse
	import io

	# ASGI application object; the route decorators below register their
	# endpoints on it, and the __main__ guard hands it to uvicorn.
	app = FastAPI()

	def extract_keywords(product_names):
	    """Count keyword frequency across product names and export it to Excel.

	    Duplicate product names are collapsed first. Each remaining name has the
	    characters ``, [ ] / ( )`` replaced by spaces, is lower-cased and split
	    on whitespace. A keyword is counted at most once per distinct product
	    name, so the frequency is "number of distinct products containing the
	    keyword".

	    The result is written to an ``.xlsx`` file in the current working
	    directory, named with the current Asia/Seoul timestamp. The table
	    starts at row 4; the rows above it hold the logo image and a homepage
	    hyperlink.

	    Args:
	        product_names: Iterable of product name strings.

	    Returns:
	        The file name (relative path) of the workbook that was written.

	    Raises:
	        FileNotFoundError: If ``ssboost-logo.png`` is not present in the
	            current working directory (raised when the image is loaded).
	    """
	    from collections import Counter

	    # Compile once; the same pattern is applied to every product name.
	    separators = re.compile(r'[,\[\]/()]+')

	    # Counter replaces the former list.count() per word, which rescanned
	    # the full word list for every distinct keyword (quadratic).
	    word_counts = Counter()
	    for product in set(product_names):  # de-duplicated product names
	        # Replace special characters with spaces before tokenising.
	        cleaned_product = separators.sub(' ', product).lower()
	        # set(): a keyword counts once per product, however often it repeats.
	        word_counts.update(set(cleaned_product.split()))

	    df = pd.DataFrame(list(word_counts.items()), columns=['키워드', '빈도수'])
	    df = df.sort_values(by='빈도수', ascending=False)

	    # Get the current date and time in Korean timezone
	    korea_timezone = pytz.timezone('Asia/Seoul')
	    now = datetime.now(korea_timezone)
	    formatted_date = now.strftime('%Y%m%d_%H%M%S')

	    # Create the filename with the current date and time
	    filename = f'소싱부스트_키워드분석기_{formatted_date}.xlsx'
	    # startrow is zero-based, so the header lands on row 4 (A4).
	    df.to_excel(filename, index=False, startrow=3)

	    # Re-open the workbook to decorate the rows above the table.
	    wb = load_workbook(filename)
	    ws = wb.active

	    # Insert the logo, resized to 206x55 px, anchored at A1.
	    logo = Image("ssboost-logo.png")
	    logo.height = 55
	    logo.width = 206
	    ws.add_image(logo, "A1")

	    # Add the homepage caption and clickable link.
	    ws['D1'] = "▼ 홈페이지 바로가기 ▼"
	    ws['D2'] = "https://www.ssboost.co.kr"
	    ws['D2'].hyperlink = "https://www.ssboost.co.kr"
	    ws['D2'].style = "Hyperlink"

	    wb.save(filename)
	    return filename

	@app.post("/extract_keywords_from_file/")
	async def extract_keywords_from_file(file: UploadFile = File(...)):
	    """Build the keyword-frequency workbook from an uploaded Excel file.

	    Reads column D of the upload, skipping the first two rows and reading
	    at most 1997 rows after that (pandas treats the first remaining row as
	    the header by default). Blank cells are dropped, values are converted
	    to strings and de-duplicated, then passed to ``extract_keywords``.

	    Args:
	        file: The uploaded ``.xlsx`` file.

	    Returns:
	        A ``FileResponse`` serving the generated workbook as a download.

	    Raises:
	        HTTPException: 400 if the selected range contains no data;
	            500 if reading the upload or generating the workbook fails.
	    """
	    try:
	        contents = await file.read()
	        df = pd.read_excel(io.BytesIO(contents), usecols="D", skiprows=2, nrows=1997, engine='openpyxl')
	        if df.empty:
	            raise HTTPException(status_code=400, detail="No data found in the specified range.")
	        unique_product_names = df.iloc[:, 0].dropna().astype(str).unique().tolist()
	        output_filename = extract_keywords(unique_product_names)
	        return FileResponse(output_filename, media_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', filename=output_filename)
	    except HTTPException:
	        # Deliberate HTTP errors (the 400 above) must reach the client
	        # unchanged; the generic handler below would turn them into a 500.
	        raise
	    except Exception as e:
	        raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}") from e

	@app.post("/extract_keywords_from_text/")
	async def extract_keywords_from_text(text: str):
	    """Build the keyword-frequency workbook from newline-separated text.

	    Each line of ``text`` is treated as one product name and the lines are
	    handed to ``extract_keywords``.

	    Args:
	        text: Product names separated by ``\\n``.

	    Returns:
	        A ``FileResponse`` serving the generated workbook as a download.

	    Raises:
	        HTTPException: 400 if ``text`` is empty or whitespace only.
	    """
	    xlsx_mime = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'

	    has_content = bool(text.strip())
	    if has_content:
	        # One product name per line.
	        report_name = extract_keywords(text.split('\n'))
	        return FileResponse(report_name, media_type=xlsx_mime, filename=report_name)

	    raise HTTPException(status_code=400, detail="No text provided.")

	if __name__ == "__main__":
	    # Script entry point: serve the app with uvicorn on every interface,
	    # port 8000. uvicorn is imported here so that importing this module
	    # elsewhere does not require it.
	    import uvicorn

	    bind_options = {"host": "0.0.0.0", "port": 8000}
	    uvicorn.run(app, **bind_options)