Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import re | |
| from datetime import datetime | |
| import pytz | |
| from openpyxl import load_workbook | |
| from openpyxl.drawing.image import Image | |
| from fastapi import FastAPI, UploadFile, File, HTTPException | |
| from fastapi.responses import FileResponse | |
| import io | |
# ASGI application object; it is the object handed to uvicorn in the
# ``__main__`` guard at the bottom of this file.
# NOTE(review): no ``@app.get`` / ``@app.post`` decorators are visible in this
# file, so the handler coroutines defined below do not appear to be registered
# as routes on ``app`` -- confirm against the deployed source.
app = FastAPI()
def _count_keywords(product_names):
    """Count, for each keyword, how many distinct product names contain it.

    Product names are de-duplicated first; within one name a keyword is
    counted at most once. Names are split on whitespace after the characters
    ``, [ ] / ( )`` are replaced by spaces and the text is lower-cased.

    Returns:
        A list of ``(keyword, count)`` tuples ordered by count descending and
        then keyword ascending, so the order is reproducible between runs.
    """
    from collections import Counter

    # Compiled once instead of once per product name.
    separators = re.compile(r'[,\[\]/()]+')
    counts = Counter()
    for product in set(product_names):
        cleaned_product = separators.sub(' ', product).lower()
        # set(): a word repeated inside one product name counts only once.
        counts.update(set(cleaned_product.split()))
    return sorted(counts.items(), key=lambda item: (-item[1], item[0]))


def extract_keywords(product_names):
    """Build the keyword-frequency workbook and return its file name.

    Args:
        product_names: Iterable of product-name strings. Duplicates are
            ignored.

    Returns:
        The name of the ``.xlsx`` file written to the current working
        directory. The name embeds the current Asia/Seoul date and time.

    Raises:
        FileNotFoundError: Presumably raised by openpyxl when
            ``ssboost-logo.png`` is not present in the working directory
            (not verified here).
    """
    rows = _count_keywords(product_names)
    df = pd.DataFrame(rows, columns=['키워드', '빈도수'])

    # Timestamp the output using the current date and time in Korea.
    # NOTE(review): the name has one-second resolution, so two calls within
    # the same second write to the same file. The name doubles as the
    # download name used by callers, so it is left unchanged here.
    korea_timezone = pytz.timezone('Asia/Seoul')
    now = datetime.now(korea_timezone)
    formatted_date = now.strftime('%Y%m%d_%H%M%S')
    filename = f'소싱부스트_키워드분석기_{formatted_date}.xlsx'

    # The table header goes on row 4 (startrow is zero-based), leaving the
    # first three rows free for the logo and the homepage link.
    df.to_excel(filename, index=False, startrow=3)

    # Re-open the workbook to add the branding above the table.
    wb = load_workbook(filename)
    ws = wb.active

    # Logo anchored at A1, sized 206 x 55 px.
    logo = Image("ssboost-logo.png")
    logo.height = 55
    logo.width = 206
    ws.add_image(logo, "A1")

    # Caption plus a clickable homepage link.
    ws['D1'] = "▼ 홈페이지 바로가기 ▼"
    ws['D2'] = "https://www.ssboost.co.kr"
    ws['D2'].hyperlink = "https://www.ssboost.co.kr"
    ws['D2'].style = "Hyperlink"

    wb.save(filename)
    return filename
async def extract_keywords_from_file(file: UploadFile = File(...)):
    """Build a keyword report from the product names in an uploaded workbook.

    Reads column D of the uploaded Excel file, skipping the first two rows
    (pandas then uses the next row as the header) and reading at most 1997
    data rows. Blank cells are dropped and the remaining names are stripped
    and de-duplicated before being passed to ``extract_keywords``.

    Args:
        file: The uploaded ``.xlsx`` file.

    Returns:
        A ``FileResponse`` serving the generated workbook.

    Raises:
        HTTPException: 400 when the range contains no product names;
            500 for any other failure while reading or processing the file.
    """
    try:
        contents = await file.read()
        df = pd.read_excel(
            io.BytesIO(contents),
            usecols="D",
            skiprows=2,
            nrows=1997,
            engine='openpyxl',
        )
        if df.empty:
            raise HTTPException(status_code=400, detail="No data found in the specified range.")

        names = df.iloc[:, 0].dropna().astype(str).str.strip()
        unique_product_names = names[names != ""].unique().tolist()
        # A column made only of blank cells is not "empty" to pandas, but it
        # still gives nothing to analyse.
        if not unique_product_names:
            raise HTTPException(status_code=400, detail="No data found in the specified range.")

        output_filename = extract_keywords(unique_product_names)
        return FileResponse(
            output_filename,
            media_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
            filename=output_filename,
        )
    except HTTPException:
        # Deliberate client errors (400) must not be rewrapped as 500 by the
        # generic handler below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}") from e
async def extract_keywords_from_text(text: str):
    """Build a keyword report from newline-separated product names.

    Args:
        text: Product names, one per line. Lines are stripped and blank
            lines are ignored.

    Returns:
        A ``FileResponse`` serving the generated workbook.

    Raises:
        HTTPException: 400 when ``text`` contains no non-blank line.
    """
    # splitlines() + strip() handles CRLF input. Splitting on '\n' alone
    # leaves a trailing '\r' on every line but the last, so identical product
    # names would be treated as distinct and their keywords over-counted.
    stripped_lines = (line.strip() for line in (text or "").splitlines())
    product_names = [name for name in stripped_lines if name]
    if not product_names:
        raise HTTPException(status_code=400, detail="No text provided.")

    output_filename = extract_keywords(product_names)
    return FileResponse(
        output_filename,
        media_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        filename=output_filename,
    )
if __name__ == "__main__":
    # Imported here so uvicorn is only needed when the module is run as a script.
    from uvicorn import run as serve

    # Serve the app on all network interfaces, port 8000.
    serve(app, host="0.0.0.0", port=8000)