Spaces:

ssboost
/

NOW_Product_Keyword

Sleeping

App Files Files Community

ssboost committed on Aug 7, 2024

Commit

3bd055b

verified ·

1 Parent(s): f0d70fd

Create main.py

Browse files

Files changed (1) hide show

main.py +80 -0

main.py ADDED Viewed

	@@ -0,0 +1,80 @@

+import pandas as pd
+import re
+from datetime import datetime
+import pytz
+from openpyxl import load_workbook
+from openpyxl.drawing.image import Image
+from fastapi import FastAPI, UploadFile, File, HTTPException
+from fastapi.responses import FileResponse
+import io
+app = FastAPI()  # application instance; the @app.post route decorators in this module register on it
+def extract_keywords(product_names):
+    unique_products = list(set(product_names))  # 중복 제거된 상품명 리스트
+    all_unique_words = []
+    for product in unique_products:
+        # 특수 문자를 공백으로 대체
+        cleaned_product = re.sub(r'[,\[\]/()]+', ' ', product).lower()
+        words = cleaned_product.split()
+        unique_words = set(words)
+        all_unique_words.extend(unique_words)
+    final_unique_words = set(all_unique_words)
+    word_count = {word: all_unique_words.count(word) for word in final_unique_words}
+    df = pd.DataFrame(list(word_count.items()), columns=['키워드', '빈도수'])
+    df = df.sort_values(by='빈도수', ascending=False)
+    # Get the current date and time in Korean timezone
+    korea_timezone = pytz.timezone('Asia/Seoul')
+    now = datetime.now(korea_timezone)
+    formatted_date = now.strftime('%Y%m%d_%H%M%S')
+    # Create the filename with the current date and time
+    filename = f'소싱부스트_키워드분석기_{formatted_date}.xlsx'
+    df.to_excel(filename, index=False, startrow=3)  # Save the DataFrame starting from A4
+    # Load the workbook and edit the cells
+    wb = load_workbook(filename)
+    ws = wb.active
+    # Insert the image
+    logo = Image("ssboost-logo.png")
+    logo.height = 55  # set the height to 55px
+    logo.width = 206  # set the width to 206px
+    ws.add_image(logo, "A1")
+    # Add the hyperlink text
+    ws['D1'] = "▼ 홈페이지 바로가기 ▼"
+    ws['D2'] = "https://www.ssboost.co.kr"
+    ws['D2'].hyperlink = "https://www.ssboost.co.kr"
+    ws['D2'].style = "Hyperlink"
+    wb.save(filename)
+    return filename
+@app.post("/extract_keywords_from_file/")
+async def extract_keywords_from_file(file: UploadFile = File(...)):
+    try:
+        contents = await file.read()
+        df = pd.read_excel(io.BytesIO(contents), usecols="D", skiprows=2, nrows=1997, engine='openpyxl')
+        if df.empty:
+            raise HTTPException(status_code=400, detail="No data found in the specified range.")
+        unique_product_names = df.iloc[:, 0].dropna().astype(str).unique().tolist()
+        output_filename = extract_keywords(unique_product_names)
+        return FileResponse(output_filename, media_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', filename=output_filename)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
+@app.post("/extract_keywords_from_text/")
+async def extract_keywords_from_text(text: str):
+    if not text.strip():
+        raise HTTPException(status_code=400, detail="No text provided.")
+    product_names = text.split('\n')
+    output_filename = extract_keywords(product_names)
+    return FileResponse(output_filename, media_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', filename=output_filename)
+if __name__ == "__main__":  # runs only when the module is executed as a script, not when imported
+    import uvicorn  # imported here so that merely importing this module does not need uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)  # serve the app on all interfaces, port 8000