Spaces:
Runtime error
Runtime error
| import os | |
| import gradio as gr | |
| import pandas as pd | |
| from dotenv import load_dotenv | |
| import jieba | |
# Warm up jieba at import time so the first request does not pay the
# dictionary-loading cost. `jieba.cut` is a generator function: calling it
# without consuming the result executes nothing, so the load is triggered
# explicitly with `jieba.initialize()` instead.
jieba.initialize()
| from wordcloud import WordCloud | |
| from PIL import Image | |
| import matplotlib.pyplot as plt | |
| from loguru import logger | |
| from sheet import compose_query, get_serp, get_condensed_result, extract_results, postprocess_result, format_output, category2supercategory | |
# Populate os.environ from a .env file, if python-dotenv finds one.
load_dotenv()

# Category labels handed to extract_results(): every non-empty key of the
# category -> supercategory mapping.
classes = [label for label in category2supercategory.keys() if len(label) > 0]
def plot_wordcloud( text):
    """Render a word cloud image for ``text``.

    The text is segmented with ``jieba.cut`` and the tokens are joined with
    single spaces before being handed to ``WordCloud.generate``. When the
    ``FONT_PATH`` environment variable is set, its value is passed to
    ``WordCloud`` as ``font_path``; otherwise ``WordCloud`` is built with its
    defaults.

    Args:
        text: The string to segment and visualise.

    Returns:
        The result of ``to_image()`` on the generated word cloud.
    """
    font_path = os.getenv("FONT_PATH")
    cloud_kwargs = {} if font_path is None else {"font_path": font_path}
    tokens = " ".join(jieba.cut(text))
    return WordCloud(**cloud_kwargs).generate(tokens).to_image()
def format_category( formatted_results):
    """Build the Markdown verdict text from the first row of ``formatted_results``.

    Args:
        formatted_results: Table-like object (a DataFrame in this file) with
            the columns ``supercategory``, ``category``, ``provide_alcohol``,
            ``store_name``, ``phone_number`` and ``description``. Only the
            first value of each column is used.

    Returns:
        Six ``> label:value`` lines separated by blank lines. The
        ``provide_alcohol`` value is rendered as '是' when truthy and '否'
        otherwise.
    """
    def first(column):
        # First value of the given column.
        return formatted_results[column].values[0]

    # Built in display order; each lookup happens in that same order.
    fields = [
        ("大類別", first('supercategory')),
        ("小類別", first('category')),
        ("推測提供酒品", '是' if first('provide_alcohol') else '否'),
        ("商家名稱", first('store_name')),
        ("電話", first('phone_number')),
        ("描述", first('description')),
    ]
    return "\n\n".join(f"> {label}:{value}" for label, value in fields)
def do( business_name: str, address: str):
    """Search for a single business and classify it.

    Composes a query from ``address`` and ``business_name``, fetches the
    search results, runs extraction and post-processing on them, and formats
    the outcome for the three output components this handler is bound to.

    Args:
        business_name: Name of the store or company to look up.
        address: Address of the business.

    Returns:
        A 3-tuple ``(evidence_markdown, wordcloud_image, verdict_markdown)``.
        If the search request raises, the first element carries the error
        message, the image is ``None`` and the verdict is an empty string, so
        the number of returned values always matches the bound outputs.
    """
    import tempfile

    provider = os.environ.get("DEFAULT_PROVIDER", "openai")
    # Bare model id with no embedded quote characters: the value is passed
    # unchanged to extract_results().
    model = os.environ.get("DEFAULT_MODEL", "gpt-4o")
    google_domain = "google.com.tw"
    gl = 'tw'
    lr = 'lang_zh-TW'
    # Fixed id: a single ad-hoc lookup has no real business id.
    business_id = 12345678

    query = compose_query(address, business_name)
    try:
        res = get_serp( query, google_domain, gl, lr)
    except Exception as e:
        logger.exception("SERP request failed")
        # Keep the arity equal to the three bound outputs.
        return f"Error: {e}", None, ""

    cond_res = get_condensed_result(res)
    crawled_results = pd.DataFrame([{
        "index": 0,
        "business_id": business_id,
        "business_name": business_name,
        "serp": res,
        "evidence": cond_res,
        "address": address,
    }])

    extracted_results = extract_results( crawled_results, classes=classes, provider = provider, model = model)
    extracted_results = extracted_results['extracted_results'][ [ 'business_id', 'business_name', 'address', 'category', 'evidence', 'phone_number', 'description', 'store_name', 'provide_alcohol'] ]
    logger.debug( extracted_results['category'])

    # postprocess_result() takes a path argument for its results file. Use a
    # per-call scratch directory so concurrent requests never share the file,
    # and so cleanup happens whether or not the file was written.
    with tempfile.TemporaryDirectory() as scratch_dir:
        postprocessed_results = postprocess_result(
            extracted_results,
            postprocessed_results_path=os.path.join(scratch_dir, "postprocessed_results.joblib"),
            category_hierarchy=category2supercategory,
        )

    formatted_results = format_output( postprocessed_results)
    logger.debug( formatted_results)
    formatted_output = format_category( formatted_results)

    evidence = formatted_results['formatted_evidence'].values[0]
    img = plot_wordcloud(evidence)
    # The first 6 characters of the evidence text are dropped for display;
    # presumably a fixed prefix added by format_output() - TODO confirm.
    return f"【搜尋結果】\n{evidence[6:]}", img, f"【判斷結果】\n{formatted_output}"
def load( blob, progress=gr.Progress()):
    """Read an uploaded CSV file into a DataFrame labelled with ``COLUMNS``.

    Args:
        blob: Either a path string, or an object whose ``name`` attribute
            holds the path of the uploaded file.
        progress: Not used in the body; presumably declared so Gradio tracks
            progress for this handler - TODO confirm.

    Returns:
        The parsed DataFrame. The file is read without a header row and the
        column names come from ``COLUMNS``. The first rows are also printed
        to stdout.
    """
    csv_path = blob if isinstance(blob, str) else blob.name
    df = pd.read_csv(csv_path, names=COLUMNS, header=None)
    print( df.head() )
    return df
| ## --- interface --- ## | |
| # outputs = [gr.Dataframe(row_count = (1, "dynamic"), col_count=(6,"dynamic"), label="output data", interactive=1)] | |
| # demo = gr.Interface( | |
| # fn=do, | |
| # inputs=[ "text", "text", "text"], | |
| # outputs=outputs, | |
| # ) | |
# Column names applied to the header-less batch CSV and shown as the headers
# of the batch output table.
COLUMNS = [
    "營業地址",
    "統一編號",
    "總機構統一編號",
    "營業人名稱",
    "資本額",
    "設立日期",
    "組織別名稱",
    "使用統一發票",
    "行業代號",
    "名稱",
    "行業代號1",
    "名稱1",
    "行業代號2",
    "名稱2",
    "行業代號3",
    "名稱3",
]

# Stylesheet passed to gr.Blocks: centres the page's h1 heading.
CSS = (
    "\n"
    "h1 {\n"
    "text-align: center;\n"
    "display:block;\n"
    "}\n"
)
## --- block --- ##
# Two-tab UI. The first tab classifies one business via do(); the second
# loads an uploaded CSV via load() and shows it as a table.
with gr.Blocks(css=CSS) as demo:
    gr.Markdown("# 🌟 自動分類餐廳型態 🌟")
    with gr.Tab('單筆'):
        with gr.Row():
            inputs = [
                gr.Textbox( label="商家名稱", placeholder="輸入商家或公司名稱"),
                gr.Textbox(label="地址", placeholder="至少輸入縣市,完整地址更好"),
            ]
        with gr.Row():
            btn = gr.Button("Submit")
        with gr.Row():
            # Order matches the 3-tuple returned by do().
            outputs = [
                gr.Markdown( label="參考資料(google search)"),
                gr.Image( label="文字雲"),
                gr.Markdown( label="類別"),
            ]
        btn.click(fn=do, inputs=inputs, outputs=outputs)
    with gr.Tab('批次'):
        with gr.Row():
            batch_inputs = [ gr.UploadButton("上傳檔案", file_count="single")]
        with gr.Row():
            batch_btn = gr.Button("批量處理")
        with gr.Row():
            batch_outputs = [ gr.Dataframe(
                headers=COLUMNS,
                # One datatype entry per column, derived from COLUMNS so the
                # two cannot drift apart.
                datatype=["str"] * len(COLUMNS),
            )]
        batch_btn.click(fn=load, inputs=batch_inputs, outputs=batch_outputs)
if __name__ == "__main__":
    # Login credentials are read from the environment.
    # NOTE(review): os.environ.get returns None for an unset variable, so the
    # tuple can contain None values - confirm how Gradio treats that case.
    credentials = ( os.environ.get('USERNAME'), os.environ.get('PASSWORD'))
    demo.launch(
        # share=True,
        server_name='0.0.0.0',
        auth=credentials,
    )