Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| from sentence_transformers import SentenceTransformer | |
| from sklearn.cluster import KMeans | |
| from transformers import pipeline | |
| from prophet import Prophet | |
| import matplotlib.pyplot as plt | |
| import gradio as gr | |
# Models: both are instantiated once, at import time, as module-level globals.
embedder = SentenceTransformer(
    "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
)
sentiment_model = pipeline(
    task="text-classification",
    model="uer/roberta-base-finetuned-dianping-chinese",
    tokenizer="uer/roberta-base-finetuned-dianping-chinese",
)
# Main pipeline
def _label_to_sentiment(label):
    """Translate a raw classifier label into the sentiment tag stored in the CSV.

    Generic ``LABEL_0`` / ``LABEL_1`` / ``LABEL_2`` names keep their original
    meaning (negative / neutral / positive). Descriptive labels beginning with
    "negative" or "positive" are recognised as well, so a checkpoint whose
    config ships human-readable label names is not tagged "未知" on every row.
    """
    generic = {"LABEL_0": "負向", "LABEL_1": "中立", "LABEL_2": "正向"}
    if label in generic:
        return generic[label]
    lowered = str(label).lower()
    if lowered.startswith("negative"):
        return "負向"
    if lowered.startswith("positive"):
        return "正向"
    return "未知"


def full_pipeline(file, num_clusters):
    """Cluster the uploaded texts, tag sentiment, and forecast topic 0 volume.

    Args:
        file: The uploaded CSV, either a filesystem path or an object whose
            ``name`` attribute holds the path. The CSV must contain ``text``
            and ``timestamp`` columns.
        num_clusters: Requested number of KMeans clusters. Coerced to ``int``
            because the UI widget may deliver a float.

    Returns:
        A ``(csv_path, image_path)`` tuple: the input rows with added
        ``topic`` and ``sentiment`` columns, and the Prophet forecast plot
        for topic 0.

    Raises:
        gr.Error: If no file was uploaded, a required column is missing, the
            cluster count is invalid, or topic 0 has fewer than two distinct
            days of data. Raising (instead of returning a lone string) lets
            Gradio show the message; a single return value cannot be mapped
            onto the two declared outputs.
    """
    if file is None:
        raise gr.Error("❌ 錯誤:請先上傳 CSV 檔案")

    # Accept both a plain path and an upload wrapper exposing ``.name``.
    csv_path = getattr(file, "name", file)
    df = pd.read_csv(csv_path)

    if "text" not in df.columns:
        raise gr.Error("❌ 錯誤:CSV 檔案需包含 text 欄位")
    if "timestamp" not in df.columns:
        raise gr.Error("❌ 錯誤:CSV 檔案需包含 timestamp 欄位(例如新聞時間)")

    texts = df["text"].astype(str).tolist()

    # KMeans requires an integer cluster count no larger than the sample count.
    try:
        n_clusters = int(num_clusters)
    except (TypeError, ValueError):
        n_clusters = 0
    if not 1 <= n_clusters <= len(texts):
        raise gr.Error("❌ 錯誤:聚類數必須是介於 1 與資料筆數之間的整數")

    # Sentence embeddings + KMeans topic clustering
    embeddings = embedder.encode(texts, show_progress_bar=True)
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    df["topic"] = kmeans.fit_predict(embeddings)

    # Sentiment analysis (best effort: one failing row must not abort the run)
    sentiments = []
    for text in texts:
        try:
            label = sentiment_model(text)[0]["label"]
        except Exception:
            sentiment = "錯誤"
        else:
            sentiment = _label_to_sentiment(label)
        sentiments.append(sentiment)
    df["sentiment"] = sentiments

    # Volume forecast: daily row counts of topic 0, in Prophet's ds/y layout
    df["timestamp"] = pd.to_datetime(df["timestamp"])
    topic0 = df[df["topic"] == 0]
    daily_counts = (
        topic0.groupby(topic0["timestamp"].dt.date).size().reset_index(name="count")
    )
    daily_counts.columns = ["ds", "y"]
    if len(daily_counts) < 2:
        raise gr.Error("❌ 無法預測:topic=0 數據太少")

    m = Prophet()
    m.fit(daily_counts)
    future = m.make_future_dataframe(periods=7)
    forecast = m.predict(future)
    fig = m.plot(forecast)

    # Output
    output_csv = "/tmp/final_output.csv"
    output_img = "/tmp/forecast.png"
    df.to_csv(output_csv, index=False)
    try:
        fig.savefig(output_img)
    finally:
        # Release the figure so repeated requests do not accumulate open plots.
        plt.close(fig)
    return output_csv, output_img
# Gradio UI: build the interface around full_pipeline and start the server.
gr.Interface(
    fn=full_pipeline,
    inputs=[
        gr.File(label="上傳 CSV(需含 text 與 timestamp 欄)"),
        # precision=0 makes Gradio round the value and pass an int; the
        # cluster count handed to KMeans must be an integer.
        gr.Number(label="分幾群?(聚類數)", value=5, precision=0),
    ],
    outputs=[
        gr.File(label="結果 CSV(含 topic, sentiment)"),
        gr.Image(label="topic=0 熱度預測圖(Prophet)"),
    ],
    title="話題雷達",
    description="自動分群、分析情緒,並預測熱度走勢(topic=0 為例)",
).launch()