Spaces:
Sleeping
Sleeping
| import os | |
| import json | |
| import random | |
| import csv | |
| import io | |
| from datetime import datetime, timedelta | |
| from flask import Flask, request, jsonify, render_template | |
# pandas is optional: when it cannot be imported (e.g. a Python 3.14
# environment) the module runs on its standard-library code paths instead.
try:
    import pandas as pd
except ImportError:
    HAS_PANDAS = False
    print("Warning: Pandas not found. Running in fallback mode.")
else:
    HAS_PANDAS = True
# Configuration
UPLOAD_FOLDER = '/tmp'
ALLOWED_EXTENSIONS = {'csv'}

# Flask application object and its upload limit.
app = Flask(__name__)
app.config.update(MAX_CONTENT_LENGTH=50 * 1024 * 1024)  # 50MB max upload size
def allowed_file(filename):
    """Return True when *filename* has an extension listed in ALLOWED_EXTENSIONS.

    The extension is the text after the last dot, compared case-insensitively.
    A name without any dot is never allowed.
    """
    _stem, dot, extension = filename.rpartition('.')
    return bool(dot) and extension.lower() in ALLOWED_EXTENSIONS
def generate_demo_data_raw(num_cases=50):
    """Generate a synthetic purchase-to-pay event log as a list of dicts.

    Every case walks the standard activity path with two random variations:
    with probability 0.2 the "Approve Requisition" step is skipped, and with
    probability 0.1 a "Reject Invoice" followed by a second "Receive Invoice"
    is inserted right after the first "Receive Invoice".

    Each case's clock starts 1-30 days before now and advances by a random
    60-2880 minutes before every activity, so timestamps within a case are
    strictly increasing.

    Args:
        num_cases: How many cases to generate. Defaults to 50, the value that
            was previously hard-coded. Zero or a negative number yields an
            empty list.

    Returns:
        A list of ``{"case_id", "activity", "timestamp"}`` dicts, grouped by
        case in generation order. ``case_id`` looks like ``CASE-001`` and
        ``timestamp`` is formatted as ``%Y-%m-%d %H:%M:%S``.
    """
    activities = (
        "Create Purchase Requisition",
        "Approve Requisition",
        "Create Purchase Order",
        "Receive Goods",
        "Receive Invoice",
        "Match Invoice",
        "Pay Invoice",
        "Close Case",
    )

    data = []
    for case_number in range(1, num_cases + 1):
        case_id = f"CASE-{case_number:03d}"
        current_time = datetime.now() - timedelta(days=random.randint(1, 30))

        # Work on a per-case copy so variations never leak into other cases.
        path = list(activities)
        if random.random() < 0.2:
            path.remove("Approve Requisition")
        if random.random() < 0.1:
            # Rework loop: the invoice is rejected and then received again.
            after_invoice = path.index("Receive Invoice") + 1
            path[after_invoice:after_invoice] = ["Reject Invoice", "Receive Invoice"]

        for activity in path:
            current_time += timedelta(minutes=random.randint(60, 2880))
            data.append({
                "case_id": case_id,
                "activity": activity,
                "timestamp": current_time.strftime("%Y-%m-%d %H:%M:%S"),
            })
    return data
def index():
    """Render the ``index.html`` template and return the rendered page.

    NOTE(review): no route decorator is visible on this function in this view
    of the file; confirm how it is registered with ``app``.
    """
    page = render_template('index.html')
    return page
def get_demo_data():
    """Return the generated demo event log serialized as CSV text.

    Uses pandas when it is available; otherwise the rows are written with the
    standard-library ``csv`` module. In the fallback path an empty data set
    produces an empty string (no header row).
    """
    records = generate_demo_data_raw()

    if not HAS_PANDAS:
        # Manual CSV generation
        buffer = io.StringIO()
        if records:
            writer = csv.DictWriter(buffer, fieldnames=records[0].keys())
            writer.writeheader()
            writer.writerows(records)
        return buffer.getvalue()

    frame = pd.DataFrame(records)
    return frame.to_csv(index=False)
# Substrings used to recognise the three required columns by header name.
_CASE_KEYWORDS = ('case', 'id')
_ACTIVITY_KEYWORDS = ('activity', 'event', 'name')
_TIME_KEYWORDS = ('time', 'date')


def _find_column(columns, keywords):
    """Return the first column whose lowercased name contains any keyword, else None."""
    return next(
        (col for col in columns if any(word in col.lower() for word in keywords)),
        None,
    )


def _parse_timestamp(raw):
    """Parse an ISO-8601 or ``%Y-%m-%d %H:%M:%S`` string; raise ValueError otherwise."""
    if not isinstance(raw, str):
        # csv.DictReader yields None for cells missing from a short row.
        raise ValueError(f"missing timestamp: {raw!r}")
    try:
        # Try ISO format first ("Z" suffix normalised to an explicit offset).
        return datetime.fromisoformat(raw.replace('Z', '+00:00'))
    except ValueError:
        return datetime.strptime(raw, "%Y-%m-%d %H:%M:%S")


def _build_graph_payload(nodes, links, total_cases, total_events):
    """Convert accumulated node/link statistics into the ECharts-style response dict."""
    max_count = max((stats["count"] for stats in nodes.values()), default=0)

    echarts_nodes = []
    for name, stats in nodes.items():
        # Nodes with no incoming edge are starts, nodes with no outgoing edge
        # are ends, everything else is an intermediate activity.
        if stats["in_degree"] == 0:
            color, category = "#91cc75", "Start"
        elif stats["out_degree"] == 0:
            color, category = "#ee6666", "End"
        else:
            color, category = "#5470c6", "Activity"
        symbol_size = 20 + (stats["count"] / max_count) * 40 if max_count > 0 else 30
        echarts_nodes.append({
            "name": name,
            "value": stats["count"],
            "symbolSize": symbol_size,
            "itemStyle": {"color": color},
            "category": category,
        })

    echarts_links = []
    for (source, target), stats in links.items():
        avg_duration = stats["total_duration"] / stats["count"]
        echarts_links.append({
            "source": source,
            "target": target,
            "value": stats["count"],
            "label": {
                "show": True,
                "formatter": f"{stats['count']} ({avg_duration:.1f}h)",
            },
            "lineStyle": {
                # A link only exists once at least one case was processed,
                # so total_cases is non-zero here.
                "width": 1 + (stats["count"] / total_cases) * 5,
                "curveness": 0.2,
            },
        })

    return {
        "nodes": echarts_nodes,
        "links": echarts_links,
        "stats": {
            "total_cases": total_cases,
            "total_events": total_events,
            "avg_events_per_case": round(total_events / total_cases, 1) if total_cases else 0,
        },
    }


def analyze():
    """Analyze an uploaded CSV event log and return a process graph as JSON.

    Reads the upload from the ``file`` field of ``request.files``, detects the
    case, activity and timestamp columns by header name, orders the events of
    each case by time, and aggregates them with ``process_case_events``.

    Returns:
        On success, a JSON response with ``nodes``, ``links`` and ``stats``.
        A ``(response, 400)`` pair with an ``error`` message when the upload
        is missing, has no filename, has a disallowed extension, cannot be
        read as CSV, is empty (fallback path), lacks a required column, or
        contains timestamps that cannot be parsed. A ``(response, 500)`` pair
        for any other unexpected exception.
    """
    try:
        if 'file' not in request.files:
            return jsonify({"error": "没有上传文件"}), 400
        file = request.files['file']
        # `not` also covers a filename of None, not just the empty string.
        if not file.filename:
            return jsonify({"error": "未选择文件"}), 400
        # Previously a disallowed extension fell through without producing
        # any response at all; reject it explicitly.
        if not allowed_file(file.filename):
            return jsonify({"error": "不支持的文件类型，仅支持 CSV 文件"}), 400

        nodes = {}
        links = {}

        # Use Pandas if available, otherwise fallback
        if HAS_PANDAS:
            try:
                df = pd.read_csv(file)
            except Exception as e:
                return jsonify({"error": f"CSV读取失败: {str(e)}"}), 400

            df.columns = [c.lower() for c in df.columns]
            case_col = _find_column(df.columns, _CASE_KEYWORDS)
            act_col = _find_column(df.columns, _ACTIVITY_KEYWORDS)
            time_col = _find_column(df.columns, _TIME_KEYWORDS)
            if not (case_col and act_col and time_col):
                return jsonify({"error": "缺少必要列: 需包含 CaseID, Activity, Timestamp"}), 400

            try:
                df[time_col] = pd.to_datetime(df[time_col])
            except Exception:
                # Any parsing failure is a client error; unlike a bare
                # `except:` this does not swallow KeyboardInterrupt/SystemExit.
                return jsonify({"error": "时间戳格式无效"}), 400

            df = df.sort_values(by=[case_col, time_col])
            cases = df.groupby(case_col)
            total_cases = len(cases)
            total_events = len(df)
            for _case_id, group in cases:
                process_case_events(group.to_dict('records'), nodes, links, act_col, time_col)
        else:
            # --- FALLBACK IMPLEMENTATION (Standard Lib) ---
            try:
                text = file.stream.read().decode("UTF8")
                reader = csv.DictReader(io.StringIO(text, newline=None))
                rows = list(reader)
            except (UnicodeDecodeError, csv.Error) as e:
                # Mirror the pandas path: an unreadable file is a 400, not a 500.
                return jsonify({"error": f"CSV读取失败: {str(e)}"}), 400
            if not rows:
                return jsonify({"error": "空文件"}), 400

            # Detect columns (the original header spelling is kept as the row key).
            case_key = _find_column(reader.fieldnames, _CASE_KEYWORDS)
            act_key = _find_column(reader.fieldnames, _ACTIVITY_KEYWORDS)
            time_key = _find_column(reader.fieldnames, _TIME_KEYWORDS)
            if not (case_key and act_key and time_key):
                return jsonify({"error": "缺少必要列: 需包含 CaseID, Activity, Timestamp"}), 400

            # Group by case, preserving first-seen case order.
            case_map = {}
            for row in rows:
                case_map.setdefault(row[case_key], []).append(row)
            total_cases = len(case_map)
            total_events = len(rows)

            # Parse, sort and process each case.
            for events in case_map.values():
                try:
                    for event in events:
                        event['_dt'] = _parse_timestamp(event[time_key])
                    # Sorting raises TypeError when offset-aware and naive
                    # timestamps are mixed within one case.
                    events.sort(key=lambda ev: ev['_dt'])
                except (ValueError, TypeError):
                    # Substituting "now" for a bad timestamp would silently
                    # corrupt ordering and durations; reject like the pandas path.
                    return jsonify({"error": "时间戳格式无效"}), 400
                process_case_events(events, nodes, links, act_key, '_dt')

        # --- COMMON FORMATTING ---
        return jsonify(_build_graph_payload(nodes, links, total_cases, total_events))
    except Exception as e:
        import traceback
        traceback.print_exc()
        return jsonify({"error": str(e)}), 500
def process_case_events(events, nodes, links, act_key, time_key):
    """Fold one case's time-ordered events into the shared node/link tallies.

    ``nodes`` maps an activity name to its ``count``, ``in_degree`` and
    ``out_degree``; ``links`` maps an ``(activity, next_activity)`` pair to its
    ``count`` and ``total_duration`` in hours. Both dicts are updated in place
    and nothing is returned.
    """
    def node_stats(name):
        # Fetch the tally for an activity, creating a zeroed one on first sight.
        return nodes.setdefault(name, {"count": 0, "in_degree": 0, "out_degree": 0})

    final_index = len(events) - 1
    for position, event in enumerate(events):
        source = event[act_key]
        node_stats(source)["count"] += 1

        # The last event of a case has no successor, hence no outgoing link.
        if position >= final_index:
            continue

        successor = events[position + 1]
        target = successor[act_key]

        # Normalise pandas timestamps to plain datetimes before subtracting.
        started = event[time_key]
        finished = successor[time_key]
        if hasattr(started, 'to_pydatetime'):
            started = started.to_pydatetime()
        if hasattr(finished, 'to_pydatetime'):
            finished = finished.to_pydatetime()
        hours = (finished - started).total_seconds() / 3600.0

        edge = links.setdefault((source, target), {"count": 0, "total_duration": 0.0})
        edge["count"] += 1
        edge["total_duration"] += hours

        node_stats(source)["out_degree"] += 1
        node_stats(target)["in_degree"] += 1
if __name__ == '__main__':
    # The interactive debugger allows arbitrary code execution, and this
    # server listens on every interface (0.0.0.0), so debug mode is opt-in
    # via the FLASK_DEBUG environment variable instead of always enabled.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)