import os
import json
import random
import csv
import io
from datetime import datetime, timedelta

from flask import Flask, request, jsonify, render_template, Response

# Try to import pandas; fall back to a stdlib-only implementation if it is
# unavailable (e.g. an interpreter version pandas has no wheel for yet).
try:
    import pandas as pd
    HAS_PANDAS = True
except ImportError:
    HAS_PANDAS = False
    print("Warning: Pandas not found. Running in fallback mode.")

app = Flask(__name__)
app.config['MAX_CONTENT_LENGTH'] = 50 * 1024 * 1024  # 50MB max upload size

# Configuration
UPLOAD_FOLDER = '/tmp'
ALLOWED_EXTENSIONS = {'csv'}


def allowed_file(filename):
    """Return True if *filename* has an extension listed in ALLOWED_EXTENSIONS."""
    return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS


def generate_demo_data_raw():
    """Generate a synthetic purchase-to-pay event log.

    Returns:
        list[dict]: one dict per event with keys ``case_id``, ``activity``
        and ``timestamp`` (formatted ``"%Y-%m-%d %H:%M:%S"``). 50 cases are
        produced; ~20% skip the approval step and ~10% contain an invoice
        rejection/rework loop, so the resulting process graph has variants.
    """
    data = []
    activities = [
        "Create Purchase Requisition", "Approve Requisition",
        "Create Purchase Order", "Receive Goods", "Receive Invoice",
        "Match Invoice", "Pay Invoice", "Close Case",
    ]
    # Generate 50 cases
    for i in range(1, 51):
        case_id = f"CASE-{i:03d}"
        # Each case starts at a random point within the last month.
        current_time = datetime.now() - timedelta(days=random.randint(1, 30))
        path = activities[:]
        # ~20% of cases skip the approval step entirely.
        if random.random() < 0.2:
            path.remove("Approve Requisition")
        # ~10% of cases get a rejection followed by a re-received invoice.
        if random.random() < 0.1:
            idx = path.index("Receive Invoice")
            path.insert(idx + 1, "Reject Invoice")
            path.insert(idx + 2, "Receive Invoice")
        for activity in path:
            # Each step takes between 1 hour and 2 days.
            duration_minutes = random.randint(60, 2880)
            current_time += timedelta(minutes=duration_minutes)
            data.append({
                "case_id": case_id,
                "activity": activity,
                "timestamp": current_time.strftime("%Y-%m-%d %H:%M:%S"),
            })
    return data


@app.route('/')
def index():
    """Serve the single-page frontend."""
    return render_template('index.html')


@app.route('/api/demo', methods=['GET'])
def get_demo_data():
    """Return the generated demo event log as CSV.

    FIX: the body is now returned as a Response tagged ``text/csv``; the
    original returned a bare string, which Flask served as ``text/html``.
    """
    data = generate_demo_data_raw()
    if HAS_PANDAS:
        csv_text = pd.DataFrame(data).to_csv(index=False)
    else:
        # Manual CSV generation with the stdlib csv module.
        output = io.StringIO()
        if data:
            writer = csv.DictWriter(output, fieldnames=data[0].keys())
            writer.writeheader()
            writer.writerows(data)
        csv_text = output.getvalue()
    return Response(csv_text, mimetype='text/csv')
def _find_column(names, keywords):
    """Return the first name in *names* whose lowercase form contains any of
    *keywords*, or None if no column matches."""
    for name in names:
        lowered = name.lower()
        if any(keyword in lowered for keyword in keywords):
            return name
    return None


def _parse_timestamp(value):
    """Best-effort timestamp parser for the stdlib fallback path.

    Tries ISO-8601 first (mapping a trailing ``Z`` to ``+00:00``), then
    ``"%Y-%m-%d %H:%M:%S"``. FIX: timezone-aware results are stripped to
    naive, because mixing aware and naive values made the per-case sort
    raise TypeError. Unparseable values fall back to now() so one bad row
    does not abort the whole analysis (NOTE(review): nondeterministic —
    consider rejecting the row instead).
    """
    try:
        dt = datetime.fromisoformat(value.replace('Z', '+00:00'))
    except ValueError:
        try:
            dt = datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
        except ValueError:
            return datetime.now()
    if dt.tzinfo is not None:
        dt = dt.replace(tzinfo=None)
    return dt


@app.route('/api/analyze', methods=['POST'])
def analyze():
    """Analyze an uploaded CSV event log and return a process graph.

    Expects a multipart upload under the ``file`` key containing a CSV with
    case-id, activity and timestamp columns (detected by substring match on
    the header names). Returns ECharts-ready ``nodes``/``links`` plus summary
    ``stats`` as JSON, or ``{"error": ...}`` with a 4xx/5xx status.
    """
    try:
        if 'file' not in request.files:
            return jsonify({"error": "没有上传文件"}), 400
        file = request.files['file']
        if file.filename == '':
            return jsonify({"error": "未选择文件"}), 400
        # FIX: originally a disallowed extension fell through with no return,
        # so the view returned None and Flask raised a 500 instead of a 400.
        if not (file and allowed_file(file.filename)):
            return jsonify({"error": "仅支持CSV文件"}), 400

        nodes = {}   # activity -> {"count", "in_degree", "out_degree"}
        links = {}   # (source, target) -> {"count", "total_duration" (hours)}
        total_cases = 0
        total_events = 0

        if HAS_PANDAS:
            try:
                df = pd.read_csv(file)
            except Exception as e:
                return jsonify({"error": f"CSV读取失败: {str(e)}"}), 400
            # Lowercase columns so detection and access use the same names.
            df.columns = [c.lower() for c in df.columns]
            case_col = _find_column(df.columns, ('case', 'id'))
            act_col = _find_column(df.columns, ('activity', 'event', 'name'))
            time_col = _find_column(df.columns, ('time', 'date'))
            if not (case_col and act_col and time_col):
                return jsonify({"error": "缺少必要列: 需包含 CaseID, Activity, Timestamp"}), 400
            try:
                df[time_col] = pd.to_datetime(df[time_col])
            except Exception:  # FIX: was a bare except
                return jsonify({"error": "时间戳格式无效"}), 400
            df = df.sort_values(by=[case_col, time_col])
            cases = df.groupby(case_col)
            total_cases = len(cases)
            total_events = len(df)
            for _case_id, group in cases:
                process_case_events(group.to_dict('records'), nodes, links,
                                    act_col, time_col)
        else:
            # --- FALLBACK IMPLEMENTATION (standard library only) ---
            stream = io.StringIO(file.stream.read().decode("UTF8"),
                                 newline=None)
            reader = csv.DictReader(stream)
            rows = list(reader)
            if not rows:
                return jsonify({"error": "空文件"}), 400
            # Detect columns on the original header names (case preserved
            # because rows are keyed by the raw fieldnames).
            case_key = _find_column(reader.fieldnames, ('case', 'id'))
            act_key = _find_column(reader.fieldnames, ('activity', 'event', 'name'))
            time_key = _find_column(reader.fieldnames, ('time', 'date'))
            if not (case_key and act_key and time_key):
                return jsonify({"error": "缺少必要列: 需包含 CaseID, Activity, Timestamp"}), 400
            # Group rows by case id, preserving encounter order.
            case_map = {}
            for row in rows:
                case_map.setdefault(row[case_key], []).append(row)
            total_cases = len(case_map)
            total_events = len(rows)
            # Parse timestamps, sort each case chronologically, process.
            for events in case_map.values():
                for event in events:
                    event['_dt'] = _parse_timestamp(event[time_key])
                events.sort(key=lambda e: e['_dt'])
                process_case_events(events, nodes, links, act_key, '_dt')

        # --- COMMON FORMATTING (shared by both code paths) ---
        max_count = max((d["count"] for d in nodes.values()), default=0)
        echarts_nodes = []
        for name, data in nodes.items():
            # Scale node size by relative frequency (20..60px, 30 if empty).
            symbol_size = 20 + (data["count"] / max_count) * 40 if max_count > 0 else 30
            echarts_nodes.append({
                "name": name,
                "value": data["count"],
                "symbolSize": symbol_size,
                "itemStyle": {
                    # Blue for interior activities, green for start nodes
                    # (no incoming edge), red for end nodes.
                    "color": "#5470c6" if data["in_degree"] > 0 and data["out_degree"] > 0
                    else ("#91cc75" if data["in_degree"] == 0 else "#ee6666")
                },
                "category": "Start" if data["in_degree"] == 0
                else ("End" if data["out_degree"] == 0 else "Activity"),
            })

        echarts_links = []
        for (source, target), data in links.items():
            avg_duration = data["total_duration"] / data["count"]
            echarts_links.append({
                "source": source,
                "target": target,
                "value": data["count"],
                "label": {
                    "show": True,
                    "formatter": f"{data['count']} ({avg_duration:.1f}h)"
                },
                "lineStyle": {
                    "width": 1 + (data["count"] / total_cases) * 5,
                    "curveness": 0.2
                },
            })

        return jsonify({
            "nodes": echarts_nodes,
            "links": echarts_links,
            "stats": {
                "total_cases": total_cases,
                "total_events": total_events,
                "avg_events_per_case": round(total_events / total_cases, 1) if total_cases else 0
            }
        })
    except Exception as e:
        import traceback
        traceback.print_exc()
        return jsonify({"error": str(e)}), 500


def process_case_events(events, nodes, links, act_key, time_key):
    """Accumulate one case's sorted events into the shared graph dicts.

    Args:
        events: chronologically sorted events (dicts) for a single case.
        nodes: mutated in place; per-activity counts and in/out degrees.
        links: mutated in place; per-(source, target) transition counts and
            total duration in hours.
        act_key: key under which each event stores its activity name.
        time_key: key under which each event stores its timestamp
            (datetime or pandas Timestamp).
    """
    for i in range(len(events)):
        curr = events[i]
        act = curr[act_key]
        # Count every occurrence of the activity.
        if act not in nodes:
            nodes[act] = {"count": 0, "in_degree": 0, "out_degree": 0}
        nodes[act]["count"] += 1
        # Record the transition to the following event, if any.
        if i < len(events) - 1:
            next_event = events[i + 1]
            next_act = next_event[act_key]
            t1 = curr[time_key]
            t2 = next_event[time_key]
            # Normalize pandas Timestamps to plain datetimes.
            if hasattr(t1, 'to_pydatetime'):
                t1 = t1.to_pydatetime()
            if hasattr(t2, 'to_pydatetime'):
                t2 = t2.to_pydatetime()
            duration = (t2 - t1).total_seconds() / 3600.0  # hours
            link_key = (act, next_act)
            if link_key not in links:
                links[link_key] = {"count": 0, "total_duration": 0.0}
            links[link_key]["count"] += 1
            links[link_key]["total_duration"] += duration
            nodes[act]["out_degree"] += 1
            if next_act not in nodes:
                nodes[next_act] = {"count": 0, "in_degree": 0, "out_degree": 0}
            nodes[next_act]["in_degree"] += 1


if __name__ == '__main__':
    app.run(debug=True, host='0.0.0.0', port=7860)