Spaces:

Kevinshh
/

Preformu

Sleeping

App Files Files Community

Kevinshh committed on Jan 1

Commit

366270f

verified ·

1 Parent(s): b67f7b6

Upload streamlit_app.py

Browse files

Files changed (1) hide show

streamlit_app.py +769 -0

streamlit_app.py ADDED Viewed

	@@ -0,0 +1,769 @@

+"""
+Streamlit Application - Drug Stability Intelligence Platform
+This is the main entry point for the three-layer architecture:
+- Layer 1: IntentParser (LLM semantic understanding)
+- Layer 2: RegulatoryDecisionEngine (Rule-based calculations)
+- Layer 3: ExplanationGenerator + Plotly charts + Report generation
+Features:
+- Interactive stability analysis
+- Plotly charts with confidence intervals
+- Dual output: Streamlit interactive + HTML/PDF archive
+"""
+import streamlit as st
+import plotly.graph_objects as go
+import plotly.express as px
+from plotly.subplots import make_subplots
+import json
+import tempfile
+from datetime import datetime
+from typing import Dict, Any, Optional, List
+from pathlib import Path
+# Local imports
+from schemas.analysis_intent import (
+    AnalysisIntent,
+    AnalysisType,
+    AnalysisPurpose,
+    ExtractedDataSummary,
+)
+from schemas.decision_result import (
+    RegulatoryDecisionResult,
+    RefusalSeverity,
+)
+from layers.intent_parser import IntentParser
+from layers.regulatory_decision_engine import RegulatoryDecisionEngine
+from layers.explanation_generator import ExplanationGenerator
+from layers.model_invoker import ModelInvoker
+from utils.file_parsers import parse_file
+from utils.stability_data_extractor import StabilityDataExtractor
+from utils.stability_report_formatter import StabilityReportFormatter
+# =============================================================================
+# App Configuration
+# =============================================================================
+# Page-level settings: browser tab title and icon, wide layout, and a sidebar
+# that starts expanded.
+# NOTE(review): Streamlit expects set_page_config to be the first st.* call on
+# the page, so keep it ahead of the CSS injection below - confirm against the
+# Streamlit version in use.
+st.set_page_config(
+    page_title="Drug Stability Intelligence Platform",
+    page_icon="🧪",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+# Custom CSS injected once per script run. It defines the header classes
+# (.main-header, .sub-header) and the refusal box (.refusal-box) that main()
+# emits, plus .metric-card, .warning-box and .success-box, which are not
+# referenced anywhere in the visible code.
+# unsafe_allow_html=True is passed so the <style> element is emitted as markup.
+st.markdown("""
+<style>
+    .main-header {
+        font-size: 2.5rem;
+        font-weight: 700;
+        background: linear-gradient(90deg, #1e3a5f, #2e7d32);
+        -webkit-background-clip: text;
+        -webkit-text-fill-color: transparent;
+        text-align: center;
+        margin-bottom: 1rem;
+    }
+    .sub-header {
+        font-size: 1.1rem;
+        color: #666;
+        text-align: center;
+        margin-bottom: 2rem;
+    }
+    .metric-card {
+        background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
+        border-radius: 10px;
+        padding: 1rem;
+        margin: 0.5rem 0;
+    }
+    .warning-box {
+        background-color: #fff3cd;
+        border: 1px solid #ffc107;
+        border-radius: 5px;
+        padding: 1rem;
+        margin: 1rem 0;
+    }
+    .success-box {
+        background-color: #d4edda;
+        border: 1px solid #28a745;
+        border-radius: 5px;
+        padding: 1rem;
+        margin: 1rem 0;
+    }
+    .refusal-box {
+        background-color: #f8d7da;
+        border: 1px solid #dc3545;
+        border-radius: 5px;
+        padding: 1rem;
+        margin: 1rem 0;
+    }
+</style>
+""", unsafe_allow_html=True)
+# =============================================================================
+# Chart Generation (Plotly)
+# =============================================================================
+def create_prediction_chart(
+    kinetic_fits: Dict,
+    predictions: Dict,
+    specification_limit: float
+) -> go.Figure:
+    """Create prediction chart with confidence intervals."""
+    fig = go.Figure()
+    # Colors
+    colors = px.colors.qualitative.Set2
+    # Plot each condition
+    for i, (cond_id, fit) in enumerate(kinetic_fits.items()):
+        color = colors[i % len(colors)]
+        # Generate fitted line
+        if hasattr(fit, 'k') and hasattr(fit, 'y0'):
+            t_line = list(range(0, 37, 3))
+            y_line = [fit.y0 + fit.k * t for t in t_line]
+            fig.add_trace(go.Scatter(
+                x=t_line,
+                y=y_line,
+                mode='lines',
+                name=f'{cond_id} (拟合线)',
+                line=dict(color=color, width=2)
+            ))
+        pred_y = []
+        ci_lower = []
+        ci_upper = []
+        for tp_key, pred in predictions.items():
+            if hasattr(pred, 'timepoint_months'):
+                pred_x.append(pred.timepoint_months)
+                pred_y.append(pred.point_estimate)
+                ci_lower.append(pred.CI_lower)
+                ci_upper.append(pred.CI_upper)
+        if pred_x:
+            # CI band
+            fig.add_trace(go.Scatter(
+                x=pred_x + pred_x[::-1],
+                y=ci_upper + ci_lower[::-1],
+                fill='toself',
+                fillcolor='rgba(40, 167, 69, 0.2)',
+                line=dict(color='rgba(255,255,255,0)'),
+                name='95% 置信区间',
+                showlegend=True
+            ))
+            # Prediction points
+            fig.add_trace(go.Scatter(
+                x=pred_x,
+                y=pred_y,
+                mode='markers',
+                name='预测值',
+                marker=dict(color='#28a745', size=12, symbol='diamond')
+            ))
+    # Specification limit
+    fig.add_hline(
+        y=specification_limit,
+        line_dash="dash",
+        line_color="#dc3545",
+        annotation_text=f"规格限度 ({specification_limit}%)"
+    )
+    fig.update_layout(
+        title="稳定性预测曲线 (含95%置信区间)",
+        xaxis_title="时间 (月)",
+        yaxis_title="杂质含量 (%)",
+        legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
+        template="plotly_white",
+        height=500
+    )
+    return fig
+def create_batch_comparison_chart(
+    batch_ranking: List,
+    kinetic_fits: Dict
+) -> go.Figure:
+    """Create batch comparison bar chart."""
+    if not batch_ranking:
+        return go.Figure()
+    # Handle both dataclass objects and dicts
+    batch_names = []
+    scores = []
+    for r in batch_ranking:
+        if hasattr(r, 'batch_name'):
+            # It's a dataclass
+            batch_names.append(r.batch_name or r.batch_id or 'Unknown')
+            scores.append(r.score if r.score is not None else 0)
+        else:
+            # It's a dict
+            batch_names.append(r.get('batch_name', r.get('batch_id', 'Unknown')))
+            scores.append(r.get('score', 0))
+    # Color based on score
+    colors = ['#28a745' if s >= 80 else '#ffc107' if s >= 60 else '#dc3545' for s in scores]
+    fig = go.Figure(data=[
+        go.Bar(
+            x=batch_names,
+            y=scores,
+            marker_color=colors,
+            text=scores,
+            textposition='auto'
+        )
+    ])
+    fig.update_layout(
+        title="批次稳定性评分对比",
+        xaxis_title="批次",
+        yaxis_title="评分",
+        yaxis_range=[0, 105],
+        template="plotly_white",
+        height=400
+    )
+    return fig
+def create_kinetics_scatter(kinetic_fits: Dict) -> go.Figure:
+    """Create kinetics comparison scatter plot."""
+    if not kinetic_fits:
+        return go.Figure()
+    conditions = list(kinetic_fits.keys())
+    k_values = [fit.k if hasattr(fit, 'k') else 0 for fit in kinetic_fits.values()]
+    r2_values = [fit.R2 if hasattr(fit, 'R2') else 0 for fit in kinetic_fits.values()]
+    fig = go.Figure(data=[
+        go.Scatter(
+            x=k_values,
+            y=r2_values,
+            mode='markers+text',
+            text=conditions,
+            textposition='top center',
+            marker=dict(
+                size=20,
+                color=r2_values,
+                colorscale='RdYlGn',
+                showscale=True,
+                colorbar=dict(title="R²")
+            )
+        )
+    ])
+    fig.add_hline(y=0.9, line_dash="dash", line_color="green",
+                  annotation_text="R² = 0.9 (高质量)")
+    fig.add_hline(y=0.8, line_dash="dash", line_color="orange",
+                  annotation_text="R² = 0.8 (最低要求)")
+    fig.update_layout(
+        title="动力学拟合质量分布",
+        xaxis_title="降解速率 k (%/月)",
+        yaxis_title="决定系数 R²",
+        template="plotly_white",
+        height=400
+    )
+    return fig
+# =============================================================================
+# Core Analysis Pipeline
+# =============================================================================
+@st.cache_resource
+def get_engine():
+    """Get cached engine instances."""
+    return {
+        "intent_parser": IntentParser(),
+        "decision_engine": RegulatoryDecisionEngine(),
+        "explanation_generator": ExplanationGenerator()
+    }
+def run_analysis(
+    goal: str,
+    uploaded_files: List,
+    purpose: str = "rd_reference",
+    specification_limit: Optional[float] = None,
+    target_timepoints: Optional[List[int]] = None
+) -> tuple:
+    """
+    Run the full three-layer analysis pipeline.
+    Parameters are now optional - system will infer from data/goal if not provided.
+    Returns:
+        Tuple of (intent, result, explanations)
+    """
+    engines = get_engine()
+    # ==== PHASE 1: Parse files and extract structured data ====
+    all_text = ""
+    temp_paths = []
+    for uploaded_file in uploaded_files:
+        try:
+            # Save to temp file for parsing
+            suffix = Path(uploaded_file.name).suffix
+            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
+                tmp.write(uploaded_file.getvalue())
+                tmp_path = tmp.name
+                temp_paths.append(tmp_path)
+            # Parse file to get raw text
+            content = parse_file(tmp_path)
+            if content:
+                all_text += f"\n=== File: {uploaded_file.name} ===\n{content}\n"
+        except Exception as e:
+            st.warning(f"文件解析警告: {uploaded_file.name} - {str(e)}")
+    # ==== PHASE 2: Extract structured data using StabilityDataExtractor ====
+    extractor = StabilityDataExtractor()
+    raw_extracted = extractor.extract_from_text(all_text, goal)
+    # Use extracted spec limit if user didn't provide one
+    if specification_limit is None:
+        specification_limit = raw_extracted.get("specification_limit", 0.5)
+    # Use extracted target timepoints if user didn't provide
+    if target_timepoints is None or len(target_timepoints) == 0:
+        target_timepoints = raw_extracted.get("target_timepoints", [24, 36])
+    # ==== PHASE 3: Convert to batches format for RegulatoryDecisionEngine ====
+    extracted_data = _convert_to_batches_format(raw_extracted)
+    # Build data summary for intent parser
+    data_summary = _build_data_summary(extracted_data)
+    # ==== Layer 1: Parse Intent ====
+    intent = engines["intent_parser"].parse(goal, data_summary)
+    # Apply user selections (or extracted defaults)
+    intent.preferences.target_timepoints = target_timepoints
+    intent.constraints.specification_limit = specification_limit
+    try:
+        intent.constraints.purpose = AnalysisPurpose(purpose)
+    except ValueError:
+        intent.constraints.purpose = AnalysisPurpose.RD_REFERENCE
+    # ==== Layer 2: Execute Regulatory Decision ====
+    result = engines["decision_engine"].execute(intent, extracted_data)
+    # ==== Layer 3: Generate Explanations ====
+    explanations = engines["explanation_generator"].generate(
+        result=result,
+        purpose=purpose,
+        specification_limit=specification_limit,
+        confidence_level=intent.preferences.required_confidence
+    )
+    return intent, result, explanations
+def _convert_to_batches_format(raw_extracted: Dict) -> Dict[str, Any]:
+    """
+    Convert StabilityDataExtractor output to RegulatoryDecisionEngine expected format.
+    Input format (from extractor):
+        {\"demo_longterm\": {times: [...], values: [...]}, ...}
+        OR
+        {"batches": [...]}  (from generic extraction)
+    Output format (for engine):
+        {\"batches\": [{batch_id, conditions: [{condition_id, timepoints, cqa_data}]}]}
+    """
+    # If generic extraction already provided batches, use them directly
+    if raw_extracted.get("batches"):
+        return {"batches": raw_extracted["batches"]}
+    batches = []
+    cqa_name = raw_extracted.get("cqa", "总杂质")
+    # Demo Batch
+    demo_conditions = []
+    if raw_extracted.get("demo_longterm"):
+        demo_lt = raw_extracted["demo_longterm"]
+        demo_conditions.append({
+            "condition_id": "Demo_25C_LongTerm",
+            "timepoints": demo_lt.get("times", []),
+            "cqa_data": [{
+                "cqa_name": cqa_name,
+                "values": demo_lt.get("values", [])
+            }]
+        })
+    if raw_extracted.get("demo_accelerated"):
+        demo_acc = raw_extracted["demo_accelerated"]
+        demo_conditions.append({
+            "condition_id": "Demo_40C_Accelerated",
+            "timepoints": demo_acc.get("times", []),
+            "cqa_data": [{
+                "cqa_name": cqa_name,
+                "values": demo_acc.get("values", [])
+            }]
+        })
+    if demo_conditions:
+        batches.append({
+            "batch_id": "Demo",
+            "batch_name": "Demo批次",
+            "batch_type": "reference",
+            "conditions": demo_conditions
+        })
+    # Target Batch
+    target_conditions = []
+    if raw_extracted.get("target_accelerated"):
+        target_acc = raw_extracted["target_accelerated"]
+        target_conditions.append({
+            "condition_id": "Target_40C_Accelerated",
+            "timepoints": target_acc.get("times", []),
+            "cqa_data": [{
+                "cqa_name": cqa_name,
+                "values": target_acc.get("values", [])
+            }]
+        })
+    if raw_extracted.get("target_destructive"):
+        target_dest = raw_extracted["target_destructive"]
+        target_conditions.append({
+            "condition_id": "Target_60C_Destructive",
+            "timepoints": target_dest.get("times", []),
+            "cqa_data": [{
+                "cqa_name": cqa_name,
+                "values": target_dest.get("values", [])
+            }]
+        })
+    if target_conditions:
+        batches.append({
+            "batch_id": "Target",
+            "batch_name": "处方1",
+            "batch_type": "target",
+            "conditions": target_conditions
+        })
+    return {"batches": batches}
+def _build_data_summary(extracted_data: Dict) -> ExtractedDataSummary:
+    """Build ExtractedDataSummary from extracted batches data."""
+    data_summary = ExtractedDataSummary()
+    batches = extracted_data.get("batches", [])
+    if not batches:
+        return data_summary
+    data_summary.batch_ids = [b.get("batch_id", "") for b in batches]
+    all_conditions = []
+    all_cqas = []
+    all_timepoints = []
+    for batch in batches:
+        for cond in batch.get("conditions", []):
+            all_conditions.append(cond.get("condition_id", ""))
+            tps = cond.get("timepoints", [])
+            all_timepoints.extend([t for t in tps if t is not None])
+            for cqa in cond.get("cqa_data", []):
+                all_cqas.append(cqa.get("cqa_name", ""))
+    data_summary.conditions = list(set(all_conditions))
+    data_summary.cqa_list = list(set(all_cqas))
+    data_summary.available_timepoints = sorted(set(all_timepoints))
+    return data_summary
+# =============================================================================
+# Main Application
+# =============================================================================
+def main():
+    """Main Streamlit application."""
+    # Header
+    st.markdown('<h1 class="main-header">🧪 Drug Stability Intelligence Platform</h1>',
+                unsafe_allow_html=True)
+    st.markdown('<p class="sub-header">ICH/FDA/EMA合规的智能稳定性分析系统</p>',
+                unsafe_allow_html=True)
+    # Sidebar
+    with st.sidebar:
+        st.header("⚙️ 分析设置")
+        # LLM Provider
+        st.subheader("🔑 LLM 配置")
+        provider = st.selectbox(
+            "选择提供商",
+            ["Moonshot Kimi", "Google Gemini", "OpenAI", "Deepseek"],
+            index=0
+        )
+        api_key = st.text_input("API Key", type="password")
+        st.divider()
+        # Analysis Mode - NEW: Let user choose analysis type
+        st.subheader("📊 分析模式")
+        analysis_mode = st.selectbox(
+            "选择分析模式",
+            [
+                ("🤖 智能分析 (自动识别)", "auto"),
+                ("📈 稳定性预测", "prediction"),
+                ("🏷️ 批次筛选/对比", "batch_comparison"),
+                ("📊 趋势评估", "trend")
+            ],
+            format_func=lambda x: x[0],
+            help="系统将根据您的分析目标自动调整参数，您也可以在下方手动设置"
+        )[1]
+        purpose = st.selectbox(
+            "分析目的",
+            [
+                ("研发参考", "rd_reference"),
+                ("法规申报", "regulatory_submission"),
+                ("内部决策", "internal_decision")
+            ],
+            format_func=lambda x: x[0]
+        )[1]
+        # Optional Advanced Settings - in expander
+        with st.expander("⚙️ 高级参数 (可选)", expanded=False):
+            st.caption("💡 不设置时，系统将从数据或分析目标中自动推断")
+            use_custom_spec = st.checkbox("手动设置规格限度", value=False)
+            if use_custom_spec:
+                spec_limit = st.number_input(
+                    "规格限度 (%)",
+                    min_value=0.1,
+                    max_value=10.0,
+                    value=0.5,
+                    step=0.1
+                )
+            else:
+                spec_limit = None  # Will be inferred from data
+            use_custom_tp = st.checkbox("手动设置预测时间点", value=False)
+            if use_custom_tp:
+                target_tp = st.multiselect(
+                    "目标预测时间点 (月)",
+                    [6, 12, 18, 24, 30, 36, 48],
+                    default=[24, 36]
+                )
+            else:
+                target_tp = None  # Will be inferred from goal
+        st.divider()
+        st.subheader("ℹ️ 系统信息")
+        st.info("""
+        **三层架构**
+        - Layer 1: 意图理解 (LLM)
+        - Layer 2: 科学决策 (规则)
+        - Layer 3: 呈现报告 (LLM+Plotly)
+        """)
+    # Main content
+    col1, col2 = st.columns([1, 2])
+    with col1:
+        st.header("📁 数据输入")
+        # File upload
+        uploaded_files = st.file_uploader(
+            "上传稳定性数据文件",
+            type=["xlsx", "xls", "docx", "doc", "pdf", "csv"],
+            accept_multiple_files=True
+        )
+        # Analysis goal
+        goal = st.text_area(
+            "🎯 分析目标",
+            placeholder="例如：请预测SF-0047批次在24个月和36个月时的总杂质含量",
+            height=100
+        )
+        # Analyze button
+        analyze_clicked = st.button("🚀 开始分析", type="primary", use_container_width=True)
+    with col2:
+        st.header("📈 分析结果")
+        if analyze_clicked:
+            if not uploaded_files:
+                st.error("请上传稳定性数据文件")
+            elif not goal:
+                st.error("请输入分析目标")
+            else:
+                with st.spinner("正在执行三层分析..."):
+                    try:
+                        intent, result, explanations = run_analysis(
+                            goal=goal,
+                            uploaded_files=uploaded_files,
+                            purpose=purpose,
+                            specification_limit=spec_limit,
+                            target_timepoints=target_tp
+                        )
+                        # Store in session state
+                        st.session_state['intent'] = intent
+                        st.session_state['result'] = result
+                        st.session_state['explanations'] = explanations
+                    except Exception as e:
+                        st.error(f"分析过程发生错误: {str(e)}")
+        # Display results
+        if 'result' in st.session_state:
+            result = st.session_state['result']
+            explanations = st.session_state.get('explanations', {})
+            # Check if refused
+            if not result.can_proceed and result.refusal:
+                st.markdown(f"""
+                <div class="refusal-box">
+                    <h3>⚠️ 分析无法完成</h3>
+                    <p><strong>原因:</strong> {result.refusal.reason}</p>
+                    <p><strong>法规依据:</strong> {result.refusal.regulatory_reference}</p>
+                    <p><strong>建议:</strong></p>
+                    <ul>
+                        {"".join(f"<li>{s}</li>" for s in result.refusal.suggestions)}
+                    </ul>
+                </div>
+                """, unsafe_allow_html=True)
+            else:
+                # Executive Summary
+                st.subheader("📋 执行摘要")
+                st.success(explanations.get("executive_summary", result.get_executive_summary()))
+                # Tabs for different views
+                tabs = st.tabs(["📊 可视化", "📈 动力学结果", "🔮 预测结果", "📝 完整报告"])
+                with tabs[0]:
+                    # Charts
+                    if result.predictions:
+                        fig = create_prediction_chart(
+                            result.kinetic_fits,
+                            result.predictions,
+                            spec_limit
+                        )
+                        st.plotly_chart(fig, use_container_width=True)
+                    if result.batch_ranking:
+                        fig = create_batch_comparison_chart(
+                            result.batch_ranking,
+                            result.kinetic_fits
+                        )
+                        st.plotly_chart(fig, use_container_width=True)
+                    if result.kinetic_fits:
+                        fig = create_kinetics_scatter(result.kinetic_fits)
+                        st.plotly_chart(fig, use_container_width=True)
+                with tabs[1]:
+                    st.subheader("动力学拟合结果")
+                    if result.kinetic_fits:
+                        for cond_id, fit in result.kinetic_fits.items():
+                            with st.expander(f"📌 {cond_id}", expanded=True):
+                                col_a, col_b, col_c = st.columns(3)
+                                col_a.metric("k (%/月)", f"{fit.k:.4f}")
+                                col_b.metric("R²", f"{fit.R2:.4f}")
+                                col_c.metric("SE(k)", f"{fit.SE_k:.4f}")
+                                st.code(fit.equation)
+                    else:
+                        st.info("无动力学拟合结果")
+                with tabs[2]:
+                    st.subheader("预测结果")
+                    if result.predictions:
+                        for tp, pred in result.predictions.items():
+                            status_color = "🟢" if pred.is_compliant() else "🔴"
+                            with st.expander(f"{status_color} {tp}", expanded=True):
+                                col_a, col_b, col_c = st.columns(3)
+                                col_a.metric("点预测", f"{pred.point_estimate:.2f}%")
+                                col_b.metric("95% CI", f"{pred.CI_lower:.2f}% - {pred.CI_upper:.2f}%")
+                                col_c.metric("距规格余量", f"{pred.margin_to_limit:.2f}%")
+                    elif result.batch_ranking:
+                        st.subheader("批次排名")
+                        for r in result.batch_ranking:
+                            medal = "🥇" if r.rank == 1 else "🥈" if r.rank == 2 else "🥉" if r.rank == 3 else "📍"
+                            st.markdown(f"{medal} **{r.batch_name}** - 评分: {r.score} - {r.reason}")
+                    else:
+                        st.info("无预测结果")
+                with tabs[3]:
+                    st.subheader("完整分析报告")
+                    # Display all explanation sections
+                    for section, content in explanations.items():
+                        if content:
+                            st.markdown(f"**{section.replace('_', ' ').title()}**")
+                            st.markdown(content)
+                            st.divider()
+                    # Download buttons
+                    st.subheader("📥 下载报告")
+                    col_dl1, col_dl2 = st.columns(2)
+                    with col_dl1:
+                        # Generate HTML report
+                        html_content = generate_html_report(result, explanations, spec_limit)
+                        st.download_button(
+                            "📄 下载 HTML 报告",
+                            data=html_content,
+                            file_name=f"stability_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.html",
+                            mime="text/html",
+                            use_container_width=True
+                        )
+                    with col_dl2:
+                        st.button(
+                            "📑 下载 PDF 报告 (需安装wkhtmltopdf)",
+                            disabled=True,
+                            use_container_width=True
+                        )
+def generate_html_report(
+    result: RegulatoryDecisionResult,
+    explanations: Dict[str, str],
+    spec_limit: float
+) -> str:
+    """Generate downloadable HTML report using dynamic orchestration."""
+    # Use the new ReportOrchestrator
+    from layers.report_orchestrator import ReportOrchestrator
+    # We need the intent - try to get it from session state
+    intent = st.session_state.get('intent')
+    if intent is None:
+        # Fallback: create minimal intent from available info
+        from schemas.analysis_intent import AnalysisIntent, AnalysisType
+        intent = AnalysisIntent(
+            user_question_raw="生成报告",
+            analysis_type=AnalysisType.BATCH_SCREENING if result.batch_ranking else AnalysisType.SHELF_LIFE_PREDICTION
+        )
+    # Generate report via orchestrator
+    orchestrator = ReportOrchestrator()
+    html_content = orchestrator.generate(intent, result, explanations)
+    return html_content
+# Entry guard: build the page only when this module is executed as a script.
+if __name__ == "__main__":
+    main()