Spaces:
Running
Running
| """ | |
| app.py — Gradio UI entry point. | |
| ORIGINAL structure and all tabs preserved. | |
| NEW: second file upload for methodology CSV, technique sheets 1-4, | |
| journal cross-tabulation chart + table, technique optimisation log. | |
| """ | |
| import os, json | |
| import re | |
| import pandas as pd, numpy as np | |
| import gradio as gr | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
| from agent import run_pipeline, METHODOLOGY_PATTERNS, TECHNIQUE_PATTERNS | |
| # ── CSV preview ────────────────────────────────────────────────────────────── | |
def _preview(file):
    """Summarise an uploaded corpus CSV as a Markdown status report.

    The CSV at ``file.name`` is read with pandas, its column names are
    lower-cased, and the report states whether the ``title`` and ``abstract``
    columns exist and how many rows are blank in each.

    Args:
        file: Uploaded-file object exposing the CSV path as ``.name``. A falsy
            value means nothing has been uploaded yet.

    Returns:
        A Markdown string: an upload prompt when ``file`` is falsy, otherwise
        a status table followed by the usable-paper count.
    """
    if not file:
        return "Upload a Scopus CSV to begin."

    df = pd.read_csv(file.name)
    df.columns = df.columns.str.lower()
    n = len(df)
    has_t = "title" in df.columns
    has_a = "abstract" in df.columns

    # A missing column is reported as blank in every row.
    blanks_t = int(df["title"].isna().sum()) if has_t else n
    blanks_a = int(df["abstract"].isna().sum()) if has_a else n

    # A paper is usable only when BOTH fields are filled in the same row.
    # Subtracting max(blanks_t, blanks_a) from n over-counts whenever the
    # blank titles and blank abstracts sit in different rows.
    if has_t and has_a:
        usable = int((df["title"].notna() & df["abstract"].notna()).sum())
    else:
        usable = 0

    ok = "β " if has_t and has_a and blanks_t < n and blanks_a < n else "β"
    return (f"## {ok} CSV loaded β {n} entries\n\n"
            f"| Column | Present | Blank rows |\n|---|---|---|\n"
            f"| title | {'β ' if has_t else 'β'} | {blanks_t} |\n"
            f"| abstract | {'β ' if has_a else 'β'} | {blanks_a} |\n\n"
            f"**Usable papers:** {usable} / {n}")
def _preview_methodology(file):
    """Describe an uploaded methodology CSV as a Markdown status report.

    Args:
        file: Uploaded-file object exposing the CSV path as ``.name``. A falsy
            value means nothing has been uploaded yet.

    Returns:
        A Markdown string: an upload prompt when ``file`` is falsy, otherwise
        a table saying which of ``title``, ``doi`` and ``methodology`` are
        present (column names are compared after lower-casing).
    """
    if not file:
        return "Upload methodology CSV (title, doi, methodology) to enable technique analysis."

    frame = pd.read_csv(file.name)
    frame.columns = frame.columns.str.lower()
    present = set(frame.columns)
    title_found = "title" in present
    method_found = "methodology" in present
    doi_found = "doi" in present
    total = len(frame)

    # Only title + methodology are required; doi is reported as optional.
    if title_found and method_found:
        status = "β "
    else:
        status = "β"

    title_mark = "β " if title_found else "β"
    doi_mark = "β " if doi_found else "β optional"
    method_mark = "β " if method_found else "β"

    parts = [
        f"## {status} Methodology CSV β {total} papers\n\n",
        "| Column | Present |\n|---|---|\n",
        f"| title | {title_mark} |\n",
        f"| doi | {doi_mark} |\n",
        f"| methodology | {method_mark} |\n\n",
        "Journals will be auto-detected from DOI + title.",
    ]
    return "".join(parts)
| # ── Original helper builders ───────────────────────────────────────────────── | |
def _top_papers_df(top_papers: dict) -> pd.DataFrame:
    """Flatten the per-cluster paper lists into one table.

    Args:
        top_papers: Mapping of cluster id to a list of paper dicts, each with
            the keys ``cluster_label``, ``rank``, ``title`` and
            ``abstract_snippet``.

    Returns:
        One row per paper, clusters in ascending id order and papers in the
        order they appear in each list.
    """
    records = [
        {
            "Cluster": cluster_id,
            "Label": paper["cluster_label"],
            "Rank": paper["rank"],
            "Title": paper["title"],
            "Abstract Snippet": paper["abstract_snippet"],
        }
        for cluster_id in sorted(top_papers)
        for paper in top_papers[cluster_id]
    ]
    return pd.DataFrame(records)
def _methodology_summary_df(methodology_data: dict, interps: dict) -> pd.DataFrame:
    """Build the one-row-per-cluster methodology summary table.

    Args:
        methodology_data: Mapping of cluster id to that cluster's methodology
            dict. Every field is read with a default, so keys may be missing.
        interps: Mapping of cluster id to an interpretation dict; only its
            ``label`` is used, defaulting to ``"Cluster <id>"``.

    Returns:
        A DataFrame with clusters in ascending id order.
    """

    def _describe(entries):
        # "name (pct%, agreement)" for each entry, comma separated.
        return ", ".join(
            f"{entry['name']} ({entry['pct']}%, {entry['agreement']})"
            for entry in entries
        )

    records = []
    for cluster_id in sorted(methodology_data):
        info = methodology_data[cluster_id]
        cluster_label = interps.get(cluster_id, {}).get("label", f"Cluster {cluster_id}")
        confirmed = ", ".join(info.get("regex_confirmed_consensus", []))
        rejected = ", ".join(info.get("regex_rejected_consensus", []))
        records.append({
            "Cluster": cluster_id,
            "Label": cluster_label,
            "Dominant Method": info.get("dominant_method", "β"),
            "Dominant Technique": info.get("dominant_technique", "β"),
            "Empirical %": info.get("empirical_pct", 0),
            "Theoretical %": info.get("theoretical_pct", 0),
            "Mixed %": info.get("mixed_pct", 0),
            "Methods (β₯2 LLMs)": _describe(info.get("methodologies", [])),
            "Techniques (β₯2 LLMs)": _describe(info.get("techniques", [])),
            "Regex Confirmed": confirmed if confirmed else "β",
            "Regex Rejected": rejected if rejected else "β",
        })
    return pd.DataFrame(records)
def _extraction_pipeline_df(methodology_data: dict, interps: dict) -> pd.DataFrame:
    """Build the per-item extraction trace (regex evidence + LLM gate result).

    For every cluster, rows are emitted in this order: accepted methods,
    accepted techniques, rejected methods, rejected techniques.

    Args:
        methodology_data: Mapping of cluster id to a methodology dict. The
            lists ``methodologies``, ``techniques``, ``rejected_methods`` and
            ``rejected_techniques`` hold item dicts with ``name``,
            ``llm_votes``, ``agreement``, ``pct`` and optional ``evidence``.
            ``regex_scan`` maps ``methods``/``techniques`` to
            ``{name: [hit, ...]}`` where each hit has a ``match`` string.
        interps: Mapping of cluster id to an interpretation dict; only its
            ``label`` is used, defaulting to ``"Cluster <id>"``.

    Returns:
        One row per item, or an empty DataFrame when there are no items.
    """
    # (source list, Type column, Gate Passed column). The Type comes from the
    # list an item was read from; testing `item in methodologies` compares
    # dicts by value and mislabels a technique equal to a method entry.
    groups = (
        ("methodologies", "Method", "β ACCEPTED"),
        ("techniques", "Technique", "β ACCEPTED"),
        ("rejected_methods", "Method", "β REJECTED (single LLM)"),
        ("rejected_techniques", "Technique", "β REJECTED (single LLM)"),
    )

    rows = []
    for cid in sorted(methodology_data.keys()):
        md = methodology_data[cid]
        label = interps.get(cid, {}).get("label", f"Cluster {cid}")
        scan = md.get("regex_scan", {})
        for list_key, kind, gate in groups:
            for item in md.get(list_key, []):
                name = item["name"]
                # Methods bank first, then the techniques bank.
                regex_hits = (scan.get("methods", {}).get(name, [])
                              or scan.get("techniques", {}).get(name, []))
                if regex_hits:
                    # De-duplicate matched texts in first-seen order, cap at 80 chars.
                    matched = ", ".join(dict.fromkeys(h["match"] for h in regex_hits))[:80]
                else:
                    matched = "β"
                rows.append({
                    "Cluster": cid, "Label": label, "Item": name,
                    "Type": kind,
                    "Regex Match": matched,
                    "Regex Fired": "β " if regex_hits else "β",
                    "LLM Votes": item["llm_votes"],
                    "Agreement": item["agreement"],
                    "Avg Pct (%)": item["pct"],
                    "Evidence": item.get("evidence", "β"),
                    "Gate Passed": gate,
                })
    return pd.DataFrame(rows) if rows else pd.DataFrame()
def _per_llm_methodology_df(methodology_data: dict, interps: dict) -> pd.DataFrame:
    """Show each LLM's raw methodology answer side by side, one row per cluster.

    Args:
        methodology_data: Mapping of cluster id to a methodology dict whose
            ``llm_raw`` entry maps ``groq``/``mistral``/``gemini`` to that
            model's answer (``methodologies``, ``techniques`` and the three
            ``*_pct`` orientation values).
        interps: Mapping of cluster id to an interpretation dict; only its
            ``label`` is used, defaulting to ``"Cluster <id>"``.

    Returns:
        A DataFrame with per-model method, technique and E/T/M columns.
    """
    providers = (("Groq", "groq"), ("Mistral", "mistral"), ("Gemini", "gemini"))
    listings = (("Methods", "methodologies"), ("Techniques", "techniques"))

    def _listing(answer, field):
        # "name (pct%)" joined with " | "; a placeholder when the list is empty.
        joined = " | ".join(
            f"{entry['name']} ({entry.get('pct',0)}%)" for entry in answer.get(field, [])
        )
        return joined or "β"

    records = []
    for cluster_id in sorted(methodology_data):
        info = methodology_data[cluster_id]
        raw_answers = info.get("llm_raw", {})
        record = {
            "Cluster": cluster_id,
            "Label": interps.get(cluster_id, {}).get("label", f"Cluster {cluster_id}"),
        }
        # Column order: all Methods columns, then all Techniques columns,
        # then the Empirical/Theoretical/Mixed triples.
        for heading, field in listings:
            for display, key in providers:
                record[f"{display} {heading}"] = _listing(raw_answers.get(key, {}), field)
        for display, key in providers:
            answer = raw_answers.get(key, {})
            record[f"{display} E/T/M"] = (
                f"{answer.get('empirical_pct',0)}/"
                f"{answer.get('theoretical_pct',0)}/"
                f"{answer.get('mixed_pct',0)}"
            )
        records.append(record)
    return pd.DataFrame(records)
def _regex_hits_df(methodology_data: dict, interps: dict) -> pd.DataFrame:
    """List every recorded regex hit, one row per hit.

    Args:
        methodology_data: Mapping of cluster id to a methodology dict whose
            ``regex_scan`` maps ``methods``/``techniques`` to
            ``{category: [hit, ...]}``; each hit has ``match``, ``doc`` and a
            two-element ``span``.
        interps: Mapping of cluster id to an interpretation dict; only its
            ``label`` is used, defaulting to ``"Cluster <id>"``.

    Returns:
        Methodology-bank hits followed by technique-bank hits for each
        cluster, or an empty DataFrame when nothing matched.
    """
    banks = (("methods", "Methodology"), ("techniques", "Technique"))

    records = []
    for cluster_id in sorted(methodology_data):
        info = methodology_data[cluster_id]
        cluster_label = interps.get(cluster_id, {}).get("label", f"Cluster {cluster_id}")
        scan = info.get("regex_scan", {})
        for scan_key, bank_name in banks:
            for category, hits in scan.get(scan_key, {}).items():
                records.extend(
                    {
                        "Cluster": cluster_id,
                        "Label": cluster_label,
                        "Bank": bank_name,
                        "Pattern Category": category,
                        "Matched Text": hit["match"],
                        "Paper #": hit["doc"],
                        "Char Span": f"{hit['span'][0]}β{hit['span'][1]}",
                    }
                    for hit in hits
                )
    if not records:
        return pd.DataFrame()
    return pd.DataFrame(records)
def _methodology_bar_chart(methodology_data: dict, interps: dict) -> go.Figure:
    """Draw a stacked bar chart of research orientation per cluster.

    Args:
        methodology_data: Mapping of cluster id to a methodology dict; the
            ``empirical_pct``, ``theoretical_pct`` and ``mixed_pct`` values
            are plotted (0 when missing).
        interps: Mapping of cluster id to an interpretation dict; its
            ``label`` (cut to 30 characters) is the x tick, defaulting to
            ``"C<id>"``.

    Returns:
        A Plotly figure with one stacked bar per cluster, in ascending id order.
    """
    ordered_ids = sorted(methodology_data)
    tick_labels = [
        interps.get(cluster_id, {}).get("label", f"C{cluster_id}")[:30]
        for cluster_id in ordered_ids
    ]
    # (legend name, source field, bar colour), in stacking order.
    series = (
        ("Empirical %", "empirical_pct", "#3dba7a"),
        ("Theoretical %", "theoretical_pct", "#5b9cf6"),
        ("Mixed %", "mixed_pct", "#f5a623"),
    )

    chart = go.Figure()
    for trace_name, field, colour in series:
        heights = [methodology_data[cluster_id].get(field, 0) for cluster_id in ordered_ids]
        chart.add_trace(go.Bar(name=trace_name, x=tick_labels, y=heights, marker_color=colour))

    chart.update_layout(
        barmode="stack",
        template="plotly_dark",
        height=420,
        paper_bgcolor="#0d1117",
        plot_bgcolor="#161b22",
        title="Research Orientation per Cluster β Averaged across Groq + Mistral + Gemini",
        xaxis_title="Cluster",
        yaxis_title="Percentage (%)",
        font=dict(size=11),
        legend=dict(orientation="h", y=1.12),
        xaxis_tickangle=-35,
    )
    return chart
def _refinement_df(rl: list) -> pd.DataFrame:
    """Turn the label-refinement log into a table.

    Args:
        rl: List of refinement entries (dicts with ``cluster``, ``iteration``,
            ``old_label``, ``new_label``, ``improvement_score``,
            ``hallucination_detected`` and an optional ``issues`` list).

    Returns:
        One row per entry; an empty frame that still has the expected columns
        when the log is empty.
    """
    if not rl:
        empty_columns = ["Cluster", "Iteration", "Old Label", "New Label",
                         "Issues", "Improvement", "Hallucination Detected"]
        return pd.DataFrame(columns=empty_columns)

    records = []
    for entry in rl:
        records.append({
            "Cluster": entry["cluster"],
            "Iteration": entry["iteration"],
            "Old Label": entry["old_label"],
            "New Label": entry["new_label"],
            # Issues are optional and shown as one "; "-separated string.
            "Issues": "; ".join(entry.get("issues", [])),
            "Improvement": entry["improvement_score"],
            "Hallucination Detected": entry["hallucination_detected"],
        })
    return pd.DataFrame(records)
def _regex_pattern_info() -> str:
    """Return the Markdown help text shown with the regex hits.

    The text is a fixed four-step explanation followed by one bullet per entry
    of ``METHODOLOGY_PATTERNS`` and ``TECHNIQUE_PATTERNS`` (name plus the
    compiled pattern's source).
    """

    def _bullets(bank):
        # One "- **name**: `pattern`" line per compiled regex in the bank.
        return "\n".join(f"- **{name}**: `{regex.pattern}`" for name, regex in bank.items())

    method_bullets = _bullets(METHODOLOGY_PATTERNS)
    technique_bullets = _bullets(TECHNIQUE_PATTERNS)

    explanation = (
        "### How Cluster Methodology Extraction Works\n\n"
        "**Step 1 β Regex Pre-Scan:** Two compiled pattern banks run against representative "
        "abstracts. Every match recorded with exact character span, matched text, paper number.\n\n"
        "**Step 2 β 3-LLM Council:** Groq, Mistral, Gemini each receive regex evidence + abstracts. "
        "Each LLM confirms/rejects regex hits and adds any missed methods/techniques.\n\n"
        "**Step 3 β β₯2-LLM Gate:** Only items named by β₯2 LLMs survive. Percentages averaged.\n\n"
        "**Step 4 β Orientation:** Empirical/Theoretical/Mixed averaged across 3 LLMs.\n\n"
    )
    sections = [
        explanation,
        "---\n\n#### Methodology Bank\n",
        method_bullets,
        "\n\n#### Technique Bank\n",
        technique_bullets,
    ]
    return "".join(sections)
| # ── NEW helpers for methodology-CSV pipeline ───────────────────────────────── | |
def _tech_sheet_df(sheet_rows: list) -> pd.DataFrame:
    """Wrap one technique sheet (a list of row dicts) in a DataFrame.

    A falsy ``sheet_rows`` (``None`` or an empty list) yields an empty frame.
    """
    if sheet_rows:
        return pd.DataFrame(sheet_rows)
    return pd.DataFrame()
def _tech_llm_pct_chart(comp_sheets: dict) -> go.Figure:
    """Draw a grouped bar chart of technique frequency per sheet.

    Sheets 1-4 of ``comp_sheets`` are plotted as the "Groq", "Mistral",
    "Gemini" and "Consolidated" series. For every technique the bar height is
    the rounded percentage of that sheet's rows that mention it.

    Args:
        comp_sheets: Mapping of sheet number (1-4) to a list of row dicts.
            Each row's ``techniques`` value is a comma-separated string.

    Returns:
        A Plotly figure with one bar group per technique, techniques sorted
        alphabetically.
    """

    def _freq(rows):
        total = len(rows) or 1  # an empty sheet must not divide by zero
        counts = {}
        for row in rows:
            raw = row.get("techniques", "") or ""
            # Collect each technique once per row. Counting repeats (or case
            # variants that .title() merges) would push "% of papers" past 100.
            mentioned = set()
            for part in raw.split(","):
                part = part.strip()
                # Skip blanks and the placeholder; the placeholder is tested
                # before title-casing so the comparison sees the raw text.
                if part and part != "β":
                    mentioned.add(part.title())
            for tech in mentioned:
                counts[tech] = counts.get(tech, 0) + 1
        return {tech: round(hits / total * 100) for tech, hits in counts.items()}

    # (legend name, sheet number, bar colour)
    series = (
        ("Groq", 1, "#5b9cf6"),
        ("Mistral", 2, "#f5a623"),
        ("Gemini", 3, "#a855f7"),
        ("Consolidated", 4, "#3dba7a"),
    )
    freqs = {sheet: _freq(comp_sheets.get(sheet, [])) for _, sheet, _ in series}
    all_techs = sorted(set().union(*freqs.values()))

    fig = go.Figure()
    for name, sheet, colour in series:
        fig.add_trace(go.Bar(name=name, x=all_techs,
                             y=[freqs[sheet].get(t, 0) for t in all_techs],
                             marker_color=colour))
    fig.update_layout(barmode="group", template="plotly_dark", height=480,
                      paper_bgcolor="#0d1117", plot_bgcolor="#161b22",
                      title="Computational Technique Frequency β % of Papers per LLM (Groq / Mistral / Gemini / Consolidated)",
                      xaxis_title="Technique", yaxis_title="% of papers",
                      font=dict(size=10), legend=dict(orientation="h", y=1.12), xaxis_tickangle=-40)
    return fig
def _journal_crosstab_chart(journal_crosstab: dict) -> go.Figure:
    """Draw technique usage per journal as a grouped bar chart.

    Journals are on the x-axis; each of the first 15 techniques is one bar
    series whose heights come from ``consolidated[journal][technique]``
    (0 when missing).

    Args:
        journal_crosstab: Dict with ``consolidated``, ``journals`` and
            ``techniques`` entries.

    Returns:
        The grouped bar chart, or an empty titled figure when there are no
        journals or no techniques.
    """
    table = journal_crosstab.get("consolidated", {})
    journal_names = journal_crosstab.get("journals", [])
    technique_names = journal_crosstab.get("techniques", [])

    chart = go.Figure()
    if not (journal_names and technique_names):
        chart.update_layout(template="plotly_dark", title="No journal data available",
                            paper_bgcolor="#0d1117")
        return chart

    palette = ["#5b9cf6", "#3dba7a", "#f5a623", "#e04d4d", "#a855f7", "#06b6d4",
               "#f97316", "#84cc16", "#ec4899", "#14b8a6", "#8b5cf6", "#ef4444"]

    # Only the first 15 techniques are drawn; colours wrap around the palette.
    for position, technique in enumerate(technique_names[:15]):
        shares = [table.get(journal, {}).get(technique, 0) for journal in journal_names]
        colour = palette[position % len(palette)]
        chart.add_trace(go.Bar(name=technique, x=journal_names, y=shares,
                               marker_color=colour))

    chart.update_layout(
        barmode="group",
        template="plotly_dark",
        height=500,
        paper_bgcolor="#0d1117",
        plot_bgcolor="#161b22",
        title="Computational Technique Usage β Cross-Tabulation by Journal (%)",
        xaxis_title="Journal",
        yaxis_title="% of papers using technique",
        font=dict(size=10),
        legend=dict(orientation="h", y=1.15),
        xaxis_tickangle=-20,
    )
    return chart
def _journal_crosstab_df(journal_crosstab: dict) -> pd.DataFrame:
    """Render the journal x technique cross-tabulation as a table.

    Args:
        journal_crosstab: Dict with ``consolidated`` (journal -> technique ->
            value), ``journals``, ``techniques`` and ``journal_paper_counts``.

    Returns:
        One row per journal with its paper count and one ``"<value>%"``
        column per technique (``"0%"`` when the pair is missing).
    """
    table = journal_crosstab.get("consolidated", {})
    journal_names = journal_crosstab.get("journals", [])
    technique_names = journal_crosstab.get("techniques", [])
    counts = journal_crosstab.get("journal_paper_counts", {})

    records = []
    for journal in journal_names:
        record = {"Journal": journal, "N Papers": counts.get(journal, 0)}
        record.update(
            {technique: f"{table.get(journal, {}).get(technique, 0)}%"
             for technique in technique_names}
        )
        records.append(record)
    return pd.DataFrame(records)
def _tech_opt_df(opt_log: list) -> pd.DataFrame:
    """Turn the technique-optimisation log into a table.

    Args:
        opt_log: List of log entries (dicts); every key listed in
            ``field_map`` below is required on each entry.

    Returns:
        One row per entry; an empty frame that still has the expected columns
        when the log is empty.
    """
    # (table column, log-entry key), in display order.
    field_map = (
        ("Technique", "technique"),
        ("Refined Name", "refined_name"),
        ("Hallucination", "is_hallucination"),
        ("High Variance", "high_variance"),
        ("Groq %", "pct_groq"),
        ("Mistral %", "pct_mistral"),
        ("Gemini %", "pct_gemini"),
        ("Suggestion", "suggestion"),
        ("Split Into", "split_into"),
        ("Merge With", "merge_with"),
    )
    if not opt_log:
        return pd.DataFrame(columns=[column for column, _ in field_map])

    records = [{column: entry[key] for column, key in field_map} for entry in opt_log]
    return pd.DataFrame(records)
def _per_llm_freq_df(journal_crosstab: dict) -> pd.DataFrame:
    """Tabulate per-LLM technique frequency, sorted by the Groq column.

    Args:
        journal_crosstab: Dict whose ``per_llm_freq`` entry maps an LLM name
            (``"Groq"``, ``"Mistral"``, ``"Gemini"``) to
            ``{technique: percentage}``.

    Returns:
        One row per technique with the three percentages (0 when an LLM did
        not report it) and a ``Variance`` column holding the rounded spread
        (max minus min) between them. When there is no per-LLM data the
        result is an empty frame that still has these columns.
    """
    columns = ["Technique", "Groq %", "Mistral %", "Gemini %", "Variance"]
    per_llm = journal_crosstab.get("per_llm_freq", {})
    techniques = sorted({t for freqs in per_llm.values() for t in freqs})

    rows = []
    for t in techniques:
        groq = per_llm.get("Groq", {}).get(t, 0)
        mistral = per_llm.get("Mistral", {}).get(t, 0)
        gemini = per_llm.get("Gemini", {}).get(t, 0)
        rows.append({
            "Technique": t,
            "Groq %": groq,
            "Mistral %": mistral,
            "Gemini %": gemini,
            "Variance": round(max(groq, mistral, gemini) - min(groq, mistral, gemini)),
        })

    # Sorting a column-less frame raises KeyError, and this input is empty
    # whenever the crosstab dict has no per-LLM frequencies.
    if not rows:
        return pd.DataFrame(columns=columns)
    return pd.DataFrame(rows).sort_values("Groq %", ascending=False)
| # ── NEW: Cluster Sizes bar chart (what supervisor pointed to) ──────────────── | |
def _cluster_sizes_chart(interps: dict, disc: dict) -> go.Figure:
    """Draw papers-per-cluster bars coloured by the two size rules.

    Bar colours:
        green  - cluster holds at most 25% of the papers and has >= 5 papers
        yellow - cluster holds more than 25% of the papers
        red    - cluster has fewer than 5 papers (and is not over the cap)

    The paper count is printed above each bar and a dashed line marks the
    25% cap.

    Args:
        interps: Mapping of cluster id to an interpretation dict with
            ``label`` and optional ``strong``/``weak`` member counts.
        disc: Dict whose ``cluster_sizes`` entry maps cluster id to its size.
            A cluster missing from it falls back to ``strong + weak``.

    Returns:
        A Plotly bar figure with clusters in ascending id order.
    """
    cluster_sizes = disc.get("cluster_sizes", {})
    ordered = sorted(interps.keys())
    labels = [interps[cid]["label"] for cid in ordered]
    sizes = [
        cluster_sizes.get(cid, interps[cid].get("strong", 0) + interps[cid].get("weak", 0))
        for cid in ordered
    ]

    # Corpus size for the 25% rule. When `cluster_sizes` is absent the bars use
    # the strong+weak fallback, so the total must use it too; otherwise n_docs
    # collapses to 1 and every cluster is flagged as exceeding the cap.
    n_docs = sum(cluster_sizes.values()) or sum(sizes) or 1
    max_allowed = int(0.25 * n_docs)

    colors = []
    for size in sizes:
        if size / n_docs > 0.25:
            colors.append("#f5c518")   # yellow: mass violation
        elif size < 5:
            colors.append("#e04d4d")   # red: too small
        else:
            colors.append("#3dba7a")   # green: passes both rules
    texts = [str(size) for size in sizes]

    fig = go.Figure(go.Bar(
        x=labels, y=sizes,
        marker_color=colors,
        text=texts,
        textposition="outside",
        textfont=dict(size=11, color="#c9d1d9"),
    ))
    fig.add_hline(y=max_allowed, line_dash="dash", line_color="#f5a623",
                  annotation_text=f"25% cap ({max_allowed} papers)",
                  annotation_font_color="#f5a623")
    fig.update_layout(
        template="plotly_dark", height=520,
        paper_bgcolor="#0d1117", plot_bgcolor="#161b22",
        title="Cluster Sizes (Papers per Cluster) β Green=PASS Β· Yellow=Mass>25% Β· Red=Size<5",
        xaxis_title="Cluster", yaxis_title="Number of Papers",
        font=dict(size=10), xaxis_tickangle=-40,
        showlegend=False,
        margin=dict(t=80, b=200),
    )
    return fig
| # ── NEW: Reproducibility panel ─────────────────────────────────────────────── | |
def _reproducibility_df(td: dict, interps: dict) -> pd.DataFrame:
    """Build the reproducibility table: one overall row plus one row per cluster.

    Per-cluster persistence is used as a proxy for how likely a cluster is to
    reappear on a re-run: >= 0.7 is reported as stable, >= 0.4 as borderline,
    anything lower as fragile.

    Args:
        td: Topic data. ``td["metrics"]["stability"]`` is the overall
            stability score (0.0 when missing) and ``cluster_persistence``
            maps cluster id to its persistence (0.0 when missing).
        interps: Mapping of cluster id to an interpretation dict with
            ``label`` and optional ``strong``/``weak`` member counts.

    Returns:
        A DataFrame whose first row (``Cluster == "ALL"``) summarises overall
        stability, followed by the clusters sorted by persistence, highest
        first. With no clusters only the overall row is returned.

    Raises:
        KeyError: If ``td`` has no ``metrics`` entry.
    """
    cluster_persistence = td.get("cluster_persistence", {})
    overall_stability = td["metrics"].get("stability", 0.0)

    rows = []
    for cid in sorted(interps.keys()):
        pers = cluster_persistence.get(cid, 0.0)
        strong = interps[cid].get("strong", 0)
        weak = interps[cid].get("weak", 0)
        if pers >= 0.7:
            verdict, note = "β Stable", "Likely same label on re-run"
        elif pers >= 0.4:
            verdict, note = "β Borderline", "Label may shift slightly"
        else:
            verdict = "β Fragile"
            note = "May merge/split on re-run β consider merging with adjacent cluster"
        rows.append({
            "Cluster": cid,
            "Label": interps[cid]["label"],
            "Cluster Persistence": round(pers, 4),
            "Strong Members": strong,
            "Weak Members": weak,
            "Total Papers": strong + weak,
            "Stability Verdict": verdict,
            "Note": note,
        })

    # The overall note must follow the verdict; claiming identical clusters on
    # re-run next to a Borderline/Unstable verdict is contradictory.
    if overall_stability >= 0.8:
        overall_verdict = "β Stable"
        overall_note = "ARI close to 1.0 β running the pipeline again will produce the same clusters"
    elif overall_stability >= 0.5:
        overall_verdict = "β Borderline"
        overall_note = "ARI is moderate - re-running the pipeline may change some clusters"
    else:
        overall_verdict = "β Unstable"
        overall_note = "ARI is low - re-running the pipeline is likely to produce different clusters"

    overall_row = pd.DataFrame([{
        "Cluster": "ALL",
        "Label": f"Overall ARI Stability across 3 seeds = {round(overall_stability,4)}",
        "Cluster Persistence": overall_stability,
        "Strong Members": "β", "Weak Members": "β", "Total Papers": "β",
        "Stability Verdict": overall_verdict,
        "Note": overall_note,
    }])

    # With no clusters there is no "Cluster Persistence" column to sort on.
    if not rows:
        return overall_row
    df = pd.DataFrame(rows).sort_values("Cluster Persistence", ascending=False)
    return pd.concat([overall_row, df], ignore_index=True)
def _reproducibility_chart(td: dict, interps: dict) -> go.Figure:
    """Draw a horizontal bar chart of per-cluster persistence.

    Bars are ordered by ascending persistence and coloured green (>= 0.7),
    orange (>= 0.4) or red (below 0.4); dotted vertical lines mark the two
    thresholds.

    Args:
        td: Topic data whose ``cluster_persistence`` entry maps cluster id to
            persistence (0.0 when missing).
        interps: Mapping of cluster id to an interpretation dict; its
            ``label`` (cut to 35 characters) names the bar.

    Returns:
        A Plotly figure whose height grows with the number of clusters
        (at least 400 px).
    """
    cluster_persistence = td.get("cluster_persistence", {})
    ordered = sorted(interps.keys(), key=lambda c: cluster_persistence.get(c, 0))

    labels, persis, colors = [], [], []
    for cid in ordered:
        p = cluster_persistence.get(cid, 0.0)
        labels.append(interps[cid]["label"][:35])
        persis.append(round(p, 4))
        colors.append("#3dba7a" if p >= 0.7 else "#f5a623" if p >= 0.4 else "#e04d4d")

    fig = go.Figure(go.Bar(
        x=persis, y=labels, orientation="h",
        marker_color=colors,
        text=[str(v) for v in persis],
        textposition="outside",
    ))
    fig.add_vline(x=0.7, line_dash="dot", line_color="#3dba7a",
                  annotation_text="Stable threshold (0.7)")
    fig.add_vline(x=0.4, line_dash="dot", line_color="#f5a623",
                  annotation_text="Borderline (0.4)")
    fig.update_layout(
        template="plotly_dark", height=max(400, len(labels)*28),
        paper_bgcolor="#0d1117", plot_bgcolor="#161b22",
        # Plotly breaks title lines on <br>; a "\n" is not rendered as a
        # line break, so the legend hint would run into the heading.
        title="Cluster Persistence β Proxy for Reproducibility<br>"
              "Green β₯ 0.7 (stable) Β· Orange 0.4β0.7 (borderline) Β· Red < 0.4 (fragile)",
        xaxis_title="Persistence Score", yaxis_title="",
        font=dict(size=10), margin=dict(l=260),
    )
    return fig
| # ── NEW: Human interpretability check ──────────────────────────────────────── | |
def _interpretability_df(interps: dict) -> pd.DataFrame:
    """Flag cluster labels that overlap with each other or are too vague.

    Two checks are run on the labels:
      1. Label overlap - every pair of labels sharing at least two
         significant words (three or more is rated HIGH, two MEDIUM).
      2. Vagueness - labels left with no specific word once stop-words and
         generic terms are removed.

    Significant words are lower-case alphabetic words of four or more letters
    that are not in the stop-word list.

    Args:
        interps: Mapping of cluster id to an interpretation dict; only its
            ``label`` is read.

    Returns:
        One row per finding (overlaps first, then vague labels), or a single
        "All Clear" row when nothing is flagged.
    """
    import itertools

    NOISE = {"the", "and", "for", "with", "using", "based", "from", "that", "are", "this",
             "in", "of", "a", "to", "an", "on", "at", "by", "or", "as", "is", "its", "via",
             "systems", "digital", "information", "management", "driven"}
    VAGUE_SINGLES = {"systems", "digital", "data", "information", "analysis", "research",
                     "study", "approach", "framework", "model", "methods", "technology"}

    def _words(label: str) -> set:
        # All lower-case alphabetic words with at least four letters.
        return set(re.findall(r"\b[a-z]{4,}\b", label.lower()))

    def _sig_words(label: str) -> set:
        return _words(label) - NOISE

    rows = []
    cids = sorted(interps.keys())
    labels_map = {cid: interps[cid]["label"] for cid in cids}

    # combinations() yields every unordered pair exactly once, so no extra
    # bookkeeping of visited pairs is needed.
    for cid_a, cid_b in itertools.combinations(cids, 2):
        la, lb = labels_map[cid_a], labels_map[cid_b]
        overlap = _sig_words(la) & _sig_words(lb)
        if len(overlap) >= 2:
            rows.append({
                "Issue": "β Label Overlap",
                "Cluster A": cid_a,
                "Label A": la,
                "Cluster B": cid_b,
                "Label B": lb,
                "Shared Words": ", ".join(sorted(overlap)),
                "Severity": "HIGH β consider merging" if len(overlap) >= 3
                            else "MEDIUM β review distinctiveness",
                "Action": "Check if these two clusters cover the same research theme. "
                          "If yes, increase min_cluster_size to force a merge.",
            })

    for cid in cids:
        label = labels_map[cid]
        words = _words(label)
        specific = (words - NOISE) - VAGUE_SINGLES
        if len(specific) == 0:
            # Take the generic terms from the raw word set: several of them
            # are also stop-words and would vanish after NOISE removal.
            # Sorted so the cell text is the same on every run.
            vague = words & VAGUE_SINGLES
            rows.append({
                "Issue": "β Too Vague",
                "Cluster A": cid,
                "Label A": label,
                "Cluster B": "β",
                "Label B": "β",
                "Shared Words": ", ".join(sorted(vague)),
                "Severity": "HIGH β label is not human interpretable",
                "Action": "Run optimization pass to refine the label, "
                          "or manually inspect keyphrases for more specific terms.",
            })

    if not rows:
        rows.append({
            "Issue": "β All Clear",
            "Cluster A": "β", "Label A": "All labels are distinct and specific",
            "Cluster B": "β", "Label B": "β",
            "Shared Words": "β", "Severity": "NONE",
            "Action": "Topic list is human interpretable and non-overlapping.",
        })
    return pd.DataFrame(rows)
| # ── Pipeline runner ────────────────────────────────────────────────────────── | |
def _run(corpus_file, method_file, gk, mk, gek, n_trials, n_optimize,
         progress=gr.Progress(track_tqdm=True)):
    """Run the full pipeline and build every value the UI outputs expect.

    Args:
        corpus_file: Uploaded corpus CSV (its path is read from ``.name``).
        method_file: Optional uploaded methodology CSV, or a falsy value.
        gk: Groq API key typed in the UI; falls back to ``GROQ_API_KEY``.
        mk: Mistral API key; falls back to ``MISTRAL_API_KEY``.
        gek: Gemini API key; falls back to ``GEMINI_API_KEY``.
        n_trials: Number of trials, passed to ``run_pipeline`` as an int.
        n_optimize: Number of optimization passes, passed as an int.
        progress: Gradio progress tracker.

    Returns:
        A 32-item tuple. The order is fixed (original outputs first, then the
        methodology-CSV outputs, then the cluster-size / reproducibility /
        interpretability outputs) and must not be changed without changing
        the output wiring of whatever calls this function.

    Raises:
        gr.Error: No corpus file, a missing API key, or an ``error`` entry in
            the pipeline result.
    """
    if not corpus_file:
        raise gr.Error("Upload a Scopus corpus CSV first.")
    # A key typed in the UI wins over the environment variable. `or ""` keeps
    # a None value from crashing on .strip().
    gk = (gk or "").strip() or os.getenv("GROQ_API_KEY", "")
    mk = (mk or "").strip() or os.getenv("MISTRAL_API_KEY", "")
    gek = (gek or "").strip() or os.getenv("GEMINI_API_KEY", "")
    if not all([gk, mk, gek]):
        raise gr.Error("All 3 API keys required.")
    method_path = method_file.name if method_file else None

    progress(0.05, desc="π₯ Loading CSVβ¦")
    progress(0.10, desc="π¬ Embedding corpus with SPECTER-2β¦")
    r = run_pipeline(corpus_file.name, gk, mk, gek,
                     int(n_trials), int(n_optimize), method_path)
    if r.get("error"):
        raise gr.Error(r["error"])

    progress(0.85, desc="π Building outputsβ¦")
    td, interps = r["topic_data"], r.get("interpretations", {})
    disc, met = td["discipline"], td["metrics"]
    ar = r.get("agreement_rates", {})
    rl = r.get("refinement_log", [])

    def _s(ok):
        return "β PASS" if ok else "β FAIL"

    summary = (
        f"## Pipeline Complete β {disc['n_clusters']} clusters discovered\n\n"
        f"| Criterion | Value | Status |\n|---|---|---|\n"
        f"| Max cluster mass | {round(disc['max_mass_pct']*100,1)}% | {_s(disc['max_mass_ok'])} |\n"
        f"| Min cluster size | {disc['min_size']} | {_s(disc['min_size_ok'])} |\n"
        f"| Persistence (mean) | {round(met['persistence'],4)} | β |\n"
        f"| DBCV | {round(met['dbcv'],4)} | β |\n"
        f"| Stability (3 seeds) | {round(met['stability'],4)} | β |\n\n"
        f"**Trials:** {td['n_trials_run']} (best #{td['best_trial']}) Β· "
        f"**Agreement:** Triple {ar.get('triple',0)}% Β· Two+ {ar.get('two_or_more',0)}% Β· "
        f"**Optimization passes:** {n_optimize} Β· **Labels refined:** {len(rl)}"
    )

    # UMAP scatter
    u2d = np.array(td["umap_2d"])
    sdf = pd.DataFrame({"UMAP-1": u2d[:, 0], "UMAP-2": u2d[:, 1],
                        "Cluster": [str(l) for l in td["labels"]],
                        "Doc": [d[:60] for d in td["documents"]]})
    fig = px.scatter(sdf, x="UMAP-1", y="UMAP-2", color="Cluster",
                     hover_data=["Doc"], opacity=0.75,
                     title="2-D UMAP visualisation of SPECTER-2 embeddings")
    fig.update_layout(template="plotly_dark", height=500,
                      paper_bgcolor="#0d1117", plot_bgcolor="#161b22", font=dict(size=11))

    # Trial log + Pareto
    tl = pd.DataFrame(td["trial_log"])
    tl_cols = [c for c in ["trial", "discipline_pass", "n_clusters", "persistence",
                           "dbcv", "max_mass_pct", "min_size", "n_noise"] if c in tl.columns]
    tl_show = tl[tl_cols] if not tl.empty else pd.DataFrame()
    pfig = go.Figure()
    # The table above tolerates missing trial-log columns; the scatter is held
    # to the same standard and drawn only when its four columns are present.
    pareto_cols = {"trial", "discipline_pass", "persistence", "max_mass_pct"}
    if not tl.empty and pareto_cols.issubset(tl.columns):
        for passed, color, name in [(True, "#3dba7a", "PASS"), (False, "#e04d4d", "FAIL")]:
            sub = tl[tl["discipline_pass"] == passed]
            if not sub.empty:
                pfig.add_trace(go.Scatter(
                    x=sub["max_mass_pct"], y=sub["persistence"],
                    mode="markers", marker=dict(size=8, color=color), name=name,
                    text=sub["trial"],
                    hovertemplate="Trial %{text}<br>Mass: %{x:.0%}<br>Pers: %{y:.3f}"))
    pfig.add_vline(x=0.25, line_dash="dash", line_color="#5a6480", annotation_text="25% rule")
    pfig.update_layout(template="plotly_dark", height=400,
                       paper_bgcolor="#0d1117", plot_bgcolor="#161b22",
                       title="Pareto front β Persistence vs Max cluster mass",
                       xaxis_title="Max cluster mass", yaxis_title="Persistence",
                       font=dict(size=11))

    cdf_rows = []
    for cid in sorted(interps.keys()):
        v = interps[cid]
        cdf_rows.append({"Cluster": cid, "Label": v["label"], "Agreement": v["agreement"],
                         "Strong": v["strong"], "Weak": v["weak"],
                         "Persistence": round(v.get("persistence", 0), 4),
                         "Keyphrases": ", ".join(v.get("keyphrases", []))})
    cdf = pd.DataFrame(cdf_rows)

    sheets = r.get("sheets", {})
    s1 = pd.DataFrame(sheets.get(1, []))
    s2 = pd.DataFrame(sheets.get(2, []))
    s3 = pd.DataFrame(sheets.get(3, []))
    s4 = pd.DataFrame(sheets.get(4, []))
    sp = r.get("sheet_paths", {})
    mdf = pd.DataFrame(r.get("mismatch_table", []))

    md_data = r.get("methodology_data", {})
    top_papers_df = _top_papers_df(r.get("top_papers", {}))
    method_sum_df = _methodology_summary_df(md_data, interps)
    method_chart = _methodology_bar_chart(md_data, interps)
    extraction_df = _extraction_pipeline_df(md_data, interps)
    per_llm_meth_df = _per_llm_methodology_df(md_data, interps)
    regex_hits_df = _regex_hits_df(md_data, interps)
    pattern_info = _regex_pattern_info()
    refine_df = _refinement_df(rl)

    # Methodology-CSV outputs
    comp_sheets = r.get("comp_technique_sheets", {1: [], 2: [], 3: [], 4: []})
    jct = r.get("journal_crosstab", {})
    tech_opt_log = r.get("technique_opt_log", [])

    tech_s1 = _tech_sheet_df(comp_sheets.get(1, []))
    tech_s2 = _tech_sheet_df(comp_sheets.get(2, []))
    tech_s3 = _tech_sheet_df(comp_sheets.get(3, []))
    tech_s4 = _tech_sheet_df(comp_sheets.get(4, []))
    tech_llm_chart = _tech_llm_pct_chart(comp_sheets)
    jct_chart = _journal_crosstab_chart(jct)
    jct_df = _journal_crosstab_df(jct)
    per_llm_freq_df = _per_llm_freq_df(jct)
    tech_opt_df = _tech_opt_df(tech_opt_log)

    # Cluster sizes, reproducibility, interpretability
    cluster_sizes_fig = _cluster_sizes_chart(interps, disc)
    repro_chart = _reproducibility_chart(td, interps)
    repro_df = _reproducibility_df(td, interps)
    interpretability_df = _interpretability_df(interps)

    progress(1.0, desc="β Done!")
    # Only paths the pipeline actually produced are offered for download.
    dl_files = [f for f in [sp.get(1), sp.get(2), sp.get(3), sp.get(4), r.get("json_path")] if f]
    return (
        # original outputs (order preserved)
        summary, fig, pfig, tl_show, cdf,
        top_papers_df,
        method_chart, method_sum_df, extraction_df, per_llm_meth_df,
        regex_hits_df, pattern_info,
        refine_df,
        s1, s2, s3, s4,
        dl_files if dl_files else None,
        mdf,
        # methodology-CSV outputs
        tech_llm_chart,
        tech_s1, tech_s2, tech_s3, tech_s4,
        per_llm_freq_df,
        jct_chart,
        jct_df,
        tech_opt_df,
        # supervisor additions
        cluster_sizes_fig,
        repro_chart,
        repro_df,
        interpretability_df,
    )
| # ── UI ─────────────────────────────────────────────────────────────────────── | |
# Page-level CSS handed to gr.Blocks: sets the container's background and text
# colours and hides the footer element.
css = (
    ".gradio-container{background:#0d1117!important;color:#c9d1d9!important}"
    "footer{display:none!important}"
)
# Build the whole Gradio interface: a sidebar column with the two CSV uploads,
# API keys and run parameters, and a main column holding one tab per pipeline
# output. Event listeners are registered at the end of the Blocks context.
#
# NOTE(review): several user-facing strings below contain mis-encoded
# characters (e.g. "β₯" where "≥" was presumably intended). They are
# reproduced verbatim here; restore them from the original UTF-8 source.
base_theme = gr.themes.Base(primary_hue="blue", neutral_hue="slate")

with gr.Blocks(theme=base_theme, css=css, title="SPECTER-2 Topic Analyzer") as demo:
    gr.Markdown("# π SPECTER-2 Topic Analyzer")

    with gr.Row():
        # ---- Left sidebar: inputs ------------------------------------------
        with gr.Column(scale=1):
            gr.Markdown("### π Corpus CSV")
            file_in = gr.File(
                label="Upload Scopus CSV (title + abstract)",
                file_types=[".csv"],
            )
            preview_out = gr.Markdown("Upload a CSV to see stats.")

            gr.Markdown("### π¬ Methodology CSV *(optional)*")
            method_file_in = gr.File(
                label="Upload Methodology CSV (title, doi, methodology)",
                file_types=[".csv"],
            )
            method_preview = gr.Markdown(
                "Upload methodology CSV to enable technique analysis."
            )

            gr.Markdown("### π API Keys")
            groq_in = gr.Textbox(
                label="Groq API Key",
                type="password",
                placeholder="or set GROQ_API_KEY env var",
            )
            mistral_in = gr.Textbox(
                label="Mistral API Key",
                type="password",
                placeholder="or set MISTRAL_API_KEY env var",
            )
            gemini_in = gr.Textbox(
                label="Gemini API Key",
                type="password",
                placeholder="or set GEMINI_API_KEY env var",
            )

            gr.Markdown("### β Parameters")
            trials_in = gr.Slider(
                minimum=10, maximum=100, value=50, step=5,
                label="Optuna Trials",
            )
            optimize_in = gr.Slider(
                minimum=1, maximum=5, value=1, step=1,
                label="π Optimization Passes",
                info=(
                    "Pass 1 = no refinement. 2β5 = LLM critic audits topic labels "
                    "AND technique labels for hallucinations + improvements."
                ),
            )
            run_btn = gr.Button("βΆ Run Full Pipeline", variant="primary", size="lg")

        # ---- Main panel: one tab per output --------------------------------
        with gr.Column(scale=3):
            with gr.Tabs():
                # -- original tabs ------------------------------------------
                with gr.Tab("Summary"):
                    summary_out = gr.Markdown()

                with gr.Tab("2-D UMAP"):
                    scatter_out = gr.Plot()

                with gr.Tab("Pareto Front"):
                    pareto_out = gr.Plot()

                with gr.Tab("Trial Log"):
                    trial_out = gr.Dataframe()

                with gr.Tab("Clusters"):
                    cluster_out = gr.Dataframe()

                with gr.Tab("π Top 3 Papers"):
                    gr.Markdown(
                        "### Top 3 Representative Papers per Cluster\n"
                        "Ranked by cosine similarity to cluster centroid "
                        "in SPECTER-2 embedding space."
                    )
                    top_papers_out = gr.Dataframe(
                        headers=["Cluster", "Label", "Rank", "Title", "Abstract Snippet"],
                        wrap=True,
                    )

                with gr.Tab("π¬ Cluster Methodology"):
                    gr.Markdown(
                        "### Cluster-Level Methodology β 3-LLM Council\n"
                        "Derived from representative abstracts per cluster. "
                        "β₯2-LLM gate applied."
                    )
                    method_chart_out = gr.Plot()
                    method_summary_out = gr.Dataframe(wrap=True)

                with gr.Tab("β Cluster Extraction Pipeline"):
                    gr.Markdown("### Full Regex + LLM Extraction Trace (per cluster)")
                    extraction_out = gr.Dataframe(wrap=True)

                with gr.Tab("π€ Cluster Per-LLM Votes"):
                    gr.Markdown("### Raw Per-LLM Methodology Votes (per cluster)")
                    per_llm_out = gr.Dataframe(wrap=True)

                with gr.Tab("π Cluster Regex Hits"):
                    gr.Markdown(
                        "### Regex Pattern Matches (per cluster)\n"
                        "Every match with exact character span and paper number."
                    )
                    regex_hits_out = gr.Dataframe(wrap=True)
                    regex_info_out = gr.Markdown()

                with gr.Tab("π Refinement Log"):
                    gr.Markdown(
                        "### Topic Label Optimization Log\n"
                        "Changes made by LLM critic per optimization pass."
                    )
                    refine_out = gr.Dataframe(wrap=True)

                with gr.Tab("Sheet 1 β Groq"):
                    s1_out = gr.Dataframe()

                with gr.Tab("Sheet 2 β Mistral"):
                    s2_out = gr.Dataframe()

                with gr.Tab("Sheet 3 β Gemini"):
                    s3_out = gr.Dataframe()

                with gr.Tab("Sheet 4 β Consolidated"):
                    s4_out = gr.Dataframe()

                with gr.Tab("RQ Mismatch"):
                    mismatch_out = gr.Dataframe()

                with gr.Tab("Downloads"):
                    dl_out = gr.File(
                        label="All sheet CSVs + topics.json",
                        file_count="multiple",
                    )

                # -- methodology-CSV pipeline tabs --------------------------
                with gr.Tab("π» Comp. Techniques β LLM % Chart"):
                    gr.Markdown(
                        "### Computational Technique Frequency β Methodology CSV\n"
                        "For each technique, shows the % of papers it was extracted "
                        "from by each of the 3 LLMs independently + the consolidated "
                        "result (β₯2-LLM gate). Bars grouped by technique."
                    )
                    tech_llm_chart_out = gr.Plot()

                with gr.Tab("π» Tech Sheet 1 β Groq"):
                    gr.Markdown("### Groq raw technique extraction β one row per paper")
                    tech_s1_out = gr.Dataframe(wrap=True)

                with gr.Tab("π» Tech Sheet 2 β Mistral"):
                    gr.Markdown("### Mistral raw technique extraction β one row per paper")
                    tech_s2_out = gr.Dataframe(wrap=True)

                with gr.Tab("π» Tech Sheet 3 β Gemini"):
                    gr.Markdown("### Gemini raw technique extraction β one row per paper")
                    tech_s3_out = gr.Dataframe(wrap=True)

                with gr.Tab("π» Tech Sheet 4 β Consolidated"):
                    gr.Markdown(
                        "### Consolidated techniques β β₯2-LLM agreement, one row per paper"
                    )
                    tech_s4_out = gr.Dataframe(wrap=True)

                with gr.Tab("π Tech Frequency by LLM"):
                    gr.Markdown(
                        "### Per-LLM Technique Frequency Table\n"
                        "% of all papers where each LLM extracted each technique. "
                        "High variance = LLMs disagree β optimization flag."
                    )
                    per_llm_freq_out = gr.Dataframe(wrap=True)

                with gr.Tab("π Journal Cross-Tabulation"):
                    gr.Markdown(
                        "### Technique Γ Journal Cross-Tabulation\n"
                        "Rows = journals auto-detected from DOI/title. "
                        "Columns = consolidated techniques. "
                        "Values = % of papers in that journal using the technique.\n\n"
                        "**Journals detected:** MISQ, JAIS, ISR, JMIS, PAJAIS, "
                        "ECIS, ICIS, Other."
                    )
                    jct_chart_out = gr.Plot()
                    jct_df_out = gr.Dataframe(wrap=True)

                with gr.Tab("π§ Technique Optimization"):
                    gr.Markdown(
                        "### Technique Label Improvement Suggestions\n"
                        "Groq critic flags: hallucination, high inter-LLM variance "
                        "(>15% gap), split/merge recommendations.\n"
                        "Only runs when Optimization Passes β₯ 2."
                    )
                    tech_opt_out = gr.Dataframe(wrap=True)

                # -- supervisor-requested tabs ------------------------------
                with gr.Tab("π Cluster Sizes"):
                    gr.Markdown(
                        "### Cluster Sizes (Papers per Cluster)\n"
                        "Exact chart your supervisor highlighted. "
                        "**Green** = passes both discipline rules (mass β€ 25%, size β₯ 5). "
                        "**Yellow** = cluster exceeds 25% mass cap β dominant cluster warning. "
                        "**Red** = cluster has fewer than 5 papers β too small.\n\n"
                        "The orange dashed line marks the 25% cap. Any bar above it "
                        "will fail the discipline check and the pipeline will re-optimise."
                    )
                    cluster_sizes_out = gr.Plot()

                with gr.Tab("π Reproducibility"):
                    gr.Markdown(
                        "### Reproducibility β 'Run Again and Again, Topic List is the Same'\n\n"
                        "Your supervisor wants proof that running the pipeline multiple times "
                        "produces the **same clusters**. This tab shows two measures:\n\n"
                        "**Overall ARI Stability** (top row) β Adjusted Rand Index averaged "
                        "across 3 random seeds. ARI = 1.0 means identical clusters every run. "
                        "ARI β₯ 0.8 is considered stable for publication.\n\n"
                        "**Cluster Persistence** (per row) β how strongly each cluster's "
                        "structure is preserved in the condensed HDBSCAN tree. "
                        "High persistence β cluster survives parameter variation β "
                        "same label will appear on re-run. "
                        "Low persistence β cluster may split or merge β label may change.\n\n"
                        "π’ β₯ 0.7 Stable Β· π‘ 0.4β0.7 Borderline Β· π΄ < 0.4 Fragile"
                    )
                    repro_chart_out = gr.Plot()
                    repro_df_out = gr.Dataframe(wrap=True)

                with gr.Tab("π§ Interpretability Check"):
                    gr.Markdown(
                        "### Human Interpretability Check β 'Topic List Must Be Distinct'\n\n"
                        "Your supervisor flagged that labels like "
                        "*'Cybersecurity and Privacy'* and *'Cyber-Risk Management and Online Security'* "
                        "look like the same topic. This tab automatically detects:\n\n"
                        "**β Label Overlap** β pairs of cluster labels sharing β₯ 2 significant "
                        "words (noise words like 'and', 'for', 'in' excluded). "
                        "Overlapping labels suggest the two clusters may cover the same theme "
                        "and should be reviewed for merging.\n\n"
                        "**β Too Vague** β labels where all meaningful words are generic "
                        "('systems', 'digital', 'data') with no domain-specific content. "
                        "These need the optimization pass to refine them.\n\n"
                        "**Action column** tells you exactly what to do for each flag."
                    )
                    interpretability_out = gr.Dataframe(wrap=True)

    # ---- Wire callbacks ----------------------------------------------------
    # Each upload widget refreshes its own preview panel when its value changes.
    file_in.change(fn=_preview, inputs=[file_in], outputs=[preview_out])
    method_file_in.change(
        fn=_preview_methodology,
        inputs=[method_file_in],
        outputs=[method_preview],
    )

    # The three groups below are concatenated in the order of the tuple
    # returned by `_run`. Note that `dl_out` precedes `mismatch_out` here even
    # though the "RQ Mismatch" tab is declared before "Downloads".
    original_outputs = [
        summary_out, scatter_out, pareto_out, trial_out, cluster_out,
        top_papers_out,
        method_chart_out, method_summary_out, extraction_out, per_llm_out,
        regex_hits_out, regex_info_out,
        refine_out,
        s1_out, s2_out, s3_out, s4_out,
        dl_out, mismatch_out,
    ]
    technique_outputs = [
        tech_llm_chart_out,
        tech_s1_out, tech_s2_out, tech_s3_out, tech_s4_out,
        per_llm_freq_out,
        jct_chart_out,
        jct_df_out,
        tech_opt_out,
    ]
    supervisor_outputs = [
        cluster_sizes_out,
        repro_chart_out,
        repro_df_out,
        interpretability_out,
    ]
    run_btn.click(
        fn=_run,
        inputs=[
            file_in, method_file_in,
            groq_in, mistral_in, gemini_in,
            trials_in, optimize_in,
        ],
        outputs=original_outputs + technique_outputs + supervisor_outputs,
    )
if __name__ == "__main__":
    # Script entry point: serve the Blocks app on all interfaces ("0.0.0.0")
    # at port 7860.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    demo.launch(**launch_options)