Spaces:

phonix-db
/

phonix-mcp-server

Running

App Files Files Community

mohnishi committed 28 days ago

Commit

362a906

1 Parent(s): bc41dfd

Fix: switch to FastAPI MCP, remove gradio dependency

Browse files

Files changed (3) hide show

app.py +41 -253
do.sh +4 -0
requirements.txt +4 -1

app.py CHANGED Viewed

@@ -1,22 +1,18 @@
 """
 Phonix Database MCP Server
-HuggingFace Spaces deployment (Gradio + MCP)
-Dataset: phonix-db/phonix-summary
 """
-import gradio as gr
 from mcp.server.fastmcp import FastMCP
 from datasets import load_dataset
 import pandas as pd
 import json
-# ── MCP Server Initialization ──────────────────────────────────────────
 mcp = FastMCP(
     "Phonix Database",
     instructions="""
     Phonix is a first-principles database for anharmonic phonon interactions.
-    It contains ~17,000 calculations of lattice thermal conductivity and related properties.
     Available tools:
     - search_by_formula   : Search by chemical formula (e.g. "Si", "MgO", "BeTe")
@@ -28,7 +24,6 @@ mcp = FastMCP(
     """
 )
-# ── Dataset Loading (with Cache) ───────────────────────────────────────
 _df: pd.DataFrame | None = None
 def get_df() -> pd.DataFrame:
@@ -38,10 +33,7 @@ def get_df() -> pd.DataFrame:
         _df = ds.to_pandas()
     return _df
-# ── Helper Functions ──────────────────────────────────────────────────
 def _serialize(df: pd.DataFrame, max_rows: int = 50) -> str:
-    """Serialize DataFrame to JSON (omit 'structure' column)"""
     cols = [c for c in df.columns if c != "structure"]
     subset = df[cols].head(max_rows)
     return json.dumps({
@@ -50,42 +42,27 @@ def _serialize(df: pd.DataFrame, max_rows: int = 50) -> str:
         "entries": subset.where(pd.notna(subset), None).to_dict(orient="records")
     }, ensure_ascii=False, indent=2)
-# ── MCP Tool Definitions ──────────────────────────────────────────────
 @mcp.tool()
 def search_by_formula(formula: str) -> str:
-    """
-    Search entries by chemical formula (partial match, case-insensitive).
     Args:
-        formula: Chemical formula or element symbol, e.g. "Si", "MgO", "BeTe", "LaP"
-    Returns:
-        JSON with matched entries (up to 50 rows).
     """
     df = get_df()
     mask = df["formula"].str.contains(formula, case=False, na=False)
-    result = df[mask]
-    return _serialize(result)
 @mcp.tool()
 def search_by_elements(elements: list[str]) -> str:
-    """
-    Search entries that contain ALL specified elements.
     Args:
-        elements: List of element symbols, e.g. ["Si", "O"] for silicon oxides
-    Returns:
-        JSON with matched entries (up to 50 rows).
     """
     df = get_df()
     mask = pd.Series([True] * len(df), index=df.index)
     for el in elements:
         mask &= df["formula"].str.contains(el, case=False, na=False)
-    result = df[mask]
-    return _serialize(result)
 @mcp.tool()
 def filter_by_kappa(
@@ -93,61 +70,43 @@ def filter_by_kappa(
     max_klat: float | None = None,
     only_converged: bool = True
 ) -> str:
-    """
-    Filter entries by lattice thermal conductivity klat [W/mK].
     Args:
         min_klat: Minimum klat value in W/mK (optional)
         max_klat: Maximum klat value in W/mK (optional)
         only_converged: If True, exclude entries where klat is null (default: True)
-    Returns:
-        JSON with matched entries sorted by klat descending (up to 50 rows).
     """
     df = get_df()
     result = df.copy()
     if only_converged:
         result = result[result["klat[W/mK]"].notna()]
     if min_klat is not None:
         result = result[result["klat[W/mK]"] >= min_klat]
     if max_klat is not None:
         result = result[result["klat[W/mK]"] <= max_klat]
     result = result.sort_values("klat[W/mK]", ascending=False)
     return _serialize(result)
 @mcp.tool()
 def filter_by_spacegroup(spg_number: int) -> str:
-    """
-    Filter entries by space group number.
     Args:
-        spg_number: International space group number (1–230), e.g. 225 for Fm-3m, 227 for Fd-3m
-    Returns:
-        JSON with matched entries (up to 50 rows).
     """
     df = get_df()
-    result = df[df["spg_number"] == spg_number]
-    return _serialize(result)
 @mcp.tool()
 def get_entry(input_dir: str) -> str:
-    """
-    Get full details for a specific calculation entry, including structure data.
     Args:
-        input_dir: The input_dir identifier (e.g. "mp-149", "mp-149-2", "mp-24")
-    Returns:
-        JSON with all columns including structure data.
     """
     df = get_df()
     result = df[df["input_dir"] == input_dir]
     if result.empty:
         return json.dumps({"error": f"Entry '{input_dir}' not found."})
     row = result.iloc[0].where(pd.notna(result.iloc[0]), None).to_dict()
-    # Parse structure if it is a JSON string
     if row.get("structure") and isinstance(row["structure"], str):
         try:
             row["structure"] = json.loads(row["structure"])
@@ -155,207 +114,36 @@ def get_entry(input_dir: str) -> str:
             pass
     return json.dumps(row, ensure_ascii=False, indent=2)
 @mcp.tool()
 def list_columns() -> str:
-    """
-    List all available columns in the Phonix summary database with descriptions.
-    Returns:
-        JSON with column names and descriptions.
-    """
     columns = {
-        "mp_id":            "Materials Project ID (e.g. mp-149 for Si diamond)",
-        "input_dir":        "Unique calculation directory name (use this for get_entry)",
-        "formula":          "Chemical formula (e.g. Si, MgO, BeTe)",
-        "spg_number":       "International space group number (1–230)",
-        "natoms_prim":      "Number of atoms in primitive cell",
-        "natoms_conv":      "Number of atoms in conventional cell",
-        "natoms_sc":        "Number of atoms in supercell used for fc2/fc3",
-        "trans_conv2prim":  "Transformation matrix from conventional to primitive cell",
-        "trans_conv2sc":    "Transformation matrix from conventional to supercell",
-        "structure":        "Crystal structure (ASE-compatible JSON: cell, positions, symbols)",
-        "volume[A^3]":      "Cell volume in cubic angstroms",
-        "nac":              "Non-analytical correction flag (0 or 1)",
-        "volume_relaxation":"Volume relaxation flag (0=fixed, 1=relaxed)",
-        "scph":             "Self-consistent phonon (SCPH) flag",
-        "four":             "4th-order force constants flag",
-        "kappa_type":       "Thermal conductivity calculation type (e.g. '3ph')",
-        "qmesh":            "q-point mesh for BTE (e.g. '21x21x21')",
-        "kp[W/mK]":         "Lattice thermal conductivity - p (off-diagonal) contribution [W/mK]",
-        "kc[W/mK]":         "Lattice thermal conductivity - c (coherence) contribution [W/mK]",
-        "klat[W/mK]":       "Total lattice thermal conductivity (kp+kc) [W/mK]",
-        "min_phfreq[cm^-1]":"Minimum phonon frequency [cm^-1] (negative = imaginary mode)",
-        "max_phfreq[cm^-1]":"Maximum phonon frequency [cm^-1]",
-        "fc2_error[%]":     "2nd-order force constants fitting error [%]",
-        "fc3_error[%]":     "3rd-order force constants fitting error [%]",
-        "calc_time[sec]":   "Total calculation time [seconds]",
     }
     return json.dumps(columns, ensure_ascii=False, indent=2)
-# ── Gradio UI ─────────────────────────────────────────────────────
-CSS = """
-@import url('https://fonts.googleapis.com/css2?family=Space+Mono:wght@400;700&family=Inter:wght@300;400;600&display=swap');
-body, .gradio-container {
-    background: #0a0e1a !important;
-    color: #c8d6e5 !important;
-    font-family: 'Inter', sans-serif !important;
-}
-h1, h2, h3 { font-family: 'Space Mono', monospace !important; }
-.phonix-header {
-    text-align: center;
-    padding: 2rem 1rem 1.5rem;
-    border-bottom: 1px solid #1e2d45;
-    margin-bottom: 1.5rem;
-}
-.phonix-title {
-    font-family: 'Space Mono', monospace;
-    font-size: 2rem;
-    font-weight: 700;
-    color: #4fc3f7;
-    letter-spacing: 0.08em;
-    margin: 0;
-}
-.phonix-subtitle {
-    color: #607d8b;
-    font-size: 0.85rem;
-    margin-top: 0.4rem;
-    letter-spacing: 0.04em;
-}
-.mcp-badge {
-    display: inline-block;
-    background: #0d47a1;
-    color: #90caf9;
-    font-family: 'Space Mono', monospace;
-    font-size: 0.7rem;
-    padding: 0.2rem 0.6rem;
-    border-radius: 3px;
-    margin-top: 0.6rem;
-    letter-spacing: 0.06em;
-}
-.stat-bar {
-    display: flex;
-    gap: 1.5rem;
-    justify-content: center;
-    padding: 0.8rem;
-    background: #0f1829;
-    border-radius: 6px;
-    margin-bottom: 1.5rem;
-    flex-wrap: wrap;
-}
-.stat-item {
-    text-align: center;
-}
-.stat-value {
-    font-family: 'Space Mono', monospace;
-    font-size: 1.2rem;
-    color: #4fc3f7;
-    display: block;
-}
-.stat-label {
-    font-size: 0.7rem;
-    color: #546e7a;
-    text-transform: uppercase;
-    letter-spacing: 0.05em;
-}
-.gr-button-primary {
-    background: #0d47a1 !important;
-    border: 1px solid #1565c0 !important;
-    font-family: 'Space Mono', monospace !important;
-}
-.gr-button-primary:hover {
-    background: #1565c0 !important;
-}
-label { color: #90a4ae !important; font-size: 0.8rem !important; }
-textarea, input[type="text"], input[type="number"] {
-    background: #0f1829 !important;
-    border: 1px solid #1e2d45 !important;
-    color: #c8d6e5 !important;
-    font-family: 'Space Mono', monospace !important;
-    font-size: 0.85rem !important;
-}
-"""
-def ui_search_formula(formula):
-    return search_by_formula(formula)
-def ui_filter_kappa(min_k, max_k, converged):
-    return filter_by_kappa(
-        min_klat=float(min_k) if min_k else None,
-        max_klat=float(max_k) if max_k else None,
-        only_converged=converged
-    )
-def ui_get_entry(input_dir):
-    return get_entry(input_dir.strip())
-with gr.Blocks(css=CSS, title="Phonix Database MCP") as demo:
-    gr.HTML("""
-    <div class="phonix-header">
-      <p class="phonix-title">⟨ PHONIX DATABASE ⟩</p>
-      <p class="phonix-subtitle">Database for Anharmonic Phonon Interactions · First-Principles</p>
-      <span class="mcp-badge">MCP SERVER ACTIVE</span>
-    </div>
-    """)
-    gr.HTML("""
-    <div class="stat-bar">
-      <div class="stat-item"><span class="stat-value">~17,300</span><span class="stat-label">Calculations</span></div>
-      <div class="stat-item"><span class="stat-value">klat</span><span class="stat-label">Thermal Conductivity</span></div>
-      <div class="stat-item"><span class="stat-value">3ph/SCPH</span><span class="stat-label">Methods</span></div>
-      <div class="stat-item"><span class="stat-value">CC BY 4.0</span><span class="stat-label">License</span></div>
-    </div>
-    """)
-    with gr.Tabs():
-        with gr.Tab("🔍 Formula Search"):
-            formula_in = gr.Textbox(label="Chemical Formula", placeholder="Si, MgO, BeTe, LaP7 ...")
-            formula_btn = gr.Button("Search", variant="primary")
-            formula_out = gr.Code(language="json", label="Results")
-            formula_btn.click(ui_search_formula, inputs=formula_in, outputs=formula_out)
-        with gr.Tab("🌡️ κ Filter"):
-            with gr.Row():
-                min_k = gr.Number(label="Min klat [W/mK]", value=None)
-                max_k = gr.Number(label="Max klat [W/mK]", value=None)
-            converged = gr.Checkbox(label="Only converged (klat not null)", value=True)
-            kappa_btn = gr.Button("Filter", variant="primary")
-            kappa_out = gr.Code(language="json", label="Results")
-            kappa_btn.click(ui_filter_kappa, inputs=[min_k, max_k, converged], outputs=kappa_out)
-        with gr.Tab("📋 Entry Detail"):
-            entry_in = gr.Textbox(label="input_dir", placeholder="mp-149, mp-149-2, mp-24 ...")
-            entry_btn = gr.Button("Get Entry", variant="primary")
-            entry_out = gr.Code(language="json", label="Full Entry Data")
-            entry_btn.click(ui_get_entry, inputs=entry_in, outputs=entry_out)
-        with gr.Tab("ℹ️ Column Guide"):
-            col_btn = gr.Button("Show Column Descriptions", variant="primary")
-            col_out = gr.Code(language="json", label="Columns")
-            col_btn.click(lambda: list_columns(), outputs=col_out)
-    gr.Markdown(
-        "**MCP Endpoint**: `https://phonix-db-phonix-mcp-server.hf.space/gradio_api/mcp/sse`  \n"
-        "Dataset: [phonix-db/phonix-summary](https://huggingface.co/datasets/phonix-db/phonix-summary) · "
-        "[phonix-db.org](https://phonix-db.org)",
-        elem_classes=["phonix-subtitle"]
-    )
-# Launch MCP + Gradio simultaneously
-demo.launch(mcp_server=True)

 """
 Phonix Database MCP Server
+HuggingFace Spaces deployment (FastMCP server, MCP over SSE)
 """
 from mcp.server.fastmcp import FastMCP
 from datasets import load_dataset
 import pandas as pd
 import json
 mcp = FastMCP(
     "Phonix Database",
     instructions="""
     Phonix is a first-principles database for anharmonic phonon interactions.
+    ~17,000 calculations of lattice thermal conductivity and related properties.
     Available tools:
     - search_by_formula   : Search by chemical formula (e.g. "Si", "MgO", "BeTe")
     """
 )
 _df: pd.DataFrame | None = None
 def get_df() -> pd.DataFrame:
         _df = ds.to_pandas()
     return _df
 def _serialize(df: pd.DataFrame, max_rows: int = 50) -> str:
     cols = [c for c in df.columns if c != "structure"]
     subset = df[cols].head(max_rows)
     return json.dumps({
         "entries": subset.where(pd.notna(subset), None).to_dict(orient="records")
     }, ensure_ascii=False, indent=2)
 @mcp.tool()
 def search_by_formula(formula: str) -> str:
+    """Search entries by chemical formula (partial match, case-insensitive).
     Args:
+        formula: Chemical formula or element symbol, e.g. "Si", "MgO", "BeTe"
     """
     df = get_df()
     mask = df["formula"].str.contains(formula, case=False, na=False)
+    return _serialize(df[mask])
 @mcp.tool()
 def search_by_elements(elements: list[str]) -> str:
+    """Search entries that contain ALL specified elements.
     Args:
+        elements: List of element symbols, e.g. ["Si", "O"]
     """
     df = get_df()
     mask = pd.Series([True] * len(df), index=df.index)
     for el in elements:
         mask &= df["formula"].str.contains(el, case=False, na=False)
+    return _serialize(df[mask])
 @mcp.tool()
 def filter_by_kappa(
     max_klat: float | None = None,
     only_converged: bool = True
 ) -> str:
+    """Filter entries by lattice thermal conductivity klat [W/mK].
     Args:
         min_klat: Minimum klat value in W/mK (optional)
         max_klat: Maximum klat value in W/mK (optional)
         only_converged: If True, exclude entries where klat is null (default: True)
     """
     df = get_df()
     result = df.copy()
     if only_converged:
         result = result[result["klat[W/mK]"].notna()]
     if min_klat is not None:
         result = result[result["klat[W/mK]"] >= min_klat]
     if max_klat is not None:
         result = result[result["klat[W/mK]"] <= max_klat]
     result = result.sort_values("klat[W/mK]", ascending=False)
     return _serialize(result)
 @mcp.tool()
 def filter_by_spacegroup(spg_number: int) -> str:
+    """Filter entries by space group number.
     Args:
+        spg_number: International space group number (1-230)
     """
     df = get_df()
+    return _serialize(df[df["spg_number"] == spg_number])
 @mcp.tool()
 def get_entry(input_dir: str) -> str:
+    """Get full details for a specific calculation entry, including structure data.
     Args:
+        input_dir: The input_dir identifier (e.g. "mp-149", "mp-149-2")
     """
     df = get_df()
     result = df[df["input_dir"] == input_dir]
     if result.empty:
         return json.dumps({"error": f"Entry '{input_dir}' not found."})
     row = result.iloc[0].where(pd.notna(result.iloc[0]), None).to_dict()
     if row.get("structure") and isinstance(row["structure"], str):
         try:
             row["structure"] = json.loads(row["structure"])
             pass
     return json.dumps(row, ensure_ascii=False, indent=2)
 @mcp.tool()
 def list_columns() -> str:
+    """List all available columns in the Phonix summary database with descriptions."""
     columns = {
+        "mp_id":             "Materials Project ID (e.g. mp-149 for Si diamond)",
+        "input_dir":         "Unique calculation directory name (use for get_entry)",
+        "formula":           "Chemical formula (e.g. Si, MgO, BeTe)",
+        "spg_number":        "International space group number (1-230)",
+        "natoms_prim":       "Number of atoms in primitive cell",
+        "natoms_conv":       "Number of atoms in conventional cell",
+        "natoms_sc":         "Number of atoms in supercell for fc2/fc3",
+        "structure":         "Crystal structure JSON (cell, positions, symbols)",
+        "volume[A^3]":       "Cell volume in cubic angstroms",
+        "nac":               "Non-analytical correction flag (0 or 1)",
+        "volume_relaxation": "Volume relaxation flag (0=fixed, 1=relaxed)",
+        "scph":              "Self-consistent phonon (SCPH) flag",
+        "four":              "4th-order force constants flag",
+        "kappa_type":        "Thermal conductivity calculation type (e.g. '3ph')",
+        "qmesh":             "q-point mesh for BTE (e.g. '21x21x21')",
+        "kp[W/mK]":          "Lattice thermal conductivity p-contribution [W/mK]",
+        "kc[W/mK]":          "Lattice thermal conductivity c-contribution [W/mK]",
+        "klat[W/mK]":        "Total lattice thermal conductivity (kp+kc) [W/mK]",
+        "min_phfreq[cm^-1]": "Minimum phonon frequency (negative = imaginary mode)",
+        "max_phfreq[cm^-1]": "Maximum phonon frequency [cm^-1]",
+        "fc2_error[%]":      "2nd-order force constants fitting error [%]",
+        "fc3_error[%]":      "3rd-order force constants fitting error [%]",
+        "calc_time[sec]":    "Total calculation time [seconds]",
     }
     return json.dumps(columns, ensure_ascii=False, indent=2)
+if __name__ == "__main__":
+    # HuggingFace Spaces uses PORT 7860
+    mcp.run(transport="sse", host="0.0.0.0", port=7860)

do.sh ADDED Viewed

	@@ -0,0 +1,4 @@

+git add .
+git commit -m "Fix: switch to FastAPI MCP, remove gradio dependency"
+git push

requirements.txt CHANGED Viewed

@@ -1,4 +1,7 @@
-gradio[mcp]>=5.0.0
 mcp[cli]>=1.0.0
 datasets>=2.18.0
 pandas>=2.0.0

+gradio==4.44.1
 mcp[cli]>=1.0.0
 datasets>=2.18.0
 pandas>=2.0.0
+pydub==0.25.1
+pyaudioop==0.1.0