mohnishi committed on
Commit
362a906
·
1 Parent(s): bc41dfd

Fix: switch to FastAPI MCP, remove gradio dependency

Browse files
Files changed (3) hide show
  1. app.py +41 -253
  2. do.sh +4 -0
  3. requirements.txt +4 -1
app.py CHANGED
@@ -1,22 +1,18 @@
1
  """
2
  Phonix Database MCP Server
3
- HuggingFace Spaces deployment (Gradio + MCP)
4
-
5
- Dataset: phonix-db/phonix-summary
6
  """
7
 
8
- import gradio as gr
9
  from mcp.server.fastmcp import FastMCP
10
  from datasets import load_dataset
11
  import pandas as pd
12
  import json
13
 
14
- # ── MCP Server Initialization ──────────────────────────────────────────
15
  mcp = FastMCP(
16
  "Phonix Database",
17
  instructions="""
18
  Phonix is a first-principles database for anharmonic phonon interactions.
19
- It contains ~17,000 calculations of lattice thermal conductivity and related properties.
20
 
21
  Available tools:
22
  - search_by_formula : Search by chemical formula (e.g. "Si", "MgO", "BeTe")
@@ -28,7 +24,6 @@ mcp = FastMCP(
28
  """
29
  )
30
 
31
- # ── Dataset Loading (with Cache) ───────────────────────────────────────
32
  _df: pd.DataFrame | None = None
33
 
34
  def get_df() -> pd.DataFrame:
@@ -38,10 +33,7 @@ def get_df() -> pd.DataFrame:
38
  _df = ds.to_pandas()
39
  return _df
40
 
41
-
42
- # ── Helper Functions ──────────────────────────────────────────────────
43
  def _serialize(df: pd.DataFrame, max_rows: int = 50) -> str:
44
- """Serialize DataFrame to JSON (omit 'structure' column)"""
45
  cols = [c for c in df.columns if c != "structure"]
46
  subset = df[cols].head(max_rows)
47
  return json.dumps({
@@ -50,42 +42,27 @@ def _serialize(df: pd.DataFrame, max_rows: int = 50) -> str:
50
  "entries": subset.where(pd.notna(subset), None).to_dict(orient="records")
51
  }, ensure_ascii=False, indent=2)
52
 
53
-
54
- # ── MCP Tool Definitions ──────────────────────────────────────────────
55
-
56
  @mcp.tool()
57
  def search_by_formula(formula: str) -> str:
58
- """
59
- Search entries by chemical formula (partial match, case-insensitive).
60
-
61
  Args:
62
- formula: Chemical formula or element symbol, e.g. "Si", "MgO", "BeTe", "LaP"
63
- Returns:
64
- JSON with matched entries (up to 50 rows).
65
  """
66
  df = get_df()
67
  mask = df["formula"].str.contains(formula, case=False, na=False)
68
- result = df[mask]
69
- return _serialize(result)
70
-
71
 
72
  @mcp.tool()
73
  def search_by_elements(elements: list[str]) -> str:
74
- """
75
- Search entries that contain ALL specified elements.
76
-
77
  Args:
78
- elements: List of element symbols, e.g. ["Si", "O"] for silicon oxides
79
- Returns:
80
- JSON with matched entries (up to 50 rows).
81
  """
82
  df = get_df()
83
  mask = pd.Series([True] * len(df), index=df.index)
84
  for el in elements:
85
  mask &= df["formula"].str.contains(el, case=False, na=False)
86
- result = df[mask]
87
- return _serialize(result)
88
-
89
 
90
  @mcp.tool()
91
  def filter_by_kappa(
@@ -93,61 +70,43 @@ def filter_by_kappa(
93
  max_klat: float | None = None,
94
  only_converged: bool = True
95
  ) -> str:
96
- """
97
- Filter entries by lattice thermal conductivity klat [W/mK].
98
-
99
  Args:
100
  min_klat: Minimum klat value in W/mK (optional)
101
  max_klat: Maximum klat value in W/mK (optional)
102
  only_converged: If True, exclude entries where klat is null (default: True)
103
- Returns:
104
- JSON with matched entries sorted by klat descending (up to 50 rows).
105
  """
106
  df = get_df()
107
  result = df.copy()
108
-
109
  if only_converged:
110
  result = result[result["klat[W/mK]"].notna()]
111
  if min_klat is not None:
112
  result = result[result["klat[W/mK]"] >= min_klat]
113
  if max_klat is not None:
114
  result = result[result["klat[W/mK]"] <= max_klat]
115
-
116
  result = result.sort_values("klat[W/mK]", ascending=False)
117
  return _serialize(result)
118
 
119
-
120
  @mcp.tool()
121
  def filter_by_spacegroup(spg_number: int) -> str:
122
- """
123
- Filter entries by space group number.
124
-
125
  Args:
126
- spg_number: International space group number (1230), e.g. 225 for Fm-3m, 227 for Fd-3m
127
- Returns:
128
- JSON with matched entries (up to 50 rows).
129
  """
130
  df = get_df()
131
- result = df[df["spg_number"] == spg_number]
132
- return _serialize(result)
133
-
134
 
135
  @mcp.tool()
136
  def get_entry(input_dir: str) -> str:
137
- """
138
- Get full details for a specific calculation entry, including structure data.
139
-
140
  Args:
141
- input_dir: The input_dir identifier (e.g. "mp-149", "mp-149-2", "mp-24")
142
- Returns:
143
- JSON with all columns including structure data.
144
  """
145
  df = get_df()
146
  result = df[df["input_dir"] == input_dir]
147
  if result.empty:
148
  return json.dumps({"error": f"Entry '{input_dir}' not found."})
149
  row = result.iloc[0].where(pd.notna(result.iloc[0]), None).to_dict()
150
- # Parse structure if it is a JSON string
151
  if row.get("structure") and isinstance(row["structure"], str):
152
  try:
153
  row["structure"] = json.loads(row["structure"])
@@ -155,207 +114,36 @@ def get_entry(input_dir: str) -> str:
155
  pass
156
  return json.dumps(row, ensure_ascii=False, indent=2)
157
 
158
-
159
  @mcp.tool()
160
  def list_columns() -> str:
161
- """
162
- List all available columns in the Phonix summary database with descriptions.
163
-
164
- Returns:
165
- JSON with column names and descriptions.
166
- """
167
  columns = {
168
- "mp_id": "Materials Project ID (e.g. mp-149 for Si diamond)",
169
- "input_dir": "Unique calculation directory name (use this for get_entry)",
170
- "formula": "Chemical formula (e.g. Si, MgO, BeTe)",
171
- "spg_number": "International space group number (1230)",
172
- "natoms_prim": "Number of atoms in primitive cell",
173
- "natoms_conv": "Number of atoms in conventional cell",
174
- "natoms_sc": "Number of atoms in supercell used for fc2/fc3",
175
- "trans_conv2prim": "Transformation matrix from conventional to primitive cell",
176
- "trans_conv2sc": "Transformation matrix from conventional to supercell",
177
- "structure": "Crystal structure (ASE-compatible JSON: cell, positions, symbols)",
178
- "volume[A^3]": "Cell volume in cubic angstroms",
179
- "nac": "Non-analytical correction flag (0 or 1)",
180
- "volume_relaxation":"Volume relaxation flag (0=fixed, 1=relaxed)",
181
- "scph": "Self-consistent phonon (SCPH) flag",
182
- "four": "4th-order force constants flag",
183
- "kappa_type": "Thermal conductivity calculation type (e.g. '3ph')",
184
- "qmesh": "q-point mesh for BTE (e.g. '21x21x21')",
185
- "kp[W/mK]": "Lattice thermal conductivity - p (off-diagonal) contribution [W/mK]",
186
- "kc[W/mK]": "Lattice thermal conductivity - c (coherence) contribution [W/mK]",
187
- "klat[W/mK]": "Total lattice thermal conductivity (kp+kc) [W/mK]",
188
- "min_phfreq[cm^-1]":"Minimum phonon frequency [cm^-1] (negative = imaginary mode)",
189
- "max_phfreq[cm^-1]":"Maximum phonon frequency [cm^-1]",
190
- "fc2_error[%]": "2nd-order force constants fitting error [%]",
191
- "fc3_error[%]": "3rd-order force constants fitting error [%]",
192
- "calc_time[sec]": "Total calculation time [seconds]",
193
  }
194
  return json.dumps(columns, ensure_ascii=False, indent=2)
195
 
196
-
197
- # ── Gradio UI ─────────────────────────────────────────────────────
198
- CSS = """
199
- @import url('https://fonts.googleapis.com/css2?family=Space+Mono:wght@400;700&family=Inter:wght@300;400;600&display=swap');
200
-
201
- body, .gradio-container {
202
- background: #0a0e1a !important;
203
- color: #c8d6e5 !important;
204
- font-family: 'Inter', sans-serif !important;
205
- }
206
-
207
- h1, h2, h3 { font-family: 'Space Mono', monospace !important; }
208
-
209
- .phonix-header {
210
- text-align: center;
211
- padding: 2rem 1rem 1.5rem;
212
- border-bottom: 1px solid #1e2d45;
213
- margin-bottom: 1.5rem;
214
- }
215
-
216
- .phonix-title {
217
- font-family: 'Space Mono', monospace;
218
- font-size: 2rem;
219
- font-weight: 700;
220
- color: #4fc3f7;
221
- letter-spacing: 0.08em;
222
- margin: 0;
223
- }
224
-
225
- .phonix-subtitle {
226
- color: #607d8b;
227
- font-size: 0.85rem;
228
- margin-top: 0.4rem;
229
- letter-spacing: 0.04em;
230
- }
231
-
232
- .mcp-badge {
233
- display: inline-block;
234
- background: #0d47a1;
235
- color: #90caf9;
236
- font-family: 'Space Mono', monospace;
237
- font-size: 0.7rem;
238
- padding: 0.2rem 0.6rem;
239
- border-radius: 3px;
240
- margin-top: 0.6rem;
241
- letter-spacing: 0.06em;
242
- }
243
-
244
- .stat-bar {
245
- display: flex;
246
- gap: 1.5rem;
247
- justify-content: center;
248
- padding: 0.8rem;
249
- background: #0f1829;
250
- border-radius: 6px;
251
- margin-bottom: 1.5rem;
252
- flex-wrap: wrap;
253
- }
254
-
255
- .stat-item {
256
- text-align: center;
257
- }
258
-
259
- .stat-value {
260
- font-family: 'Space Mono', monospace;
261
- font-size: 1.2rem;
262
- color: #4fc3f7;
263
- display: block;
264
- }
265
-
266
- .stat-label {
267
- font-size: 0.7rem;
268
- color: #546e7a;
269
- text-transform: uppercase;
270
- letter-spacing: 0.05em;
271
- }
272
-
273
- .gr-button-primary {
274
- background: #0d47a1 !important;
275
- border: 1px solid #1565c0 !important;
276
- font-family: 'Space Mono', monospace !important;
277
- }
278
-
279
- .gr-button-primary:hover {
280
- background: #1565c0 !important;
281
- }
282
-
283
- label { color: #90a4ae !important; font-size: 0.8rem !important; }
284
-
285
- textarea, input[type="text"], input[type="number"] {
286
- background: #0f1829 !important;
287
- border: 1px solid #1e2d45 !important;
288
- color: #c8d6e5 !important;
289
- font-family: 'Space Mono', monospace !important;
290
- font-size: 0.85rem !important;
291
- }
292
- """
293
-
294
- def ui_search_formula(formula):
295
- return search_by_formula(formula)
296
-
297
- def ui_filter_kappa(min_k, max_k, converged):
298
- return filter_by_kappa(
299
- min_klat=float(min_k) if min_k else None,
300
- max_klat=float(max_k) if max_k else None,
301
- only_converged=converged
302
- )
303
-
304
- def ui_get_entry(input_dir):
305
- return get_entry(input_dir.strip())
306
-
307
- with gr.Blocks(css=CSS, title="Phonix Database MCP") as demo:
308
-
309
- gr.HTML("""
310
- <div class="phonix-header">
311
- <p class="phonix-title">⟨ PHONIX DATABASE ⟩</p>
312
- <p class="phonix-subtitle">Database for Anharmonic Phonon Interactions · First-Principles</p>
313
- <span class="mcp-badge">MCP SERVER ACTIVE</span>
314
- </div>
315
- """)
316
-
317
- gr.HTML("""
318
- <div class="stat-bar">
319
- <div class="stat-item"><span class="stat-value">~17,300</span><span class="stat-label">Calculations</span></div>
320
- <div class="stat-item"><span class="stat-value">klat</span><span class="stat-label">Thermal Conductivity</span></div>
321
- <div class="stat-item"><span class="stat-value">3ph/SCPH</span><span class="stat-label">Methods</span></div>
322
- <div class="stat-item"><span class="stat-value">CC BY 4.0</span><span class="stat-label">License</span></div>
323
- </div>
324
- """)
325
-
326
- with gr.Tabs():
327
- with gr.Tab("🔍 Formula Search"):
328
- formula_in = gr.Textbox(label="Chemical Formula", placeholder="Si, MgO, BeTe, LaP7 ...")
329
- formula_btn = gr.Button("Search", variant="primary")
330
- formula_out = gr.Code(language="json", label="Results")
331
- formula_btn.click(ui_search_formula, inputs=formula_in, outputs=formula_out)
332
-
333
- with gr.Tab("🌡️ κ Filter"):
334
- with gr.Row():
335
- min_k = gr.Number(label="Min klat [W/mK]", value=None)
336
- max_k = gr.Number(label="Max klat [W/mK]", value=None)
337
- converged = gr.Checkbox(label="Only converged (klat not null)", value=True)
338
- kappa_btn = gr.Button("Filter", variant="primary")
339
- kappa_out = gr.Code(language="json", label="Results")
340
- kappa_btn.click(ui_filter_kappa, inputs=[min_k, max_k, converged], outputs=kappa_out)
341
-
342
- with gr.Tab("📋 Entry Detail"):
343
- entry_in = gr.Textbox(label="input_dir", placeholder="mp-149, mp-149-2, mp-24 ...")
344
- entry_btn = gr.Button("Get Entry", variant="primary")
345
- entry_out = gr.Code(language="json", label="Full Entry Data")
346
- entry_btn.click(ui_get_entry, inputs=entry_in, outputs=entry_out)
347
-
348
- with gr.Tab("ℹ️ Column Guide"):
349
- col_btn = gr.Button("Show Column Descriptions", variant="primary")
350
- col_out = gr.Code(language="json", label="Columns")
351
- col_btn.click(lambda: list_columns(), outputs=col_out)
352
-
353
- gr.Markdown(
354
- "**MCP Endpoint**: `https://phonix-db-phonix-mcp-server.hf.space/gradio_api/mcp/sse` \n"
355
- "Dataset: [phonix-db/phonix-summary](https://huggingface.co/datasets/phonix-db/phonix-summary) · "
356
- "[phonix-db.org](https://phonix-db.org)",
357
- elem_classes=["phonix-subtitle"]
358
- )
359
-
360
- # Launch MCP + Gradio simultaneously
361
- demo.launch(mcp_server=True)
 
1
  """
2
  Phonix Database MCP Server
3
+ HuggingFace Spaces deployment (FastAPI + MCP over SSE)
 
 
4
  """
5
 
 
6
  from mcp.server.fastmcp import FastMCP
7
  from datasets import load_dataset
8
  import pandas as pd
9
  import json
10
 
 
11
  mcp = FastMCP(
12
  "Phonix Database",
13
  instructions="""
14
  Phonix is a first-principles database for anharmonic phonon interactions.
15
+ ~17,000 calculations of lattice thermal conductivity and related properties.
16
 
17
  Available tools:
18
  - search_by_formula : Search by chemical formula (e.g. "Si", "MgO", "BeTe")
 
24
  """
25
  )
26
 
 
27
  _df: pd.DataFrame | None = None
28
 
29
  def get_df() -> pd.DataFrame:
 
33
  _df = ds.to_pandas()
34
  return _df
35
 
 
 
36
  def _serialize(df: pd.DataFrame, max_rows: int = 50) -> str:
 
37
  cols = [c for c in df.columns if c != "structure"]
38
  subset = df[cols].head(max_rows)
39
  return json.dumps({
 
42
  "entries": subset.where(pd.notna(subset), None).to_dict(orient="records")
43
  }, ensure_ascii=False, indent=2)
44
 
 
 
 
45
  @mcp.tool()
46
  def search_by_formula(formula: str) -> str:
47
+ """Search entries by chemical formula (partial match, case-insensitive).
 
 
48
  Args:
49
+ formula: Chemical formula or element symbol, e.g. "Si", "MgO", "BeTe"
 
 
50
  """
51
  df = get_df()
52
  mask = df["formula"].str.contains(formula, case=False, na=False)
53
+ return _serialize(df[mask])
 
 
54
 
55
  @mcp.tool()
56
  def search_by_elements(elements: list[str]) -> str:
57
+ """Search entries that contain ALL specified elements.
 
 
58
  Args:
59
+ elements: List of element symbols, e.g. ["Si", "O"]
 
 
60
  """
61
  df = get_df()
62
  mask = pd.Series([True] * len(df), index=df.index)
63
  for el in elements:
64
  mask &= df["formula"].str.contains(el, case=False, na=False)
65
+ return _serialize(df[mask])
 
 
66
 
67
  @mcp.tool()
68
  def filter_by_kappa(
 
70
  max_klat: float | None = None,
71
  only_converged: bool = True
72
  ) -> str:
73
+ """Filter entries by lattice thermal conductivity klat [W/mK].
 
 
74
  Args:
75
  min_klat: Minimum klat value in W/mK (optional)
76
  max_klat: Maximum klat value in W/mK (optional)
77
  only_converged: If True, exclude entries where klat is null (default: True)
 
 
78
  """
79
  df = get_df()
80
  result = df.copy()
 
81
  if only_converged:
82
  result = result[result["klat[W/mK]"].notna()]
83
  if min_klat is not None:
84
  result = result[result["klat[W/mK]"] >= min_klat]
85
  if max_klat is not None:
86
  result = result[result["klat[W/mK]"] <= max_klat]
 
87
  result = result.sort_values("klat[W/mK]", ascending=False)
88
  return _serialize(result)
89
 
 
90
  @mcp.tool()
91
  def filter_by_spacegroup(spg_number: int) -> str:
92
+ """Filter entries by space group number.
 
 
93
  Args:
94
+ spg_number: International space group number (1-230)
 
 
95
  """
96
  df = get_df()
97
+ return _serialize(df[df["spg_number"] == spg_number])
 
 
98
 
99
  @mcp.tool()
100
  def get_entry(input_dir: str) -> str:
101
+ """Get full details for a specific calculation entry, including structure data.
 
 
102
  Args:
103
+ input_dir: The input_dir identifier (e.g. "mp-149", "mp-149-2")
 
 
104
  """
105
  df = get_df()
106
  result = df[df["input_dir"] == input_dir]
107
  if result.empty:
108
  return json.dumps({"error": f"Entry '{input_dir}' not found."})
109
  row = result.iloc[0].where(pd.notna(result.iloc[0]), None).to_dict()
 
110
  if row.get("structure") and isinstance(row["structure"], str):
111
  try:
112
  row["structure"] = json.loads(row["structure"])
 
114
  pass
115
  return json.dumps(row, ensure_ascii=False, indent=2)
116
 
 
117
  @mcp.tool()
118
  def list_columns() -> str:
119
+ """List all available columns in the Phonix summary database with descriptions."""
 
 
 
 
 
120
  columns = {
121
+ "mp_id": "Materials Project ID (e.g. mp-149 for Si diamond)",
122
+ "input_dir": "Unique calculation directory name (use for get_entry)",
123
+ "formula": "Chemical formula (e.g. Si, MgO, BeTe)",
124
+ "spg_number": "International space group number (1-230)",
125
+ "natoms_prim": "Number of atoms in primitive cell",
126
+ "natoms_conv": "Number of atoms in conventional cell",
127
+ "natoms_sc": "Number of atoms in supercell for fc2/fc3",
128
+ "structure": "Crystal structure JSON (cell, positions, symbols)",
129
+ "volume[A^3]": "Cell volume in cubic angstroms",
130
+ "nac": "Non-analytical correction flag (0 or 1)",
131
+ "volume_relaxation": "Volume relaxation flag (0=fixed, 1=relaxed)",
132
+ "scph": "Self-consistent phonon (SCPH) flag",
133
+ "four": "4th-order force constants flag",
134
+ "kappa_type": "Thermal conductivity calculation type (e.g. '3ph')",
135
+ "qmesh": "q-point mesh for BTE (e.g. '21x21x21')",
136
+ "kp[W/mK]": "Lattice thermal conductivity p-contribution [W/mK]",
137
+ "kc[W/mK]": "Lattice thermal conductivity c-contribution [W/mK]",
138
+ "klat[W/mK]": "Total lattice thermal conductivity (kp+kc) [W/mK]",
139
+ "min_phfreq[cm^-1]": "Minimum phonon frequency (negative = imaginary mode)",
140
+ "max_phfreq[cm^-1]": "Maximum phonon frequency [cm^-1]",
141
+ "fc2_error[%]": "2nd-order force constants fitting error [%]",
142
+ "fc3_error[%]": "3rd-order force constants fitting error [%]",
143
+ "calc_time[sec]": "Total calculation time [seconds]",
 
 
144
  }
145
  return json.dumps(columns, ensure_ascii=False, indent=2)
146
 
147
+ if __name__ == "__main__":
148
+ # HuggingFace Spaces uses PORT 7860
149
+ mcp.run(transport="sse", host="0.0.0.0", port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
do.sh ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ git add .
2
+ git commit -m "Fix: switch to FastAPI MCP, remove gradio dependency"
3
+ git push
4
+
requirements.txt CHANGED
@@ -1,4 +1,7 @@
1
- gradio[mcp]>=5.0.0
2
  mcp[cli]>=1.0.0
3
  datasets>=2.18.0
4
  pandas>=2.0.0
 
 
 
 
1
+ gradio==4.44.1
2
  mcp[cli]>=1.0.0
3
  datasets>=2.18.0
4
  pandas>=2.0.0
5
+ pydub==0.25.1
6
+ pyaudioop==0.1.0
7
+