Vaishnav14220 committed on
Commit
474d359
·
1 Parent(s): 8e9a87a

Update app with individual database tabs and kinetics plotter

Browse files
Files changed (1) hide show
  1. app.py +195 -140
app.py CHANGED
@@ -3,11 +3,17 @@ from __future__ import annotations
3
  import os
4
  import math
5
  import re
 
 
6
  from textwrap import dedent
7
  from typing import List, Sequence, Tuple
 
8
 
9
  import gradio as gr
 
10
  import plotly.graph_objects as go
 
 
11
  from fastapi import FastAPI, HTTPException, Query
12
  from fastapi.middleware.cors import CORSMiddleware
13
  from rdkit.Chem import Draw, rdChemReactions
@@ -91,6 +97,73 @@ CATEGORY_CHOICES = [
91
  ("Theory / estimate", str(Category.theory.value)),
92
  ]
93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
 
95
  def _build_filters(raw_values: Sequence[str]) -> List[SearchFilter]:
96
  filters: List[SearchFilter] = []
@@ -127,6 +200,48 @@ def _summaries_to_table(results) -> List[List[str]]:
127
  return table
128
 
129
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  def _summaries_to_dropdown(results) -> List[tuple[str, str]]:
131
  choices = []
132
  for idx, summary in enumerate(results, start=1):
@@ -462,7 +577,21 @@ def _parse_points(text: str) -> Tuple[List[float], List[float], List[str]]:
462
  return temps, rates, errors
463
 
464
 
465
- def generate_arrhenius_plot(A, n, Ea, Tmin, Tmax, num_points, point_text):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
466
  try:
467
  Tmin = float(Tmin)
468
  Tmax = float(Tmax)
@@ -532,149 +661,75 @@ def generate_arrhenius_plot(A, n, Ea, Tmin, Tmax, num_points, point_text):
532
 
533
 
534
  def build_interface() -> gr.Blocks:
535
- with gr.Blocks(title="NIST Kinetics Explorer") as demo:
536
- gr.Markdown(
537
- dedent(
538
- """
539
- # NIST Chemical Kinetics Explorer
540
-
541
- Search the [NIST Chemical Kinetics Database](https://kinetics.nist.gov/kinetics/)
542
- directly from Hugging Face Spaces. This tool mirrors the public advanced search form,
543
- sends the same query to NIST, and formats summary plus detailed kinetics data.
544
-
545
- ⚠️ *All results come from the live NIST website. Please respect their usage policies
546
- and keep queries reasonable.*
547
- """
548
- )
549
- )
550
-
551
- results_state = gr.State([])
552
-
553
- with gr.Tab("Search"):
554
- simple_search = gr.Textbox(label="Search Query", placeholder="Enter reactants, products, or keywords (e.g., CH4 + O2)")
555
-
556
- with gr.Row():
557
- decomp = gr.Checkbox(label="Only decomposition reactions", value=False)
558
- category = gr.Dropdown(label="Result type filter", choices=CATEGORY_CHOICES, value=str(Category.any.value))
559
- units = gr.Textbox(
560
- label="Optional Units token",
561
- placeholder="Leave blank to use NIST account defaults",
 
562
  )
563
 
564
- search_button = gr.Button("Search NIST", variant="primary")
565
- search_status = gr.Markdown()
566
- result_table = gr.Dataframe(
567
- headers=["#", "Records", "Reaction", "Detail URL"],
568
- datatype=["number", "number", "str", "str"],
569
- interactive=False,
570
- wrap=True,
571
- )
572
-
573
- with gr.Tab("Reaction Detail"):
574
- selection = gr.Dropdown(
575
- label="Select a reaction from the latest search",
576
- choices=[],
577
- interactive=False,
578
- )
579
- manual_url = gr.Textbox(
580
- label="Or paste a NIST detail URL",
581
- placeholder="https://kinetics.nist.gov/kinetics/ReactionSearch?....",
582
- )
583
- detail_button = gr.Button("Fetch Reaction Detail")
584
-
585
- # Reaction metadata and details
586
- detail_markdown = gr.Markdown()
587
-
588
- # Kinetics data table
589
- dataset_table = gr.Dataframe(
590
- headers=["Section", "Squib", "Temp [K]", "A", "n", "Ea [J/mole]", "k(298 K)", "Order", "Squib URL"],
591
- datatype=["str"] * 9,
592
- interactive=False,
593
- wrap=True,
594
- )
595
-
596
- # Reaction SVG visualization
597
- with gr.Row():
598
- gr.Markdown("### Reaction Structure")
599
- reaction_svg = gr.HTML()
600
-
601
- # Arrhenius plot
602
- with gr.Row():
603
- gr.Markdown("### Arrhenius Plot")
604
- reaction_plot = gr.Plot()
605
-
606
- search_button.click(
607
- fn=perform_search,
608
- inputs=[simple_search, decomp, category, units],
609
- outputs=[result_table, search_status, selection, results_state],
610
- )
611
-
612
- detail_button.click(
613
- fn=fetch_detail,
614
- inputs=[selection, manual_url],
615
- outputs=[detail_markdown, dataset_table, reaction_plot, reaction_svg],
616
- )
617
-
618
- # Auto-render SVG when selection changes
619
- selection.change(
620
- fn=fetch_detail,
621
- inputs=[selection, manual_url],
622
- outputs=[detail_markdown, dataset_table, reaction_plot, reaction_svg],
623
  )
624
 
625
- with gr.Tab("Reaction SVG"):
626
- gr.Markdown(
627
- "Render an RDKit reaction sketch from reaction SMILES/SMARTS. "
628
- "Example: `CCO.O=C=O>>CC(=O)O` or `[CH3:1].[Cl:2][C@@H](F)[Br]>>[CH3:1][C@@H](F)[Cl]`."
629
- )
630
-
631
- # Common reaction examples
632
- common_reactions = [
633
- ("Ethanol esterification", "CCO.CC(=O)O>>CC(=O)OCC.O"),
634
- ("Methane combustion", "C.O>>CO2"),
635
- ("Ethylene hydration", "C=C.O>>CCO"),
636
- ("Acetylene + HBr", "C#C.Br>>C=CBr"),
637
- ("Benzene nitration", "c1ccccc1.O=N(=O)O>>c1ccc(cc1)[N+](=O)[O-].O"),
638
- ("Methyl radical + Ethane", "[CH3].CC>>[CH4].C"),
639
- ("Chlorine + Hydrogen", "Cl.C>>CCl"),
640
- ("Propane oxidation", "CCC.O>>CC(C)=O"),
641
- ]
642
-
643
- with gr.Row():
644
- reaction_preset = gr.Dropdown(
645
- label="Common Reactions",
646
- choices=[label for label, _ in common_reactions],
647
- interactive=True
648
- )
649
- preset_dict = {label: smiles for label, smiles in common_reactions}
650
-
651
- reaction_input = gr.Textbox(
652
- label="Reaction SMILES/SMARTS",
653
- placeholder="Reactant1.Reactant2>>Product1.Product2",
654
- lines=2,
655
- )
656
-
657
- def populate_from_preset(preset_name):
658
- if preset_name and preset_name in preset_dict:
659
- return preset_dict[preset_name]
660
- return ""
661
-
662
- reaction_preset.change(
663
- fn=populate_from_preset,
664
- inputs=reaction_preset,
665
- outputs=reaction_input,
666
- )
667
-
668
- render_button = gr.Button("Render Reaction", variant="secondary")
669
- reaction_svg_output = gr.HTML()
670
- render_status = gr.Markdown()
671
-
672
- render_button.click(
673
- fn=render_reaction_svg,
674
- inputs=reaction_input,
675
- outputs=[reaction_svg_output, render_status],
676
- )
677
-
678
  return demo
679
 
680
 
 
3
  import os
4
  import math
5
  import re
6
+ from functools import partial
7
+ from io import StringIO
8
  from textwrap import dedent
9
  from typing import List, Sequence, Tuple
10
+ from urllib.parse import quote_plus
11
 
12
  import gradio as gr
13
+ import pandas as pd
14
  import plotly.graph_objects as go
15
+ import requests
16
+ from bs4 import BeautifulSoup
17
  from fastapi import FastAPI, HTTPException, Query
18
  from fastapi.middleware.cors import CORSMiddleware
19
  from rdkit.Chem import Draw, rdChemReactions
 
97
  ("Theory / estimate", str(Category.theory.value)),
98
  ]
99
 
100
# Endpoint that every WebBook query URL in this module is built on.
WEBBOOK_BASE_URL = "https://webbook.nist.gov/cgi/cbook.cgi"

# Substrings that mark a hyperlink as a downloadable data file.
DOWNLOAD_EXTENSIONS = (".pdf", ".sd", ".sdf", ".jdx", ".dx", ".zip")

# One entry per database tab, keyed by the tab title shown in the UI.
#   "summary"       - short description rendered at the top of the tab
#   "param"         - query-string fragment appended verbatim to the URL
#   "parse"         - human-readable note on what should be extracted
#   "phase_choices" - optional; phases the user may pick for this tab
# NOTE(review): the "param" fragments are sent as-is; confirm each one
# against the WebBook query interface.
DB_TABS = {
    # --- Thermochemistry -------------------------------------------------
    "Gas-Phase Ion Thermochemistry": {
        "summary": "Compiles IE/AE/EA/PA/GB/acidities/ΔH_f for ions; ~1740 species; evaluated from spectroscopy/equilibria.",
        "param": "IonEnergetics",
        "parse": "Extract ion energies table (IE, EA, PA)",
    },
    "NIST Organic Thermochemistry Archive": {
        "summary": "Enthalpies of reaction/formation (ΔH_rxn/ΔH_f), vaporization/sublimation for organics up to C30.",
        "param": "Type=Thermo",
        "parse": "Extract ΔH_f and reaction enthalpies",
    },
    "Organometallic Thermochemistry Database": {
        "summary": "ΔH_rxn/ΔH_f (gas/condensed), sublimation/vaporization enthalpies, entropies for M-C compounds.",
        "param": "Type=Reaction",
        "parse": "Extract organometallic ΔH_f/S°",
    },
    # --- Energy levels and structures ------------------------------------
    "Vibrational and Electronic Energy Levels": {
        "summary": "Vibrational frequencies (fundamentals/transitions), electronic transitions for ~3,500 polyatomics.",
        "param": "Type=Vib-Elect",
        "parse": "Extract vib/elec levels table (cm⁻¹)",
    },
    "Computed 3-D Structures": {
        "summary": "Optimized 3D geometries (XYZ/SD-file), vibrational frequencies from DFT.",
        "param": "Type=3D",
        "parse": "Extract 3D structure link (SD-file)",
    },
    # --- Infrared spectra ------------------------------------------------
    "Evaluated Infrared Spectra": {
        "summary": "Digitized IR spectra (prism/grating), absorbance scales for various compounds.",
        "param": "Type=IR-Spec",
        "parse": "Extract IR spectrum link/graph",
        "phase_choices": ["gas", "liquid", "solid"],
    },
    "IARPA / PNNL Liquid Phase IR Spectra": {
        "summary": "Complex refractive index (n/k) IR spectra for ~57 liquids (organics/inorganics).",
        "param": "Type=IR-Spec&Phase=liquid",
        "parse": "Extract liquid n/k spectra PDF",
    },
    "IARPA / PNNL Solid Phase IR Spectra": {
        "summary": "Hemispherical/diffuse reflectance IR spectra for ~120 solids (organics/minerals).",
        "param": "Type=IR-Spec&Phase=solid",
        "parse": "Extract solid reflectance PDF/PSD",
    },
    "Quantitative Infrared Database": {
        "summary": "Absorption coefficients (a in (μmol/mol)⁻¹ m⁻¹), transmittance for >30 VOCs.",
        "param": "Type=Quant-IR",
        "parse": "Extract absorption coefficients (JCAMP-DX link)",
    },
    # --- Other spectra and chromatography --------------------------------
    "THz Spectral Database": {
        "summary": "THz-IR transmission/reflectance spectra for solids (50–500 cm⁻¹).",
        "param": "Type=THz-IR",
        "parse": "Extract THz spectra graph",
    },
    "UV/Vis Database": {
        "summary": "UV/Vis spectra (nm, log ε) for organics (aromatics/heterocyclics).",
        "param": "Type=UV-Vis",
        "parse": "Extract UV/Vis spectrum link",
    },
    "Gas Chromatographic Retention Data": {
        "summary": "Kovats/Lee retention indices on non-polar/polar phases (1958–2003).",
        "param": "Type=GC-RI",
        "parse": "Extract retention indices table (Kovats/Lee)",
    },
}
166
+
167
 
168
  def _build_filters(raw_values: Sequence[str]) -> List[SearchFilter]:
169
  filters: List[SearchFilter] = []
 
200
  return table
201
 
202
 
203
def _build_db_url(db_name: str, query: str, phase: str | None) -> str:
    """Build the WebBook query URL for one database tab.

    Args:
        db_name: Key into ``DB_TABS``; that entry's ``"param"`` fragment is
            appended verbatim to the query string.
        query: Species name or formula. Surrounding whitespace is dropped
            and the remainder is URL-encoded.
        phase: Optional phase filter. It is appended (URL-encoded) only when
            the tab declares ``"phase_choices"`` and its ``"param"`` does not
            already contain a ``Phase=`` setting.

    Returns:
        The full URL, based on ``WEBBOOK_BASE_URL``.

    Raises:
        KeyError: If ``db_name`` is not a key of ``DB_TABS``.
    """
    config = DB_TABS[db_name]
    param = config["param"]

    phase_suffix = ""
    if phase and config.get("phase_choices") and "Phase=" not in param:
        # Encode the phase as well so an unexpected value cannot inject
        # additional query parameters.
        phase_suffix = f"&Phase={quote_plus(phase)}"

    encoded_query = quote_plus(query.strip())
    return f"{WEBBOOK_BASE_URL}?Name={encoded_query}&Units=SI&{param}{phase_suffix}"
211
+
212
+
213
def fetch_specific_db(db_name, formula, phase=None):
    """Query one WebBook database and summarise the returned page.

    The page's first ``<table>`` is parsed into a DataFrame, and hyperlinks
    whose target contains one of ``DOWNLOAD_EXTENSIONS`` are listed as
    download links.

    Args:
        db_name: Key into ``DB_TABS`` selecting the database.
        formula: Species name or formula to search for.
        phase: Optional phase filter, forwarded to ``_build_db_url``.

    Returns:
        A 3-tuple ``(markdown, dataframe, plot)``. ``dataframe`` is ``None``
        when the page has no table or the request failed; ``plot`` is always
        ``None`` for now. Failures are reported in ``markdown`` rather than
        raised, because this function is used directly as a UI callback.
    """
    if db_name not in DB_TABS:
        return "Invalid database.", None, None

    query = (formula or "").strip()
    if not query:
        # Do not send an empty search to the remote service.
        return "Please enter a formula or name.", None, None

    config = DB_TABS[db_name]
    url = _build_db_url(db_name, query, phase)
    try:
        # The timeout keeps a stalled connection from blocking the UI worker;
        # raise_for_status stops an HTTP error page being parsed as data.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        # Generic table extraction: only the first table on the page is used.
        first_table = soup.find("table")
        df = None
        if first_table is not None:
            df = pd.read_html(StringIO(str(first_table)))[0]

        # Fallback: links for spectra/structures.
        links = [
            anchor["href"]
            for anchor in soup.find_all("a", href=True)
            if any(ext in anchor["href"] for ext in DOWNLOAD_EXTENSIONS)
        ]
        link_text = f"Download links: {links}" if links else ""

        md_content = (
            f"### {db_name}\n{config['summary']}\n\n"
            f"**Query:** {query}\n\n{link_text}\n\n**Extracted Data:**"
        )
        if df is None:
            md_content += "\nNo tabular data found."
        else:
            try:
                table_text = df.to_markdown(index=False)
            except ImportError:
                # to_markdown needs the optional "tabulate" package; fall
                # back to a plain-text table instead of losing the data.
                table_text = "```\n" + df.to_string(index=False) + "\n```"
            md_content += "\n" + table_text

        return md_content, df, None
    except Exception as e:  # UI boundary: report the failure, never crash the tab
        return f"Error fetching {db_name}: {e}", None, None
243
+
244
+
245
  def _summaries_to_dropdown(results) -> List[tuple[str, str]]:
246
  choices = []
247
  for idx, summary in enumerate(results, start=1):
 
577
  return temps, rates, errors
578
 
579
 
580
def kinetics_interface(A, n, Ea, T_min, T_max, plot_dropdown, fetch_ch3, fetch_indene):
    """UI callback for the kinetics tab: draw the plot and report status.

    Args:
        A, n, Ea: Rate parameters, passed through to ``generate_arrhenius_plot``.
        T_min, T_max: Temperature range, passed through likewise.
        plot_dropdown: Requested plot type. Only the plot produced by
            ``generate_arrhenius_plot`` is available, so any other choice is
            reported in the status text instead of being silently ignored.
        fetch_ch3: Whether the CH3 thermo lookup was requested.
        fetch_indene: Whether the inden-1-yl thermo lookup was requested.

    Returns:
        ``(plot, thermo_data, info_text)``. ``thermo_data`` is always ``None``
        because the thermo lookup is still a placeholder.
    """
    # Fixed resolution of 100 points and no overlay of measured points.
    plot = generate_arrhenius_plot(A, n, Ea, T_min, T_max, 100, "")

    notes = ["Kinetics plot generated successfully."]
    if plot_dropdown and plot_dropdown != "arrhenius":
        notes.append(
            f"Plot type '{plot_dropdown}' is not implemented yet; showing the Arrhenius plot."
        )

    # Thermo fetching is a placeholder: say so rather than claiming success.
    thermo_data = None
    if fetch_ch3:
        notes.append("CH3 thermo lookup is not implemented yet; no data was fetched.")
    if fetch_indene:
        notes.append("Inden-1-yl thermo lookup is not implemented yet; no data was fetched.")

    # Blank lines between notes so each one renders as its own Markdown paragraph.
    info_text = "\n\n".join(notes)
    return plot, thermo_data, info_text
593
+
594
+ def generate_arrhenius_plot(A, n, Ea, Tmin, Tmax, num_points=100, point_text=""):
595
  try:
596
  Tmin = float(Tmin)
597
  Tmax = float(Tmax)
 
661
 
662
 
663
  def build_interface() -> gr.Blocks:
664
+ with gr.Blocks(title="NIST Chemistry Explorer") as demo:
665
+ gr.Markdown("# NIST Chemistry WebBook Explorer\nInteractive tabs for kinetics plots and all databases")
666
+
667
+ with gr.Tabs():
668
+ # Tab 1: Kinetics Plotter
669
+ with gr.TabItem("Kinetics Plotter"):
670
+ with gr.Row():
671
+ with gr.Column():
672
+ A_input = gr.Number(value=1.3e-9, label="A (cm³/molecule·s)")
673
+ n_input = gr.Number(value=-0.495, label="n (power)")
674
+ Ea_input = gr.Number(value=1150, label="Ea (J/mol)")
675
+ T_min = gr.Number(value=500, label="T Min (K)")
676
+ T_max = gr.Number(value=2500, label="T Max (K)")
677
+ plot_dropdown = gr.Dropdown(choices=["arrhenius", "k_vs_t", "eyring", "logk_vs_t"], value="arrhenius", label="Plot Type")
678
+ fetch_ch3 = gr.Checkbox(label="Fetch ΔH_f for CH₃")
679
+ fetch_indene = gr.Checkbox(label="Fetch ΔH_f for Inden-1-yl (C9H7)")
680
+
681
+ submit = gr.Button("Generate Plot & Fetch")
682
+
683
+ with gr.Column():
684
+ plot_output = gr.Plot(label="Kinetics Plot")
685
+ thermo_table = gr.Dataframe(visible=False, label="Fetched Thermo Data")
686
+ info_output = gr.Markdown()
687
+
688
+ submit.click(
689
+ fn=kinetics_interface,
690
+ inputs=[A_input, n_input, Ea_input, T_min, T_max, plot_dropdown, fetch_ch3, fetch_indene],
691
+ outputs=[plot_output, thermo_table, info_output]
692
  )
693
 
694
+ # Tabs 2–13: One per database
695
+ for db_name in DB_TABS.keys():
696
+ with gr.TabItem(db_name):
697
+ gr.Markdown(f"### {db_name}\n{DB_TABS[db_name]['summary']}")
698
+
699
+ with gr.Row():
700
+ with gr.Column():
701
+ formula_input = gr.Textbox(value="CH3", label="Formula/Name (e.g., CH3, benzene)")
702
+ # Optional: Add phase filter for IR tabs
703
+ phase_input = None
704
+ if "IR Spectra" in db_name:
705
+ phase_input = gr.Radio(choices=["gas", "liquid", "solid"], value="gas", label="Phase")
706
+
707
+ fetch_btn = gr.Button("Fetch Data")
708
+
709
+ with gr.Column():
710
+ output_md = gr.Markdown()
711
+ output_df = gr.Dataframe(label="Tabular Data")
712
+ output_plot = gr.Plot(visible=False, label="Spectrum Preview") # For IR/UV/THz later
713
+
714
+ # Bind fetch (pass phase if IR)
715
+ if phase_input:
716
+ def wrapped_fetch(formula, phase):
717
+ # Append phase to param if needed
718
+ return fetch_specific_db(db_name, formula)
719
+ fetch_btn.click(wrapped_fetch, inputs=[formula_input, phase_input], outputs=[output_md, output_df, output_plot])
720
+ else:
721
+ def wrapped_fetch(formula):
722
+ return fetch_specific_db(db_name, formula)
723
+ fetch_btn.click(wrapped_fetch, inputs=[formula_input], outputs=[output_md, output_df, output_plot])
724
+
725
+ # Examples (global or per-tab)
726
+ gr.Examples(
727
+ examples=[
728
+ ["CH3"], ["benzene"], ["C6H5OH"]
729
+ ],
730
+ inputs=[formula_input] # Note: This is placeholder; make per-tab if needed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
731
  )
732
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
733
  return demo
734
 
735