Spaces:

Aluode
/

neuromorphic-molecular-solver

Sleeping

App Files Files Community

Aluode committed on Oct 14, 2025

Commit

0f19e3e

verified ·

1 Parent(s): 437ba3a

Upload 2 files

Browse files

Files changed (2) hide show

app.py +191 -0
molecular_constraint_solver.py +228 -0

app.py ADDED Viewed

	@@ -0,0 +1,191 @@

+# app.py
+import streamlit as st
+import numpy as np
+import time
+from collections import defaultdict
+import json
+import io
+try:
+    from rdkit import Chem
+    from rdkit.Chem import Draw
+    from rdkit.Chem import rdMolDraw2D
+    RDKIT_AVAILABLE = True
+except ImportError:
+    RDKIT_AVAILABLE = False
+from molecular_constraint_solver import MolecularConstraintEncoder, parse_constraints
class SparsePhaseCalciumField3SAT:
    """Heuristic CNF solver that stores each Boolean variable as a phase angle.

    A variable is read as True when the cosine of its phase is positive.
    Each :meth:`step` (1) pushes one randomly chosen variable of every
    currently unsatisfied clause towards the phase that would satisfy its
    literal, (2) applies sinusoidal coupling along the sparse weighted graph
    ``W``, (3) adds Gaussian noise, and (4) occasionally strengthens or decays
    coupling weights.

    Literals use the signed 1-based convention: ``k`` means "variable k is
    True" and ``-k`` means "variable k is False".
    """

    def __init__(self, N_vars, clauses, seed=42, K=0.87, eta=0.045,
                 prune_rate=0.005, noise=0.03, DT=0.003, drive=14.28, solver_steps=300):
        """Create a solver for ``clauses`` over ``N_vars`` variables.

        Args:
            N_vars: Number of Boolean variables (literals refer to 1..N_vars).
            clauses: Sequence of clauses, each a sequence of signed literals.
            seed: Seed for this solver's private random generator.
            K: Gain applied to the phase coupling term.
            eta: Amount added to a coupling weight when two phases are
                nearly aligned.
            prune_rate: Fractional decay applied to one random coupling
                weight during a maintenance pass.
            noise: Scale of the Gaussian noise added to the phase update.
            DT: Integration step size.
            drive: Gain of the clause-driven forcing term.
            solver_steps: Stored as ``max_steps``; the class itself does not
                read it, the caller decides how many times to call ``step``.
        """
        # A private generator keeps runs reproducible per seed without
        # reseeding NumPy's process-wide RNG (which unrelated code may use).
        # RandomState(seed) produces the same stream np.random.seed(seed) did.
        self.rng = np.random.RandomState(seed)
        self.N, self.M, self.clauses = N_vars, len(clauses), clauses
        self.K, self.eta, self.prune_rate, self.noise, self.DT = K, eta, prune_rate, noise, DT
        self.drive, self.max_steps = drive, solver_steps
        self.phases = self.rng.uniform(0, 2 * np.pi, N_vars)
        self.clause_weights = np.ones(self.M)
        # Sparse directed coupling graph: W[i][j] is the weight of edge i -> j.
        self.W = defaultdict(dict)
        for _ in range(min(self.N * 2, 20000)):
            i, j = self.rng.randint(0, self.N, 2)
            if i != j:
                self.W[i][j] = self.rng.uniform(0.01, 0.05)
        self.history = {'satisfaction': []}

    def get_assignment(self):
        """Return the Boolean assignment implied by the current phases."""
        return np.cos(self.phases) > 0

    def evaluate_clause(self, clause, assignment):
        """Return True if at least one literal of ``clause`` holds.

        Literals that refer to a variable index outside ``0..N-1`` are
        ignored; an empty clause is therefore never satisfied.
        """
        for lit in clause:
            idx = abs(lit) - 1
            if idx >= self.N:
                continue
            val = assignment[idx]
            if (lit > 0 and val) or (lit < 0 and not val):
                return True
        return False

    def compute_satisfaction(self, assignment=None):
        """Return the fraction of satisfied clauses (1.0 when there are none).

        Args:
            assignment: Optional Boolean array; defaults to the assignment
                derived from the current phases.
        """
        if assignment is None:
            assignment = self.get_assignment()
        if self.M == 0:
            return 1.0
        return sum(1 for c in self.clauses if self.evaluate_clause(c, assignment)) / self.M

    def step(self):
        """Advance the dynamics by one time step and record the satisfaction."""
        dphi, assignment = np.zeros(self.N), self.get_assignment()

        # Clause forcing: every unsatisfied clause gains weight and nudges one
        # of its variables towards phase 0 (True) or pi (False).
        for idx, clause in enumerate(self.clauses):
            if self.evaluate_clause(clause, assignment):
                continue
            self.clause_weights[idx] = min(self.clause_weights[idx] + 0.02, 5.0)
            if len(clause) == 0:
                # An empty clause has no literal to push; picking one would
                # raise ValueError from randint(0).
                continue
            lit = clause[self.rng.randint(len(clause))]
            idx_var = abs(lit) - 1
            if idx_var >= self.N:
                continue
            target = 0.0 if lit > 0 else np.pi
            dphi[idx_var] += self.drive * self.clause_weights[idx] * np.sin(target - self.phases[idx_var])

        # Pairwise coupling: each edge pulls its two endpoints together.
        for i in self.W:
            for j, w in self.W[i].items():
                p_diff = self.phases[j] - self.phases[i]
                dphi[i] += self.K * w * np.sin(p_diff)
                dphi[j] -= self.K * w * np.sin(p_diff)

        dphi += self.noise * self.rng.randn(self.N)
        self.phases = np.mod(self.phases + self.DT * dphi, 2 * np.pi)

        # Roughly one step in ten, update the coupling graph. Skipped when
        # there are no variables because randint(0, 0, ...) would raise.
        if self.rng.rand() < 0.1 and self.N > 0:
            for _ in range(20):
                i, j = self.rng.randint(0, self.N, 2)
                if i != j and np.cos(self.phases[i] - self.phases[j]) > 0.98:
                    self.W[i][j] = min(1.0, self.W[i].get(j, 0.0) + self.eta)
            if self.W:
                s = self.rng.choice(list(self.W.keys()))
                if self.W[s]:
                    t = self.rng.choice(list(self.W[s].keys()))
                    self.W[s][t] *= (1 - self.prune_rate)
                    if self.W[s][t] < 0.01:
                        del self.W[s][t]

        self.history['satisfaction'].append(self.compute_satisfaction())
def draw_molecule_from_structure(s_dict):
    """Render a decoded structure as an image, or as text without RDKit.

    Args:
        s_dict: Mapping with optional ``'atoms'`` (dicts with ``'id'`` and
            ``'element'``) and ``'bonds'`` (dicts with ``'from'`` and
            ``'to'`` atom ids).

    Returns:
        * Without RDKit: a text adjacency listing, or ``"No atoms to draw."``.
        * With RDKit: a ``PIL.Image.Image`` on success, ``None`` when the
          structure has no atoms, or an ``"RDKit drawing failed: ..."``
          string if anything raised while building or drawing the molecule.
    """
    if not RDKIT_AVAILABLE:
        atoms = s_dict.get('atoms', [])
        bonds = s_dict.get('bonds', [])
        if not atoms:
            return "No atoms to draw."
        adj = {a['id']: [] for a in atoms}
        for b in bonds:
            src, dst = b['from'], b['to']
            # Ignore bonds whose endpoints are not listed as atoms (the RDKit
            # branch below does the same) instead of raising KeyError.
            if src in adj and dst in adj:
                adj[src].append(dst)
                adj[dst].append(src)
        lines = [f"{a['id']:02d} {a['element']:>2} -> {', '.join(map(str, adj[a['id']]))}" for a in atoms]
        return "\n".join(lines)

    try:
        mol = Chem.RWMol()
        # Maps the structure's atom ids to the indices RDKit assigns.
        atom_map = {}
        for info in s_dict.get('atoms', []):
            atom_map[info['id']] = mol.AddAtom(Chem.Atom(info['element']))
        for bond in s_dict.get('bonds', []):
            a, b = bond['from'], bond['to']
            if a in atom_map and b in atom_map:
                # Every bond is drawn as a single bond; bond order is not
                # part of the structure dictionary read here.
                mol.AddBond(atom_map[a], atom_map[b], Chem.BondType.SINGLE)
        if mol.GetNumAtoms() == 0:
            return None

        rdkit_idx_to_original_id = {v: k for k, v in atom_map.items()}
        drawer = rdMolDraw2D.MolDraw2DCairo(300, 300)
        opts = drawer.drawOptions()
        # Label each atom as "<original id>:<element symbol>".
        for idx in range(mol.GetNumAtoms()):
            original_id = rdkit_idx_to_original_id.get(idx, '?')
            symbol = mol.GetAtomWithIdx(idx).GetSymbol()
            opts.atomLabels[idx] = f"{original_id}:{symbol}"
        rdMolDraw2D.PrepareAndDrawMolecule(drawer, mol)
        drawer.FinishDrawing()
        png = drawer.GetDrawingText()
        from PIL import Image
        return Image.open(io.BytesIO(png))
    except Exception as e:
        # Best effort: the caller shows returned strings as text, so a
        # drawing failure degrades to a message rather than an exception.
        return f"RDKit drawing failed: {e}"
# --- Page chrome -----------------------------------------------------------
# Inline CSS for the two banner lines rendered at the top of the page.
PAGE_CSS = """<style>.main-header{font-size:3rem;color:#1f77b4;text-align:center}.sub-header{font-size:1.2rem;color:#666;text-align:center;margin-bottom:2rem}</style>"""
TITLE_HTML = '<div class="main-header">🧬 Molecular Constraint Solver</div>'
TAGLINE_HTML = '<div class="sub-header">Generate molecular graphs satisfying hard constraints via neuromorphic 3-SAT solving</div>'

st.set_page_config(page_title="Molecular Constraint Solver", layout="wide", page_icon="🧬")
for html_fragment in (PAGE_CSS, TITLE_HTML, TAGLINE_HTML):
    st.markdown(html_fragment, unsafe_allow_html=True)

# --- Sidebar widgets -------------------------------------------------------
# The values read here are consumed by the "Generate Molecules" handler.
sidebar = st.sidebar
sidebar.header("Constraint Configuration")

sidebar.subheader("Chemical Properties")
aromatic_rings = sidebar.slider("Aromatic Rings", min_value=0, max_value=5, value=1)
max_mw = sidebar.slider("Maximum Molecular Weight (Da)", min_value=200, max_value=700, value=500, step=10)
forbidden_groups = sidebar.multiselect(
    "Forbidden Functional Groups:",
    options=['nitro', 'azide', 'peroxide'],
    default=[],
)

sidebar.subheader("Additional Constraints")
min_atoms = sidebar.slider(
    "Minimum atom count",
    min_value=0,
    max_value=30,
    value=10,
    help="Forces the molecule to have at least this many atoms.",
)
synthesizable = sidebar.checkbox("Synthesizable", value=False)
# Fixed number of atom slots handed to the encoder.
max_atoms = 30

sidebar.subheader("Solver Parameters")
n_molecules = sidebar.slider("Number of molecules to generate", min_value=1, max_value=50, value=5)
solver_steps = sidebar.slider("Solver Steps", min_value=50, max_value=1000, value=300)
drive_strength = sidebar.slider("Drive Strength", min_value=10.0, max_value=100.0, value=75.0, step=5.0)
# Runs once per click: build the constraint list from the sidebar values,
# encode it as 3-SAT, run one independent solver per requested molecule and
# keep the decoded structures in session state for the display section.
if st.sidebar.button("🧬 Generate Molecules", type="primary"):
    with st.spinner("Encoding constraints → Solving 3-SAT → Decoding structures..."):
        try:
            constraints_list = [f"aromatic_rings == {aromatic_rings}", f"molecular_weight < {max_mw}"]
            if min_atoms > 0:
                constraints_list.append(f"min_atoms >= {min_atoms}")
            constraints_list.extend(f"NOT {group}" for group in forbidden_groups)
            if synthesizable:
                constraints_list.append("synthesizable")

            constraints = parse_constraints(constraints_list)
            encoder = MolecularConstraintEncoder(max_atoms=max_atoms)
            clauses, n_vars = encoder.encode_constraints(constraints)
            st.info(f"Generated a SAT problem with {n_vars} variables and {len(clauses)} clauses.")

            results = []
            progress_text = "Generating molecules..."
            progress_bar = st.progress(0, text=progress_text)
            for i in range(n_molecules):
                # A different seed per molecule so the runs explore
                # different trajectories.
                solver = SparsePhaseCalciumField3SAT(
                    N_vars=n_vars, clauses=clauses, seed=int(time.time()) + i,
                    drive=drive_strength, solver_steps=solver_steps
                )
                for _ in range(solver_steps):
                    solver.step()
                    # Stop as soon as every clause is satisfied: further
                    # noisy steps can only keep or lose that assignment.
                    if solver.history['satisfaction'][-1] >= 1.0:
                        break
                assignment = solver.get_assignment()
                structure = encoder.decode_solution(assignment)
                structure['satisfaction'] = solver.compute_satisfaction()
                structure['molecule_id'] = i + 1
                results.append(structure)
                # Pass the caption on every update; omitting ``text`` removes
                # it from the progress bar.
                progress_bar.progress((i + 1) / n_molecules, text=progress_text)
            st.session_state['results'] = results
            st.success(f"Successfully generated {n_molecules} molecular structures!")
        except Exception as e:
            # Top-level UI boundary: report the failure in the page together
            # with the traceback instead of crashing the app.
            st.error(f"An error occurred: {e}")
            import traceback
            st.code(traceback.format_exc())
# Show the most recently generated structures, laid out in rows of up to
# five columns; each cell holds a score, a drawing (or text) and raw details.
if 'results' in st.session_state:
    results = st.session_state['results']
    st.subheader("Generated Molecules")
    grid_width = 5
    cols = st.columns(min(len(results), grid_width))
    for position, molecule in enumerate(results):
        column = cols[position % grid_width]
        with column:
            label = f"Molecule {molecule['molecule_id']}"
            score = f"{molecule['satisfaction']:.1%} sat."
            st.metric(label, score)

            # The drawing helper returns a string (text fallback or error),
            # an image object, or None when there was nothing to draw.
            rendering = draw_molecule_from_structure(molecule)
            if isinstance(rendering, str):
                st.code(rendering)
            elif rendering is None:
                st.warning("Could not draw.")
            else:
                st.image(rendering)

            with st.expander("Details"):
                st.json(molecule)

molecular_constraint_solver.py ADDED Viewed

	@@ -0,0 +1,228 @@

+# molecular_constraint_solver.py
+# FINAL VERSION with corrected decoder
import re
from dataclasses import dataclass
from typing import Any, Dict, List, Tuple

import numpy as np
@dataclass
class MolecularConstraint:
    """One parsed user constraint passed to the encoder."""

    # Name used for dispatch, e.g. 'min_atoms', 'aromatic_rings',
    # 'molecular_weight', 'forbidden_group' or 'synthesizable'.
    constraint_type: str
    # Payload whose type depends on the constraint: an int bound, a
    # functional-group name, or True for flag-style constraints.
    value: Any
    # Comparison operator text as written by the user; '==' when omitted.
    operator: str = '=='
class MolecularConstraintEncoder:
    """Encode molecular constraints as CNF clauses and decode assignments.

    Variables are positive integers starting at 1 and are laid out in
    consecutive regions, in this order: atom types, bond existence,
    per-atom connectivity flags, bond types, ring membership, aromatic
    rings, functional groups and molecular-weight thresholds. Auxiliary
    variables (cardinality counters, 3-SAT splitting) are allocated after
    those by advancing ``var_offset``. A literal is a signed variable number.
    """

    def __init__(self, max_atoms=30):
        """Reserve the variable regions for a molecule of up to ``max_atoms`` atoms."""
        self.max_atoms = max_atoms
        self.max_bonds = max_atoms * (max_atoms - 1) // 2
        # Next unused variable number; grows as regions/auxiliaries are added.
        self.var_offset = 1

        # One variable per (atom slot, element); 'None' marks an empty slot.
        self.atom_types = ['C', 'N', 'O', 'S', 'F', 'Cl', 'Br', 'P', 'H', 'None']
        self.atom_var_start = self.var_offset
        self.var_offset += self.max_atoms * len(self.atom_types)

        # One variable per unordered atom pair.
        self.bond_existence_var_start = self.var_offset
        self.var_offset += self.max_bonds

        self.conn_var_start = self.var_offset
        self.var_offset += self.max_atoms

        self.bond_types = ['single', 'double', 'triple']
        self.bond_type_var_start = self.var_offset
        self.var_offset += self.max_bonds * len(self.bond_types)

        self.ring_var_start = self.var_offset
        self.var_offset += self.max_atoms

        self.max_rings = 10
        self.aromatic_var_start = self.var_offset
        self.var_offset += self.max_rings

        self.functional_groups = ['nitro', 'azide', 'peroxide', 'aldehyde', 'ketone', 'carboxyl', 'amine', 'amide', 'ester', 'ether', 'thiol', 'sulfone', 'phosphate', 'hydroxyl', 'halogen', 'cyano', 'isocyanate', 'epoxide', 'lactone', 'quinone']
        self.group_var_start = self.var_offset
        self.var_offset += len(self.functional_groups)

        # Thresholds 100, 110, ..., 590.
        self.mw_thresholds = list(range(100, 600, 10))
        self.mw_var_start = self.var_offset
        self.var_offset += len(self.mw_thresholds)

    # ------------------------------------------------------------------
    # Variable lookup
    # ------------------------------------------------------------------
    def atom_type_var(self, atom_idx, atom_type):
        """Return the variable for "atom slot ``atom_idx`` has type ``atom_type``"."""
        return self.atom_var_start + atom_idx * len(self.atom_types) + self.atom_types.index(atom_type)

    def bond_existence_var(self, i, j):
        """Return the variable for a bond between slots ``i`` and ``j``.

        The pair is unordered. Returns -1 when ``i == j``; callers must
        filter that value out because -1 would otherwise read as a literal.
        """
        if i == j:
            return -1
        if i > j:
            i, j = j, i
        # Row-major index into the strict upper triangle, in exact integer
        # arithmetic (i * (i + 1) is always even).
        idx = i * self.max_atoms - i * (i + 1) // 2 + (j - i - 1)
        return self.bond_existence_var_start + idx

    def conn_var(self, atom_idx):
        """Return the connectivity-flag variable of slot ``atom_idx``."""
        return self.conn_var_start + atom_idx

    def atom_exists_lit(self, atom_idx):
        """Return the literal "slot ``atom_idx`` is not of type 'None'"."""
        return -self.atom_type_var(atom_idx, 'None')

    def ring_var(self, idx):
        """Return the ring-membership variable with index ``idx``."""
        return self.ring_var_start + idx

    def aromatic_ring_var(self, idx):
        """Return the variable for aromatic ring number ``idx``."""
        return self.aromatic_var_start + idx

    def functional_group_var(self, g):
        """Return the variable for functional group ``g`` (ValueError if unknown)."""
        return self.group_var_start + self.functional_groups.index(g)

    def mw_var(self, t):
        """Return the variable of the molecular-weight threshold nearest to ``t``."""
        nearest = min(self.mw_thresholds, key=lambda x: abs(x - t))
        return self.mw_var_start + self.mw_thresholds.index(nearest)

    # ------------------------------------------------------------------
    # Encoding
    # ------------------------------------------------------------------
    def encode_constraints(self, constraints: "List[MolecularConstraint]") -> Tuple[List[List[int]], int]:
        """Build the 3-SAT instance for the given constraints.

        Returns:
            ``(clauses, n_vars)`` where every clause has exactly three
            literals and ``n_vars`` is the highest variable number in use.

        Each call allocates fresh auxiliary variables, so ``var_offset``
        keeps growing if the same encoder is reused.
        """
        all_clauses = self._encode_structural_validity()
        all_clauses.extend(self.encode_valence())
        all_clauses.extend(self._encode_connectivity())
        for constraint in constraints:
            all_clauses.extend(self._encode_single_constraint(constraint))
        return self._convert_to_3sat(all_clauses)

    def _encode_connectivity(self):
        """Emit the clauses over the per-atom connectivity flags.

        NOTE(review): these clauses only ever force a ``conn`` flag to be
        True (apart from slot 0 when it is empty); nothing forces a flag
        False, so on their own they do not guarantee a connected graph.
        """
        clauses = []
        # Slot 0 is flagged exactly when it holds an atom.
        clauses.append([self.atom_type_var(0, 'None'), self.conn_var(0)])
        clauses.append([-self.atom_type_var(0, 'None'), -self.conn_var(0)])
        # A bond propagates the flag in both directions.
        for i in range(self.max_atoms):
            for j in range(i + 1, self.max_atoms):
                bond_var = self.bond_existence_var(i, j)
                clauses.append([-self.conn_var(i), -bond_var, self.conn_var(j)])
                clauses.append([-self.conn_var(j), -bond_var, self.conn_var(i)])
        # Every existing atom must carry the flag.
        for i in range(self.max_atoms):
            clauses.append([self.atom_type_var(i, 'None'), self.conn_var(i)])
        return clauses

    def encode_valence(self):
        """Require each typed atom to have exactly its valence in bonds.

        Bonds are counted by existence only (bond order is not considered).
        Every cardinality clause is guarded by "atom i is not of this type".
        """
        clauses = []
        valence_rules = {'C': 4, 'N': 3, 'O': 2, 'S': 2, 'F': 1, 'Cl': 1, 'Br': 1, 'P': 3, 'H': 1}
        for i in range(self.max_atoms):
            bond_vars = [self.bond_existence_var(i, j) for j in range(self.max_atoms) if i != j]
            for atom_type, val in valence_rules.items():
                type_var = self.atom_type_var(i, atom_type)
                if val > len(bond_vars):
                    # Not enough partners to ever reach the valence.
                    clauses.append([-type_var])
                    continue
                for cl in self._cardinality_at_least(bond_vars, val) + self._cardinality_at_most(bond_vars, val):
                    if cl:
                        clauses.append([-type_var] + cl)
        return clauses

    def _cardinality_at_least(self, V, k):
        """Return clauses that hold iff at least ``k`` literals of ``V`` are true.

        Uses a counter of ``len(V) * k`` auxiliary variables where
        ``s[i][j]`` may be true only if at least ``j + 1`` of ``V[0..i]``
        are true; asserting ``s[n-1][k-1]`` then enforces the bound.
        Allocates the auxiliaries by advancing ``var_offset``.
        """
        n = len(V)
        if k <= 0:
            return []
        if n < k:
            # Impossible to satisfy: emit a contradiction on variable 1.
            return [[1], [-1]]
        if k == 1:
            # Copy so later in-place edits cannot touch the caller's list.
            return [list(V)]

        s = [[self.var_offset + i * k + j for j in range(k)] for i in range(n)]
        self.var_offset += n * k

        # Counter -> input direction: a counter bit needs support from the
        # inputs, which is what makes the final unit clause binding.
        clauses = [[-s[0][0], V[0]]]
        clauses.extend([-s[0][j]] for j in range(1, k))
        for i in range(1, n):
            clauses.append([-s[i][0], s[i - 1][0], V[i]])
            for j in range(1, k):
                clauses.append([-s[i][j], s[i - 1][j], V[i]])
                clauses.append([-s[i][j], s[i - 1][j], s[i - 1][j - 1]])
        clauses.append([s[n - 1][k - 1]])
        return clauses

    def _cardinality_at_most(self, V, k):
        """Return clauses that hold iff at most ``k`` literals of ``V`` are true."""
        n = len(V)
        if k < 0:
            # Impossible to satisfy: emit a contradiction on variable 1.
            return [[1], [-1]]
        if k >= n:
            return []
        # At most k true  <=>  at least n - k false.
        return self._cardinality_at_least([-v for v in V], n - k)

    def _encode_structural_validity(self):
        """Require every atom slot to have exactly one type (incl. 'None')."""
        clauses = []
        for i in range(self.max_atoms):
            v = [self.atom_type_var(i, t) for t in self.atom_types]
            clauses.append(v)
            for i1 in range(len(v)):
                for i2 in range(i1 + 1, len(v)):
                    clauses.append([-v[i1], -v[i2]])
        return clauses

    def _encode_single_constraint(self, c):
        """Dispatch one constraint to its encoder; unknown types yield no clauses."""
        if c.constraint_type == 'min_atoms':
            return self._encode_min_atoms(c.value)
        if c.constraint_type == 'aromatic_rings':
            return self._encode_aromatic_rings(c.value, c.operator)
        if c.constraint_type == 'molecular_weight':
            return self._encode_molecular_weight(c.value, c.operator)
        if c.constraint_type == 'forbidden_group':
            return self._encode_forbidden_group(c.value)
        if c.constraint_type == 'synthesizable':
            return self._encode_synthesizability()
        return []

    def _encode_min_atoms(self, k):
        """Require at least ``k`` atom slots to be non-empty."""
        if k <= 0:
            return []
        existence_literals = [self.atom_exists_lit(i) for i in range(self.max_atoms)]
        return self._cardinality_at_least(existence_literals, k)

    def _encode_aromatic_rings(self, v, o):
        """For ``==``: set the first ``v`` aromatic-ring variables, clear the rest.

        Other operators produce no clauses.
        """
        if o == '==':
            return [[self.aromatic_ring_var(i)] if i < v else [-self.aromatic_ring_var(i)] for i in range(self.max_rings)]
        return []

    def _encode_molecular_weight(self, v, o):
        """Chain the threshold variables and, for ``<``, forbid thresholds >= ``v``.

        Only the ``<`` operator adds a bound; the chain (a higher threshold
        implies the next lower one) is always emitted.
        """
        c = []
        for lower, upper in zip(self.mw_thresholds, self.mw_thresholds[1:]):
            c.append([-self.mw_var(upper), self.mw_var(lower)])
        if o == '<':
            for t in self.mw_thresholds:
                if t >= v:
                    c.append([-self.mw_var(t)])
        return c

    def _encode_forbidden_group(self, v):
        """Forbid functional group ``v``; unknown group names are ignored."""
        if v not in self.functional_groups:
            return []
        return [[-self.functional_group_var(v)]]

    def _encode_synthesizability(self):
        """Disable aromatic rings 3 and up; allow at most one of four listed groups."""
        c = [[-self.aromatic_ring_var(i)] for i in range(3, self.max_rings)]
        rg = ['nitro', 'azide', 'peroxide', 'isocyanate']
        rv = [self.functional_group_var(g) for g in rg if g in self.functional_groups]
        for i in range(len(rv)):
            for j in range(i + 1, len(rv)):
                c.append([-rv[i], -rv[j]])
        return c

    def _convert_to_3sat(self, cs):
        """Rewrite clauses so each has exactly three literals.

        Short clauses are padded by repeating their last literal; long ones
        are split with fresh chaining variables. Empty clauses are dropped.
        The input clauses are not modified.

        Returns:
            ``(clauses, n_vars)`` with ``n_vars`` the highest variable used.
        """
        s3c, nxt = [], self.var_offset
        for c in cs:
            if not c:
                continue
            rem = list(c)
            if len(rem) <= 3:
                rem.extend([rem[-1]] * (3 - len(rem)))
                s3c.append(rem)
                continue
            while len(rem) > 3:
                # (l1 v l2 v rest)  ->  (l1 v l2 v x) and (-x v rest)
                s3c.append([rem[0], rem[1], nxt])
                rem = [-nxt] + rem[2:]
                nxt += 1
            s3c.append(rem)
        self.var_offset = nxt
        return s3c, self.var_offset - 1

    # ------------------------------------------------------------------
    # Decoding
    # ------------------------------------------------------------------
    def decode_solution(self, a):
        """Turn a Boolean assignment into a structure dictionary.

        Args:
            a: 1-D NumPy array where ``a[v - 1]`` is the value of variable
                ``v``. Any other input yields the empty structure.

        Returns:
            Dict with keys ``'atoms'``, ``'bonds'``, ``'aromatic_rings'``,
            ``'functional_groups'`` and ``'molecular_weight_range'``.
        """
        s = {'atoms': [], 'bonds': [], 'aromatic_rings': 0, 'functional_groups': [], 'molecular_weight_range': None}
        if not isinstance(a, np.ndarray) or a.ndim != 1:
            return s

        # Step 1: the first true non-'None' type variable of a slot wins.
        existing_atom_ids = set()
        for i in range(self.max_atoms):
            for t in self.atom_types:
                v = self.atom_type_var(i, t) - 1
                if v < len(a) and a[v] and t != 'None':
                    s['atoms'].append({'id': i, 'element': t})
                    existing_atom_ids.add(i)
                    break

        # Step 2: keep a bond only if both endpoints decoded to real atoms,
        # which prevents "ghost" bonds to empty slots.
        for i in range(self.max_atoms):
            for j in range(i + 1, self.max_atoms):
                v = self.bond_existence_var(i, j)
                if v != -1 and v - 1 < len(a) and a[v - 1]:
                    if i in existing_atom_ids and j in existing_atom_ids:
                        s['bonds'].append({'from': i, 'to': j})

        s['aromatic_rings'] = sum(1 for i in range(self.max_rings) if self.aromatic_ring_var(i) - 1 < len(a) and a[self.aromatic_ring_var(i) - 1])
        s['functional_groups'] = [g for g in self.functional_groups if self.functional_group_var(g) - 1 < len(a) and a[self.functional_group_var(g) - 1]]

        # Highest threshold reached before the first false threshold variable.
        mw_min = 0
        for t in self.mw_thresholds:
            v = self.mw_var(t) - 1
            if v < len(a) and a[v]:
                mw_min = t
            else:
                break
        s['molecular_weight_range'] = (mw_min, mw_min + 10)
        return s
def parse_constraints(ss):
    """Convert constraint strings into ``MolecularConstraint`` objects.

    Three forms are recognised, tried in this order for each stripped string:

    * ``"<name> <op> <digits>"`` where ``<op>`` is built from ``< > = !``;
      it becomes ``MolecularConstraint(name, int(digits), op)``. The pattern
      is matched at the start of the string only.
    * ``"NOT <group>"`` becomes a ``'forbidden_group'`` constraint.
    * ``"synthesizable"`` becomes a flag constraint with value ``True``.

    Strings matching none of these are skipped without error.
    """
    comparison = re.compile(r'(\w+)\s*([<>=!]+)\s*(\d+)')
    flag_names = ['synthesizable']
    parsed = []
    for raw in ss:
        text = raw.strip()
        hit = comparison.match(text)
        if hit is not None:
            parsed.append(MolecularConstraint(hit.group(1), int(hit.group(3)), hit.group(2)))
            continue
        if text.startswith('NOT '):
            parsed.append(MolecularConstraint('forbidden_group', text[4:].strip()))
            continue
        if text in flag_names:
            parsed.append(MolecularConstraint(text, True))
    return parsed