Spaces:

smitathkr1
/

namereaction_api

Sleeping

App Files Files Community

Vaishnav14220 committed on Nov 6, 2025

Commit

c640e5c

1 Parent(s): e125455

Update app to use corrected dataset with general reactants/products and corrected names

Browse files

Files changed (1) hide show

app.py +37 -19

app.py CHANGED Viewed

@@ -14,11 +14,11 @@ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image
 from svglib.svglib import svg2rlg
 # Load dataset
-ds = load_dataset("smitathkr1/organic_reactions_enhanced")
 df = ds['train'].to_pandas()
 # Precompute unique values for autocomplete
-reaction_names = df['name'].unique().tolist()
 all_reactants = []
 all_products = []
@@ -33,7 +33,7 @@ def generate_reaction_svg(name):
     if not name:
         return "Please provide a reaction name."
     # Find the reaction
-    result = df[df['name'].str.lower() == name.lower()]
     if not result.empty:
         row = result.iloc[0]
         reactants_smiles = '.'.join(row['reactants_smiles'])
@@ -66,7 +66,7 @@ def generate_all_reactions_pdf():
     for idx, row in df.iterrows():
         # Reaction header
-        reaction_title = f"Reaction {idx+1}: {row['name']}"
         story.append(Paragraph(reaction_title, styles['Heading2']))
         # Generate SVG for this reaction
@@ -103,12 +103,16 @@ def generate_all_reactions_pdf():
         products_smiles = [s for s in row['products_smiles'] if s is not None]
         # Content
         content = [
-            f"<b>Reactants:</b> {', '.join(row['reactants'])}",
             f"<b>Reactants SMILES:</b> {', '.join(reactants_smiles) if reactants_smiles else 'None'}",
-            f"<b>Reagents:</b> {', '.join(row['reagents']) if len(row['reagents']) > 0 else 'None'}",
             f"<b>Reagents SMILES:</b> {', '.join(reagents_smiles) if reagents_smiles else 'None'}",
-            f"<b>Products:</b> {', '.join(row['products'])}",
             f"<b>Products SMILES:</b> {', '.join(products_smiles) if products_smiles else 'None'}"
         ]
@@ -124,58 +128,72 @@ def search_by_reaction_name(query):
     if not query:
         return "Please enter a reaction name."
     # Exact match first
-    result = df[df['name'].str.lower() == query.lower()]
     if not result.empty:
         row = result.iloc[0]
-        return f"**{row['name']}**\n\n**Reactants:** {', '.join(row['reactants'])}\n\n**Products:** {', '.join(row['products'])}\n\n**Description:** {row['description'][:500]}..."
     # Fuzzy match
     matches = process.extract(query, reaction_names, limit=1)
     if matches and matches[0][1] > 80:
         best_match = matches[0][0]
-        result = df[df['name'] == best_match]
         row = result.iloc[0]
-        return f"**{row['name']}** (closest match)\n\n**Reactants:** {', '.join(row['reactants'])}\n\n**Products:** {', '.join(row['products'])}\n\n**Description:** {row['description'][:500]}..."
     return "No matching reaction found."
 def search_by_reactant(reactant):
     if not reactant:
         return "Please enter a reactant."
-    matches = df[df['reactants'].apply(lambda x: reactant.lower() in [r.lower() for r in x])]
     if not matches.empty:
         results = []
         for _, row in matches.head(5).iterrows():
-            results.append(f"**{row['name']}**: {', '.join(row['reactants'])} → {', '.join(row['products'])}")
         return "\n\n".join(results)
     # Fuzzy match for autocorrection
     fuzzy_matches = process.extract(reactant, unique_reactants, limit=3)
     if fuzzy_matches and fuzzy_matches[0][1] > 70:
         closest = fuzzy_matches[0][0]
-        matches = df[df['reactants'].apply(lambda x: closest.lower() in [r.lower() for r in x])]
         if not matches.empty:
             results = [f"Did you mean '{closest}'?\n"]
             for _, row in matches.head(5).iterrows():
-                results.append(f"**{row['name']}**: {', '.join(row['reactants'])} → {', '.join(row['products'])}")
             return "\n\n".join(results)
     return "No reactions found with that reactant."
 def search_by_product(product):
     if not product:
         return "Please enter a product."
-    matches = df[df['products'].apply(lambda x: product.lower() in [p.lower() for p in x])]
     if not matches.empty:
         results = []
         for _, row in matches.head(5).iterrows():
-            results.append(f"**{row['name']}**: {', '.join(row['reactants'])} → {', '.join(row['products'])}")
         return "\n\n".join(results)
     # Fuzzy match for autocorrection
     fuzzy_matches = process.extract(product, unique_products, limit=3)
     if fuzzy_matches and fuzzy_matches[0][1] > 70:
         closest = fuzzy_matches[0][0]
-        matches = df[df['products'].apply(lambda x: closest.lower() in [p.lower() for p in x])]
         if not matches.empty:
             results = [f"Did you mean '{closest}'?\n"]
             for _, row in matches.head(5).iterrows():
-                results.append(f"**{row['name']}**: {', '.join(row['reactants'])} → {', '.join(row['products'])}")
             return "\n\n".join(results)
     return "No reactions found with that product."

 from svglib.svglib import svg2rlg
 # Load dataset
+# Keep the binding named `ds`: the following line reads ds['train'].
+ds = load_dataset("smitathkr1/organic_reactions_corrected")
 df = ds['train'].to_pandas()
 # Precompute unique values for autocomplete
+reaction_names = df['corrected_name'].unique().tolist()
 all_reactants = []
 all_products = []
     if not name:
         return "Please provide a reaction name."
     # Find the reaction
+    result = df[df['corrected_name'].str.lower() == name.lower()]
     if not result.empty:
         row = result.iloc[0]
         reactants_smiles = '.'.join(row['reactants_smiles'])
     for idx, row in df.iterrows():
         # Reaction header
+        reaction_title = f"Reaction {idx+1}: {row['corrected_name']}"
         story.append(Paragraph(reaction_title, styles['Heading2']))
         # Generate SVG for this reaction
         products_smiles = [s for s in row['products_smiles'] if s is not None]
         # Content
+        reactants = row['general_reactants'] if pd.notna(row['general_reactants']) else ', '.join(row['reactants'])
+        reagents = row['general_reagents'] if pd.notna(row['general_reagents']) else ', '.join(row['reagents'])
+        products = row['general_products'] if pd.notna(row['general_products']) else ', '.join(row['products'])
         content = [
+            f"<b>Reactants:</b> {reactants}",
             f"<b>Reactants SMILES:</b> {', '.join(reactants_smiles) if reactants_smiles else 'None'}",
+            f"<b>Reagents:</b> {reagents}",
             f"<b>Reagents SMILES:</b> {', '.join(reagents_smiles) if reagents_smiles else 'None'}",
+            f"<b>Products:</b> {products}",
             f"<b>Products SMILES:</b> {', '.join(products_smiles) if products_smiles else 'None'}"
         ]
     if not query:
         return "Please enter a reaction name."
     # Exact match first
+    result = df[df['corrected_name'].str.lower() == query.lower()]
     if not result.empty:
         row = result.iloc[0]
+        reactants = row['general_reactants'] if pd.notna(row['general_reactants']) else ', '.join(row['reactants'])
+        products = row['general_products'] if pd.notna(row['general_products']) else ', '.join(row['products'])
+        reagents = row['general_reagents'] if pd.notna(row['general_reagents']) else ', '.join(row['reagents'])
+        return f"**{row['corrected_name']}**\n\n**Reactants:** {reactants}\n\n**Reagents:** {reagents}\n\n**Products:** {products}\n\n**Description:** {row['description'][:500]}..."
     # Fuzzy match
     matches = process.extract(query, reaction_names, limit=1)
     if matches and matches[0][1] > 80:
         best_match = matches[0][0]
+        result = df[df['corrected_name'] == best_match]
         row = result.iloc[0]
+        reactants = row['general_reactants'] if pd.notna(row['general_reactants']) else ', '.join(row['reactants'])
+        products = row['general_products'] if pd.notna(row['general_products']) else ', '.join(row['products'])
+        reagents = row['general_reagents'] if pd.notna(row['general_reagents']) else ', '.join(row['reagents'])
+        return f"**{row['corrected_name']}** (closest match)\n\n**Reactants:** {reactants}\n\n**Reagents:** {reagents}\n\n**Products:** {products}\n\n**Description:** {row['description'][:500]}..."
     return "No matching reaction found."
 def search_by_reactant(reactant):
+    """Return up to five reactions whose general reactants mention *reactant*.
+
+    Matching is a case-insensitive, literal substring test against the
+    'general_reactants' column; rows where that column is missing never
+    match. If nothing matches, the best candidate from ``unique_reactants``
+    (fuzzy score above 70) is searched instead and the result is prefixed
+    with a "Did you mean" line.
+
+    Returns a Markdown-formatted string, or a plain message when the input
+    is empty or no reaction is found.
+    """
     if not reactant:
         return "Please enter a reactant."
+    # regex=False: the query is user input, so characters such as '(' or '+'
+    # must be matched literally instead of being parsed as a regex.
+    matches = df[df['general_reactants'].str.lower().str.contains(reactant.lower(), na=False, regex=False)]
     if not matches.empty:
         results = []
         for _, row in matches.head(5).iterrows():
+            reactants = row['general_reactants'] if pd.notna(row['general_reactants']) else ', '.join(row['reactants'])
+            products = row['general_products'] if pd.notna(row['general_products']) else ', '.join(row['products'])
+            results.append(f"**{row['corrected_name']}**: {reactants} → {products}")
         return "\n\n".join(results)
     # Fuzzy match for autocorrection
     fuzzy_matches = process.extract(reactant, unique_reactants, limit=3)
     if fuzzy_matches and fuzzy_matches[0][1] > 70:
         closest = fuzzy_matches[0][0]
+        # Literal match here too: the suggested name may contain regex metacharacters.
+        matches = df[df['general_reactants'].str.lower().str.contains(closest.lower(), na=False, regex=False)]
         if not matches.empty:
             results = [f"Did you mean '{closest}'?\n"]
             for _, row in matches.head(5).iterrows():
+                reactants = row['general_reactants'] if pd.notna(row['general_reactants']) else ', '.join(row['reactants'])
+                products = row['general_products'] if pd.notna(row['general_products']) else ', '.join(row['products'])
+                results.append(f"**{row['corrected_name']}**: {reactants} → {products}")
             return "\n\n".join(results)
     return "No reactions found with that reactant."
 def search_by_product(product):
+    """Return up to five reactions whose general products mention *product*.
+
+    Matching is a case-insensitive, literal substring test against the
+    'general_products' column; rows where that column is missing never
+    match. If nothing matches, the best candidate from ``unique_products``
+    (fuzzy score above 70) is searched instead and the result is prefixed
+    with a "Did you mean" line.
+
+    Returns a Markdown-formatted string, or a plain message when the input
+    is empty or no reaction is found.
+    """
     if not product:
         return "Please enter a product."
+    # regex=False: the query is user input, so characters such as '(' or '+'
+    # must be matched literally instead of being parsed as a regex.
+    matches = df[df['general_products'].str.lower().str.contains(product.lower(), na=False, regex=False)]
     if not matches.empty:
         results = []
         for _, row in matches.head(5).iterrows():
+            reactants = row['general_reactants'] if pd.notna(row['general_reactants']) else ', '.join(row['reactants'])
+            products = row['general_products'] if pd.notna(row['general_products']) else ', '.join(row['products'])
+            results.append(f"**{row['corrected_name']}**: {reactants} → {products}")
         return "\n\n".join(results)
     # Fuzzy match for autocorrection
     fuzzy_matches = process.extract(product, unique_products, limit=3)
     if fuzzy_matches and fuzzy_matches[0][1] > 70:
         closest = fuzzy_matches[0][0]
+        # Literal match here too: the suggested name may contain regex metacharacters.
+        matches = df[df['general_products'].str.lower().str.contains(closest.lower(), na=False, regex=False)]
         if not matches.empty:
             results = [f"Did you mean '{closest}'?\n"]
             for _, row in matches.head(5).iterrows():
+                reactants = row['general_reactants'] if pd.notna(row['general_reactants']) else ', '.join(row['reactants'])
+                products = row['general_products'] if pd.notna(row['general_products']) else ', '.join(row['products'])
+                results.append(f"**{row['corrected_name']}**: {reactants} → {products}")
             return "\n\n".join(results)
     return "No reactions found with that product."