Spaces:
Sleeping
Sleeping
Vaishnav14220
committed on
Commit
·
c640e5c
1
Parent(s):
e125455
Update app to use corrected dataset with general reactants/products and corrected names
Browse files
app.py
CHANGED
|
@@ -14,11 +14,11 @@ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image
|
|
| 14 |
from svglib.svglib import svg2rlg
|
| 15 |
|
| 16 |
# Load dataset
|
| 17 |
-
|
| 18 |
df = ds['train'].to_pandas()
|
| 19 |
|
| 20 |
# Precompute unique values for autocomplete
|
| 21 |
-
reaction_names = df['
|
| 22 |
all_reactants = []
|
| 23 |
all_products = []
|
| 24 |
|
|
@@ -33,7 +33,7 @@ def generate_reaction_svg(name):
|
|
| 33 |
if not name:
|
| 34 |
return "Please provide a reaction name."
|
| 35 |
# Find the reaction
|
| 36 |
-
result = df[df['
|
| 37 |
if not result.empty:
|
| 38 |
row = result.iloc[0]
|
| 39 |
reactants_smiles = '.'.join(row['reactants_smiles'])
|
|
@@ -66,7 +66,7 @@ def generate_all_reactions_pdf():
|
|
| 66 |
|
| 67 |
for idx, row in df.iterrows():
|
| 68 |
# Reaction header
|
| 69 |
-
reaction_title = f"Reaction {idx+1}: {row['
|
| 70 |
story.append(Paragraph(reaction_title, styles['Heading2']))
|
| 71 |
|
| 72 |
# Generate SVG for this reaction
|
|
@@ -103,12 +103,16 @@ def generate_all_reactions_pdf():
|
|
| 103 |
products_smiles = [s for s in row['products_smiles'] if s is not None]
|
| 104 |
|
| 105 |
# Content
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
content = [
|
| 107 |
-
f"<b>Reactants:</b> {
|
| 108 |
f"<b>Reactants SMILES:</b> {', '.join(reactants_smiles) if reactants_smiles else 'None'}",
|
| 109 |
-
f"<b>Reagents:</b> {
|
| 110 |
f"<b>Reagents SMILES:</b> {', '.join(reagents_smiles) if reagents_smiles else 'None'}",
|
| 111 |
-
f"<b>Products:</b> {
|
| 112 |
f"<b>Products SMILES:</b> {', '.join(products_smiles) if products_smiles else 'None'}"
|
| 113 |
]
|
| 114 |
|
|
@@ -124,58 +128,72 @@ def search_by_reaction_name(query):
|
|
| 124 |
if not query:
|
| 125 |
return "Please enter a reaction name."
|
| 126 |
# Exact match first
|
| 127 |
-
result = df[df['
|
| 128 |
if not result.empty:
|
| 129 |
row = result.iloc[0]
|
| 130 |
-
|
|
|
|
|
|
|
|
|
|
| 131 |
# Fuzzy match
|
| 132 |
matches = process.extract(query, reaction_names, limit=1)
|
| 133 |
if matches and matches[0][1] > 80:
|
| 134 |
best_match = matches[0][0]
|
| 135 |
-
result = df[df['
|
| 136 |
row = result.iloc[0]
|
| 137 |
-
|
|
|
|
|
|
|
|
|
|
| 138 |
return "No matching reaction found."
|
| 139 |
|
| 140 |
def search_by_reactant(reactant):
|
| 141 |
if not reactant:
|
| 142 |
return "Please enter a reactant."
|
| 143 |
-
matches = df[df['
|
| 144 |
if not matches.empty:
|
| 145 |
results = []
|
| 146 |
for _, row in matches.head(5).iterrows():
|
| 147 |
-
|
|
|
|
|
|
|
| 148 |
return "\n\n".join(results)
|
| 149 |
# Fuzzy match for autocorrection
|
| 150 |
fuzzy_matches = process.extract(reactant, unique_reactants, limit=3)
|
| 151 |
if fuzzy_matches and fuzzy_matches[0][1] > 70:
|
| 152 |
closest = fuzzy_matches[0][0]
|
| 153 |
-
matches = df[df['
|
| 154 |
if not matches.empty:
|
| 155 |
results = [f"Did you mean '{closest}'?\n"]
|
| 156 |
for _, row in matches.head(5).iterrows():
|
| 157 |
-
|
|
|
|
|
|
|
| 158 |
return "\n\n".join(results)
|
| 159 |
return "No reactions found with that reactant."
|
| 160 |
|
| 161 |
def search_by_product(product):
|
| 162 |
if not product:
|
| 163 |
return "Please enter a product."
|
| 164 |
-
matches = df[df['
|
| 165 |
if not matches.empty:
|
| 166 |
results = []
|
| 167 |
for _, row in matches.head(5).iterrows():
|
| 168 |
-
|
|
|
|
|
|
|
| 169 |
return "\n\n".join(results)
|
| 170 |
# Fuzzy match for autocorrection
|
| 171 |
fuzzy_matches = process.extract(product, unique_products, limit=3)
|
| 172 |
if fuzzy_matches and fuzzy_matches[0][1] > 70:
|
| 173 |
closest = fuzzy_matches[0][0]
|
| 174 |
-
matches = df[df['
|
| 175 |
if not matches.empty:
|
| 176 |
results = [f"Did you mean '{closest}'?\n"]
|
| 177 |
for _, row in matches.head(5).iterrows():
|
| 178 |
-
|
|
|
|
|
|
|
| 179 |
return "\n\n".join(results)
|
| 180 |
return "No reactions found with that product."
|
| 181 |
|
|
|
|
| 14 |
from svglib.svglib import svg2rlg
|
| 15 |
|
| 16 |
# Load dataset
|
| 17 |
+
dataset = load_dataset("smitathkr1/organic_reactions_corrected")
|
| 18 |
df = ds['train'].to_pandas()
|
| 19 |
|
| 20 |
# Precompute unique values for autocomplete
|
| 21 |
+
reaction_names = df['corrected_name'].unique().tolist()
|
| 22 |
all_reactants = []
|
| 23 |
all_products = []
|
| 24 |
|
|
|
|
| 33 |
if not name:
|
| 34 |
return "Please provide a reaction name."
|
| 35 |
# Find the reaction
|
| 36 |
+
result = df[df['corrected_name'].str.lower() == name.lower()]
|
| 37 |
if not result.empty:
|
| 38 |
row = result.iloc[0]
|
| 39 |
reactants_smiles = '.'.join(row['reactants_smiles'])
|
|
|
|
| 66 |
|
| 67 |
for idx, row in df.iterrows():
|
| 68 |
# Reaction header
|
| 69 |
+
reaction_title = f"Reaction {idx+1}: {row['corrected_name']}"
|
| 70 |
story.append(Paragraph(reaction_title, styles['Heading2']))
|
| 71 |
|
| 72 |
# Generate SVG for this reaction
|
|
|
|
| 103 |
products_smiles = [s for s in row['products_smiles'] if s is not None]
|
| 104 |
|
| 105 |
# Content
|
| 106 |
+
reactants = row['general_reactants'] if pd.notna(row['general_reactants']) else ', '.join(row['reactants'])
|
| 107 |
+
reagents = row['general_reagents'] if pd.notna(row['general_reagents']) else ', '.join(row['reagents'])
|
| 108 |
+
products = row['general_products'] if pd.notna(row['general_products']) else ', '.join(row['products'])
|
| 109 |
+
|
| 110 |
content = [
|
| 111 |
+
f"<b>Reactants:</b> {reactants}",
|
| 112 |
f"<b>Reactants SMILES:</b> {', '.join(reactants_smiles) if reactants_smiles else 'None'}",
|
| 113 |
+
f"<b>Reagents:</b> {reagents}",
|
| 114 |
f"<b>Reagents SMILES:</b> {', '.join(reagents_smiles) if reagents_smiles else 'None'}",
|
| 115 |
+
f"<b>Products:</b> {products}",
|
| 116 |
f"<b>Products SMILES:</b> {', '.join(products_smiles) if products_smiles else 'None'}"
|
| 117 |
]
|
| 118 |
|
|
|
|
| 128 |
if not query:
|
| 129 |
return "Please enter a reaction name."
|
| 130 |
# Exact match first
|
| 131 |
+
result = df[df['corrected_name'].str.lower() == query.lower()]
|
| 132 |
if not result.empty:
|
| 133 |
row = result.iloc[0]
|
| 134 |
+
reactants = row['general_reactants'] if pd.notna(row['general_reactants']) else ', '.join(row['reactants'])
|
| 135 |
+
products = row['general_products'] if pd.notna(row['general_products']) else ', '.join(row['products'])
|
| 136 |
+
reagents = row['general_reagents'] if pd.notna(row['general_reagents']) else ', '.join(row['reagents'])
|
| 137 |
+
return f"**{row['corrected_name']}**\n\n**Reactants:** {reactants}\n\n**Reagents:** {reagents}\n\n**Products:** {products}\n\n**Description:** {row['description'][:500]}..."
|
| 138 |
# Fuzzy match
|
| 139 |
matches = process.extract(query, reaction_names, limit=1)
|
| 140 |
if matches and matches[0][1] > 80:
|
| 141 |
best_match = matches[0][0]
|
| 142 |
+
result = df[df['corrected_name'] == best_match]
|
| 143 |
row = result.iloc[0]
|
| 144 |
+
reactants = row['general_reactants'] if pd.notna(row['general_reactants']) else ', '.join(row['reactants'])
|
| 145 |
+
products = row['general_products'] if pd.notna(row['general_products']) else ', '.join(row['products'])
|
| 146 |
+
reagents = row['general_reagents'] if pd.notna(row['general_reagents']) else ', '.join(row['reagents'])
|
| 147 |
+
return f"**{row['corrected_name']}** (closest match)\n\n**Reactants:** {reactants}\n\n**Reagents:** {reagents}\n\n**Products:** {products}\n\n**Description:** {row['description'][:500]}..."
|
| 148 |
return "No matching reaction found."
|
| 149 |
|
| 150 |
def search_by_reactant(reactant):
|
| 151 |
if not reactant:
|
| 152 |
return "Please enter a reactant."
|
| 153 |
+
matches = df[df['general_reactants'].str.lower().str.contains(reactant.lower(), na=False)]
|
| 154 |
if not matches.empty:
|
| 155 |
results = []
|
| 156 |
for _, row in matches.head(5).iterrows():
|
| 157 |
+
reactants = row['general_reactants'] if pd.notna(row['general_reactants']) else ', '.join(row['reactants'])
|
| 158 |
+
products = row['general_products'] if pd.notna(row['general_products']) else ', '.join(row['products'])
|
| 159 |
+
results.append(f"**{row['corrected_name']}**: {reactants} → {products}")
|
| 160 |
return "\n\n".join(results)
|
| 161 |
# Fuzzy match for autocorrection
|
| 162 |
fuzzy_matches = process.extract(reactant, unique_reactants, limit=3)
|
| 163 |
if fuzzy_matches and fuzzy_matches[0][1] > 70:
|
| 164 |
closest = fuzzy_matches[0][0]
|
| 165 |
+
matches = df[df['general_reactants'].str.lower().str.contains(closest.lower(), na=False)]
|
| 166 |
if not matches.empty:
|
| 167 |
results = [f"Did you mean '{closest}'?\n"]
|
| 168 |
for _, row in matches.head(5).iterrows():
|
| 169 |
+
reactants = row['general_reactants'] if pd.notna(row['general_reactants']) else ', '.join(row['reactants'])
|
| 170 |
+
products = row['general_products'] if pd.notna(row['general_products']) else ', '.join(row['products'])
|
| 171 |
+
results.append(f"**{row['corrected_name']}**: {reactants} → {products}")
|
| 172 |
return "\n\n".join(results)
|
| 173 |
return "No reactions found with that reactant."
|
| 174 |
|
| 175 |
def search_by_product(product):
|
| 176 |
if not product:
|
| 177 |
return "Please enter a product."
|
| 178 |
+
matches = df[df['general_products'].str.lower().str.contains(product.lower(), na=False)]
|
| 179 |
if not matches.empty:
|
| 180 |
results = []
|
| 181 |
for _, row in matches.head(5).iterrows():
|
| 182 |
+
reactants = row['general_reactants'] if pd.notna(row['general_reactants']) else ', '.join(row['reactants'])
|
| 183 |
+
products = row['general_products'] if pd.notna(row['general_products']) else ', '.join(row['products'])
|
| 184 |
+
results.append(f"**{row['corrected_name']}**: {reactants} → {products}")
|
| 185 |
return "\n\n".join(results)
|
| 186 |
# Fuzzy match for autocorrection
|
| 187 |
fuzzy_matches = process.extract(product, unique_products, limit=3)
|
| 188 |
if fuzzy_matches and fuzzy_matches[0][1] > 70:
|
| 189 |
closest = fuzzy_matches[0][0]
|
| 190 |
+
matches = df[df['general_products'].str.lower().str.contains(closest.lower(), na=False)]
|
| 191 |
if not matches.empty:
|
| 192 |
results = [f"Did you mean '{closest}'?\n"]
|
| 193 |
for _, row in matches.head(5).iterrows():
|
| 194 |
+
reactants = row['general_reactants'] if pd.notna(row['general_reactants']) else ', '.join(row['reactants'])
|
| 195 |
+
products = row['general_products'] if pd.notna(row['general_products']) else ', '.join(row['products'])
|
| 196 |
+
results.append(f"**{row['corrected_name']}**: {reactants} → {products}")
|
| 197 |
return "\n\n".join(results)
|
| 198 |
return "No reactions found with that product."
|
| 199 |
|