Vaishnav14220 committed on
Commit
c640e5c
·
1 Parent(s): e125455

Update app to use corrected dataset with general reactants/products and corrected names

Browse files
Files changed (1) hide show
  1. app.py +37 -19
app.py CHANGED
@@ -14,11 +14,11 @@ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image
14
  from svglib.svglib import svg2rlg
15
 
16
  # Load dataset
17
- ds = load_dataset("smitathkr1/organic_reactions_enhanced")
18
  df = ds['train'].to_pandas()
19
 
20
  # Precompute unique values for autocomplete
21
- reaction_names = df['name'].unique().tolist()
22
  all_reactants = []
23
  all_products = []
24
 
@@ -33,7 +33,7 @@ def generate_reaction_svg(name):
33
  if not name:
34
  return "Please provide a reaction name."
35
  # Find the reaction
36
- result = df[df['name'].str.lower() == name.lower()]
37
  if not result.empty:
38
  row = result.iloc[0]
39
  reactants_smiles = '.'.join(row['reactants_smiles'])
@@ -66,7 +66,7 @@ def generate_all_reactions_pdf():
66
 
67
  for idx, row in df.iterrows():
68
  # Reaction header
69
- reaction_title = f"Reaction {idx+1}: {row['name']}"
70
  story.append(Paragraph(reaction_title, styles['Heading2']))
71
 
72
  # Generate SVG for this reaction
@@ -103,12 +103,16 @@ def generate_all_reactions_pdf():
103
  products_smiles = [s for s in row['products_smiles'] if s is not None]
104
 
105
  # Content
 
 
 
 
106
  content = [
107
- f"<b>Reactants:</b> {', '.join(row['reactants'])}",
108
  f"<b>Reactants SMILES:</b> {', '.join(reactants_smiles) if reactants_smiles else 'None'}",
109
- f"<b>Reagents:</b> {', '.join(row['reagents']) if len(row['reagents']) > 0 else 'None'}",
110
  f"<b>Reagents SMILES:</b> {', '.join(reagents_smiles) if reagents_smiles else 'None'}",
111
- f"<b>Products:</b> {', '.join(row['products'])}",
112
  f"<b>Products SMILES:</b> {', '.join(products_smiles) if products_smiles else 'None'}"
113
  ]
114
 
@@ -124,58 +128,72 @@ def search_by_reaction_name(query):
124
  if not query:
125
  return "Please enter a reaction name."
126
  # Exact match first
127
- result = df[df['name'].str.lower() == query.lower()]
128
  if not result.empty:
129
  row = result.iloc[0]
130
- return f"**{row['name']}**\n\n**Reactants:** {', '.join(row['reactants'])}\n\n**Products:** {', '.join(row['products'])}\n\n**Description:** {row['description'][:500]}..."
 
 
 
131
  # Fuzzy match
132
  matches = process.extract(query, reaction_names, limit=1)
133
  if matches and matches[0][1] > 80:
134
  best_match = matches[0][0]
135
- result = df[df['name'] == best_match]
136
  row = result.iloc[0]
137
- return f"**{row['name']}** (closest match)\n\n**Reactants:** {', '.join(row['reactants'])}\n\n**Products:** {', '.join(row['products'])}\n\n**Description:** {row['description'][:500]}..."
 
 
 
138
  return "No matching reaction found."
139
 
140
  def search_by_reactant(reactant):
141
  if not reactant:
142
  return "Please enter a reactant."
143
- matches = df[df['reactants'].apply(lambda x: reactant.lower() in [r.lower() for r in x])]
144
  if not matches.empty:
145
  results = []
146
  for _, row in matches.head(5).iterrows():
147
- results.append(f"**{row['name']}**: {', '.join(row['reactants'])} → {', '.join(row['products'])}")
 
 
148
  return "\n\n".join(results)
149
  # Fuzzy match for autocorrection
150
  fuzzy_matches = process.extract(reactant, unique_reactants, limit=3)
151
  if fuzzy_matches and fuzzy_matches[0][1] > 70:
152
  closest = fuzzy_matches[0][0]
153
- matches = df[df['reactants'].apply(lambda x: closest.lower() in [r.lower() for r in x])]
154
  if not matches.empty:
155
  results = [f"Did you mean '{closest}'?\n"]
156
  for _, row in matches.head(5).iterrows():
157
- results.append(f"**{row['name']}**: {', '.join(row['reactants'])} → {', '.join(row['products'])}")
 
 
158
  return "\n\n".join(results)
159
  return "No reactions found with that reactant."
160
 
161
  def search_by_product(product):
162
  if not product:
163
  return "Please enter a product."
164
- matches = df[df['products'].apply(lambda x: product.lower() in [p.lower() for p in x])]
165
  if not matches.empty:
166
  results = []
167
  for _, row in matches.head(5).iterrows():
168
- results.append(f"**{row['name']}**: {', '.join(row['reactants'])} → {', '.join(row['products'])}")
 
 
169
  return "\n\n".join(results)
170
  # Fuzzy match for autocorrection
171
  fuzzy_matches = process.extract(product, unique_products, limit=3)
172
  if fuzzy_matches and fuzzy_matches[0][1] > 70:
173
  closest = fuzzy_matches[0][0]
174
- matches = df[df['products'].apply(lambda x: closest.lower() in [p.lower() for p in x])]
175
  if not matches.empty:
176
  results = [f"Did you mean '{closest}'?\n"]
177
  for _, row in matches.head(5).iterrows():
178
- results.append(f"**{row['name']}**: {', '.join(row['reactants'])} → {', '.join(row['products'])}")
 
 
179
  return "\n\n".join(results)
180
  return "No reactions found with that product."
181
 
 
14
  from svglib.svglib import svg2rlg
15
 
16
  # Load dataset
17
+ dataset = load_dataset("smitathkr1/organic_reactions_corrected")
18
  df = ds['train'].to_pandas()
19
 
20
  # Precompute unique values for autocomplete
21
+ reaction_names = df['corrected_name'].unique().tolist()
22
  all_reactants = []
23
  all_products = []
24
 
 
33
  if not name:
34
  return "Please provide a reaction name."
35
  # Find the reaction
36
+ result = df[df['corrected_name'].str.lower() == name.lower()]
37
  if not result.empty:
38
  row = result.iloc[0]
39
  reactants_smiles = '.'.join(row['reactants_smiles'])
 
66
 
67
  for idx, row in df.iterrows():
68
  # Reaction header
69
+ reaction_title = f"Reaction {idx+1}: {row['corrected_name']}"
70
  story.append(Paragraph(reaction_title, styles['Heading2']))
71
 
72
  # Generate SVG for this reaction
 
103
  products_smiles = [s for s in row['products_smiles'] if s is not None]
104
 
105
  # Content
106
+ reactants = row['general_reactants'] if pd.notna(row['general_reactants']) else ', '.join(row['reactants'])
107
+ reagents = row['general_reagents'] if pd.notna(row['general_reagents']) else ', '.join(row['reagents'])
108
+ products = row['general_products'] if pd.notna(row['general_products']) else ', '.join(row['products'])
109
+
110
  content = [
111
+ f"<b>Reactants:</b> {reactants}",
112
  f"<b>Reactants SMILES:</b> {', '.join(reactants_smiles) if reactants_smiles else 'None'}",
113
+ f"<b>Reagents:</b> {reagents}",
114
  f"<b>Reagents SMILES:</b> {', '.join(reagents_smiles) if reagents_smiles else 'None'}",
115
+ f"<b>Products:</b> {products}",
116
  f"<b>Products SMILES:</b> {', '.join(products_smiles) if products_smiles else 'None'}"
117
  ]
118
 
 
128
  if not query:
129
  return "Please enter a reaction name."
130
  # Exact match first
131
+ result = df[df['corrected_name'].str.lower() == query.lower()]
132
  if not result.empty:
133
  row = result.iloc[0]
134
+ reactants = row['general_reactants'] if pd.notna(row['general_reactants']) else ', '.join(row['reactants'])
135
+ products = row['general_products'] if pd.notna(row['general_products']) else ', '.join(row['products'])
136
+ reagents = row['general_reagents'] if pd.notna(row['general_reagents']) else ', '.join(row['reagents'])
137
+ return f"**{row['corrected_name']}**\n\n**Reactants:** {reactants}\n\n**Reagents:** {reagents}\n\n**Products:** {products}\n\n**Description:** {row['description'][:500]}..."
138
  # Fuzzy match
139
  matches = process.extract(query, reaction_names, limit=1)
140
  if matches and matches[0][1] > 80:
141
  best_match = matches[0][0]
142
+ result = df[df['corrected_name'] == best_match]
143
  row = result.iloc[0]
144
+ reactants = row['general_reactants'] if pd.notna(row['general_reactants']) else ', '.join(row['reactants'])
145
+ products = row['general_products'] if pd.notna(row['general_products']) else ', '.join(row['products'])
146
+ reagents = row['general_reagents'] if pd.notna(row['general_reagents']) else ', '.join(row['reagents'])
147
+ return f"**{row['corrected_name']}** (closest match)\n\n**Reactants:** {reactants}\n\n**Reagents:** {reagents}\n\n**Products:** {products}\n\n**Description:** {row['description'][:500]}..."
148
  return "No matching reaction found."
149
 
150
  def search_by_reactant(reactant):
151
  if not reactant:
152
  return "Please enter a reactant."
153
+ matches = df[df['general_reactants'].str.lower().str.contains(reactant.lower(), na=False)]
154
  if not matches.empty:
155
  results = []
156
  for _, row in matches.head(5).iterrows():
157
+ reactants = row['general_reactants'] if pd.notna(row['general_reactants']) else ', '.join(row['reactants'])
158
+ products = row['general_products'] if pd.notna(row['general_products']) else ', '.join(row['products'])
159
+ results.append(f"**{row['corrected_name']}**: {reactants} → {products}")
160
  return "\n\n".join(results)
161
  # Fuzzy match for autocorrection
162
  fuzzy_matches = process.extract(reactant, unique_reactants, limit=3)
163
  if fuzzy_matches and fuzzy_matches[0][1] > 70:
164
  closest = fuzzy_matches[0][0]
165
+ matches = df[df['general_reactants'].str.lower().str.contains(closest.lower(), na=False)]
166
  if not matches.empty:
167
  results = [f"Did you mean '{closest}'?\n"]
168
  for _, row in matches.head(5).iterrows():
169
+ reactants = row['general_reactants'] if pd.notna(row['general_reactants']) else ', '.join(row['reactants'])
170
+ products = row['general_products'] if pd.notna(row['general_products']) else ', '.join(row['products'])
171
+ results.append(f"**{row['corrected_name']}**: {reactants} → {products}")
172
  return "\n\n".join(results)
173
  return "No reactions found with that reactant."
174
 
175
  def search_by_product(product):
176
  if not product:
177
  return "Please enter a product."
178
+ matches = df[df['general_products'].str.lower().str.contains(product.lower(), na=False)]
179
  if not matches.empty:
180
  results = []
181
  for _, row in matches.head(5).iterrows():
182
+ reactants = row['general_reactants'] if pd.notna(row['general_reactants']) else ', '.join(row['reactants'])
183
+ products = row['general_products'] if pd.notna(row['general_products']) else ', '.join(row['products'])
184
+ results.append(f"**{row['corrected_name']}**: {reactants} → {products}")
185
  return "\n\n".join(results)
186
  # Fuzzy match for autocorrection
187
  fuzzy_matches = process.extract(product, unique_products, limit=3)
188
  if fuzzy_matches and fuzzy_matches[0][1] > 70:
189
  closest = fuzzy_matches[0][0]
190
+ matches = df[df['general_products'].str.lower().str.contains(closest.lower(), na=False)]
191
  if not matches.empty:
192
  results = [f"Did you mean '{closest}'?\n"]
193
  for _, row in matches.head(5).iterrows():
194
+ reactants = row['general_reactants'] if pd.notna(row['general_reactants']) else ', '.join(row['reactants'])
195
+ products = row['general_products'] if pd.notna(row['general_products']) else ', '.join(row['products'])
196
+ results.append(f"**{row['corrected_name']}**: {reactants} → {products}")
197
  return "\n\n".join(results)
198
  return "No reactions found with that product."
199