# Provenance (from the hosting page's file view; not part of the program):
#   Author: Vaishnav14220
#   Commit 7a2f5d4: Add dropdown autocomplete and autocorrection for searches
#   File size: 8.68 kB
import gradio as gr
import pandas as pd
from datasets import load_dataset
from fuzzywuzzy import process
from rdkit import Chem
from rdkit.Chem import AllChem, Draw
import io
# Load the reactions dataset once at import time; every search function below
# reads from this in-memory DataFrame.
ds = load_dataset("smitathkr1/organic_reactions_enhanced")
df = ds['train'].to_pandas()

# Precompute the choice lists used by the dropdowns, the autocomplete helpers
# and the fuzzy "did you mean" fallback.
reaction_names = df['name'].unique().tolist()

# Flatten the per-reaction sequences into one list per column.
# NOTE(review): assumes every 'reactants' / 'products' cell is an iterable of
# strings (the search functions call .lower() on the elements) -- confirm
# against the dataset schema.
all_reactants = [name for names in df['reactants'] for name in names]
all_products = [name for names in df['products'] for name in names]

# dict.fromkeys() removes duplicates while keeping first-seen (dataset) order,
# so the dropdown contents and slices such as unique_reactants[:10] are the
# same on every run. list(set(...)) has no guaranteed order and can differ
# between processes.
unique_reactants = list(dict.fromkeys(all_reactants))
unique_products = list(dict.fromkeys(all_products))
def generate_reaction_svg(name):
    """Render the named reaction as an SVG drawing.

    The reaction is looked up in the module-level ``df`` by a case-insensitive
    exact match on the ``name`` column. Its ``reactants_smiles`` and
    ``products_smiles`` entries are joined with ``.`` and combined into a
    single reaction SMILES (``reactants>>products``), which is drawn with
    RDKit.

    Args:
        name: Reaction name to draw. ``None``, empty and whitespace-only
            values are rejected; surrounding whitespace is ignored.

    Returns:
        The result of ``Draw.ReactionToImage(rxn, useSVG=True)`` on success,
        otherwise a human-readable message describing why nothing was drawn
        (missing name, unknown reaction, unparsable SMILES, or an RDKit
        error).
    """
    name = (name or "").strip()
    if not name:
        return "Please provide a reaction name."

    # Find the reaction (case-insensitive exact match).
    result = df[df['name'].str.lower() == name.lower()]
    if result.empty:
        return "Reaction not found."

    row = result.iloc[0]
    reactants_smiles = '.'.join(row['reactants_smiles'])
    products_smiles = '.'.join(row['products_smiles'])
    reaction_smiles = f"{reactants_smiles}>>{products_smiles}"

    # Broad catch on purpose: this is the UI boundary, and any RDKit failure
    # should be shown to the user as a message rather than crash the handler.
    try:
        # The dataset stores SMILES, not SMARTS. useSmiles=True makes RDKit
        # parse the string as reaction SMILES instead of as SMARTS query
        # patterns.
        rxn = AllChem.ReactionFromSmarts(reaction_smiles, useSmiles=True)
        if rxn is None:
            return "Failed to parse reaction SMILES."
        return Draw.ReactionToImage(rxn, useSVG=True)
    except Exception as e:
        return f"Error generating SVG: {str(e)}"
def _format_reaction_details(row, title_suffix=""):
    """Build the Markdown detail view (name, reactants, products, description) for one row."""
    description = row['description']
    if not isinstance(description, str):
        # Missing descriptions would otherwise fail on slicing.
        description = ""
    if len(description) > 500:
        # Only mark the text as truncated when something was actually cut.
        description = description[:500] + "..."
    return (
        f"**{row['name']}**{title_suffix}\n\n"
        f"**Reactants:** {', '.join(row['reactants'])}\n\n"
        f"**Products:** {', '.join(row['products'])}\n\n"
        f"**Description:** {description}"
    )


def search_by_reaction_name(query):
    """Look up a reaction by name and return its details as Markdown.

    A case-insensitive exact match against the ``name`` column of the
    module-level ``df`` is tried first. If that fails, the best fuzzy match
    from ``reaction_names`` is used when its score is above 80, and the result
    is labelled "(closest match)".

    Args:
        query: Reaction name typed or selected by the user. ``None``, empty
            and whitespace-only values are rejected; surrounding whitespace
            is ignored.

    Returns:
        A Markdown string with the reaction's name, reactants, products and
        a description limited to 500 characters, or a short message when the
        query is empty or nothing matches.
    """
    query = (query or "").strip()
    if not query:
        return "Please enter a reaction name."

    # Exact match first.
    exact = df[df['name'].str.lower() == query.lower()]
    if not exact.empty:
        return _format_reaction_details(exact.iloc[0])

    # Fuzzy match as a typo-tolerant fallback.
    matches = process.extract(query, reaction_names, limit=1)
    if matches and matches[0][1] > 80:
        best_match = matches[0][0]
        closest = df[df['name'] == best_match]
        return _format_reaction_details(closest.iloc[0], " (closest match)")

    return "No matching reaction found."
def _rows_with_component(column, term):
    """Return the rows of ``df`` whose ``column`` sequence contains ``term`` (case-insensitive)."""
    wanted = term.lower()
    mask = df[column].apply(
        lambda items: any(item.lower() == wanted for item in items)
    )
    return df[mask]


def _format_reaction_summaries(rows, limit=5):
    """Return one ``**name**: reactants → products`` Markdown line for each of the first ``limit`` rows."""
    return [
        f"**{row['name']}**: {', '.join(row['reactants'])} → {', '.join(row['products'])}"
        for _, row in rows.head(limit).iterrows()
    ]


def _search_reaction_components(column, term, choices, not_found_message):
    """Search ``column`` for ``term``, falling back to the closest fuzzy match from ``choices``."""
    hits = _rows_with_component(column, term)
    if not hits.empty:
        return "\n\n".join(_format_reaction_summaries(hits))

    # Fuzzy match for autocorrection. Only the best candidate is ever used,
    # so only one is requested.
    fuzzy_matches = process.extract(term, choices, limit=1)
    if fuzzy_matches and fuzzy_matches[0][1] > 70:
        closest = fuzzy_matches[0][0]
        hits = _rows_with_component(column, closest)
        if not hits.empty:
            lines = [f"Did you mean '{closest}'?\n"]
            lines.extend(_format_reaction_summaries(hits))
            return "\n\n".join(lines)

    return not_found_message


def search_by_reactant(reactant):
    """List up to five reactions that use the given reactant.

    The ``reactants`` column of the module-level ``df`` is searched for a
    case-insensitive exact match. If there is none, the closest entry in
    ``unique_reactants`` with a fuzzy score above 70 is searched instead and
    the output starts with a "Did you mean ...?" line.

    Args:
        reactant: Reactant name typed or selected by the user. ``None``,
            empty and whitespace-only values are rejected; surrounding
            whitespace is ignored.

    Returns:
        A Markdown string with one ``**name**: reactants → products`` line
        per reaction, or a short message when the input is empty or nothing
        matches.
    """
    reactant = (reactant or "").strip()
    if not reactant:
        return "Please enter a reactant."
    return _search_reaction_components(
        'reactants',
        reactant,
        unique_reactants,
        "No reactions found with that reactant.",
    )


def search_by_product(product):
    """List up to five reactions that yield the given product.

    The ``products`` column of the module-level ``df`` is searched for a
    case-insensitive exact match. If there is none, the closest entry in
    ``unique_products`` with a fuzzy score above 70 is searched instead and
    the output starts with a "Did you mean ...?" line.

    Args:
        product: Product name typed or selected by the user. ``None``, empty
            and whitespace-only values are rejected; surrounding whitespace
            is ignored.

    Returns:
        A Markdown string with one ``**name**: reactants → products`` line
        per reaction, or a short message when the input is empty or nothing
        matches.
    """
    product = (product or "").strip()
    if not product:
        return "Please enter a product."
    return _search_reaction_components(
        'products',
        product,
        unique_products,
        "No reactions found with that product.",
    )
def get_autocomplete_reactions(query):
    """Suggest reaction names for a partially typed query.

    With an empty query the first ten entries of ``reaction_names`` are
    returned unchanged. Otherwise the ten best fuzzy matches are computed and
    only those scoring above 60 are kept.
    """
    if query:
        scored = process.extract(query, reaction_names, limit=10)
        return [candidate for candidate, score in scored if score > 60]
    return reaction_names[:10]
def get_autocomplete_reactants(query):
    """Suggest reactant names for a partially typed query.

    With an empty query the first ten entries of ``unique_reactants`` are
    returned unchanged. Otherwise the ten best fuzzy matches are computed and
    only those scoring above 60 are kept.
    """
    if query:
        scored = process.extract(query, unique_reactants, limit=10)
        return [candidate for candidate, score in scored if score > 60]
    return unique_reactants[:10]
def get_autocomplete_products(query):
    """Suggest product names for a partially typed query.

    With an empty query the first ten entries of ``unique_products`` are
    returned unchanged. Otherwise the ten best fuzzy matches are computed and
    only those scoring above 60 are kept.
    """
    if query:
        scored = process.extract(query, unique_products, limit=10)
        return [candidate for candidate, score in scored if score > 60]
    return unique_products[:10]
# Gradio UI: one tab per search mode plus an in-app API reference.
#
# NOTE: the order of the .click() registrations below is what the fn_index
# values in the "API Documentation" tab describe; keep the two in sync when
# adding or reordering handlers.
# NOTE(review): the hint text is passed as `info=` because `placeholder` is not
# among the documented gr.Dropdown parameters -- confirm against the Gradio
# version this Space pins.
with gr.Blocks(title="Organic Reactions Search") as demo:
    gr.Markdown("# Organic Reactions Search API")
    gr.Markdown("Search through the organic reactions dataset by name, reactant, or product.")

    with gr.Tab("Search by Reaction Name"):
        reaction_input = gr.Dropdown(
            label="Reaction Name",
            choices=reaction_names,
            allow_custom_value=True,
            info="Type or select a reaction name",
        )
        reaction_output = gr.Markdown(label="Result")
        reaction_btn = gr.Button("Search")
        reaction_btn.click(
            search_by_reaction_name,
            inputs=reaction_input,
            outputs=reaction_output,
            api_name="search_by_reaction_name",
        )

    with gr.Tab("Search by Reactant"):
        reactant_input = gr.Dropdown(
            label="Reactant",
            choices=unique_reactants,
            allow_custom_value=True,
            info="Type or select a reactant",
        )
        reactant_output = gr.Markdown(label="Results")
        reactant_btn = gr.Button("Search")
        reactant_btn.click(
            search_by_reactant,
            inputs=reactant_input,
            outputs=reactant_output,
            api_name="search_by_reactant",
        )

    with gr.Tab("View Reaction SVG"):
        svg_input = gr.Dropdown(
            label="Reaction Name",
            choices=reaction_names,
            allow_custom_value=True,
            info="Type or select a reaction name",
        )
        svg_output = gr.HTML(label="Reaction SVG")
        svg_btn = gr.Button("Generate SVG")
        svg_btn.click(
            generate_reaction_svg,
            inputs=svg_input,
            outputs=svg_output,
            api_name="generate_reaction_svg",
        )

    with gr.Tab("Search by Product"):
        product_input = gr.Dropdown(
            label="Product",
            choices=unique_products,
            allow_custom_value=True,
            info="Type or select a product",
        )
        product_output = gr.Markdown(label="Results")
        product_btn = gr.Button("Search")
        product_btn.click(
            search_by_product,
            inputs=product_input,
            outputs=product_output,
            api_name="search_by_product",
        )

    with gr.Tab("API Documentation"):
        gr.Markdown("""
        ## API Endpoints
        This Gradio app exposes the following functions as API endpoints. You can call them via HTTP POST requests to the `/api/predict` endpoint.

        ### Search by Reaction Name (fn_index: 0, api_name: `search_by_reaction_name`)
        - **Input**: `query` (string) - The reaction name to search
        - **Output**: Markdown string with reaction details

        ### Search by Reactant (fn_index: 1, api_name: `search_by_reactant`)
        - **Input**: `reactant` (string) - The reactant to search for
        - **Output**: Markdown string with matching reactions

        ### Generate Reaction SVG (fn_index: 2, api_name: `generate_reaction_svg`)
        - **Input**: `name` (string) - Exact reaction name (case-insensitive)
        - **Output**: SVG string of the reaction diagram

        ### Search by Product (fn_index: 3, api_name: `search_by_product`)
        - **Input**: `product` (string) - The product to search for
        - **Output**: Markdown string with matching reactions

        ### Autocomplete
        The autocomplete helpers for reaction names, reactants and products are
        not registered as endpoints; the dropdowns above filter their choices
        as you type.

        ### Example API Call
        ```bash
        curl -X POST "https://smitathkr1-namereaction-api.hf.space/api/predict" \\
        -H "Content-Type: application/json" \\
        -d '{"fn_index": 0, "data": ["appel-reaction"]}'
        ```
        Note: `fn_index` corresponds to the order in which the handlers are registered in the app (0-based).
        The exact routes depend on the installed Gradio version; the "Use via API" link in the app footer lists them.
        """)

if __name__ == "__main__":
    demo.launch()