Vaishnav14220 committed on
Commit
a34bcb7
·
1 Parent(s): 6d09811

Add organic reactions search app with Gradio interface

Browse files
Files changed (3) hide show
  1. README.md +29 -1
  2. app.py +118 -0
  3. requirements.txt +5 -0
README.md CHANGED
@@ -10,4 +10,32 @@ pinned: false
10
  license: apache-2.0
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  license: apache-2.0
11
  ---
12
 
13
+ # Organic Reactions Search
14
+
15
+ This is a Gradio-based web app for searching the organic reactions dataset from Hugging Face.
16
+
17
+ ## Dataset
18
+
19
+ The dataset used is `smitathkr1/organic_reactions_enhanced`, which contains information about various organic reactions including names, reactants, products, conditions, mechanisms, and descriptions.
20
+
21
+ ## Features
22
+
23
+ - **Search by Reaction Name**: Enter a reaction name to get its details; if there is no exact (case-insensitive) match, the closest fuzzy match is shown instead
24
+ - **Search by Reactant**: Find reactions that use a specific reactant
25
+ - **Search by Product**: Find reactions that produce a specific product
26
+ - **Autocomplete**: Get suggestions for reaction names, reactants, and products
27
+
28
+ ## Performance
29
+
30
+ The dataset is loaded into memory using pandas for fast searches. With only 828 entries, all operations are sub-second.
31
+
32
+ ## Local Development
33
+
34
+ To run locally:
35
+
36
+ ```bash
37
+ pip install -r requirements.txt
38
+ python app.py
39
+ ```
40
+
41
+ The app starts a local web server and prints its URL (http://127.0.0.1:7860 by default); open that URL in your browser.
app.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ from datasets import load_dataset
4
+ from fuzzywuzzy import process
5
+
6
# Load the reactions dataset and keep its "train" split in memory as a
# pandas DataFrame; every search below runs against this frame.
ds = load_dataset("smitathkr1/organic_reactions_enhanced")
df = ds['train'].to_pandas()

# Candidate lists consumed by the autocomplete helpers.
reaction_names = df['name'].unique().tolist()

# Flatten the per-reaction collections into one flat list each.
all_reactants = [item for entry in df['reactants'] for item in entry]
all_products = [item for entry in df['products'] for item in entry]

# De-duplicate. Set iteration order is arbitrary, so these lists carry no
# meaningful ordering.
unique_reactants = list(set(all_reactants))
unique_products = list(set(all_products))
21
+
22
def _format_reaction(row, note=""):
    """Render one reaction row as the Markdown shown in the result pane.

    Args:
        row: Mapping-like row providing 'name', 'reactants', 'products'
            and 'description'.
        note: Text appended right after the bold reaction name
            (e.g. " (closest match)").

    Returns:
        A Markdown string with name, reactants, products and a description
        limited to 500 characters.
    """
    description = row['description']
    # A missing description (None/NaN) must not crash the slicing below.
    if not isinstance(description, str):
        description = ""
    # Only add an ellipsis when the text was actually cut.
    if len(description) > 500:
        description = description[:500] + "..."
    return (
        f"**{row['name']}**{note}\n\n"
        f"**Reactants:** {', '.join(row['reactants'])}\n\n"
        f"**Products:** {', '.join(row['products'])}\n\n"
        f"**Description:** {description}"
    )


def search_by_reaction_name(query):
    """Look up a single reaction by name and return its details as Markdown.

    A case-insensitive exact match on the 'name' column is tried first. If
    that finds nothing, the best fuzzy match among ``reaction_names`` is used,
    provided its score is above 80.

    Args:
        query: Reaction name typed by the user. Surrounding whitespace is
            ignored; an empty, blank or None value yields a prompt message.

    Returns:
        Markdown describing the matched reaction, or a plain message when the
        query is empty or nothing matches.
    """
    query = query.strip() if query else ""
    if not query:
        return "Please enter a reaction name."

    # Exact (case-insensitive) match first.
    result = df[df['name'].str.lower() == query.lower()]
    if not result.empty:
        return _format_reaction(result.iloc[0])

    # Fall back to the single best fuzzy candidate.
    matches = process.extract(query, reaction_names, limit=1)
    if matches and matches[0][1] > 80:
        best_match = matches[0][0]
        result = df[df['name'] == best_match]
        return _format_reaction(result.iloc[0], note=" (closest match)")
    return "No matching reaction found."
38
+
39
def search_by_reactant(reactant):
    """List reactions whose reactants include ``reactant``.

    The comparison is a case-insensitive exact match against each entry of a
    row's 'reactants' collection. At most the first five matching reactions
    are reported.

    Args:
        reactant: Reactant name typed by the user. Surrounding whitespace is
            ignored; an empty, blank or None value yields a prompt message.

    Returns:
        One Markdown line per matching reaction (separated by blank lines),
        or a plain message when the input is empty or nothing matches.
    """
    # Normalize once instead of lowercasing the query again for every row.
    needle = reactant.strip().lower() if reactant else ""
    if not needle:
        return "Please enter a reactant."

    matches = df[df['reactants'].apply(
        lambda names: any(name.lower() == needle for name in names)
    )]
    if matches.empty:
        return "No reactions found with that reactant."

    return "\n\n".join(
        f"**{row['name']}**: {', '.join(row['reactants'])} → {', '.join(row['products'])}"
        for _, row in matches.head(5).iterrows()
    )
49
+
50
def search_by_product(product):
    """List reactions whose products include ``product``.

    The comparison is a case-insensitive exact match against each entry of a
    row's 'products' collection. At most the first five matching reactions
    are reported.

    Args:
        product: Product name typed by the user. Surrounding whitespace is
            ignored; an empty, blank or None value yields a prompt message.

    Returns:
        One Markdown line per matching reaction (separated by blank lines),
        or a plain message when the input is empty or nothing matches.
    """
    # Normalize once instead of lowercasing the query again for every row.
    needle = product.strip().lower() if product else ""
    if not needle:
        return "Please enter a product."

    matches = df[df['products'].apply(
        lambda names: any(name.lower() == needle for name in names)
    )]
    if matches.empty:
        return "No reactions found with that product."

    return "\n\n".join(
        f"**{row['name']}**: {', '.join(row['reactants'])} → {', '.join(row['products'])}"
        for _, row in matches.head(5).iterrows()
    )
60
+
61
def get_autocomplete_reactions(query):
    """Suggest reaction names resembling ``query``.

    With an empty query the first ten entries of ``reaction_names`` are
    returned. Otherwise the ten best fuzzy candidates are fetched and only
    those scoring above 60 are kept.
    """
    if query:
        scored = process.extract(query, reaction_names, limit=10)
        return [candidate for candidate, score in scored if score > 60]
    return reaction_names[:10]
66
+
67
def get_autocomplete_reactants(query):
    """Suggest reactant names resembling ``query``.

    With an empty query the first ten entries of ``unique_reactants`` are
    returned. Otherwise the ten best fuzzy candidates are fetched and only
    those scoring above 60 are kept.
    """
    if query:
        scored = process.extract(query, unique_reactants, limit=10)
        return [candidate for candidate, score in scored if score > 60]
    return unique_reactants[:10]
72
+
73
def get_autocomplete_products(query):
    """Suggest product names resembling ``query``.

    With an empty query the first ten entries of ``unique_products`` are
    returned. Otherwise the ten best fuzzy candidates are fetched and only
    those scoring above 60 are kept.
    """
    if query:
        scored = process.extract(query, unique_products, limit=10)
        return [candidate for candidate, score in scored if score > 60]
    return unique_products[:10]
78
+
79
def _as_lines(suggest):
    """Adapt a suggestion function for display in a multi-line Textbox.

    The autocomplete functions return lists, but a Textbox output shows a
    single string; passing the list through directly would display its Python
    repr. The returned handler joins the suggestions one per line instead.
    """
    def handler(query):
        return "\n".join(str(item) for item in suggest(query))
    return handler


# Build the UI: one tab per search mode plus a tab with live autocomplete.
with gr.Blocks(title="Organic Reactions Search") as demo:
    gr.Markdown("# Organic Reactions Search API")
    gr.Markdown("Search through the organic reactions dataset by name, reactant, or product.")

    with gr.Tab("Search by Reaction Name"):
        reaction_input = gr.Textbox(label="Reaction Name", placeholder="e.g., appel-reaction")
        reaction_output = gr.Markdown(label="Result")
        reaction_btn = gr.Button("Search")
        reaction_btn.click(search_by_reaction_name, inputs=reaction_input, outputs=reaction_output)

    with gr.Tab("Search by Reactant"):
        reactant_input = gr.Textbox(label="Reactant", placeholder="e.g., alcohol")
        reactant_output = gr.Markdown(label="Results")
        reactant_btn = gr.Button("Search")
        reactant_btn.click(search_by_reactant, inputs=reactant_input, outputs=reactant_output)

    with gr.Tab("Search by Product"):
        product_input = gr.Textbox(label="Product", placeholder="e.g., ester")
        product_output = gr.Markdown(label="Results")
        product_btn = gr.Button("Search")
        product_btn.click(search_by_product, inputs=product_input, outputs=product_output)

    with gr.Tab("Autocomplete"):
        # Suggestions refresh on every change of the query box.
        gr.Markdown("### Reaction Names")
        reaction_query = gr.Textbox(label="Query")
        reaction_suggestions = gr.Textbox(label="Suggestions", lines=5, interactive=False)
        reaction_query.change(_as_lines(get_autocomplete_reactions), inputs=reaction_query, outputs=reaction_suggestions)

        gr.Markdown("### Reactants")
        reactant_query = gr.Textbox(label="Query")
        reactant_suggestions = gr.Textbox(label="Suggestions", lines=5, interactive=False)
        reactant_query.change(_as_lines(get_autocomplete_reactants), inputs=reactant_query, outputs=reactant_suggestions)

        gr.Markdown("### Products")
        product_query = gr.Textbox(label="Query")
        product_suggestions = gr.Textbox(label="Suggestions", lines=5, interactive=False)
        product_query.change(_as_lines(get_autocomplete_products), inputs=product_query, outputs=product_suggestions)

# Start the Gradio server only when run as a script.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ datasets
2
+ pandas
3
+ gradio
4
+ fuzzywuzzy
5
+ python-levenshtein