XMMR12 committed on
Commit
8cca16d
·
verified ·
1 Parent(s): c10c57f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +154 -339
app.py CHANGED
@@ -1,339 +1,154 @@
1
- import os
2
- import gradio as gr
3
- from duckduckgo_search import DDGS
4
- import sqlite3
5
- import json
6
- import requests
7
- from typing import List, Dict, Optional
8
- import time
9
- from bs4 import BeautifulSoup
10
-
11
- specialtoken=os.getenv("SPECIALTOKEN")
12
-
13
-
14
-
15
- #plants = ["Echinacea", "Ginkgo biloba", "Turmeric"]
16
-
17
- #Unused
18
- def GET_full_plant_information(plant_name:str):
19
- """ """
20
- query = f"{plant_name} plant medicinal uses scientific information site:.edu OR site:.gov OR site:.org"
21
- search_results = DDGS().text(keywords=query, max_results=3)
22
- content=""
23
- for result in search_results:
24
- content+=fetch_page_content(result['href']);
25
- PROMPT_TEMPLATE = f"""
26
- Extract plant information from the following content in JSON format with these keys:
27
- ["Name", "Scientific Name", "Alternate Names", "Description", "Plant Family",
28
- "Origin", "Growth Habitat", "Active Components", "Treatable Conditions",
29
- "Preparation Methods", "Dosage", "Duration", "Contraindications", "Side Effects",
30
- "Interactions", "Part Used", "Harvesting Time", "Storage Tips", "Images",
31
- "Related Videos", "Sources"]
32
-
33
- Plant: {plant_name}
34
- Content:
35
- {content}
36
-
37
- Output ONLY valid JSON with the specified keys. Use empty strings for missing information."""
38
- response = requests.get(f"{specialtoken}/{PROMPT_TEMPLATE}")
39
- return response.json()
40
- #end Unused
41
-
42
- # Database setup
43
- DB_NAME = "plants.db"
44
-
45
- def init_db():
46
- conn = sqlite3.connect(DB_NAME)
47
- cursor = conn.cursor()
48
- cursor.execute('''
49
- CREATE TABLE IF NOT EXISTS plants (
50
- id INTEGER PRIMARY KEY AUTOINCREMENT,
51
- name TEXT NOT NULL,
52
- scientific_name TEXT,
53
- alternate_names TEXT,
54
- description TEXT,
55
- plant_family TEXT,
56
- origin TEXT,
57
- growth_habitat TEXT,
58
- active_components TEXT,
59
- treatable_conditions TEXT,
60
- preparation_methods TEXT,
61
- dosage TEXT,
62
- duration TEXT,
63
- contraindications TEXT,
64
- side_effects TEXT,
65
- interactions TEXT,
66
- part_used TEXT,
67
- harvesting_time TEXT,
68
- storage_tips TEXT,
69
- images TEXT,
70
- related_videos TEXT,
71
- sources TEXT
72
- )
73
- ''')
74
- conn.commit()
75
- conn.close()
76
-
77
- def process_with_ai(raw_data: str, plant_name: str) -> Dict:
78
- """Process raw plant data through AI to extract structured information"""
79
- PROMPT_TEMPLATE = """You are a botanist assistant that extracts and structures information about medicinal plants."""
80
- PROMPT = f"""Extract plant information from the following content (if available) in JSON format with these keys:
81
- ["Name", "Scientific Name", "Alternate Names", "Description", "Plant Family",
82
- "Origin", "Growth Habitat", "Active Components", "Treatable Conditions",
83
- "Preparation Methods", "Dosage", "Duration", "Contraindications", "Side Effects",
84
- "Interactions", "Part Used", "Harvesting Time", "Storage Tips", "Images",
85
- "Related Videos", "Sources"]
86
-
87
- Plant: {plant_name}
88
- Content:
89
- {raw_data}
90
-
91
- Output ONLY valid JSON with the specified keys. Use empty strings for missing information."""
92
-
93
- payload = {
94
- "model": "openai",
95
- "messages": [
96
- {"role": "system", "content": PROMPT_TEMPLATE},
97
- #{"role": "user", "content": f"Process this information about {plant_name}:\n{raw_data}"}
98
- {"role": "user", "content": PROMPT}
99
- ],
100
- #"response_format": { "type": "json_object" }
101
- }
102
-
103
- try:
104
- resp = requests.post(
105
- specialtoken,
106
- json=payload,
107
- headers={"Content-Type": "application/json"},
108
- timeout=30
109
- )
110
- resp.raise_for_status()
111
-
112
- # Improved response handling
113
- response_json = resp.json()
114
- if "choices" not in response_json or not response_json["choices"]:
115
- raise ValueError("Invalid API response format")
116
-
117
- ai_response = response_json["choices"]["message"]["content"]
118
- print(ai_response)
119
- try:
120
- return json.loads(ai_response)
121
- except json.JSONDecodeError:
122
- # If the response isn't valid JSON, create a minimal response
123
- return {
124
- "Name": plant_name,
125
- "Description": raw_data,
126
- "Sources": "Search results",
127
- "Error": "AI response format invalid"
128
- }
129
-
130
- except Exception as e:
131
- print(f"AI processing error for {plant_name}: {str(e)}")
132
- return {
133
- "Name": plant_name,
134
- "Description": raw_data,
135
- "Sources": "Search results",
136
- "Error": f"Processing failed: {str(e)}"
137
- }
138
-
139
- def fetch_plant_data(plant_name: str) -> Optional[Dict]:
140
- """Fetch and process plant data
141
- Args:
142
- plant_name: The plant name to search on.
143
- Returns:
144
- The top 3 results with title, link and body, or an error message if no results or API issues."""
145
- try:
146
- # 1. Get raw data from internet
147
- query = f"{plant_name} plant medicinal uses scientific information site:.edu OR site:.gov OR site:.org"
148
- search_results = DDGS().text(keywords=query, max_results=3)
149
-
150
- if not search_results:
151
- return {
152
- "Name": plant_name,
153
- "Error": "No search results found"
154
- }
155
-
156
- #raw_data = "\n".join([f"Source: {r['href']}\nContent: {r['body']}" for r in search_results])
157
- raw_data=""
158
- for result in search_results:
159
- raw_data+=fetch_page_content(result['href']);
160
-
161
- # 2. Process with AI
162
- processed_data = process_with_ai(raw_data, plant_name)
163
- if not processed_data:
164
- return {
165
- "Name": plant_name,
166
- "Error": "AI processing returned empty response"
167
- }
168
-
169
- # 3. Add metadata
170
- processed_data["Name"] = plant_name
171
- if "Sources" not in processed_data:
172
- processed_data["Sources"] = "\n".join([r["href"] for r in search_results])
173
-
174
- return processed_data
175
- except Exception as e:
176
- print(f"Error processing {plant_name}: {e}")
177
- return {
178
- "Name": plant_name,
179
- "Error": f"Processing failed: {str(e)}"
180
- }
181
-
182
-
183
- """
184
- Searches DuckDuckGo and returns the first result.
185
- Args:
186
- query: The search query.
187
- Returns:
188
- The first result's title and link, or an error message if no results or API issues.
189
- """
190
-
191
-
192
- #TODO arrange the logic together with tools
193
- def fetch_page_content(url: str):
194
- """Get webpage content with error handling"""
195
- try:
196
- response = requests.get(url, timeout=10)
197
- soup = BeautifulSoup(response.text, 'html.parser')
198
-
199
- # Remove unwanted elements
200
- for element in soup(['script', 'style', 'header', 'footer', 'nav']):
201
- element.decompose()
202
-
203
- text = soup.get_text(separator='\n', strip=True)
204
- for each in ["Page not available","403 Forbidden"]:
205
- if each in text:
206
- return ""
207
- return text[:5000] # Limit to 5k characters
208
- except Exception as e:
209
- return f"Error fetching page: {str(e)}"
210
-
211
- def save_to_db(plant_data: Dict) -> bool:
212
- """Save processed plant data to database"""
213
- try:
214
- conn = sqlite3.connect(DB_NAME)
215
- cursor = conn.cursor()
216
-
217
- # Convert arrays to strings if they exist
218
- for field in ["Alternate Names", "Active Components", "Treatable Conditions",
219
- "Preparation Methods", "Contraindications", "Side Effects",
220
- "Interactions"]:
221
- if field in plant_data:
222
- if isinstance(plant_data[field], list):
223
- plant_data[field] = ", ".join(plant_data[field])
224
- elif not isinstance(plant_data[field], str):
225
- plant_data[field] = str(plant_data[field])
226
-
227
- columns = []
228
- values = []
229
- for key, value in plant_data.items():
230
- if key.lower() == "error": # Skip error field
231
- continue
232
- columns.append(key.lower().replace(" ", "_"))
233
- values.append(str(value) if value else None)
234
-
235
- columns_str = ", ".join(columns)
236
- placeholders = ", ".join(["?"] * len(columns))
237
-
238
- cursor.execute(
239
- f"INSERT INTO plants ({columns_str}) VALUES ({placeholders})",
240
- values
241
- )
242
-
243
- conn.commit()
244
- conn.close()
245
- return True
246
- except Exception as e:
247
- print(f"Database save error: {e}")
248
- return False
249
-
250
- def process_plants(plants_array: List[str]) -> str:
251
- """Main processing pipeline"""
252
- results = []
253
- for plant in plants_array:
254
- plant = plant.strip()
255
- if not plant:
256
- continue
257
-
258
- print(f"Processing {plant}...")
259
- plant_data = fetch_plant_data(plant)
260
-
261
- if plant_data:
262
- save_success = save_to_db(plant_data)
263
- plant_data["Database_Save_Success"] = save_success
264
- results.append(plant_data)
265
-
266
- time.sleep(2) # Rate limiting
267
-
268
- return json.dumps(results, indent=2) if results else json.dumps({"message": "No valid results found"})
269
-
270
-
271
- #For View:
272
- def get_all_plants() -> List[Dict]:
273
- """Retrieve all plants from database"""
274
- try:
275
- conn = sqlite3.connect(DB_NAME)
276
- conn.row_factory = sqlite3.Row
277
- cursor = conn.cursor()
278
- cursor.execute("SELECT `_rowid_`,* FROM plants ORDER BY `_rowid_` DESC")
279
- plants = [dict(row) for row in cursor.fetchall()]
280
- conn.close()
281
- return plants
282
- except Exception as e:
283
- print(f"Database retrieval error: {e}")
284
- return [{"Error": "Failed to retrieve data from database"}]
285
-
286
- # Initialize database
287
- init_db()
288
-
289
- # Gradio Interface
290
- with gr.Blocks(title="AI-Powered Medicinal Plants Database") as app:
291
- gr.Markdown("# 🌿 AI-Powered Medicinal Plants Database")
292
-
293
- with gr.Tab("Fetch & Process Plants"):
294
- gr.Markdown("### Enter plant names (comma separated)")
295
- with gr.Row():
296
- plant_input = gr.Textbox(label="Plant Names",
297
- placeholder="e.g., Neem, Peppermint, Aloe Vera")
298
- fetch_btn = gr.Button("Process Plants", variant="primary")
299
-
300
- json_output = gr.JSON(label="AI-Processed Results")
301
-
302
- fetch_btn.click(
303
- fn=lambda x: process_plants([p.strip() for p in x.split(",")]),
304
- inputs=plant_input,
305
- outputs=json_output
306
- )
307
-
308
- with gr.Tab("View Database"):
309
- gr.Markdown("### Stored Plant Information")
310
- with gr.Row():
311
- refresh_btn = gr.Button("Refresh Data", variant="secondary")
312
- clear_db = gr.Button("Clear Database", variant="stop")
313
-
314
- db_table = gr.Dataframe(
315
- headers=["id", "name", "scientific_name", "description"],
316
- datatype=["number", "str", "str", "str"],
317
- col_count=(4, "fixed"),
318
- interactive=True
319
- )
320
-
321
- refresh_btn.click(
322
- fn=get_all_plants,
323
- outputs=db_table
324
- )
325
-
326
- def clear_database():
327
- conn = sqlite3.connect(DB_NAME)
328
- conn.execute("DELETE FROM plants")
329
- conn.commit()
330
- conn.close()
331
- return []
332
-
333
- clear_db.click(
334
- fn=clear_database,
335
- outputs=db_table
336
- )
337
-
338
- if __name__ == "__main__":
339
- app.launch(debug=True, share=False)
 
1
+ import os
2
+ import gradio as gr
3
+ from duckduckgo_search import DDGS
4
+ import sqlite3
5
+ import json
6
+ import requests
7
+ from typing import List, Dict, Optional
8
+ import time
9
+ from bs4 import BeautifulSoup
10
+
11
# Read from the environment; used below as the URL prefix of the extraction request.
specialtoken = os.environ.get("SPECIALTOKEN")

# Larger candidate list, kept for reference:
# plants=['Turmeric', 'Aloe Vera', 'Neem', 'Tulsi', 'Ashwagandha', 'Ginger', 'Basil', 'Peppermint',
#         'Lavender', 'Eucalyptus', 'Chamomile', 'Sandalwood', 'Giloy', 'Haritaki', 'Brahmi',
#         'Gotu Kola', 'Holy Basil', 'Fenugreek', 'Licorice', 'Fennel', 'Cinnamon', 'Clove',
#         'Black Pepper', 'Cardamom', 'Neem', 'Indian Gooseberry', 'Saffron', 'Thyme', 'Valerian',
#         'Marigold', 'Ginseng', 'Dandelion', 'Hibiscus', 'Milk Thistle', 'Magnolia',
#         "St. John's Wort", 'Yarrow', 'Calendula', 'Coriander', 'Senna', 'Echinacea', 'Moringa',
#         'Plantain', 'Amla', 'Shatavari', 'Peppermint', 'Chamomile', 'Gotu Kola', 'Ashoka',
#         'Arnica', 'Burdock Root', "Cat's Claw", "Devil's Claw", 'Elderberry', 'Feverfew',
#         'Ginkgo Biloba', 'Goldenseal', 'Hawthorn', 'Kava', 'Lemon Balm', 'Marshmallow Root',
#         'Nettle', 'Olive Leaf', 'Passionflower', 'Red Clover', 'Reishi Mushroom', 'Rhodiola',
#         'Sage', 'Saw Palmetto', 'Slippery Elm', 'Stinging Nettle', 'Witch Hazel', 'Yellow Dock',
#         'Ashitaba', 'Bael', 'Bacopa', 'Cumin', 'Guduchi', 'Jamun', 'Jatamansi', 'Karela',
#         'Gudmar', 'Schisandra', 'Baikal Skullcap', 'Mullein', 'Chrysanthemum', 'Catuaba',
#         'Dong Quai', 'Jiaogulan', 'Muira Puama', 'Catnip', 'Olive']

# Default batch of plant names.
plants = [
    "Echinacea",
    "Ginkgo biloba",
    "Turmeric",
]

# Prompt sent to the model; filled in with str.format(plant_name=..., content=...).
PROMPT_TEMPLATE = """
Extract plant information from the following content in JSON format with these keys:
["Name", "Scientific Name", "Alternate Names", "Description", "Plant Family",
"Origin", "Growth Habitat", "Active Components", "Treatable Conditions",
"Preparation Methods", "Dosage", "Duration", "Contraindications", "Side Effects",
"Interactions", "Part Used", "Harvesting Time", "Storage Tips", "Images",
"Related Videos", "Sources"]

Plant: {plant_name}
Content:
{content}

Output ONLY valid JSON with the specified keys. Use empty strings for missing information.
"""
29
+
30
def fetch_page_content(url: str, max_chars: int = 5000) -> str:
    """Download a web page and return its visible text, best effort.

    Args:
        url: Address of the page to download.
        max_chars: Upper bound on the number of characters returned
            (defaults to the previous hard-coded limit of 5000).

    Returns:
        The page text with script/style/header/footer/nav elements removed,
        truncated to ``max_chars``. An empty string is returned when the
        download or parsing fails, when the server answers with an HTTP error
        status, or when the text contains a known "blocked" marker.
    """
    blocked_markers = ("Page not available", "403 Forbidden")
    try:
        response = requests.get(url, timeout=10)
        # Treat 4xx/5xx answers as "no content" rather than parsing an error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        # Remove elements that never carry article text.
        for element in soup(["script", "style", "header", "footer", "nav"]):
            element.decompose()

        text = soup.get_text(separator="\n", strip=True)
    except Exception as exc:
        # Deliberately broad: one bad page must not abort the pipeline. The
        # error is logged and NOT returned, because callers concatenate the
        # return value into the model prompt as if it were page content.
        print(f"Error fetching page {url}: {exc}")
        return ""

    if any(marker in text for marker in blocked_markers):
        return ""
    return text[:max_chars]
47
+
48
def search_full_plant_information(plant_name: str, max_search_attempts: int = 5):
    """Search the web for a plant and ask the model to structure the findings.

    The function runs a DuckDuckGo text search (up to three results), downloads
    each result page with ``fetch_page_content``, fills ``PROMPT_TEMPLATE`` with
    the collected text and sends the prompt to the endpoint held in
    ``specialtoken``.

    Args:
        plant_name: Name of the plant to look up.
        max_search_attempts: How many times the search is tried before giving
            up. The wait between attempts is two seconds.

    Returns:
        The decoded JSON body of the endpoint's response. If the search keeps
        failing, or the request fails or does not return JSON, a dict with the
        keys ``"Name"`` and ``"Error"`` is returned instead.
    """
    # Local import: urllib.parse is not among the file's top-level imports.
    from urllib.parse import quote

    query = f"{plant_name} plant medicinal uses scientific information site:.edu OR site:.gov OR site:.org"

    search_results = None
    last_error = None
    # Bounded retry: the search is retried a few times, never forever.
    for attempt in range(1, max_search_attempts + 1):
        try:
            search_results = DDGS().text(keywords=query, max_results=3)
            break
        except Exception as exc:  # the search client's exception types are not visible here
            last_error = exc
            print(f"Search attempt {attempt}/{max_search_attempts} for {plant_name} failed: {exc}")
            if attempt < max_search_attempts:
                time.sleep(2)

    if search_results is None:
        return {"Name": plant_name, "Error": f"Search failed: {last_error}"}

    content = "".join(fetch_page_content(result["href"]) for result in search_results)
    prompt = PROMPT_TEMPLATE.format(plant_name=plant_name, content=content)

    try:
        # The prompt travels in the URL path, so it must be percent-encoded:
        # raw '#', '?', '/' or newlines from page text would otherwise cut or
        # corrupt the request.
        response = requests.get(f"{specialtoken}/{quote(prompt, safe='')}", timeout=60)
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers a body that is not valid JSON.
        print(f"AI request for {plant_name} failed: {exc}")
        return {"Name": plant_name, "Error": f"Processing failed: {exc}"}
68
+
69
DB_NAME = "plants.db"

# Data columns of the ``plants`` table (``id`` is generated by SQLite).
# Only these names are ever interpolated into SQL text.
_PLANT_COLUMNS = (
    "name", "scientific_name", "alternate_names", "description", "plant_family",
    "origin", "growth_habitat", "active_components", "treatable_conditions",
    "preparation_methods", "dosage", "duration", "contraindications", "side_effects",
    "interactions", "part_used", "harvesting_time", "storage_tips", "images",
    "related_videos", "sources",
)

# Columns whose list values are stored as a comma-separated string.
_JOINED_COLUMNS = frozenset({
    "alternate_names", "active_components", "treatable_conditions",
    "preparation_methods", "contraindications", "side_effects", "interactions",
})

_CREATE_PLANTS_TABLE = (
    "CREATE TABLE IF NOT EXISTS plants ("
    "id INTEGER PRIMARY KEY AUTOINCREMENT, name TEXT NOT NULL, "
    + ", ".join(f"{column} TEXT" for column in _PLANT_COLUMNS[1:])
    + ")"
)


def save_to_db(plant_data: Dict) -> bool:
    """Insert one plant record into the ``plants`` table of ``DB_NAME``.

    Keys are mapped to column names by lower-casing and replacing spaces with
    underscores. Keys that do not map to a known column (including ``"Error"``)
    are ignored. The table is created on first use. ``plant_data`` itself is
    not modified.

    Args:
        plant_data: Mapping of field name to value, e.g. the model's JSON reply.

    Returns:
        True when a row was inserted, False when the input is not a dict,
        contains no known column, or SQLite reports an error.
    """
    if not isinstance(plant_data, dict):
        print(f"Database save error: expected a dict, got {type(plant_data).__name__}")
        return False

    row = {}
    for key, value in plant_data.items():
        column = str(key).lower().replace(" ", "_")
        if column not in _PLANT_COLUMNS:
            continue  # unknown keys never reach the SQL statement
        if column in _JOINED_COLUMNS and not isinstance(value, str):
            if isinstance(value, list):
                value = ", ".join(map(str, value))
            else:
                value = str(value)
        # Empty/falsy values are stored as NULL.
        row[column] = str(value) if value else None

    if not row:
        print("Database save error: no known columns in plant data")
        return False

    try:
        conn = sqlite3.connect(DB_NAME)
    except sqlite3.Error as exc:
        print(f"Database save error: {exc}")
        return False

    try:
        conn.execute(_CREATE_PLANTS_TABLE)
        columns_str = ", ".join(row)
        placeholders = ", ".join("?" for _ in row)
        conn.execute(
            f"INSERT INTO plants ({columns_str}) VALUES ({placeholders})",
            list(row.values()),
        )
        conn.commit()
        return True
    except sqlite3.Error as exc:
        print(f"Database save error: {exc}")
        return False
    finally:
        # Close on every path so a failed insert does not leak the connection.
        conn.close()
108
+
109
+
110
def process_plants(plants_array: List[str], delay_seconds: float = 2.0) -> str:
    """Look up, store and report every plant named in ``plants_array``.

    For each non-blank name the plant information is fetched with
    ``search_full_plant_information`` and written with ``save_to_db``. A failure
    on one plant is recorded in the output and does not stop the batch.

    Args:
        plants_array: Plant names; surrounding whitespace is stripped and
            blank entries are skipped.
        delay_seconds: Pause between two consecutive plants (rate limiting).

    Returns:
        A JSON string: a list with one object per processed plant, each
        carrying a ``"Database_Save_Success"`` flag, or
        ``{"message": "No valid results found"}`` when nothing was produced.
    """
    names = [name.strip() for name in plants_array if name and name.strip()]
    results = []

    for index, plant in enumerate(names):
        print(f"Processing {plant}...")
        try:
            plant_data = search_full_plant_information(plant)
        except Exception as exc:
            # Batch boundary: log the failure and carry on with the next plant.
            print(f"Error processing {plant}: {exc}")
            results.append({
                "Name": plant,
                "Error": f"Processing failed: {exc}",
                "Database_Save_Success": False,
            })
        else:
            if isinstance(plant_data, dict):
                if plant_data:
                    save_success = save_to_db(plant_data)
                    plant_data["Database_Save_Success"] = save_success
                    results.append(plant_data)
            elif plant_data:
                # A JSON reply that is a list/str/number cannot be stored as a row.
                results.append({
                    "Name": plant,
                    "Error": f"Unexpected response type: {type(plant_data).__name__}",
                    "Database_Save_Success": False,
                })

        # Rate limiting is only needed between requests, not after the last one.
        if index < len(names) - 1:
            time.sleep(delay_seconds)

    return json.dumps(results, indent=2) if results else json.dumps({"message": "No valid results found"})
129
+
130
+
131
+ #use it here :
132
+ #process_plants(plants)
133
+
134
+ #or use interface:
135
def _on_process_click(raw_names: str) -> str:
    """Handle the "Process Plants" button.

    Splits the textbox content on commas and runs the pipeline on the names
    entered; when the textbox is empty, the module-level default ``plants``
    list is processed instead.
    """
    requested = [name for name in (raw_names or "").split(",") if name.strip()]
    return process_plants(requested or plants)


# Gradio interface
with gr.Blocks(title="AI-Powered Medicinal Plants Database") as app:
    gr.Markdown("# 🌿 AI-Powered Medicinal Plants Database")
    with gr.Tab("Fetch & Process Plants"):
        gr.Markdown("### Enter plant names (comma separated)")
        with gr.Row():
            plant_input = gr.Textbox(label="Plant Names",
                                     placeholder="e.g., Neem, Peppermint, Aloe Vera")
            fetch_btn = gr.Button("Process Plants", variant="primary")

        json_output = gr.JSON(label="AI-Processed Results")

        # ``fn`` must be a callable. Passing ``process_plants(plants)`` would run
        # the whole pipeline while the UI is being built and register its string
        # result as the handler.
        fetch_btn.click(
            fn=_on_process_click,
            inputs=plant_input,
            outputs=json_output,
        )

if __name__ == "__main__":
    app.launch(debug=True, share=False)