Vaishnav14220 committed on
Commit
40820ca
·
1 Parent(s): d9b92b2

Add Hugging Face dataset push functionality for permanent database updates

Browse files
Files changed (3) hide show
  1. __pycache__/app.cpython-311.pyc +0 -0
  2. app.py +46 -6
  3. requirements.txt +2 -1
__pycache__/app.cpython-311.pyc ADDED
Binary file (36.4 kB). View file
 
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import gradio as gr
2
  import pandas as pd
3
- from datasets import load_dataset
4
  from fuzzywuzzy import process
5
  from rdkit import Chem
6
  from rdkit.Chem import AllChem, Draw
@@ -15,6 +15,7 @@ from reportlab.lib.pagesizes import letter
15
  from reportlab.lib.styles import getSampleStyleSheet
16
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image
17
  from svglib.svglib import svg2rlg
 
18
 
19
  # Load dataset
20
  dataset = load_dataset("smitathkr1/organic_reactions_corrected")
@@ -309,25 +310,64 @@ def update_database_with_ai_fix(password):
309
  if idx not in df.index:
310
  return f"❌ Row index {idx} not found in database. The data may have been modified."
311
 
 
 
 
 
312
  df.at[idx, 'corrected_name'] = last_ai_fix['updated_data'].get('reaction name', last_ai_fix['reaction_name'])
313
  df.at[idx, 'general_reactants'] = ', '.join(last_ai_fix['updated_data'].get('reactants', []))
314
  df.at[idx, 'general_reagents'] = ', '.join(last_ai_fix['updated_data'].get('reagents', []))
315
  df.at[idx, 'general_products'] = ', '.join(last_ai_fix['updated_data'].get('products', []))
316
  df.at[idx, 'description'] = last_ai_fix['updated_data'].get('description', df.at[idx, 'description'])
317
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
318
  # Log the update
319
- log_entry = f"[{last_ai_fix['timestamp']}] Database updated: '{last_ai_fix['reaction_name']}' -> '{last_ai_fix['updated_data'].get('reaction name', last_ai_fix['reaction_name'])}'\n"
320
  with open('database_updates.log', 'a') as f:
321
  f.write(log_entry)
322
 
323
- # Clear the last fix
324
- last_ai_fix = None
325
-
326
  # Update the global reaction_names list in case the name changed
327
  global reaction_names
328
  reaction_names = df['corrected_name'].unique().tolist()
329
 
330
- return f"✅ **Database Updated Successfully!**\n\nThe reaction has been permanently updated in the current session. Changes logged to 'database_updates.log'.\n\n**Note:** Since this is a Hugging Face Space, the changes are session-only. To make permanent changes, please update your local dataset and re-upload."
 
 
 
 
 
 
 
 
 
 
 
 
 
331
 
332
  except Exception as e:
333
  return f"❌ Error updating database: {str(e)}"
 
1
  import gradio as gr
2
  import pandas as pd
3
+ from datasets import load_dataset, Dataset
4
  from fuzzywuzzy import process
5
  from rdkit import Chem
6
  from rdkit.Chem import AllChem, Draw
 
15
  from reportlab.lib.styles import getSampleStyleSheet
16
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image
17
  from svglib.svglib import svg2rlg
18
+ from huggingface_hub import HfApi
19
 
20
  # Load dataset
21
  dataset = load_dataset("smitathkr1/organic_reactions_corrected")
 
310
  if idx not in df.index:
311
  return f"❌ Row index {idx} not found in database. The data may have been modified."
312
 
313
+ # Store original values for logging
314
+ original_name = df.at[idx, 'corrected_name']
315
+
316
+ # Update the dataframe
317
  df.at[idx, 'corrected_name'] = last_ai_fix['updated_data'].get('reaction name', last_ai_fix['reaction_name'])
318
  df.at[idx, 'general_reactants'] = ', '.join(last_ai_fix['updated_data'].get('reactants', []))
319
  df.at[idx, 'general_reagents'] = ', '.join(last_ai_fix['updated_data'].get('reagents', []))
320
  df.at[idx, 'general_products'] = ', '.join(last_ai_fix['updated_data'].get('products', []))
321
  df.at[idx, 'description'] = last_ai_fix['updated_data'].get('description', df.at[idx, 'description'])
322
 
323
+ # Try to push to Hugging Face
324
+ hf_token = os.getenv('HF_TOKEN')
325
+ if hf_token:
326
+ try:
327
+ # Convert back to Hugging Face dataset
328
+ updated_dataset = Dataset.from_pandas(df)
329
+
330
+ # Initialize HF API
331
+ api = HfApi()
332
+
333
+ # Push to Hugging Face
334
+ updated_dataset.push_to_hub(
335
+ "smitathkr1/organic_reactions_corrected",
336
+ token=hf_token,
337
+ commit_message=f"AI fix: Updated reaction '{original_name}' -> '{df.at[idx, 'corrected_name']}'"
338
+ )
339
+
340
+ push_success = True
341
+ except Exception as push_error:
342
+ push_success = False
343
+ push_error_msg = str(push_error)
344
+ else:
345
+ push_success = False
346
+ push_error_msg = "HF_TOKEN not found in environment variables"
347
+
348
  # Log the update
349
+ log_entry = f"[{last_ai_fix['timestamp']}] Database updated: '{original_name}' -> '{df.at[idx, 'corrected_name']}' | HF Push: {'Success' if push_success else 'Failed: ' + push_error_msg}\n"
350
  with open('database_updates.log', 'a') as f:
351
  f.write(log_entry)
352
 
 
 
 
353
  # Update the global reaction_names list in case the name changed
354
  global reaction_names
355
  reaction_names = df['corrected_name'].unique().tolist()
356
 
357
+ # Clear the last fix
358
+ last_ai_fix = None
359
+
360
+ success_msg = "✅ **Database Updated Successfully!**\n\n"
361
+ if push_success:
362
+ success_msg += "The reaction has been permanently updated on Hugging Face and is now live!\n\n"
363
+ else:
364
+ success_msg += "The reaction has been updated in the current session.\n"
365
+ success_msg += f"**Note:** Could not push to Hugging Face: {push_error_msg}\n"
366
+ success_msg += "Please check that HF_TOKEN is set in space secrets.\n\n"
367
+
368
+ success_msg += "Changes logged to 'database_updates.log'."
369
+
370
+ return success_msg
371
 
372
  except Exception as e:
373
  return f"❌ Error updating database: {str(e)}"
requirements.txt CHANGED
@@ -6,4 +6,5 @@ python-levenshtein
6
  rdkit
7
  reportlab
8
  svglib
9
- google-genai
 
 
6
  rdkit
7
  reportlab
8
  svglib
9
+ google-genai
10
+ huggingface_hub