Spaces:
Sleeping
Sleeping
Vaishnav14220
committed on
Commit
·
40820ca
1
Parent(s):
d9b92b2
Add Hugging Face dataset push functionality for permanent database updates
Browse files
- __pycache__/app.cpython-311.pyc +0 -0
- app.py +46 -6
- requirements.txt +2 -1
__pycache__/app.cpython-311.pyc
ADDED
|
Binary file (36.4 kB). View file
|
|
|
app.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
| 3 |
-
from datasets import load_dataset
|
| 4 |
from fuzzywuzzy import process
|
| 5 |
from rdkit import Chem
|
| 6 |
from rdkit.Chem import AllChem, Draw
|
|
@@ -15,6 +15,7 @@ from reportlab.lib.pagesizes import letter
|
|
| 15 |
from reportlab.lib.styles import getSampleStyleSheet
|
| 16 |
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image
|
| 17 |
from svglib.svglib import svg2rlg
|
|
|
|
| 18 |
|
| 19 |
# Load dataset
|
| 20 |
dataset = load_dataset("smitathkr1/organic_reactions_corrected")
|
|
@@ -309,25 +310,64 @@ def update_database_with_ai_fix(password):
|
|
| 309 |
if idx not in df.index:
|
| 310 |
return f"❌ Row index {idx} not found in database. The data may have been modified."
|
| 311 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 312 |
df.at[idx, 'corrected_name'] = last_ai_fix['updated_data'].get('reaction name', last_ai_fix['reaction_name'])
|
| 313 |
df.at[idx, 'general_reactants'] = ', '.join(last_ai_fix['updated_data'].get('reactants', []))
|
| 314 |
df.at[idx, 'general_reagents'] = ', '.join(last_ai_fix['updated_data'].get('reagents', []))
|
| 315 |
df.at[idx, 'general_products'] = ', '.join(last_ai_fix['updated_data'].get('products', []))
|
| 316 |
df.at[idx, 'description'] = last_ai_fix['updated_data'].get('description', df.at[idx, 'description'])
|
| 317 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 318 |
# Log the update
|
| 319 |
-
log_entry = f"[{last_ai_fix['timestamp']}] Database updated: '{
|
| 320 |
with open('database_updates.log', 'a') as f:
|
| 321 |
f.write(log_entry)
|
| 322 |
|
| 323 |
-
# Clear the last fix
|
| 324 |
-
last_ai_fix = None
|
| 325 |
-
|
| 326 |
# Update the global reaction_names list in case the name changed
|
| 327 |
global reaction_names
|
| 328 |
reaction_names = df['corrected_name'].unique().tolist()
|
| 329 |
|
| 330 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 331 |
|
| 332 |
except Exception as e:
|
| 333 |
return f"❌ Error updating database: {str(e)}"
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
| 3 |
+
from datasets import load_dataset, Dataset
|
| 4 |
from fuzzywuzzy import process
|
| 5 |
from rdkit import Chem
|
| 6 |
from rdkit.Chem import AllChem, Draw
|
|
|
|
| 15 |
from reportlab.lib.styles import getSampleStyleSheet
|
| 16 |
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image
|
| 17 |
from svglib.svglib import svg2rlg
|
| 18 |
+
from huggingface_hub import HfApi
|
| 19 |
|
| 20 |
# Load dataset
|
| 21 |
dataset = load_dataset("smitathkr1/organic_reactions_corrected")
|
|
|
|
| 310 |
if idx not in df.index:
|
| 311 |
return f"❌ Row index {idx} not found in database. The data may have been modified."
|
| 312 |
|
| 313 |
+
# Store original values for logging
|
| 314 |
+
original_name = df.at[idx, 'corrected_name']
|
| 315 |
+
|
| 316 |
+
# Update the dataframe
|
| 317 |
df.at[idx, 'corrected_name'] = last_ai_fix['updated_data'].get('reaction name', last_ai_fix['reaction_name'])
|
| 318 |
df.at[idx, 'general_reactants'] = ', '.join(last_ai_fix['updated_data'].get('reactants', []))
|
| 319 |
df.at[idx, 'general_reagents'] = ', '.join(last_ai_fix['updated_data'].get('reagents', []))
|
| 320 |
df.at[idx, 'general_products'] = ', '.join(last_ai_fix['updated_data'].get('products', []))
|
| 321 |
df.at[idx, 'description'] = last_ai_fix['updated_data'].get('description', df.at[idx, 'description'])
|
| 322 |
|
| 323 |
+
# Try to push to Hugging Face
|
| 324 |
+
hf_token = os.getenv('HF_TOKEN')
|
| 325 |
+
if hf_token:
|
| 326 |
+
try:
|
| 327 |
+
# Convert back to Hugging Face dataset
|
| 328 |
+
updated_dataset = Dataset.from_pandas(df)
|
| 329 |
+
|
| 330 |
+
# Initialize HF API
|
| 331 |
+
api = HfApi()
|
| 332 |
+
|
| 333 |
+
# Push to Hugging Face
|
| 334 |
+
updated_dataset.push_to_hub(
|
| 335 |
+
"smitathkr1/organic_reactions_corrected",
|
| 336 |
+
token=hf_token,
|
| 337 |
+
commit_message=f"AI fix: Updated reaction '{original_name}' -> '{df.at[idx, 'corrected_name']}'"
|
| 338 |
+
)
|
| 339 |
+
|
| 340 |
+
push_success = True
|
| 341 |
+
except Exception as push_error:
|
| 342 |
+
push_success = False
|
| 343 |
+
push_error_msg = str(push_error)
|
| 344 |
+
else:
|
| 345 |
+
push_success = False
|
| 346 |
+
push_error_msg = "HF_TOKEN not found in environment variables"
|
| 347 |
+
|
| 348 |
# Log the update
|
| 349 |
+
log_entry = f"[{last_ai_fix['timestamp']}] Database updated: '{original_name}' -> '{df.at[idx, 'corrected_name']}' | HF Push: {'Success' if push_success else 'Failed: ' + push_error_msg}\n"
|
| 350 |
with open('database_updates.log', 'a') as f:
|
| 351 |
f.write(log_entry)
|
| 352 |
|
|
|
|
|
|
|
|
|
|
| 353 |
# Update the global reaction_names list in case the name changed
|
| 354 |
global reaction_names
|
| 355 |
reaction_names = df['corrected_name'].unique().tolist()
|
| 356 |
|
| 357 |
+
# Clear the last fix
|
| 358 |
+
last_ai_fix = None
|
| 359 |
+
|
| 360 |
+
success_msg = "✅ **Database Updated Successfully!**\n\n"
|
| 361 |
+
if push_success:
|
| 362 |
+
success_msg += "The reaction has been permanently updated on Hugging Face and is now live!\n\n"
|
| 363 |
+
else:
|
| 364 |
+
success_msg += "The reaction has been updated in the current session.\n"
|
| 365 |
+
success_msg += f"**Note:** Could not push to Hugging Face: {push_error_msg}\n"
|
| 366 |
+
success_msg += "Please check that HF_TOKEN is set in space secrets.\n\n"
|
| 367 |
+
|
| 368 |
+
success_msg += "Changes logged to 'database_updates.log'."
|
| 369 |
+
|
| 370 |
+
return success_msg
|
| 371 |
|
| 372 |
except Exception as e:
|
| 373 |
return f"❌ Error updating database: {str(e)}"
|
requirements.txt
CHANGED
|
@@ -6,4 +6,5 @@ python-levenshtein
|
|
| 6 |
rdkit
|
| 7 |
reportlab
|
| 8 |
svglib
|
| 9 |
-
google-genai
|
|
|
|
|
|
| 6 |
rdkit
|
| 7 |
reportlab
|
| 8 |
svglib
|
| 9 |
+
google-genai
|
| 10 |
+
huggingface_hub
|