Spaces:

HUBioDataLab
/

PROBE

Running

App Files Files Community

mgyigit committed on Nov 25, 2024

Commit

4670ac7

verified ·

1 Parent(s): 8903ad9

Update src/saving_utils.py

Browse files

Files changed (1) hide show

src/saving_utils.py +54 -13

src/saving_utils.py CHANGED Viewed

@@ -1,16 +1,47 @@
 import os
 import pandas as pd
-script_dir = os.path.dirname(os.path.abspath(__file__))  # Directory of the running script
-def save_similarity_output(output_dict, method_name, leaderboard_path="data/leaderboard_results.csv", similarity_path="data/similarity_results.csv"):
-    leaderboard_path = os.path.join(script_dir, leaderboard_path)
-    similarity_path = os.path.join(script_dir, similarity_path)
-    with open("test_write.txt", "w") as f:
-        f.write("Write test successful!")
     # Load or initialize the DataFrames
     if os.path.exists(leaderboard_path):
         leaderboard_df = pd.read_csv(leaderboard_path)
@@ -24,7 +55,6 @@ def save_similarity_output(output_dict, method_name, leaderboard_path="data/lead
         print("Similarity file not found!")
         return -1
-    # Ensure the method exists in the similarity DataFrame
     if method_name not in similarity_df['Method'].values:
         # Create a new row for the method with default values
         new_row = {col: None for col in similarity_df.columns}
@@ -74,11 +104,22 @@ def save_similarity_output(output_dict, method_name, leaderboard_path="data/lead
             similarity_df.loc[similarity_df['Method'] == method_name, f"{dataset}_Ave_pvalue"] = averages[f"{dataset}_Ave_pvalue"]
             leaderboard_df.loc[leaderboard_df['Method'] == method_name, f"sim_{dataset}_Ave_pvalue"] = averages[f"{dataset}_Ave_pvalue"]
-    # Save the updated DataFrames back to CSV
-    similarity_df.to_csv(similarity_path, index=False)
-    leaderboard_df.to_csv(leaderboard_path, index=False)
-    print(f"Updated files saved to {similarity_path} and {leaderboard_path}")
     return 0
 def save_function_output(model_output, method_name, func_results_path="/home/user/app/src/data/function_results.csv", leaderboard_path="/home/user/app/src/data/leaderboard_results.csv"):

 import os
 import pandas as pd
+from huggingface_hub import HfApi
+script_dir = os.path.dirname(os.path.abspath(__file__))  # Absolute directory containing this module file
+def save_csv_locally(dataframe, file_name, save_dir="/tmp"):
+    """Write ``dataframe`` as a CSV file inside ``save_dir`` and return its path.
+
+    Args:
+        dataframe: Object exposing ``to_csv(path, index=False)``.
+        file_name: Name of the CSV file, joined onto ``save_dir``.
+        save_dir: Target directory; it is created when missing.
+
+    Returns:
+        The path of the written file (``save_dir`` joined with ``file_name``).
+    """
+    # The target directory may not exist yet, so create it up front.
+    os.makedirs(save_dir, exist_ok=True)
+    destination = os.path.join(save_dir, file_name)
+    # The index is left out of the written CSV.
+    dataframe.to_csv(destination, index=False)
+    print(f"Saved {file_name} to {destination}")
+    return destination
+def upload_to_hub(local_path, remote_path, repo_id, repo_type="dataset"):
+    """Upload one local file to a Hugging Face Hub repository.
+
+    Args:
+        local_path: Forwarded to ``HfApi.upload_file`` as ``path_or_fileobj``.
+        remote_path: Destination path inside the repository; its base name
+            is also used in the commit message.
+        repo_id: Identifier of the target repository.
+        repo_type: Repository type forwarded to the Hub API.
+    """
+    client = HfApi()  # Requires authentication via HF_TOKEN
+    # Collect the keyword arguments first so the call itself stays short.
+    upload_args = {
+        "path_or_fileobj": local_path,
+        "path_in_repo": remote_path,
+        "repo_id": repo_id,
+        "repo_type": repo_type,
+        "commit_message": f"Updating {os.path.basename(remote_path)}",
+    }
+    client.upload_file(**upload_args)
+    print(f"Uploaded {local_path} to {repo_id}/{remote_path}")
+def cleanup_local_file(file_path):
+    """Delete ``file_path`` if present; do nothing when it is already gone.
+
+    Args:
+        file_path: Path of the file to remove.
+
+    Raises:
+        OSError: Any removal failure other than the file being absent
+            (for example, the path is a directory or permission is denied).
+    """
+    # Attempt the removal directly instead of checking existence first:
+    # the file could disappear between an ``os.path.exists`` check and the
+    # ``os.remove`` call, which would turn a harmless no-op into a crash.
+    try:
+        os.remove(file_path)
+    except FileNotFoundError:
+        # Nothing to clean up.
+        return
+    print(f"Removed local file: {file_path}")
+def save_similarity_output(
+    output_dict,
+    method_name,
+    leaderboard_path="/home/user/app/src/data/leaderboard_results.csv",
+    similarity_path="/home/user/app/src/data/similarity_results.csv",
+    repo_id="mgyigit/probe3",
+):
     # Load or initialize the DataFrames
     if os.path.exists(leaderboard_path):
         leaderboard_df = pd.read_csv(leaderboard_path)
         print("Similarity file not found!")
         return -1
     if method_name not in similarity_df['Method'].values:
         # Create a new row for the method with default values
         new_row = {col: None for col in similarity_df.columns}
             similarity_df.loc[similarity_df['Method'] == method_name, f"{dataset}_Ave_pvalue"] = averages[f"{dataset}_Ave_pvalue"]
             leaderboard_df.loc[leaderboard_df['Method'] == method_name, f"sim_{dataset}_Ave_pvalue"] = averages[f"{dataset}_Ave_pvalue"]
+    # Save locally to a temporary directory
+    leaderboard_file = save_csv_locally(leaderboard_df, "leaderboard_results.csv")
+    similarity_file = save_csv_locally(similarity_df, "similarity_results.csv")
+    # Upload to Hugging Face Hub
+    try:
+        upload_to_hub(leaderboard_file, "leaderboard_results.csv", repo_id)
+        upload_to_hub(similarity_file, "similarity_results.csv", repo_id)
+    except Exception as e:
+        print(f"Failed to upload files: {e}")
+        return -1
+    # Clean up local files
+    cleanup_local_file(leaderboard_file)
+    cleanup_local_file(similarity_file)
     return 0
 def save_function_output(model_output, method_name, func_results_path="/home/user/app/src/data/function_results.csv", leaderboard_path="/home/user/app/src/data/leaderboard_results.csv"):