Spaces:

Clivora
/

data-upload

Sleeping

App Files Community

DevClivora committed on Jul 9, 2025

Commit

8c12eef

verified ·

1 Parent(s): 28252e6

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -8

app.py CHANGED Viewed

@@ -1,6 +1,5 @@
 # app.py
-# A Flask API to be hosted on a Hugging Face Space.
-# This version is fixed to avoid the /.cache permission error.
 import os
 import pandas as pd
@@ -11,24 +10,26 @@ from huggingface_hub import HfApi
 from datetime import datetime
 # --- Initialization ---
 app = Flask(__name__)
 CORS(app)
 # --- Configuration ---
-# Get credentials and config from the Space's secrets.
 try:
     HF_TOKEN = os.environ["HF_TOKEN"]
     AUDIO_DATASET_REPO_ID = os.environ["AUDIO_DATASET_REPO_ID"]
     TEXT_DATASET_REPO_ID = os.environ["TEXT_DATASET_REPO_ID"]
 except KeyError as e:
     print(f"FATAL ERROR: Missing secret environment variable: {e}")
     HF_TOKEN, AUDIO_DATASET_REPO_ID, TEXT_DATASET_REPO_ID = None, None, None
 # --- Hugging Face API Client ---
-# The HfApi client will automatically use the HF_TOKEN from the environment variables.
-# No need for HfFolder.save_token().
 if HF_TOKEN:
     api = HfApi(token=HF_TOKEN)
 else:
     api = None
     print("Warning: HfApi not initialized because HF_TOKEN is not set.")
@@ -40,11 +41,12 @@ def get_unique_filename():
     return f"data_{datetime.utcnow().strftime('%Y%m%d_%H%M%S_%f')}.parquet"
-# --- API Endpoints (No changes needed here) ---
 @app.route('/')
 def index():
     """A simple index route to confirm the API is running."""
     return "Hugging Face Data Uploader API is running."
 @app.route('/add-audio', methods=['POST'])
@@ -52,16 +54,24 @@ def add_audio_data():
     """
     Receives an audio/transcription pair, converts to Parquet, and uploads to the audio dataset.
     """
     if not api:
         return jsonify({"error": "Server is not configured with Hugging Face credentials."}), 500
     try:
         data = request.get_json()
         if not data or 'audio' not in data or 'transcription' not in data:
             return jsonify({"error": "Invalid payload. 'audio' and 'transcription' fields are required."}), 400
         df = pd.DataFrame([data])
         buffer = io.BytesIO()
         df.to_parquet(buffer, index=False, engine='pyarrow')
         buffer.seek(0)
         api.upload_file(
             path_or_fileobj=buffer,
             path_in_repo=get_unique_filename(),
@@ -69,9 +79,11 @@ def add_audio_data():
             repo_type="dataset",
             commit_message="Add new audio-transcription pair"
         )
         return jsonify({"message": "Audio data added successfully."}), 201
     except Exception as e:
-        print(f"Error in /add-audio: {e}")
         return jsonify({"error": "An internal error occurred.", "details": str(e)}), 500
@@ -80,16 +92,24 @@ def add_text_data():
     """
     Receives a transcription/summary pair, converts to Parquet, and uploads to the text dataset.
     """
     if not api:
         return jsonify({"error": "Server is not configured with Hugging Face credentials."}), 500
     try:
         data = request.get_json()
         if not data or 'transcription' not in data or 'summary' not in data:
             return jsonify({"error": "Invalid payload. 'transcription' and 'summary' fields are required."}), 400
         df = pd.DataFrame([data])
         buffer = io.BytesIO()
         df.to_parquet(buffer, index=False, engine='pyarrow')
         buffer.seek(0)
         api.upload_file(
             path_or_fileobj=buffer,
             path_in_repo=get_unique_filename(),
@@ -97,11 +117,14 @@ def add_text_data():
             repo_type="dataset",
             commit_message="Add new transcription-summary pair"
         )
         return jsonify({"message": "Text data added successfully."}), 201
     except Exception as e:
-        print(f"Error in /add-text: {e}")
         return jsonify({"error": "An internal error occurred.", "details": str(e)}), 500
 # To run on Hugging Face Spaces
 if __name__ == '__main__':
     app.run(host='0.0.0.0', port=7860)

 # app.py
+# A Flask API with extra print statements for debugging on Hugging Face Spaces.
 import os
 import pandas as pd
 from datetime import datetime
 # --- Initialization ---
+print("--- Flask app.py is starting up! ---")
 app = Flask(__name__)
 CORS(app)
 # --- Configuration ---
+print("Loading environment variables...")
 try:
     HF_TOKEN = os.environ["HF_TOKEN"]
     AUDIO_DATASET_REPO_ID = os.environ["AUDIO_DATASET_REPO_ID"]
     TEXT_DATASET_REPO_ID = os.environ["TEXT_DATASET_REPO_ID"]
+    print("Successfully loaded all required environment variables.")
 except KeyError as e:
     print(f"FATAL ERROR: Missing secret environment variable: {e}")
     HF_TOKEN, AUDIO_DATASET_REPO_ID, TEXT_DATASET_REPO_ID = None, None, None
 # --- Hugging Face API Client ---
 if HF_TOKEN:
+    print("Initializing HfApi client...")
     api = HfApi(token=HF_TOKEN)
+    print("HfApi client initialized.")
 else:
     api = None
     print("Warning: HfApi not initialized because HF_TOKEN is not set.")
     return f"data_{datetime.utcnow().strftime('%Y%m%d_%H%M%S_%f')}.parquet"
+# --- API Endpoints ---
 @app.route('/')
 def index():
     """A simple index route to confirm the API is running."""
+    print("Request received for / route.")
     return "Hugging Face Data Uploader API is running."
 @app.route('/add-audio', methods=['POST'])
     """
     Receives an audio/transcription pair, converts to Parquet, and uploads to the audio dataset.
     """
+    print("Request received for /add-audio route.")
     if not api:
+        print("Error: API client not available.")
         return jsonify({"error": "Server is not configured with Hugging Face credentials."}), 500
     try:
+        print("Attempting to process /add-audio data...")
         data = request.get_json()
         if not data or 'audio' not in data or 'transcription' not in data:
+            print("Invalid payload received for /add-audio.")
             return jsonify({"error": "Invalid payload. 'audio' and 'transcription' fields are required."}), 400
+        print("Data validated. Creating DataFrame.")
         df = pd.DataFrame([data])
         buffer = io.BytesIO()
         df.to_parquet(buffer, index=False, engine='pyarrow')
         buffer.seek(0)
+        print(f"Uploading file to audio dataset: {AUDIO_DATASET_REPO_ID}")
         api.upload_file(
             path_or_fileobj=buffer,
             path_in_repo=get_unique_filename(),
             repo_type="dataset",
             commit_message="Add new audio-transcription pair"
         )
+        print("File successfully uploaded to audio dataset.")
         return jsonify({"message": "Audio data added successfully."}), 201
     except Exception as e:
+        print(f"---! UNEXPECTED ERROR in /add-audio !---: {e}")
         return jsonify({"error": "An internal error occurred.", "details": str(e)}), 500
     """
     Receives a transcription/summary pair, converts to Parquet, and uploads to the text dataset.
     """
+    print("Request received for /add-text route.")
     if not api:
+        print("Error: API client not available.")
         return jsonify({"error": "Server is not configured with Hugging Face credentials."}), 500
     try:
+        print("Attempting to process /add-text data...")
         data = request.get_json()
         if not data or 'transcription' not in data or 'summary' not in data:
+            print("Invalid payload received for /add-text.")
             return jsonify({"error": "Invalid payload. 'transcription' and 'summary' fields are required."}), 400
+        print("Data validated. Creating DataFrame.")
         df = pd.DataFrame([data])
         buffer = io.BytesIO()
         df.to_parquet(buffer, index=False, engine='pyarrow')
         buffer.seek(0)
+        print(f"Uploading file to text dataset: {TEXT_DATASET_REPO_ID}")
         api.upload_file(
             path_or_fileobj=buffer,
             path_in_repo=get_unique_filename(),
             repo_type="dataset",
             commit_message="Add new transcription-summary pair"
         )
+        print("File successfully uploaded to text dataset.")
         return jsonify({"message": "Text data added successfully."}), 201
     except Exception as e:
+        print(f"---! UNEXPECTED ERROR in /add-text !---: {e}")
         return jsonify({"error": "An internal error occurred.", "details": str(e)}), 500
 # To run on Hugging Face Spaces
 if __name__ == '__main__':
+    print("Starting Flask development server...")
     app.run(host='0.0.0.0', port=7860)