Spaces:

Clivora
/

data-upload

Sleeping

App Files Files Community

DevClivora committed on Jul 9, 2025

Commit

0737003

1 Parent(s): 8fe7a42

permission error fix

Browse files

Files changed (2) hide show

Dockerfile +11 -8
app.py +13 -45

Dockerfile CHANGED Viewed

@@ -7,19 +7,22 @@ FROM python:3.13-slim
 # 2. Set the working directory inside the container.
 WORKDIR /code
-# 3. Copy the requirements file into the working directory.
 COPY ./requirements.txt /code/requirements.txt
-# 4. Install the Python dependencies specified in the requirements file.
 RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
-# 5. Copy the rest of your application's code (e.g., app.py) into the working directory.
 COPY ./ /code/
-# 6. Expose the port that the application will run on.
-# Hugging Face Spaces typically use port 7860.
 EXPOSE 7860
-# 7. Define the command to run the application using Gunicorn.
-# Gunicorn is a robust production web server for Python.
-CMD ["gunicorn", "--bind", "0.0.0.0:7860", "app:app"]

 # 2. Set the working directory inside the container.
 WORKDIR /code
+# --- FIX ---
+# 3. Set the HF_HOME environment variable to a writable directory.
+# This prevents the "Permission denied: '/.cache'" error.
+ENV HF_HOME="/tmp/.cache/huggingface"
+# 4. Copy the requirements file into the working directory.
 COPY ./requirements.txt /code/requirements.txt
+# 5. Install the Python dependencies specified in the requirements file.
 RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+# 6. Copy the rest of your application's code (e.g., app.py) into the working directory.
 COPY ./ /code/
+# 7. Expose the port that the application will run on.
 EXPOSE 7860
+# 8. Define the command to run the application using Gunicorn.
+CMD ["gunicorn", "--bind", "0.0.0.0:7860", "app:app"]

app.py CHANGED Viewed

@@ -1,43 +1,34 @@
 # app.py
 # A Flask API to be hosted on a Hugging Face Space.
-# This API receives data, converts it to Parquet format, and uploads it to specified HF datasets.
-# This version includes CORS support to be called directly from a frontend application.
 import os
 import pandas as pd
 import io
 from flask import Flask, request, jsonify
-from flask_cors import CORS  # Import CORS
-from huggingface_hub import HfApi, HfFolder
 from datetime import datetime
 # --- Initialization ---
 app = Flask(__name__)
-# --- Enable CORS ---
-# This allows the API to accept requests from different domains (i.e., your frontend).
-# For production, you might want to restrict origins for better security.
-# e.g., CORS(app, origins=["https://your-frontend-domain.com"])
 CORS(app)
 # --- Configuration ---
-# It's crucial to get these values from the Hugging Face Space's secrets for security.
-# DO NOT hardcode them here.
 try:
     HF_TOKEN = os.environ["HF_TOKEN"]
-    AUDIO_DATASET_REPO_ID = os.environ["AUDIO_DATASET_REPO_ID"] # E.g., "YourUsername/audio-clips-dataset"
-    TEXT_DATASET_REPO_ID = os.environ["TEXT_DATASET_REPO_ID"]   # E.g., "YourUsername/transcription-summaries-dataset"
 except KeyError as e:
     print(f"FATAL ERROR: Missing secret environment variable: {e}")
-    # In a real scenario, the app should fail fast if config is missing.
     HF_TOKEN, AUDIO_DATASET_REPO_ID, TEXT_DATASET_REPO_ID = None, None, None
 # --- Hugging Face API Client ---
-# Authenticate and initialize the Hub API client.
 if HF_TOKEN:
-    HfFolder.save_token(HF_TOKEN)
-    api = HfApi()
 else:
     api = None
     print("Warning: HfApi not initialized because HF_TOKEN is not set.")
@@ -49,7 +40,7 @@ def get_unique_filename():
     return f"data_{datetime.utcnow().strftime('%Y%m%d_%H%M%S_%f')}.parquet"
-# --- API Endpoints ---
 @app.route('/')
 def index():
@@ -60,26 +51,17 @@ def index():
 def add_audio_data():
     """
     Receives an audio/transcription pair, converts to Parquet, and uploads to the audio dataset.
-    Expected JSON payload: { "audio": "<base64_string>", "transcription": "<string>" }
     """
     if not api:
         return jsonify({"error": "Server is not configured with Hugging Face credentials."}), 500
     try:
-        # 1. Get and validate the JSON data from the request
         data = request.get_json()
         if not data or 'audio' not in data or 'transcription' not in data:
             return jsonify({"error": "Invalid payload. 'audio' and 'transcription' fields are required."}), 400
-        # 2. Convert the data into a Pandas DataFrame
         df = pd.DataFrame([data])
-        # 3. Convert the DataFrame to a Parquet file in an in-memory buffer
         buffer = io.BytesIO()
         df.to_parquet(buffer, index=False, engine='pyarrow')
-        buffer.seek(0) # Rewind the buffer to the beginning
-        # 4. Upload the in-memory file to the Hugging Face Hub dataset repository
         api.upload_file(
             path_or_fileobj=buffer,
             path_in_repo=get_unique_filename(),
@@ -87,9 +69,7 @@ def add_audio_data():
             repo_type="dataset",
             commit_message="Add new audio-transcription pair"
         )
         return jsonify({"message": "Audio data added successfully."}), 201
     except Exception as e:
         print(f"Error in /add-audio: {e}")
         return jsonify({"error": "An internal error occurred.", "details": str(e)}), 500
@@ -99,26 +79,17 @@ def add_audio_data():
 def add_text_data():
     """
     Receives a transcription/summary pair, converts to Parquet, and uploads to the text dataset.
-    Expected JSON payload: { "transcription": "<string>", "summary": "<string>" }
     """
     if not api:
         return jsonify({"error": "Server is not configured with Hugging Face credentials."}), 500
     try:
-        # 1. Get and validate the JSON data
         data = request.get_json()
         if not data or 'transcription' not in data or 'summary' not in data:
             return jsonify({"error": "Invalid payload. 'transcription' and 'summary' fields are required."}), 400
-        # 2. Convert to DataFrame
         df = pd.DataFrame([data])
-        # 3. Convert to in-memory Parquet file
         buffer = io.BytesIO()
         df.to_parquet(buffer, index=False, engine='pyarrow')
         buffer.seek(0)
-        # 4. Upload the file to the Hub
         api.upload_file(
             path_or_fileobj=buffer,
             path_in_repo=get_unique_filename(),
@@ -126,14 +97,11 @@ def add_text_data():
             repo_type="dataset",
             commit_message="Add new transcription-summary pair"
         )
         return jsonify({"message": "Text data added successfully."}), 201
     except Exception as e:
         print(f"Error in /add-text: {e}")
         return jsonify({"error": "An internal error occurred.", "details": str(e)}), 500
-# To run on Hugging Face Spaces, the app needs to be runnable.
-# The standard port for Spaces is 7860.
 if __name__ == '__main__':
-    app.run(host='0.0.0.0', port=7860)

 # app.py
 # A Flask API to be hosted on a Hugging Face Space.
+# This version is fixed to avoid the /.cache permission error.
 import os
 import pandas as pd
 import io
 from flask import Flask, request, jsonify
+from flask_cors import CORS
+from huggingface_hub import HfApi
 from datetime import datetime
 # --- Initialization ---
 app = Flask(__name__)
 CORS(app)
 # --- Configuration ---
+# Get credentials and config from the Space's secrets.
 try:
     HF_TOKEN = os.environ["HF_TOKEN"]
+    AUDIO_DATASET_REPO_ID = os.environ["AUDIO_DATASET_REPO_ID"]
+    TEXT_DATASET_REPO_ID = os.environ["TEXT_DATASET_REPO_ID"]
 except KeyError as e:
     print(f"FATAL ERROR: Missing secret environment variable: {e}")
     HF_TOKEN, AUDIO_DATASET_REPO_ID, TEXT_DATASET_REPO_ID = None, None, None
 # --- Hugging Face API Client ---
+# Pass the token directly to the HfApi client instead of persisting it with
+# HfFolder.save_token(), which writes to the cache directory and triggered the permission error.
 if HF_TOKEN:
+    api = HfApi(token=HF_TOKEN)
 else:
     api = None
     print("Warning: HfApi not initialized because HF_TOKEN is not set.")
     return f"data_{datetime.utcnow().strftime('%Y%m%d_%H%M%S_%f')}.parquet"
+# --- API Endpoints (No changes needed here) ---
 @app.route('/')
 def index():
 def add_audio_data():
     """
     Receives an audio/transcription pair, converts to Parquet, and uploads to the audio dataset.
     """
     if not api:
         return jsonify({"error": "Server is not configured with Hugging Face credentials."}), 500
     try:
         data = request.get_json()
         if not data or 'audio' not in data or 'transcription' not in data:
             return jsonify({"error": "Invalid payload. 'audio' and 'transcription' fields are required."}), 400
         df = pd.DataFrame([data])
         buffer = io.BytesIO()
         df.to_parquet(buffer, index=False, engine='pyarrow')
+        buffer.seek(0)
         api.upload_file(
             path_or_fileobj=buffer,
             path_in_repo=get_unique_filename(),
             repo_type="dataset",
             commit_message="Add new audio-transcription pair"
         )
         return jsonify({"message": "Audio data added successfully."}), 201
     except Exception as e:
         print(f"Error in /add-audio: {e}")
         return jsonify({"error": "An internal error occurred.", "details": str(e)}), 500
 def add_text_data():
     """
     Receives a transcription/summary pair, converts to Parquet, and uploads to the text dataset.
     """
     if not api:
         return jsonify({"error": "Server is not configured with Hugging Face credentials."}), 500
     try:
         data = request.get_json()
         if not data or 'transcription' not in data or 'summary' not in data:
             return jsonify({"error": "Invalid payload. 'transcription' and 'summary' fields are required."}), 400
         df = pd.DataFrame([data])
         buffer = io.BytesIO()
         df.to_parquet(buffer, index=False, engine='pyarrow')
         buffer.seek(0)
         api.upload_file(
             path_or_fileobj=buffer,
             path_in_repo=get_unique_filename(),
             repo_type="dataset",
             commit_message="Add new transcription-summary pair"
         )
         return jsonify({"message": "Text data added successfully."}), 201
     except Exception as e:
         print(f"Error in /add-text: {e}")
         return jsonify({"error": "An internal error occurred.", "details": str(e)}), 500
+# Entry point for running the app directly on port 7860 (the Docker image starts it with Gunicorn instead).
 if __name__ == '__main__':
+    app.run(host='0.0.0.0', port=7860)