DevClivora committed on
Commit
0737003
·
1 Parent(s): 8fe7a42

permission error fix

Browse files
Files changed (2) hide show
  1. Dockerfile +11 -8
  2. app.py +13 -45
Dockerfile CHANGED
@@ -7,19 +7,22 @@ FROM python:3.13-slim
7
  # 2. Set the working directory inside the container.
8
  WORKDIR /code
9
 
10
- # 3. Copy the requirements file into the working directory.
 
 
 
 
 
11
  COPY ./requirements.txt /code/requirements.txt
12
 
13
- # 4. Install the Python dependencies specified in the requirements file.
14
  RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
15
 
16
- # 5. Copy the rest of your application's code (e.g., app.py) into the working directory.
17
  COPY ./ /code/
18
 
19
- # 6. Expose the port that the application will run on.
20
- # Hugging Face Spaces typically use port 7860.
21
  EXPOSE 7860
22
 
23
- # 7. Define the command to run the application using Gunicorn.
24
- # Gunicorn is a robust production web server for Python.
25
- CMD ["gunicorn", "--bind", "0.0.0.0:7860", "app:app"]
 
7
  # 2. Set the working directory inside the container.
8
  WORKDIR /code
9
 
10
+ # --- FIX ---
11
+ # 3. Set the HF_HOME environment variable to a writable directory.
12
+ # This prevents the "Permission denied: '/.cache'" error.
13
+ ENV HF_HOME="/tmp/.cache/huggingface"
14
+
15
+ # 4. Copy the requirements file into the working directory.
16
  COPY ./requirements.txt /code/requirements.txt
17
 
18
+ # 5. Install the Python dependencies specified in the requirements file.
19
  RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
20
 
21
+ # 6. Copy the rest of your application's code (e.g., app.py) into the working directory.
22
  COPY ./ /code/
23
 
24
+ # 7. Expose the port that the application will run on.
 
25
  EXPOSE 7860
26
 
27
+ # 8. Define the command to run the application using Gunicorn.
28
+ CMD ["gunicorn", "--bind", "0.0.0.0:7860", "app:app"]
 
app.py CHANGED
@@ -1,43 +1,34 @@
1
  # app.py
2
  # A Flask API to be hosted on a Hugging Face Space.
3
- # This API receives data, converts it to Parquet format, and uploads it to specified HF datasets.
4
- # This version includes CORS support to be called directly from a frontend application.
5
 
6
  import os
7
  import pandas as pd
8
  import io
9
  from flask import Flask, request, jsonify
10
- from flask_cors import CORS # Import CORS
11
- from huggingface_hub import HfApi, HfFolder
12
  from datetime import datetime
13
 
14
  # --- Initialization ---
15
  app = Flask(__name__)
16
-
17
- # --- Enable CORS ---
18
- # This allows the API to accept requests from different domains (i.e., your frontend).
19
- # For production, you might want to restrict origins for better security.
20
- # e.g., CORS(app, origins=["https://your-frontend-domain.com"])
21
  CORS(app)
22
 
23
  # --- Configuration ---
24
- # It's crucial to get these values from the Hugging Face Space's secrets for security.
25
- # DO NOT hardcode them here.
26
  try:
27
  HF_TOKEN = os.environ["HF_TOKEN"]
28
- AUDIO_DATASET_REPO_ID = os.environ["AUDIO_DATASET_REPO_ID"] # E.g., "YourUsername/audio-clips-dataset"
29
- TEXT_DATASET_REPO_ID = os.environ["TEXT_DATASET_REPO_ID"] # E.g., "YourUsername/transcription-summaries-dataset"
30
  except KeyError as e:
31
  print(f"FATAL ERROR: Missing secret environment variable: {e}")
32
- # In a real scenario, the app should fail fast if config is missing.
33
  HF_TOKEN, AUDIO_DATASET_REPO_ID, TEXT_DATASET_REPO_ID = None, None, None
34
 
35
-
36
  # --- Hugging Face API Client ---
37
- # Authenticate and initialize the Hub API client.
 
38
  if HF_TOKEN:
39
- HfFolder.save_token(HF_TOKEN)
40
- api = HfApi()
41
  else:
42
  api = None
43
  print("Warning: HfApi not initialized because HF_TOKEN is not set.")
@@ -49,7 +40,7 @@ def get_unique_filename():
49
  return f"data_{datetime.utcnow().strftime('%Y%m%d_%H%M%S_%f')}.parquet"
50
 
51
 
52
- # --- API Endpoints ---
53
 
54
  @app.route('/')
55
  def index():
@@ -60,26 +51,17 @@ def index():
60
  def add_audio_data():
61
  """
62
  Receives an audio/transcription pair, converts to Parquet, and uploads to the audio dataset.
63
- Expected JSON payload: { "audio": "<base64_string>", "transcription": "<string>" }
64
  """
65
  if not api:
66
  return jsonify({"error": "Server is not configured with Hugging Face credentials."}), 500
67
-
68
  try:
69
- # 1. Get and validate the JSON data from the request
70
  data = request.get_json()
71
  if not data or 'audio' not in data or 'transcription' not in data:
72
  return jsonify({"error": "Invalid payload. 'audio' and 'transcription' fields are required."}), 400
73
-
74
- # 2. Convert the data into a Pandas DataFrame
75
  df = pd.DataFrame([data])
76
-
77
- # 3. Convert the DataFrame to a Parquet file in an in-memory buffer
78
  buffer = io.BytesIO()
79
  df.to_parquet(buffer, index=False, engine='pyarrow')
80
- buffer.seek(0) # Rewind the buffer to the beginning
81
-
82
- # 4. Upload the in-memory file to the Hugging Face Hub dataset repository
83
  api.upload_file(
84
  path_or_fileobj=buffer,
85
  path_in_repo=get_unique_filename(),
@@ -87,9 +69,7 @@ def add_audio_data():
87
  repo_type="dataset",
88
  commit_message="Add new audio-transcription pair"
89
  )
90
-
91
  return jsonify({"message": "Audio data added successfully."}), 201
92
-
93
  except Exception as e:
94
  print(f"Error in /add-audio: {e}")
95
  return jsonify({"error": "An internal error occurred.", "details": str(e)}), 500
@@ -99,26 +79,17 @@ def add_audio_data():
99
  def add_text_data():
100
  """
101
  Receives a transcription/summary pair, converts to Parquet, and uploads to the text dataset.
102
- Expected JSON payload: { "transcription": "<string>", "summary": "<string>" }
103
  """
104
  if not api:
105
  return jsonify({"error": "Server is not configured with Hugging Face credentials."}), 500
106
-
107
  try:
108
- # 1. Get and validate the JSON data
109
  data = request.get_json()
110
  if not data or 'transcription' not in data or 'summary' not in data:
111
  return jsonify({"error": "Invalid payload. 'transcription' and 'summary' fields are required."}), 400
112
-
113
- # 2. Convert to DataFrame
114
  df = pd.DataFrame([data])
115
-
116
- # 3. Convert to in-memory Parquet file
117
  buffer = io.BytesIO()
118
  df.to_parquet(buffer, index=False, engine='pyarrow')
119
  buffer.seek(0)
120
-
121
- # 4. Upload the file to the Hub
122
  api.upload_file(
123
  path_or_fileobj=buffer,
124
  path_in_repo=get_unique_filename(),
@@ -126,14 +97,11 @@ def add_text_data():
126
  repo_type="dataset",
127
  commit_message="Add new transcription-summary pair"
128
  )
129
-
130
  return jsonify({"message": "Text data added successfully."}), 201
131
-
132
  except Exception as e:
133
  print(f"Error in /add-text: {e}")
134
  return jsonify({"error": "An internal error occurred.", "details": str(e)}), 500
135
 
136
- # To run on Hugging Face Spaces, the app needs to be runnable.
137
- # The standard port for Spaces is 7860.
138
  if __name__ == '__main__':
139
- app.run(host='0.0.0.0', port=7860)
 
1
  # app.py
2
  # A Flask API to be hosted on a Hugging Face Space.
3
+ # This version is fixed to avoid the /.cache permission error.
 
4
 
5
  import os
6
  import pandas as pd
7
  import io
8
  from flask import Flask, request, jsonify
9
+ from flask_cors import CORS
10
+ from huggingface_hub import HfApi
11
  from datetime import datetime
12
 
13
  # --- Initialization ---
14
  app = Flask(__name__)
 
 
 
 
 
15
  CORS(app)
16
 
17
  # --- Configuration ---
18
+ # Get credentials and config from the Space's secrets.
 
19
  try:
20
  HF_TOKEN = os.environ["HF_TOKEN"]
21
+ AUDIO_DATASET_REPO_ID = os.environ["AUDIO_DATASET_REPO_ID"]
22
+ TEXT_DATASET_REPO_ID = os.environ["TEXT_DATASET_REPO_ID"]
23
  except KeyError as e:
24
  print(f"FATAL ERROR: Missing secret environment variable: {e}")
 
25
  HF_TOKEN, AUDIO_DATASET_REPO_ID, TEXT_DATASET_REPO_ID = None, None, None
26
 
 
27
  # --- Hugging Face API Client ---
28
+ # The token is passed explicitly to the HfApi client, so it never has to be written to the cache directory.
29
+ # No need for HfFolder.save_token().
30
  if HF_TOKEN:
31
+ api = HfApi(token=HF_TOKEN)
 
32
  else:
33
  api = None
34
  print("Warning: HfApi not initialized because HF_TOKEN is not set.")
 
40
  return f"data_{datetime.utcnow().strftime('%Y%m%d_%H%M%S_%f')}.parquet"
41
 
42
 
43
+ # --- API Endpoints (No changes needed here) ---
44
 
45
  @app.route('/')
46
  def index():
 
51
  def add_audio_data():
52
  """
53
  Receives an audio/transcription pair, converts to Parquet, and uploads to the audio dataset.
 
54
  """
55
  if not api:
56
  return jsonify({"error": "Server is not configured with Hugging Face credentials."}), 500
 
57
  try:
 
58
  data = request.get_json()
59
  if not data or 'audio' not in data or 'transcription' not in data:
60
  return jsonify({"error": "Invalid payload. 'audio' and 'transcription' fields are required."}), 400
 
 
61
  df = pd.DataFrame([data])
 
 
62
  buffer = io.BytesIO()
63
  df.to_parquet(buffer, index=False, engine='pyarrow')
64
+ buffer.seek(0)
 
 
65
  api.upload_file(
66
  path_or_fileobj=buffer,
67
  path_in_repo=get_unique_filename(),
 
69
  repo_type="dataset",
70
  commit_message="Add new audio-transcription pair"
71
  )
 
72
  return jsonify({"message": "Audio data added successfully."}), 201
 
73
  except Exception as e:
74
  print(f"Error in /add-audio: {e}")
75
  return jsonify({"error": "An internal error occurred.", "details": str(e)}), 500
 
79
  def add_text_data():
80
  """
81
  Receives a transcription/summary pair, converts to Parquet, and uploads to the text dataset.
 
82
  """
83
  if not api:
84
  return jsonify({"error": "Server is not configured with Hugging Face credentials."}), 500
 
85
  try:
 
86
  data = request.get_json()
87
  if not data or 'transcription' not in data or 'summary' not in data:
88
  return jsonify({"error": "Invalid payload. 'transcription' and 'summary' fields are required."}), 400
 
 
89
  df = pd.DataFrame([data])
 
 
90
  buffer = io.BytesIO()
91
  df.to_parquet(buffer, index=False, engine='pyarrow')
92
  buffer.seek(0)
 
 
93
  api.upload_file(
94
  path_or_fileobj=buffer,
95
  path_in_repo=get_unique_filename(),
 
97
  repo_type="dataset",
98
  commit_message="Add new transcription-summary pair"
99
  )
 
100
  return jsonify({"message": "Text data added successfully."}), 201
 
101
  except Exception as e:
102
  print(f"Error in /add-text: {e}")
103
  return jsonify({"error": "An internal error occurred.", "details": str(e)}), 500
104
 
105
+ # Entry point for running the script directly; the Dockerfile starts the app with Gunicorn instead.
 
106
  if __name__ == '__main__':
107
+ app.run(host='0.0.0.0', port=7860)