DevClivora committed on
Commit
8c12eef
·
verified ·
1 Parent(s): 28252e6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -8
app.py CHANGED
@@ -1,6 +1,5 @@
1
  # app.py
2
- # A Flask API to be hosted on a Hugging Face Space.
3
- # This version is fixed to avoid the /.cache permission error.
4
 
5
  import os
6
  import pandas as pd
@@ -11,24 +10,26 @@ from huggingface_hub import HfApi
11
  from datetime import datetime
12
 
13
  # --- Initialization ---
 
14
  app = Flask(__name__)
15
  CORS(app)
16
 
17
  # --- Configuration ---
18
- # Get credentials and config from the Space's secrets.
19
  try:
20
  HF_TOKEN = os.environ["HF_TOKEN"]
21
  AUDIO_DATASET_REPO_ID = os.environ["AUDIO_DATASET_REPO_ID"]
22
  TEXT_DATASET_REPO_ID = os.environ["TEXT_DATASET_REPO_ID"]
 
23
  except KeyError as e:
24
  print(f"FATAL ERROR: Missing secret environment variable: {e}")
25
  HF_TOKEN, AUDIO_DATASET_REPO_ID, TEXT_DATASET_REPO_ID = None, None, None
26
 
27
  # --- Hugging Face API Client ---
28
- # The HfApi client will automatically use the HF_TOKEN from the environment variables.
29
- # No need for HfFolder.save_token().
30
  if HF_TOKEN:
 
31
  api = HfApi(token=HF_TOKEN)
 
32
  else:
33
  api = None
34
  print("Warning: HfApi not initialized because HF_TOKEN is not set.")
@@ -40,11 +41,12 @@ def get_unique_filename():
40
  return f"data_{datetime.utcnow().strftime('%Y%m%d_%H%M%S_%f')}.parquet"
41
 
42
 
43
- # --- API Endpoints (No changes needed here) ---
44
 
45
  @app.route('/')
46
  def index():
47
  """A simple index route to confirm the API is running."""
 
48
  return "Hugging Face Data Uploader API is running."
49
 
50
  @app.route('/add-audio', methods=['POST'])
@@ -52,16 +54,24 @@ def add_audio_data():
52
  """
53
  Receives an audio/transcription pair, converts to Parquet, and uploads to the audio dataset.
54
  """
 
55
  if not api:
 
56
  return jsonify({"error": "Server is not configured with Hugging Face credentials."}), 500
57
  try:
 
58
  data = request.get_json()
59
  if not data or 'audio' not in data or 'transcription' not in data:
 
60
  return jsonify({"error": "Invalid payload. 'audio' and 'transcription' fields are required."}), 400
 
 
61
  df = pd.DataFrame([data])
62
  buffer = io.BytesIO()
63
  df.to_parquet(buffer, index=False, engine='pyarrow')
64
  buffer.seek(0)
 
 
65
  api.upload_file(
66
  path_or_fileobj=buffer,
67
  path_in_repo=get_unique_filename(),
@@ -69,9 +79,11 @@ def add_audio_data():
69
  repo_type="dataset",
70
  commit_message="Add new audio-transcription pair"
71
  )
 
72
  return jsonify({"message": "Audio data added successfully."}), 201
 
73
  except Exception as e:
74
- print(f"Error in /add-audio: {e}")
75
  return jsonify({"error": "An internal error occurred.", "details": str(e)}), 500
76
 
77
 
@@ -80,16 +92,24 @@ def add_text_data():
80
  """
81
  Receives a transcription/summary pair, converts to Parquet, and uploads to the text dataset.
82
  """
 
83
  if not api:
 
84
  return jsonify({"error": "Server is not configured with Hugging Face credentials."}), 500
85
  try:
 
86
  data = request.get_json()
87
  if not data or 'transcription' not in data or 'summary' not in data:
 
88
  return jsonify({"error": "Invalid payload. 'transcription' and 'summary' fields are required."}), 400
 
 
89
  df = pd.DataFrame([data])
90
  buffer = io.BytesIO()
91
  df.to_parquet(buffer, index=False, engine='pyarrow')
92
  buffer.seek(0)
 
 
93
  api.upload_file(
94
  path_or_fileobj=buffer,
95
  path_in_repo=get_unique_filename(),
@@ -97,11 +117,14 @@ def add_text_data():
97
  repo_type="dataset",
98
  commit_message="Add new transcription-summary pair"
99
  )
 
100
  return jsonify({"message": "Text data added successfully."}), 201
 
101
  except Exception as e:
102
- print(f"Error in /add-text: {e}")
103
  return jsonify({"error": "An internal error occurred.", "details": str(e)}), 500
104
 
105
  # To run on Hugging Face Spaces
106
  if __name__ == '__main__':
 
107
  app.run(host='0.0.0.0', port=7860)
 
1
  # app.py
2
+ # A Flask API with extra print statements for debugging on Hugging Face Spaces.
 
3
 
4
  import os
5
  import pandas as pd
 
10
  from datetime import datetime
11
 
12
  # --- Initialization ---
13
+ print("--- Flask app.py is starting up! ---")
14
  app = Flask(__name__)
15
  CORS(app)
16
 
17
  # --- Configuration ---
18
+ print("Loading environment variables...")
19
  try:
20
  HF_TOKEN = os.environ["HF_TOKEN"]
21
  AUDIO_DATASET_REPO_ID = os.environ["AUDIO_DATASET_REPO_ID"]
22
  TEXT_DATASET_REPO_ID = os.environ["TEXT_DATASET_REPO_ID"]
23
+ print("Successfully loaded all required environment variables.")
24
  except KeyError as e:
25
  print(f"FATAL ERROR: Missing secret environment variable: {e}")
26
  HF_TOKEN, AUDIO_DATASET_REPO_ID, TEXT_DATASET_REPO_ID = None, None, None
27
 
28
  # --- Hugging Face API Client ---
 
 
29
  if HF_TOKEN:
30
+ print("Initializing HfApi client...")
31
  api = HfApi(token=HF_TOKEN)
32
+ print("HfApi client initialized.")
33
  else:
34
  api = None
35
  print("Warning: HfApi not initialized because HF_TOKEN is not set.")
 
41
  return f"data_{datetime.utcnow().strftime('%Y%m%d_%H%M%S_%f')}.parquet"
42
 
43
 
44
+ # --- API Endpoints ---
45
 
46
  @app.route('/')
47
  def index():
48
  """A simple index route to confirm the API is running."""
49
+ print("Request received for / route.")
50
  return "Hugging Face Data Uploader API is running."
51
 
52
  @app.route('/add-audio', methods=['POST'])
 
54
  """
55
  Receives an audio/transcription pair, converts to Parquet, and uploads to the audio dataset.
56
  """
57
+ print("Request received for /add-audio route.")
58
  if not api:
59
+ print("Error: API client not available.")
60
  return jsonify({"error": "Server is not configured with Hugging Face credentials."}), 500
61
  try:
62
+ print("Attempting to process /add-audio data...")
63
  data = request.get_json()
64
  if not data or 'audio' not in data or 'transcription' not in data:
65
+ print("Invalid payload received for /add-audio.")
66
  return jsonify({"error": "Invalid payload. 'audio' and 'transcription' fields are required."}), 400
67
+
68
+ print("Data validated. Creating DataFrame.")
69
  df = pd.DataFrame([data])
70
  buffer = io.BytesIO()
71
  df.to_parquet(buffer, index=False, engine='pyarrow')
72
  buffer.seek(0)
73
+
74
+ print(f"Uploading file to audio dataset: {AUDIO_DATASET_REPO_ID}")
75
  api.upload_file(
76
  path_or_fileobj=buffer,
77
  path_in_repo=get_unique_filename(),
 
79
  repo_type="dataset",
80
  commit_message="Add new audio-transcription pair"
81
  )
82
+ print("File successfully uploaded to audio dataset.")
83
  return jsonify({"message": "Audio data added successfully."}), 201
84
+
85
  except Exception as e:
86
+ print(f"---! UNEXPECTED ERROR in /add-audio !---: {e}")
87
  return jsonify({"error": "An internal error occurred.", "details": str(e)}), 500
88
 
89
 
 
92
  """
93
  Receives a transcription/summary pair, converts to Parquet, and uploads to the text dataset.
94
  """
95
+ print("Request received for /add-text route.")
96
  if not api:
97
+ print("Error: API client not available.")
98
  return jsonify({"error": "Server is not configured with Hugging Face credentials."}), 500
99
  try:
100
+ print("Attempting to process /add-text data...")
101
  data = request.get_json()
102
  if not data or 'transcription' not in data or 'summary' not in data:
103
+ print("Invalid payload received for /add-text.")
104
  return jsonify({"error": "Invalid payload. 'transcription' and 'summary' fields are required."}), 400
105
+
106
+ print("Data validated. Creating DataFrame.")
107
  df = pd.DataFrame([data])
108
  buffer = io.BytesIO()
109
  df.to_parquet(buffer, index=False, engine='pyarrow')
110
  buffer.seek(0)
111
+
112
+ print(f"Uploading file to text dataset: {TEXT_DATASET_REPO_ID}")
113
  api.upload_file(
114
  path_or_fileobj=buffer,
115
  path_in_repo=get_unique_filename(),
 
117
  repo_type="dataset",
118
  commit_message="Add new transcription-summary pair"
119
  )
120
+ print("File successfully uploaded to text dataset.")
121
  return jsonify({"message": "Text data added successfully."}), 201
122
+
123
  except Exception as e:
124
+ print(f"---! UNEXPECTED ERROR in /add-text !---: {e}")
125
  return jsonify({"error": "An internal error occurred.", "details": str(e)}), 500
126
 
127
  # To run on Hugging Face Spaces
128
  if __name__ == '__main__':
129
+ print("Starting Flask development server...")
130
  app.run(host='0.0.0.0', port=7860)