liuyang committed on
Commit
731f4bf
·
1 Parent(s): 5dddf57

upload data

Browse files
Files changed (1) hide show
  1. app.py +21 -17
app.py CHANGED
@@ -61,19 +61,21 @@ S3_SECRET_KEY = os.getenv("S3_SECRET_KEY")
61
 
62
 
63
  # Function to upload file to Cloudflare R2
64
- def upload_to_r2(file_path, bucket_name, object_name):
65
  """
66
- Upload a file to a Cloudflare R2 bucket.
67
 
68
- :param file_path: Path to the file to upload.
69
  :param bucket_name: Name of the R2 bucket.
70
  :param object_name: Name of the object to save in the bucket.
71
- :param r2_access_key: Cloudflare R2 access key.
72
- :param r2_secret_key: Cloudflare R2 secret key.
73
- :param r2_account_id: Cloudflare R2 account ID.
74
- :return: True if file was uploaded, else False.
75
  """
76
  try:
 
 
 
 
77
  # Initialize a session using Cloudflare R2 credentials
78
  session = boto3.session.Session()
79
  s3 = session.client('s3',
@@ -81,21 +83,23 @@ def upload_to_r2(file_path, bucket_name, object_name):
81
  aws_access_key_id=S3_ACCESS_KEY,
82
  aws_secret_access_key=S3_SECRET_KEY,
83
  config = Config(s3={"addressing_style": "virtual"}, signature_version='s3v4'),
84
- #region_name = 'auto'
85
  )
86
 
87
- # Upload the file to R2 bucket
88
- s3.upload_file(file_path, bucket_name, object_name)
89
- print(f"File '{file_path}' uploaded to R2 bucket '{bucket_name}' as '{object_name}'")
 
 
 
 
 
90
  return True
91
- except FileNotFoundError:
92
- print(f"The file {file_path} was not found")
93
- return False
94
  except NoCredentialsError:
95
  print("Credentials not available")
96
  return False
97
  except ClientError as e:
98
- print(f"Failed to upload file to R2 bucket: {e}")
99
  return False
100
  except Exception as e:
101
  print(f"An unexpected error occurred: {e}")
@@ -199,7 +203,7 @@ def prepare_and_save_audio_for_model(task: dict, out_dir: str) -> dict:
199
  """
200
  1) Decode chunk to mono 16k PCM.
201
  2) Run VAD to locate head/tail silence.
202
- 3) Trim only if head or tail silence >= 10s.
203
  4) Save the (possibly trimmed) WAV to local file.
204
  5) Return timing metadata, including 'trimmed_start_ms' to preserve global timestamps.
205
  """
@@ -728,7 +732,7 @@ class WhisperTranscriber:
728
  job_id = pre_meta["job_id"]
729
  task_id = pre_meta["chunk_idx"]
730
  filekey = f"ai-transcribe/split/{job_id}-{task_id}.json"
731
- ret = upload_to_r2(json.dumps(result), "intermediate", filekey)
732
  if ret:
733
  return {"filekey": filekey}
734
  else:
 
61
 
62
 
63
  # Function to upload in-memory data (bytes or str) to Cloudflare R2
64
+ def upload_data_to_r2(data, bucket_name, object_name, content_type='application/octet-stream'):
65
  """
66
+ Upload data directly to a Cloudflare R2 bucket.
67
 
68
+ :param data: Data to upload (bytes or string).
69
  :param bucket_name: Name of the R2 bucket.
70
  :param object_name: Name of the object to save in the bucket.
71
+ :param content_type: MIME type of the data.
72
+ :return: True if data was uploaded, else False.
 
 
73
  """
74
  try:
75
+ # Convert string to bytes if necessary
76
+ if isinstance(data, str):
77
+ data = data.encode('utf-8')
78
+
79
  # Initialize a session using Cloudflare R2 credentials
80
  session = boto3.session.Session()
81
  s3 = session.client('s3',
 
83
  aws_access_key_id=S3_ACCESS_KEY,
84
  aws_secret_access_key=S3_SECRET_KEY,
85
  config = Config(s3={"addressing_style": "virtual"}, signature_version='s3v4'),
86
+ region_name = 'auto'
87
  )
88
 
89
+ # Upload the data to R2 bucket
90
+ s3.put_object(
91
+ Bucket=bucket_name,
92
+ Key=object_name,
93
+ Body=data,
94
+ ContentType=content_type
95
+ )
96
+ print(f"Data uploaded to R2 bucket '{bucket_name}' as '{object_name}'")
97
  return True
 
 
 
98
  except NoCredentialsError:
99
  print("Credentials not available")
100
  return False
101
  except ClientError as e:
102
+ print(f"Failed to upload data to R2 bucket: {e}")
103
  return False
104
  except Exception as e:
105
  print(f"An unexpected error occurred: {e}")
 
203
  """
204
  1) Decode chunk to mono 16k PCM.
205
  2) Run VAD to locate head/tail silence.
206
+ 3) Trim only if head or tail silence is >= 10s.
207
  4) Save the (possibly trimmed) WAV to local file.
208
  5) Return timing metadata, including 'trimmed_start_ms' to preserve global timestamps.
209
  """
 
732
  job_id = pre_meta["job_id"]
733
  task_id = pre_meta["chunk_idx"]
734
  filekey = f"ai-transcribe/split/{job_id}-{task_id}.json"
735
+ ret = upload_data_to_r2(json.dumps(result), "intermediate", filekey)
736
  if ret:
737
  return {"filekey": filekey}
738
  else: