Spaces:
Runtime error
Runtime error
liuyang
committed on
Commit
·
731f4bf
1
Parent(s):
5dddf57
upload data
Browse files
app.py
CHANGED
|
@@ -61,19 +61,21 @@ S3_SECRET_KEY = os.getenv("S3_SECRET_KEY")
|
|
| 61 |
|
| 62 |
|
| 63 |
# Function to upload file to Cloudflare R2
|
| 64 |
-
def upload_to_r2(file_path, bucket_name, object_name):
|
| 65 |
"""
|
| 66 |
-
Upload
|
| 67 |
|
| 68 |
-
:param
|
| 69 |
:param bucket_name: Name of the R2 bucket.
|
| 70 |
:param object_name: Name of the object to save in the bucket.
|
| 71 |
-
:param
|
| 72 |
-
:
|
| 73 |
-
:param r2_account_id: Cloudflare R2 account ID.
|
| 74 |
-
:return: True if file was uploaded, else False.
|
| 75 |
"""
|
| 76 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
# Initialize a session using Cloudflare R2 credentials
|
| 78 |
session = boto3.session.Session()
|
| 79 |
s3 = session.client('s3',
|
|
@@ -81,21 +83,23 @@ def upload_to_r2(file_path, bucket_name, object_name):
|
|
| 81 |
aws_access_key_id=S3_ACCESS_KEY,
|
| 82 |
aws_secret_access_key=S3_SECRET_KEY,
|
| 83 |
config = Config(s3={"addressing_style": "virtual"}, signature_version='s3v4'),
|
| 84 |
-
|
| 85 |
)
|
| 86 |
|
| 87 |
-
# Upload the
|
| 88 |
-
s3.
|
| 89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
return True
|
| 91 |
-
except FileNotFoundError:
|
| 92 |
-
print(f"The file {file_path} was not found")
|
| 93 |
-
return False
|
| 94 |
except NoCredentialsError:
|
| 95 |
print("Credentials not available")
|
| 96 |
return False
|
| 97 |
except ClientError as e:
|
| 98 |
-
print(f"Failed to upload
|
| 99 |
return False
|
| 100 |
except Exception as e:
|
| 101 |
print(f"An unexpected error occurred: {e}")
|
|
@@ -199,7 +203,7 @@ def prepare_and_save_audio_for_model(task: dict, out_dir: str) -> dict:
|
|
| 199 |
"""
|
| 200 |
1) Decode chunk to mono 16k PCM.
|
| 201 |
2) Run VAD to locate head/tail silence.
|
| 202 |
-
3) Trim only if head or tail
|
| 203 |
4) Save the (possibly trimmed) WAV to local file.
|
| 204 |
5) Return timing metadata, including 'trimmed_start_ms' to preserve global timestamps.
|
| 205 |
"""
|
|
@@ -728,7 +732,7 @@ class WhisperTranscriber:
|
|
| 728 |
job_id = pre_meta["job_id"]
|
| 729 |
task_id = pre_meta["chunk_idx"]
|
| 730 |
filekey = f"ai-transcribe/split/{job_id}-{task_id}.json"
|
| 731 |
-
ret =
|
| 732 |
if ret:
|
| 733 |
return {"filekey": filekey}
|
| 734 |
else:
|
|
|
|
| 61 |
|
| 62 |
|
| 63 |
# Function to upload file to Cloudflare R2
|
| 64 |
+
def upload_data_to_r2(data, bucket_name, object_name, content_type='application/octet-stream'):
|
| 65 |
"""
|
| 66 |
+
Upload data directly to a Cloudflare R2 bucket.
|
| 67 |
|
| 68 |
+
:param data: Data to upload (bytes or string).
|
| 69 |
:param bucket_name: Name of the R2 bucket.
|
| 70 |
:param object_name: Name of the object to save in the bucket.
|
| 71 |
+
:param content_type: MIME type of the data.
|
| 72 |
+
:return: True if data was uploaded, else False.
|
|
|
|
|
|
|
| 73 |
"""
|
| 74 |
try:
|
| 75 |
+
# Convert string to bytes if necessary
|
| 76 |
+
if isinstance(data, str):
|
| 77 |
+
data = data.encode('utf-8')
|
| 78 |
+
|
| 79 |
# Initialize a session using Cloudflare R2 credentials
|
| 80 |
session = boto3.session.Session()
|
| 81 |
s3 = session.client('s3',
|
|
|
|
| 83 |
aws_access_key_id=S3_ACCESS_KEY,
|
| 84 |
aws_secret_access_key=S3_SECRET_KEY,
|
| 85 |
config = Config(s3={"addressing_style": "virtual"}, signature_version='s3v4'),
|
| 86 |
+
region_name = 'auto'
|
| 87 |
)
|
| 88 |
|
| 89 |
+
# Upload the data to R2 bucket
|
| 90 |
+
s3.put_object(
|
| 91 |
+
Bucket=bucket_name,
|
| 92 |
+
Key=object_name,
|
| 93 |
+
Body=data,
|
| 94 |
+
ContentType=content_type
|
| 95 |
+
)
|
| 96 |
+
print(f"Data uploaded to R2 bucket '{bucket_name}' as '{object_name}'")
|
| 97 |
return True
|
|
|
|
|
|
|
|
|
|
| 98 |
except NoCredentialsError:
|
| 99 |
print("Credentials not available")
|
| 100 |
return False
|
| 101 |
except ClientError as e:
|
| 102 |
+
print(f"Failed to upload data to R2 bucket: {e}")
|
| 103 |
return False
|
| 104 |
except Exception as e:
|
| 105 |
print(f"An unexpected error occurred: {e}")
|
|
|
|
| 203 |
"""
|
| 204 |
1) Decode chunk to mono 16k PCM.
|
| 205 |
2) Run VAD to locate head/tail silence.
|
| 206 |
+
3) Trim only if head or tail >= 10s.
|
| 207 |
4) Save the (possibly trimmed) WAV to local file.
|
| 208 |
5) Return timing metadata, including 'trimmed_start_ms' to preserve global timestamps.
|
| 209 |
"""
|
|
|
|
| 732 |
job_id = pre_meta["job_id"]
|
| 733 |
task_id = pre_meta["chunk_idx"]
|
| 734 |
filekey = f"ai-transcribe/split/{job_id}-{task_id}.json"
|
| 735 |
+
ret = upload_data_to_r2(json.dumps(result), "intermediate", filekey)
|
| 736 |
if ret:
|
| 737 |
return {"filekey": filekey}
|
| 738 |
else:
|