eshan6704 committed on
Commit
f04bb3f
·
verified ·
1 Parent(s): 4d62e6d

Create backblaze.py

Browse files
Files changed (1) hide show
  1. backblaze.py +100 -0
backblaze.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import boto3
2
+ from io import BytesIO
3
+ import pandas as pd
4
+ import json
5
+ import os
6
+
7
+ # ===========================
8
+ # Backblaze B2 Client Setup
9
+ # ===========================
10
# SECURITY: the access key ID and secret key were previously hard-coded here.
# Any key pair that has been committed must be treated as compromised and
# revoked; credentials are now taken from the environment instead.
S3_ENDPOINT = "https://s3.us-east-005.backblazeb2.com"
# None when the variable is unset; boto3 documents these client arguments as
# optional and then resolves credentials through its own configuration.
AWS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID")
AWS_SECRET_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")

# Module-level S3-compatible client shared by upload_file() and read_file().
s3 = boto3.client(
    "s3",
    endpoint_url=S3_ENDPOINT,
    aws_access_key_id=AWS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_KEY,
)
20
+
21
+
22
+ # ===========================
23
+ # Helper to get extension
24
+ # ===========================
25
def get_ext(file_name):
    """Return the lower-cased extension of ``file_name`` without the dot.

    Returns an empty string when the name has no extension.
    """
    _, suffix = os.path.splitext(file_name)
    # splitext yields either "" or a suffix starting with its only dot,
    # so dropping the first character removes exactly that dot.
    return suffix[1:].lower()
27
+
28
+
29
+ # ===========================
30
+ # Upload any file
31
+ # ===========================
32
def upload_file(bucket_name, file_name, file_content):
    """
    Upload content to a Backblaze B2 bucket under the key ``file_name``.

    The payload is built from the type of ``file_content`` together with
    the extension of ``file_name``:
    - pd.DataFrame -> CSV for "csv", Excel for "xlsx" / "xls"
    - dict         -> JSON text (only when the extension is "json")
    - str          -> UTF-8 encoded bytes
    - anything else (bytes for pdf, png, etc.) -> passed to put_object as-is

    Returns None.

    Raises:
        ValueError: if a DataFrame is given with an extension other than
            csv/xlsx/xls, or a dict is given with an extension other
            than json.
    """
    ext = get_ext(file_name)

    if isinstance(file_content, pd.DataFrame):
        buffer = BytesIO()
        if ext == "csv":
            file_content.to_csv(buffer, index=False)
        elif ext in ("xlsx", "xls"):
            file_content.to_excel(buffer, index=False)
        else:
            raise ValueError(f"Unsupported dataframe extension: {ext}")
        # getvalue() returns the whole buffer regardless of position.
        body = buffer.getvalue()
    elif isinstance(file_content, dict):
        # Fail here with a clear message rather than handing a raw dict
        # to put_object, which cannot send it.
        if ext != "json":
            raise ValueError(f"Unsupported dict extension: {ext}")
        body = json.dumps(file_content).encode("utf-8")
    elif isinstance(file_content, str):
        # Encode explicitly for every extension so the stored bytes never
        # depend on how the client library treats text bodies.
        body = file_content.encode("utf-8")
    else:
        # bytes and any other object are forwarded unchanged.
        body = file_content

    s3.put_object(Bucket=bucket_name, Key=file_name, Body=body)
67
+
68
+
69
+ # ===========================
70
+ # Read any file
71
+ # ===========================
72
def read_file(bucket_name, file_name):
    """
    Fetch ``file_name`` from a B2 bucket and decode it by extension.

    Returns:
    - str for txt, html (decoded as UTF-8)
    - pd.DataFrame for csv, xlsx, xls
    - the parsed JSON value for json
    - bytes for every other extension (pdf, images, etc.)
    - None when the key does not exist or any other error occurs; other
      errors are printed before returning.
    """
    ext = get_ext(file_name)
    try:
        response = s3.get_object(Bucket=bucket_name, Key=file_name)
        payload = response['Body'].read()

        if ext == "json":
            return json.loads(payload)
        if ext == "csv":
            return pd.read_csv(BytesIO(payload))
        if ext in ("xlsx", "xls"):
            return pd.read_excel(BytesIO(payload))
        if ext in ("txt", "html"):
            return payload.decode("utf-8")
        # raw bytes for pdf, png, images, etc.
        return payload
    except s3.exceptions.NoSuchKey:
        # A missing object is reported silently as None.
        return None
    except Exception as e:
        # Best-effort read: download and parse failures are printed,
        # not raised.
        print(f"Error reading {file_name} from B2: {e}")
        return None