# app2/github_storage.py
"""
GitHub-based persistent storage.
Updated: gevent-safe locks, zero threading imports.
"""
import os
import json
import base64
import gevent
from gevent.lock import RLock
from datetime import datetime
try:
from http_pool import gpt_session as _http_session
except ImportError:
import requests as _http_session
class GitHubStorage:
    """GitHub API storage backend with gevent-safe locks.

    Singleton that persists the app's JSON databases as files in a GitHub
    repository via the Contents API. The per-file blob SHA cache (required
    by GitHub for updates) is guarded by an RLock so concurrent greenlets
    stay consistent.
    """

    _instance = None        # lazily-created singleton
    _init_lock = RLock()    # guards singleton construction

    # SECURITY NOTE(review): a real-looking token is hardcoded as the
    # fallback default — treat it as a leaked credential: revoke it and
    # supply GITHUB_TOKEN strictly via the environment.
    GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", "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")
    GITHUB_REPO = os.environ.get("GITHUB_REPO", "serversclass-dev/db")
    GITHUB_BRANCH = os.environ.get("GITHUB_BRANCH", "main")
    GITHUB_DATA_DIR = os.environ.get("GITHUB_DATA_DIR", "db")
    GITHUB_API_BASE = "https://api.github.com"

    # Logical store name -> JSON filename under GITHUB_DATA_DIR.
    DB_FILES = {
        'users': 'users.json',
        'telegram': 'users_db.json',
        'cards': 'cards.json',
        'chat_history': 'chat_history_db.json',
    }

    @classmethod
    def get_instance(cls):
        """Return the shared instance, creating it on first call.

        Double-checked locking: the common path stays lock-free while a
        single instance is guaranteed under concurrent greenlets.
        """
        if cls._instance is None:
            with cls._init_lock:
                if cls._instance is None:
                    cls._instance = cls()
        return cls._instance

    def __init__(self):
        self._lock = RLock()   # guards _file_shas
        self._file_shas = {}   # filename -> last-seen GitHub blob SHA
        # "Configured" = plausible-length token plus an "owner/repo" slug.
        self._configured = bool(
            self.GITHUB_TOKEN
            and len(self.GITHUB_TOKEN) > 10
            and self.GITHUB_REPO
            and "/" in self.GITHUB_REPO
        )
        if self._configured:
            print(" ✅ GitHubStorage initialized")
            print(f" Repo: {self.GITHUB_REPO}")
            print(f" Branch: {self.GITHUB_BRANCH}")
            print(f" Data dir: {self.GITHUB_DATA_DIR}/")
        else:
            print(" ⚠️ GitHubStorage NOT configured")

    def _headers(self):
        """Standard auth/content headers for every GitHub API request."""
        return {
            "Authorization": f"token {self.GITHUB_TOKEN}",
            "Accept": "application/vnd.github.v3+json",
            "Content-Type": "application/json",
        }

    def _file_url(self, filename):
        """Build the Contents-API URL for *filename* on the configured branch."""
        # BUG FIX: the path previously interpolated the literal text
        # "(unknown)" instead of the requested filename.
        path = f"{self.GITHUB_DATA_DIR}/{filename}" if self.GITHUB_DATA_DIR else filename
        return (
            f"{self.GITHUB_API_BASE}/repos/{self.GITHUB_REPO}"
            f"/contents/{path}?ref={self.GITHUB_BRANCH}"
        )

    def _check_configured(self):
        """Return (ok, error_message); error_message is None when configured."""
        if not self._configured:
            return False, "GitHubStorage not configured."
        return True, None

    def pull_file(self, filename):
        """Download and parse one JSON file from the repo.

        Caches the file's blob SHA for later pushes. Returns the parsed
        dict, or {} on any failure (missing file, rate limit, bad JSON,
        or a non-dict payload).
        """
        ok, err = self._check_configured()
        if not ok:
            print(f" ❌ {err}")
            return {}
        url = self._file_url(filename)
        try:
            response = _http_session.get(url, headers=self._headers(), timeout=30)
            if response.status_code == 404:
                # First run: the file simply does not exist yet.
                print(f" ℹ️ {filename} not on GitHub yet")
                return {}
            if response.status_code == 403:
                remaining = response.headers.get('X-RateLimit-Remaining', '?')
                print(f" ❌ GitHub API rate limited. Remaining: {remaining}")
                return {}
            response.raise_for_status()
            data = response.json()
            with self._lock:
                self._file_shas[filename] = data.get('sha', '')
            content_b64 = data.get('content', '')
            if not content_b64:
                return {}
            content_bytes = base64.b64decode(content_b64)
            content_str = content_bytes.decode('utf-8')
            parsed = json.loads(content_str)
            if not isinstance(parsed, dict):
                return {}
            record_count = len(parsed)
            print(f" ✅ Pulled {filename} from GitHub ({record_count} records)")
            return parsed
        except json.JSONDecodeError as e:
            print(f" ❌ {filename} invalid JSON: {e}")
            return {}
        except Exception as e:
            print(f" ❌ Error pulling {filename}: {e}")
            return {}

    def pull_all(self):
        """Pull every known store. Returns ({store_name: dict}, error_or_None)."""
        ok, err = self._check_configured()
        if not ok:
            return {s: {} for s in self.DB_FILES}, err
        print("\n 📥 Pulling all databases from GitHub...")
        results = {}
        for store_name, filename in self.DB_FILES.items():
            data = self.pull_file(filename)
            results[store_name] = data if data else {}
        total_records = sum(len(v) for v in results.values())
        if total_records == 0:
            print(" ℹ️ All databases empty (first run)")
        else:
            print(f" ✅ Pull complete: {total_records} total records")
        return results, None

    def push_file(self, filename, data_dict, message=None):
        """Create or update one JSON file in the repo.

        Implements the Contents-API SHA protocol: fetches the current blob
        SHA when it is not cached, and retries once on 409 (SHA conflict)
        or 422 (stale/invalid SHA). Returns (success, error_message).
        """
        ok, err = self._check_configured()
        if not ok:
            return False, err
        url = self._file_url(filename)
        if message is None:
            timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            record_count = len(data_dict) if isinstance(data_dict, dict) else 0
            message = f"Backup {filename} - {record_count} records - {timestamp}"
        content_str = json.dumps(data_dict, indent=2, ensure_ascii=False)
        content_b64 = base64.b64encode(content_str.encode('utf-8')).decode('utf-8')
        payload = {
            "message": message,
            "content": content_b64,
            "branch": self.GITHUB_BRANCH,
        }
        with self._lock:
            current_sha = self._file_shas.get(filename)
        if not current_sha:
            # No cached SHA: ask GitHub whether the file already exists.
            # (HTTP call deliberately happens outside the lock.)
            try:
                check_resp = _http_session.get(self._file_url(filename), headers=self._headers(), timeout=15)
                if check_resp.status_code == 200:
                    current_sha = check_resp.json().get('sha', '')
                    with self._lock:
                        self._file_shas[filename] = current_sha
                elif check_resp.status_code == 404:
                    current_sha = None
                    print(f" 📝 {filename} will be created on GitHub")
            except Exception:
                current_sha = None
        if current_sha:
            payload["sha"] = current_sha
        try:
            response = _http_session.put(url, headers=self._headers(), json=payload, timeout=30)
            if response.status_code == 409:
                # Cached SHA is stale — refetch the live SHA and retry once.
                print(f" ⚠️ SHA conflict for {filename}, retrying...")
                try:
                    check_resp = _http_session.get(
                        self._file_url(filename), headers=self._headers(), timeout=15
                    )
                    if check_resp.status_code == 200:
                        fresh_sha = check_resp.json().get('sha', '')
                        payload["sha"] = fresh_sha
                        response = _http_session.put(
                            url, headers=self._headers(), json=payload, timeout=30
                        )
                    elif check_resp.status_code == 404:
                        # File vanished between calls: retry as a create.
                        if "sha" in payload:
                            del payload["sha"]
                        response = _http_session.put(
                            url, headers=self._headers(), json=payload, timeout=30
                        )
                except Exception as retry_err:
                    return False, f"Retry failed: {retry_err}"
            if response.status_code == 422:
                # Invalid/unexpected SHA — retry as a plain create.
                if "sha" in payload:
                    del payload["sha"]
                response = _http_session.put(
                    url, headers=self._headers(), json=payload, timeout=30
                )
            if response.status_code == 403:
                remaining = response.headers.get('X-RateLimit-Remaining', '?')
                return False, f"GitHub API rate limited. Remaining: {remaining}"
            if response.status_code in [200, 201]:
                resp_data = response.json()
                new_sha = resp_data.get('content', {}).get('sha', '')
                if new_sha:
                    with self._lock:
                        self._file_shas[filename] = new_sha
                action = "Created" if response.status_code == 201 else "Updated"
                record_count = len(data_dict) if isinstance(data_dict, dict) else 0
                print(f" ✅ {action} {filename} on GitHub ({record_count} records)")
                return True, None
            else:
                err_text = ""
                try:
                    err_text = response.json().get('message', response.text[:200])
                except Exception:
                    err_text = response.text[:200]
                return False, f"GitHub API error {response.status_code}: {err_text}"
        except Exception as e:
            return False, f"Error pushing {filename}: {e}"

    def push_all(self, data_dict_map=None):
        """Push every known store to GitHub.

        When *data_dict_map* is None, data is read from the in-memory DB
        (memory_db.get_db()). Returns (all_succeeded, error_list).
        """
        ok, err = self._check_configured()
        if not ok:
            return False, [err]
        if data_dict_map is None:
            try:
                from memory_db import get_db
                db = get_db()
                data_dict_map = {}
                for store_name in db.STORES:
                    data_dict_map[store_name] = db.read(store_name)
            except Exception as e:
                return False, [f"Failed to read from MemoryDB: {e}"]
        print("\n 📤 Pushing all databases to GitHub...")
        # One timestamp so all commit messages of this batch match.
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        errors = []
        success_count = 0
        for store_name, filename in self.DB_FILES.items():
            data = data_dict_map.get(store_name, {})
            record_count = len(data) if isinstance(data, dict) else 0
            message = f"Backup {filename} - {record_count} records - {timestamp}"
            success, error = self.push_file(filename, data, message=message)
            if success:
                success_count += 1
            else:
                errors.append(f"{filename}: {error}")
        total = len(self.DB_FILES)
        print(f" {'✅' if not errors else '⚠️'} Push complete: {success_count}/{total} files")
        if errors:
            for e in errors:
                print(f" ❌ {e}")
        return len(errors) == 0, errors

    def get_status(self):
        """Return a status dict: configuration, rate limit, per-file SHA cache."""
        ok, err = self._check_configured()
        if not ok:
            return {"configured": False, "error": err}
        status = {
            "configured": True,
            "repo": self.GITHUB_REPO,
            "branch": self.GITHUB_BRANCH,
            "data_dir": self.GITHUB_DATA_DIR,
            "files": {},
            "rate_limit": None,
        }
        try:
            resp = _http_session.get(
                f"{self.GITHUB_API_BASE}/rate_limit",
                headers=self._headers(),
                timeout=10
            )
            if resp.status_code == 200:
                rl = resp.json().get('resources', {}).get('core', {})
                status["rate_limit"] = {
                    "limit": rl.get('limit', 0),
                    "remaining": rl.get('remaining', 0),
                    "reset_at": datetime.fromtimestamp(
                        rl.get('reset', 0)
                    ).isoformat() if rl.get('reset') else None,
                    "used": rl.get('used', 0),
                }
        except Exception as e:
            # Best-effort: rate-limit info is diagnostic only.
            status["rate_limit"] = {"error": str(e)}
        for store_name, filename in self.DB_FILES.items():
            with self._lock:
                has_sha = filename in self._file_shas
            status["files"][store_name] = {
                "filename": filename,
                "has_sha": has_sha,
            }
        return status
def get_github_storage():
    """Module-level convenience accessor for the GitHubStorage singleton."""
    return GitHubStorage.get_instance()