broadfield-dev committed on
Commit
794db80
·
verified ·
1 Parent(s): e0242b0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -45
app.py CHANGED
@@ -6,64 +6,76 @@ from huggingface_hub import hf_hub_download, upload_file, HfApi
6
  app = Flask(__name__)
7
 
8
  # CONFIGURATION
9
- # Format: "username/dataset-name"
10
- DATASET_REPO_ = "YOUR_USERNAME/memvid-storage"
11
  FILENAME = "knowledge.mv2"
12
  HF_TOKEN = os.environ.get("HF_TOKEN")
 
13
 
14
- # Global DB reference
15
  db = None
16
  DB_PATH = os.path.abspath(FILENAME)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  def init_db():
19
  """
20
- 1. Try to download existing DB from HF Hub.
21
- 2. If not found, create a new one locally.
22
- 3. Load Memvid.
23
  """
24
- global db
25
 
26
- # 1. Try to sync from Hub
27
- if HF_TOKEN:
 
 
 
28
  api = HfApi(token=HF_TOKEN)
29
- username = api.whoami()['name']
30
- print(username)
31
- DATASET_REPO_ID = DATASET_REPO_.replace("YOUR_USERNAME", username)
32
- print(f"🔄 Attempting to download {FILENAME} from {DATASET_REPO_ID}...")
33
  try:
34
- # This downloads the file to the local cache and returns the path
35
- # We copy it or link it to our working dir if needed, but usually
36
- # we just want it in the current directory for Memvid to write to.
37
 
38
- # Check if file exists in repo first
39
-
40
- files = api.list_repo_files(repo_id=DATASET_REPO_ID, repo_type="dataset")
41
 
42
  if FILENAME in files:
43
  downloaded_path = hf_hub_download(
44
- repo_id=DATASET_REPO_ID,
45
  filename=FILENAME,
46
  repo_type="dataset",
47
  token=HF_TOKEN,
48
- local_dir=".", # Download to current directory
49
- local_dir_use_symlinks=False # We need the actual file to write to it
50
  )
51
  print(f"✅ Downloaded database to {downloaded_path}")
52
  else:
53
- print("⚠️ Database file not found in repo. Creating new one.")
 
54
  except Exception as e:
55
- print(f"⚠️ Could not download from Hub (might be first run): {e}")
56
 
57
- # 2. Open or Create Memvid
58
  try:
59
- # Memvid 2.0 pattern:
60
- # If file exists, open it. If not, create it.
61
- if os.path.exists(DB_PATH):
62
- db = Memvid.open(DB_PATH)
63
- print(f"📂 Memvid opened at {DB_PATH}")
64
- else:
65
- db = Memvid.create(DB_PATH)
66
- print(f"✨ New Memvid created at {DB_PATH}")
67
 
68
  except Exception as e:
69
  print(f"❌ CRITICAL ERROR initializing Memvid: {e}")
@@ -71,8 +83,10 @@ def init_db():
71
 
72
  def sync_to_hub():
73
  """Uploads the local .mv2 file back to Hugging Face"""
74
- if not HF_TOKEN:
75
- print("⚠️ No HF_TOKEN found. Skipping sync.")
 
 
76
  return
77
 
78
  try:
@@ -80,7 +94,7 @@ def sync_to_hub():
80
  upload_file(
81
  path_or_fileobj=DB_PATH,
82
  path_in_repo=FILENAME,
83
- repo_id=DATASET_REPO_ID,
84
  repo_type="dataset",
85
  token=HF_TOKEN,
86
  commit_message="Memvid: Auto-save memory update"
@@ -100,25 +114,20 @@ def index():
100
  def add_memory():
101
  global db
102
  if not db:
103
- # Try to re-init if it failed before
104
  init_db()
105
  if not db:
106
  return jsonify({"error": "Database could not be initialized. Check logs."}), 500
107
 
108
  content = request.form.get('content')
109
- tags = request.form.get('tags', '') # Not used in basic put, but good for expansion
110
-
111
  if not content:
112
  return jsonify({"error": "No content provided"}), 400
113
 
114
  try:
115
  # Add the memory
116
- # Note: Check if your SDK version requires explicit transaction/commit
117
- # Some versions use db.put(content), others db.add(content)
118
  db.put(content)
119
 
120
- # IMPORTANT: Force a commit/flush to disk before uploading
121
- # If the SDK has a .commit() or .flush(), call it here.
122
  if hasattr(db, 'commit'):
123
  db.commit()
124
 
@@ -142,12 +151,10 @@ def search_memory():
142
  # Search
143
  results = db.search(query, top_k=5)
144
 
145
- # Transform results based on SDK object structure
146
  formatted_results = []
147
  for hit in results:
148
  formatted_results.append({
149
  "text": hit.text,
150
- # "score": hit.score # Uncomment if available
151
  })
152
 
153
  return jsonify({"success": True, "results": formatted_results})
 
6
  app = Flask(__name__)
7
 
8
  # CONFIGURATION
 
 
9
  FILENAME = "knowledge.mv2"
10
  HF_TOKEN = os.environ.get("HF_TOKEN")
11
+ DATASET_NAME = "memvid-storage" # Just the name, we will append username dynamically
12
 
13
+ # Global variables
14
  db = None
15
  DB_PATH = os.path.abspath(FILENAME)
16
+ DATASET_REPO_ID = None # Will be set during initialization
17
+
18
+ def get_repo_id():
19
+ """Helper to dynamically resolve 'username/dataset_name'"""
20
+ global DATASET_REPO_ID
21
+ if DATASET_REPO_ID:
22
+ return DATASET_REPO_ID
23
+
24
+ if HF_TOKEN:
25
+ try:
26
+ api = HfApi(token=HF_TOKEN)
27
+ username = api.whoami()['name']
28
+ DATASET_REPO_ID = f"{username}/{DATASET_NAME}"
29
+ return DATASET_REPO_ID
30
+ except Exception as e:
31
+ print(f"⚠️ Error getting username: {e}")
32
+ return None
33
+ return None
34
 
35
  def init_db():
36
  """
37
+ 1. Ensure Dataset Exists.
38
+ 2. Try to download existing DB.
39
+ 3. Initialize Memvid.
40
  """
41
+ global db, DATASET_REPO_ID
42
 
43
+ repo_id = get_repo_id()
44
+
45
+ # 1. Sync / Setup Cloud Storage
46
+ if HF_TOKEN and repo_id:
47
+ print(f"🔄 Checking cloud storage at {repo_id}...")
48
  api = HfApi(token=HF_TOKEN)
49
+
 
 
 
50
  try:
51
+ # Create the repo if it doesn't exist (Fixes your 404 error)
52
+ api.create_repo(repo_id=repo_id, repo_type="dataset", exist_ok=True)
 
53
 
54
+ # Check for file existence
55
+ files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
 
56
 
57
  if FILENAME in files:
58
  downloaded_path = hf_hub_download(
59
+ repo_id=repo_id,
60
  filename=FILENAME,
61
  repo_type="dataset",
62
  token=HF_TOKEN,
63
+ local_dir=".",
64
+ local_dir_use_symlinks=False
65
  )
66
  print(f"✅ Downloaded database to {downloaded_path}")
67
  else:
68
+ print("⚠️ Database file not found in repo. A new one will be created and synced.")
69
+
70
  except Exception as e:
71
+ print(f"⚠️ Cloud sync warning: {e}")
72
 
73
+ # 2. Initialize Memvid (Fixes 'no attribute create' error)
74
  try:
75
+ # In Python SDKs, the constructor usually handles Open OR Create.
76
+ # If the file exists, it opens it. If not, it creates it.
77
+ db = Memvid(DB_PATH)
78
+ print(f"✨ Memvid initialized at {DB_PATH}")
 
 
 
 
79
 
80
  except Exception as e:
81
  print(f"❌ CRITICAL ERROR initializing Memvid: {e}")
 
83
 
84
  def sync_to_hub():
85
  """Uploads the local .mv2 file back to Hugging Face"""
86
+ repo_id = get_repo_id()
87
+
88
+ if not HF_TOKEN or not repo_id:
89
+ print("⚠️ No HF_TOKEN or Repo ID found. Skipping sync.")
90
  return
91
 
92
  try:
 
94
  upload_file(
95
  path_or_fileobj=DB_PATH,
96
  path_in_repo=FILENAME,
97
+ repo_id=repo_id,
98
  repo_type="dataset",
99
  token=HF_TOKEN,
100
  commit_message="Memvid: Auto-save memory update"
 
114
  def add_memory():
115
  global db
116
  if not db:
 
117
  init_db()
118
  if not db:
119
  return jsonify({"error": "Database could not be initialized. Check logs."}), 500
120
 
121
  content = request.form.get('content')
122
+
 
123
  if not content:
124
  return jsonify({"error": "No content provided"}), 400
125
 
126
  try:
127
  # Add the memory
 
 
128
  db.put(content)
129
 
130
+ # Force commit if method exists (SDK dependent)
 
131
  if hasattr(db, 'commit'):
132
  db.commit()
133
 
 
151
  # Search
152
  results = db.search(query, top_k=5)
153
 
 
154
  formatted_results = []
155
  for hit in results:
156
  formatted_results.append({
157
  "text": hit.text,
 
158
  })
159
 
160
  return jsonify({"success": True, "results": formatted_results})