File size: 6,725 Bytes
9b45948
0626340
dae8d04
4a5b3e1
9b45948
 
 
dae8d04
4a5b3e1
 
ddd0a52
4a5b3e1
dae8d04
4a5b3e1
 
ddd0a52
794db80
 
dae8d04
794db80
 
 
 
 
 
 
 
 
 
dae8d04
 
794db80
 
4a5b3e1
 
dae8d04
 
 
 
 
794db80
4a5b3e1
794db80
 
dae8d04
794db80
dae8d04
872eb7d
dae8d04
4a5b3e1
ddd0a52
794db80
dae8d04
 
794db80
4a5b3e1
 
dae8d04
794db80
4a5b3e1
 
 
794db80
 
4a5b3e1
dae8d04
 
 
 
4a5b3e1
dae8d04
4a5b3e1
 
df09f89
ddd0a52
dae8d04
ddd0a52
dae8d04
 
 
 
 
 
df09f89
dae8d04
f65bd99
4eddf44
4a5b3e1
dae8d04
794db80
dae8d04
794db80
dae8d04
4a5b3e1
 
 
dae8d04
4a5b3e1
 
 
794db80
4a5b3e1
 
 
 
dae8d04
4a5b3e1
dae8d04
4a5b3e1
dae8d04
4a5b3e1
9b45948
 
 
 
 
 
 
0626340
4a5b3e1
9b45948
dae8d04
9b45948
 
 
0626340
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42d0d21
0626340
 
 
d79bab4
0626340
 
9b45948
 
 
 
 
 
 
 
 
 
 
1474415
f8abd43
1474415
 
 
 
9b45948
1474415
 
 
059b4f4
1474415
 
 
 
 
 
 
 
 
 
 
 
059b4f4
ba1a5aa
1474415
ce0faa0
 
 
1474415
 
9b45948
 
ba1a5aa
9b45948
 
 
ba1a5aa
 
 
 
 
 
 
 
 
 
 
 
 
 
9b45948
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
import json
import os

from flask import Flask, render_template, request, jsonify, stream_with_context, Response
from huggingface_hub import hf_hub_download, upload_file, HfApi
from memvid_sdk import create, open as open_memvid

# Flask application object; the route handlers below are registered on it.
app = Flask(__name__)

# CONFIGURATION
# Name of the database file, used both locally and as the path inside the
# Hub dataset repo.
FILENAME = "knowledge.mv2"
# Hugging Face access token from the environment; when unset, the cloud
# download/upload steps are skipped.
HF_TOKEN = os.environ.get("HF_TOKEN")
# Dataset name; get_repo_id() prefixes it with the account name.
DATASET_NAME = "memvid-storage" 

# Global variables
# Memvid handle shared by the routes. It is None before init_db() has bound
# it and is temporarily reset while /add flushes and reloads the store.
db = None
# Absolute path of the local database file, resolved against the working
# directory at import time.
DB_PATH = os.path.abspath(FILENAME)
# Cached "username/dataset_name" repo id, filled in lazily by get_repo_id().
DATASET_REPO_ID = None

def get_repo_id():
    """Return the Hub dataset repo id as 'username/dataset_name', or None.

    The id is looked up once through the Hub `whoami` call and then cached in
    the module-level DATASET_REPO_ID. None is returned when HF_TOKEN is not
    set or when the lookup fails (the failure is printed, not raised).
    """
    global DATASET_REPO_ID

    # Fast path: already resolved by an earlier call.
    if DATASET_REPO_ID:
        return DATASET_REPO_ID

    # Without a token there is no account to resolve the username from.
    if not HF_TOKEN:
        return None

    try:
        username = HfApi(token=HF_TOKEN).whoami()['name']
        DATASET_REPO_ID = f"{username}/{DATASET_NAME}"
    except Exception as e:
        print(f"⚠️ Error getting username: {e}")
        return None
    return DATASET_REPO_ID

def init_db():
    """Sync the database file from the Hub (best-effort), then open or create it.

    1. Ensure the dataset repo exists.
    2. Download the existing database file from it, if the repo has one.
    3. Open the local file at DB_PATH with Memvid, or create it when absent.

    Steps 1-2 only run when HF_TOKEN is set and a repo id could be resolved.
    Errors raised by the Hub calls are printed and otherwise ignored, so the
    app can still work from a local file. Errors raised while opening or
    creating the local database (other than the ImportError fallback below)
    propagate to the caller.

    Side effect: rebinds the module-level ``db``.
    """
    global db

    repo_id = get_repo_id()

    # 1-2. Sync / set up cloud storage.
    if HF_TOKEN and repo_id:
        print(f"🔄 Checking cloud storage at {repo_id}...")
        api = HfApi(token=HF_TOKEN)

        try:
            # Create the repo if it doesn't exist.
            api.create_repo(repo_id=repo_id, repo_type="dataset", exist_ok=True)

            # Only attempt the download when the file is actually in the repo.
            files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")

            if FILENAME in files:
                downloaded_path = hf_hub_download(
                    repo_id=repo_id,
                    filename=FILENAME,
                    repo_type="dataset",
                    token=HF_TOKEN,
                    # Download into DB_PATH's directory rather than ".":
                    # DB_PATH was made absolute at import time, so a later
                    # change of working directory must not make the download
                    # land somewhere other than where the file is opened.
                    local_dir=os.path.dirname(DB_PATH),
                    local_dir_use_symlinks=False,
                )
                print(f"✅ Downloaded database to {downloaded_path}")
            else:
                print("⚠️ Database file not found in repo. A new one will be created and synced.")

        except Exception as e:
            print(f"⚠️ Cloud sync warning: {e}")

    # 3. Open the local database, or create it on first run.
    try:
        if os.path.exists(DB_PATH):
            db = open_memvid(DB_PATH, read_only=False)
        else:
            db = create(DB_PATH)

    except ImportError:
        # Fall back to the class-based API.
        # NOTE(review): presumably kept for SDK builds whose functional API
        # raises ImportError when called -- confirm this path is still needed.
        from memvid_sdk import Memvid
        db = Memvid()
        if os.path.exists(DB_PATH):
            db.open(DB_PATH)
        else:
            db.create(DB_PATH)

def sync_to_hub():
    """Push the local database file to the Hugging Face dataset repo.

    The upload is skipped (with a printed notice) when HF_TOKEN is unset or
    no repo id can be resolved. A failed upload is printed, never raised.
    """
    target_repo = get_repo_id()

    # Both a token and a resolved repo id are required to upload.
    if not (HF_TOKEN and target_repo):
        print("⚠️ No HF_TOKEN or Repo ID found. Skipping sync.")
        return

    try:
        print("☁️ Syncing to Hub...")
        upload_args = {
            "path_or_fileobj": DB_PATH,
            "path_in_repo": FILENAME,
            "repo_id": target_repo,
            "repo_type": "dataset",
            "token": HF_TOKEN,
            "commit_message": "Memvid: Auto-save memory update",
        }
        upload_file(**upload_args)
        print("✅ Sync complete.")
    except Exception as e:
        print(f"❌ Sync failed: {e}")

# Initialize on startup. This call runs at import time, so merely importing
# this module attempts the Hub sync and opens (or creates) the database.
init_db()

@app.route('/')
def index():
    """Render and return the 'index.html' template for the root URL."""
    page = render_template('index.html')
    return page

@app.route('/add', methods=['POST'])
def add_memory():
    """Store the posted `content` as a memory and stream progress as NDJSON.

    Returns a 400 JSON error when the `content` form field is missing or
    empty. Otherwise returns a streamed `application/x-ndjson` response in
    which every line is a JSON object with a `status` ("processing",
    "success" or "error") and a `message`.

    Any exception raised while ingesting, syncing or reloading is reported as
    a final "error" line instead of being raised.
    """
    # 1. Validation
    content = request.form.get('content')

    if not content:
        return jsonify({"error": "No content provided"}), 400

    def event(status, message):
        """Serialize one progress event as a single NDJSON line."""
        # json.dumps escapes quotes, backslashes and newlines, so arbitrary
        # exception text cannot break the one-object-per-line framing.
        return json.dumps({"status": status, "message": message}) + "\n"

    # 2. Define the stream generator
    def generate():
        global db
        try:
            # Step A: Re-init if needed inside the stream
            if not db:
                init_db()
                if not db:
                    yield event("error", "Database init failed")
                    return

            # Step B: Database put
            yield event("processing", "Ingesting content...")

            payload = {
                "text": content,
                "labels": ["web-entry"],
                "title": "User Memory"
            }
            db.put(payload)

            # Step C: Flush to disk by dropping the handle. Rebinding to None
            # releases the reference without ever leaving the global name
            # undefined, which a separate `del db` would do for a moment.
            # NOTE(review): this relies on the handle writing its data out
            # when released; if the SDK has an explicit close/flush call,
            # prefer it -- confirm.
            yield event("processing", "Flushing to disk...")
            db = None

            # Step D: Sync. sync_to_hub() reports failures only by printing,
            # so the stream continues even when the upload did not succeed.
            yield event("processing", "Syncing to cloud (this may take a moment)...")
            sync_to_hub()

            # Step E: Reload
            yield event("processing", "Reloading index...")
            init_db()

            # Final success message
            yield event("success", "Memory added and synced.")

        except Exception as e:
            # Report any failure as a well-formed final line of the stream.
            yield event("error", str(e))

    # 3. Return the stream
    return Response(stream_with_context(generate()), mimetype='application/x-ndjson')

@app.route('/search', methods=['POST'])
def search_memory():
    """Search the store for the posted `query` and return cleaned-up hits.

    Responds with 500 when the database handle is not set, 400 when the
    `query` form field is missing or empty, and 500 with the exception text
    when the search or result processing fails. On success the JSON body is
    `{"success": true, "results": [...]}`; hits scoring below 0.65 are
    dropped and metadata lines are removed from each snippet.
    """
    # Snippet lines starting with one of these prefixes are not shown.
    metadata_prefixes = ('title:', 'tags:', 'labels:', 'extractous_metadata:')
    # Hits scoring below this value are left out of the response.
    min_score = 0.65

    if not db:
        return jsonify({"error": "Database not initialized"}), 500

    query = request.form.get('query')
    if not query:
        return jsonify({"error": "No query provided"}), 400

    try:
        found = db.find(query)

        results = []
        for hit in found.get('hits', []):
            score = hit.get('score', 0.0)
            if score < min_score:
                continue

            # Keep only the snippet lines that are actual content.
            kept_lines = []
            for line in hit.get('snippet', '').split('\n'):
                if line.strip().startswith(metadata_prefixes):
                    continue
                kept_lines.append(line)
            body = "\n".join(kept_lines).strip()

            hit_tags = hit.get('tags', [])
            hit_labels = hit.get('labels', [])

            entry = {
                "title": hit.get('title') or "Untitled Memory",
                "text": body,
                "tags": hit_tags,
                "labels": hit_labels,
                "date": hit.get('created_at', ''),
                "score": f"{score:.2f}",
            }
            results.append(entry)

        return jsonify({"success": True, "results": results})
    except Exception as e:
        return jsonify({"error": str(e)}), 500















# When executed directly, start the Flask server listening on all interfaces
# at port 7860.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)