prompt-engine / backend /migrate_embeddings.py
siddhm11
feat: update backend with complete prompt engine v4.0 - Enhanced main.py with request logging, CORS config, embedding preload - Updated config.py with tier-based model routing, rate limits, Stripe stubs - Expanded prompts.py with streaming, voice-enhance, feedback, history, usage endpoints - Added llm_service.py with Groq client pool and key rotation - Updated memory_service.py with passive learning, feedback analysis - Added migrate_embeddings.py for multilingual model migration - Updated schemas.py, auth.py, saved_prompts.py, requirements.txt - Removed prompt_engineering_skeleton directory
257fe44
"""
One-time migration script: Re-embed all Qdrant vectors using the new multilingual model.
Old model: all-MiniLM-L6-v2 (English only)
New model: paraphrase-multilingual-MiniLM-L12-v2 (50+ languages)
Both models produce 384-dim vectors, but they live in DIFFERENT vector spaces,
so every existing vector must be re-computed with the new model.
Usage (from project root):
python -m backend.migrate_embeddings
What this does:
1. Connects to MongoDB + Qdrant using .env credentials
2. Deletes & recreates both Qdrant collections (prompt_memory, saved_prompt_vectors)
3. Loads the new multilingual embedding model
4. Re-embeds all prompt_logs docs → prompt_memory collection
5. Re-embeds all saved_prompts docs → saved_prompt_vectors collection
"""
import sys
import uuid
import time
from pymongo import MongoClient
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance, PointStruct
from sentence_transformers import SentenceTransformer
from .core.config import settings
# ─── CONFIG ────────────────────────────────────────────────────────────────────
# Qdrant collection that receives the re-embedded prompt_logs documents.
# The name comes from settings; presumably "prompt_memory" (per the module
# docstring) — confirm against backend.core.config.
PROMPT_MEMORY_COLLECTION = settings.COLLECTION_NAME
# Qdrant collection that receives the re-embedded saved_prompts documents.
SAVED_PROMPTS_COLLECTION = "saved_prompt_vectors"
# Vector dimensionality used when (re)creating both collections.
VECTOR_SIZE = 384
# Embedding model to load; settings should already point at the multilingual model.
NEW_MODEL_NAME = settings.EMBEDDING_MODEL_NAME
def _create_collection(qdrant: QdrantClient, name: str) -> None:
    """Drop collection *name* (best-effort), then recreate it empty.

    The fresh collection stores ``VECTOR_SIZE``-dimensional vectors compared
    with cosine distance, and gets a ``keyword`` payload index on ``user_id``.

    Args:
        qdrant: Client connected to the target Qdrant instance.
        name: Name of the collection to delete and recreate.

    Raises:
        Exception: Anything raised by ``qdrant.create_collection``. Errors
            from deleting the old collection or from creating the payload
            index are reported on stdout and otherwise ignored.
    """
    # Best-effort delete: a missing collection is fine, but report the error
    # instead of hiding it so auth/network problems are visible in the log.
    try:
        qdrant.delete_collection(name)
    except Exception as e:
        print(f" ⚠️ Could not delete collection '{name}' (it may not exist): {e}")
    else:
        print(f" 🗑️ Deleted old collection: '{name}'")

    # Creation is NOT best-effort: without the collection nothing can be migrated.
    qdrant.create_collection(
        collection_name=name,
        vectors_config=VectorParams(size=VECTOR_SIZE, distance=Distance.COSINE),
    )
    print(f" ✅ Created collection: '{name}'")

    # The user_id index is optional for the migration itself, so a failure
    # here only produces a warning.
    try:
        qdrant.create_payload_index(
            collection_name=name,
            field_name="user_id",
            field_schema="keyword",
        )
    except Exception as e:
        print(f" ⚠️ Could not create 'user_id' payload index on '{name}': {e}")
def _flush_batch(qdrant, collection_name: str, points: list) -> int:
    """Upsert *points* into *collection_name* and return how many were written.

    A failed upsert is reported on stdout and counted as zero written, so the
    caller can drop the batch instead of retrying it together with later points.
    """
    if not points:
        return 0
    try:
        qdrant.upsert(collection_name=collection_name, points=points)
    except Exception as e:
        print(f" ⚠️ Failed to upsert {len(points)} points into '{collection_name}': {e}")
        return 0
    return len(points)


def _prompt_log_point(doc: dict, embed):
    """Build the Qdrant point for one ``prompt_logs`` document.

    Returns ``None`` when the document has no ``original`` text or no
    ``user_id``; such documents are skipped by the caller.
    """
    original = doc.get("original", "")
    user_id = doc.get("user_id", "")
    if not original or not user_id:
        return None
    return PointStruct(
        # Random 63-bit id: these points are not addressed by id in this script.
        id=uuid.uuid4().int % (2**63),
        vector=embed(original),
        payload={
            "user_id": user_id,
            "original_prompt": original,
            "refined_prompt": doc.get("enhanced", "") or "",
        },
    )


def _saved_prompt_point(doc: dict, embed):
    """Build the Qdrant point for one ``saved_prompts`` document.

    Returns ``None`` when the document has no ``content`` or no ``user_id``;
    such documents are skipped by the caller.
    """
    content = doc.get("content", "")
    user_id = doc.get("user_id", "")
    if not content or not user_id:
        return None
    mongo_id = str(doc["_id"])
    return PointStruct(
        # NOTE(review): hash() of a str is salted per process unless
        # PYTHONHASHSEED is fixed, so this id is not reproducible across runs.
        # Kept unchanged because other code may derive ids the same way — confirm.
        id=abs(hash(mongo_id)) % (2**63),
        vector=embed(content),
        payload={
            "user_id": user_id,
            "mongo_id": mongo_id,
            "content": content,
            "title": doc.get("title", "") or "",
            "tags": doc.get("tags", []) or [],
        },
    )


def _reembed_collection(qdrant, docs, total: int, collection_name: str,
                        build_point, label: str, *, batch_size: int = 50):
    """Embed every document in *docs* and upsert the points in batches.

    Args:
        qdrant: Client used for the upserts.
        docs: Iterable of MongoDB documents (dicts).
        total: Document count, used only for progress output.
        collection_name: Target Qdrant collection.
        build_point: Callable ``doc -> PointStruct | None``; ``None`` means skip.
        label: Human-readable document kind for error messages.
        batch_size: Number of points sent per upsert call.

    Returns:
        ``(written, skipped)``. Points from a batch whose upsert failed are
        counted as skipped, not as written.
    """
    written = 0
    skipped = 0
    pending = []
    for seen, doc in enumerate(docs, start=1):
        # Only point construction (which includes embedding) is guarded here;
        # upsert failures are handled per batch in _flush_batch.
        try:
            point = build_point(doc)
        except Exception as e:
            print(f" ⚠️ Failed to embed {label} (id={doc.get('_id')}): {e}")
            skipped += 1
            continue
        if point is None:
            skipped += 1
            continue
        pending.append(point)
        if len(pending) >= batch_size:
            flushed = _flush_batch(qdrant, collection_name, pending)
            written += flushed
            skipped += len(pending) - flushed
            pending = []  # always start a fresh batch, even after a failure
            print(f" ... processed {seen}/{total}")
    # Flush whatever is left over after the last full batch.
    flushed = _flush_batch(qdrant, collection_name, pending)
    written += flushed
    skipped += len(pending) - flushed
    return written, skipped


def main():
    """Run the one-off re-embedding migration end to end.

    Steps: connect to MongoDB and Qdrant using ``settings``, delete and
    recreate both Qdrant collections, load the embedding model named by
    ``NEW_MODEL_NAME``, re-embed all ``prompt_logs`` and ``saved_prompts``
    documents, and print a summary.

    Raises:
        SystemExit: With status 1 when ``MONGO_URI`` is unset, MongoDB cannot
            be pinged, or ``QDRANT_URL`` is unset or ``":memory:"``.
    """
    from contextlib import closing  # local import: only needed by main()

    print("=" * 60)
    print("🔄 Embedding Migration Script")
    print(f" New model: {NEW_MODEL_NAME}")
    print("=" * 60)

    # ── 1. Connect to MongoDB ──────────────────────────────────────────────
    mongo_uri = settings.MONGO_URI
    if not mongo_uri:
        print("❌ MONGO_URI not set in .env — cannot migrate.")
        sys.exit(1)
    print("\n📦 Connecting to MongoDB...")
    # The context manager closes the client on every exit path, including sys.exit.
    with MongoClient(mongo_uri, serverSelectionTimeoutMS=5000) as mongo_client:
        try:
            mongo_client.admin.command("ping")
        except Exception as e:
            print(f"❌ MongoDB connection failed: {e}")
            sys.exit(1)
        db = mongo_client["prompt_engine_db"]
        prompt_logs_col = db["prompt_logs"]
        saved_prompts_col = db["saved_prompts"]
        prompt_logs_count = prompt_logs_col.count_documents({})
        saved_prompts_count = saved_prompts_col.count_documents({})
        print(f" ✅ MongoDB connected — {prompt_logs_count} prompt logs, {saved_prompts_count} saved prompts")

        # ── 2. Connect to Qdrant ──────────────────────────────────────────
        print("\n📦 Connecting to Qdrant...")
        qdrant_url = settings.QDRANT_URL
        qdrant_api_key = settings.QDRANT_API_KEY
        if not qdrant_url or qdrant_url == ":memory:":
            print("❌ QDRANT_URL not set or is :memory: — cannot migrate a persistent instance.")
            sys.exit(1)
        with closing(QdrantClient(url=qdrant_url, api_key=qdrant_api_key)) as qdrant:
            print(f" ✅ Qdrant connected ({qdrant_url})")

            # ── 3. Recreate collections ───────────────────────────────────
            # NOTE(review): the collections are wiped before the model is
            # loaded, so a model-load failure leaves them empty. Order kept
            # to match the documented steps.
            print("\n🔨 Recreating Qdrant collections...")
            _create_collection(qdrant, PROMPT_MEMORY_COLLECTION)
            _create_collection(qdrant, SAVED_PROMPTS_COLLECTION)

            # ── 4. Load the new embedding model ───────────────────────────
            print(f"\n⏳ Loading embedding model: {NEW_MODEL_NAME}")
            start_load = time.time()
            try:
                model = SentenceTransformer(NEW_MODEL_NAME, backend="onnx")
                print(f" ✅ Model loaded (ONNX backend) in {time.time() - start_load:.1f}s")
            except Exception:
                # Fall back to the library's default backend if ONNX loading fails.
                model = SentenceTransformer(NEW_MODEL_NAME)
                print(f" ✅ Model loaded (default backend) in {time.time() - start_load:.1f}s")

            def embed(text: str):
                return model.encode(text, convert_to_numpy=True).tolist()

            # ── 5. Re-embed prompt_logs → prompt_memory ───────────────────
            print(f"\n📝 Re-embedding {prompt_logs_count} prompt logs → '{PROMPT_MEMORY_COLLECTION}'...")
            success_logs, skipped_logs = _reembed_collection(
                qdrant,
                prompt_logs_col.find({}),
                prompt_logs_count,
                PROMPT_MEMORY_COLLECTION,
                lambda doc: _prompt_log_point(doc, embed),
                "prompt log",
            )
            print(f" ✅ Done — {success_logs} embedded, {skipped_logs} skipped")

            # ── 6. Re-embed saved_prompts → saved_prompt_vectors ──────────
            print(f"\n📝 Re-embedding {saved_prompts_count} saved prompts → '{SAVED_PROMPTS_COLLECTION}'...")
            success_saved, skipped_saved = _reembed_collection(
                qdrant,
                saved_prompts_col.find({}),
                saved_prompts_count,
                SAVED_PROMPTS_COLLECTION,
                lambda doc: _saved_prompt_point(doc, embed),
                "saved prompt",
            )
            print(f" ✅ Done — {success_saved} embedded, {skipped_saved} skipped")

    # ── 7. Summary ────────────────────────────────────────────────────────
    print("\n" + "=" * 60)
    print("✅ MIGRATION COMPLETE")
    print(f" Model: {NEW_MODEL_NAME}")
    print(f" prompt_memory: {success_logs} vectors ({skipped_logs} skipped)")
    print(f" saved_prompts: {success_saved} vectors ({skipped_saved} skipped)")
    print("=" * 60)
    print("\nYou can now restart the server:")
    print(" python -m uvicorn backend.main:app --reload")
# Run the migration only when executed as a script/module
# (e.g. `python -m backend.migrate_embeddings`), not on import.
if __name__ == "__main__":
    main()