Spaces:

siddhm11
/

prompt-engine

Running

App Files Files Community

siddhm11 committed on Feb 6

Commit

515a3fb

1 Parent(s): 360b843

refactor for HF spaces: moved code to backend/

Browse files

Files changed (16) hide show

.gitignore +19 -0
Dockerfile +28 -19
backend/__init__.py +0 -0
backend/core/config.py +29 -0
backend/core/database.py +74 -0
backend/core/security.py +29 -0
backend/main.py +34 -0
backend/models/schemas.py +26 -0
requirements.txt → backend/requirements.txt +6 -1
backend/routers/auth.py +161 -0
backend/routers/prompts.py +142 -0
backend/routers/users.py +19 -0
backend/services/email_service.py +30 -0
backend/services/llm_service.py +43 -0
backend/services/memory_service.py +134 -0
main.py +0 -422

.gitignore ADDED Viewed

	@@ -0,0 +1,19 @@

+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+# Environments
+.env
+backend/.env
+.venv
+env/
+venv/
+# VS Code
+.vscode/
+.qodo/
+# Logs
+*.log
+.DS_Store

Dockerfile CHANGED Viewed

@@ -1,27 +1,36 @@
-FROM python:3.9
-# Set up a new user named "user" with user ID 1000
-RUN useradd -m -u 1000 user
-# Switch to the "user" user
-USER user
-# Set home to the user's home directory
-ENV HOME=/home/user \
-	PATH=/home/user/.local/bin:$PATH
-# Set the working directory to the user's home directory
-WORKDIR $HOME/app
-# Copy the current directory contents into the container at $HOME/app setting the owner to the user
-COPY --chown=user . $HOME/app
-# Install requirements
-RUN pip install --no-cache-dir --upgrade -r requirements.txt
-# Create a directory for the model cache and set permissions
-RUN mkdir -p $HOME/app/cache && chmod 777 $HOME/app/cache
-ENV SENTENCE_TRANSFORMERS_HOME=$HOME/app/cache
-# Run the application on port 7860
-CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

+FROM python:3.10-slim
+# Set working directory
+WORKDIR /app
+# Set environment variables to prevent pyc files and buffer output
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1
+# Install system dependencies (if any are needed for specific python packages)
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+# Copy requirements from backend
+COPY backend/requirements.txt /app/requirements.txt
+# Install dependencies
+RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt
+# Copy the backend code
+COPY backend /app/backend
+# We do not copy the legacy main.py from root to enable clean separation
+# Create a non-root user (Hugging Face Spaces requirement)
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+# Expose the port HF Spaces uses (7860)
+EXPOSE 7860
+# Command to run the application
+# We use uvicorn to run the app found in backend.main:app
+CMD ["uvicorn", "backend.main:app", "--host", "0.0.0.0", "--port", "7860"]

backend/__init__.py ADDED Viewed

File without changes

backend/core/config.py ADDED Viewed

	@@ -0,0 +1,29 @@

+import os
+from pathlib import Path
+from dotenv import load_dotenv
+env_path = Path(__file__).resolve().parent.parent / ".env"
+load_dotenv(dotenv_path=env_path)
+class Settings:
+    """Process-wide configuration, read from environment variables at import time.
+
+    Values come from ``os.getenv`` after ``load_dotenv`` has processed
+    ``backend/.env``; variables that are unset are ``None`` unless a default
+    is given below.
+    """
+    # API Keys
+    GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+    MONGO_URI = os.getenv("MONGO_URI")
+    QDRANT_URL = os.getenv("QDRANT_URL", ":memory:")
+    QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
+    SENDGRID_API_KEY = os.getenv("SENDGRID_API_KEY")
+    # Auth
+    GOOGLE_CLIENT_ID = os.getenv("GOOGLE_CLIENT_ID")
+    GOOGLE_CLIENT_SECRET = os.getenv("GOOGLE_CLIENT_SECRET")
+    JWT_SECRET = os.getenv("JWT_SECRET")
+    if not JWT_SECRET:
+        # A hard-coded fallback secret is public knowledge and would let anyone
+        # forge tokens for any user. Use a random per-process secret instead;
+        # tokens then stop validating after a restart, which is the safe failure.
+        print("⚠️ JWT_SECRET is not set — using a random per-process secret; tokens will not survive a restart.")
+        JWT_SECRET = os.urandom(32).hex()
+    ALGORITHM = "HS256"
+    GOOGLE_REDIRECT_URI = os.getenv("GOOGLE_REDIRECT_URI", "http://localhost:8000/auth/google/callback")
+    # Constants
+    EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
+    COLLECTION_NAME = "prompt_memory"
+settings = Settings()

backend/core/database.py ADDED Viewed

	@@ -0,0 +1,74 @@

+from pymongo import MongoClient
+from qdrant_client import QdrantClient
+from qdrant_client.models import VectorParams, Distance
+from .config import settings
+# MongoDB
+class MongoDB:
+    """Holder for the shared MongoDB client and collection handles.
+
+    ``users_col`` and ``prompts_col`` stay ``None`` when the server cannot be
+    reached; callers test for ``None`` and use the in-memory fallbacks instead.
+    """
+    client: MongoClient = None
+    db = None
+    users_col = None
+    prompts_col = None
+    @classmethod
+    def connect(cls):
+        """Connect, ping the server and publish the collection handles.
+
+        Never raises: on any failure every handle is reset to ``None``.
+        """
+        client = None
+        try:
+            client = MongoClient(
+                settings.MONGO_URI or "mongodb://localhost:27017",
+                serverSelectionTimeoutMS=3000,
+            )
+            client.admin.command("ping")
+            db = client["prompt_engine_db"]
+            # Publish the handles only after the ping succeeded.
+            cls.client = client
+            cls.db = db
+            cls.users_col = db["users"]
+            cls.prompts_col = db["prompt_logs"]
+            print("✅ MongoDB Connected")
+        except Exception as e:
+            print(f"⚠️ MongoDB not available ({e}) — using in-memory fallback.")
+            # Release the unusable client and clear every handle so no stale
+            # reference to a dead connection is left behind.
+            if client is not None:
+                client.close()
+            cls.client = None
+            cls.db = None
+            cls.users_col = None
+            cls.prompts_col = None
+# Qdrant
+class QdrantDB:
+    """Lazily created, process-wide Qdrant client."""
+    client: QdrantClient = None
+    @classmethod
+    def get_client(cls):
+        """Return the shared client, creating it and its collection on first use.
+
+        Returns ``None`` when the client cannot be constructed; because nothing
+        is cached in that case, the next call tries again.
+        """
+        if cls.client is not None:
+            return cls.client
+        try:
+            # ``location`` accepts both the ":memory:" default of QDRANT_URL and
+            # a regular server URL, whereas ``url`` is only for remote servers.
+            cls.client = QdrantClient(location=settings.QDRANT_URL, api_key=settings.QDRANT_API_KEY)
+        except Exception as e:
+            print(f"❌ Qdrant Connection Failed: {e}")
+            return None
+        # Check/Create Collection
+        try:
+            if not cls.client.collection_exists(settings.COLLECTION_NAME):
+                cls.client.create_collection(
+                    collection_name=settings.COLLECTION_NAME,
+                    vectors_config=VectorParams(size=384, distance=Distance.COSINE),
+                )
+                print(f"✅ Created new Qdrant collection: '{settings.COLLECTION_NAME}'")
+        except Exception as e:
+            # Creation might have failed or raced; keep the client usable but say so.
+            print(f"⚠️ Qdrant collection check failed: {e}")
+        # Create Payload Index
+        try:
+            cls.client.create_payload_index(
+                collection_name=settings.COLLECTION_NAME,
+                field_name="user_id",
+                field_schema="keyword"
+            )
+        except Exception as e:
+            # Best effort: searches still work without the index.
+            print(f"⚠️ Qdrant payload index not created: {e}")
+        print(f"✅ Qdrant Connected ({settings.QDRANT_URL})")
+        return cls.client
+# In-Memory Fallbacks
+# Process-local stand-ins that other modules use when the MongoDB collection
+# handles are None. Contents are lost when the process exits.
+# Maps user_id -> profile dict.
+in_memory_users = {}
+# List of prompt log dicts, appended to in insertion order.
+in_memory_prompt_logs = []

backend/core/security.py ADDED Viewed

	@@ -0,0 +1,29 @@

+from datetime import datetime, timedelta
+import jwt
+from fastapi import HTTPException, Depends
+from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
+from .config import settings
+security_scheme = HTTPBearer()
+def create_jwt_token(user_id: str, email: str) -> str:
+    """Create a signed JWT for *user_id* that expires in 7 days.
+
+    Args:
+        user_id: Stored in the ``sub`` claim.
+        email: Stored in the ``email`` claim.
+
+    Returns:
+        The token encoded with ``settings.JWT_SECRET`` and ``settings.ALGORITHM``.
+    """
+    # Function-scope import keeps this change self-contained.
+    from datetime import timezone
+    # datetime.utcnow() is deprecated; an aware UTC timestamp encodes the same "exp".
+    expiration = datetime.now(timezone.utc) + timedelta(days=7)
+    payload = {
+        "sub": user_id,
+        "email": email,
+        "exp": expiration
+    }
+    return jwt.encode(payload, settings.JWT_SECRET, algorithm=settings.ALGORITHM)
+def verify_jwt(credentials: HTTPAuthorizationCredentials = Depends(security_scheme)):
+    """FastAPI dependency: validate the bearer JWT and return its ``sub`` claim.
+
+    Returns:
+        The user_id stored in the token's ``sub`` claim.
+
+    Raises:
+        HTTPException: 401 when the token is expired, invalid, or has no ``sub``.
+    """
+    token = credentials.credentials
+    try:
+        payload = jwt.decode(token, settings.JWT_SECRET, algorithms=[settings.ALGORITHM])
+    except jwt.ExpiredSignatureError:
+        raise HTTPException(status_code=401, detail="Token expired")
+    except jwt.InvalidTokenError:
+        raise HTTPException(status_code=401, detail="Invalid token")
+    # A validly signed token without "sub" used to raise KeyError (HTTP 500).
+    user_id = payload.get("sub")
+    if not user_id:
+        raise HTTPException(status_code=401, detail="Invalid token")
+    return user_id  # Returns user_id

backend/main.py ADDED Viewed

	@@ -0,0 +1,34 @@

+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from .core.database import MongoDB
+from .routers import auth, users, prompts
+app = FastAPI(title="Context-Aware Prompt Engine")
+# CORS
+# NOTE(review): every origin, method and header is allowed and credentials are
+# enabled at the same time. Confirm this is intended; otherwise restrict
+# allow_origins to the known front-end / extension origins.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Startup Events
+@app.on_event("startup")
+def startup_db_client():
+    """Open the MongoDB connection once, when the application starts."""
+    MongoDB.connect()
+@app.get("/")
+def health_check():
+    """Return a static status payload; no dependency is probed."""
+    return {"status": "running", "service": "Context-Aware Prompt Engine", "production_ready": True}
+# Include Routers
+app.include_router(auth.router)
+app.include_router(users.router)
+app.include_router(prompts.router)
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run("backend.main:app", host="0.0.0.0", port=8000, reload=True)

backend/models/schemas.py ADDED Viewed

	@@ -0,0 +1,26 @@

+from typing import List, Optional
+from pydantic import BaseModel
+class UserProfile(BaseModel):
+    """Profile payload accepted by ``POST /users/register``."""
+    user_id: str
+    # Optional; None when the client does not send it.
+    email: Optional[str] = None
+    tech_stack: List[str]  # e.g., ["React", "Python", "AWS"]
+    preferences: str       # e.g., "Clean code, no comments"
+class PromptRequest(BaseModel):
+    """Request body of ``POST /enhance``."""
+    # The route overwrites this with the user_id taken from the verified JWT.
+    user_id: str
+    prompt: str
+    platform: Optional[str] = "unknown"
+class TrackRequest(BaseModel):
+    """Request body of ``POST /track``."""
+    # The route overwrites this with the user_id taken from the verified JWT.
+    user_id: str
+    prompt: str
+    platform: Optional[str] = "unknown"
+class OTPRequest(BaseModel):
+    """Request body of ``POST /auth/request-otp``."""
+    # Stripped and lower-cased by the route before use.
+    email: str
+class OTPVerify(BaseModel):
+    """Request body of ``POST /auth/verify-otp``."""
+    # Stripped and lower-cased by the route before use.
+    email: str
+    # The code the user received; stripped by the route before comparison.
+    code: str

requirements.txt → backend/requirements.txt RENAMED Viewed

@@ -26,4 +26,9 @@ httpx==0.24.1
 # DB
 pymongo==4.6.1
-numpy<2

 # DB
 pymongo==4.6.1
+numpy<2
+# Auth & Utilities
+pyjwt==2.8.0
+requests==2.31.0

backend/routers/auth.py ADDED Viewed

	@@ -0,0 +1,161 @@

+import time
+import uuid
+import httpx
+from fastapi import APIRouter, HTTPException, Depends
+from fastapi.responses import HTMLResponse
+from ..models.schemas import OTPRequest, OTPVerify
+from ..core.config import settings
+from ..core.database import MongoDB, in_memory_users
+from ..core.security import create_jwt_token
+from ..services.email_service import send_email_sendgrid
+router = APIRouter()
+_otp_store = {}
+@router.post("/auth/request-otp")
+def request_otp(request: OTPRequest):
+    """Generate a 6-digit login code for the e-mail address and send it.
+
+    The code is kept in the in-process ``_otp_store`` for 5 minutes and
+    replaces any earlier code stored for the same address.
+    """
+    # A login code is a credential: draw it from the CSPRNG, not from ``random``.
+    import secrets
+    email = request.email.strip().lower()
+    # Generate 6-digit code (100000..999999)
+    code = f"{secrets.randbelow(900000) + 100000}"
+    _otp_store[email] = {
+        "code": code,
+        "expires": time.time() + 300 # 5 minutes
+    }
+    email_body = f"Your Prompt Memory Login Code is: {code}\n\nIt expires in 5 minutes."
+    send_email_sendgrid(email, "Your Login Code", email_body)
+    # Dev Log: echo the code only when no mail provider is configured, so that
+    # live login codes never end up in production logs.
+    if not settings.SENDGRID_API_KEY:
+        print(f"\n📨 [EMAIL LOG] To: {email} | Code: {code}\n")
+    return {"message": "OTP sent."}
+@router.post("/auth/verify-otp")
+def verify_otp(request: OTPVerify):
+    """Check a login code and return a JWT for the matching (or new) user.
+
+    Returns:
+        dict with ``token``, ``email`` and ``user_id``.
+
+    Raises:
+        HTTPException: 400 when no code was requested, the code expired, or
+            the code is wrong. After 5 wrong guesses the code is discarded.
+    """
+    import hmac
+    max_attempts = 5
+    email = request.email.strip().lower()
+    code = request.code.strip()
+    stored_data = _otp_store.get(email)
+    if stored_data is None:
+        raise HTTPException(status_code=400, detail="No OTP requested for this email.")
+    if time.time() > stored_data["expires"]:
+        _otp_store.pop(email, None)
+        raise HTTPException(status_code=400, detail="OTP expired.")
+    # Constant-time comparison; bytes are used so non-ASCII input cannot raise.
+    if not hmac.compare_digest(stored_data["code"].encode(), code.encode()):
+        # A 6-digit code is guessable within its lifetime unless tries are capped.
+        attempts = stored_data.get("attempts", 0) + 1
+        stored_data["attempts"] = attempts
+        if attempts >= max_attempts:
+            _otp_store.pop(email, None)
+        raise HTTPException(status_code=400, detail="Invalid code.")
+    # Codes are single-use.
+    _otp_store.pop(email, None)
+    # Find or Register
+    user_id = None
+    if MongoDB.users_col is not None:
+        user = MongoDB.users_col.find_one({"email": email})
+        if user:
+            user_id = user["user_id"]
+    else:
+        for uid, profile in in_memory_users.items():
+            if profile.get("email") == email:
+                user_id = uid
+                break
+    if not user_id:
+        user_id = str(uuid.uuid4())
+        new_profile = {"user_id": user_id, "email": email, "tech_stack": ["General"], "preferences": "Default"}
+        if MongoDB.users_col is not None:
+            MongoDB.users_col.insert_one(new_profile)
+        else:
+            in_memory_users[user_id] = new_profile
+    token = create_jwt_token(user_id, email)
+    return {"token": token, "email": email, "user_id": user_id}
+# --- GOOGLE OAUTH ---
+@router.get("/auth/google/login")
+def google_login():
+    """Return the Google OAuth consent URL for the configured client.
+
+    Returns:
+        dict with a single ``url`` key.
+
+    Raises:
+        HTTPException: 500 when ``GOOGLE_CLIENT_ID`` is not configured.
+    """
+    from urllib.parse import urlencode
+    if not settings.GOOGLE_CLIENT_ID:
+        raise HTTPException(status_code=500, detail="Server missing Google Client ID")
+    # urlencode escapes the spaces in the scope and the reserved characters in
+    # the redirect URI, which plain string interpolation left raw.
+    query = urlencode({
+        "response_type": "code",
+        "client_id": settings.GOOGLE_CLIENT_ID,
+        "redirect_uri": settings.GOOGLE_REDIRECT_URI,
+        "scope": "openid email profile",
+        "access_type": "offline",
+        "prompt": "consent",
+    })
+    # NOTE(review): no ``state`` parameter is sent, so the callback cannot
+    # detect a forged (CSRF) response — consider adding one.
+    return {"url": f"https://accounts.google.com/o/oauth2/v2/auth?{query}"}
+@router.get("/auth/google/callback")
+async def google_callback(code: str):
+    """Exchange the Google authorization code and sign the user in.
+
+    On success returns a small HTML page that posts the JWT, e-mail and
+    user_id to ``window.opener`` and closes itself. On failure returns a JSON
+    dict with an ``error`` key.
+
+    Raises:
+        HTTPException: 500 when the Google client id/secret are not configured.
+    """
+    import json
+    if not settings.GOOGLE_CLIENT_ID or not settings.GOOGLE_CLIENT_SECRET:
+        raise HTTPException(status_code=500, detail="Server missing Google Secrets")
+    token_url = "https://oauth2.googleapis.com/token"
+    payload = {
+        "client_id": settings.GOOGLE_CLIENT_ID,
+        "client_secret": settings.GOOGLE_CLIENT_SECRET,
+        "code": code,
+        "grant_type": "authorization_code",
+        "redirect_uri": settings.GOOGLE_REDIRECT_URI
+    }
+    async with httpx.AsyncClient() as client:
+        res = await client.post(token_url, data=payload)
+        if res.status_code != 200:
+            return {"error": "Failed to exchange code", "details": res.text}
+        access_token = res.json().get("access_token")
+        if not access_token:
+            # Without this check the next request would send "Bearer None".
+            return {"error": "Failed to exchange code", "details": "No access token in response"}
+        user_res = await client.get(
+            "https://www.googleapis.com/oauth2/v2/userinfo",
+            headers={"Authorization": f"Bearer {access_token}"}
+        )
+        if user_res.status_code != 200:
+            return {"error": "Failed to fetch Google profile", "details": user_res.text}
+        user_info = user_res.json()
+    # Normalise like the OTP flow does, so both logins resolve to one account.
+    email = (user_info.get("email") or "").strip().lower()
+    if not email:
+        return {"error": "No email found in Google Account"}
+    # Find/Create User
+    # NOTE(review): these pymongo calls block inside an async handler.
+    user_id = None
+    if MongoDB.users_col is not None:
+        user = MongoDB.users_col.find_one({"email": email})
+        if user:
+            user_id = user["user_id"]
+    else:
+        for uid, profile in in_memory_users.items():
+            if profile.get("email") == email:
+                user_id = uid
+                break
+    if not user_id:
+        user_id = str(uuid.uuid4())
+        new_profile = {"user_id": user_id, "email": email, "tech_stack": ["General"], "preferences": "Default"}
+        if MongoDB.users_col is not None:
+            MongoDB.users_col.insert_one(new_profile)
+        else:
+            in_memory_users[user_id] = new_profile
+    token = create_jwt_token(user_id, email)
+    # Serialise the message as JSON instead of pasting raw values into JS string
+    # literals, and escape "<" so no value can close the <script> element.
+    message = json.dumps(
+        {"type": "GOOGLE_AUTH_SUCCESS", "token": token, "email": email, "user_id": user_id}
+    ).replace("<", "\\u003c")
+    # NOTE(review): the "*" target origin hands the token to whichever page
+    # opened this window — restrict it to the known front-end origin if possible.
+    html_content = f"""
+    <html>
+    <body>
+    <script>
+        if (window.opener) {{
+            window.opener.postMessage({message}, "*");
+            window.close();
+        }} else {{
+            document.write("Login Successful! You can close this tab.");
+        }}
+    </script>
+    </body>
+    </html>
+    """
+    return HTMLResponse(content=html_content)

backend/routers/prompts.py ADDED Viewed

	@@ -0,0 +1,142 @@

+import time
+from fastapi import APIRouter, Depends
+from ..models.schemas import PromptRequest, TrackRequest
+from ..core.security import verify_jwt
+from ..core.database import MongoDB, in_memory_users
+from ..services.memory_service import MemoryService
+from ..services.llm_service import get_groq_client
+router = APIRouter()
+SOTA_SYSTEM_PROMPT = """
+You are a Principal Prompt Architect. Your goal is not to "fix" the user's prompt, but to translate their raw intent into a "SOTA" executable specification for an LLM.
+### THE PHILOSOPHY (The 7 Rules)
+1. **Clarity**: Eliminate ambiguity.
+2. **Context**: Inject User Tech Stack [{tech_stack}] & Preferences [{preferences}].
+3. **Tasks**: Break complex goals into a step-by-step "Chain of Thought".
+4. **Format**: Explicitly define the output format (JSON, Markdown, etc.).
+5. **Examples**: Request few-shot examples if abstract.
+6. **Role**: Assign a HYPER-SPECIFIC persona (e.g., "Senior Geo-Spatial Data Engineer").
+7. **Constraints**: Define Negative Constraints (what NOT to do).
+### YOUR PROTOCOL
+1. **Analyze**: Identify the user's core intent.
+2. **Architect**: Construct a prompt using the **CO-STAR+** framework:
+   - [ROLE]: Act as {{Specific Expert Role}}...
+   - [CONTEXT]: User context is {tech_stack}...
+   - [TASK]: Your specific objective is...
+   - [STRATEGY]: Before writing code, outline your step-by-step reasoning...
+   - [CONSTRAINTS]: Do NOT use...
+   - [OUTPUT]: Provide the answer in {{Specific Format}}...
+### INSTRUCTIONS
+- Return ONLY the final refined prompt.
+- Do NOT provide explanations.
+- If the prompt is a question TO YOU (like "what is this?"), answer it as a helper.
+"""
+@router.post("/track")
+def track_prompt(request: TrackRequest, user_id: str = Depends(verify_jwt)):
+    """Record a prompt passively and memorize it unless it is a near-duplicate.
+
+    Returns ``{"status": "memorized"}``, or a ``skipped``/``redundant`` status
+    when the best stored match scores above 0.95.
+    """
+    # The authenticated identity always wins over whatever the body claimed.
+    request.user_id = user_id
+    text = request.prompt
+    # Short-term log first, so the prompt is recorded even when it is skipped below.
+    MemoryService.log_prompt(
+        user_id=user_id,
+        original=text,
+        source="passive_tracker"
+    )
+    # Only the similarity score is needed here, not the context string.
+    similarity = MemoryService.retrieve_context(user_id, text)[1]
+    if not similarity > 0.95:
+        # Passive tracking has no refined version, so the prompt stands in for both.
+        MemoryService.memorize_strategy(user_id, text, text)
+        return {"status": "memorized"}
+    return {"status": "skipped", "reason": "redundant"}
+@router.post("/enhance")
+def enhance_prompt(request: PromptRequest, user_id: str = Depends(verify_jwt)):
+    """Rewrite the caller's raw prompt into a refined prompt via the Groq chat API.
+
+    The system prompt is filled with the user's stored tech stack and
+    preferences; recent prompts and similar past prompts are added as context.
+    The exchange is logged, and memorized when it is not a near-duplicate.
+
+    Returns:
+        dict with ``original``, ``enhanced``, ``log_id`` and ``latency`` (seconds).
+        ``enhanced`` equals ``original`` when the LLM call fails or returns nothing.
+    """
+    # The authenticated identity always wins over whatever the body claimed.
+    request.user_id = user_id
+    start_time = time.time()
+    # 1. GET USER CONTEXT
+    user_data = None
+    if MongoDB.users_col is not None:
+        user_data = MongoDB.users_col.find_one({"user_id": request.user_id})
+    if user_data is None:
+        user_data = in_memory_users.get(request.user_id, {})
+    ts_raw = user_data.get("tech_stack", ["General Python", "Data Science"])
+    tech_stack = ", ".join(ts_raw) if isinstance(ts_raw, list) else str(ts_raw)
+    preferences = user_data.get("preferences", "Clean, modular code with docstrings.")
+    # 2. RETRIEVE MEMORY
+    past_context, max_similarity = MemoryService.retrieve_context(request.user_id, request.prompt)
+    # 3. RECENT HISTORY
+    recent_prompts = MemoryService.get_recent_prompts(request.user_id)
+    recent_history_str = "\n".join([f"- {p}" for p in recent_prompts]) if recent_prompts else "No recent history."
+    # 4. CONSTRUCT PROMPT
+    formatted_system = SOTA_SYSTEM_PROMPT.format(
+        tech_stack=tech_stack,
+        preferences=preferences
+    )
+    user_message = f"""
+    ### 1. RECENT ACTIVITY (Immediate Context)
+    {recent_history_str}
+    ### 2. LONG-TERM MEMORY & PAST STRATEGIES
+    {past_context}
+    ### 3. RAW USER INPUT
+    "{request.prompt}"
+    ### 4. TASK
+    Apply the 7 Rules. Transform the raw input into a SOTA prompt.
+    """
+    # Fall back to the raw prompt whenever the LLM cannot produce a result.
+    enhanced_prompt = request.prompt
+    try:
+        client = get_groq_client()
+        if client is None:
+            # Report the real cause instead of an AttributeError on None.
+            raise RuntimeError("Groq client is not configured")
+        chat_completion = client.chat.completions.create(
+            messages=[
+                {"role": "system", "content": formatted_system},
+                {"role": "user", "content": user_message}
+            ],
+            model="openai/gpt-oss-120b",
+            temperature=0.3,
+        )
+        # The content can be None or empty; never hand that back as "enhanced".
+        enhanced_prompt = chat_completion.choices[0].message.content or request.prompt
+    except Exception as e:
+        print(f"❌ Groq API Error: {e}")
+    process_time = round(time.time() - start_time, 2)
+    # 5. LOG
+    log_id = MemoryService.log_prompt(
+        user_id=request.user_id,
+        original=request.prompt,
+        enhanced=enhanced_prompt,
+        score=max_similarity,
+        latency=process_time,
+    )
+    # 6. MEMORIZE (if unique)
+    if max_similarity < 0.90:
+        MemoryService.memorize_strategy(request.user_id, request.prompt, enhanced_prompt)
+    else:
+        print(f"♻️ Redundancy detected (Score {max_similarity:.2f}). Skipping save.")
+    return {
+        "original": request.prompt,
+        "enhanced": enhanced_prompt,
+        "log_id": log_id,
+        "latency": process_time
+    }

backend/routers/users.py ADDED Viewed

	@@ -0,0 +1,19 @@

+from fastapi import APIRouter
+from ..models.schemas import UserProfile
+from ..core.database import MongoDB, in_memory_users
+router = APIRouter()
+@router.post("/users/register")
+def register_user(profile: UserProfile):
+    """Creates or updates a user profile.
+
+    Fields left as ``None`` (for example an omitted ``email``) are not written,
+    so updating a profile cannot erase the e-mail that login lookups rely on.
+    """
+    # NOTE(review): this route takes no auth dependency, so any caller can
+    # overwrite the profile of any user_id — consider requiring the JWT here.
+    fields = profile.dict(exclude_none=True)
+    if MongoDB.users_col is not None:
+        MongoDB.users_col.update_one(
+            {"user_id": profile.user_id},
+            {"$set": fields},
+            upsert=True,
+        )
+    else:
+        # Merge like "$set" does instead of replacing the whole stored profile.
+        in_memory_users.setdefault(profile.user_id, {}).update(fields)
+    return {"message": f"User {profile.user_id} registered successfully."}

backend/services/email_service.py ADDED Viewed

	@@ -0,0 +1,30 @@

+import requests
+from ..core.config import settings
+def send_email_sendgrid(to_email: str, subject: str, content: str):
+    """Send a plain-text e-mail through the SendGrid v3 API.
+
+    Args:
+        to_email: Recipient address.
+        subject: Subject line.
+        content: Plain-text body.
+
+    Returns:
+        True when SendGrid accepted the message; False when no API key is
+        configured, SendGrid answered with an error status, or the request failed.
+    """
+    if not settings.SENDGRID_API_KEY:
+        print(f"⚠️ No SendGrid Key. Simulating email to {to_email}")
+        return False
+    url = "https://api.sendgrid.com/v3/mail/send"
+    headers = {
+        "Authorization": f"Bearer {settings.SENDGRID_API_KEY}",
+        "Content-Type": "application/json"
+    }
+    data = {
+        "personalizations": [{"to": [{"email": to_email}]}],
+        # NOTE(review): the sender address is hard-coded; consider moving it to settings.
+        "from": {"email": "aminyahouse2000@gmail.com", "name": "Prompt Memory"},
+        "subject": subject,
+        "content": [{"type": "text/plain", "value": content}]
+    }
+    try:
+        # Without a timeout a stalled connection would block the request handler forever.
+        res = requests.post(url, headers=headers, json=data, timeout=10)
+    except Exception as e:
+        print(f"❌ Email Failed: {e}")
+        return False
+    if res.status_code >= 400:
+        print(f"❌ SendGrid Error: {res.text}")
+        return False
+    print(f"✅ Email sent to {to_email}")
+    return True

backend/services/llm_service.py ADDED Viewed

	@@ -0,0 +1,43 @@

+from groq import Groq
+from sentence_transformers import SentenceTransformer
+from ..core.config import settings
+# Global singletons
+_embedding_model = None
+_groq_client = None
+_embedding_unavailable = False
+def get_groq_client():
+    """Return the shared Groq client, building it on the first call.
+
+    Returns ``None`` when construction fails; a later call tries again.
+    """
+    global _groq_client
+    # Fast path: reuse the client that an earlier call already built.
+    if _groq_client is not None:
+        return _groq_client
+    try:
+        _groq_client = Groq(api_key=settings.GROQ_API_KEY)
+    except Exception as init_error:
+        print(f"⚠️ Warning: Groq client initialization failed: {init_error}")
+    return _groq_client
+def get_embedding(text: str):
+    """Encode *text* with the shared sentence-transformers model.
+
+    The model is loaded on the first call, trying the ONNX backend before the
+    default one. Returns the vector as a list, or ``None`` once loading has
+    failed (the failure is remembered and loading is not retried).
+    """
+    global _embedding_model, _embedding_unavailable
+    if _embedding_unavailable:
+        return None
+    # Fast path: the model is already in memory.
+    if _embedding_model is not None:
+        return _embedding_model.encode(text, convert_to_numpy=True).tolist()
+    try:
+        print("⏳ Loading free embedding model...")
+        try:
+            # Try ONNX for performance
+            _embedding_model = SentenceTransformer(settings.EMBEDDING_MODEL_NAME, backend="onnx")
+            print("✅ Embedding model loaded (ONNX backend)")
+        except Exception:
+            _embedding_model = SentenceTransformer(settings.EMBEDDING_MODEL_NAME)
+            print("✅ Embedding model loaded (default backend)")
+    except Exception as load_error:
+        # Remember the failure so later calls return immediately.
+        _embedding_unavailable = True
+        print(f"⚠️ Embedding unavailable: {load_error}")
+        return None
+    return _embedding_model.encode(text, convert_to_numpy=True).tolist()

backend/services/memory_service.py ADDED Viewed

	@@ -0,0 +1,134 @@

+import time
+from datetime import datetime
+from typing import List, Tuple
+from qdrant_client.models import PointStruct, Filter, FieldCondition, MatchValue
+from ..core.config import settings
+from ..core.database import QdrantDB, MongoDB, in_memory_prompt_logs
+from ..services.llm_service import get_embedding
+class MemoryService:
+    """Prompt memory: vector search in Qdrant plus prompt logs in MongoDB.
+
+    Every method is static and degrades gracefully: when a backend is missing
+    the method returns a neutral result instead of raising.
+    """
+    @staticmethod
+    def retrieve_context(user_id: str, query_text: str, limit: int = 3) -> Tuple[str, float]:
+        """
+        Finds similar past prompts of *user_id*.
+        Returns: (context_str, max_score). When Qdrant or the embedding model
+        is unavailable, or the search fails, the placeholder text and 0.0.
+        """
+        no_context = "No relevant past context found."
+        qdrant = QdrantDB.get_client()
+        # Default return if DB is down
+        if qdrant is None:
+            return no_context, 0.0
+        query_vector = get_embedding(query_text)
+        if query_vector is None:
+            return no_context, 0.0
+        # Search with User ID Filter
+        try:
+            results = qdrant.search(
+                collection_name=settings.COLLECTION_NAME,
+                query_vector=query_vector,
+                query_filter=Filter(
+                    must=[
+                        FieldCondition(
+                            key="user_id",
+                            match=MatchValue(value=user_id)
+                        )
+                    ]
+                ),
+                limit=limit
+            )
+        except Exception as e:
+            print(f"⚠️ Search failed: {e}")
+            return no_context, 0.0
+        context_parts = []
+        max_score = 0.0
+        for hit in results:
+            # The best score is tracked for every hit, including ones below
+            # the relevance threshold, because callers use it for de-duplication.
+            if hit.score > max_score:
+                max_score = hit.score
+            payload = hit.payload
+            # Relevance threshold
+            if hit.score > 0.25:
+                context_parts.append(f"- Past Prompt: \"{payload.get('original_prompt')}\"\n")
+                context_parts.append(f"- Refined Version: \"{payload.get('refined_prompt')}\"\n\n")
+        final_context = "".join(context_parts) or no_context
+        return final_context, max_score
+    @staticmethod
+    def get_recent_prompts(user_id: str, limit: int = 5) -> List[str]:
+        """Fetches the user's most recent original prompts, newest first."""
+        recent_prompts = []
+        # 1. Try MongoDB
+        if MongoDB.prompts_col is not None:
+            try:
+                cursor = MongoDB.prompts_col.find(
+                    {"user_id": user_id}
+                ).sort("timestamp", -1).limit(limit)
+                recent_prompts = [doc["original"] for doc in cursor if "original" in doc]
+            except Exception as e:
+                print(f"⚠️ Error fetching recent prompts from Mongo: {e}")
+        # 2. Fallback to In-Memory
+        else:
+            user_logs = [log for log in in_memory_prompt_logs if log.get("user_id") == user_id]
+            # The list is in insertion order: take the tail, then put newest first.
+            recent_prompts = [log["original"] for log in reversed(user_logs[-limit:])]
+        return recent_prompts
+    @staticmethod
+    def log_prompt(user_id: str, original: str, enhanced: str = None, score: float = 0.0, latency: float = 0.0, source: str = "active"):
+        """Logs prompt to Mongo or Memory.
+
+        Returns the inserted MongoDB id as a string, or ``"memory-only"`` when
+        the entry was not stored in MongoDB.
+        """
+        log_entry = {
+            "user_id": user_id,
+            "timestamp": datetime.now(),
+            "original": original,
+            "enhanced": enhanced,
+            "score": score,
+            "latency": latency,
+            "source": source
+        }
+        log_id = "memory-only"
+        if MongoDB.prompts_col is not None:
+            try:
+                res = MongoDB.prompts_col.insert_one(log_entry)
+                log_id = str(res.inserted_id)
+            except Exception as e:
+                # Logging stays best-effort, but a lost entry should be visible.
+                print(f"⚠️ Failed to log prompt to Mongo: {e}")
+        else:
+            in_memory_prompt_logs.append(log_entry)
+        return log_id
+    @staticmethod
+    def memorize_strategy(user_id: str, original: str, refined: str):
+        """Saves a prompt and its refined version to the Vector DB.
+
+        Does nothing when the embedding or the Qdrant client is unavailable;
+        failures are printed, never raised.
+        """
+        import uuid
+        try:
+            vec = get_embedding(original)
+            if not vec:
+                return
+            q_client = QdrantDB.get_client()
+            if not q_client:
+                return
+            q_client.upsert(
+                collection_name=settings.COLLECTION_NAME,
+                points=[PointStruct(
+                    # A second-resolution timestamp collides when two prompts
+                    # are stored within the same second, and upsert would then
+                    # overwrite the earlier point; a UUID keeps each point distinct.
+                    id=str(uuid.uuid4()),
+                    vector=vec,
+                    payload={
+                        "user_id": user_id,
+                        "original_prompt": original,
+                        "refined_prompt": refined
+                    }
+                )]
+            )
+            print("💾 New strategy memorized.")
+        except Exception as e:
+            print(f"❌ Memorization failed: {e}")

main.py DELETED Viewed

@@ -1,422 +0,0 @@
-import os
-import time
-from datetime import datetime
-from typing import List, Optional
-# Third-party libraries
-from fastapi import FastAPI, HTTPException
-from fastapi.middleware.cors import CORSMiddleware
-from pydantic import BaseModel
-from qdrant_client import QdrantClient
-from qdrant_client.models import PointStruct, Distance, VectorParams, Filter, FieldCondition, MatchValue# Lazy import: from sentence_transformers import SentenceTransformer
-from groq import Groq
-from pymongo import MongoClient
-from dotenv import load_dotenv
-# --- 1. CONFIGURATION & SECRETS ---
-# Load environment variables from the .env file before they are read below
-load_dotenv()
-GROQ_API_KEY = os.getenv("GROQ_API_KEY")
-MONGO_URI = os.getenv("MONGO_URI")  # when unset, the MongoClient call below uses mongodb://localhost:27017
-QDRANT_URL = os.getenv("QDRANT_URL", ":memory:")
-QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
-# Free embedding model: all-MiniLM-L6-v2 (384-dim, Apache 2.0). No API key required.
-EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
-# Only warn at startup when the Groq key is absent. The Groq call in /enhance is wrapped in try/except, so the endpoint still answers, returning the prompt unchanged.
-if not GROQ_API_KEY:
-    print("⚠️ GROQ_API_KEY is missing from .env — /enhance will fail until you add it.")
-# --- 2. SETUP CLIENTS ---
-# A. FastAPI App
-app = FastAPI()
-# B. CORS (Critical for Chrome Extension)
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],  # Allows all origins
-    allow_credentials=True,  # NOTE(review): credentials together with wildcard origins is very permissive — confirm this is needed
-    allow_methods=["*"],  # Allows all methods (POST, GET, etc.)
-    allow_headers=["*"],
-)
-# C. MongoDB (user profiles & prompt logs) — optional. Profiles fall back to _in_memory_users; prompt logs are only written when prompts_col is set.
-users_col = None
-prompts_col = None
-_in_memory_users = {}  # fallback when MongoDB is not running
-try:
-    mongo_client = MongoClient(
-        MONGO_URI or "mongodb://localhost:27017",
-        serverSelectionTimeoutMS=3000,
-    )
-    mongo_client.admin.command("ping")  # fail here, inside the try, if the server cannot be reached
-    db = mongo_client["prompt_engine_db"]
-    users_col = db["users"]
-    prompts_col = db["prompt_logs"]
-    print("✅ MongoDB Connected")
-except Exception as e:
-    print(f"⚠️ MongoDB not available ({e}) — using in-memory fallback for profiles/logs.")
-# D. Qdrant (Vector Memory) — the client is created lazily by init_qdrant()
-qdrant = None
-COLLECTION_NAME = "prompt_memory"
-# Request body for POST /track (passive prompt tracking)
-class TrackRequest(BaseModel):
-    user_id: str
-    prompt: str
-    platform: Optional[str] = "unknown"  # accepted but not read by the /track handler in this file
-def init_qdrant():
-    """Lazily initialize Qdrant connection."""
-    global qdrant
-    if qdrant is None:
-        try:
-            qdrant = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)
-            # 1. Check if collection exists
-            try:
-                collection_exists = qdrant.collection_exists(COLLECTION_NAME)
-            except (AttributeError, Exception):
-                try:
-                    qdrant.get_collection(COLLECTION_NAME)
-                    collection_exists = True
-                except:
-                    collection_exists = False
-            # 2. Create collection if it doesn't exist
-            if not collection_exists:
-                try:
-                    qdrant.create_collection(
-                        collection_name=COLLECTION_NAME,
-                        vectors_config=VectorParams(size=384, distance=Distance.COSINE),
-                    )
-                    print(f"✅ Created new Qdrant collection: '{COLLECTION_NAME}'")
-                except Exception as e:
-                    if "409" in str(e) or "already exists" in str(e):
-                        pass
-                    else:
-                        raise
-            # --- THE FIX: CREATE PAYLOAD INDEX FOR USER_ID ---
-            # This tells Qdrant: "Please optimize searches for 'user_id'"
-            try:
-                qdrant.create_payload_index(
-                    collection_name=COLLECTION_NAME,
-                    field_name="user_id",
-                    field_schema="keyword"  # 'keyword' is best for exact string matches like IDs
-                )
-                print("✅ Payload index for 'user_id' ensured.")
-            except Exception as e:
-                # If index already exists, Qdrant might return an error or ignore it.
-                # We catch it just in case, but usually it's safe.
-                print(f"ℹ️ Note on Indexing: {e}")
-            print(f"✅ Qdrant Connected ({QDRANT_URL})")
-        except Exception as e:
-            print(f"❌ Qdrant Connection Failed: {e}")
-    return qdrant
-# E. AI Models — free local embeddings (MiniLM via sentence-transformers)
-print("Embedding: free model (MiniLM) will load on first use")
-EMBEDDING_MODEL = None  # populated by get_embedding() on first call
-_embedding_unavailable = False  # set to True by get_embedding() once loading fails, so loading is not retried
-# The Groq client is created lazily by get_groq_client() to avoid initialization errors at import time
-groq_client = None
-def get_groq_client():
-    """Lazily initialize Groq client."""
-    global groq_client
-    if groq_client is None:
-        try:
-            groq_client = Groq(api_key=GROQ_API_KEY)
-        except Exception as e:
-            print(f"⚠️ Warning: Groq client initialization failed: {e}")
-    return groq_client
-# --- 3. DATA MODELS (Pydantic) ---
-class UserProfile(BaseModel):
-    user_id: str  # key used by /users/register to upsert the profile
-    tech_stack: List[str]  # e.g., ["React", "Python", "AWS"]
-    preferences: str       # e.g., "Clean code, no comments"
-class PromptRequest(BaseModel):
-    user_id: str  # selects the stored profile and filters the vector-memory search
-    prompt: str            # Matches 'prompt' sent from your Extension
-    platform: Optional[str] = "unknown"  # accepted but not read by the /enhance handler in this file
-# --- 4. HELPER FUNCTIONS ---
-def get_embedding(text: str):
-    """Converts text to 384-dim vector using free MiniLM model (sentence-transformers). Returns None if unavailable."""
-    global EMBEDDING_MODEL, _embedding_unavailable
-    if _embedding_unavailable:
-        return None
-    if EMBEDDING_MODEL is None:
-        try:
-            from sentence_transformers import SentenceTransformer
-            print("⏳ Loading free embedding model (all-MiniLM-L6-v2)...")
-            # Prefer ONNX backend (lighter, CPU-friendly); fallback to default
-            try:
-                EMBEDDING_MODEL = SentenceTransformer(EMBEDDING_MODEL_NAME, backend="onnx")
-                print("✅ Embedding model loaded (ONNX backend)")
-            except Exception:
-                EMBEDDING_MODEL = SentenceTransformer(EMBEDDING_MODEL_NAME)
-                print("✅ Embedding model loaded (default backend)")
-        except Exception as e:
-            _embedding_unavailable = True
-            print(f"⚠️ Embedding unavailable: {e} — install: pip install sentence-transformers (or sentence-transformers[onnx] for CPU)")
-            return None
-    return EMBEDDING_MODEL.encode(text, convert_to_numpy=True).tolist()
-def retrieve_context(user_id: str, query_text: str, limit: int = 3):
-    """
-    Finds similar past prompts and returns both the text context AND the highest similarity score.
-    Returns: (context_str, max_score)
-    """
-    global qdrant
-    qdrant = init_qdrant()
-    # Default return values if DB is down or empty
-    if qdrant is None:
-        return "No relevant past context found.", 0.0
-    query_vector = get_embedding(query_text)
-    if query_vector is None:
-        return "No relevant past context found.", 0.0
-    # Search with User ID Filter
-    results = qdrant.search(
-        collection_name=COLLECTION_NAME,
-        query_vector=query_vector,
-        query_filter=Filter(
-            must=[
-                FieldCondition(
-                    key="user_id",
-                    match=MatchValue(value=user_id)
-                )
-            ]
-        ),
-        limit=limit
-    )
-    print(f"\n🔍 Searching Memory for User '{user_id}'...")
-    context_str = ""
-    max_score = 0.0  # Track the highest score found
-    for hit in results:
-        # Update max_score if this hit is higher
-        if hit.score > max_score:
-            max_score = hit.score
-        payload = hit.payload
-        print(f"   Found candidate (Score: {hit.score:.4f}): {payload.get('original_prompt')}")
-        # Only add to string if it passes the "relevance" threshold (0.25)
-        if hit.score > 0.25:
-            context_str += f"- Past Prompt: \"{payload.get('original_prompt')}\"\n"
-            context_str += f"- Refined Version: \"{payload.get('refined_prompt')}\"\n\n"
-    final_context = context_str if context_str else "No relevant past context found."
-    return final_context, max_score
-# --- 5. API ENDPOINTS ---
-@app.get("/")
-def health_check():
-    return {"status": "running", "service": "Context-Aware Prompt Engine"}
-@app.post("/users/register")
-def register_user(profile: UserProfile):
-    """Creates or updates a user profile."""
-    if users_col is not None:
-        users_col.update_one(
-            {"user_id": profile.user_id},
-            {"$set": profile.dict()},
-            upsert=True,
-        )
-    else:
-        _in_memory_users[profile.user_id] = profile.dict()
-    return {"message": f"User {profile.user_id} registered successfully."}
-@app.post("/track")
-def track_prompt(request: TrackRequest):
-    """
-    Silently learns from user prompts without modifying them.
-    """
-    # 1. Check for Redundancy (Don't memorize exact duplicates)
-    # We use a high threshold (0.95) because we want to capture distinct thoughts
-    _, max_similarity = retrieve_context(request.user_id, request.prompt)
-    if max_similarity > 0.95:
-        return {"status": "skipped", "reason": "redundant"}
-    # 2. Vectorize & Save to Qdrant
-    try:
-        vec = get_embedding(request.prompt)
-        if vec:
-            q_client = init_qdrant()
-            if q_client:
-                q_client.upsert(
-                    collection_name=COLLECTION_NAME,
-                    points=[PointStruct(
-                        id=int(time.time()),
-                        vector=vec,
-                        payload={
-                            "user_id": request.user_id,
-                            "original_prompt": request.prompt,
-                            "refined_prompt": request.prompt, # No refinement, so we map it to itself
-                            "source": "passive_tracker"
-                        }
-                    )]
-                )
-                print(f"🧠 passively learned: {request.prompt[:50]}...")
-    except Exception as e:
-        print(f"❌ Tracking Error: {e}")
-        return {"status": "error", "message": str(e)}
-    return {"status": "memorized"}
-SOTA_SYSTEM_PROMPT = """
-You are a Principal Prompt Architect. Your goal is not to "fix" the user's prompt, but to translate their raw intent into a "SOTA" executable specification for an LLM.
-### THE PHILOSOPHY (The 7 Rules)
-1. **Clarity**: Eliminate ambiguity.
-2. **Context**: Inject User Tech Stack [{tech_stack}] & Preferences [{preferences}].
-3. **Tasks**: Break complex goals into a step-by-step "Chain of Thought".
-4. **Format**: Explicitly define the output format (JSON, Markdown, etc.).
-5. **Examples**: Request few-shot examples if abstract.
-6. **Role**: Assign a HYPER-SPECIFIC persona (e.g., "Senior Geo-Spatial Data Engineer").
-7. **Constraints**: Define Negative Constraints (what NOT to do).
-### YOUR PROTOCOL
-1. **Analyze**: Identify the user's core intent.
-2. **Architect**: Construct a prompt using the **CO-STAR+** framework:
-   - [ROLE]: Act as {{Specific Expert Role}}...
-   - [CONTEXT]: User context is {tech_stack}...
-   - [TASK]: Your specific objective is...
-   - [STRATEGY]: Before writing code, outline your step-by-step reasoning...
-   - [CONSTRAINTS]: Do NOT use...
-   - [OUTPUT]: Provide the answer in {{Specific Format}}...
-### INSTRUCTIONS
-- Return ONLY the final refined prompt.
-- Do NOT provide explanations.
-- If the prompt is a question TO YOU (like "what is this?"), answer it as a helper.
-"""  # str.format() template: {tech_stack} and {preferences} are filled in by enhance_prompt; doubled braces render as literal braces
-@app.post("/enhance")
-def enhance_prompt(request: PromptRequest):
-    start_time = time.time()
-    # 1. GET USER CONTEXT (MongoDB Priority)
-    user_data = None
-    if users_col is not None:
-        user_data = users_col.find_one({"user_id": request.user_id})
-    if user_data is None:
-        user_data = _in_memory_users.get(request.user_id, {})
-    # Defaults
-    ts_raw = user_data.get("tech_stack", ["General Python", "Data Science"])
-    tech_stack = ", ".join(ts_raw) if isinstance(ts_raw, list) else str(ts_raw)
-    preferences = user_data.get("preferences", "Clean, modular code with docstrings.")
-    # 2. RETRIEVE MEMORY
-    past_context, max_similarity = retrieve_context(request.user_id, request.prompt)
-    # 3. CONSTRUCT SOTA PROMPT
-    formatted_system = SOTA_SYSTEM_PROMPT.format(
-        tech_stack=tech_stack,
-        preferences=preferences
-    )
-    user_message = f"""
-    ### 1. MEMORY & PAST STRATEGIES
-    {past_context}
-    ### 2. RAW USER INPUT
-    "{request.prompt}"
-    ### 3. TASK
-    Apply the 7 Rules. Transform the raw input into a SOTA prompt.
-    Ensure you define a specific EXPERT ROLE and Negative Constraints.
-    """
-    enhanced_prompt = request.prompt # Fallback
-    try:
-        client = get_groq_client()
-        chat_completion = client.chat.completions.create(
-            messages=[
-                {"role": "system", "content": formatted_system},
-                {"role": "user", "content": user_message}
-            ],
-            model="openai/gpt-oss-120b",
-            temperature=0.3, # Low temp for precision
-        )
-        enhanced_prompt = chat_completion.choices[0].message.content
-    except Exception as e:
-        print(f"❌ Groq API Error: {e}")
-    # 4. LOGGING (MongoDB)
-    process_time = round(time.time() - start_time, 2)
-    log_id = "memory-only"
-    if prompts_col is not None:
-        try:
-            log_entry = {
-                "user_id": request.user_id,
-                "timestamp": datetime.now(),
-                "original": request.prompt,
-                "enhanced": enhanced_prompt,
-                "score": max_similarity,
-                "latency": process_time
-            }
-            res = prompts_col.insert_one(log_entry)
-            log_id = str(res.inserted_id)
-        except: pass    # <--- HANDLE ERRORS HERE
-    # 5. MEMORY STORAGE (Qdrant)
-    # Only save if unique (similarity < 0.90)
-    if max_similarity < 0.90:
-        try:
-            vec = get_embedding(request.prompt)
-            if vec:
-                q_client = init_qdrant()
-                if q_client:
-                    q_client.upsert(
-                        collection_name=COLLECTION_NAME,
-                        points=[PointStruct(
-                            id=int(time.time()),
-                            vector=vec,
-                            payload={"user_id": request.user_id, "original_prompt": request.prompt, "refined_prompt": enhanced_prompt}
-                        )]
-                    )
-                    print("💾 New strategy memorized.")
-        except: pass
-    else:
-        print(f"♻️ Redundancy detected (Score {max_similarity:.2f}). Skipping save.")
-    return {
-        "original": request.prompt,
-        "enhanced": enhanced_prompt,
-        "log_id": log_id,
-        "latency": process_time
-    }
-# Run with: uvicorn main:app --reload
-## change content.js as well