siddhm11 committed on
Commit
515a3fb
·
1 Parent(s): 360b843

refactor for HF spaces: moved code to backend/

Browse files
.gitignore ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # Environments
7
+ .env
8
+ backend/.env
9
+ .venv
10
+ env/
11
+ venv/
12
+
13
+ # VS Code
14
+ .vscode/
15
+ .qodo/
16
+
17
+ # Logs
18
+ *.log
19
+ .DS_Store
Dockerfile CHANGED
@@ -1,27 +1,36 @@
1
- FROM python:3.9
2
 
3
- # Set up a new user named "user" with user ID 1000
4
- RUN useradd -m -u 1000 user
5
 
6
- # Switch to the "user" user
7
- USER user
 
8
 
9
- # Set home to the user's home directory
10
- ENV HOME=/home/user \
11
- PATH=/home/user/.local/bin:$PATH
 
 
 
 
12
 
13
- # Set the working directory to the user's home directory
14
- WORKDIR $HOME/app
15
 
16
- # Copy the current directory contents into the container at $HOME/app setting the owner to the user
17
- COPY --chown=user . $HOME/app
 
18
 
19
- # Install requirements
20
- RUN pip install --no-cache-dir --upgrade -r requirements.txt
 
 
 
21
 
22
- # Create a directory for the model cache and set permissions
23
- RUN mkdir -p $HOME/app/cache && chmod 777 $HOME/app/cache
24
- ENV SENTENCE_TRANSFORMERS_HOME=$HOME/app/cache
25
 
26
- # Run the application on port 7860
27
- CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
 
 
1
+ FROM python:3.10-slim
2
 
3
+ # Set working directory
4
+ WORKDIR /app
5
 
6
+ # Set environment variables to prevent pyc files and buffer output
7
+ ENV PYTHONDONTWRITEBYTECODE=1 \
8
+ PYTHONUNBUFFERED=1
9
 
10
+ # Install system dependencies (if any are needed for specific python packages)
11
+ RUN apt-get update && apt-get install -y --no-install-recommends \
12
+ build-essential \
13
+ && rm -rf /var/lib/apt/lists/*
14
+
15
+ # Copy requirements from backend
16
+ COPY backend/requirements.txt /app/requirements.txt
17
 
18
+ # Install dependencies
19
+ RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt
20
 
21
+ # Copy the backend code
22
+ COPY backend /app/backend
23
+ # We do not copy the legacy main.py from root to enable clean separation
24
 
25
+ # Create a non-root user (Hugging Face Spaces requirement)
26
+ RUN useradd -m -u 1000 user
27
+ USER user
28
+ ENV HOME=/home/user \
29
+ PATH=/home/user/.local/bin:$PATH
30
 
31
+ # Expose the port HF Spaces uses (7860)
32
+ EXPOSE 7860
 
33
 
34
+ # Command to run the application
35
+ # We use uvicorn to run the app found in backend.main:app
36
+ CMD ["uvicorn", "backend.main:app", "--host", "0.0.0.0", "--port", "7860"]
backend/__init__.py ADDED
File without changes
backend/core/config.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pathlib import Path
3
+ from dotenv import load_dotenv
4
+
5
+ env_path = Path(__file__).resolve().parent.parent / ".env"
6
+ load_dotenv(dotenv_path=env_path)
7
+
8
+
9
+
10
class Settings:
    """Application configuration resolved from environment variables.

    Every attribute is evaluated once, when this class body runs at import
    time (after ``load_dotenv`` has populated the environment).
    """

    # API Keys
    GROQ_API_KEY = os.getenv("GROQ_API_KEY")
    MONGO_URI = os.getenv("MONGO_URI")
    QDRANT_URL = os.getenv("QDRANT_URL", ":memory:")
    QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
    SENDGRID_API_KEY = os.getenv("SENDGRID_API_KEY")

    # Auth
    GOOGLE_CLIENT_ID = os.getenv("GOOGLE_CLIENT_ID")
    GOOGLE_CLIENT_SECRET = os.getenv("GOOGLE_CLIENT_SECRET")
    JWT_SECRET = os.getenv("JWT_SECRET")
    if not JWT_SECRET:
        # A hard-coded fallback secret is public knowledge (it lives in the
        # repository), so anyone could sign a token for any user id. Fall back
        # to a random per-process secret instead: tokens stay unforgeable, at
        # the cost of being invalidated on restart until JWT_SECRET is set.
        JWT_SECRET = os.urandom(32).hex()
        print("⚠️ JWT_SECRET is not set — using a random per-process secret; tokens will not survive a restart.")
    ALGORITHM = "HS256"
    GOOGLE_REDIRECT_URI = os.getenv("GOOGLE_REDIRECT_URI", "http://localhost:8000/auth/google/callback")

    # Constants
    EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
    COLLECTION_NAME = "prompt_memory"
29
+ settings = Settings()
backend/core/database.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from pymongo import MongoClient
3
+ from qdrant_client import QdrantClient
4
+ from qdrant_client.models import VectorParams, Distance
5
+ from .config import settings
6
+
7
+ # MongoDB
8
class MongoDB:
    """Class-level holder for the shared MongoDB client and collections.

    ``users_col`` and ``prompts_col`` remain ``None`` until :meth:`connect`
    succeeds; callers check for ``None`` to decide whether to use the
    in-memory fallbacks instead.
    """

    client: MongoClient = None
    db = None
    users_col = None
    prompts_col = None

    @classmethod
    def connect(cls):
        """Open the MongoDB connection and publish the collection handles.

        Uses ``settings.MONGO_URI`` (or the local default) with a 3 second
        server-selection timeout and verifies the server with a ``ping``.
        Any failure is reported on stdout and leaves every handle ``None``.
        """
        client = None
        try:
            client = MongoClient(
                settings.MONGO_URI or "mongodb://localhost:27017",
                serverSelectionTimeoutMS=3000,
            )
            client.admin.command("ping")
        except Exception as e:
            print(f"⚠️ MongoDB not available ({e}) — using in-memory fallback.")
            # Release the unusable client instead of leaving it open, and make
            # sure no stale handle from an earlier attempt survives.
            if client is not None:
                try:
                    client.close()
                except Exception:
                    pass  # best effort: the connection is already unusable
            cls.client = None
            cls.db = None
            cls.users_col = None
            cls.prompts_col = None
            return

        cls.client = client
        cls.db = client["prompt_engine_db"]
        cls.users_col = cls.db["users"]
        cls.prompts_col = cls.db["prompt_logs"]
        print("✅ MongoDB Connected")
30
+
31
+ # Qdrant
32
class QdrantDB:
    """Lazily created, process-wide Qdrant client."""

    client: QdrantClient = None

    @classmethod
    def get_client(cls):
        """Return the shared Qdrant client, creating it on first use.

        On first use the collection ``settings.COLLECTION_NAME`` and a keyword
        payload index on ``user_id`` are created on a best-effort basis.

        Returns:
            The client, or ``None`` when the client could not be constructed.
        """
        if cls.client is not None:
            return cls.client

        try:
            # ``location`` (not ``url``) is the parameter that understands the
            # ":memory:" default of settings.QDRANT_URL; any other string is
            # treated by qdrant-client as the server URL.
            client = QdrantClient(location=settings.QDRANT_URL, api_key=settings.QDRANT_API_KEY)
        except Exception as e:
            print(f"❌ Qdrant Connection Failed: {e}")
            return None

        # Check/Create Collection (best effort — failures are reported, not raised)
        try:
            if not client.collection_exists(settings.COLLECTION_NAME):
                client.create_collection(
                    collection_name=settings.COLLECTION_NAME,
                    vectors_config=VectorParams(size=384, distance=Distance.COSINE),
                )
                print(f"✅ Created new Qdrant collection: '{settings.COLLECTION_NAME}'")
        except Exception as e:
            # Creation might have failed or raced with another worker.
            print(f"⚠️ Qdrant collection setup failed: {e}")

        # Create Payload Index (best effort)
        try:
            client.create_payload_index(
                collection_name=settings.COLLECTION_NAME,
                field_name="user_id",
                field_schema="keyword",
            )
        except Exception as e:
            print(f"⚠️ Qdrant payload index setup failed: {e}")

        cls.client = client
        print(f"✅ Qdrant Connected ({settings.QDRANT_URL})")
        return cls.client
71
+
72
+ # In-Memory Fallbacks
73
+ in_memory_users = {}
74
+ in_memory_prompt_logs = []
backend/core/security.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from datetime import datetime, timedelta
3
+ import jwt
4
+ from fastapi import HTTPException, Depends
5
+ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
6
+ from .config import settings
7
+
8
+ security_scheme = HTTPBearer()
9
+
10
def create_jwt_token(user_id: str, email: str) -> str:
    """Return a signed JWT for the given user that expires in seven days.

    The token carries ``user_id`` as the ``sub`` claim and ``email`` as the
    ``email`` claim, and is signed with ``settings.JWT_SECRET`` using
    ``settings.ALGORITHM``.
    """
    lifetime = timedelta(days=7)
    claims = dict(sub=user_id, email=email, exp=datetime.utcnow() + lifetime)
    return jwt.encode(claims, settings.JWT_SECRET, algorithm=settings.ALGORITHM)
19
+
20
def verify_jwt(credentials: HTTPAuthorizationCredentials = Depends(security_scheme)):
    """FastAPI dependency that validates the bearer JWT and returns the user id.

    Returns:
        The ``sub`` claim of the token (the user id).

    Raises:
        HTTPException: 401 "Token expired" for an expired token, 401
            "Invalid token" for any other validation failure, including a
            token that lacks the ``exp`` or ``sub`` claim.
    """
    token = credentials.credentials
    try:
        payload = jwt.decode(
            token,
            settings.JWT_SECRET,
            algorithms=[settings.ALGORITHM],
            # Without "exp" a token would never expire; without "sub" there is
            # no identity to return. A missing claim is an InvalidTokenError.
            options={"require": ["exp", "sub"]},
        )
    except jwt.ExpiredSignatureError as exc:
        raise HTTPException(status_code=401, detail="Token expired") from exc
    except jwt.InvalidTokenError as exc:
        raise HTTPException(status_code=401, detail="Invalid token") from exc

    user_id = payload.get("sub")
    if not user_id:
        # Present-but-empty subject: reject instead of handing back a falsy id.
        raise HTTPException(status_code=401, detail="Invalid token")
    return user_id  # Returns user_id
backend/main.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from fastapi import FastAPI
3
+ from fastapi.middleware.cors import CORSMiddleware
4
+ from .core.database import MongoDB
5
+ from .routers import auth, users, prompts
6
+
7
+ app = FastAPI(title="Context-Aware Prompt Engine")
8
+
9
+ # CORS
10
+ app.add_middleware(
11
+ CORSMiddleware,
12
+ allow_origins=["*"],
13
+ allow_credentials=True,
14
+ allow_methods=["*"],
15
+ allow_headers=["*"],
16
+ )
17
+
18
+ # Startup Events
19
@app.on_event("startup")
def startup_db_client():
    """Connect to MongoDB when the application starts.

    ``MongoDB.connect`` reports failures itself and leaves the collection
    handles as ``None``, so startup continues without a database.
    """
    MongoDB.connect()
22
+
23
@app.get("/")
def health_check():
    # Liveness endpoint: returns a fixed payload and touches no backing service,
    # so a successful response only shows that the web process is serving requests.
    return {"status": "running", "service": "Context-Aware Prompt Engine", "production_ready": True}
26
+
27
+ # Include Routers
28
+ app.include_router(auth.router)
29
+ app.include_router(users.router)
30
+ app.include_router(prompts.router)
31
+
32
+ if __name__ == "__main__":
33
+ import uvicorn
34
+ uvicorn.run("backend.main:app", host="0.0.0.0", port=8000, reload=True)
backend/models/schemas.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from typing import List, Optional
3
+ from pydantic import BaseModel
4
+
5
# Request body of POST /users/register; also the shape of the stored profile.
class UserProfile(BaseModel):
    user_id: str
    # Optional: profiles created by the auth flows set it, manual registration may omit it.
    email: Optional[str] = None
    tech_stack: List[str]  # e.g., ["React", "Python", "AWS"]
    preferences: str       # e.g., "Clean code, no comments"
10
+
11
# Request body of POST /enhance.
class PromptRequest(BaseModel):
    # Required by the schema, but the /enhance handler overwrites it with the
    # user id taken from the verified JWT.
    user_id: str
    prompt: str
    platform: Optional[str] = "unknown"
15
+
16
# Request body of POST /track.
class TrackRequest(BaseModel):
    # Required by the schema, but the /track handler overwrites it with the
    # user id taken from the verified JWT.
    user_id: str
    prompt: str
    platform: Optional[str] = "unknown"
20
+
21
# Request body of POST /auth/request-otp.
class OTPRequest(BaseModel):
    # Plain string: no email-format validation is applied at the schema level.
    email: str
23
+
24
# Request body of POST /auth/verify-otp.
class OTPVerify(BaseModel):
    email: str
    # The one-time code the user received, sent as a string.
    code: str
requirements.txt → backend/requirements.txt RENAMED
@@ -26,4 +26,9 @@ httpx==0.24.1
26
  # DB
27
  pymongo==4.6.1
28
 
29
- numpy<2
 
 
 
 
 
 
26
  # DB
27
  pymongo==4.6.1
28
 
29
+
30
+ numpy<2
31
+
32
+ # Auth & Utilities
33
+ pyjwt==2.8.0
34
+ requests==2.31.0
backend/routers/auth.py ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import time
3
+ import uuid
4
+ import httpx
5
+ from fastapi import APIRouter, HTTPException, Depends
6
+ from fastapi.responses import HTMLResponse
7
+ from ..models.schemas import OTPRequest, OTPVerify
8
+ from ..core.config import settings
9
+ from ..core.database import MongoDB, in_memory_users
10
+ from ..core.security import create_jwt_token
11
+ from ..services.email_service import send_email_sendgrid
12
+
13
+ router = APIRouter()
14
+ _otp_store = {}
15
+
16
@router.post("/auth/request-otp")
def request_otp(request: OTPRequest):
    """Generate a 6-digit login code for an email address and send it.

    The code is stored in the module-level ``_otp_store`` under the
    normalised (stripped, lower-cased) email and expires after 5 minutes.
    Requesting a new code replaces any previous one for that email.

    Returns:
        ``{"message": "OTP sent."}`` regardless of whether delivery succeeded.
    """
    # Function-scope import, as the original did for ``random``.
    import secrets

    email = request.email.strip().lower()

    # Generate 6-digit code (100000-999999) from the OS CSPRNG; the ``random``
    # module is predictable and must not be used for authentication secrets.
    code = f"{secrets.randbelow(900000) + 100000}"

    _otp_store[email] = {
        "code": code,
        "expires": time.time() + 300,  # 5 minutes
    }

    email_body = f"Your Prompt Memory Login Code is: {code}\n\nIt expires in 5 minutes."
    send_email_sendgrid(email, "Your Login Code", email_body)

    # Dev Log: only echo the code when no real mail provider is configured,
    # so valid login codes do not end up in production logs.
    if not settings.SENDGRID_API_KEY:
        print(f"\n📨 [EMAIL LOG] To: {email} | Code: {code}\n")
    return {"message": "OTP sent."}
35
+
36
@router.post("/auth/verify-otp")
def verify_otp(request: OTPVerify):
    """Check a login code and return a JWT for the matching (or new) user.

    A code is single-use, expires at its stored deadline, and is discarded
    after 5 wrong guesses so a 6-digit code cannot be brute-forced.

    Returns:
        ``{"token": ..., "email": ..., "user_id": ...}``.

    Raises:
        HTTPException: 400 when no code was requested, the code expired, or
            the code does not match.
    """
    import hmac

    max_failed_attempts = 5

    email = request.email.strip().lower()
    code = request.code.strip()

    stored_data = _otp_store.get(email)
    if stored_data is None:
        raise HTTPException(status_code=400, detail="No OTP requested for this email.")

    if time.time() > stored_data["expires"]:
        _otp_store.pop(email, None)
        raise HTTPException(status_code=400, detail="OTP expired.")

    # Constant-time comparison; bytes are used because compare_digest rejects
    # non-ASCII str input, which a client could otherwise send.
    if not hmac.compare_digest(stored_data["code"].encode("utf-8"), code.encode("utf-8")):
        failed = stored_data.get("attempts", 0) + 1
        stored_data["attempts"] = failed
        if failed >= max_failed_attempts:
            # Too many guesses: invalidate the code; the user must request a new one.
            _otp_store.pop(email, None)
        raise HTTPException(status_code=400, detail="Invalid code.")

    # Single use: consume the code before issuing a token.
    _otp_store.pop(email, None)

    # Find or Register
    user_id = None
    if MongoDB.users_col is not None:
        user = MongoDB.users_col.find_one({"email": email})
        if user:
            user_id = user["user_id"]
    else:
        for uid, profile in in_memory_users.items():
            if profile.get("email") == email:
                user_id = uid
                break

    if not user_id:
        user_id = str(uuid.uuid4())
        new_profile = {"user_id": user_id, "email": email, "tech_stack": ["General"], "preferences": "Default"}
        if MongoDB.users_col is not None:
            MongoDB.users_col.insert_one(new_profile)
        else:
            in_memory_users[user_id] = new_profile

    token = create_jwt_token(user_id, email)
    return {"token": token, "email": email, "user_id": user_id}
76
+
77
+ # --- GOOGLE OAUTH ---
78
+
79
@router.get("/auth/google/login")
def google_login():
    """Return the Google OAuth consent URL the client should open.

    Returns:
        ``{"url": <authorization URL>}``.

    Raises:
        HTTPException: 500 when ``GOOGLE_CLIENT_ID`` is not configured.
    """
    from urllib.parse import urlencode

    if not settings.GOOGLE_CLIENT_ID:
        raise HTTPException(status_code=500, detail="Server missing Google Client ID")

    # urlencode percent-encodes every value; the scope contains spaces and the
    # redirect URI contains "://" and "/", which are not valid raw in a query.
    # NOTE(review): no ``state`` parameter is sent, so the callback cannot tie
    # a response to the request that started it (CSRF protection) — consider adding one.
    query = urlencode(
        {
            "response_type": "code",
            "client_id": settings.GOOGLE_CLIENT_ID,
            "redirect_uri": settings.GOOGLE_REDIRECT_URI,
            "scope": "openid email profile",
            "access_type": "offline",
            "prompt": "consent",
        }
    )
    return {"url": f"https://accounts.google.com/o/oauth2/v2/auth?{query}"}
93
+
94
@router.get("/auth/google/callback")
async def google_callback(code: str):
    """Finish the Google OAuth flow and hand a JWT to the opener window.

    Exchanges ``code`` for an access token, reads the account email from
    Google's userinfo endpoint, finds or creates the matching user, and
    returns a small HTML page that posts the token to ``window.opener``.

    Returns:
        An ``HTMLResponse`` on success, or a ``{"error": ...}`` dict when the
        exchange with Google fails or the account exposes no email.

    Raises:
        HTTPException: 500 when the Google client id/secret are not configured.
    """
    import json

    if not settings.GOOGLE_CLIENT_ID or not settings.GOOGLE_CLIENT_SECRET:
        raise HTTPException(status_code=500, detail="Server missing Google Secrets")

    token_url = "https://oauth2.googleapis.com/token"
    payload = {
        "client_id": settings.GOOGLE_CLIENT_ID,
        "client_secret": settings.GOOGLE_CLIENT_SECRET,
        "code": code,
        "grant_type": "authorization_code",
        "redirect_uri": settings.GOOGLE_REDIRECT_URI,
    }

    async with httpx.AsyncClient() as client:
        res = await client.post(token_url, data=payload)
        if res.status_code != 200:
            return {"error": "Failed to exchange code", "details": res.text}

        access_token = res.json().get("access_token")
        if not access_token:
            # Avoid calling userinfo with the literal header "Bearer None".
            return {"error": "Failed to exchange code", "details": "No access token in response"}

        user_res = await client.get(
            "https://www.googleapis.com/oauth2/v2/userinfo",
            headers={"Authorization": f"Bearer {access_token}"},
        )
        if user_res.status_code != 200:
            return {"error": "Failed to fetch Google user info", "details": user_res.text}
        user_info = user_res.json()

    email = user_info.get("email")
    if not email:
        return {"error": "No email found in Google Account"}
    # Normalise exactly like the OTP flow so both logins resolve to one account.
    email = email.strip().lower()

    # Find/Create User
    user_id = None
    if MongoDB.users_col is not None:
        user = MongoDB.users_col.find_one({"email": email})
        if user:
            user_id = user["user_id"]
    else:
        for uid, profile in in_memory_users.items():
            if profile.get("email") == email:
                user_id = uid
                break

    if not user_id:
        user_id = str(uuid.uuid4())
        new_profile = {"user_id": user_id, "email": email, "tech_stack": ["General"], "preferences": "Default"}
        if MongoDB.users_col is not None:
            MongoDB.users_col.insert_one(new_profile)
        else:
            in_memory_users[user_id] = new_profile

    token = create_jwt_token(user_id, email)

    # Serialise the message as JSON instead of splicing raw values into JS
    # string literals, and escape <, > and & so no value can close the
    # <script> element or open a new tag.
    message = json.dumps(
        {"type": "GOOGLE_AUTH_SUCCESS", "token": token, "email": email, "user_id": user_id}
    )
    message = message.replace("<", "\\u003c").replace(">", "\\u003e").replace("&", "\\u0026")

    # NOTE(review): the "*" target origin delivers the token to whichever page
    # opened this window; restrict it to the known client origin once that is
    # available as configuration.
    html_content = f"""
    <html>
        <body>
            <script>
                if (window.opener) {{
                    window.opener.postMessage({message}, "*");
                    window.close();
                }} else {{
                    document.write("Login Successful! You can close this tab.");
                }}
            </script>
        </body>
    </html>
    """
    return HTMLResponse(content=html_content)
backend/routers/prompts.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import time
3
+ from fastapi import APIRouter, Depends
4
+ from ..models.schemas import PromptRequest, TrackRequest
5
+ from ..core.security import verify_jwt
6
+ from ..core.database import MongoDB, in_memory_users
7
+ from ..services.memory_service import MemoryService
8
+ from ..services.llm_service import get_groq_client
9
+
10
+ router = APIRouter()
11
+
12
+ SOTA_SYSTEM_PROMPT = """
13
+ You are a Principal Prompt Architect. Your goal is not to "fix" the user's prompt, but to translate their raw intent into a "SOTA" executable specification for an LLM.
14
+
15
+ ### THE PHILOSOPHY (The 7 Rules)
16
+ 1. **Clarity**: Eliminate ambiguity.
17
+ 2. **Context**: Inject User Tech Stack [{tech_stack}] & Preferences [{preferences}].
18
+ 3. **Tasks**: Break complex goals into a step-by-step "Chain of Thought".
19
+ 4. **Format**: Explicitly define the output format (JSON, Markdown, etc.).
20
+ 5. **Examples**: Request few-shot examples if abstract.
21
+ 6. **Role**: Assign a HYPER-SPECIFIC persona (e.g., "Senior Geo-Spatial Data Engineer").
22
+ 7. **Constraints**: Define Negative Constraints (what NOT to do).
23
+
24
+ ### YOUR PROTOCOL
25
+ 1. **Analyze**: Identify the user's core intent.
26
+ 2. **Architect**: Construct a prompt using the **CO-STAR+** framework:
27
+ - [ROLE]: Act as {{Specific Expert Role}}...
28
+ - [CONTEXT]: User context is {tech_stack}...
29
+ - [TASK]: Your specific objective is...
30
+ - [STRATEGY]: Before writing code, outline your step-by-step reasoning...
31
+ - [CONSTRAINTS]: Do NOT use...
32
+ - [OUTPUT]: Provide the answer in {{Specific Format}}...
33
+
34
+ ### INSTRUCTIONS
35
+ - Return ONLY the final refined prompt.
36
+ - Do NOT provide explanations.
37
+ - If the prompt is a question TO YOU (like "what is this?"), answer it as a helper.
38
+ """
39
+
40
@router.post("/track")
def track_prompt(request: TrackRequest, user_id: str = Depends(verify_jwt)):
    """Silently learns from user prompts."""
    # The authenticated user id replaces whatever the request body carried.
    request.user_id = user_id
    uid, text = request.user_id, request.prompt

    # Step 0: always record the prompt in the short-term log.
    MemoryService.log_prompt(user_id=uid, original=text, source="passive_tracker")

    # Step 1: near-duplicates of something already stored are not saved again.
    similarity = MemoryService.retrieve_context(uid, text)[1]
    if similarity > 0.95:
        return {"status": "skipped", "reason": "redundant"}

    # Step 2: store the prompt as both the original and the refined text.
    MemoryService.memorize_strategy(uid, text, text)
    return {"status": "memorized"}
61
+
62
@router.post("/enhance")
def enhance_prompt(request: PromptRequest, user_id: str = Depends(verify_jwt)):
    """Rewrite the caller's prompt with the LLM, using profile and memory as context.

    Steps: load the user's profile, retrieve similar past prompts and recent
    history, ask the Groq model for a refined prompt, log the exchange, and
    memorise it when it is not too similar to something already stored.
    If the LLM call fails or returns no text, the original prompt is
    returned as the "enhanced" value.

    Returns:
        A dict with ``original``, ``enhanced``, ``log_id`` and ``latency``
        (seconds, rounded to two decimals).
    """
    # The authenticated user id replaces whatever the request body carried.
    request.user_id = user_id
    start_time = time.time()

    # 1. GET USER CONTEXT
    user_data = None
    if MongoDB.users_col is not None:
        user_data = MongoDB.users_col.find_one({"user_id": request.user_id})
    if user_data is None:
        user_data = in_memory_users.get(request.user_id, {})

    ts_raw = user_data.get("tech_stack", ["General Python", "Data Science"])
    tech_stack = ", ".join(ts_raw) if isinstance(ts_raw, list) else str(ts_raw)
    preferences = user_data.get("preferences", "Clean, modular code with docstrings.")

    # 2. RETRIEVE MEMORY
    past_context, max_similarity = MemoryService.retrieve_context(request.user_id, request.prompt)

    # 3. RECENT HISTORY
    recent_prompts = MemoryService.get_recent_prompts(request.user_id)
    recent_history_str = "\n".join([f"- {p}" for p in recent_prompts]) if recent_prompts else "No recent history."

    # 4. CONSTRUCT PROMPT
    formatted_system = SOTA_SYSTEM_PROMPT.format(
        tech_stack=tech_stack,
        preferences=preferences
    )

    user_message = f"""
### 1. RECENT ACTIVITY (Immediate Context)
{recent_history_str}

### 2. LONG-TERM MEMORY & PAST STRATEGIES
{past_context}

### 3. RAW USER INPUT
"{request.prompt}"

### 4. TASK
Apply the 7 Rules. Transform the raw input into a SOTA prompt.
"""

    # Fallback: if anything below fails, the caller gets their own prompt back.
    enhanced_prompt = request.prompt
    try:
        client = get_groq_client()
        chat_completion = client.chat.completions.create(
            messages=[
                {"role": "system", "content": formatted_system},
                {"role": "user", "content": user_message}
            ],
            model="openai/gpt-oss-120b",
            temperature=0.3,
        )
        content = chat_completion.choices[0].message.content
        # The completion content can be empty/None; keep the fallback then
        # instead of returning (and memorising) a null "enhanced" prompt.
        if content:
            enhanced_prompt = content
    except Exception as e:
        print(f"❌ Groq API Error: {e}")

    process_time = round(time.time() - start_time, 2)

    # 5. LOG
    log_id = MemoryService.log_prompt(
        user_id=request.user_id,
        original=request.prompt,
        enhanced=enhanced_prompt,
        score=max_similarity,
        latency=process_time,
    )

    # 6. MEMORIZE (if unique)
    if max_similarity < 0.90:
        MemoryService.memorize_strategy(request.user_id, request.prompt, enhanced_prompt)
    else:
        print(f"♻️ Redundancy detected (Score {max_similarity:.2f}). Skipping save.")

    return {
        "original": request.prompt,
        "enhanced": enhanced_prompt,
        "log_id": log_id,
        "latency": process_time
    }
backend/routers/users.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from fastapi import APIRouter
3
+ from ..models.schemas import UserProfile
4
+ from ..core.database import MongoDB, in_memory_users
5
+
6
+ router = APIRouter()
7
+
8
@router.post("/users/register")
def register_user(profile: UserProfile):
    """Creates or updates a user profile.

    Only the fields actually supplied are written: an omitted ``email``
    (``None``) no longer overwrites the email stored by the login flows,
    which look users up by that email.

    Returns:
        ``{"message": "User <user_id> registered successfully."}``.
    """
    # NOTE(review): this endpoint takes no JWT dependency, unlike /track and
    # /enhance, so any caller can overwrite any user's profile by user_id —
    # confirm whether that is intended.
    data = profile.dict(exclude_none=True)

    if MongoDB.users_col is not None:
        MongoDB.users_col.update_one(
            {"user_id": profile.user_id},
            {"$set": data},
            upsert=True,
        )
    else:
        # Merge into the existing entry, mirroring Mongo's $set semantics,
        # instead of replacing the whole stored profile.
        in_memory_users.setdefault(profile.user_id, {}).update(data)
    return {"message": f"User {profile.user_id} registered successfully."}
backend/services/email_service.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import requests
3
+ from ..core.config import settings
4
+
5
def send_email_sendgrid(to_email: str, subject: str, content: str):
    """Send a plain-text email through the SendGrid v3 API.

    Args:
        to_email: Recipient address.
        subject: Subject line.
        content: Plain-text body.

    Returns:
        ``True`` when SendGrid accepted the message; ``False`` when no API key
        is configured, SendGrid answered with an error status, or the request
        itself failed. Errors are reported on stdout, never raised.
    """
    if not settings.SENDGRID_API_KEY:
        print(f"⚠️ No SendGrid Key. Simulating email to {to_email}")
        return False

    url = "https://api.sendgrid.com/v3/mail/send"
    headers = {
        "Authorization": f"Bearer {settings.SENDGRID_API_KEY}",
        "Content-Type": "application/json"
    }
    data = {
        "personalizations": [{"to": [{"email": to_email}]}],
        "from": {"email": "aminyahouse2000@gmail.com", "name": "Prompt Memory"},
        "subject": subject,
        "content": [{"type": "text/plain", "value": content}]
    }

    try:
        # requests has no default timeout; without one a stalled connection
        # would block the calling request handler indefinitely.
        res = requests.post(url, headers=headers, json=data, timeout=10)
    except Exception as e:
        print(f"❌ Email Failed: {e}")
        return False

    if res.status_code >= 400:
        print(f"❌ SendGrid Error: {res.text}")
        return False

    print(f"✅ Email sent to {to_email}")
    return True
backend/services/llm_service.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from groq import Groq
3
+ from sentence_transformers import SentenceTransformer
4
+ from ..core.config import settings
5
+
6
+ # Global singletons
7
+ _embedding_model = None
8
+ _groq_client = None
9
+ _embedding_unavailable = False
10
+
11
def get_groq_client():
    """Return the shared Groq client, constructing it on first use.

    A construction failure is printed as a warning and ``None`` is returned;
    the next call tries again because nothing was cached.
    """
    global _groq_client

    if _groq_client is not None:
        return _groq_client

    try:
        _groq_client = Groq(api_key=settings.GROQ_API_KEY)
    except Exception as e:
        print(f"⚠️ Warning: Groq client initialization failed: {e}")
    return _groq_client
20
+
21
def _load_embedding_model():
    """Load the sentence-transformers model, preferring the ONNX backend."""
    print("⏳ Loading free embedding model...")
    try:
        # Try ONNX for performance
        model = SentenceTransformer(settings.EMBEDDING_MODEL_NAME, backend="onnx")
        print("✅ Embedding model loaded (ONNX backend)")
    except Exception:
        model = SentenceTransformer(settings.EMBEDDING_MODEL_NAME)
        print("✅ Embedding model loaded (default backend)")
    return model


def get_embedding(text: str):
    """Return the embedding of ``text`` as a list of floats, or ``None``.

    The model is loaded on first use and cached in a module global. If loading
    fails once, the failure is remembered and every later call returns
    ``None`` without retrying.
    """
    global _embedding_model, _embedding_unavailable

    if _embedding_unavailable:
        return None

    if _embedding_model is None:
        try:
            _embedding_model = _load_embedding_model()
        except Exception as e:
            _embedding_unavailable = True
            print(f"⚠️ Embedding unavailable: {e}")
            return None

    vector = _embedding_model.encode(text, convert_to_numpy=True)
    return vector.tolist()
backend/services/memory_service.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import time
3
+ from datetime import datetime
4
+ from typing import List, Tuple
5
+ from qdrant_client.models import PointStruct, Filter, FieldCondition, MatchValue
6
+ from ..core.config import settings
7
+ from ..core.database import QdrantDB, MongoDB, in_memory_prompt_logs
8
+ from ..services.llm_service import get_embedding
9
+
10
class MemoryService:
    """Static helpers for prompt memory: vector search, recent history, logging."""

    @staticmethod
    def retrieve_context(user_id: str, query_text: str, limit: int = 3) -> Tuple[str, float]:
        """Find the user's past prompts that are similar to ``query_text``.

        Args:
            user_id: Only points whose payload ``user_id`` matches are searched.
            query_text: Text to embed and search with.
            limit: Maximum number of hits requested from Qdrant.

        Returns:
            ``(context_str, max_score)``. ``context_str`` lists hits scoring
            above 0.25, or a fixed "not found" sentence. ``max_score`` is the
            best score among all hits (0.0 when there are none or when Qdrant,
            the embedding model, or the search itself is unavailable).
        """
        no_context = "No relevant past context found."

        qdrant = QdrantDB.get_client()
        # Default return if DB is down
        if qdrant is None:
            return no_context, 0.0

        query_vector = get_embedding(query_text)
        if query_vector is None:
            return no_context, 0.0

        # Search with User ID Filter
        try:
            results = qdrant.search(
                collection_name=settings.COLLECTION_NAME,
                query_vector=query_vector,
                query_filter=Filter(
                    must=[
                        FieldCondition(
                            key="user_id",
                            match=MatchValue(value=user_id)
                        )
                    ]
                ),
                limit=limit
            )
        except Exception as e:
            print(f"⚠️ Search failed: {e}")
            return no_context, 0.0

        context_parts = []
        max_score = 0.0

        for hit in results:
            if hit.score > max_score:
                max_score = hit.score

            # A point may carry no payload at all; treat that as empty.
            payload = hit.payload or {}
            # Relevance threshold
            if hit.score > 0.25:
                context_parts.append(f"- Past Prompt: \"{payload.get('original_prompt')}\"\n")
                context_parts.append(f"- Refined Version: \"{payload.get('refined_prompt')}\"\n\n")

        context_str = "".join(context_parts)
        return (context_str if context_str else no_context), max_score

    @staticmethod
    def get_recent_prompts(user_id: str, limit: int = 5) -> List[str]:
        """Return the user's most recent original prompts, newest first.

        Reads from MongoDB when it is connected (an error there is printed and
        whatever was collected so far is returned); otherwise reads the
        in-memory log.
        """
        recent_prompts = []

        # 1. Try MongoDB
        if MongoDB.prompts_col is not None:
            try:
                cursor = MongoDB.prompts_col.find(
                    {"user_id": user_id}
                ).sort("timestamp", -1).limit(limit)

                for doc in cursor:
                    if "original" in doc:
                        recent_prompts.append(doc["original"])
            except Exception as e:
                print(f"⚠️ Error fetching recent prompts from Mongo: {e}")
        # 2. Fallback to In-Memory
        else:
            user_logs = [log for log in in_memory_prompt_logs if log.get("user_id") == user_id]
            recent_prompts = [log["original"] for log in user_logs[-limit:]]
            recent_prompts.reverse()

        return recent_prompts

    @staticmethod
    def log_prompt(user_id: str, original: str, enhanced: str = None, score: float = 0.0, latency: float = 0.0, source: str = "active"):
        """Record one prompt exchange in MongoDB, or in memory without MongoDB.

        Returns:
            The inserted MongoDB id as a string, or ``"memory-only"`` when the
            entry was kept in memory or the MongoDB insert failed.
        """
        log_entry = {
            "user_id": user_id,
            "timestamp": datetime.now(),
            "original": original,
            "enhanced": enhanced,
            "score": score,
            "latency": latency,
            "source": source
        }

        log_id = "memory-only"
        if MongoDB.prompts_col is not None:
            try:
                res = MongoDB.prompts_col.insert_one(log_entry)
                log_id = str(res.inserted_id)
            except Exception as e:
                # Logging stays best-effort, but the failure is no longer silent.
                print(f"⚠️ Failed to log prompt to Mongo: {e}")
        else:
            in_memory_prompt_logs.append(log_entry)

        return log_id

    @staticmethod
    def memorize_strategy(user_id: str, original: str, refined: str):
        """Store a prompt and its refined version in the vector database.

        Does nothing when the embedding or the Qdrant client is unavailable;
        any error is printed rather than raised.
        """
        import uuid

        try:
            vec = get_embedding(original)
            if not vec:
                return
            q_client = QdrantDB.get_client()
            if not q_client:
                return
            q_client.upsert(
                collection_name=settings.COLLECTION_NAME,
                points=[PointStruct(
                    # A second-resolution timestamp id made two writes within
                    # the same second (from any user) overwrite each other,
                    # because upsert replaces the point with an equal id.
                    id=str(uuid.uuid4()),
                    vector=vec,
                    payload={
                        "user_id": user_id,
                        "original_prompt": original,
                        "refined_prompt": refined
                    }
                )]
            )
            print("💾 New strategy memorized.")
        except Exception as e:
            print(f"❌ Memorization failed: {e}")
main.py DELETED
@@ -1,422 +0,0 @@
1
- import os
2
- import time
3
- from datetime import datetime
4
- from typing import List, Optional
5
-
6
- # Third-party libraries
7
- from fastapi import FastAPI, HTTPException
8
- from fastapi.middleware.cors import CORSMiddleware
9
- from pydantic import BaseModel
10
- from qdrant_client import QdrantClient
11
- from qdrant_client.models import PointStruct, Distance, VectorParams, Filter, FieldCondition, MatchValue# Lazy import: from sentence_transformers import SentenceTransformer
12
- from groq import Groq
13
- from pymongo import MongoClient
14
- from dotenv import load_dotenv
15
-
16
# --- 1. CONFIGURATION & SECRETS ---
# Merge values from a local .env file (if present) into the environment.
load_dotenv()

# Credentials and endpoints. Each is None when unset, except QDRANT_URL,
# which defaults to ":memory:".
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
MONGO_URI = os.environ.get("MONGO_URI")
QDRANT_URL = os.environ.get("QDRANT_URL", ":memory:")
QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY")

# Free embedding model: all-MiniLM-L6-v2 (384-dim, Apache 2.0). No API key required.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

# Only warn at startup; requests that need the key fail later instead.
if not GROQ_API_KEY:
    print("⚠️ GROQ_API_KEY is missing from .env — /enhance will fail until you add it.")
31
-
32
# --- 2. SETUP CLIENTS ---

# A. FastAPI application that the route decorators below attach to.
app = FastAPI()

# B. CORS: fully open, because the caller is a Chrome extension.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# C. MongoDB (user profiles and prompt logs). Both handles stay None when the
#    server cannot be reached; the endpoints then use the in-memory fallbacks.
users_col = None
prompts_col = None
_in_memory_users = {}  # user_id -> profile dict, used when MongoDB is down

try:
    mongo_client = MongoClient(
        MONGO_URI or "mongodb://localhost:27017",
        serverSelectionTimeoutMS=3000,
    )
    # Force a round trip now so an unreachable server is detected here.
    mongo_client.admin.command("ping")
    db = mongo_client["prompt_engine_db"]
    users_col = db["users"]
    prompts_col = db["prompt_logs"]
    print("✅ MongoDB Connected")
except Exception as e:
    print(f"⚠️ MongoDB not available ({e}) — using in-memory fallback for profiles/logs.")

# D. Qdrant (vector memory): client is created lazily by init_qdrant().
qdrant = None
COLLECTION_NAME = "prompt_memory"
67
-
68
- # Add this with your other class definitions
69
class TrackRequest(BaseModel):
    # Request body of POST /track.
    user_id: str  # whose memory the prompt is stored under
    prompt: str  # prompt text, stored as typed
    platform: Optional[str] = "unknown"  # optional; not read by track_prompt
73
-
74
-
75
def _ensure_collection(client):
    """Create COLLECTION_NAME on *client* unless it already exists."""
    try:
        exists = client.collection_exists(COLLECTION_NAME)
    except Exception:
        # collection_exists() may be missing or may fail (the original code
        # caught AttributeError here); probe with get_collection() instead.
        try:
            client.get_collection(COLLECTION_NAME)
            exists = True
        except Exception:
            exists = False

    if exists:
        return

    try:
        client.create_collection(
            collection_name=COLLECTION_NAME,
            vectors_config=VectorParams(size=384, distance=Distance.COSINE),
        )
        print(f"✅ Created new Qdrant collection: '{COLLECTION_NAME}'")
    except Exception as e:
        # Another creator won the race: the collection exists, which is fine.
        if "409" not in str(e) and "already exists" not in str(e):
            raise


def _ensure_user_id_index(client):
    """Request a keyword payload index on 'user_id'; failures are only reported."""
    try:
        client.create_payload_index(
            collection_name=COLLECTION_NAME,
            field_name="user_id",
            field_schema="keyword",  # exact string matches such as ids
        )
        print("✅ Payload index for 'user_id' ensured.")
    except Exception as e:
        # An already-existing index may surface as an error; not fatal.
        print(f"ℹ️ Note on Indexing: {e}")


def init_qdrant():
    """Return the shared Qdrant client, creating and preparing it on first use.

    Returns:
        The ready client, or None when connecting or preparing the collection
        failed. A failed attempt is not cached, so the next call retries.
    """
    global qdrant
    if qdrant is not None:
        return qdrant

    try:
        if QDRANT_URL == ":memory:":
            # ":memory:" selects the embedded in-process store and must be
            # passed as `location`; as `url` it is treated as a remote address.
            client = QdrantClient(location=":memory:")
        else:
            client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)

        _ensure_collection(client)
        _ensure_user_id_index(client)

        # Publish the client only once it is fully usable.
        qdrant = client
        print(f"✅ Qdrant Connected ({QDRANT_URL})")
    except Exception as e:
        print(f"❌ Qdrant Connection Failed: {e}")
    return qdrant
124
-
125
# E. AI Models — free local embeddings (MiniLM via sentence-transformers)
print("Embedding: free model (MiniLM) will load on first use")

# Filled in by get_embedding() on its first successful load.
EMBEDDING_MODEL = None
# Set to True by get_embedding() after a failed load, so it is not retried.
_embedding_unavailable = False

# Groq client, created on demand by get_groq_client().
groq_client = None
132
-
133
def get_groq_client():
    """Return the shared Groq client, building it on the first call.

    Returns None when construction fails; the failure is printed and the
    next call tries again.
    """
    global groq_client
    if groq_client is not None:
        return groq_client

    try:
        groq_client = Groq(api_key=GROQ_API_KEY)
    except Exception as e:
        print(f"⚠️ Warning: Groq client initialization failed: {e}")
    return groq_client
142
-
143
- # --- 3. DATA MODELS (Pydantic) ---
144
-
145
class UserProfile(BaseModel):
    # Request body of POST /users/register.
    user_id: str
    tech_stack: List[str]  # e.g. ["React", "Python", "AWS"]
    preferences: str  # e.g. "Clean code, no comments"
149
-
150
class PromptRequest(BaseModel):
    # Request body of POST /enhance.
    user_id: str
    prompt: str  # field name matches what the browser extension sends
    platform: Optional[str] = "unknown"  # optional; not read by enhance_prompt
154
-
155
- # --- 4. HELPER FUNCTIONS ---
156
-
157
def get_embedding(text: str):
    """Embed *text* with the MiniLM sentence-transformers model.

    The model is loaded on the first call (ONNX backend first, default backend
    as fallback) and reused afterwards. If loading fails once, the failure is
    remembered and every later call returns None without retrying.

    Returns:
        The embedding as a plain list, or None when the model is unavailable.
    """
    global EMBEDDING_MODEL, _embedding_unavailable

    if _embedding_unavailable:
        return None

    # Fast path: model already loaded.
    if EMBEDDING_MODEL is not None:
        return EMBEDDING_MODEL.encode(text, convert_to_numpy=True).tolist()

    try:
        from sentence_transformers import SentenceTransformer
        print("⏳ Loading free embedding model (all-MiniLM-L6-v2)...")
        try:
            # Prefer the ONNX backend (lighter, CPU-friendly).
            EMBEDDING_MODEL = SentenceTransformer(EMBEDDING_MODEL_NAME, backend="onnx")
            print("✅ Embedding model loaded (ONNX backend)")
        except Exception:
            EMBEDDING_MODEL = SentenceTransformer(EMBEDDING_MODEL_NAME)
            print("✅ Embedding model loaded (default backend)")
    except Exception as e:
        _embedding_unavailable = True
        print(f"⚠️ Embedding unavailable: {e} — install: pip install sentence-transformers (or sentence-transformers[onnx] for CPU)")
        return None

    return EMBEDDING_MODEL.encode(text, convert_to_numpy=True).tolist()
178
-
179
def retrieve_context(user_id: str, query_text: str, limit: int = 3):
    """Look up this user's most similar stored prompts.

    Args:
        user_id: Only points whose payload ``user_id`` matches are searched.
        query_text: Text to embed and search with.
        limit: Maximum number of hits requested from Qdrant.

    Returns:
        ``(context_str, max_score)``. ``context_str`` lists the hits scoring
        above 0.25, or a fixed "no context" sentence. ``max_score`` is the
        highest hit score seen (never below 0.0). When the vector store, the
        embedding model, or the search itself is unavailable, the result is
        the "no context" sentence with a score of 0.0.
    """
    no_context = "No relevant past context found."

    client = init_qdrant()
    if client is None:
        return no_context, 0.0

    query_vector = get_embedding(query_text)
    if query_vector is None:
        return no_context, 0.0

    try:
        results = client.search(
            collection_name=COLLECTION_NAME,
            query_vector=query_vector,
            query_filter=Filter(
                must=[
                    FieldCondition(
                        key="user_id",
                        match=MatchValue(value=user_id),
                    )
                ]
            ),
            limit=limit,
        )
    except Exception as e:
        # Memory is an optional enrichment: a search failure must not turn
        # the whole request into a server error.
        print(f"⚠️ Memory search failed: {e}")
        return no_context, 0.0

    print(f"\n🔍 Searching Memory for User '{user_id}'...")

    context_parts = []
    max_score = 0.0
    for hit in results:
        if hit.score > max_score:
            max_score = hit.score

        payload = hit.payload
        print(f" Found candidate (Score: {hit.score:.4f}): {payload.get('original_prompt')}")

        # Only hits above the relevance threshold are included in the context.
        if hit.score > 0.25:
            context_parts.append(f"- Past Prompt: \"{payload.get('original_prompt')}\"\n")
            context_parts.append(f"- Refined Version: \"{payload.get('refined_prompt')}\"\n\n")

    context_str = "".join(context_parts)
    return (context_str if context_str else no_context), max_score
231
-
232
- # --- 5. API ENDPOINTS ---
233
-
234
@app.get("/")
def health_check():
    # Liveness probe: a fixed payload that touches no backing service.
    status_payload = {"status": "running", "service": "Context-Aware Prompt Engine"}
    return status_payload
237
-
238
@app.post("/users/register")
def register_user(profile: UserProfile):
    """Creates or updates a user profile."""
    record = profile.dict()
    if users_col is None:
        # MongoDB is not connected: keep the profile in process memory.
        _in_memory_users[profile.user_id] = record
    else:
        # Upsert so a repeated registration overwrites the stored fields.
        users_col.update_one(
            {"user_id": profile.user_id},
            {"$set": record},
            upsert=True,
        )
    return {"message": f"User {profile.user_id} registered successfully."}
250
-
251
-
252
@app.post("/track")
def track_prompt(request: TrackRequest):
    """
    Silently learns from user prompts without modifying them.
    """
    import uuid  # local import: only needed for point ids

    # 1. Redundancy check. The threshold is high (0.95) so that only
    #    near-identical prompts are dropped.
    _, max_similarity = retrieve_context(request.user_id, request.prompt)
    if max_similarity > 0.95:
        return {"status": "skipped", "reason": "redundant"}

    # 2. Vectorize and save to Qdrant.
    try:
        vec = get_embedding(request.prompt)
        q_client = init_qdrant() if vec else None
        if not q_client:
            # Nothing was stored, so do not claim the prompt was memorized.
            return {"status": "skipped", "reason": "memory_unavailable"}

        q_client.upsert(
            collection_name=COLLECTION_NAME,
            points=[PointStruct(
                # A second-resolution timestamp id made two prompts tracked
                # within the same second overwrite each other.
                id=str(uuid.uuid4()),
                vector=vec,
                payload={
                    "user_id": request.user_id,
                    "original_prompt": request.prompt,
                    "refined_prompt": request.prompt,  # no refinement: maps to itself
                    "source": "passive_tracker",
                },
            )],
        )
        print(f"🧠 passively learned: {request.prompt[:50]}...")
    except Exception as e:
        print(f"❌ Tracking Error: {e}")
        return {"status": "error", "message": str(e)}

    return {"status": "memorized"}
289
-
290
-
291
# System-prompt template for /enhance. It is filled with str.format(), so
# {tech_stack} and {preferences} are substituted, while the doubled braces
# ({{...}}) come out as literal single braces in the final prompt.
SOTA_SYSTEM_PROMPT = """
You are a Principal Prompt Architect. Your goal is not to "fix" the user's prompt, but to translate their raw intent into a "SOTA" executable specification for an LLM.

### THE PHILOSOPHY (The 7 Rules)
1. **Clarity**: Eliminate ambiguity.
2. **Context**: Inject User Tech Stack [{tech_stack}] & Preferences [{preferences}].
3. **Tasks**: Break complex goals into a step-by-step "Chain of Thought".
4. **Format**: Explicitly define the output format (JSON, Markdown, etc.).
5. **Examples**: Request few-shot examples if abstract.
6. **Role**: Assign a HYPER-SPECIFIC persona (e.g., "Senior Geo-Spatial Data Engineer").
7. **Constraints**: Define Negative Constraints (what NOT to do).

### YOUR PROTOCOL
1. **Analyze**: Identify the user's core intent.
2. **Architect**: Construct a prompt using the **CO-STAR+** framework:
- [ROLE]: Act as {{Specific Expert Role}}...
- [CONTEXT]: User context is {tech_stack}...
- [TASK]: Your specific objective is...
- [STRATEGY]: Before writing code, outline your step-by-step reasoning...
- [CONSTRAINTS]: Do NOT use...
- [OUTPUT]: Provide the answer in {{Specific Format}}...

### INSTRUCTIONS
- Return ONLY the final refined prompt.
- Do NOT provide explanations.
- If the prompt is a question TO YOU (like "what is this?"), answer it as a helper.
"""
318
-
319
-
320
@app.post("/enhance")
def enhance_prompt(request: PromptRequest):
    # Rewrites the user's raw prompt with the LLM, using the stored profile
    # and similar past prompts as context. Every external dependency is
    # best-effort: on failure the original prompt is returned unchanged.
    #
    # Returns a dict with "original", "enhanced", "log_id" (the Mongo id, or
    # "memory-only" when nothing was written to Mongo) and "latency" (seconds).
    import uuid  # local import: only needed for point ids

    start_time = time.time()

    # 1. USER CONTEXT (MongoDB first, then the in-memory registry)
    user_data = None
    if users_col is not None:
        user_data = users_col.find_one({"user_id": request.user_id})
    if user_data is None:
        user_data = _in_memory_users.get(request.user_id, {})

    # Defaults for users who never registered a profile.
    ts_raw = user_data.get("tech_stack", ["General Python", "Data Science"])
    tech_stack = ", ".join(ts_raw) if isinstance(ts_raw, list) else str(ts_raw)
    preferences = user_data.get("preferences", "Clean, modular code with docstrings.")

    # 2. RETRIEVE MEMORY
    past_context, max_similarity = retrieve_context(request.user_id, request.prompt)

    # 3. CONSTRUCT SOTA PROMPT
    formatted_system = SOTA_SYSTEM_PROMPT.format(
        tech_stack=tech_stack,
        preferences=preferences,
    )

    user_message = f"""
    ### 1. MEMORY & PAST STRATEGIES
    {past_context}

    ### 2. RAW USER INPUT
    "{request.prompt}"

    ### 3. TASK
    Apply the 7 Rules. Transform the raw input into a SOTA prompt.
    Ensure you define a specific EXPERT ROLE and Negative Constraints.
    """

    enhanced_prompt = request.prompt  # fallback when the LLM call fails
    try:
        client = get_groq_client()
        chat_completion = client.chat.completions.create(
            messages=[
                {"role": "system", "content": formatted_system},
                {"role": "user", "content": user_message},
            ],
            model="openai/gpt-oss-120b",
            temperature=0.3,  # low temperature for precision
        )
        # An empty/None completion must not replace the fallback prompt.
        enhanced_prompt = chat_completion.choices[0].message.content or request.prompt
    except Exception as e:
        print(f"❌ Groq API Error: {e}")

    # 4. LOGGING (MongoDB): best-effort, but failures are reported.
    process_time = round(time.time() - start_time, 2)
    log_id = "memory-only"
    if prompts_col is not None:
        try:
            log_entry = {
                "user_id": request.user_id,
                "timestamp": datetime.now(),
                "original": request.prompt,
                "enhanced": enhanced_prompt,
                "score": max_similarity,
                "latency": process_time,
            }
            res = prompts_col.insert_one(log_entry)
            log_id = str(res.inserted_id)
        except Exception as e:
            print(f"⚠️ Prompt logging failed: {e}")

    # 5. MEMORY STORAGE (Qdrant): only save if unique (similarity < 0.90).
    if max_similarity < 0.90:
        try:
            vec = get_embedding(request.prompt)
            if vec:
                q_client = init_qdrant()
                if q_client:
                    q_client.upsert(
                        collection_name=COLLECTION_NAME,
                        points=[PointStruct(
                            # A second-resolution timestamp id made saves within
                            # the same second overwrite each other.
                            id=str(uuid.uuid4()),
                            vector=vec,
                            payload={
                                "user_id": request.user_id,
                                "original_prompt": request.prompt,
                                "refined_prompt": enhanced_prompt,
                            },
                        )],
                    )
                    print("💾 New strategy memorized.")
        except Exception as e:
            print(f"❌ Memorization failed: {e}")
    else:
        print(f"♻️ Redundancy detected (Score {max_similarity:.2f}). Skipping save.")

    return {
        "original": request.prompt,
        "enhanced": enhanced_prompt,
        "log_id": log_id,
        "latency": process_time,
    }
417
-
418
-
419
- # Run with: uvicorn main:app --reload
420
-
421
- ## change content.js as well
422
-