USAMA BHATTI committed on
Commit
370480b
·
0 Parent(s):

Adding local files to new repository

Browse files
Files changed (45) hide show
  1. .dockerignore +28 -0
  2. .gitignore +27 -0
  3. Dockerfile +32 -0
  4. Procfile +1 -0
  5. backend/src/api/routes/auth.py +67 -0
  6. backend/src/api/routes/chat.py +50 -0
  7. backend/src/api/routes/deps.py +45 -0
  8. backend/src/api/routes/ingestion.py +148 -0
  9. backend/src/api/routes/settings.py +299 -0
  10. backend/src/core/config.py +76 -0
  11. backend/src/db/base.py +5 -0
  12. backend/src/db/session.py +36 -0
  13. backend/src/init_db.py +27 -0
  14. backend/src/main.py +53 -0
  15. backend/src/models/chat.py +17 -0
  16. backend/src/models/ingestion.py +40 -0
  17. backend/src/models/integration.py +34 -0
  18. backend/src/models/user.py +19 -0
  19. backend/src/schemas/chat.py +15 -0
  20. backend/src/services/chat_service.py +598 -0
  21. backend/src/services/connectors/base.py +36 -0
  22. backend/src/services/connectors/cms_base.py +30 -0
  23. backend/src/services/connectors/mongo_connector.py +85 -0
  24. backend/src/services/connectors/sanity_connector.py +133 -0
  25. backend/src/services/embeddings/factory.py +48 -0
  26. backend/src/services/ingestion/crawler.py +169 -0
  27. backend/src/services/ingestion/file_processor.py +94 -0
  28. backend/src/services/ingestion/guardrail_factory.py +28 -0
  29. backend/src/services/ingestion/web_processor.py +53 -0
  30. backend/src/services/ingestion/zip_processor.py +132 -0
  31. backend/src/services/llm/factory.py +66 -0
  32. backend/src/services/routing/semantic_router.py +52 -0
  33. backend/src/services/security/pii_scrubber.py +67 -0
  34. backend/src/services/tools/cms_agent.py +67 -0
  35. backend/src/services/tools/cms_tool.py +74 -0
  36. backend/src/services/tools/nosql_agent.py +65 -0
  37. backend/src/services/tools/nosql_tool.py +61 -0
  38. backend/src/services/tools/secure_agent.py +57 -0
  39. backend/src/services/tools/sql_tool.py +45 -0
  40. backend/src/services/vector_store/qdrant_adapter.py +78 -0
  41. backend/src/utils/auth.py +30 -0
  42. backend/src/utils/security.py +29 -0
  43. dummy_cms_data.json +37 -0
  44. requirements.txt +194 -0
  45. static/widget.js +153 -0
.dockerignore ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__
3
+ *.pyc
4
+ *.pyo
5
+ *.pyd
6
+ .Python
7
+ env/
8
+ venv/
9
+ .venv/
10
+ pip-log.txt
11
+ pip-delete-this-directory.txt
12
+
13
+ # Git
14
+ .git
15
+ .gitignore
16
+
17
+ # OS
18
+ .DS_Store
19
+ Thumbs.db
20
+
21
+ # Logs & Temp
22
+ *.log
23
+ uploaded_files/
24
+ temp_unzip_*/
25
+
26
+ # Local DBs (Don't copy local DBs into image, use volumes instead)
27
+ omni_agent.db
28
+ fake_ecommerce.db
.gitignore ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # --- Security (never commit these files) ---
2
+ .env
3
+ .env.local
4
+
5
+ # --- Python Garbage ---
6
+ __pycache__/
7
+ *.pyc
8
+ *.pyo
9
+ *.pyd
10
+
11
+ # --- Virtual Environment (Heavy folders) ---
12
+ venv/
13
+ env/
14
+ .venv/
15
+
16
+ # --- Local Databases (a fresh one is created on Railway) ---
17
+ omni_agent.db
18
+ fake_ecommerce.db
19
+ *.sqlite3
20
+
21
+ # --- OS Junk ---
22
+ .DS_Store
23
+ Thumbs.db
24
+
25
+ # --- Logs ---
26
+ *.log
27
+ uploaded_files/
Dockerfile ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# 1. Base image (lightweight Python)
FROM python:3.10-slim

# 2. Environment variables
# Fix: use the modern `ENV key=value` form — the space-separated
# `ENV key value` form is legacy and warned about by current Docker.
# PYTHONDONTWRITEBYTECODE: don't write .pyc files inside the image.
ENV PYTHONDONTWRITEBYTECODE=1
# PYTHONUNBUFFERED: flush stdout/stderr immediately so logs show up live.
ENV PYTHONUNBUFFERED=1

# 3. System dependencies
# 'build-essential' is often needed for compiling python packages like numpy/cryptography
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# 4. Work directory
WORKDIR /app

# 5. Install dependencies first (layer-caching strategy):
# if requirements.txt is unchanged, Docker reuses the cached layer here.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# 6. Copy application code
COPY . .

# 7. Expose the application port
EXPOSE 8000

# 8. Run command
# Bind 0.0.0.0 so the server is reachable from outside the container.
CMD ["uvicorn", "backend.src.main:app", "--host", "0.0.0.0", "--port", "8000"]
Procfile ADDED
@@ -0,0 +1 @@
 
 
1
+ web: uvicorn backend.src.main:app --host 0.0.0.0 --port $PORT
backend/src/api/routes/auth.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, Depends, HTTPException, status
2
+ from fastapi.security import OAuth2PasswordRequestForm
3
+ from sqlalchemy.ext.asyncio import AsyncSession
4
+ from sqlalchemy.future import select
5
+ from pydantic import BaseModel, EmailStr
6
+
7
+ from backend.src.db.session import get_db
8
+ from backend.src.models.user import User
9
+ from backend.src.utils.auth import get_password_hash, verify_password, create_access_token
10
+
11
+ router = APIRouter()
12
+
13
+ # --- Schemas ---
14
+ class UserCreate(BaseModel):
15
+ email: EmailStr
16
+ password: str
17
+ full_name: str | None = None
18
+
19
+ class Token(BaseModel):
20
+ access_token: str
21
+ token_type: str
22
+
23
+ # --- 1. Registration Endpoint ---
24
+ @router.post("/auth/register", response_model=Token)
25
+ async def register(user_in: UserCreate, db: AsyncSession = Depends(get_db)):
26
+ # Check agar email pehle se exist karta hai
27
+ result = await db.execute(select(User).where(User.email == user_in.email))
28
+ existing_user = result.scalars().first()
29
+
30
+ if existing_user:
31
+ raise HTTPException(
32
+ status_code=400,
33
+ detail="Email already registered"
34
+ )
35
+
36
+ # Naya User Banao
37
+ new_user = User(
38
+ email=user_in.email,
39
+ hashed_password=get_password_hash(user_in.password),
40
+ full_name=user_in.full_name
41
+ )
42
+ db.add(new_user)
43
+ await db.commit()
44
+ await db.refresh(new_user)
45
+
46
+ # Direct Login Token do
47
+ access_token = create_access_token(data={"sub": str(new_user.id)})
48
+ return {"access_token": access_token, "token_type": "bearer"}
49
+
50
+ # --- 2. Login Endpoint ---
51
+ @router.post("/auth/login", response_model=Token)
52
+ async def login(form_data: OAuth2PasswordRequestForm = Depends(), db: AsyncSession = Depends(get_db)):
53
+ # User dhoondo
54
+ result = await db.execute(select(User).where(User.email == form_data.username)) # OAuth2 form mein email 'username' field mein hota hai
55
+ user = result.scalars().first()
56
+
57
+ # Password check karo
58
+ if not user or not verify_password(form_data.password, user.hashed_password):
59
+ raise HTTPException(
60
+ status_code=status.HTTP_401_UNAUTHORIZED,
61
+ detail="Incorrect email or password",
62
+ headers={"WWW-Authenticate": "Bearer"},
63
+ )
64
+
65
+ # Token generate karo
66
+ access_token = create_access_token(data={"sub": str(user.id)})
67
+ return {"access_token": access_token, "token_type": "bearer"}
backend/src/api/routes/chat.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from fastapi import APIRouter, Depends, HTTPException
3
+ from sqlalchemy.ext.asyncio import AsyncSession
4
+ from backend.src.db.session import get_db
5
+ from backend.src.schemas.chat import ChatRequest, ChatResponse
6
+ from backend.src.services.chat_service import process_chat
7
+ from backend.src.core.config import settings
8
+
9
+ # --- Security Imports ---
10
+ from backend.src.api.routes.deps import get_current_user
11
+ from backend.src.models.user import User
12
+
13
+ router = APIRouter()
14
+
15
+ @router.post("/chat", response_model=ChatResponse)
16
+ async def chat_endpoint(
17
+ request: ChatRequest,
18
+ db: AsyncSession = Depends(get_db),
19
+ current_user: User = Depends(get_current_user) # <-- User Logged in hai
20
+ ):
21
+ """
22
+ Protected Chat Endpoint.
23
+ Only accessible with a valid JWT Token.
24
+ """
25
+ try:
26
+ # User ki ID token se aayegi (Secure)
27
+ # Session ID user maintain kar sakta hai taake alag-alag chats yaad rahein
28
+ user_id = str(current_user.id)
29
+ session_id = request.session_id or user_id # Fallback
30
+
31
+ # --- FIX IS HERE: 'user_id' pass kiya ja raha hai ---
32
+ response_text = await process_chat(
33
+ message=request.message,
34
+ session_id=session_id,
35
+ user_id=user_id, # <--- Ye hum bhool gaye thay
36
+ db=db
37
+ )
38
+
39
+ return ChatResponse(
40
+ response=response_text,
41
+ session_id=session_id,
42
+ # 'provider' ab chat_service se aayega, humein yahan hardcode nahi karna
43
+ provider="omni_agent"
44
+ )
45
+
46
+ except Exception as e:
47
+ print(f"Error in chat endpoint: {e}")
48
+ import traceback
49
+ traceback.print_exc() # Poora error print karega
50
+ raise HTTPException(status_code=500, detail=str(e))
backend/src/api/routes/deps.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import Depends, HTTPException, status
2
+ from fastapi.security import OAuth2PasswordBearer
3
+ from jose import jwt, JWTError
4
+ from sqlalchemy.ext.asyncio import AsyncSession
5
+ from sqlalchemy.future import select
6
+
7
+ from backend.src.core.config import settings
8
+ from backend.src.db.session import get_db
9
+ from backend.src.models.user import User
10
+ from backend.src.utils.auth import ALGORITHM
11
+
12
+ # Ye Swagger UI ko batata hai ke Token kahan se lena hai (/auth/login se)
13
+ oauth2_scheme = OAuth2PasswordBearer(tokenUrl=f"{settings.API_V1_STR}/auth/login")
14
+
15
+ async def get_current_user(
16
+ token: str = Depends(oauth2_scheme),
17
+ db: AsyncSession = Depends(get_db)
18
+ ) -> User:
19
+ """
20
+ Ye function har protected route se pehle chalega.
21
+ Ye Token ko verify karega aur Database se User nikal kar dega.
22
+ """
23
+ credentials_exception = HTTPException(
24
+ status_code=status.HTTP_401_UNAUTHORIZED,
25
+ detail="Could not validate credentials",
26
+ headers={"WWW-Authenticate": "Bearer"},
27
+ )
28
+
29
+ try:
30
+ # Token Decode karo
31
+ payload = jwt.decode(token, settings.SECRET_KEY, algorithms=[ALGORITHM])
32
+ user_id: str = payload.get("sub")
33
+ if user_id is None:
34
+ raise credentials_exception
35
+ except JWTError:
36
+ raise credentials_exception
37
+
38
+ # Database mein User check karo
39
+ result = await db.execute(select(User).where(User.id == int(user_id)))
40
+ user = result.scalars().first()
41
+
42
+ if user is None:
43
+ raise credentials_exception
44
+
45
+ return user
backend/src/api/routes/ingestion.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ import shutil
4
+ from fastapi import APIRouter, UploadFile, File, HTTPException, Form, BackgroundTasks, Depends
5
+ from pydantic import BaseModel
6
+ from sqlalchemy.ext.asyncio import AsyncSession
7
+ from sqlalchemy.future import select
8
+
9
+ # --- Security Imports ---
10
+ from backend.src.api.routes.deps import get_current_user
11
+ from backend.src.models.user import User
12
+
13
+ # --- Internal Services & DB Imports ---
14
+ from backend.src.services.ingestion.file_processor import process_file
15
+ from backend.src.services.ingestion.crawler import SmartCrawler
16
+ from backend.src.services.ingestion.zip_processor import SmartZipProcessor
17
+ from backend.src.db.session import get_db, AsyncSessionLocal
18
+ from backend.src.models.ingestion import IngestionJob, JobStatus, IngestionType
19
+
20
+ # --- CONFIG ---
21
+ MAX_ZIP_SIZE_MB = 100
22
+ MAX_ZIP_SIZE_BYTES = MAX_ZIP_SIZE_MB * 1024 * 1024
23
+
24
+ router = APIRouter()
25
+ UPLOAD_DIRECTORY = "./uploaded_files"
26
+
27
+ # ==========================================
28
+ # FILE UPLOAD (Protected)
29
+ # ==========================================
30
@router.post("/ingest/upload")
async def upload_and_process_file(
    session_id: str = Form(...),
    file: UploadFile = File(...),
    current_user: User = Depends(get_current_user)  # auth required
):
    """
    Accept a single file, index it into the vector store under session_id,
    then delete the temporary on-disk copy (success or failure).
    """
    os.makedirs(UPLOAD_DIRECTORY, exist_ok=True)

    # Security fix: strip directory components from the client-supplied
    # filename ("../../etc/passwd"-style path traversal).
    safe_name = os.path.basename(file.filename or "upload")
    file_path = os.path.join(UPLOAD_DIRECTORY, safe_name)
    try:
        with open(file_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

        chunks_added = await process_file(file_path, session_id)
        if chunks_added <= 0:
            raise HTTPException(status_code=400, detail="Could not process file.")

        return {
            "message": "File processed successfully",
            "filename": file.filename,
            "chunks_added": chunks_added,
            "session_id": session_id
        }
    except HTTPException:
        # Bug fix: the intended 400 above used to be re-wrapped as a 500
        # by the generic handler below — keep it as raised.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        # Always clean up the temp copy.
        if os.path.exists(file_path):
            os.remove(file_path)
60
+
61
+ # ==========================================
62
+ # WEB CRAWLER (Protected)
63
+ # ==========================================
64
class WebIngestRequest(BaseModel):
    """Payload for /ingest/url: what to crawl and which session owns it."""

    url: str
    session_id: str
    crawl_type: str = "single_page"
68
+
69
async def run_crawler_task(job_id, url, session_id, crawl_type, db_factory):
    """Background task: run a SmartCrawler inside its own DB session."""
    async with db_factory() as session:
        await SmartCrawler(job_id, url, session_id, crawl_type, session).start()
73
+
74
@router.post("/ingest/url")
async def start_web_ingestion(
    request: WebIngestRequest,
    background_tasks: BackgroundTasks,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)  # auth required
):
    """Record a PENDING crawl job and kick the crawler off in the background."""
    job = IngestionJob(
        session_id=request.session_id,
        ingestion_type=IngestionType.URL,
        source_name=request.url,
        status=JobStatus.PENDING,
    )
    db.add(job)
    await db.commit()
    await db.refresh(job)  # populate job.id

    # Pass the session *factory*, not this request's session — the
    # request-scoped session closes as soon as the response returns.
    background_tasks.add_task(
        run_crawler_task,
        job.id,
        request.url,
        request.session_id,
        request.crawl_type,
        AsyncSessionLocal,
    )
    return {"message": "Ingestion job started", "job_id": job.id}
94
+
95
@router.get("/ingest/status/{job_id}")
async def check_job_status(
    job_id: int,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)  # auth required
):
    """Return the IngestionJob row for job_id, or 404 if it doesn't exist."""
    # NOTE(review): any authenticated user can read any job id — confirm
    # whether status should be scoped to the job's owner.
    job = (
        await db.execute(select(IngestionJob).where(IngestionJob.id == job_id))
    ).scalars().first()
    if job is None:
        raise HTTPException(status_code=404, detail="Job not found")
    return job
107
+
108
+ # ==========================================
109
+ # BULK ZIP UPLOAD (Protected)
110
+ # ==========================================
111
async def run_zip_task(job_id, zip_path, session_id, db_factory):
    """Background task: process an uploaded zip inside its own DB session."""
    async with db_factory() as session:
        await SmartZipProcessor(job_id, zip_path, session_id, session).start()
115
+
116
@router.post("/ingest/upload-zip")
async def upload_and_process_zip(
    session_id: str = Form(...),
    file: UploadFile = File(...),
    background_tasks: BackgroundTasks = BackgroundTasks(),
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)  # auth required
):
    """
    Accept a .zip archive (max MAX_ZIP_SIZE_MB), store it on disk, record a
    PENDING job and process the archive in the background.
    """
    if not file.filename or not file.filename.endswith(".zip"):
        raise HTTPException(status_code=400, detail="Only .zip files are allowed.")
    # Bug fix: UploadFile.size can be None (no Content-Length header); the
    # old `file.size > MAX_ZIP_SIZE_BYTES` comparison then raised a
    # TypeError, i.e. an HTTP 500. Skip the check when the size is unknown.
    if file.size is not None and file.size > MAX_ZIP_SIZE_BYTES:
        raise HTTPException(status_code=413, detail=f"File too large. Max size is {MAX_ZIP_SIZE_MB} MB.")

    zip_dir = os.path.join(UPLOAD_DIRECTORY, "zips")
    os.makedirs(zip_dir, exist_ok=True)
    # Security fix: basename() on both client-controlled strings blocks
    # path traversal out of the zips directory.
    safe_session = os.path.basename(session_id)
    safe_filename = os.path.basename(file.filename)
    file_path = os.path.join(zip_dir, f"job_{safe_session}_{safe_filename}")

    with open(file_path, "wb") as buffer:
        shutil.copyfileobj(file.file, buffer)

    new_job = IngestionJob(
        session_id=session_id,
        ingestion_type=IngestionType.ZIP,
        source_name=file.filename,
        status=JobStatus.PENDING
    )
    db.add(new_job)
    await db.commit()
    await db.refresh(new_job)

    # Hand the session factory to the task; this request's session closes
    # when the response returns.
    background_tasks.add_task(run_zip_task, new_job.id, file_path, session_id, AsyncSessionLocal)
    return {"message": "Zip processing started", "job_id": new_job.id}
backend/src/api/routes/settings.py ADDED
@@ -0,0 +1,299 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import json
3
+ from fastapi import APIRouter, Depends, HTTPException, status
4
+ from sqlalchemy.ext.asyncio import AsyncSession
5
+ from sqlalchemy.future import select
6
+ from sqlalchemy import create_engine, inspect
7
+ from pymongo import MongoClient
8
+ from pydantic import BaseModel
9
+ from typing import Dict, List, Any, Tuple
10
+
11
+ # --- Internal Imports ---
12
+ from backend.src.db.session import get_db
13
+ from backend.src.models.user import User
14
+ from backend.src.models.integration import UserIntegration
15
+ from backend.src.api.routes.deps import get_current_user
16
+
17
+ # --- Connectors ---
18
+ from backend.src.services.connectors.sanity_connector import SanityConnector
19
+
20
+ # --- AI & LLM ---
21
+ from backend.src.services.llm.factory import get_llm_model
22
+ from langchain_core.messages import HumanMessage
23
+
24
+ router = APIRouter()
25
+
26
+ # ==========================================
27
+ # DATA MODELS
28
+ # ==========================================
29
+ class IntegrationUpdateRequest(BaseModel):
30
+ provider: str
31
+ credentials: Dict[str, Any]
32
+
33
+ class RefreshSchemaRequest(BaseModel):
34
+ provider: str
35
+
36
+ class ConnectedServiceResponse(BaseModel):
37
+ provider: str
38
+ is_active: bool
39
+ description: str | None = None
40
+ last_updated: str | None = None
41
+
42
+ class UserSettingsResponse(BaseModel):
43
+ user_email: str
44
+ connected_services: List[ConnectedServiceResponse]
45
+
46
+ # --- NEW: Bot Profile Model ---
47
+ class BotSettingsRequest(BaseModel):
48
+ bot_name: str
49
+ bot_instruction: str
50
+
51
+ # ==========================================
52
+ # THE DYNAMIC PROFILER (No Bias) 🧠
53
+ # ==========================================
54
+
55
+ async def generate_data_profile(schema_map: dict, provider: str) -> str:
56
+ """
57
+ Ye function bina kisi bias ke, sirf data structure dekh kar keywords nikalta hai.
58
+ """
59
+ try:
60
+ if not schema_map: return f"Connected to {provider}."
61
+
62
+ llm = get_llm_model()
63
+ schema_str = json.dumps(schema_map)[:3500]
64
+
65
+ prompt = f"""
66
+ Act as a Database Architect. Your job is to analyze the provided Database Schema and generate a 'Semantic Description' for an AI Router.
67
+
68
+ --- INPUT SCHEMA ({provider}) ---
69
+ {schema_str}
70
+
71
+ --- INSTRUCTIONS ---
72
+ 1. Analyze the Table Names (or Collections/Types) and Field Names deeply.
73
+ 2. Identify the core "Business Concepts" represented in this data.
74
+ 3. Construct a dense, keyword-rich summary that describes EXACTLY what is in this database.
75
+ 4. **STRICT RULE:** Do NOT use generic words like "solution" or "platform". Use specific nouns found in the schema (e.g., "invoices", "appointments", "inventory", "cement", "users").
76
+ 5. Do NOT guess. Only describe what you see in the schema keys.
77
+
78
+ --- OUTPUT FORMAT ---
79
+ Write a single paragraph (approx 30 words) describing the data contents.
80
+ Description:
81
+ """
82
+
83
+ response = await llm.ainvoke([HumanMessage(content=prompt)])
84
+ return response.content.strip()
85
+ except Exception as e:
86
+ print(f"⚠️ Profiling failed: {e}")
87
+ return f"Contains data from {provider}."
88
+
89
async def perform_discovery(provider: str, credentials: Dict[str, Any]) -> Tuple[Dict, str]:
    """
    Shared schema-discovery routine used by both "connect" and "refresh".

    Returns (schema_map, description). description may be None when the
    provider is unrecognized or has no usable credentials; on error an
    empty map plus an explanatory description is returned.
    """
    schema_map = {}
    description = None

    try:
        # --- CASE A: SANITY ---
        if provider == 'sanity':
            connector = SanityConnector(credentials=credentials)
            if connector.connect():
                schema_map = connector.fetch_schema_structure()
                description = await generate_data_profile(schema_map, 'Sanity CMS')

        # --- CASE B: SQL DATABASE ---
        elif provider == 'sql':
            db_url = credentials.get('connection_string') or credentials.get('url')
            if db_url:
                engine = create_engine(db_url)
                try:
                    inspector = inspect(engine)
                    tables = inspector.get_table_names()

                    schema_map = {"tables": tables}
                    # Only pull per-table columns for small schemas (cost control).
                    if len(tables) < 15:
                        for t in tables:
                            try:
                                schema_map[t] = [c['name'] for c in inspector.get_columns(t)]
                            except Exception:
                                # Bug fix: was a bare `except:`, which also
                                # swallowed KeyboardInterrupt/SystemExit.
                                pass
                finally:
                    # Bug fix: release the ad-hoc engine's connection pool.
                    engine.dispose()

                description = await generate_data_profile(schema_map, 'SQL Database')

        # --- CASE C: MONGODB ---
        elif provider == 'mongodb':
            mongo_uri = credentials.get('connection_string') or credentials.get('url')
            if mongo_uri:
                client = MongoClient(mongo_uri)
                try:
                    db_name = client.get_database().name
                    collections = client[db_name].list_collection_names()

                    schema_map = {"collections": collections}
                    # Sample one document per collection (first 5 only) to
                    # learn field names; skip Mongo-internal '_' keys.
                    for col in collections[:5]:
                        one_doc = client[db_name][col].find_one()
                        if one_doc:
                            schema_map[col] = [k for k in one_doc.keys() if not k.startswith('_')]
                finally:
                    # Bug fix: close the ad-hoc Mongo connection.
                    client.close()

                description = await generate_data_profile(schema_map, 'MongoDB NoSQL')

        # --- CASE D: QDRANT / OTHERS ---
        elif provider == 'qdrant':
            description = "Contains uploaded documents, policies, and knowledge base."

        return schema_map, description

    except Exception as e:
        print(f"❌ Discovery Error for {provider}: {e}")
        return {}, f"Connected to {provider} (Auto-discovery failed: {str(e)})"
148
+
149
+ # ==========================================
150
+ # 1. SAVE / CONNECT INTEGRATION
151
+ # ==========================================
152
@router.post("/settings/integration", status_code=status.HTTP_201_CREATED)
async def save_or_update_integration(
    data: IntegrationUpdateRequest,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Create or update one provider integration for the logged-in user,
    running schema discovery + AI profiling as part of the save.
    """
    try:
        existing_integration = (
            await db.execute(
                select(UserIntegration).where(
                    UserIntegration.user_id == str(current_user.id),
                    UserIntegration.provider == data.provider,
                )
            )
        ).scalars().first()

        credentials_json = json.dumps(data.credentials)
        schema_map, description = await perform_discovery(data.provider, data.credentials)

        if existing_integration:
            # Update in place; keep old schema/profile if discovery came back empty.
            existing_integration.credentials = credentials_json
            existing_integration.is_active = True
            if schema_map:
                existing_integration.schema_map = schema_map
            if description:
                existing_integration.profile_description = description
            message = f"Integration for {data.provider} updated."
        else:
            new_integration = UserIntegration(
                user_id=str(current_user.id),
                provider=data.provider,
                is_active=True,
                schema_map=schema_map,
                profile_description=description,
            )
            new_integration.credentials = credentials_json
            db.add(new_integration)
            message = f"Integration for {data.provider} connected."

        await db.commit()
        return {
            "message": message,
            "provider": data.provider,
            "profile": description
        }

    except Exception as e:
        await db.rollback()
        print(f"❌ Error saving integration: {e}")
        raise HTTPException(status_code=500, detail=str(e))
198
+
199
+ # ==========================================
200
+ # 2. REFRESH SCHEMA
201
+ # ==========================================
202
@router.post("/settings/integration/refresh")
async def refresh_integration_schema(
    data: RefreshSchemaRequest,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Re-run discovery for an already-connected integration and store the result."""
    print(f"🔄 Refreshing schema for {data.provider} (User: {current_user.id})")

    try:
        stmt = select(UserIntegration).where(
            UserIntegration.user_id == str(current_user.id),
            UserIntegration.provider == data.provider
        )
        result = await db.execute(stmt)
        integration = result.scalars().first()

        if not integration:
            raise HTTPException(status_code=404, detail="Integration not found. Please connect first.")

        # Credentials are stored as a JSON string on the row.
        creds_dict = json.loads(integration.credentials)

        new_schema, new_description = await perform_discovery(data.provider, creds_dict)

        # Only overwrite stored values when discovery actually produced something.
        if new_schema:
            integration.schema_map = dict(new_schema)

        if new_description:
            integration.profile_description = new_description

        await db.commit()

        return {
            "message": "Schema and profile refreshed successfully!",
            "provider": data.provider,
            "new_profile": new_description
        }

    except HTTPException:
        # Bug fix: the deliberate 404 above used to be caught by the generic
        # handler below and re-raised as a 500 — let it pass through.
        raise
    except Exception as e:
        print(f"❌ Refresh Failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
243
+
244
+ # ==========================================
245
+ # 3. UPDATE BOT PROFILE (NEW ✅)
246
+ # ==========================================
247
@router.post("/settings/bot-profile")
async def update_bot_profile(
    data: BotSettingsRequest,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Store the user's custom chatbot name and system instruction.
    """
    try:
        current_user.bot_name = data.bot_name
        current_user.bot_instruction = data.bot_instruction

        db.add(current_user)
        await db.commit()

        return {
            "message": "Bot profile updated successfully!",
            "bot_name": data.bot_name,
            "bot_instruction": data.bot_instruction
        }
    except Exception as e:
        # Bug fix: roll the session back so a failed commit does not leave
        # it in a broken state for the rest of the request scope.
        await db.rollback()
        print(f"❌ Bot Profile Update Failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
271
+
272
+ # ==========================================
273
+ # 4. GET USER INTEGRATIONS
274
+ # ==========================================
275
@router.get("/settings/integrations", response_model=UserSettingsResponse)
async def get_user_integrations(
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """List every integration the logged-in user has configured."""
    rows = (
        await db.execute(
            select(UserIntegration).where(
                UserIntegration.user_id == str(current_user.id)
            )
        )
    ).scalars().all()

    connected_services = []
    for row in rows:
        # Prefer updated_at; fall back to created_at for never-updated rows.
        stamp = row.updated_at if row.updated_at else row.created_at
        connected_services.append(
            ConnectedServiceResponse(
                provider=row.provider,
                is_active=row.is_active,
                description=row.profile_description,
                last_updated=str(stamp),
            )
        )

    return {
        "user_email": current_user.email,
        "connected_services": connected_services
    }
backend/src/core/config.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ from pydantic_settings import BaseSettings, SettingsConfigDict
4
+ from functools import lru_cache
5
+ from dotenv import load_dotenv
6
+
7
+ load_dotenv()
8
+
9
class Settings(BaseSettings):
    """Central application configuration, loaded from environment / .env."""

    # ------------------- CORE PROJECT SETTINGS -------------------
    PROJECT_NAME: str = "OmniAgent Core"
    VERSION: str = "1.0.0"
    API_V1_STR: str = "/api/v1"

    # ------------------- SECURITY -------------------
    # Signs the JWT access tokens — MUST be overridden in production.
    SECRET_KEY: str = os.getenv("SECRET_KEY", "super-secret-key-change-me")
    ACCESS_TOKEN_EXPIRE_MINUTES: int = 30

    # ------------------- NETWORK / HOSTING -------------------
    QDRANT_HOST: str = os.getenv("QDRANT_HOST", "localhost")
    QDRANT_PORT: int = 6333

    MONGO_HOST: str = os.getenv("MONGO_HOST", "localhost")
    MONGO_PORT: int = int(os.getenv("MONGO_PORT", 27018))
    MONGO_USER: str = os.getenv("MONGO_INITDB_ROOT_USERNAME", "admin")
    MONGO_PASS: str = os.getenv("MONGO_INITDB_ROOT_PASSWORD", "super_secret_admin_pass")

    # ------------------- DATABASES -------------------
    # Leading underscore: pydantic treats this as a private attribute, so
    # the raw URL is resolved once here via os.getenv rather than by
    # pydantic's own env parsing.
    _DATABASE_URL: str = os.getenv("POSTGRES_URL", "sqlite+aiosqlite:///./omni_agent.db")

    @property
    def DATABASE_URL(self) -> str:
        """Normalized async SQLAlchemy URL (postgres:// -> postgresql+asyncpg://)."""
        url = self._DATABASE_URL
        if url and "?" in url:
            # Drop query params (e.g. sslmode) that the async driver rejects.
            url = url.split("?")[0]
        if url and url.startswith("postgres://"):
            url = url.replace("postgres://", "postgresql+asyncpg://", 1)
        elif url and url.startswith("postgresql://") and "+asyncpg" not in url:
            url = url.replace("postgresql://", "postgresql+asyncpg://", 1)

        # Security fix: the old debug print leaked the DB password to the
        # console; mask the credential portion before logging.
        safe_url = url
        if "://" in url and "@" in url:
            scheme, rest = url.split("://", 1)
            creds, host = rest.rsplit("@", 1)
            safe_url = f"{scheme}://{creds.split(':', 1)[0]}:***@{host}"
        print(f"🕵️ DEBUG: Connecting to DB URL: {safe_url}")
        return url

    @property
    def QDRANT_URL(self) -> str:
        """Full Qdrant endpoint; QDRANT_HOST may itself be a complete URL."""
        if self.QDRANT_HOST.startswith("http"):
            return self.QDRANT_HOST
        return f"http://{self.QDRANT_HOST}:{self.QDRANT_PORT}"

    QDRANT_COLLECTION_NAME: str = "omni_agent_main_collection"
    QDRANT_API_KEY: str | None = None

    # ------------------- RAG / EMBEDDINGS -------------------
    EMBEDDING_PROVIDER: str = "local"
    EMBEDDING_MODEL_NAME: str = "sentence-transformers/all-MiniLM-L6-v2"

    # ------------------- AI MODELS -------------------
    LLM_PROVIDER: str = "generic"
    LLM_MODEL_NAME: str = "gpt-3.5-turbo"
    LLM_BASE_URL: str | None = None
    LLM_API_KEY: str | None = None

    GROQ_API_KEY: str | None = None
    GOOGLE_API_KEY: str | None = None
    OPENAI_API_KEY: str | None = None

    model_config = SettingsConfigDict(env_file=".env", extra="ignore", env_file_encoding='utf-8')
+ model_config = SettingsConfigDict(env_file=".env", extra="ignore", env_file_encoding='utf-8')
71
+
72
+ @lru_cache()
73
+ def get_settings():
74
+ return Settings()
75
+
76
+ settings = get_settings()
backend/src/db/base.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
# backend/src/db/base.py
# Fix: `sqlalchemy.ext.declarative.declarative_base` has been deprecated
# since SQLAlchemy 1.4 — the supported import lives in `sqlalchemy.orm`.
from sqlalchemy.orm import declarative_base

# Every ORM model inherits from this Base so that Base.metadata knows all
# tables (init_db uses it to drop/create the schema).
Base = declarative_base()
backend/src/db/session.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker
2
+ from sqlalchemy import create_engine
3
+ from backend.src.core.config import settings
4
+
5
+ # Connection Arguments
6
+ connect_args = {}
7
+ if "sqlite" in settings.DATABASE_URL:
8
+ connect_args = {"check_same_thread": False}
9
+
10
+ # --- ROBUST ENGINE CREATION (The Fix) ---
11
+ # Ye settings Neon/Serverless ke liye best hain
12
+ engine = create_async_engine(
13
+ settings.DATABASE_URL,
14
+ echo=False,
15
+ connect_args=connect_args,
16
+ pool_size=5, # 5 connections ka pool rakho
17
+ max_overflow=10, # Agar zaroorat pade to 10 aur bana lo
18
+ pool_recycle=300, # Har 5 minute (300s) mein purane connections ko refresh karo (Sleep issue fix)
19
+ pool_pre_ping=True, # Har query se pehle check karo ke connection zinda hai ya nahi
20
+ )
21
+
22
+ # Session Maker
23
+ AsyncSessionLocal = async_sessionmaker(
24
+ bind=engine,
25
+ class_=AsyncSession,
26
+ expire_on_commit=False,
27
+ autoflush=False,
28
+ )
29
+
30
+ # Dependency Injection
31
async def get_db():
    """FastAPI dependency: yield a request-scoped AsyncSession."""
    async with AsyncSessionLocal() as session:
        try:
            yield session
        finally:
            # Redundant with the context manager's cleanup, but explicit.
            await session.close()
backend/src/init_db.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ from backend.src.db.session import engine
3
+ from backend.src.db.base import Base
4
+
5
+ # --- Import ALL Models here ---
6
+ # Ye zaroori hai taake SQLAlchemy ko pata chale ke kaunse tables banane hain
7
+ from backend.src.models.chat import ChatHistory
8
+ from backend.src.models.ingestion import IngestionJob
9
+ from backend.src.models.integration import UserIntegration # <--- Isme naya column hai
10
+ from backend.src.models.user import User
11
+
12
+ async def init_database():
13
+ print("🚀 Connecting to the database...")
14
+ async with engine.begin() as conn:
15
+ # --- CRITICAL FOR SCHEMA UPDATE ---
16
+ # Hum purane tables DROP kar rahe hain taake naya 'profile_description' column add ho sake.
17
+ # Note: Isse purana data udd jayega (Dev environment ke liye theek hai).
18
+ print("🗑️ Dropping old tables to apply new Schema...")
19
+ await conn.run_sync(Base.metadata.drop_all)
20
+
21
+ print("⚙️ Creating new tables (Users, Chats, Integrations, Jobs)...")
22
+ await conn.run_sync(Base.metadata.create_all)
23
+ print("✅ Database tables created successfully!")
24
+
25
+ if __name__ == "__main__":
26
+ print("Starting database initialization...")
27
+ asyncio.run(init_database())
backend/src/main.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from fastapi import FastAPI
3
+ from fastapi.staticfiles import StaticFiles # <--- New Import
4
+ from fastapi.middleware.cors import CORSMiddleware
5
+ from backend.src.core.config import settings
6
+
7
+ # --- API Route Imports ---
8
+ from backend.src.api.routes import chat, ingestion, auth, settings as settings_route
9
+
10
+ # 1. App Initialize karein
11
+ app = FastAPI(
12
+ title=settings.PROJECT_NAME,
13
+ version=settings.VERSION,
14
+ description="OmniAgent Core API - The Intelligent Employee"
15
+ )
16
+
17
+ # 2. CORS Setup (Security)
18
+ # Frontend ko Backend se baat karne ki ijazat dena
19
+ app.add_middleware(
20
+ CORSMiddleware,
21
+ allow_origins=["*"], # Production mein hum isay specific domain karenge
22
+ allow_credentials=True,
23
+ allow_methods=["*"],
24
+ allow_headers=["*"],
25
+ )
26
+
27
+ # 3. Mount Static Files (Chat Widget ke liye) 🎨
28
+ # Ye check karta hai ke 'static' folder hai ya nahi, agar nahi to banata hai
29
+ if not os.path.exists("static"):
30
+ os.makedirs("static")
31
+
32
+ # Is line ka matlab hai: Jo bhi file 'static' folder mein hogi, wo '/static/filename' par milegi
33
+ app.mount("/static", StaticFiles(directory="static"), name="static")
34
+
35
+ # 4. Health Check Route
36
+ @app.get("/")
37
+ async def root():
38
+ return {
39
+ "message": "Welcome to OmniAgent Core 🚀",
40
+ "status": "active",
41
+ "widget_url": "/static/widget.js" # Widget ka link bhi bata diya
42
+ }
43
+
44
+ # 5. API Router Includes
45
+ app.include_router(auth.router, prefix=settings.API_V1_STR, tags=["Authentication"])
46
+ app.include_router(settings_route.router, prefix=settings.API_V1_STR, tags=["User Settings"])
47
+ app.include_router(chat.router, prefix=settings.API_V1_STR, tags=["Chat"])
48
+ app.include_router(ingestion.router, prefix=settings.API_V1_STR, tags=["Ingestion"])
49
+
50
+ if __name__ == "__main__":
51
+ import uvicorn
52
+ # Server Run command (Debugging ke liye)
53
+ uvicorn.run("backend.src.main:app", host="0.0.0.0", port=8000, reload=True)
backend/src/models/chat.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # backend/src/models/chat.py
2
+ from sqlalchemy import Column, Integer, String, Text, DateTime, Boolean
3
+ from sqlalchemy.sql import func
4
+ from backend.src.db.base import Base
5
+
6
+ class ChatHistory(Base):
7
+ __tablename__ = "chat_history"
8
+
9
+ id = Column(Integer, primary_key=True, index=True)
10
+ session_id = Column(String, index=True) # User ka Session ID
11
+ human_message = Column(Text) # User ne kya kaha
12
+ ai_message = Column(Text) # Bot ne kya jawab diya
13
+ timestamp = Column(DateTime(timezone=True), server_default=func.now()) # Kab baat hui
14
+
15
+ # Metadata (Optional: Konsa tool use hua, kitne tokens lage)
16
+ provider = Column(String)
17
+ tokens_used = Column(Integer, default=0)
backend/src/models/ingestion.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sqlalchemy import Column, Integer, String, Text, DateTime, Enum, JSON # <--- JSON import karein
2
+ from sqlalchemy.sql import func
3
+ import enum
4
+ from backend.src.db.base import Base
5
+
6
+ class JobStatus(str, enum.Enum):
7
+ PENDING = "pending"
8
+ PROCESSING = "processing"
9
+ COMPLETED = "completed"
10
+ FAILED = "failed"
11
+
12
+ class IngestionType(str, enum.Enum):
13
+ URL = "url"
14
+ ZIP = "zip"
15
+ FILE = "file" # (Future use ke liye)
16
+
17
+ class IngestionJob(Base):
18
+ __tablename__ = "ingestion_jobs"
19
+
20
+ id = Column(Integer, primary_key=True, index=True)
21
+ session_id = Column(String, index=True)
22
+
23
+ # --- NEW COLUMNS ---
24
+ ingestion_type = Column(String, default=IngestionType.URL) # Taake pata chale ye URL hai ya Zip
25
+ source_name = Column(String, nullable=False) # Ye URL ya Zip file ka naam hoga
26
+
27
+ status = Column(String, default=JobStatus.PENDING)
28
+
29
+ # Progress Tracking
30
+ items_processed = Column(Integer, default=0)
31
+ total_items = Column(Integer, default=0)
32
+
33
+ # Detailed Logging
34
+ details = Column(JSON, default=[]) # <--- Har file ka result yahan aayega
35
+
36
+ error_message = Column(Text, nullable=True)
37
+ created_at = Column(DateTime(timezone=True), server_default=func.now())
38
+ updated_at = Column(DateTime(timezone=True), onupdate=func.now())
39
+
40
+ # 'url', 'crawl_type' waghaira columns hata diye taake table generic rahe
backend/src/models/integration.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from sqlalchemy import Column, Integer, String, Text, Boolean, JSON, DateTime
3
+ from sqlalchemy.sql import func
4
+ from backend.src.db.base import Base
5
+ from backend.src.utils.security import SecurityUtils
6
+
7
+ class UserIntegration(Base):
8
+ __tablename__ = "user_integrations"
9
+
10
+ id = Column(Integer, primary_key=True, index=True)
11
+ user_id = Column(String, index=True)
12
+
13
+ provider = Column(String, nullable=False) # e.g., 'sanity', 'sql', 'mongodb'
14
+
15
+ # Store encrypted credentials
16
+ _credentials = Column("credentials", Text, nullable=False)
17
+
18
+ # The Map (Technical Structure)
19
+ schema_map = Column(JSON, default={})
20
+
21
+ # --- NEW COLUMN: The semantic description of the data ---
22
+ profile_description = Column(Text, nullable=True)
23
+
24
+ is_active = Column(Boolean, default=True)
25
+ created_at = Column(DateTime(timezone=True), server_default=func.now())
26
+ updated_at = Column(DateTime(timezone=True), onupdate=func.now())
27
+
28
+ @property
29
+ def credentials(self):
30
+ return SecurityUtils.decrypt(self._credentials)
31
+
32
+ @credentials.setter
33
+ def credentials(self, value):
34
+ self._credentials = SecurityUtils.encrypt(value)
backend/src/models/user.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sqlalchemy import Column, Integer, String, DateTime, Boolean, Text # Text add kiya
2
+ from sqlalchemy.sql import func
3
+ from backend.src.db.base import Base
4
+
5
+ class User(Base):
6
+ __tablename__ = "users"
7
+
8
+ id = Column(Integer, primary_key=True, index=True)
9
+ email = Column(String, unique=True, index=True, nullable=False)
10
+ hashed_password = Column(String, nullable=False)
11
+ full_name = Column(String, nullable=True)
12
+ is_active = Column(Boolean, default=True)
13
+
14
+ # --- NEW: Bot Customization ---
15
+ bot_name = Column(String, default="Support Agent")
16
+ bot_instruction = Column(Text, default="You are a helpful customer support agent. Only answer questions related to the provided data.")
17
+
18
+ created_at = Column(DateTime(timezone=True), server_default=func.now())
19
+ updated_at = Column(DateTime(timezone=True), onupdate=func.now())
backend/src/schemas/chat.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+ from typing import Optional
3
+
4
+ # User jab sawal bhejegaecho $GOOGLE_API_KEY
5
+ class ChatRequest(BaseModel):
6
+ message: str
7
+ # Isay Optional bana diya. Default value None hai.
8
+ session_id: Optional[str] = None
9
+
10
+ # Server jab jawab dega
11
+ class ChatResponse(BaseModel):
12
+ response: str
13
+ # Yahan bhi Optional, kyunki guest ke paas ID nahi hogi
14
+ session_id: Optional[str] = None
15
+ provider: str
backend/src/services/chat_service.py ADDED
@@ -0,0 +1,598 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # import json
3
+ # from sqlalchemy.ext.asyncio import AsyncSession
4
+ # from sqlalchemy.future import select
5
+
6
+ # # --- Model Imports ---
7
+ # from backend.src.models.chat import ChatHistory
8
+ # from backend.src.models.integration import UserIntegration
9
+
10
+ # # --- Dynamic Factory & Tool Imports ---
11
+ # from backend.src.services.llm.factory import get_llm_model
12
+ # from backend.src.services.vector_store.qdrant_adapter import get_vector_store
13
+ # from backend.src.services.security.pii_scrubber import PIIScrubber
14
+
15
+ # # --- Agents ---
16
+ # from backend.src.services.tools.secure_agent import get_secure_agent
17
+ # from backend.src.services.tools.nosql_agent import get_nosql_agent
18
+ # from backend.src.services.tools.cms_agent import get_cms_agent
19
+
20
+ # # --- Router ---
21
+ # from backend.src.services.routing.semantic_router import SemanticRouter
22
+
23
+ # # --- LangChain Core ---
24
+ # from langchain_core.messages import HumanMessage, AIMessage
25
+ # from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
26
+
27
+ # # ==========================================
28
+ # # HELPER FUNCTIONS (UPDATED STRICT LOGIC)
29
+ # # ==========================================
30
+
31
+ # async def get_user_integrations(user_id: str, db: AsyncSession) -> dict:
32
+ # if not user_id: return {}
33
+
34
+ # query = select(UserIntegration).where(UserIntegration.user_id == user_id, UserIntegration.is_active == True)
35
+ # result = await db.execute(query)
36
+ # integrations = result.scalars().all()
37
+
38
+ # settings = {}
39
+ # for i in integrations:
40
+ # try:
41
+ # creds = json.loads(i.credentials)
42
+ # creds['provider'] = i.provider
43
+ # creds['schema_map'] = i.schema_map if i.schema_map else {}
44
+
45
+ # # --- 🔥 FIX: NO DEFAULT DESCRIPTION ---
46
+ # # Agar DB mein description NULL hai, to NULL hi rehne do.
47
+ # # Hum isay Router mein add hi nahi karenge.
48
+ # creds['description'] = i.profile_description
49
+
50
+ # settings[i.provider] = creds
51
+ # except (json.JSONDecodeError, TypeError):
52
+ # continue
53
+ # return settings
54
+
55
+ # async def save_chat_to_db(db: AsyncSession, session_id: str, human_msg: str, ai_msg: str, provider: str):
56
+ # if not session_id: return
57
+ # safe_human = PIIScrubber.scrub(human_msg)
58
+ # safe_ai = PIIScrubber.scrub(ai_msg)
59
+ # new_chat = ChatHistory(
60
+ # session_id=session_id, human_message=safe_human, ai_message=safe_ai, provider=provider
61
+ # )
62
+ # db.add(new_chat)
63
+ # await db.commit()
64
+
65
+ # async def get_chat_history(session_id: str, db: AsyncSession):
66
+ # if not session_id: return []
67
+ # query = select(ChatHistory).where(ChatHistory.session_id == session_id).order_by(ChatHistory.timestamp.asc())
68
+ # result = await db.execute(query)
69
+ # return result.scalars().all()
70
+
71
+ # OMNI_SUPPORT_PROMPT = "You are OmniAgent. Answer based on the provided context or chat history."
72
+
73
+ # # ==========================================
74
+ # # MAIN CHAT LOGIC
75
+ # # ==========================================
76
+ # async def process_chat(message: str, session_id: str, user_id: str, db: AsyncSession):
77
+
78
+ # # 1. User Settings
79
+ # user_settings = await get_user_integrations(user_id, db)
80
+
81
+ # # 2. LLM Check
82
+ # llm_creds = user_settings.get('groq') or user_settings.get('openai')
83
+ # if not llm_creds:
84
+ # return "Please configure your AI Model in Settings."
85
+
86
+ # # 3. Build Tool Map for Router (STRICT FILTERING)
87
+ # tools_map = {}
88
+ # for provider, config in user_settings.items():
89
+ # if provider in ['sanity', 'sql', 'mongodb']:
90
+ # # 🔥 Check: Agar Description hai, tabhi Router mein daalo
91
+ # if config.get('description'):
92
+ # tools_map[provider] = config['description']
93
+ # else:
94
+ # print(f"⚠️ [Router] Skipping {provider} - No Description found.")
95
+
96
+ # # 4. SEMANTIC DECISION
97
+ # selected_provider = None
98
+ # if tools_map:
99
+ # router = SemanticRouter()
100
+ # selected_provider = router.route(message, tools_map)
101
+ # else:
102
+ # print("⚠️ [Router] No active tools with descriptions found.")
103
+
104
+ # response_text = ""
105
+ # provider_name = "general_chat"
106
+
107
+ # # 5. Route to Winner
108
+ # if selected_provider:
109
+ # print(f"👉 [Router] Selected Tool: {selected_provider.upper()}")
110
+ # try:
111
+ # if selected_provider == 'sanity':
112
+ # schema = user_settings['sanity'].get('schema_map', {})
113
+ # agent = get_cms_agent(user_id=user_id, schema_map=schema, llm_credentials=llm_creds)
114
+ # res = await agent.ainvoke({"input": message})
115
+ # response_text = str(res.get('output', ''))
116
+ # provider_name = "cms_agent"
117
+
118
+ # elif selected_provider == 'sql':
119
+ # role = "admin" if user_id == '99' else "customer"
120
+ # agent = get_secure_agent(int(user_id), role, user_settings['sql'], llm_credentials=llm_creds)
121
+ # res = await agent.ainvoke({"input": message})
122
+ # response_text = str(res.get('output', ''))
123
+ # provider_name = "sql_agent"
124
+
125
+ # elif selected_provider == 'mongodb':
126
+ # agent = get_nosql_agent(user_id, user_settings['mongodb'], llm_credentials=llm_creds)
127
+ # res = await agent.ainvoke({"input": message})
128
+ # response_text = str(res.get('output', ''))
129
+ # provider_name = "nosql_agent"
130
+
131
+ # # Anti-Hallucination
132
+ # if not response_text or "error" in response_text.lower():
133
+ # response_text = "" # Trigger Fallback
134
+
135
+ # except Exception as e:
136
+ # print(f"❌ [Router] Execution Failed: {e}")
137
+ # response_text = ""
138
+
139
+ # # 6. Fallback / RAG
140
+ # if not response_text:
141
+ # print("👉 [Router] Fallback to RAG/General Chat...")
142
+ # try:
143
+ # llm = get_llm_model(credentials=llm_creds)
144
+
145
+ # context = ""
146
+ # if 'qdrant' in user_settings:
147
+ # try:
148
+ # vector_store = get_vector_store(credentials=user_settings['qdrant'])
149
+ # docs = await vector_store.asimilarity_search(message, k=3)
150
+ # if docs:
151
+ # context = "\n\n".join([d.page_content for d in docs])
152
+ # except Exception as e:
153
+ # print(f"⚠️ RAG Warning: {e}")
154
+
155
+ # system_instruction = OMNI_SUPPORT_PROMPT
156
+ # if context: system_instruction = f"Context:\n{context}"
157
+
158
+ # history = await get_chat_history(session_id, db)
159
+ # formatted_history = []
160
+ # for chat in history:
161
+ # formatted_history.append(HumanMessage(content=chat.human_message))
162
+ # if chat.ai_message: formatted_history.append(AIMessage(content=chat.ai_message))
163
+
164
+ # prompt = ChatPromptTemplate.from_messages([
165
+ # ("system", system_instruction),
166
+ # MessagesPlaceholder(variable_name="chat_history"),
167
+ # ("human", "{question}")
168
+ # ])
169
+ # chain = prompt | llm
170
+
171
+ # ai_response = await chain.ainvoke({"chat_history": formatted_history, "question": message})
172
+ # response_text = ai_response.content
173
+ # provider_name = "rag_fallback"
174
+
175
+ # except Exception as e:
176
+ # response_text = "I am currently unable to process your request."
177
+
178
+ # await save_chat_to_db(db, session_id, message, response_text, provider_name)
179
+ # return response_text
180
+ import json
181
+ from sqlalchemy.ext.asyncio import AsyncSession
182
+ from sqlalchemy.future import select
183
+
184
+ # --- Model Imports ---
185
+ from backend.src.models.chat import ChatHistory
186
+ from backend.src.models.integration import UserIntegration
187
+ from backend.src.models.user import User # Added User model for Bot Persona
188
+
189
+ # --- Dynamic Factory & Tool Imports ---
190
+ from backend.src.services.llm.factory import get_llm_model
191
+ from backend.src.services.vector_store.qdrant_adapter import get_vector_store
192
+ from backend.src.services.security.pii_scrubber import PIIScrubber
193
+
194
+ # --- Agents ---
195
+ from backend.src.services.tools.secure_agent import get_secure_agent
196
+ from backend.src.services.tools.nosql_agent import get_nosql_agent
197
+ from backend.src.services.tools.cms_agent import get_cms_agent
198
+
199
+ # --- Router ---
200
+ from backend.src.services.routing.semantic_router import SemanticRouter
201
+
202
+ # --- LangChain Core ---
203
+ from langchain_core.messages import HumanMessage, AIMessage
204
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
205
+
206
+ # ==========================================
207
+ # HELPER FUNCTIONS
208
+ # ==========================================
209
+
210
+ async def get_user_integrations(user_id: str, db: AsyncSession) -> dict:
211
+ """Fetches active integrations and filters valid descriptions."""
212
+ if not user_id: return {}
213
+
214
+ query = select(UserIntegration).where(UserIntegration.user_id == user_id, UserIntegration.is_active == True)
215
+ result = await db.execute(query)
216
+ integrations = result.scalars().all()
217
+
218
+ settings = {}
219
+ for i in integrations:
220
+ try:
221
+ creds = json.loads(i.credentials)
222
+ creds['provider'] = i.provider
223
+ creds['schema_map'] = i.schema_map if i.schema_map else {}
224
+
225
+ # --- STRICT CHECK ---
226
+ if i.profile_description:
227
+ creds['description'] = i.profile_description
228
+
229
+ settings[i.provider] = creds
230
+ except (json.JSONDecodeError, TypeError):
231
+ continue
232
+ return settings
233
+
234
+ async def save_chat_to_db(db: AsyncSession, session_id: str, human_msg: str, ai_msg: str, provider: str):
235
+ """Saves chat history with PII redaction."""
236
+ if not session_id: return
237
+ safe_human = PIIScrubber.scrub(human_msg)
238
+ safe_ai = PIIScrubber.scrub(ai_msg)
239
+ new_chat = ChatHistory(
240
+ session_id=session_id, human_message=safe_human, ai_message=safe_ai, provider=provider
241
+ )
242
+ db.add(new_chat)
243
+ await db.commit()
244
+
245
+ async def get_chat_history(session_id: str, db: AsyncSession):
246
+ """Retrieves past conversation history."""
247
+ if not session_id: return []
248
+ query = select(ChatHistory).where(ChatHistory.session_id == session_id).order_by(ChatHistory.timestamp.asc())
249
+ result = await db.execute(query)
250
+ return result.scalars().all()
251
+
252
+ async def get_bot_persona(user_id: str, db: AsyncSession):
253
+ """Fetches custom Bot Name and Instructions from User table."""
254
+ try:
255
+ # User ID ko int mein convert karke query karein
256
+ stmt = select(User).where(User.id == int(user_id))
257
+ result = await db.execute(stmt)
258
+ user = result.scalars().first()
259
+
260
+ if user:
261
+ return {
262
+ "name": getattr(user, "bot_name", "OmniAgent"),
263
+ "instruction": getattr(user, "bot_instruction", "You are a helpful AI assistant.")
264
+ }
265
+ except Exception as e:
266
+ print(f"⚠️ Error fetching persona: {e}")
267
+ pass
268
+
269
+ # Fallback Default Persona
270
+ return {"name": "OmniAgent", "instruction": "You are a helpful AI assistant."}
271
+
272
+ # ==========================================
273
+ # MAIN CHAT LOGIC
274
+ # ==========================================
275
+ async def process_chat(message: str, session_id: str, user_id: str, db: AsyncSession):
276
+
277
+ # 1. Fetch User Settings & Persona
278
+ user_settings = await get_user_integrations(user_id, db)
279
+ bot_persona = await get_bot_persona(user_id, db) # <--- Persona Load kiya
280
+
281
+ # 2. LLM Check
282
+ llm_creds = user_settings.get('groq') or user_settings.get('openai')
283
+ if not llm_creds:
284
+ return "Please configure your AI Model in Settings."
285
+
286
+ # 3. Build Tool Map for Router
287
+ tools_map = {}
288
+ for provider, config in user_settings.items():
289
+ if provider in ['sanity', 'sql', 'mongodb']:
290
+ if config.get('description'):
291
+ tools_map[provider] = config['description']
292
+
293
+ # 4. SEMANTIC DECISION (Router)
294
+ selected_provider = None
295
+ if tools_map:
296
+ router = SemanticRouter() # Singleton Instance
297
+ selected_provider = router.route(message, tools_map)
298
+
299
+ response_text = ""
300
+ provider_name = "general_chat"
301
+
302
+ # 5. Route to Winner
303
+ if selected_provider:
304
+ print(f"👉 [Router] Selected Tool: {selected_provider.upper()}")
305
+ try:
306
+ if selected_provider == 'sanity':
307
+ schema = user_settings['sanity'].get('schema_map', {})
308
+ agent = get_cms_agent(user_id=user_id, schema_map=schema, llm_credentials=llm_creds)
309
+ res = await agent.ainvoke({"input": message})
310
+ response_text = str(res.get('output', ''))
311
+ provider_name = "cms_agent"
312
+
313
+ elif selected_provider == 'sql':
314
+ role = "admin" if user_id == '99' else "customer"
315
+ agent = get_secure_agent(int(user_id), role, user_settings['sql'], llm_credentials=llm_creds)
316
+ res = await agent.ainvoke({"input": message})
317
+ response_text = str(res.get('output', ''))
318
+ provider_name = "sql_agent"
319
+
320
+ elif selected_provider == 'mongodb':
321
+ agent = get_nosql_agent(user_id, user_settings['mongodb'], llm_credentials=llm_creds)
322
+ res = await agent.ainvoke({"input": message})
323
+ response_text = str(res.get('output', ''))
324
+ provider_name = "nosql_agent"
325
+
326
+ # Anti-Hallucination
327
+ if not response_text or "error" in response_text.lower():
328
+ print(f"⚠️ [Router] Tool {selected_provider} failed. Triggering Fallback.")
329
+ response_text = ""
330
+
331
+ except Exception as e:
332
+ print(f"❌ [Router] Execution Failed: {e}")
333
+ response_text = ""
334
+
335
+ # 6. Fallback / RAG (Using Custom Persona)
336
+ if not response_text:
337
+ print("👉 [Router] Fallback to RAG/General Chat...")
338
+ try:
339
+ llm = get_llm_model(credentials=llm_creds)
340
+
341
+ # Context from Vector DB
342
+ context = ""
343
+ if 'qdrant' in user_settings:
344
+ try:
345
+ vector_store = get_vector_store(credentials=user_settings['qdrant'])
346
+ docs = await vector_store.asimilarity_search(message, k=3)
347
+ if docs:
348
+ context = "\n\n".join([d.page_content for d in docs])
349
+ except Exception as e:
350
+ print(f"⚠️ RAG Warning: {e}")
351
+
352
+ # --- 🔥 DYNAMIC SYSTEM PROMPT ---
353
+ system_instruction = f"""
354
+ IDENTITY: You are '{bot_persona['name']}'.
355
+ MISSION: {bot_persona['instruction']}
356
+
357
+ CONTEXT FROM KNOWLEDGE BASE:
358
+ {context if context else "No specific documents found."}
359
+
360
+ Answer the user's question based on the context above or your general knowledge if permitted by your mission.
361
+ """
362
+
363
+ # History Load
364
+ history = await get_chat_history(session_id, db)
365
+ formatted_history = []
366
+ for chat in history:
367
+ formatted_history.append(HumanMessage(content=chat.human_message))
368
+ if chat.ai_message: formatted_history.append(AIMessage(content=chat.ai_message))
369
+
370
+ # LLM Call
371
+ prompt = ChatPromptTemplate.from_messages([
372
+ ("system", system_instruction),
373
+ MessagesPlaceholder(variable_name="chat_history"),
374
+ ("human", "{question}")
375
+ ])
376
+ chain = prompt | llm
377
+
378
+ ai_response = await chain.ainvoke({"chat_history": formatted_history, "question": message})
379
+ response_text = ai_response.content
380
+ provider_name = "rag_fallback"
381
+
382
+ except Exception as e:
383
+ print(f"❌ Fallback Error: {e}")
384
+ response_text = "I am currently unable to process your request. Please check your AI configuration."
385
+
386
+ # 7. Save to DB
387
+ await save_chat_to_db(db, session_id, message, response_text, provider_name)
388
+ return response_text
389
+ # import json
390
+ # from sqlalchemy.ext.asyncio import AsyncSession
391
+ # from sqlalchemy.future import select
392
+
393
+ # # --- Model Imports ---
394
+ # from backend.src.models.chat import ChatHistory
395
+ # from backend.src.models.integration import UserIntegration
396
+ # from backend.src.models.user import User # Added User model for Bot Persona
397
+
398
+ # # --- Dynamic Factory & Tool Imports ---
399
+ # from backend.src.services.llm.factory import get_llm_model
400
+ # from backend.src.services.vector_store.qdrant_adapter import get_vector_store
401
+ # from backend.src.services.security.pii_scrubber import PIIScrubber
402
+
403
+ # # --- Agents ---
404
+ # from backend.src.services.tools.secure_agent import get_secure_agent
405
+ # from backend.src.services.tools.nosql_agent import get_nosql_agent
406
+ # from backend.src.services.tools.cms_agent import get_cms_agent
407
+
408
+ # # --- Router ---
409
+ # from backend.src.services.routing.semantic_router import SemanticRouter
410
+
411
+ # # --- LangChain Core ---
412
+ # from langchain_core.messages import HumanMessage, AIMessage
413
+ # from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
414
+
415
+ # # ==========================================
416
+ # # HELPER FUNCTIONS
417
+ # # ==========================================
418
+
419
+ # async def get_user_integrations(user_id: str, db: AsyncSession) -> dict:
420
+ # """Fetches active integrations and filters valid descriptions."""
421
+ # if not user_id: return {}
422
+
423
+ # query = select(UserIntegration).where(UserIntegration.user_id == user_id, UserIntegration.is_active == True)
424
+ # result = await db.execute(query)
425
+ # integrations = result.scalars().all()
426
+
427
+ # settings = {}
428
+ # for i in integrations:
429
+ # try:
430
+ # creds = json.loads(i.credentials)
431
+ # creds['provider'] = i.provider
432
+ # creds['schema_map'] = i.schema_map if i.schema_map else {}
433
+
434
+ # # --- STRICT CHECK ---
435
+ # # Agar Description NULL hai to dictionary mein mat daalo
436
+ # # Taake Router confuse na ho
437
+ # if i.profile_description:
438
+ # creds['description'] = i.profile_description
439
+
440
+ # settings[i.provider] = creds
441
+ # except (json.JSONDecodeError, TypeError):
442
+ # continue
443
+ # return settings
444
+
445
+ # async def save_chat_to_db(db: AsyncSession, session_id: str, human_msg: str, ai_msg: str, provider: str):
446
+ # """Saves chat history with PII redaction."""
447
+ # if not session_id: return
448
+ # safe_human = PIIScrubber.scrub(human_msg)
449
+ # safe_ai = PIIScrubber.scrub(ai_msg)
450
+ # new_chat = ChatHistory(
451
+ # session_id=session_id, human_message=safe_human, ai_message=safe_ai, provider=provider
452
+ # )
453
+ # db.add(new_chat)
454
+ # await db.commit()
455
+
456
+ # async def get_chat_history(session_id: str, db: AsyncSession):
457
+ # """Retrieves past conversation history."""
458
+ # if not session_id: return []
459
+ # query = select(ChatHistory).where(ChatHistory.session_id == session_id).order_by(ChatHistory.timestamp.asc())
460
+ # result = await db.execute(query)
461
+ # return result.scalars().all()
462
+
463
+ # async def get_bot_persona(user_id: str, db: AsyncSession):
464
+ # """Fetches custom Bot Name and Instructions from User table."""
465
+ # try:
466
+ # result = await db.execute(select(User).where(User.id == int(user_id)))
467
+ # user = result.scalars().first()
468
+ # if user:
469
+ # return {
470
+ # "name": getattr(user, "bot_name", "OmniAgent"),
471
+ # "instruction": getattr(user, "bot_instruction", "You are a helpful AI assistant.")
472
+ # }
473
+ # except Exception:
474
+ # pass
475
+ # return {"name": "OmniAgent", "instruction": "You are a helpful AI assistant."}
476
+
477
+ # # ==========================================
478
+ # # MAIN CHAT LOGIC
479
+ # # ==========================================
480
+ # async def process_chat(message: str, session_id: str, user_id: str, db: AsyncSession):
481
+
482
+ # # 1. Fetch User Settings & Persona
483
+ # user_settings = await get_user_integrations(user_id, db)
484
+ # bot_persona = await get_bot_persona(user_id, db)
485
+
486
+ # # 2. LLM Check
487
+ # llm_creds = user_settings.get('groq') or user_settings.get('openai')
488
+ # if not llm_creds:
489
+ # return "Please configure your AI Model in Settings."
490
+
491
+ # # 3. Build Tool Map for Router (STRICT FILTERING)
492
+ # tools_map = {}
493
+ # for provider, config in user_settings.items():
494
+ # if provider in ['sanity', 'sql', 'mongodb']:
495
+ # # Sirf tab add karo agar description exist karti hai
496
+ # if config.get('description'):
497
+ # tools_map[provider] = config['description']
498
+ # else:
499
+ # print(f"⚠️ [Router] Skipping {provider} - No Description found.")
500
+
501
+ # # 4. SEMANTIC DECISION (Router)
502
+ # selected_provider = None
503
+ # if tools_map:
504
+ # router = SemanticRouter() # Singleton Instance
505
+ # selected_provider = router.route(message, tools_map)
506
+ # else:
507
+ # print("⚠️ [Router] No active tools with descriptions found.")
508
+
509
+ # response_text = ""
510
+ # provider_name = "general_chat"
511
+
512
+ # # 5. Route to Winner (Tool Execution)
513
+ # if selected_provider:
514
+ # print(f"👉 [Router] Selected Tool: {selected_provider.upper()}")
515
+ # try:
516
+ # if selected_provider == 'sanity':
517
+ # schema = user_settings['sanity'].get('schema_map', {})
518
+ # agent = get_cms_agent(user_id=user_id, schema_map=schema, llm_credentials=llm_creds)
519
+ # res = await agent.ainvoke({"input": message})
520
+ # response_text = str(res.get('output', ''))
521
+ # provider_name = "cms_agent"
522
+
523
+ # elif selected_provider == 'sql':
524
+ # role = "admin" if user_id == '99' else "customer"
525
+ # agent = get_secure_agent(int(user_id), role, user_settings['sql'], llm_credentials=llm_creds)
526
+ # res = await agent.ainvoke({"input": message})
527
+ # response_text = str(res.get('output', ''))
528
+ # provider_name = "sql_agent"
529
+
530
+ # elif selected_provider == 'mongodb':
531
+ # agent = get_nosql_agent(user_id, user_settings['mongodb'], llm_credentials=llm_creds)
532
+ # res = await agent.ainvoke({"input": message})
533
+ # response_text = str(res.get('output', ''))
534
+ # provider_name = "nosql_agent"
535
+
536
+ # # Anti-Hallucination Check
537
+ # if not response_text or "error" in response_text.lower():
538
+ # print(f"⚠️ [Router] Tool {selected_provider} failed/empty. Triggering Fallback.")
539
+ # response_text = "" # Clears response to trigger fallback below
540
+
541
+ # except Exception as e:
542
+ # print(f"❌ [Router] Execution Failed: {e}")
543
+ # response_text = ""
544
+
545
+ # # 6. Fallback / RAG (General Chat)
546
+ # if not response_text:
547
+ # print("👉 [Router] Fallback to RAG/General Chat...")
548
+ # try:
549
+ # llm = get_llm_model(credentials=llm_creds)
550
+
551
+ # # Context from Vector DB
552
+ # context = ""
553
+ # if 'qdrant' in user_settings:
554
+ # try:
555
+ # vector_store = get_vector_store(credentials=user_settings['qdrant'])
556
+ # docs = await vector_store.asimilarity_search(message, k=3)
557
+ # if docs:
558
+ # context = "\n\n".join([d.page_content for d in docs])
559
+ # except Exception as e:
560
+ # print(f"⚠️ RAG Warning: {e}")
561
+
562
+ # # --- DYNAMIC SYSTEM PROMPT (PERSONA) ---
563
+ # system_instruction = f"""
564
+ # IDENTITY: You are '{bot_persona['name']}'.
565
+ # MISSION: {bot_persona['instruction']}
566
+
567
+ # CONTEXT FROM KNOWLEDGE BASE:
568
+ # {context if context else "No specific documents found."}
569
+
570
+ # Answer the user's question based on the context above or your general knowledge if permitted by your mission.
571
+ # """
572
+
573
+ # # History Load
574
+ # history = await get_chat_history(session_id, db)
575
+ # formatted_history = []
576
+ # for chat in history:
577
+ # formatted_history.append(HumanMessage(content=chat.human_message))
578
+ # if chat.ai_message: formatted_history.append(AIMessage(content=chat.ai_message))
579
+
580
+ # # LLM Call
581
+ # prompt = ChatPromptTemplate.from_messages([
582
+ # ("system", system_instruction),
583
+ # MessagesPlaceholder(variable_name="chat_history"),
584
+ # ("human", "{question}")
585
+ # ])
586
+ # chain = prompt | llm
587
+
588
+ # ai_response = await chain.ainvoke({"chat_history": formatted_history, "question": message})
589
+ # response_text = ai_response.content
590
+ # provider_name = "rag_fallback"
591
+
592
+ # except Exception as e:
593
+ # print(f"❌ Fallback Error: {e}")
594
+ # response_text = "I am currently unable to process your request. Please check your AI configuration."
595
+
596
+ # # 7. Save to DB
597
+ # await save_chat_to_db(db, session_id, message, response_text, provider_name)
598
+ # return response_text
backend/src/services/connectors/base.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC, abstractmethod
2
+ from typing import List, Dict, Any, Optional
3
+
4
class NoSQLConnector(ABC):
    """
    Abstract contract for universal NoSQL connectivity.

    Every concrete backend (MongoDB, DynamoDB, Firebase, ...) implements
    this interface so the rest of the service stays database-agnostic.
    """

    @abstractmethod
    def connect(self):
        """Open a connection to the underlying database."""
        ...

    @abstractmethod
    def disconnect(self):
        """Release the connection and any associated resources."""
        ...

    @abstractmethod
    def get_schema_summary(self) -> str:
        """
        Describe the available collections and their fields as plain text.

        The LLM consumes this summary to decide what it can query.
        """
        ...

    @abstractmethod
    def find_one(self, collection: str, query: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """Return the first document matching *query*, or None."""
        ...

    @abstractmethod
    def find_many(self, collection: str, query: Dict[str, Any], limit: int = 5) -> List[Dict[str, Any]]:
        """Return up to *limit* documents matching *query*."""
        ...
backend/src/services/connectors/cms_base.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC, abstractmethod
2
+ from typing import Dict, Any, List
3
+
4
class CMSBaseConnector(ABC):
    """Abstract interface every headless-CMS integration must implement."""

    @abstractmethod
    def connect(self, credentials: Dict[str, str]) -> bool:
        """Validate *credentials* and open a connection; True on success."""
        ...

    @abstractmethod
    def fetch_schema_structure(self) -> Dict[str, List[str]]:
        """
        Introspect the CMS and map available content types to their fields,
        e.g. {'product': ['title', 'price'], 'author': ['name']}.
        """
        ...

    @abstractmethod
    def execute_query(self, query: str) -> List[Dict[str, Any]]:
        """Run a raw query (GROQ, GraphQL, ...) and return the JSON rows."""
        ...
backend/src/services/connectors/mongo_connector.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import pymongo
3
+ from typing import List, Dict, Any, Optional
4
+ from backend.src.services.connectors.base import NoSQLConnector
5
+
6
class MongoConnector(NoSQLConnector):
    """MongoDB implementation of the universal NoSQL connector."""

    def __init__(self, credentials: Dict[str, str]):
        """
        Initialize with per-user credentials.

        Expects ``credentials["url"]`` to be a full connection string
        (e.g. "mongodb+srv://user:pass@cluster...") and optionally
        ``credentials["database_name"]``; when absent, the database name
        is parsed from the tail of the URL (text after the last '/',
        before any '?options').
        """
        self.uri = credentials.get("url")
        if not self.uri:
            raise ValueError("MongoDB connection URL ('url') is missing in credentials.")

        # Prefer an explicit database name; fall back to the URI path segment.
        self.db_name = credentials.get("database_name", self.uri.split("/")[-1].split("?")[0])

        self.client = None
        self.db = None

        # SSL/TLS arguments for cloud deployments such as Atlas.
        # NOTE(review): tlsAllowInvalidCertificates=True is acceptable for
        # development only and should be disabled in production.
        self.connect_args = {
            'tls': True,
            'tlsAllowInvalidCertificates': True
        }

    def connect(self):
        """Open the client lazily; raises ConnectionFailure on bad credentials."""
        if not self.client:
            print(f"🔌 [NoSQL] Connecting to MongoDB Cluster...")
            try:
                # serverSelectionTimeoutMS makes a dead cluster fail fast
                # instead of hanging for the driver's 30s default.
                self.client = pymongo.MongoClient(self.uri, serverSelectionTimeoutMS=5000, **self.connect_args)
                # Force a round-trip so we know the connection really works.
                self.client.server_info()
                self.db = self.client[self.db_name]
                print("✅ [NoSQL] MongoDB Connection Successful.")
            except pymongo.errors.ConnectionFailure as e:
                print(f"❌ [NoSQL] MongoDB Connection Failed: {e}")
                raise e

    def disconnect(self):
        """Close the client and reset state so connect() can run again."""
        if self.client:
            self.client.close()
            self.client = None
            print("🔌 [NoSQL] Disconnected from MongoDB.")

    def get_schema_summary(self) -> str:
        """Return one line per collection listing the fields of a sample document."""
        self.connect()
        summary = []
        try:
            collections = self.db.list_collection_names()
            for col_name in collections:
                sample = self.db[col_name].find_one()
                if sample:
                    if '_id' in sample: del sample['_id']
                    keys = list(sample.keys())
                    summary.append(f"Collection: '{col_name}' -> Fields: {keys}")
        except Exception as e:
            return f"Error fetching schema: {e}"
        return "\n".join(summary)

    def find_one(self, collection: str, query: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """Return the first matching document (ObjectId stringified), or None."""
        self.connect()
        try:
            result = self.db[collection].find_one(query)
            if result and '_id' in result:
                result['_id'] = str(result['_id'])
            return result
        except Exception as e:
            # FIX: best-effort lookup, but log instead of swallowing silently
            # so query problems are visible in the server logs.
            print(f"❌ [NoSQL] find_one failed on '{collection}': {e}")
            return None

    def find_many(self, collection: str, query: Dict[str, Any], limit: int = 5) -> List[Dict[str, Any]]:
        """Return up to *limit* matching documents (ObjectIds stringified)."""
        self.connect()
        try:
            cursor = self.db[collection].find(query).limit(limit)
            results = [doc for doc in cursor]
            for doc in results:
                if '_id' in doc:
                    doc['_id'] = str(doc['_id'])
            return results
        except Exception as e:
            # FIX: log the failure instead of returning [] with no trace.
            print(f"❌ [NoSQL] find_many failed on '{collection}': {e}")
            return []
backend/src/services/connectors/sanity_connector.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import requests
3
+ import json
4
+ from urllib.parse import quote
5
+ from typing import Dict, List, Any
6
+ from backend.src.services.connectors.cms_base import CMSBaseConnector
7
+
8
class SanityConnector(CMSBaseConnector):
    """
    Sanity.io connector: validates credentials, discovers the content
    schema by sampling documents, and runs raw GROQ queries over HTTP.
    """

    # Seconds before any Sanity API call is aborted.
    # FIX: the original requests carried no timeout and could hang a
    # worker indefinitely on an unresponsive endpoint.
    REQUEST_TIMEOUT = 15

    def __init__(self, credentials: Dict[str, str]):
        """Store credentials and pre-build the HTTP query endpoint URL."""
        self.project_id = credentials.get("project_id")
        self.dataset = credentials.get("dataset")
        self.token = credentials.get("token")  # read-only API token
        self.api_version = "v2021-10-21"

        if not all([self.project_id, self.dataset, self.token]):
            raise ValueError("Sanity credentials (project_id, dataset, token) are required.")

        # Base URL for the Sanity HTTP query API.
        self.base_url = f"https://{self.project_id}.api.sanity.io/{self.api_version}/data/query/{self.dataset}"
        self.headers = {"Authorization": f"Bearer {self.token}"}

        self.is_connected = False

    def connect(self, credentials: Dict[str, str] = None) -> bool:
        """Test the connection with a cheap query; caches success in is_connected."""
        if not self.is_connected:
            print(f"🔌 [Sanity] Connecting to Project ID: {self.project_id}...")
            try:
                # Minimal query that effectively only checks authentication.
                test_query = '*[_type == "sanity.imageAsset"][0...1]'
                response = requests.get(self.base_url, headers=self.headers, params={'query': test_query},
                                        timeout=self.REQUEST_TIMEOUT)

                if response.status_code == 200:
                    self.is_connected = True
                    print("✅ [Sanity] Connection Successful.")
                    return True
                else:
                    print(f"❌ [Sanity] Connection Failed. Status: {response.status_code}, Response: {response.text}")
                    return False
            except Exception as e:
                print(f"❌ [Sanity] Connection Failed: {e}")
                return False
        return True

    def fetch_schema_structure(self) -> Dict[str, Any]:
        """
        Deep discovery: fetch one sample of every user-defined document
        type and recursively map its (nested) field structure.
        """
        if not self.is_connected: self.connect()

        print("🕵️‍♂️ Starting Deep Schema Discovery...")

        # Step 1: all unique document types, excluding Sanity system types.
        types_query = "array::unique(*[!(_id in path('_.**')) && !(_type match 'sanity.*')]._type)"

        try:
            response = requests.get(self.base_url, headers=self.headers, params={'query': types_query},
                                    timeout=self.REQUEST_TIMEOUT)
            if response.status_code != 200:
                print(f"❌ Failed to fetch types: {response.text}")
                return {}

            user_types = response.json().get('result', [])
            print(f"📋 Found Types: {user_types}")

            schema_map = {}

            # Step 2: fetch ONE full document per type as a structural sample.
            for doc_type in user_types:
                sample_query = f"*[_type == '{doc_type}'][0]"
                sample_response = requests.get(self.base_url, headers=self.headers, params={'query': sample_query},
                                               timeout=self.REQUEST_TIMEOUT)
                sample_doc = sample_response.json().get('result')

                if sample_doc:
                    # Step 3: recursively reduce the sample to a type map.
                    structure = self._extract_structure(sample_doc)
                    schema_map[doc_type] = structure

            print(f"✅ Full Database Map Created.")
            return schema_map

        except Exception as e:
            print(f"❌ Schema Discovery Error: {e}")
            return {}

    def _extract_structure(self, doc: Any, depth=0) -> Any:
        """
        Map a sample value to a structural description, e.g.
        {"store": {"price": 20}} -> {"store": {"price": "Number"}}.
        """
        if depth > 3: return "..."  # cap recursion on deeply nested documents

        if isinstance(doc, dict):
            structure = {}
            for key, value in doc.items():
                if key.startswith("_"): continue  # skip Sanity-internal fields
                structure[key] = self._extract_structure(value, depth + 1)
            return structure

        elif isinstance(doc, list):
            # Describe list content by its first element, when present.
            if len(doc) > 0:
                return [self._extract_structure(doc[0], depth + 1)]
            return "List[]"

        # FIX: bool is a subclass of int, so the bool check must come
        # BEFORE the (int, float) check or booleans are mislabelled "Number".
        elif isinstance(doc, bool):
            return "Boolean"
        elif isinstance(doc, (int, float)):
            return "Number"

        return "String"

    def execute_query(self, query: str) -> List[Dict[str, Any]]:
        """Run a raw GROQ query against the HTTP API; always returns a list."""
        if not self.is_connected: self.connect()

        print(f"🚀 [Sanity] Executing GROQ Query: {query}")
        try:
            # URL-encode so special GROQ characters survive the query string.
            encoded_query = quote(query)

            response = requests.get(f"{self.base_url}?query={encoded_query}", headers=self.headers,
                                    timeout=self.REQUEST_TIMEOUT)

            if response.status_code == 200:
                results = response.json().get('result')
                if results is None: return []
                return results if isinstance(results, list) else [results]
            else:
                print(f"❌ [Sanity] Query Failed. Status: {response.status_code}, Details: {response.text}")
                return []
        except Exception as e:
            print(f"❌ [Sanity] Query execution error: {e}")
            return []
backend/src/services/embeddings/factory.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # backend/src/services/embeddings/factory.py
2
+ from langchain_community.embeddings import (
3
+ SentenceTransformerEmbeddings,
4
+ OpenAIEmbeddings,
5
+ )
6
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
7
+ from backend.src.core.config import settings
8
+ from functools import lru_cache
9
+ from langchain_huggingface import HuggingFaceEmbeddings
10
+
11
# Cached so the (potentially heavy) embedding model is built only once
# per process, no matter how many times the factory is called.
@lru_cache()
def get_embedding_model():
    """
    Embedding "factory": read the app settings and return the matching
    embedding model instance. The heart of the modular design — swapping
    providers is a config change, not a code change.
    """
    provider = settings.EMBEDDING_PROVIDER.lower()
    model_name = settings.EMBEDDING_MODEL_NAME

    print(f"INFO: Loading embedding model from provider: '{provider}' using model '{model_name}'")

    if provider == "local":
        # Runs fully on the local machine; no API key required.
        return HuggingFaceEmbeddings(
            model_name=model_name,
        )

    if provider == "openai":
        if not settings.OPENAI_API_KEY:
            raise ValueError("OpenAI API key not found in .env file")
        return OpenAIEmbeddings(
            model=model_name,
            openai_api_key=settings.OPENAI_API_KEY,
        )

    if provider == "google":
        if not settings.GOOGLE_API_KEY:
            raise ValueError("Google API key not found in .env file")
        return GoogleGenerativeAIEmbeddings(
            model=model_name,
            google_api_key=settings.GOOGLE_API_KEY,
            task_type="retrieval_document",
        )

    raise ValueError(f"Unsupported embedding provider: {provider}")
backend/src/services/ingestion/crawler.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import requests
3
+ import numpy as np
4
+ from bs4 import BeautifulSoup
5
+ from urllib.parse import urljoin
6
+ from sqlalchemy.ext.asyncio import AsyncSession
7
+ from backend.src.models.ingestion import IngestionJob, JobStatus
8
+ from backend.src.services.vector_store.qdrant_adapter import get_vector_store
9
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
10
+ from langchain_core.documents import Document
11
+ from qdrant_client.http import models
12
+
13
+ # --- NEW IMPORT ---
14
+ from backend.src.services.ingestion.guardrail_factory import predict_with_model
15
+
16
+ # --- CONFIGURATION ---
17
+ MAX_PAGES_LIMIT = 50
18
+
19
class SmartCrawler:
    """
    Breadth-first site crawler that ingests page text into the vector
    store, with an AI guardrail that blocks e-commerce product pages.
    """

    def __init__(self, job_id: int, url: str, session_id: str, crawl_type: str, db: AsyncSession):
        # job_id: IngestionJob row used for progress reporting.
        # url: root URL; also stored as the 'source' tag used for cleanup.
        # crawl_type: "full_site" follows same-site links; otherwise one page.
        self.job_id = job_id
        self.root_url = url
        self.session_id = session_id
        self.crawl_type = crawl_type
        self.db = db
        self.visited = set()
        self.vector_store = get_vector_store()
        # The guardrail model is no longer loaded here — it is loaded
        # lazily by the guardrail factory on first prediction.

    async def log_status(self, status: str, processed=0, total=0, error=None):
        """Persist crawl progress to the IngestionJob row (best effort)."""
        try:
            job = await self.db.get(IngestionJob, self.job_id)
            if job:
                job.status = status
                job.pages_processed = processed
                job.total_pages_found = total
                if error:
                    job.error_message = str(error)
                await self.db.commit()
        except Exception as e:
            print(f"DB Log Error: {e}")

    async def is_ai_unsafe(self, text: str, url: str) -> bool:
        """
        Non-blocking AI check via the guardrail factory: returns True when
        the page looks like an e-commerce product page and must be blocked.
        """
        # Sample the start and middle of the page to keep inference cheap.
        sample_text = text[:300] + " ... " + text[len(text)//2 : len(text)//2 + 300]
        label = "This is an e-commerce product page with price, buy button, or shopping cart."

        # Runs on a worker thread inside the factory, so the event loop
        # is never blocked by model inference.
        scores = await predict_with_model(sample_text, label)

        # Softmax over the NLI logits; index 1 is treated as 'entailment'.
        probs = np.exp(scores) / np.sum(np.exp(scores))
        entailment_score = probs[1]

        print("\n" + "="*60)
        print(f"🤖 AI ANALYSIS REPORT for: {url}")
        print("-" * 60)
        print(f"📊 Scores -> Contradiction: {probs[0]:.2f}, Entailment: {probs[1]:.2f}, Neutral: {probs[2]:.2f}")
        print(f"🎯 Target Score (Entailment): {entailment_score:.4f} (Threshold: 0.5)")

        if entailment_score > 0.5:
            print(f"⛔ DECISION: BLOCKED")
            print("="*60 + "\n")
            return True
        else:
            print(f"✅ DECISION: ALLOWED")
            print("="*60 + "\n")
            return False

    async def fetch_page(self, url: str):
        """GET a page on a worker thread; returns the response or None on error."""
        try:
            headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
            return await asyncio.to_thread(requests.get, url, headers=headers, timeout=10)
        except Exception:
            return None

    async def clean_existing_data(self):
        """Drop previously ingested vectors for this root URL (re-crawl support)."""
        print(f"INFO: Cleaning old data for source: {self.root_url}")
        try:
            self.vector_store.client.delete(
                collection_name=self.vector_store.collection_name,
                points_selector=models.FilterSelector(
                    filter=models.Filter(
                        must=[
                            models.FieldCondition(
                                key="metadata.source",
                                match=models.MatchValue(value=self.root_url)
                            )
                        ]
                    )
                )
            )
        except Exception as e:
            print(f"Warning: Clean data failed: {e}")

    async def process_page(self, url: str, soup: BeautifulSoup) -> bool:
        """
        Clean, guard-check, chunk and store one page.
        Returns False when the page is skipped (too short or AI-blocked).
        """
        # Strip non-content elements before extracting text.
        for script in soup(["script", "style", "nav", "footer", "iframe", "noscript", "svg"]):
            script.extract()

        text = soup.get_text(separator=" ", strip=True)

        if len(text) < 200:
            print(f"⚠️ Skipping {url} (Not enough text: {len(text)} chars)")
            return False

        # Guardrail check is awaited so it runs off the event loop.
        if await self.is_ai_unsafe(text, url):
            return False

        splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        docs = [Document(page_content=text, metadata={
            "source": self.root_url,
            "specific_url": url,
            "session_id": self.session_id,
            "type": "web_scrape"
        })]
        split_docs = splitter.split_documents(docs)

        await self.vector_store.aadd_documents(split_docs)
        return True

    async def start(self):
        """BFS crawl loop: fetch, process, enqueue same-site links, report status."""
        try:
            await self.log_status(JobStatus.PROCESSING)
            await self.clean_existing_data()

            queue = [self.root_url]
            self.visited.add(self.root_url)
            total_processed = 0

            while queue and total_processed < MAX_PAGES_LIMIT:
                current_url = queue.pop(0)

                response = await self.fetch_page(current_url)
                if not response or response.status_code != 200:
                    continue

                soup = BeautifulSoup(response.content, 'html.parser')

                success = await self.process_page(current_url, soup)

                if not success:
                    # If even the root page is blocked, the whole job fails.
                    if current_url == self.root_url:
                        await self.log_status(JobStatus.FAILED, error="Root URL blocked. Identified as E-commerce.")
                        return
                    continue

                total_processed += 1

                if self.crawl_type == "full_site":
                    # NOTE(review): the substring test only follows links
                    # that embed the full root URL; a netloc comparison
                    # would be stricter/safer — confirm intended scope.
                    for link in soup.find_all('a', href=True):
                        full_link = urljoin(self.root_url, link['href'])
                        if self.root_url in full_link and full_link not in self.visited:
                            self.visited.add(full_link)
                            queue.append(full_link)

                await self.log_status(JobStatus.PROCESSING, processed=total_processed, total=len(queue)+total_processed)
                await asyncio.sleep(0.5)  # polite crawl delay

            await self.log_status(JobStatus.COMPLETED, processed=total_processed)
            print(f"SUCCESS: Crawling finished. Processed {total_processed} pages.")

        except Exception as e:
            print(f"ERROR: Crawling failed: {e}")
            await self.log_status(JobStatus.FAILED, error=str(e))
backend/src/services/ingestion/file_processor.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # backend/src/services/ingestion/file_processor.py
2
+ import os
3
+ import asyncio
4
+ # Specific Stable Loaders
5
+ from langchain_community.document_loaders import (
6
+ TextLoader,
7
+ PyPDFLoader,
8
+ CSVLoader,
9
+ Docx2txtLoader,
10
+ UnstructuredMarkdownLoader
11
+ )
12
+ # Fallback loader (agar upar walon mein se koi na ho)
13
+ from langchain_community.document_loaders import UnstructuredFileLoader
14
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
15
+ from backend.src.services.vector_store.qdrant_adapter import get_vector_store
16
+
17
def get_loader(file_path: str):
    """
    Pick the most stable document loader for *file_path* by extension;
    unknown formats fall back to the heavyweight UnstructuredFileLoader.
    """
    ext = os.path.splitext(file_path)[1].lower()

    # Plain-text formats (.md included) load fastest and safest via TextLoader.
    if ext in (".txt", ".md"):
        return TextLoader(file_path, encoding="utf-8")

    if ext == ".pdf":
        # Pure-Python PDF parsing — does not hang on odd files.
        return PyPDFLoader(file_path)

    if ext == ".csv":
        return CSVLoader(file_path, encoding="utf-8")

    if ext in (".doc", ".docx"):
        # Lightweight DOCX extraction.
        return Docx2txtLoader(file_path)

    # Last resort for unusual formats.
    print(f"INFO: Unknown format '{ext}', attempting to use UnstructuredFileLoader...")
    return UnstructuredFileLoader(file_path)
47
+
48
async def process_file(file_path: str, session_id: str):
    """
    Load one uploaded file, split it into chunks and push them into the
    vector DB. Supports TXT, PDF, CSV, DOCX, MD and others.

    Returns the number of chunks stored (0 on any failure).
    """
    print(f"INFO: [Ingestion] Starting processing for file: {file_path}")

    try:
        # Pick the right loader, then run the blocking load() on a worker
        # thread — not every loader implements aload().
        loader = get_loader(file_path)
        docs = await asyncio.to_thread(loader.load)
    except Exception as e:
        print(f"ERROR: [Ingestion] Failed to load file {file_path}: {e}")
        return 0

    if not docs:
        print(f"WARNING: [Ingestion] Could not extract any content from {file_path}")
        return 0

    # Break the document into overlapping chunks for retrieval.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = splitter.split_documents(docs)

    # Tag every chunk so it can be traced back (and filtered) later.
    base_name = os.path.basename(file_path)
    file_type = os.path.splitext(file_path)[1].lower()
    for chunk in chunks:
        chunk.metadata["session_id"] = session_id
        chunk.metadata["file_name"] = base_name
        chunk.metadata["file_type"] = file_type

    try:
        vector_store = get_vector_store()
        await vector_store.aadd_documents(chunks)
        print(f"SUCCESS: [Ingestion] Processed {len(chunks)} chunks from {file_path}")
        return len(chunks)
    except Exception as e:
        print(f"ERROR: [Ingestion] Failed to upload to Qdrant: {e}")
        return 0
backend/src/services/ingestion/guardrail_factory.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentence_transformers import CrossEncoder
2
+ from functools import lru_cache
3
+ import asyncio
4
+
5
# Module-level cache: holds the singleton guardrail model.
_model_instance = None

def get_guardrail_model():
    """
    Return the shared CrossEncoder guardrail, loading it into RAM only on
    the first call.
    """
    global _model_instance
    if _model_instance is not None:
        return _model_instance

    print("⏳ INFO: Loading AI Guardrail Model into RAM (First Time Only)...")
    # 'nli-distilroberta-base' is fairly heavy; on a slow machine
    # 'cross-encoder/ms-marco-TinyBERT-L-2' is a lighter alternative.
    _model_instance = CrossEncoder('cross-encoder/nli-distilroberta-base')
    print("✅ INFO: AI Guardrail Model Loaded!")
    return _model_instance
19
+
20
async def predict_with_model(text, label):
    """
    Score the (text, label) pair with the guardrail model on a background
    thread so the event loop never blocks on inference.
    """
    guardrail = get_guardrail_model()

    # Off-load the CPU-heavy predict() call to a worker thread.
    result = await asyncio.to_thread(guardrail.predict, [(text, label)])
    return result[0]
backend/src/services/ingestion/web_processor.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ from langchain_community.document_loaders import WebBaseLoader
3
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
4
+ from backend.src.services.vector_store.qdrant_adapter import get_vector_store
5
+
6
async def process_url(url: str, session_id: str):
    """
    Scrape one URL, split the page into chunks and store them in Qdrant.

    Returns the number of chunks stored (0 if the upload fails). Scraping
    errors are re-raised so the API layer can report them to the user.
    """
    print(f"INFO: [Ingestion] Starting scraping for URL: {url}")

    try:
        # Run the blocking loader on a worker thread to keep the event loop free.
        docs = await asyncio.to_thread(lambda: WebBaseLoader(url).load())

        if not docs:
            print(f"WARNING: [Ingestion] No content found at {url}")
            return 0

        print(f"INFO: [Ingestion] Successfully fetched content. Length: {len(docs[0].page_content)} chars.")

    except Exception as e:
        print(f"ERROR: [Ingestion] Failed to scrape URL {url}: {e}")
        raise e  # propagate so the API can surface the failure

    # Break the page into overlapping chunks for retrieval.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = splitter.split_documents(docs)

    # Tag chunks with their origin so they can be filtered/cleaned later.
    for chunk in chunks:
        chunk.metadata["session_id"] = session_id
        chunk.metadata["source"] = url
        chunk.metadata["type"] = "web_scrape"

    try:
        vector_store = get_vector_store()
        await vector_store.aadd_documents(chunks)
        print(f"SUCCESS: [Ingestion] Processed {len(chunks)} chunks from {url}")
        return len(chunks)
    except Exception as e:
        print(f"ERROR: [Ingestion] Failed to upload to Qdrant: {e}")
        return 0
backend/src/services/ingestion/zip_processor.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import zipfile
2
+ import os
3
+ import shutil
4
+ import asyncio
5
+ from sqlalchemy.ext.asyncio import AsyncSession
6
+ from backend.src.models.ingestion import IngestionJob, JobStatus
7
+ from backend.src.services.ingestion.file_processor import process_file
8
+ from backend.src.services.vector_store.qdrant_adapter import get_vector_store
9
+ from qdrant_client.http import models
10
+
11
+ # --- CONFIGURATION ---
12
+ SUPPORTED_EXTENSIONS = ['.pdf', '.txt', '.md', '.docx', '.csv']
13
+ MAX_FILES_IN_ZIP = 500
14
+
15
+ class SmartZipProcessor:
16
+ def __init__(self, job_id: int, zip_path: str, session_id: str, db: AsyncSession):
17
+ self.job_id = job_id
18
+ self.zip_path = zip_path
19
+ self.session_id = session_id
20
+ self.db = db
21
+ self.vector_store = get_vector_store()
22
+ self.temp_dir = f"./temp_unzip_{job_id}"
23
+ self.report = []
24
+
25
+ async def log_status(self, status: str, processed=0, total=0, error=None):
26
+ """Database mein job status update karta hai"""
27
+ try:
28
+ job = await self.db.get(IngestionJob, self.job_id)
29
+ if job:
30
+ job.status = status
31
+ job.items_processed = processed
32
+ job.total_items = total
33
+ job.details = self.report # Report bhi save karo
34
+ if error:
35
+ job.error_message = str(error)
36
+ await self.db.commit()
37
+ except Exception as e:
38
+ print(f"DB Log Error: {e}")
39
+
40
+ async def clean_existing_data(self):
41
+ """Update Logic: Is session ka purana data saaf karo"""
42
+ print(f"INFO: Cleaning old data for session_id: {self.session_id}")
43
+ try:
44
+ self.vector_store.client.delete(
45
+ collection_name=self.vector_store.collection_name,
46
+ points_selector=models.FilterSelector(
47
+ filter=models.Filter(
48
+ must=[
49
+ models.FieldCondition(
50
+ key="metadata.session_id",
51
+ match=models.MatchValue(value=self.session_id)
52
+ )
53
+ ]
54
+ )
55
+ )
56
+ )
57
+ except Exception as e:
58
+ print(f"Warning: Clean data failed (maybe first upload): {e}")
59
+
60
+ def inspect_zip(self) -> list:
61
+ """Zip ko bina extract kiye check karta hai"""
62
+ with zipfile.ZipFile(self.zip_path, 'r') as zf:
63
+ file_list = zf.infolist()
64
+
65
+ # Guardrail 1: File Count
66
+ if len(file_list) > MAX_FILES_IN_ZIP:
67
+ raise ValueError(f"Zip contains too many files ({len(file_list)}). Max allowed is {MAX_FILES_IN_ZIP}.")
68
+
69
+ # Sirf "Files" return karo, folders nahi
70
+ return [f for f in file_list if not f.is_dir()]
71
+
72
+ def extract_zip(self):
73
+ """Zip ko temp folder mein extract karta hai"""
74
+ os.makedirs(self.temp_dir, exist_ok=True)
75
+ with zipfile.ZipFile(self.zip_path, 'r') as zf:
76
+ zf.extractall(self.temp_dir)
77
+
78
+ def cleanup(self):
79
+ """Temp files/folders delete karta hai"""
80
+ if os.path.exists(self.temp_dir):
81
+ shutil.rmtree(self.temp_dir)
82
+ if os.path.exists(self.zip_path):
83
+ os.remove(self.zip_path)
84
+
85
+ async def start(self):
86
+ """Main Processing Loop"""
87
+ try:
88
+ # Step 1: Inspect
89
+ files_to_process = self.inspect_zip()
90
+ total_files = len(files_to_process)
91
+ await self.log_status(JobStatus.PROCESSING, total=total_files)
92
+
93
+ # Step 2: Clean old data (Atomic Update)
94
+ await self.clean_existing_data()
95
+
96
+ # Step 3: Extract
97
+ self.extract_zip()
98
+
99
+ # Step 4: Process each file
100
+ processed_count = 0
101
+ for file_info in files_to_process:
102
+ file_path = os.path.join(self.temp_dir, file_info.filename)
103
+
104
+ # Guardrail 2: Supported Extension
105
+ ext = os.path.splitext(file_path)[1].lower()
106
+ if ext not in SUPPORTED_EXTENSIONS:
107
+ self.report.append({"file": file_info.filename, "status": "skipped", "reason": "unsupported_type"})
108
+ continue
109
+
110
+ # Process the file
111
+ try:
112
+ # process_file (jo humne pehle banaya tha) ko call karo
113
+ chunks_added = await process_file(file_path, self.session_id)
114
+ if chunks_added > 0:
115
+ self.report.append({"file": file_info.filename, "status": "success", "chunks": chunks_added})
116
+ else:
117
+ raise ValueError("No content extracted")
118
+ except Exception as e:
119
+ self.report.append({"file": file_info.filename, "status": "failed", "reason": str(e)})
120
+
121
+ processed_count += 1
122
+ await self.log_status(JobStatus.PROCESSING, processed=processed_count, total=total_files)
123
+ await asyncio.sleep(0.1) # Thoda saans lene do
124
+
125
+ await self.log_status(JobStatus.COMPLETED, processed=processed_count, total=total_files)
126
+ print(f"SUCCESS: Zip processing finished. Processed {processed_count}/{total_files} files.")
127
+
128
+ except Exception as e:
129
+ print(f"ERROR: Zip processing failed: {e}")
130
+ await self.log_status(JobStatus.FAILED, error=str(e))
131
+ finally:
132
+ self.cleanup()
backend/src/services/llm/factory.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from langchain_google_genai import ChatGoogleGenerativeAI
3
+ from langchain_openai import ChatOpenAI
4
+ from backend.src.core.config import settings
5
+
6
def get_llm_model(credentials: dict = None):
    """
    Universal chat-model factory.

    Starts from the global settings, lets per-user *credentials* override
    them, fills in known provider endpoints (Groq), then returns either a
    Gemini client or a generic OpenAI-compatible client.
    """
    # Global defaults (fallback when no user credentials are supplied).
    llm_provider = settings.LLM_PROVIDER.lower()
    llm_model_name = settings.LLM_MODEL_NAME
    llm_base_url = settings.LLM_BASE_URL
    llm_api_key = settings.LLM_API_KEY
    google_api_key = settings.GOOGLE_API_KEY

    # Per-user overrides take precedence over the defaults.
    if credentials:
        llm_provider = credentials.get("provider", llm_provider).lower()
        llm_model_name = credentials.get("model_name", llm_model_name)
        llm_base_url = credentials.get("base_url", llm_base_url)
        llm_api_key = credentials.get("api_key", llm_api_key)

        if llm_provider == "google":
            # Gemini takes its key through a dedicated parameter.
            google_api_key = llm_api_key

    # Known OpenAI-compatible endpoints: fill in Groq's URL when missing.
    if llm_provider == "groq" and not llm_base_url:
        llm_base_url = "https://api.groq.com/openai/v1"
        # Fall back to the .env Groq key when the user supplied none.
        llm_api_key = llm_api_key or settings.GROQ_API_KEY

    print(f"🤖 Loading AI Model: {llm_provider} -> {llm_model_name}")

    # --- Google Gemini client ---
    if llm_provider == "google":
        if not google_api_key:
            raise ValueError("Google API key not found.")
        return ChatGoogleGenerativeAI(
            model=llm_model_name,
            google_api_key=google_api_key,
            temperature=0.7,
            convert_system_message_to_human=True
        )

    # --- Everything else (Groq, OpenAI, Ollama, ...) speaks the OpenAI protocol ---
    if not llm_api_key and "localhost" not in (llm_base_url or ""):
        print("⚠️ WARNING: No API Key provided for LLM. Trying global fallback.")
        if settings.OPENAI_API_KEY and llm_provider == "openai":
            llm_api_key = settings.OPENAI_API_KEY

    print(f" -> Endpoint URL: {llm_base_url or 'Default OpenAI'}")

    return ChatOpenAI(
        model_name=llm_model_name,
        api_key=llm_api_key or "dummy-key",
        openai_api_base=llm_base_url,
        temperature=0.7
    )
backend/src/services/routing/semantic_router.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentence_transformers import SentenceTransformer
2
+ from sklearn.metrics.pairwise import cosine_similarity
3
+ import numpy as np
4
+
5
class SemanticRouter:
    """Singleton semantic router.

    Picks the tool whose description is most similar (cosine similarity of
    multilingual sentence embeddings) to the incoming query.
    """

    _instance = None
    _model = None

    def __new__(cls):
        # Lazy singleton: the embedding model is loaded exactly once.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            print("🧠 [Router] Loading Multilingual Embedding Model...")
            # Multilingual model: understands Hindi/Urdu/English alike.
            cls._model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')
            print("✅ [Router] Multilingual Model Loaded.")
        return cls._instance

    def route(self, query: str, tools_map: dict) -> str | None:
        """Return the best-matching tool name from `tools_map`
        (name -> description), or None when nothing clears the threshold."""
        if not tools_map:
            return None

        names = list(tools_map.keys())
        descriptions = list(tools_map.values())

        # Embed the query together with every description in a single pass.
        vectors = self._model.encode([query] + descriptions)
        query_vec = vectors[0].reshape(1, -1)
        tool_vecs = vectors[1:]

        sims = cosine_similarity(query_vec, tool_vecs)[0]

        # Debugging output
        print(f"\n📊 [Router Logic] Query: '{query}'")
        for name, score in zip(names, sims):
            print(f" 🔹 {name}: {score:.4f}")

        winner = np.argmax(sims)
        best_score = sims[winner]

        # --- THRESHOLD ADJUSTMENT ---
        # Hinglish/multilingual matches score low, so the cut-off is kept
        # deliberately permissive (0.05) to catch even faint matches.
        if best_score < 0.05:
            print(f"⛔ [Router] Score too low ({best_score:.4f} < 0.05). Fallback.")
            return None

        return names[winner]
backend/src/services/security/pii_scrubber.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from typing import Tuple
3
+
4
class SecurityException(Exception):
    """Custom exception for security violations like prompt injection."""
    # NOTE(review): not raised anywhere in this module; presumably raised by
    # callers when check_for_injection() reports an unsafe input — confirm.
    pass
7
+
8
class PIIScrubber:
    """Regex-based PII scrubber plus a basic prompt-injection keyword check.

    `scrub()` replaces emails, phone numbers, credit-card numbers and IPv4
    addresses with fixed placeholders; `check_for_injection()` flags known
    jailbreak phrases. All patterns are pre-compiled once at class level
    for performance.
    """

    # Email: standard user@domain.tld pattern.
    # FIX: the TLD class was [A-Z|a-z], which also (wrongly) matched a
    # literal '|' character inside the top-level domain; now [A-Za-z].
    EMAIL_REGEX = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b')

    # Phone: Matches +1-555-555-5555, (555) 555-5555, 555 555 5555
    # Logic: Look for digits with common separators, length approx 10-15
    PHONE_REGEX = re.compile(r'\b(?:\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b')

    # Credit Card: Matches 13-16 digits, with potential dashes or spaces
    # Logic: Look for groups of 4 digits or continuous strings
    CREDIT_CARD_REGEX = re.compile(r'\b(?:\d{4}[-\s]?){3}\d{4}\b|\b\d{13,16}\b')

    # IPv4 Address: octets are not range-checked (999.999.999.999 also
    # matches) — acceptable for redaction purposes.
    IP_REGEX = re.compile(r'\b(?:\d{1,3}\.){3}\d{1,3}\b')

    # Basic injection keywords (compared against lowercased input)
    INJECTION_KEYWORDS = [
        "ignore all previous instructions",
        "ignore previous instructions",
        "system override",
        "delete database",
        "drop table",
        "you are now",
        "bypass security"
    ]

    @staticmethod
    def scrub(text: str) -> str:
        """
        Sanitize `text` by replacing PII with placeholders.

        Redaction order matters: emails are handled first so their digits
        can never be re-matched by the phone/credit-card patterns.
        """
        if not text:
            return ""

        # Apply redactions sequentially
        scrubbed_text = text
        scrubbed_text = PIIScrubber.EMAIL_REGEX.sub("[EMAIL_REDACTED]", scrubbed_text)
        scrubbed_text = PIIScrubber.PHONE_REGEX.sub("[PHONE_REDACTED]", scrubbed_text)
        scrubbed_text = PIIScrubber.CREDIT_CARD_REGEX.sub("[CC_REDACTED]", scrubbed_text)
        scrubbed_text = PIIScrubber.IP_REGEX.sub("[IP_REDACTED]", scrubbed_text)

        return scrubbed_text

    @staticmethod
    def check_for_injection(text: str) -> Tuple[bool, str]:
        """
        Check for basic prompt-injection attempts.

        Returns:
            (is_safe, reason): reason is "" when safe, otherwise names the
            first matching keyword.
        """
        if not text:
            return True, ""

        lower_text = text.lower()
        for keyword in PIIScrubber.INJECTION_KEYWORDS:
            if keyword in lower_text:
                return False, f"Malicious keyword detected: '{keyword}'"

        return True, ""
backend/src/services/tools/cms_agent.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import json
3
+ from langchain.agents import create_agent
4
+ from backend.src.services.llm.factory import get_llm_model
5
+ from backend.src.services.tools.cms_tool import CMSQueryTool
6
+ from typing import Optional, Dict
7
+
8
# --- THE CMS EXPERT PROMPT (ANTI-YAP VERSION 🤐) ---
# System prompt template for the CMS agent. Placeholders:
#   {schema_map} - JSON dump of the Sanity schema (filled in get_cms_agent)
#   {input}      - the per-turn user request (kept literal at agent build time)
CMS_SYSTEM_PROMPT: str = """You are a Sanity GROQ Query Generator.
Your goal is to query the database based on the user's request.

--- KNOWLEDGE BASE (SCHEMA) ---
{schema_map}

--- RULES (READ CAREFULLY) ---
1. **NO EXPLANATIONS:** Do NOT say "Here is the query" or "I will search for...".
2. **JUST THE QUERY:** Directly call the 'cms_query_tool' with the GROQ string.
3. **USE THE SCHEMA:** Look at the schema map above. If `price` is inside `variants`, use `variants[].price`.
4. **SYNTAX:** `*[_type == "product" && title match "Blue*"]`

--- ERROR HANDLING ---
If the query fails or returns empty, just say: "No products found matching your criteria."
Do NOT make up fake products from Amazon or other websites.

User Input: {input}
"""
27
+
28
+ # --- AGENT ADAPTER ---
29
class AgentAdapter:
    """Adapts a message-based agent to the legacy
    {"input": ...} -> {"output": ...} calling convention."""

    def __init__(self, agent):
        self.agent = agent

    async def ainvoke(self, input_dict):
        # Nudge the model towards a bare tool call by appending a strict
        # instruction to whatever the user asked.
        question = input_dict.get("input", "")
        forced = f"{question} (Return ONLY the GROQ query tool call. Do not explain.)"

        response = await self.agent.ainvoke({"messages": [("user", forced)]})
        # The last message in the transcript is the agent's answer.
        return {"output": response["messages"][-1].content}
43
+
44
+ # --- DYNAMIC AGENT FACTORY ---
45
def get_cms_agent(
    user_id: str,
    schema_map: dict,
    llm_credentials: Optional[Dict[str, str]] = None
):
    """Build a CMS query agent bound to one user's Sanity data and LLM.

    The agent is wrapped in AgentAdapter so callers keep the simple
    {"input": ...} -> {"output": ...} interface.
    """
    # Load the user's LLM via the shared factory.
    model = get_llm_model(credentials=llm_credentials)

    # Embed the schema into the system prompt as pretty-printed JSON.
    schema_str = json.dumps(schema_map, indent=2)
    # input="{input}" keeps the per-turn placeholder literal in the template.
    prompt = CMS_SYSTEM_PROMPT.format(schema_map=schema_str, input="{input}")

    runnable = create_agent(
        model=model,
        tools=[CMSQueryTool(user_id=str(user_id))],
        system_prompt=prompt
    )
    return AgentAdapter(runnable)
backend/src/services/tools/cms_tool.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import json
3
+ import ast
4
+ from typing import Type
5
+ from pydantic import BaseModel, Field
6
+ from langchain_core.tools import BaseTool
7
+ from sqlalchemy.future import select
8
+
9
+ # Imports for DB access & Connector
10
+ from backend.src.db.session import AsyncSessionLocal
11
+ from backend.src.models.integration import UserIntegration
12
+ # Use the real Sanity connector now, not the mock
13
+ from backend.src.services.connectors.sanity_connector import SanityConnector
14
+
15
# Argument schema for CMSQueryTool (exposed to the LLM via args_schema).
class CMSQueryInput(BaseModel):
    query: str = Field(..., description="The query string (GROQ/GraphQL) to execute.")
17
+
18
class CMSQueryTool(BaseTool):
    """LangChain tool that runs a GROQ query against the user's Sanity CMS.

    Credentials are looked up per call from the user's active 'sanity'
    integration row, so the tool instance itself only carries the owning
    user_id. All failures are returned as "Error: ..." strings so the agent
    can surface them instead of crashing.
    """
    name: str = "cms_query_tool"
    description: str = """
    Use this tool to fetch products, offers, or content from the CMS.
    Input should be a specific query string (e.g., GROQ for Sanity).
    """
    args_schema: Type[BaseModel] = CMSQueryInput
    # ID of the user whose Sanity integration should be used.
    user_id: str

    def _run(self, query: str) -> str:
        # Sync execution is intentionally unsupported; the DB session is async.
        raise NotImplementedError("Use _arun for async execution")

    async def _arun(self, query: str) -> str:
        """Execute `query` against Sanity and return the result as JSON text
        (or an 'Error: ...' / 'No data ...' message)."""
        print(f"🛒 [CMS Tool] Processing Query: {query}")

        try:
            async with AsyncSessionLocal() as db:
                # 1. Fetch the user's active Sanity integration row.
                stmt = select(UserIntegration).where(
                    UserIntegration.user_id == self.user_id,
                    UserIntegration.provider == 'sanity', # Specifically find Sanity
                    UserIntegration.is_active == True
                )
                result = await db.execute(stmt)
                integration = result.scalars().first()

                if not integration:
                    return "Error: No active Sanity integration found. Please connect first."

                # 2. Parse stored credentials.
                # NOTE(review): despite the original "Decrypt" label, the
                # credentials are read as plain JSON here — confirm whether
                # they should pass through SecurityUtils.decrypt first.
                creds_dict = {}
                try:
                    creds_str = integration.credentials
                    creds_dict = json.loads(creds_str)
                except Exception as e:
                    print(f"❌ [CMS Tool] Credential parsing failed: {e}")
                    return "Error: Invalid Sanity credentials format in database."

                # 3. Connect & execute with the user's own credentials.
                connector = SanityConnector(credentials=creds_dict)

                if not connector.connect():
                    return "Error: Could not connect to Sanity. Please check your credentials."

                data = connector.execute_query(query)

                if not data:
                    return "No data found matching your query."

                return json.dumps(data, indent=2)

        except Exception as e:
            # Catch-all boundary: log the traceback, return a readable error.
            print(f"❌ [CMS Tool] CRITICAL ERROR: {e}")
            import traceback
            traceback.print_exc()
            return f"Error executing CMS query: {str(e)}"
backend/src/services/tools/nosql_agent.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from langchain.agents import create_agent
3
+ from backend.src.services.llm.factory import get_llm_model
4
+ from backend.src.services.tools.nosql_tool import NoSQLQueryTool
5
+ from typing import Optional, Dict
6
+
7
# --- THE CONSTITUTION (Same as before) ---
# System prompt for the NoSQL agent. Double braces ({{}}) survive .format()
# as literal braces; only {user_id} is substituted at agent build time.
NOSQL_SYSTEM_PROMPT: str = """You are a User Data Assistant with access to a NoSQL Database.
Your job is to retrieve user profile details and activity logs using the 'nosql_database_tool'.

--- CRITICAL RULES FOR QUERYING ---
1. **DO NOT** include 'user_id' or '_id' in the 'query_json'.
   - The tool AUTOMATICALLY applies the security filter for the current user.
   - If you want to fetch the user's profile, just send an empty query: "{{}}"

2. **DO NOT** try to select specific fields in the query_json.
   - Incorrect: {{"fields": ["email"]}}
   - Correct: {{}} (Fetch the whole document, then you extract the email).

3. You are acting on behalf of User ID: {user_id}.

--- AVAILABLE COLLECTIONS ---
1. 'users': Contains profile info (name, email, membership_tier).
2. 'activity_logs': Contains login history and actions.

--- EXAMPLES ---
- User: "Show my profile" -> Tool Input: collection='users', query_json='{{}}'
- User: "Show my login history" -> Tool Input: collection='activity_logs', query_json='{{"action": "login"}}'
"""
30
+
31
class AgentAdapter:
    """Wrapper for V1 Agent compatibility: adapts the message-based agent
    to the older {"input": ...} -> {"output": ...} convention."""

    def __init__(self, agent):
        self.agent = agent

    async def ainvoke(self, input_dict):
        question = input_dict.get("input", "")
        response = await self.agent.ainvoke({"messages": [("user", question)]})
        # The final message in the transcript is the agent's answer.
        return {"output": response["messages"][-1].content}
42
+
43
+ # --- DYNAMIC AGENT FACTORY (UPDATED) ---
44
def get_nosql_agent(
    user_id: str,
    llm_credentials: Optional[Dict[str, str]] = None,
    db_credentials: Optional[Dict[str, str]] = None,
):
    """
    Creates a NoSQL Agent using the user's specific LLM credentials.

    FIX: NoSQLQueryTool declares a required 'db_credentials' field, so
    constructing it with only user_id raised a pydantic validation error.
    The Mongo credentials are now accepted here (defaulting to an empty
    dict for backward compatibility) and forwarded to the tool.
    """
    # 1. Load the user's LLM via the shared factory.
    llm = get_llm_model(credentials=llm_credentials)

    # 2. Initialize the tool with the user's identity and DB credentials.
    tool = NoSQLQueryTool(user_id=str(user_id), db_credentials=db_credentials or {})

    # 3. Create the agent; {user_id} is baked into the security prompt.
    agent_runnable = create_agent(
        model=llm,
        tools=[tool],
        system_prompt=NOSQL_SYSTEM_PROMPT.format(user_id=user_id)
    )

    return AgentAdapter(agent_runnable)
backend/src/services/tools/nosql_tool.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import json
3
+ import asyncio
4
+ from typing import Type
5
+ from pydantic import BaseModel, Field
6
+ from langchain_core.tools import BaseTool
7
+ from backend.src.services.connectors.mongo_connector import MongoConnector
8
+ from typing import Dict, Optional
9
+
10
# --- NoSQLQueryInput Schema (Same as before) ---
# Argument schema for NoSQLQueryTool (exposed to the LLM via args_schema).
class NoSQLQueryInput(BaseModel):
    collection: str = Field(..., description="The name of the collection to query (e.g., 'users', 'activity_logs').")
    query_json: str = Field(..., description="A valid JSON string representing the query filter.")
14
+
15
class NoSQLQueryTool(BaseTool):
    """LangChain tool for querying the user's NoSQL (Mongo) database.

    Security: every query has a `user_id` filter force-injected, and
    queries containing server-side JS operators are rejected, so the agent
    can never read another user's documents.
    """
    name: str = "nosql_database_tool"
    description: str = """
    Use this tool to query the NoSQL User Database.
    Useful for retrieving User Profiles and Activity Logs.
    """
    args_schema: Type[BaseModel] = NoSQLQueryInput

    # --- DYNAMIC INJECTION ---
    # ID used for the mandatory row-level security filter.
    user_id: str
    # The user's Mongo connection details (their connection URL etc.).
    db_credentials: Dict[str, str]

    def _run(self, collection: str, query_json: str) -> str:
        """Parse, sanitize and execute the query.

        Returns a human-readable result string; errors are returned as
        messages rather than raised, so the agent can relay them.
        """
        # 1. Initialize the connector with the user's credentials.
        # Note: future-proofing to select connector based on provider
        connector = MongoConnector(credentials=self.db_credentials)

        try:
            # 2. Parse the query. Single quotes are swapped for double quotes
            # because LLMs often emit pseudo-JSON with single quotes.
            query_dict = json.loads(query_json.replace("'", '"'))

            # 3. Security checks (injection & RBAC): block server-side JS.
            query_str = str(query_dict)
            if "$where" in query_str or "$function" in query_str:
                return "⛔ SECURITY ALERT: Malicious operators detected."

            # Force the row-level user_id filter (overrides any LLM-supplied one).
            query_dict['user_id'] = self.user_id

            print(f"🔎 [NoSQL Tool] Executing Query on '{collection}': {query_dict}")

            # 4. Execute (capped at 5 documents to keep responses small).
            results = connector.find_many(collection, query_dict, limit=5)

            if not results:
                return "No records found matching your request."

            return f"Found {len(results)} records:\n{json.dumps(results, indent=2, default=str)}"

        except json.JSONDecodeError:
            return "❌ Error: Invalid JSON query format."
        except Exception as e:
            return f"❌ System Error: {str(e)}"

    async def _arun(self, collection: str, query_json: str):
        """Async wrapper: runs the synchronous _run in a worker thread."""
        return await asyncio.to_thread(self._run, collection, query_json)
backend/src/services/tools/secure_agent.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from langchain.agents import create_agent
3
+ from backend.src.services.llm.factory import get_llm_model
4
+ from backend.src.services.tools.sql_tool import get_sql_toolkit # Updated Import
5
+ from typing import Optional, Dict
6
+
7
+ # --- PROMPTS (Same as before) ---
8
+ ADMIN_PREFIX = "You are a PostgreSQL expert... full access..."
9
+ CUSTOMER_PREFIX = """You are a SQL helper for User ID: {user_id}.
10
+ CRITICAL: For every query, you MUST add a "WHERE user_id = {user_id}" clause.
11
+ Never show data of other users.
12
+ Always present data in a clean MARKDOWN TABLE.
13
+ """
14
+
15
+ # --- AGENT ADAPTER (Same as before) ---
16
class AgentAdapter:
    """Adapts the message-based agent to the legacy
    {"input": ...} -> {"output": ...} calling convention."""

    def __init__(self, agent):
        self.agent = agent

    async def ainvoke(self, input_dict):
        question = input_dict.get("input", "")
        response = await self.agent.ainvoke({"messages": [("user", question)]})
        # The final message in the transcript is the agent's answer.
        return {"output": response["messages"][-1].content}
26
+
27
+ # --- DYNAMIC AGENT FACTORY ---
28
def get_secure_agent(
    user_id: int,
    role: str,
    db_credentials: Dict[str, str],
    llm_credentials: Optional[Dict[str, str]] = None
):
    """
    Creates a Secure SQL Agent using the specific user's databases and LLM.
    """
    # Load the user's LLM via the factory.
    llm = get_llm_model(credentials=llm_credentials)

    # Build the SQL toolkit against the user's own database.
    toolkit = get_sql_toolkit(db_credentials, llm_credentials)

    # Admins get the unrestricted prompt; everyone else is row-fenced
    # by the user_id-scoped customer prompt.
    if role == "admin":
        prompt = ADMIN_PREFIX
    else:
        prompt = CUSTOMER_PREFIX.format(user_id=user_id)

    # New V1 'create_agent' syntax.
    runnable = create_agent(
        model=llm,
        tools=toolkit.get_tools(),
        system_prompt=prompt
    )
    return AgentAdapter(runnable)
backend/src/services/tools/sql_tool.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from langchain_community.utilities import SQLDatabase
3
+ from langchain_community.agent_toolkits import SQLDatabaseToolkit
4
+ from backend.src.services.llm.factory import get_llm_model
5
+ from typing import Optional, Dict
6
+
7
+ # --- DYNAMIC FUNCTIONS ---
8
+
9
def get_database_connection(db_credentials: Dict[str, str]) -> SQLDatabase:
    """
    Connect to the user's database using the connection string from their
    settings.

    Raises:
        ValueError: when the credentials dict has no "url" entry.
    """
    db_uri = db_credentials.get("url")
    if not db_uri:
        raise ValueError("SQL Database URL not found in user's settings.")

    # --- FIX for SQLAlchemy async drivers ---
    # SQLDatabase is synchronous, so any async driver suffix must be removed.
    # Previously only "+asyncpg" was handled; "+aiosqlite"/"+aiomysql" URLs
    # (aiosqlite is installed in this project) failed the same way, so all
    # known async suffixes are stripped.
    for async_driver in ("+asyncpg", "+aiosqlite", "+aiomysql"):
        db_uri = db_uri.replace(async_driver, "")

    print(f"INFO: [SQL Tool] Connecting to user's SQL DB: {db_uri[:30]}...")

    db = SQLDatabase.from_uri(
        db_uri,
        sample_rows_in_table_info=2  # two sample rows are plenty for context
    )
    return db
29
+
30
def get_sql_toolkit(
    db_credentials: Dict[str, str],
    llm_credentials: Optional[Dict[str, str]] = None
) -> SQLDatabaseToolkit:
    """
    Build an SQL toolkit wired to the user's own database and LLM.
    """
    # Toolkit = user's DB connection + user's chat model.
    database = get_database_connection(db_credentials)
    model = get_llm_model(credentials=llm_credentials)
    return SQLDatabaseToolkit(db=database, llm=model)
backend/src/services/vector_store/qdrant_adapter.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import qdrant_client
3
+ from qdrant_client import QdrantClient
4
+ from qdrant_client.http import models
5
+ from langchain_qdrant import QdrantVectorStore
6
+ from backend.src.core.config import settings
7
+ from backend.src.services.embeddings.factory import get_embedding_model
8
+ from typing import Optional, Dict
9
+
10
# @lru_cache() removed — user-specific connections must not be cached.
def get_vector_store(credentials: Optional[Dict[str, str]] = None):
    """
    Dynamic vector-store connector.

    1. If `credentials` are given, use them (the user's own cloud Qdrant).
    2. Otherwise fall back to the global settings (fallback/admin).

    Ensures the target collection exists — creating it with the embedding
    model's vector size if missing — and returns a QdrantVectorStore.

    Raises:
        ValueError: when no Qdrant URL is configured anywhere.
        ConnectionError: when Qdrant is unreachable or access fails.
    """
    embedding_model = get_embedding_model()  # local model — needs no API keys

    # --- DYNAMIC CONFIGURATION LOGIC ---
    if credentials:
        # User-specific Cloud settings
        qdrant_url = credentials.get("url")
        qdrant_api_key = credentials.get("api_key")
        collection_name = credentials.get("collection_name", "user_default_collection")
    else:
        # Global fallback settings
        qdrant_url = settings.QDRANT_URL
        qdrant_api_key = settings.QDRANT_API_KEY
        collection_name = settings.QDRANT_COLLECTION_NAME

    if not qdrant_url:
        raise ValueError("Qdrant URL is not configured for this user or globally.")

    print(f"INFO: [VectorDB] Connecting to Qdrant at '{qdrant_url}'...")

    # 1. Build the Qdrant client with the selected keys.
    client = QdrantClient(
        url=qdrant_url,
        api_key=qdrant_api_key,
    )

    # 2. CHECK: does the collection exist?
    # try/except also catches plain connection errors here.
    try:
        # collection_exists is deprecated, use get_collection instead
        client.get_collection(collection_name=collection_name)
        print(f"INFO: [VectorDB] Collection '{collection_name}' already exists.")
    except Exception as e:
        # A "Not found" error means we should create the collection.
        if "404" in str(e) or "Not found" in str(e):
            print(f"INFO: Collection '{collection_name}' not found. Creating it now...")

            # Probe the embedding dimension with a dummy query.
            dummy_embedding = embedding_model.embed_query("test")
            vector_size = len(dummy_embedding)

            client.create_collection(
                collection_name=collection_name,
                vectors_config=models.VectorParams(
                    size=vector_size,
                    distance=models.Distance.COSINE
                )
            )
            print(f"SUCCESS: Created collection '{collection_name}' with vector size {vector_size}.")
        else:
            # Any other error (e.g., connection refused)
            raise ConnectionError(f"Failed to connect or access Qdrant: {e}")

    # 3. Build and return the vector store object.
    vector_store = QdrantVectorStore(
        client=client,
        collection_name=collection_name,
        embedding=embedding_model,
        content_payload_key="page_content",
        metadata_payload_key="metadata"
    )

    return vector_store
backend/src/utils/auth.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from passlib.context import CryptContext
2
+ from datetime import datetime, timedelta
3
+ from jose import jwt
4
+ from backend.src.core.config import settings
5
+
6
+ # Password Hasher (Argon2)
7
+ pwd_context = CryptContext(schemes=["argon2"], deprecated="auto")
8
+
9
+ # JWT Configuration
10
+ ALGORITHM = "HS256"
11
+ ACCESS_TOKEN_EXPIRE_MINUTES = 30
12
+
13
def verify_password(plain_password, hashed_password):
    """Check whether the user's plaintext password matches the stored hash."""
    return pwd_context.verify(plain_password, hashed_password)
16
+
17
def get_password_hash(password):
    """Hash the password so it is never stored in the DB as plain text."""
    return pwd_context.hash(password)
20
+
21
def create_access_token(data: dict):
    """Create a signed JWT login token ("badge") for the user.

    The payload is copied and given an "exp" claim set
    ACCESS_TOKEN_EXPIRE_MINUTES into the future.

    FIX: datetime.utcnow() is deprecated and returns a naive datetime;
    an aware UTC timestamp is used instead (same instant, unambiguous).
    """
    from datetime import timezone  # module only imports datetime/timedelta

    to_encode = data.copy()
    expire = datetime.now(timezone.utc) + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
    to_encode.update({"exp": expire})

    # Secret key comes from config (ensure SECRET_KEY is set there).
    secret_key = settings.SECRET_KEY
    encoded_jwt = jwt.encode(to_encode, secret_key, algorithm=ALGORITHM)
    return encoded_jwt
backend/src/utils/security.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from cryptography.fernet import Fernet
2
+ import base64
3
+
4
+ # --- FIX: A Valid, Consistent 32-byte Base64 Key ---
5
+ # This key stays constant, so decryption will always succeed.
6
+ DEFAULT_KEY = b'8_sW7x9y2z4A5b6C8d9E0f1G2h3I4j5K6l7M8n9O0pQ='
7
+
8
class SecurityUtils:
    """Symmetric (Fernet) encryption helpers for credential storage.

    SECURITY FIX: the cipher key can now come from the
    OMNI_ENCRYPTION_KEY environment variable. The hardcoded DEFAULT_KEY
    remains only as a development fallback, so existing encrypted data
    still decrypts when the variable is unset.
    """

    @staticmethod
    def get_cipher():
        # Prefer a key from the environment (production); fall back to the
        # fixed development key so behavior is unchanged when unset.
        import os
        env_key = os.environ.get("OMNI_ENCRYPTION_KEY")
        return Fernet(env_key.encode() if env_key else DEFAULT_KEY)

    @staticmethod
    def encrypt(data: str) -> str:
        """Encrypt a string; empty input round-trips as empty."""
        if not data: return ""
        cipher = SecurityUtils.get_cipher()
        return cipher.encrypt(data.encode()).decode()

    @staticmethod
    def decrypt(token: str) -> str:
        """Decrypt a token produced by encrypt().

        Raises:
            ValueError: when the key is wrong or the data is corrupted.
        """
        if not token: return ""
        cipher = SecurityUtils.get_cipher()
        try:
            return cipher.decrypt(token.encode()).decode()
        except Exception as e:
            print(f"🔐 Decryption Failed: {e}")
            # Chain the original cause for easier debugging.
            raise ValueError("Invalid Key or Corrupted Data") from e
dummy_cms_data.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "_id": "p1",
4
+ "_type": "product",
5
+ "title": "Classic Blue Denim Jacket",
6
+ "price": 59.99,
7
+ "inStock": true,
8
+ "colors": ["Blue", "Black"],
9
+ "description": "A timeless classic denim jacket for all seasons."
10
+ },
11
+ {
12
+ "_id": "p2",
13
+ "_type": "product",
14
+ "title": "Urban Running Shoes",
15
+ "price": 89.50,
16
+ "inStock": true,
17
+ "colors": ["White", "Grey"],
18
+ "description": "High performance running shoes with foam technology."
19
+ },
20
+ {
21
+ "_id": "p3",
22
+ "_type": "product",
23
+ "title": "Graphic Tee - Retro",
24
+ "price": 25.00,
25
+ "inStock": false,
26
+ "colors": ["Red"],
27
+ "description": "100% Cotton tee with retro print."
28
+ },
29
+ {
30
+ "_id": "offer1",
31
+ "_type": "offer",
32
+ "code": "SUMMER20",
33
+ "discount_percentage": 20,
34
+ "active": true,
35
+ "description": "Get 20% off on all summer wear."
36
+ }
37
+ ]
requirements.txt ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accelerate==1.12.0
2
+ aiofiles==25.1.0
3
+ aiohappyeyeballs==2.6.1
4
+ aiohttp==3.13.2
5
+ aiosignal==1.4.0
6
+ aiosqlite==0.21.0
7
+ annotated-doc==0.0.4
8
+ annotated-types==0.7.0
9
+ antlr4-python3-runtime==4.9.3
10
+ anyio==4.12.0
11
+ argon2-cffi==25.1.0
12
+ argon2-cffi-bindings==25.1.0
13
+ asyncpg==0.31.0
14
+ attrs==25.4.0
15
+ backoff==2.2.1
16
+ bcrypt==5.0.0
17
+ beautifulsoup4==4.14.3
18
+ cachetools==6.2.2
19
+ certifi==2025.11.12
20
+ cffi==2.0.0
21
+ charset-normalizer==3.4.4
22
+ click==8.3.1
23
+ colorama==0.4.6
24
+ coloredlogs==15.0.1
25
+ contourpy==1.3.3
26
+ cryptography==46.0.3
27
+ cycler==0.12.1
28
+ dataclasses-json==0.6.7
29
+ Deprecated==1.3.1
30
+ distro==1.9.0
31
+ dnspython==2.8.0
32
+ docx2txt==0.9
33
+ ecdsa==0.19.1
34
+ effdet==0.4.1
35
+ email-validator==2.3.0
36
+ emoji==2.15.0
37
+ et_xmlfile==2.0.0
38
+ fastapi==0.124.0
39
+ filelock==3.20.0
40
+ filetype==1.2.0
41
+ flatbuffers==25.9.23
42
+ fonttools==4.61.0
43
+ frozenlist==1.8.0
44
+ fsspec==2025.12.0
45
+ google-api-core==2.28.1
46
+ google-auth==2.43.0
47
+ google-cloud-vision==3.11.0
48
+ google-genai==1.54.0
49
+ googleapis-common-protos==1.72.0
50
+ greenlet==3.3.0
51
+ groq==0.37.1
52
+ grpcio==1.76.0
53
+ grpcio-status==1.76.0
54
+ h11==0.16.0
55
+ h2==4.3.0
56
+ hpack==4.1.0
57
+ html5lib==1.1
58
+ httpcore==1.0.9
59
+ httptools==0.7.1
60
+ httpx==0.28.1
61
+ httpx-sse==0.4.3
62
+ huggingface-hub==0.36.0
63
+ humanfriendly==10.0
64
+ hyperframe==6.1.0
65
+ idna==3.11
66
+ Jinja2==3.1.6
67
+ jiter==0.12.0
68
+ joblib==1.5.2
69
+ jsonpatch==1.33
70
+ jsonpointer==3.0.0
71
+ kiwisolver==1.4.9
72
+ langchain==1.1.3
73
+ langchain-classic==1.0.0
74
+ langchain-community==0.4.1
75
+ langchain-core==1.1.3
76
+ langchain-google-genai==4.0.0
77
+ langchain-groq==1.1.0
78
+ langchain-huggingface==1.1.0
79
+ langchain-openai==1.1.1
80
+ langchain-qdrant==1.1.0
81
+ langchain-text-splitters==1.0.0
82
+ langdetect==1.0.9
83
+ langgraph==1.0.4
84
+ langgraph-checkpoint==3.0.1
85
+ langgraph-prebuilt==1.0.5
86
+ langgraph-sdk==0.2.15
87
+ langsmith==0.4.57
88
+ lxml==6.0.2
89
+ Markdown==3.10
90
+ MarkupSafe==3.0.3
91
+ marshmallow==3.26.1
92
+ matplotlib==3.10.7
93
+ ml_dtypes==0.5.4
94
+ mpmath==1.3.0
95
+ msoffcrypto-tool==5.4.2
96
+ multidict==6.7.0
97
+ mypy_extensions==1.1.0
98
+ networkx==3.6.1
99
+ nltk==3.9.2
100
+ numpy==2.2.6
101
+ olefile==0.47
102
+ omegaconf==2.3.0
103
+ onnx==1.20.0
104
+ onnxruntime==1.23.2
105
+ openai==2.9.0
106
+ opencv-python==4.12.0.88
107
+ openpyxl==3.1.5
108
+ orjson==3.11.5
109
+ ormsgpack==1.12.0
110
+ packaging==25.0
111
+ pandas==2.3.3
112
+ passlib==1.7.4
113
+ pdf2image==1.17.0
114
+ pdfminer.six==20251107
115
+ pi_heif==1.1.1
116
+ pikepdf==10.0.2
117
+ pillow==12.0.0
118
+ portalocker==3.2.0
119
+ propcache==0.4.1
120
+ proto-plus==1.26.1
121
+ protobuf==6.33.2
122
+ psutil==7.1.3
123
+ psycopg2-binary==2.9.11
124
+ pyasn1==0.6.1
125
+ pyasn1_modules==0.4.2
126
+ pycocotools==2.0.10
127
+ pycparser==2.23
128
+ pydantic==2.12.5
129
+ pydantic-settings==2.12.0
130
+ pydantic_core==2.41.5
131
+ pymongo==4.15.5
132
+ pypandoc==1.16.2
133
+ pyparsing==3.2.5
134
+ pypdf==6.4.1
135
+ pypdfium2==5.1.0
136
+ pyreadline3==3.5.4
137
+ python-dateutil==2.9.0.post0
138
+ python-docx==1.2.0
139
+ python-dotenv==1.2.1
140
+ python-iso639==2025.11.16
141
+ python-jose==3.5.0
142
+ python-magic==0.4.27
143
+ python-multipart==0.0.20
144
+ python-oxmsg==0.0.2
145
+ python-pptx==1.0.2
146
+ pytz==2025.2
147
+ pywin32==311
148
+ PyYAML==6.0.3
149
+ qdrant-client==1.16.1
150
+ RapidFuzz==3.14.3
151
+ regex==2025.11.3
152
+ requests==2.32.5
153
+ requests-toolbelt==1.0.0
154
+ rsa==4.9.1
155
+ safetensors==0.7.0
156
+ scikit-learn==1.7.2
157
+ scipy==1.16.3
158
+ sentence-transformers==5.1.2
159
+ setuptools==80.9.0
160
+ six==1.17.0
161
+ sniffio==1.3.1
162
+ soupsieve==2.8
163
+ SQLAlchemy==2.0.45
164
+ starlette==0.50.0
165
+ sympy==1.14.0
166
+ tenacity==9.1.2
167
+ threadpoolctl==3.6.0
168
+ tiktoken==0.12.0
169
+ timm==1.0.22
170
+ tokenizers==0.22.1
171
+ torch==2.9.1
172
+ torchvision==0.24.1
173
+ tqdm==4.67.1
174
+ transformers==4.57.3
175
+ typing-inspect==0.9.0
176
+ typing-inspection==0.4.2
177
+ typing_extensions==4.15.0
178
+ tzdata==2025.2
179
+ unstructured==0.18.21
180
+ unstructured-client==0.42.4
181
+ unstructured.pytesseract==0.3.15
182
+ unstructured_inference==1.1.2
183
+ urllib3==2.6.1
184
+ uuid_utils==0.12.0
185
+ uvicorn==0.38.0
186
+ watchfiles==1.1.1
187
+ webencodings==0.5.1
188
+ websockets==15.0.1
189
+ wrapt==2.0.1
190
+ xlrd==2.0.2
191
+ xlsxwriter==3.2.9
192
+ xxhash==3.6.0
193
+ yarl==1.22.0
194
+ zstandard==0.25.0
static/widget.js ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
(function() {
    // ----------------------------------------------------
    // 1. CONFIGURATION: read values from the embedding <script> tag.
    //    The host page configures the widget via data-* attributes.
    // ----------------------------------------------------
    const scriptTag = document.currentScript;

    const USER_ID = scriptTag.getAttribute("data-user-id");
    const API_URL = scriptTag.getAttribute("data-api-url");
    const THEME_COLOR = scriptTag.getAttribute("data-theme-color") || "#007bff";

    if (!USER_ID || !API_URL) {
        console.error("OmniAgent Widget Error: data-user-id or data-api-url is missing!");
        return;
    }

    // Modern way to generate a unique ID (avoids the deprecated String.substr).
    // NOTE(review): this is per-page-load only; a reload starts a new session.
    const CHAT_SESSION_ID = "omni_session_" + Math.random().toString(36).slice(2, 11);

    // ----------------------------------------------------
    // 2. STYLES: UI design and positioning
    // ----------------------------------------------------
    const style = document.createElement('style');
    style.innerHTML = `
        #omni-widget-container {
            position: fixed; bottom: 20px; right: 20px; z-index: 9999; font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
            transition: all 0.3s;
        }
        #omni-chat-btn {
            background: ${THEME_COLOR}; color: white; border: none; padding: 15px; border-radius: 50%;
            cursor: pointer; box-shadow: 0 4px 12px rgba(0,0,0,0.4); width: 60px; height: 60px; font-size: 24px;
            display: flex; align-items: center; justify-content: center;
        }
        #omni-chat-window {
            display: none; width: 350px; height: 500px; background: white; border-radius: 10px;
            box-shadow: 0 10px 30px rgba(0,0,0,0.5); flex-direction: column; overflow: hidden;
            margin-bottom: 15px; transform-origin: bottom right; animation: fadeIn 0.3s ease-out;
        }
        #omni-header {
            background: ${THEME_COLOR}; color: white; padding: 15px; font-weight: 600; display: flex;
            justify-content: space-between; align-items: center; border-radius: 10px 10px 0 0;
        }
        #omni-messages { flex: 1; padding: 10px; overflow-y: auto; background: #f0f0f0; }
        #omni-input-area { display: flex; border-top: 1px solid #ddd; }
        #omni-input { flex: 1; padding: 12px; border: none; outline: none; font-size: 14px; }
        #omni-send { background: white; border: none; color: ${THEME_COLOR}; font-weight: bold; cursor: pointer; padding: 0 15px; font-size: 18px; }
        .omni-msg { margin: 8px 0; padding: 10px 15px; border-radius: 15px; max-width: 80%; font-size: 14px; line-height: 1.4; }
        .omni-msg.user { background: ${THEME_COLOR}; color: white; margin-left: auto; border-bottom-right-radius: 2px; }
        .omni-msg.bot { background: #e8e8e8; color: #333; margin-right: auto; border-bottom-left-radius: 2px; }

        @keyframes fadeIn { from { opacity: 0; transform: scale(0.9); } to { opacity: 1; transform: scale(1); } }
    `;
    document.head.appendChild(style);

    // ----------------------------------------------------
    // 3. Build the HTML structure
    // ----------------------------------------------------
    const container = document.createElement('div');
    container.id = 'omni-widget-container';

    const chatWindow = document.createElement('div');
    chatWindow.id = 'omni-chat-window';
    chatWindow.innerHTML = `
        <div id="omni-header">
            <span>Customer Support</span>
            <span style="cursor:pointer; font-size: 18px;" onclick="window.toggleOmniChat()">—</span>
        </div>
        <div id="omni-messages"></div>
        <div id="omni-input-area">
            <input type="text" id="omni-input" placeholder="Type your query..." />
            <button id="omni-send">➤</button>
        </div>
    `;

    const chatBtn = document.createElement('button');
    chatBtn.id = 'omni-chat-btn';
    chatBtn.innerHTML = '💬';

    // Attach via addEventListener instead of an inline onclick attribute.
    chatBtn.addEventListener('click', toggleOmniChat);

    container.appendChild(chatWindow);
    container.appendChild(chatBtn);
    document.body.appendChild(container);

    // ----------------------------------------------------
    // 4. Logic functions (modern event listeners)
    // ----------------------------------------------------

    const inputField = document.getElementById('omni-input');
    const sendButton = document.getElementById('omni-send');

    // Exposed on window because the header's minimize icon uses an inline
    // onclick handler (inline handlers only resolve global names).
    window.toggleOmniChat = function() {
        const win = document.getElementById('omni-chat-window');
        const isVisible = win.style.display === 'flex';
        win.style.display = isVisible ? 'none' : 'flex';
        if (!isVisible) {
            inputField.focus();
        }
    };

    // Escape HTML special characters so message text can never inject
    // markup (XSS) — both user input and the API response are untrusted.
    function escapeHTML(text) {
        const probe = document.createElement('div');
        probe.textContent = text;
        return probe.innerHTML;
    }

    // Render one chat bubble. Text is escaped first, then bare URLs are
    // turned into clickable links. color:inherit keeps links readable in
    // both bubbles (white-on-theme for user, dark-on-grey for bot).
    function addMessage(text, sender) {
        const msgs = document.getElementById('omni-messages');
        const div = document.createElement('div');
        div.className = `omni-msg ${sender}`;
        div.innerHTML = escapeHTML(text).replace(
            /(https?:\/\/[^\s]+)/g,
            '<a href="$1" target="_blank" rel="noopener noreferrer" style="color:inherit; text-decoration:underline;">$1</a>'
        );
        msgs.appendChild(div);
        msgs.scrollTop = msgs.scrollHeight;  // keep the newest message in view
    }

    // Post the user's message to the backend and render the reply.
    // Input/button are disabled while the request is in flight so the
    // same message cannot be double-submitted.
    async function sendMessage() {
        const originalBtnText = sendButton.innerHTML;
        const text = inputField.value.trim();
        if (!text) return;

        addMessage(text, 'user');
        inputField.value = '';
        inputField.disabled = true;
        sendButton.innerHTML = '...';
        sendButton.disabled = true;

        try {
            // Backend API call
            const response = await fetch(`${API_URL}/api/v1/chat`, {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify({
                    message: text,
                    session_id: CHAT_SESSION_ID,
                    user_id: USER_ID
                })
            });
            // Treat HTTP 4xx/5xx as failures instead of parsing an error
            // body as if it were a successful reply.
            if (!response.ok) {
                throw new Error(`HTTP ${response.status}`);
            }
            const data = await response.json();
            addMessage(data.response, 'bot');
        } catch (error) {
            addMessage("Error: Could not connect to the Agent.", 'bot');
            console.error("OmniAgent API Error:", error);
        } finally {
            inputField.disabled = false;
            sendButton.innerHTML = originalBtnText;
            sendButton.disabled = false;
            inputField.focus();
        }
    }

    // Modern event listeners ('keypress' is deprecated; 'keydown' is the
    // supported equivalent for Enter-to-send).
    sendButton.addEventListener('click', sendMessage);
    inputField.addEventListener('keydown', (e) => {
        if (e.key === 'Enter') {
            sendMessage();
            e.preventDefault(); // suppress Enter's default action
        }
    });
})();