Spaces:
Sleeping
Sleeping
USAMA BHATTI committed on
Commit ·
370480b
0
Parent(s):
Adding local files to new repository
Browse files- .dockerignore +28 -0
- .gitignore +27 -0
- Dockerfile +32 -0
- Procfile +1 -0
- backend/src/api/routes/auth.py +67 -0
- backend/src/api/routes/chat.py +50 -0
- backend/src/api/routes/deps.py +45 -0
- backend/src/api/routes/ingestion.py +148 -0
- backend/src/api/routes/settings.py +299 -0
- backend/src/core/config.py +76 -0
- backend/src/db/base.py +5 -0
- backend/src/db/session.py +36 -0
- backend/src/init_db.py +27 -0
- backend/src/main.py +53 -0
- backend/src/models/chat.py +17 -0
- backend/src/models/ingestion.py +40 -0
- backend/src/models/integration.py +34 -0
- backend/src/models/user.py +19 -0
- backend/src/schemas/chat.py +15 -0
- backend/src/services/chat_service.py +598 -0
- backend/src/services/connectors/base.py +36 -0
- backend/src/services/connectors/cms_base.py +30 -0
- backend/src/services/connectors/mongo_connector.py +85 -0
- backend/src/services/connectors/sanity_connector.py +133 -0
- backend/src/services/embeddings/factory.py +48 -0
- backend/src/services/ingestion/crawler.py +169 -0
- backend/src/services/ingestion/file_processor.py +94 -0
- backend/src/services/ingestion/guardrail_factory.py +28 -0
- backend/src/services/ingestion/web_processor.py +53 -0
- backend/src/services/ingestion/zip_processor.py +132 -0
- backend/src/services/llm/factory.py +66 -0
- backend/src/services/routing/semantic_router.py +52 -0
- backend/src/services/security/pii_scrubber.py +67 -0
- backend/src/services/tools/cms_agent.py +67 -0
- backend/src/services/tools/cms_tool.py +74 -0
- backend/src/services/tools/nosql_agent.py +65 -0
- backend/src/services/tools/nosql_tool.py +61 -0
- backend/src/services/tools/secure_agent.py +57 -0
- backend/src/services/tools/sql_tool.py +45 -0
- backend/src/services/vector_store/qdrant_adapter.py +78 -0
- backend/src/utils/auth.py +30 -0
- backend/src/utils/security.py +29 -0
- dummy_cms_data.json +37 -0
- requirements.txt +194 -0
- static/widget.js +153 -0
.dockerignore
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python
|
| 2 |
+
__pycache__
|
| 3 |
+
*.pyc
|
| 4 |
+
*.pyo
|
| 5 |
+
*.pyd
|
| 6 |
+
.Python
|
| 7 |
+
env/
|
| 8 |
+
venv/
|
| 9 |
+
.venv/
|
| 10 |
+
pip-log.txt
|
| 11 |
+
pip-delete-this-directory.txt
|
| 12 |
+
|
| 13 |
+
# Git
|
| 14 |
+
.git
|
| 15 |
+
.gitignore
|
| 16 |
+
|
| 17 |
+
# OS
|
| 18 |
+
.DS_Store
|
| 19 |
+
Thumbs.db
|
| 20 |
+
|
| 21 |
+
# Logs & Temp
|
| 22 |
+
*.log
|
| 23 |
+
uploaded_files/
|
| 24 |
+
temp_unzip_*/
|
| 25 |
+
|
| 26 |
+
# Local DBs (Don't copy local DBs into image, use volumes instead)
|
| 27 |
+
omni_agent.db
|
| 28 |
+
fake_ecommerce.db
|
.gitignore
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# --- Security (never commit or upload these) ---
|
| 2 |
+
.env
|
| 3 |
+
.env.local
|
| 4 |
+
|
| 5 |
+
# --- Python Garbage ---
|
| 6 |
+
__pycache__/
|
| 7 |
+
*.pyc
|
| 8 |
+
*.pyo
|
| 9 |
+
*.pyd
|
| 10 |
+
|
| 11 |
+
# --- Virtual Environment (Heavy folders) ---
|
| 12 |
+
venv/
|
| 13 |
+
env/
|
| 14 |
+
.venv/
|
| 15 |
+
|
| 16 |
+
# --- Local Databases (a fresh one is created on Railway) ---
|
| 17 |
+
omni_agent.db
|
| 18 |
+
fake_ecommerce.db
|
| 19 |
+
*.sqlite3
|
| 20 |
+
|
| 21 |
+
# --- OS Junk ---
|
| 22 |
+
.DS_Store
|
| 23 |
+
Thumbs.db
|
| 24 |
+
|
| 25 |
+
# --- Logs ---
|
| 26 |
+
*.log
|
| 27 |
+
uploaded_files/
|
Dockerfile
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 1. Base image (lightweight Python)
FROM python:3.10-slim

# 2. Environment variables
# Use the ENV key=value form; the legacy "ENV key value" form is deprecated.
# Prevents Python from writing .pyc files to disk
ENV PYTHONDONTWRITEBYTECODE=1
# Prevents Python from buffering stdout and stderr (logs show up immediately)
ENV PYTHONUNBUFFERED=1

# 3. Install system dependencies
# 'build-essential' is often needed for compiling python packages like numpy/cryptography
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# 4. Set work directory
WORKDIR /app

# 5. Install dependencies (layer-caching strategy)
# Copy requirements FIRST: if requirements don't change, Docker reuses the cached layer.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# 6. Copy application code
COPY . .

# 7. Expose port
EXPOSE 8000

# 8. Run command
# Host 0.0.0.0 so the server is accessible from outside the container
CMD ["uvicorn", "backend.src.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
Procfile
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
web: uvicorn backend.src.main:app --host 0.0.0.0 --port $PORT
|
backend/src/api/routes/auth.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from fastapi import APIRouter, Depends, HTTPException, status
from fastapi.security import OAuth2PasswordRequestForm
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
from pydantic import BaseModel, EmailStr

from backend.src.db.session import get_db
from backend.src.models.user import User
from backend.src.utils.auth import get_password_hash, verify_password, create_access_token

router = APIRouter()

# --- Schemas ---
class UserCreate(BaseModel):
    # Registration payload; full_name is optional.
    email: EmailStr
    password: str
    full_name: str | None = None

class Token(BaseModel):
    # JWT bearer token returned by both /auth/register and /auth/login.
    access_token: str
    token_type: str

# --- 1. Registration endpoint ---
@router.post("/auth/register", response_model=Token)
async def register(user_in: UserCreate, db: AsyncSession = Depends(get_db)):
    """Create a new user account and return an access token (auto-login).

    Raises:
        HTTPException: 400 if the email address is already registered.
    """
    # Reject duplicate email addresses
    result = await db.execute(select(User).where(User.email == user_in.email))
    existing_user = result.scalars().first()

    if existing_user:
        raise HTTPException(
            status_code=400,
            detail="Email already registered"
        )

    # Store only the hashed password — never the plaintext
    new_user = User(
        email=user_in.email,
        hashed_password=get_password_hash(user_in.password),
        full_name=user_in.full_name
    )
    db.add(new_user)
    await db.commit()
    await db.refresh(new_user)

    # Issue a token immediately so the client is logged in right away
    access_token = create_access_token(data={"sub": str(new_user.id)})
    return {"access_token": access_token, "token_type": "bearer"}

# --- 2. Login endpoint ---
@router.post("/auth/login", response_model=Token)
async def login(form_data: OAuth2PasswordRequestForm = Depends(), db: AsyncSession = Depends(get_db)):
    """Verify credentials and return a JWT access token.

    Raises:
        HTTPException: 401 on unknown email or wrong password.
    """
    # The OAuth2 password flow puts the email in the 'username' form field
    result = await db.execute(select(User).where(User.email == form_data.username))
    user = result.scalars().first()

    # One generic error for both "no such user" and "bad password"
    # avoids leaking which accounts exist (user enumeration).
    if not user or not verify_password(form_data.password, user.hashed_password):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Incorrect email or password",
            headers={"WWW-Authenticate": "Bearer"},
        )

    access_token = create_access_token(data={"sub": str(user.id)})
    return {"access_token": access_token, "token_type": "bearer"}
|
backend/src/api/routes/chat.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy.ext.asyncio import AsyncSession
from backend.src.db.session import get_db
from backend.src.schemas.chat import ChatRequest, ChatResponse
from backend.src.services.chat_service import process_chat
from backend.src.core.config import settings

# --- Security imports ---
from backend.src.api.routes.deps import get_current_user
from backend.src.models.user import User

router = APIRouter()

@router.post("/chat", response_model=ChatResponse)
async def chat_endpoint(
    request: ChatRequest,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)  # auth required
):
    """
    Protected chat endpoint.

    Only accessible with a valid JWT token. The user identity is taken
    from the verified token, never from the request body.

    Raises:
        HTTPException: 500 on any processing failure (details logged server-side).
    """
    try:
        # The user ID comes from the token, so it is trusted.
        # The client may supply its own session_id to keep separate chat threads.
        user_id = str(current_user.id)
        session_id = request.session_id or user_id  # fall back to a per-user session

        # user_id must be forwarded so chat_service can scope data per user
        response_text = await process_chat(
            message=request.message,
            session_id=session_id,
            user_id=user_id,
            db=db
        )

        return ChatResponse(
            response=response_text,
            session_id=session_id,
            # NOTE(review): provider is hardcoded here; ideally chat_service
            # should report which provider actually answered.
            provider="omni_agent"
        )

    except Exception as e:
        # Log the full traceback server-side; surface a 500 to the client
        print(f"Error in chat endpoint: {e}")
        import traceback
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
|
backend/src/api/routes/deps.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from fastapi import Depends, HTTPException, status
from fastapi.security import OAuth2PasswordBearer
from jose import jwt, JWTError
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select

from backend.src.core.config import settings
from backend.src.db.session import get_db
from backend.src.models.user import User
from backend.src.utils.auth import ALGORITHM

# Tells Swagger UI (and clients) where to obtain a token (/auth/login)
oauth2_scheme = OAuth2PasswordBearer(tokenUrl=f"{settings.API_V1_STR}/auth/login")

async def get_current_user(
    token: str = Depends(oauth2_scheme),
    db: AsyncSession = Depends(get_db)
) -> User:
    """
    Dependency that runs before every protected route.

    Verifies the JWT signature and loads the matching user from the database.

    Raises:
        HTTPException: 401 if the token is invalid, the 'sub' claim is
            missing or malformed, or the user no longer exists.
    """
    credentials_exception = HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Could not validate credentials",
        headers={"WWW-Authenticate": "Bearer"},
    )

    try:
        # Decode and verify the token
        payload = jwt.decode(token, settings.SECRET_KEY, algorithms=[ALGORITHM])
        user_id: str = payload.get("sub")
        if user_id is None:
            raise credentials_exception
    except JWTError:
        raise credentials_exception

    # FIX: a token whose 'sub' is not numeric previously crashed int() with a
    # ValueError (uncaught -> 500). Treat it as invalid credentials instead.
    try:
        uid = int(user_id)
    except (TypeError, ValueError):
        raise credentials_exception

    # Look the user up in the database
    result = await db.execute(select(User).where(User.id == uid))
    user = result.scalars().first()

    if user is None:
        raise credentials_exception

    return user
|
backend/src/api/routes/ingestion.py
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os
import shutil
from fastapi import APIRouter, UploadFile, File, HTTPException, Form, BackgroundTasks, Depends
from pydantic import BaseModel
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select

# --- Security imports ---
from backend.src.api.routes.deps import get_current_user
from backend.src.models.user import User

# --- Internal services & DB imports ---
from backend.src.services.ingestion.file_processor import process_file
from backend.src.services.ingestion.crawler import SmartCrawler
from backend.src.services.ingestion.zip_processor import SmartZipProcessor
from backend.src.db.session import get_db, AsyncSessionLocal
from backend.src.models.ingestion import IngestionJob, JobStatus, IngestionType

# --- CONFIG ---
MAX_ZIP_SIZE_MB = 100
MAX_ZIP_SIZE_BYTES = MAX_ZIP_SIZE_MB * 1024 * 1024

router = APIRouter()
UPLOAD_DIRECTORY = "./uploaded_files"

# ==========================================
# FILE UPLOAD (Protected)
# ==========================================
@router.post("/ingest/upload")
async def upload_and_process_file(
    session_id: str = Form(...),
    file: UploadFile = File(...),
    current_user: User = Depends(get_current_user)  # auth required
):
    """Persist an uploaded file, chunk/embed it, then remove the temp copy.

    Raises:
        HTTPException: 400 if the file yields no chunks, 500 on other errors.
    """
    os.makedirs(UPLOAD_DIRECTORY, exist_ok=True)

    # SECURITY FIX: strip any directory components from the client-supplied
    # filename to prevent path traversal (e.g. "../../etc/passwd").
    safe_name = os.path.basename(file.filename or "upload")
    file_path = os.path.join(UPLOAD_DIRECTORY, safe_name)
    try:
        with open(file_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

        chunks_added = await process_file(file_path, session_id)
        if chunks_added <= 0:
            raise HTTPException(status_code=400, detail="Could not process file.")

        return {
            "message": "File processed successfully",
            "filename": file.filename,
            "chunks_added": chunks_added,
            "session_id": session_id
        }
    except HTTPException:
        # FIX: previously the 400 above was caught by the generic handler
        # below and re-raised as a 500. Preserve intentional status codes.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        # Always delete the temp copy, success or failure
        if os.path.exists(file_path):
            os.remove(file_path)

# ==========================================
# WEB CRAWLER (Protected)
# ==========================================
class WebIngestRequest(BaseModel):
    url: str
    session_id: str
    crawl_type: str = "single_page"

async def run_crawler_task(job_id, url, session_id, crawl_type, db_factory):
    # Background task: runs with its own DB session (the request session is closed).
    async with db_factory() as db:
        crawler = SmartCrawler(job_id, url, session_id, crawl_type, db)
        await crawler.start()

@router.post("/ingest/url")
async def start_web_ingestion(
    request: WebIngestRequest,
    background_tasks: BackgroundTasks,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)  # auth required
):
    """Create a crawl job record and kick off the crawler in the background."""
    new_job = IngestionJob(
        session_id=request.session_id,
        ingestion_type=IngestionType.URL,
        source_name=request.url,
        status=JobStatus.PENDING
    )
    db.add(new_job)
    await db.commit()
    await db.refresh(new_job)

    # Pass the session factory, not the request-scoped session
    background_tasks.add_task(run_crawler_task, new_job.id, request.url, request.session_id, request.crawl_type, AsyncSessionLocal)
    return {"message": "Ingestion job started", "job_id": new_job.id}

@router.get("/ingest/status/{job_id}")
async def check_job_status(
    job_id: int,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)  # auth required
):
    """Return the IngestionJob row for polling clients (404 if unknown)."""
    result = await db.execute(select(IngestionJob).where(IngestionJob.id == job_id))
    job = result.scalars().first()
    if not job:
        raise HTTPException(status_code=404, detail="Job not found")
    return job

# ==========================================
# BULK ZIP UPLOAD (Protected)
# ==========================================
async def run_zip_task(job_id, zip_path, session_id, db_factory):
    # Background task: extracts and processes the zip with its own DB session.
    async with db_factory() as db:
        processor = SmartZipProcessor(job_id, zip_path, session_id, db)
        await processor.start()

@router.post("/ingest/upload-zip")
async def upload_and_process_zip(
    session_id: str = Form(...),
    file: UploadFile = File(...),
    background_tasks: BackgroundTasks = BackgroundTasks(),
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)  # auth required
):
    """Accept a .zip (size-capped), record a job, and process it in the background.

    Raises:
        HTTPException: 400 for non-zip uploads, 413 when over the size limit.
    """
    safe_name = os.path.basename(file.filename or "")
    if not safe_name.endswith(".zip"):
        raise HTTPException(status_code=400, detail="Only .zip files are allowed.")
    # FIX: UploadFile.size can be None (no Content-Length); guard before comparing.
    if file.size is not None and file.size > MAX_ZIP_SIZE_BYTES:
        raise HTTPException(status_code=413, detail=f"File too large. Max size is {MAX_ZIP_SIZE_MB} MB.")

    zip_dir = os.path.join(UPLOAD_DIRECTORY, "zips")
    os.makedirs(zip_dir, exist_ok=True)
    # safe_name (basename) prevents path traversal via the uploaded filename
    file_path = os.path.join(zip_dir, f"job_{session_id}_{safe_name}")

    with open(file_path, "wb") as buffer:
        shutil.copyfileobj(file.file, buffer)

    new_job = IngestionJob(
        session_id=session_id,
        ingestion_type=IngestionType.ZIP,
        source_name=file.filename,
        status=JobStatus.PENDING
    )
    db.add(new_job)
    await db.commit()
    await db.refresh(new_job)

    background_tasks.add_task(run_zip_task, new_job.id, file_path, session_id, AsyncSessionLocal)
    return {"message": "Zip processing started", "job_id": new_job.id}
|
backend/src/api/routes/settings.py
ADDED
|
@@ -0,0 +1,299 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import json
from fastapi import APIRouter, Depends, HTTPException, status
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
from sqlalchemy import create_engine, inspect
from pymongo import MongoClient
from pydantic import BaseModel
from typing import Dict, List, Any, Tuple

# --- Internal imports ---
from backend.src.db.session import get_db
from backend.src.models.user import User
from backend.src.models.integration import UserIntegration
from backend.src.api.routes.deps import get_current_user

# --- Connectors ---
from backend.src.services.connectors.sanity_connector import SanityConnector

# --- AI & LLM ---
from backend.src.services.llm.factory import get_llm_model
from langchain_core.messages import HumanMessage

router = APIRouter()

# ==========================================
# DATA MODELS
# ==========================================
class IntegrationUpdateRequest(BaseModel):
    provider: str
    credentials: Dict[str, Any]

class RefreshSchemaRequest(BaseModel):
    provider: str

class ConnectedServiceResponse(BaseModel):
    provider: str
    is_active: bool
    description: str | None = None
    last_updated: str | None = None

class UserSettingsResponse(BaseModel):
    user_email: str
    connected_services: List[ConnectedServiceResponse]

# --- Bot profile model ---
class BotSettingsRequest(BaseModel):
    bot_name: str
    bot_instruction: str

# ==========================================
# THE DYNAMIC PROFILER (No Bias) 🧠
# ==========================================

async def generate_data_profile(schema_map: dict, provider: str) -> str:
    """
    Generate a keyword-rich description of a data source from its schema only,
    without any domain bias. Falls back to a generic string on any failure.
    """
    try:
        if not schema_map: return f"Connected to {provider}."

        llm = get_llm_model()
        # Cap prompt size; very large schemas are truncated
        schema_str = json.dumps(schema_map)[:3500]

        prompt = f"""
        Act as a Database Architect. Your job is to analyze the provided Database Schema and generate a 'Semantic Description' for an AI Router.

        --- INPUT SCHEMA ({provider}) ---
        {schema_str}

        --- INSTRUCTIONS ---
        1. Analyze the Table Names (or Collections/Types) and Field Names deeply.
        2. Identify the core "Business Concepts" represented in this data.
        3. Construct a dense, keyword-rich summary that describes EXACTLY what is in this database.
        4. **STRICT RULE:** Do NOT use generic words like "solution" or "platform". Use specific nouns found in the schema (e.g., "invoices", "appointments", "inventory", "cement", "users").
        5. Do NOT guess. Only describe what you see in the schema keys.

        --- OUTPUT FORMAT ---
        Write a single paragraph (approx 30 words) describing the data contents.
        Description:
        """

        response = await llm.ainvoke([HumanMessage(content=prompt)])
        return response.content.strip()
    except Exception as e:
        # Best-effort: profiling failure must never block a connection
        print(f"⚠️ Profiling failed: {e}")
        return f"Contains data from {provider}."

async def perform_discovery(provider: str, credentials: Dict[str, Any]) -> Tuple[Dict, str]:
    """
    Shared schema-discovery routine used by both Connect and Refresh.

    Returns:
        (schema_map, description) — both may be empty/None on failure.
    """
    schema_map = {}
    description = None

    try:
        # --- CASE A: SANITY ---
        if provider == 'sanity':
            connector = SanityConnector(credentials=credentials)
            if connector.connect():
                schema_map = connector.fetch_schema_structure()
                description = await generate_data_profile(schema_map, 'Sanity CMS')

        # --- CASE B: SQL DATABASE ---
        elif provider == 'sql':
            db_url = credentials.get('connection_string') or credentials.get('url')
            if db_url:
                engine = create_engine(db_url)
                try:
                    inspector = inspect(engine)
                    tables = inspector.get_table_names()

                    schema_map = {"tables": tables}
                    # Only expand columns for small schemas to keep the prompt short
                    if len(tables) < 15:
                        for t in tables:
                            try:
                                cols = [c['name'] for c in inspector.get_columns(t)]
                                schema_map[t] = cols
                            except Exception:
                                # Best-effort: skip tables whose columns can't be read
                                pass
                finally:
                    # FIX: release pooled connections (previously leaked)
                    engine.dispose()

                description = await generate_data_profile(schema_map, 'SQL Database')

        # --- CASE C: MONGODB ---
        elif provider == 'mongodb':
            mongo_uri = credentials.get('connection_string') or credentials.get('url')
            if mongo_uri:
                client = MongoClient(mongo_uri)
                try:
                    db_name = client.get_database().name
                    collections = client[db_name].list_collection_names()

                    schema_map = {"collections": collections}
                    # Sample one document per collection to learn field names
                    for col in collections[:5]:
                        one_doc = client[db_name][col].find_one()
                        if one_doc:
                            keys = [k for k in list(one_doc.keys()) if not k.startswith('_')]
                            schema_map[col] = keys
                finally:
                    # FIX: close the client (previously leaked a connection pool)
                    client.close()

                description = await generate_data_profile(schema_map, 'MongoDB NoSQL')

        # --- CASE D: QDRANT / OTHERS ---
        elif provider == 'qdrant':
            description = "Contains uploaded documents, policies, and knowledge base."

        return schema_map, description

    except Exception as e:
        # Discovery is best-effort; report the failure in the description
        print(f"❌ Discovery Error for {provider}: {e}")
        return {}, f"Connected to {provider} (Auto-discovery failed: {str(e)})"

# ==========================================
# 1. SAVE / CONNECT INTEGRATION
# ==========================================
@router.post("/settings/integration", status_code=status.HTTP_201_CREATED)
async def save_or_update_integration(
    data: IntegrationUpdateRequest,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Create or update an integration, auto-discovering its schema and profile."""
    try:
        query = select(UserIntegration).where(
            UserIntegration.user_id == str(current_user.id),
            UserIntegration.provider == data.provider
        )
        result = await db.execute(query)
        existing_integration = result.scalars().first()

        credentials_json = json.dumps(data.credentials)
        schema_map, description = await perform_discovery(data.provider, data.credentials)

        if existing_integration:
            existing_integration.credentials = credentials_json
            existing_integration.is_active = True
            # Only overwrite discovery results when discovery succeeded
            if schema_map: existing_integration.schema_map = schema_map
            if description: existing_integration.profile_description = description
            message = f"Integration for {data.provider} updated."
        else:
            new_integration = UserIntegration(
                user_id=str(current_user.id),
                provider=data.provider,
                is_active=True,
                schema_map=schema_map,
                profile_description=description
            )
            # Set credentials via attribute (may go through a model-level setter)
            new_integration.credentials = credentials_json
            db.add(new_integration)
            message = f"Integration for {data.provider} connected."

        await db.commit()
        return {
            "message": message,
            "provider": data.provider,
            "profile": description
        }

    except Exception as e:
        await db.rollback()
        print(f"❌ Error saving integration: {e}")
        raise HTTPException(status_code=500, detail=str(e))

# ==========================================
# 2. REFRESH SCHEMA
# ==========================================
@router.post("/settings/integration/refresh")
async def refresh_integration_schema(
    data: RefreshSchemaRequest,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Re-run discovery for an existing integration and store the new schema/profile.

    Raises:
        HTTPException: 404 if the integration doesn't exist, 500 on other errors.
    """
    print(f"🔄 Refreshing schema for {data.provider} (User: {current_user.id})")

    try:
        stmt = select(UserIntegration).where(
            UserIntegration.user_id == str(current_user.id),
            UserIntegration.provider == data.provider
        )
        result = await db.execute(stmt)
        integration = result.scalars().first()

        if not integration:
            raise HTTPException(status_code=404, detail="Integration not found. Please connect first.")

        creds_str = integration.credentials
        creds_dict = json.loads(creds_str)

        new_schema, new_description = await perform_discovery(data.provider, creds_dict)

        if new_schema:
            integration.schema_map = dict(new_schema)

        if new_description:
            integration.profile_description = new_description

        await db.commit()

        return {
            "message": "Schema and profile refreshed successfully!",
            "provider": data.provider,
            "new_profile": new_description
        }

    except HTTPException:
        # FIX: the 404 above was previously swallowed by the generic handler
        # below and re-raised as a 500. Preserve intentional status codes.
        raise
    except Exception as e:
        print(f"❌ Refresh Failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
| 243 |
+
|
| 244 |
+
# ==========================================
# 3. UPDATE BOT PROFILE
# ==========================================
@router.post("/settings/bot-profile")
async def update_bot_profile(
    data: BotSettingsRequest,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Set the chatbot's display name and system instruction for the current user.
    """
    try:
        current_user.bot_name = data.bot_name
        current_user.bot_instruction = data.bot_instruction

        db.add(current_user)
        await db.commit()

        return {
            "message": "Bot profile updated successfully!",
            "bot_name": data.bot_name,
            "bot_instruction": data.bot_instruction
        }
    except Exception as e:
        # FIX: roll back on failure (consistent with the other endpoints in this
        # router) so the session is not left in a broken state.
        await db.rollback()
        print(f"❌ Bot Profile Update Failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
| 271 |
+
|
| 272 |
+
# ==========================================
# 4. GET USER INTEGRATIONS
# ==========================================
@router.get("/settings/integrations", response_model=UserSettingsResponse)
async def get_user_integrations(
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Return every integration record belonging to the current user."""
    stmt = select(UserIntegration).where(
        UserIntegration.user_id == str(current_user.id)
    )
    rows = (await db.execute(stmt)).scalars().all()

    # Build one response entry per stored integration; prefer the update
    # timestamp, falling back to creation time when the row was never updated.
    services = []
    for record in rows:
        last_touched = record.updated_at if record.updated_at else record.created_at
        services.append(
            ConnectedServiceResponse(
                provider=record.provider,
                is_active=record.is_active,
                description=record.profile_description,
                last_updated=str(last_touched),
            )
        )

    return {
        "user_email": current_user.email,
        "connected_services": services,
    }
|
backend/src/core/config.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import os
import re
from pydantic_settings import BaseSettings, SettingsConfigDict
from functools import lru_cache
from dotenv import load_dotenv

load_dotenv()


def _mask_db_url(url: str) -> str:
    """Hide the password portion of a DB URL so it is safe to log."""
    return re.sub(r"//([^:/@]+):[^@]+@", r"//\1:***@", url)


class Settings(BaseSettings):
    """Central application configuration, loaded from the environment / .env file."""

    # ------------------- CORE PROJECT SETTINGS -------------------
    PROJECT_NAME: str = "OmniAgent Core"
    VERSION: str = "1.0.0"
    API_V1_STR: str = "/api/v1"

    # ------------------- SECURITY -------------------
    # Used to sign JWT tokens — MUST be overridden in production.
    SECRET_KEY: str = os.getenv("SECRET_KEY", "super-secret-key-change-me")
    ACCESS_TOKEN_EXPIRE_MINUTES: int = 30

    # ------------------- NETWORK / HOSTING -------------------
    QDRANT_HOST: str = os.getenv("QDRANT_HOST", "localhost")
    QDRANT_PORT: int = 6333

    MONGO_HOST: str = os.getenv("MONGO_HOST", "localhost")
    MONGO_PORT: int = int(os.getenv("MONGO_PORT", 27018))
    MONGO_USER: str = os.getenv("MONGO_INITDB_ROOT_USERNAME", "admin")
    MONGO_PASS: str = os.getenv("MONGO_INITDB_ROOT_PASSWORD", "super_secret_admin_pass")

    # ------------------- DATABASES -------------------
    # Raw URL as provided by the environment; normalized by the property below.
    _DATABASE_URL: str = os.getenv("POSTGRES_URL", "sqlite+aiosqlite:///./omni_agent.db")

    @property
    def DATABASE_URL(self) -> str:
        """Normalize the configured URL to an asyncpg-compatible SQLAlchemy URL."""
        url = self._DATABASE_URL
        # Strip query params (e.g. ?sslmode=...) that asyncpg does not accept in the URL.
        if url and "?" in url:
            url = url.split("?")[0]
        # Heroku/Neon style 'postgres://' and plain 'postgresql://' both need the
        # async driver suffix for SQLAlchemy's async engine.
        if url and url.startswith("postgres://"):
            url = url.replace("postgres://", "postgresql+asyncpg://", 1)
        elif url and url.startswith("postgresql://") and "+asyncpg" not in url:
            url = url.replace("postgresql://", "postgresql+asyncpg://", 1)

        # FIX: the previous debug print leaked the full URL (including the
        # password) to the console. Mask credentials before logging.
        print(f"🕵️ DEBUG: Connecting to DB URL: {_mask_db_url(url)}")
        return url

    @property
    def QDRANT_URL(self) -> str:
        """Full Qdrant endpoint; honors a pre-built http(s) URL in QDRANT_HOST."""
        if self.QDRANT_HOST.startswith("http"):
            return self.QDRANT_HOST
        return f"http://{self.QDRANT_HOST}:{self.QDRANT_PORT}"

    QDRANT_COLLECTION_NAME: str = "omni_agent_main_collection"
    QDRANT_API_KEY: str | None = None

    # ------------------- RAG / EMBEDDINGS -------------------
    EMBEDDING_PROVIDER: str = "local"
    EMBEDDING_MODEL_NAME: str = "sentence-transformers/all-MiniLM-L6-v2"

    # ------------------- AI MODELS -------------------
    LLM_PROVIDER: str = "generic"
    LLM_MODEL_NAME: str = "gpt-3.5-turbo"
    LLM_BASE_URL: str | None = None
    LLM_API_KEY: str | None = None

    GROQ_API_KEY: str | None = None
    GOOGLE_API_KEY: str | None = None
    OPENAI_API_KEY: str | None = None

    model_config = SettingsConfigDict(env_file=".env", extra="ignore", env_file_encoding='utf-8')

@lru_cache()
def get_settings():
    """Return a cached singleton Settings instance."""
    return Settings()

settings = get_settings()
|
backend/src/db/base.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# backend/src/db/base.py
# FIX: `sqlalchemy.ext.declarative.declarative_base` is the deprecated 1.x
# import path; since SQLAlchemy 1.4 it lives in `sqlalchemy.orm`.
from sqlalchemy.orm import declarative_base

# All ORM models inherit from this Base class.
Base = declarative_base()
|
backend/src/db/session.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker
from sqlalchemy import create_engine
from backend.src.core.config import settings

# Driver-specific connection arguments.
# SQLite needs check_same_thread=False because the async session may hand the
# connection across threads.
connect_args = {}
if "sqlite" in settings.DATABASE_URL:
    connect_args = {"check_same_thread": False}

# --- Robust engine creation ---
# Pool settings tuned for serverless Postgres hosts (e.g. Neon) that drop
# idle connections.
engine = create_async_engine(
    settings.DATABASE_URL,
    echo=False,
    connect_args=connect_args,
    pool_size=5,          # keep a pool of 5 connections
    max_overflow=10,      # allow up to 10 extra connections under load
    pool_recycle=300,     # refresh connections every 5 minutes (fixes idle "sleep" drops)
    pool_pre_ping=True,   # verify the connection is alive before each checkout
)

# Session factory bound to the engine above.
# expire_on_commit=False keeps ORM objects usable after commit (no lazy refresh).
AsyncSessionLocal = async_sessionmaker(
    bind=engine,
    class_=AsyncSession,
    expire_on_commit=False,
    autoflush=False,
)

# FastAPI dependency: yields a session per request and guarantees it is closed.
async def get_db():
    """Yield an AsyncSession for the duration of a request, closing it afterwards."""
    async with AsyncSessionLocal() as session:
        try:
            yield session
        finally:
            await session.close()
|
backend/src/init_db.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
from backend.src.db.session import engine
from backend.src.db.base import Base

# --- Import ALL models here ---
# Required so SQLAlchemy registers every table on Base.metadata before create_all.
from backend.src.models.chat import ChatHistory
from backend.src.models.ingestion import IngestionJob
from backend.src.models.integration import UserIntegration  # <--- contains the new column
from backend.src.models.user import User

async def init_database():
    """Drop and recreate all tables. DESTRUCTIVE — intended for dev environments only."""
    print("🚀 Connecting to the database...")
    async with engine.begin() as conn:
        # --- CRITICAL FOR SCHEMA UPDATE ---
        # Old tables are dropped so the new 'profile_description' column can be added.
        # NOTE: this wipes all existing data (acceptable for a dev environment).
        print("🗑️ Dropping old tables to apply new Schema...")
        await conn.run_sync(Base.metadata.drop_all)

        print("⚙️ Creating new tables (Users, Chats, Integrations, Jobs)...")
        await conn.run_sync(Base.metadata.create_all)
        print("✅ Database tables created successfully!")

if __name__ == "__main__":
    print("Starting database initialization...")
    asyncio.run(init_database())
|
backend/src/main.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles  # <--- New Import
from fastapi.middleware.cors import CORSMiddleware
from backend.src.core.config import settings

# --- API Route Imports ---
from backend.src.api.routes import chat, ingestion, auth, settings as settings_route

# 1. Initialize the application.
app = FastAPI(
    title=settings.PROJECT_NAME,
    version=settings.VERSION,
    description="OmniAgent Core API - The Intelligent Employee"
)

# 2. CORS setup (security).
# Allows the frontend to talk to this backend.
# NOTE(review): allow_origins=["*"] should be narrowed to specific domains in production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # tighten to specific domains for production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# 3. Mount static files (for the embeddable chat widget). 🎨
# Create the 'static' folder if it does not exist yet.
if not os.path.exists("static"):
    os.makedirs("static")

# Any file placed in 'static/' becomes reachable at '/static/<filename>'.
app.mount("/static", StaticFiles(directory="static"), name="static")

# 4. Health-check route.
@app.get("/")
async def root():
    """Simple liveness endpoint that also advertises the widget URL."""
    return {
        "message": "Welcome to OmniAgent Core 🚀",
        "status": "active",
        "widget_url": "/static/widget.js"  # where the embeddable widget is served
    }

# 5. API router includes — all mounted under the versioned prefix.
app.include_router(auth.router, prefix=settings.API_V1_STR, tags=["Authentication"])
app.include_router(settings_route.router, prefix=settings.API_V1_STR, tags=["User Settings"])
app.include_router(chat.router, prefix=settings.API_V1_STR, tags=["Chat"])
app.include_router(ingestion.router, prefix=settings.API_V1_STR, tags=["Ingestion"])

if __name__ == "__main__":
    import uvicorn
    # Local run command (for debugging).
    uvicorn.run("backend.src.main:app", host="0.0.0.0", port=8000, reload=True)
|
backend/src/models/chat.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# backend/src/models/chat.py
from sqlalchemy import Column, Integer, String, Text, DateTime, Boolean
from sqlalchemy.sql import func
from backend.src.db.base import Base

class ChatHistory(Base):
    """One question/answer exchange within a chat session."""
    __tablename__ = "chat_history"

    id = Column(Integer, primary_key=True, index=True)
    session_id = Column(String, index=True)  # the user's session ID
    human_message = Column(Text)  # what the user said
    ai_message = Column(Text)  # what the bot replied
    timestamp = Column(DateTime(timezone=True), server_default=func.now())  # when the exchange happened

    # Metadata (optional: which tool/provider answered, token usage)
    provider = Column(String)
    tokens_used = Column(Integer, default=0)
|
backend/src/models/ingestion.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sqlalchemy import Column, Integer, String, Text, DateTime, Enum, JSON
from sqlalchemy.sql import func
import enum
from backend.src.db.base import Base

class JobStatus(str, enum.Enum):
    """Lifecycle states of an ingestion job."""
    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"

class IngestionType(str, enum.Enum):
    """Kind of source being ingested."""
    URL = "url"
    ZIP = "zip"
    FILE = "file"  # reserved for future use

class IngestionJob(Base):
    """Tracks the progress and outcome of one ingestion run (URL crawl or archive upload)."""
    __tablename__ = "ingestion_jobs"

    id = Column(Integer, primary_key=True, index=True)
    session_id = Column(String, index=True)

    # Distinguishes a URL job from a zip upload.
    ingestion_type = Column(String, default=IngestionType.URL)
    # The URL, or the name of the uploaded zip file.
    source_name = Column(String, nullable=False)

    status = Column(String, default=JobStatus.PENDING)

    # Progress tracking
    items_processed = Column(Integer, default=0)
    total_items = Column(Integer, default=0)

    # Detailed logging: per-file results land here.
    # FIX: use the `list` callable instead of a shared mutable `[]` literal —
    # a literal default is a single object shared by every row created in the
    # same process; a callable yields a fresh list per row.
    details = Column(JSON, default=list)

    error_message = Column(Text, nullable=True)
    created_at = Column(DateTime(timezone=True), server_default=func.now())
    updated_at = Column(DateTime(timezone=True), onupdate=func.now())

    # Legacy columns ('url', 'crawl_type', etc.) were removed to keep the table generic.
|
backend/src/models/integration.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
from sqlalchemy import Column, Integer, String, Text, Boolean, JSON, DateTime
from sqlalchemy.sql import func
from backend.src.db.base import Base
from backend.src.utils.security import SecurityUtils

class UserIntegration(Base):
    """A third-party data-source connection (CMS / SQL / Mongo) owned by one user.

    Credentials are stored encrypted at rest; the `credentials` property
    transparently encrypts on write and decrypts on read.
    """
    __tablename__ = "user_integrations"

    id = Column(Integer, primary_key=True, index=True)
    user_id = Column(String, index=True)

    provider = Column(String, nullable=False)  # e.g., 'sanity', 'sql', 'mongodb'

    # Encrypted credential payload (JSON string, encrypted via SecurityUtils).
    _credentials = Column("credentials", Text, nullable=False)

    # The map (technical structure of the remote data source).
    # FIX: use the `dict` callable instead of a shared mutable `{}` literal —
    # a literal default is a single object shared by every row created in the
    # same process; a callable yields a fresh dict per row.
    schema_map = Column(JSON, default=dict)

    # Semantic description of the data (used by the semantic router).
    profile_description = Column(Text, nullable=True)

    is_active = Column(Boolean, default=True)
    created_at = Column(DateTime(timezone=True), server_default=func.now())
    updated_at = Column(DateTime(timezone=True), onupdate=func.now())

    @property
    def credentials(self):
        """Return the decrypted credential JSON string."""
        return SecurityUtils.decrypt(self._credentials)

    @credentials.setter
    def credentials(self, value):
        """Encrypt and store the credential JSON string."""
        self._credentials = SecurityUtils.encrypt(value)
|
backend/src/models/user.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sqlalchemy import Column, Integer, String, DateTime, Boolean, Text  # Text needed for bot_instruction
from sqlalchemy.sql import func
from backend.src.db.base import Base

class User(Base):
    """Registered application user, including per-user chatbot customization."""
    __tablename__ = "users"

    id = Column(Integer, primary_key=True, index=True)
    email = Column(String, unique=True, index=True, nullable=False)
    hashed_password = Column(String, nullable=False)
    full_name = Column(String, nullable=True)
    is_active = Column(Boolean, default=True)

    # --- Bot customization: display name and system instruction for this user's bot ---
    bot_name = Column(String, default="Support Agent")
    bot_instruction = Column(Text, default="You are a helpful customer support agent. Only answer questions related to the provided data.")

    created_at = Column(DateTime(timezone=True), server_default=func.now())
    updated_at = Column(DateTime(timezone=True), onupdate=func.now())
|
backend/src/schemas/chat.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel
from typing import Optional

# Request payload sent by the user when asking a question.
class ChatRequest(BaseModel):
    message: str
    # Optional: guests may not have a session yet, so it defaults to None.
    session_id: Optional[str] = None

# Response payload returned by the server.
class ChatResponse(BaseModel):
    response: str
    # Optional here too, since a guest has no session ID.
    session_id: Optional[str] = None
    provider: str
|
backend/src/services/chat_service.py
ADDED
|
@@ -0,0 +1,598 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# import json
|
| 3 |
+
# from sqlalchemy.ext.asyncio import AsyncSession
|
| 4 |
+
# from sqlalchemy.future import select
|
| 5 |
+
|
| 6 |
+
# # --- Model Imports ---
|
| 7 |
+
# from backend.src.models.chat import ChatHistory
|
| 8 |
+
# from backend.src.models.integration import UserIntegration
|
| 9 |
+
|
| 10 |
+
# # --- Dynamic Factory & Tool Imports ---
|
| 11 |
+
# from backend.src.services.llm.factory import get_llm_model
|
| 12 |
+
# from backend.src.services.vector_store.qdrant_adapter import get_vector_store
|
| 13 |
+
# from backend.src.services.security.pii_scrubber import PIIScrubber
|
| 14 |
+
|
| 15 |
+
# # --- Agents ---
|
| 16 |
+
# from backend.src.services.tools.secure_agent import get_secure_agent
|
| 17 |
+
# from backend.src.services.tools.nosql_agent import get_nosql_agent
|
| 18 |
+
# from backend.src.services.tools.cms_agent import get_cms_agent
|
| 19 |
+
|
| 20 |
+
# # --- Router ---
|
| 21 |
+
# from backend.src.services.routing.semantic_router import SemanticRouter
|
| 22 |
+
|
| 23 |
+
# # --- LangChain Core ---
|
| 24 |
+
# from langchain_core.messages import HumanMessage, AIMessage
|
| 25 |
+
# from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
|
| 26 |
+
|
| 27 |
+
# # ==========================================
|
| 28 |
+
# # HELPER FUNCTIONS (UPDATED STRICT LOGIC)
|
| 29 |
+
# # ==========================================
|
| 30 |
+
|
| 31 |
+
# async def get_user_integrations(user_id: str, db: AsyncSession) -> dict:
|
| 32 |
+
# if not user_id: return {}
|
| 33 |
+
|
| 34 |
+
# query = select(UserIntegration).where(UserIntegration.user_id == user_id, UserIntegration.is_active == True)
|
| 35 |
+
# result = await db.execute(query)
|
| 36 |
+
# integrations = result.scalars().all()
|
| 37 |
+
|
| 38 |
+
# settings = {}
|
| 39 |
+
# for i in integrations:
|
| 40 |
+
# try:
|
| 41 |
+
# creds = json.loads(i.credentials)
|
| 42 |
+
# creds['provider'] = i.provider
|
| 43 |
+
# creds['schema_map'] = i.schema_map if i.schema_map else {}
|
| 44 |
+
|
| 45 |
+
# # --- 🔥 FIX: NO DEFAULT DESCRIPTION ---
|
| 46 |
+
# # Agar DB mein description NULL hai, to NULL hi rehne do.
|
| 47 |
+
# # Hum isay Router mein add hi nahi karenge.
|
| 48 |
+
# creds['description'] = i.profile_description
|
| 49 |
+
|
| 50 |
+
# settings[i.provider] = creds
|
| 51 |
+
# except (json.JSONDecodeError, TypeError):
|
| 52 |
+
# continue
|
| 53 |
+
# return settings
|
| 54 |
+
|
| 55 |
+
# async def save_chat_to_db(db: AsyncSession, session_id: str, human_msg: str, ai_msg: str, provider: str):
|
| 56 |
+
# if not session_id: return
|
| 57 |
+
# safe_human = PIIScrubber.scrub(human_msg)
|
| 58 |
+
# safe_ai = PIIScrubber.scrub(ai_msg)
|
| 59 |
+
# new_chat = ChatHistory(
|
| 60 |
+
# session_id=session_id, human_message=safe_human, ai_message=safe_ai, provider=provider
|
| 61 |
+
# )
|
| 62 |
+
# db.add(new_chat)
|
| 63 |
+
# await db.commit()
|
| 64 |
+
|
| 65 |
+
# async def get_chat_history(session_id: str, db: AsyncSession):
|
| 66 |
+
# if not session_id: return []
|
| 67 |
+
# query = select(ChatHistory).where(ChatHistory.session_id == session_id).order_by(ChatHistory.timestamp.asc())
|
| 68 |
+
# result = await db.execute(query)
|
| 69 |
+
# return result.scalars().all()
|
| 70 |
+
|
| 71 |
+
# OMNI_SUPPORT_PROMPT = "You are OmniAgent. Answer based on the provided context or chat history."
|
| 72 |
+
|
| 73 |
+
# # ==========================================
|
| 74 |
+
# # MAIN CHAT LOGIC
|
| 75 |
+
# # ==========================================
|
| 76 |
+
# async def process_chat(message: str, session_id: str, user_id: str, db: AsyncSession):
|
| 77 |
+
|
| 78 |
+
# # 1. User Settings
|
| 79 |
+
# user_settings = await get_user_integrations(user_id, db)
|
| 80 |
+
|
| 81 |
+
# # 2. LLM Check
|
| 82 |
+
# llm_creds = user_settings.get('groq') or user_settings.get('openai')
|
| 83 |
+
# if not llm_creds:
|
| 84 |
+
# return "Please configure your AI Model in Settings."
|
| 85 |
+
|
| 86 |
+
# # 3. Build Tool Map for Router (STRICT FILTERING)
|
| 87 |
+
# tools_map = {}
|
| 88 |
+
# for provider, config in user_settings.items():
|
| 89 |
+
# if provider in ['sanity', 'sql', 'mongodb']:
|
| 90 |
+
# # 🔥 Check: Agar Description hai, tabhi Router mein daalo
|
| 91 |
+
# if config.get('description'):
|
| 92 |
+
# tools_map[provider] = config['description']
|
| 93 |
+
# else:
|
| 94 |
+
# print(f"⚠️ [Router] Skipping {provider} - No Description found.")
|
| 95 |
+
|
| 96 |
+
# # 4. SEMANTIC DECISION
|
| 97 |
+
# selected_provider = None
|
| 98 |
+
# if tools_map:
|
| 99 |
+
# router = SemanticRouter()
|
| 100 |
+
# selected_provider = router.route(message, tools_map)
|
| 101 |
+
# else:
|
| 102 |
+
# print("⚠️ [Router] No active tools with descriptions found.")
|
| 103 |
+
|
| 104 |
+
# response_text = ""
|
| 105 |
+
# provider_name = "general_chat"
|
| 106 |
+
|
| 107 |
+
# # 5. Route to Winner
|
| 108 |
+
# if selected_provider:
|
| 109 |
+
# print(f"👉 [Router] Selected Tool: {selected_provider.upper()}")
|
| 110 |
+
# try:
|
| 111 |
+
# if selected_provider == 'sanity':
|
| 112 |
+
# schema = user_settings['sanity'].get('schema_map', {})
|
| 113 |
+
# agent = get_cms_agent(user_id=user_id, schema_map=schema, llm_credentials=llm_creds)
|
| 114 |
+
# res = await agent.ainvoke({"input": message})
|
| 115 |
+
# response_text = str(res.get('output', ''))
|
| 116 |
+
# provider_name = "cms_agent"
|
| 117 |
+
|
| 118 |
+
# elif selected_provider == 'sql':
|
| 119 |
+
# role = "admin" if user_id == '99' else "customer"
|
| 120 |
+
# agent = get_secure_agent(int(user_id), role, user_settings['sql'], llm_credentials=llm_creds)
|
| 121 |
+
# res = await agent.ainvoke({"input": message})
|
| 122 |
+
# response_text = str(res.get('output', ''))
|
| 123 |
+
# provider_name = "sql_agent"
|
| 124 |
+
|
| 125 |
+
# elif selected_provider == 'mongodb':
|
| 126 |
+
# agent = get_nosql_agent(user_id, user_settings['mongodb'], llm_credentials=llm_creds)
|
| 127 |
+
# res = await agent.ainvoke({"input": message})
|
| 128 |
+
# response_text = str(res.get('output', ''))
|
| 129 |
+
# provider_name = "nosql_agent"
|
| 130 |
+
|
| 131 |
+
# # Anti-Hallucination
|
| 132 |
+
# if not response_text or "error" in response_text.lower():
|
| 133 |
+
# response_text = "" # Trigger Fallback
|
| 134 |
+
|
| 135 |
+
# except Exception as e:
|
| 136 |
+
# print(f"❌ [Router] Execution Failed: {e}")
|
| 137 |
+
# response_text = ""
|
| 138 |
+
|
| 139 |
+
# # 6. Fallback / RAG
|
| 140 |
+
# if not response_text:
|
| 141 |
+
# print("👉 [Router] Fallback to RAG/General Chat...")
|
| 142 |
+
# try:
|
| 143 |
+
# llm = get_llm_model(credentials=llm_creds)
|
| 144 |
+
|
| 145 |
+
# context = ""
|
| 146 |
+
# if 'qdrant' in user_settings:
|
| 147 |
+
# try:
|
| 148 |
+
# vector_store = get_vector_store(credentials=user_settings['qdrant'])
|
| 149 |
+
# docs = await vector_store.asimilarity_search(message, k=3)
|
| 150 |
+
# if docs:
|
| 151 |
+
# context = "\n\n".join([d.page_content for d in docs])
|
| 152 |
+
# except Exception as e:
|
| 153 |
+
# print(f"⚠️ RAG Warning: {e}")
|
| 154 |
+
|
| 155 |
+
# system_instruction = OMNI_SUPPORT_PROMPT
|
| 156 |
+
# if context: system_instruction = f"Context:\n{context}"
|
| 157 |
+
|
| 158 |
+
# history = await get_chat_history(session_id, db)
|
| 159 |
+
# formatted_history = []
|
| 160 |
+
# for chat in history:
|
| 161 |
+
# formatted_history.append(HumanMessage(content=chat.human_message))
|
| 162 |
+
# if chat.ai_message: formatted_history.append(AIMessage(content=chat.ai_message))
|
| 163 |
+
|
| 164 |
+
# prompt = ChatPromptTemplate.from_messages([
|
| 165 |
+
# ("system", system_instruction),
|
| 166 |
+
# MessagesPlaceholder(variable_name="chat_history"),
|
| 167 |
+
# ("human", "{question}")
|
| 168 |
+
# ])
|
| 169 |
+
# chain = prompt | llm
|
| 170 |
+
|
| 171 |
+
# ai_response = await chain.ainvoke({"chat_history": formatted_history, "question": message})
|
| 172 |
+
# response_text = ai_response.content
|
| 173 |
+
# provider_name = "rag_fallback"
|
| 174 |
+
|
| 175 |
+
# except Exception as e:
|
| 176 |
+
# response_text = "I am currently unable to process your request."
|
| 177 |
+
|
| 178 |
+
# await save_chat_to_db(db, session_id, message, response_text, provider_name)
|
| 179 |
+
# return response_text
|
| 180 |
+
import json
|
| 181 |
+
from sqlalchemy.ext.asyncio import AsyncSession
|
| 182 |
+
from sqlalchemy.future import select
|
| 183 |
+
|
| 184 |
+
# --- Model Imports ---
|
| 185 |
+
from backend.src.models.chat import ChatHistory
|
| 186 |
+
from backend.src.models.integration import UserIntegration
|
| 187 |
+
from backend.src.models.user import User # Added User model for Bot Persona
|
| 188 |
+
|
| 189 |
+
# --- Dynamic Factory & Tool Imports ---
|
| 190 |
+
from backend.src.services.llm.factory import get_llm_model
|
| 191 |
+
from backend.src.services.vector_store.qdrant_adapter import get_vector_store
|
| 192 |
+
from backend.src.services.security.pii_scrubber import PIIScrubber
|
| 193 |
+
|
| 194 |
+
# --- Agents ---
|
| 195 |
+
from backend.src.services.tools.secure_agent import get_secure_agent
|
| 196 |
+
from backend.src.services.tools.nosql_agent import get_nosql_agent
|
| 197 |
+
from backend.src.services.tools.cms_agent import get_cms_agent
|
| 198 |
+
|
| 199 |
+
# --- Router ---
|
| 200 |
+
from backend.src.services.routing.semantic_router import SemanticRouter
|
| 201 |
+
|
| 202 |
+
# --- LangChain Core ---
|
| 203 |
+
from langchain_core.messages import HumanMessage, AIMessage
|
| 204 |
+
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
|
| 205 |
+
|
| 206 |
+
# ==========================================
|
| 207 |
+
# HELPER FUNCTIONS
|
| 208 |
+
# ==========================================
|
| 209 |
+
|
| 210 |
+
async def get_user_integrations(user_id: str, db: AsyncSession) -> dict:
    """Return a provider -> credentials-dict map of the user's active integrations.

    Credentials are stored as JSON text on each row. Rows whose payload cannot
    be decoded (or is not a JSON object) are skipped rather than fatal. A
    'description' key is attached only when the integration carries a non-empty
    profile description, so the semantic router never sees blank tool docs.
    """
    if not user_id:
        return {}

    stmt = select(UserIntegration).where(
        UserIntegration.user_id == user_id,
        UserIntegration.is_active == True,
    )
    rows = (await db.execute(stmt)).scalars().all()

    resolved = {}
    for integration in rows:
        try:
            payload = json.loads(integration.credentials)
            payload['provider'] = integration.provider
            payload['schema_map'] = integration.schema_map if integration.schema_map else {}

            # --- STRICT CHECK: only advertise a description when one exists ---
            if integration.profile_description:
                payload['description'] = integration.profile_description

            resolved[integration.provider] = payload
        except (json.JSONDecodeError, TypeError):
            # Malformed/NULL credential blobs (or non-dict JSON) are ignored.
            continue
    return resolved
|
| 233 |
+
|
| 234 |
+
async def save_chat_to_db(db: AsyncSession, session_id: str, human_msg: str, ai_msg: str, provider: str):
    """Persist one chat exchange, scrubbing PII from both sides before storage.

    An empty/missing session_id marks an ephemeral exchange and is silently
    dropped instead of written to the database.
    """
    if not session_id:
        return

    record = ChatHistory(
        session_id=session_id,
        human_message=PIIScrubber.scrub(human_msg),
        ai_message=PIIScrubber.scrub(ai_msg),
        provider=provider,
    )
    db.add(record)
    await db.commit()
|
| 244 |
+
|
| 245 |
+
async def get_chat_history(session_id: str, db: AsyncSession):
    """Load every stored message for a session, ordered oldest-first.

    Returns an empty list for a missing session_id (anonymous chat).
    """
    if not session_id:
        return []

    stmt = (
        select(ChatHistory)
        .where(ChatHistory.session_id == session_id)
        .order_by(ChatHistory.timestamp.asc())
    )
    result = await db.execute(stmt)
    return result.scalars().all()
|
| 251 |
+
|
| 252 |
+
async def get_bot_persona(user_id: str, db: AsyncSession):
|
| 253 |
+
"""Fetches custom Bot Name and Instructions from User table."""
|
| 254 |
+
try:
|
| 255 |
+
# User ID ko int mein convert karke query karein
|
| 256 |
+
stmt = select(User).where(User.id == int(user_id))
|
| 257 |
+
result = await db.execute(stmt)
|
| 258 |
+
user = result.scalars().first()
|
| 259 |
+
|
| 260 |
+
if user:
|
| 261 |
+
return {
|
| 262 |
+
"name": getattr(user, "bot_name", "OmniAgent"),
|
| 263 |
+
"instruction": getattr(user, "bot_instruction", "You are a helpful AI assistant.")
|
| 264 |
+
}
|
| 265 |
+
except Exception as e:
|
| 266 |
+
print(f"⚠️ Error fetching persona: {e}")
|
| 267 |
+
pass
|
| 268 |
+
|
| 269 |
+
# Fallback Default Persona
|
| 270 |
+
return {"name": "OmniAgent", "instruction": "You are a helpful AI assistant."}
|
| 271 |
+
|
| 272 |
+
# ==========================================
|
| 273 |
+
# MAIN CHAT LOGIC
|
| 274 |
+
# ==========================================
|
| 275 |
+
async def process_chat(message: str, session_id: str, user_id: str, db: AsyncSession):
    """Orchestrate one chat turn: route to a data-tool agent or fall back to RAG.

    Flow: load the user's integrations and bot persona; verify an LLM provider
    is configured; semantically route the message to a tool agent (Sanity CMS,
    SQL, MongoDB); if no tool is selected, the tool returns nothing, or it
    errors, answer via the RAG/general-chat fallback using the custom persona.
    The exchange is persisted (PII-scrubbed) before returning.

    Args:
        message: raw user message text.
        session_id: chat-session key used for history load/save (may be empty).
        user_id: owner of the integrations and persona (string form of int id).
        db: async SQLAlchemy session.

    Returns:
        The assistant's reply text, or a configuration-error message when no
        LLM credentials exist.
    """
    # 1. Fetch User Settings & Persona
    user_settings = await get_user_integrations(user_id, db)
    bot_persona = await get_bot_persona(user_id, db)  # load custom bot name/instructions

    # 2. LLM Check — groq takes precedence over openai when both are configured.
    llm_creds = user_settings.get('groq') or user_settings.get('openai')
    if not llm_creds:
        return "Please configure your AI Model in Settings."

    # 3. Build Tool Map for Router — only tools with a description are routable.
    tools_map = {}
    for provider, config in user_settings.items():
        if provider in ['sanity', 'sql', 'mongodb']:
            if config.get('description'):
                tools_map[provider] = config['description']

    # 4. SEMANTIC DECISION (Router) — pick the best-matching tool (or None).
    selected_provider = None
    if tools_map:
        router = SemanticRouter()  # Singleton Instance
        selected_provider = router.route(message, tools_map)

    response_text = ""
    provider_name = "general_chat"

    # 5. Route to Winner — execute the chosen agent; any failure clears
    # response_text so the RAG fallback below takes over.
    if selected_provider:
        print(f"👉 [Router] Selected Tool: {selected_provider.upper()}")
        try:
            if selected_provider == 'sanity':
                schema = user_settings['sanity'].get('schema_map', {})
                agent = get_cms_agent(user_id=user_id, schema_map=schema, llm_credentials=llm_creds)
                res = await agent.ainvoke({"input": message})
                response_text = str(res.get('output', ''))
                provider_name = "cms_agent"

            elif selected_provider == 'sql':
                # NOTE(review): user id '99' is a hard-coded admin sentinel —
                # confirm this matches the auth layer's role model.
                role = "admin" if user_id == '99' else "customer"
                agent = get_secure_agent(int(user_id), role, user_settings['sql'], llm_credentials=llm_creds)
                res = await agent.ainvoke({"input": message})
                response_text = str(res.get('output', ''))
                provider_name = "sql_agent"

            elif selected_provider == 'mongodb':
                agent = get_nosql_agent(user_id, user_settings['mongodb'], llm_credentials=llm_creds)
                res = await agent.ainvoke({"input": message})
                response_text = str(res.get('output', ''))
                provider_name = "nosql_agent"

            # Anti-Hallucination: an empty answer, or any answer containing
            # the substring "error", is treated as a tool failure.
            if not response_text or "error" in response_text.lower():
                print(f"⚠️ [Router] Tool {selected_provider} failed. Triggering Fallback.")
                response_text = ""

        except Exception as e:
            print(f"❌ [Router] Execution Failed: {e}")
            response_text = ""

    # 6. Fallback / RAG (Using Custom Persona)
    if not response_text:
        print("👉 [Router] Fallback to RAG/General Chat...")
        try:
            llm = get_llm_model(credentials=llm_creds)

            # Context from Vector DB (best-effort; a RAG failure is non-fatal).
            context = ""
            if 'qdrant' in user_settings:
                try:
                    vector_store = get_vector_store(credentials=user_settings['qdrant'])
                    docs = await vector_store.asimilarity_search(message, k=3)
                    if docs:
                        context = "\n\n".join([d.page_content for d in docs])
                except Exception as e:
                    print(f"⚠️ RAG Warning: {e}")

            # --- 🔥 DYNAMIC SYSTEM PROMPT ---
            # NOTE(review): this string is passed through ChatPromptTemplate,
            # so literal '{' / '}' in retrieved context or the persona text
            # could be mis-parsed as template variables — confirm upstream
            # content is brace-free or escape it.
            system_instruction = f"""
            IDENTITY: You are '{bot_persona['name']}'.
            MISSION: {bot_persona['instruction']}

            CONTEXT FROM KNOWLEDGE BASE:
            {context if context else "No specific documents found."}

            Answer the user's question based on the context above or your general knowledge if permitted by your mission.
            """

            # History Load — rebuild the LangChain message list from storage.
            history = await get_chat_history(session_id, db)
            formatted_history = []
            for chat in history:
                formatted_history.append(HumanMessage(content=chat.human_message))
                if chat.ai_message: formatted_history.append(AIMessage(content=chat.ai_message))

            # LLM Call
            prompt = ChatPromptTemplate.from_messages([
                ("system", system_instruction),
                MessagesPlaceholder(variable_name="chat_history"),
                ("human", "{question}")
            ])
            chain = prompt | llm

            ai_response = await chain.ainvoke({"chat_history": formatted_history, "question": message})
            response_text = ai_response.content
            provider_name = "rag_fallback"

        except Exception as e:
            print(f"❌ Fallback Error: {e}")
            response_text = "I am currently unable to process your request. Please check your AI configuration."

    # 7. Save to DB (PII-scrubbed; no-op for empty session_id).
    await save_chat_to_db(db, session_id, message, response_text, provider_name)
    return response_text
|
| 389 |
+
# import json
|
| 390 |
+
# from sqlalchemy.ext.asyncio import AsyncSession
|
| 391 |
+
# from sqlalchemy.future import select
|
| 392 |
+
|
| 393 |
+
# # --- Model Imports ---
|
| 394 |
+
# from backend.src.models.chat import ChatHistory
|
| 395 |
+
# from backend.src.models.integration import UserIntegration
|
| 396 |
+
# from backend.src.models.user import User # Added User model for Bot Persona
|
| 397 |
+
|
| 398 |
+
# # --- Dynamic Factory & Tool Imports ---
|
| 399 |
+
# from backend.src.services.llm.factory import get_llm_model
|
| 400 |
+
# from backend.src.services.vector_store.qdrant_adapter import get_vector_store
|
| 401 |
+
# from backend.src.services.security.pii_scrubber import PIIScrubber
|
| 402 |
+
|
| 403 |
+
# # --- Agents ---
|
| 404 |
+
# from backend.src.services.tools.secure_agent import get_secure_agent
|
| 405 |
+
# from backend.src.services.tools.nosql_agent import get_nosql_agent
|
| 406 |
+
# from backend.src.services.tools.cms_agent import get_cms_agent
|
| 407 |
+
|
| 408 |
+
# # --- Router ---
|
| 409 |
+
# from backend.src.services.routing.semantic_router import SemanticRouter
|
| 410 |
+
|
| 411 |
+
# # --- LangChain Core ---
|
| 412 |
+
# from langchain_core.messages import HumanMessage, AIMessage
|
| 413 |
+
# from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
|
| 414 |
+
|
| 415 |
+
# # ==========================================
|
| 416 |
+
# # HELPER FUNCTIONS
|
| 417 |
+
# # ==========================================
|
| 418 |
+
|
| 419 |
+
# async def get_user_integrations(user_id: str, db: AsyncSession) -> dict:
|
| 420 |
+
# """Fetches active integrations and filters valid descriptions."""
|
| 421 |
+
# if not user_id: return {}
|
| 422 |
+
|
| 423 |
+
# query = select(UserIntegration).where(UserIntegration.user_id == user_id, UserIntegration.is_active == True)
|
| 424 |
+
# result = await db.execute(query)
|
| 425 |
+
# integrations = result.scalars().all()
|
| 426 |
+
|
| 427 |
+
# settings = {}
|
| 428 |
+
# for i in integrations:
|
| 429 |
+
# try:
|
| 430 |
+
# creds = json.loads(i.credentials)
|
| 431 |
+
# creds['provider'] = i.provider
|
| 432 |
+
# creds['schema_map'] = i.schema_map if i.schema_map else {}
|
| 433 |
+
|
| 434 |
+
# # --- STRICT CHECK ---
|
| 435 |
+
# # Agar Description NULL hai to dictionary mein mat daalo
|
| 436 |
+
# # Taake Router confuse na ho
|
| 437 |
+
# if i.profile_description:
|
| 438 |
+
# creds['description'] = i.profile_description
|
| 439 |
+
|
| 440 |
+
# settings[i.provider] = creds
|
| 441 |
+
# except (json.JSONDecodeError, TypeError):
|
| 442 |
+
# continue
|
| 443 |
+
# return settings
|
| 444 |
+
|
| 445 |
+
# async def save_chat_to_db(db: AsyncSession, session_id: str, human_msg: str, ai_msg: str, provider: str):
|
| 446 |
+
# """Saves chat history with PII redaction."""
|
| 447 |
+
# if not session_id: return
|
| 448 |
+
# safe_human = PIIScrubber.scrub(human_msg)
|
| 449 |
+
# safe_ai = PIIScrubber.scrub(ai_msg)
|
| 450 |
+
# new_chat = ChatHistory(
|
| 451 |
+
# session_id=session_id, human_message=safe_human, ai_message=safe_ai, provider=provider
|
| 452 |
+
# )
|
| 453 |
+
# db.add(new_chat)
|
| 454 |
+
# await db.commit()
|
| 455 |
+
|
| 456 |
+
# async def get_chat_history(session_id: str, db: AsyncSession):
|
| 457 |
+
# """Retrieves past conversation history."""
|
| 458 |
+
# if not session_id: return []
|
| 459 |
+
# query = select(ChatHistory).where(ChatHistory.session_id == session_id).order_by(ChatHistory.timestamp.asc())
|
| 460 |
+
# result = await db.execute(query)
|
| 461 |
+
# return result.scalars().all()
|
| 462 |
+
|
| 463 |
+
# async def get_bot_persona(user_id: str, db: AsyncSession):
|
| 464 |
+
# """Fetches custom Bot Name and Instructions from User table."""
|
| 465 |
+
# try:
|
| 466 |
+
# result = await db.execute(select(User).where(User.id == int(user_id)))
|
| 467 |
+
# user = result.scalars().first()
|
| 468 |
+
# if user:
|
| 469 |
+
# return {
|
| 470 |
+
# "name": getattr(user, "bot_name", "OmniAgent"),
|
| 471 |
+
# "instruction": getattr(user, "bot_instruction", "You are a helpful AI assistant.")
|
| 472 |
+
# }
|
| 473 |
+
# except Exception:
|
| 474 |
+
# pass
|
| 475 |
+
# return {"name": "OmniAgent", "instruction": "You are a helpful AI assistant."}
|
| 476 |
+
|
| 477 |
+
# # ==========================================
|
| 478 |
+
# # MAIN CHAT LOGIC
|
| 479 |
+
# # ==========================================
|
| 480 |
+
# async def process_chat(message: str, session_id: str, user_id: str, db: AsyncSession):
|
| 481 |
+
|
| 482 |
+
# # 1. Fetch User Settings & Persona
|
| 483 |
+
# user_settings = await get_user_integrations(user_id, db)
|
| 484 |
+
# bot_persona = await get_bot_persona(user_id, db)
|
| 485 |
+
|
| 486 |
+
# # 2. LLM Check
|
| 487 |
+
# llm_creds = user_settings.get('groq') or user_settings.get('openai')
|
| 488 |
+
# if not llm_creds:
|
| 489 |
+
# return "Please configure your AI Model in Settings."
|
| 490 |
+
|
| 491 |
+
# # 3. Build Tool Map for Router (STRICT FILTERING)
|
| 492 |
+
# tools_map = {}
|
| 493 |
+
# for provider, config in user_settings.items():
|
| 494 |
+
# if provider in ['sanity', 'sql', 'mongodb']:
|
| 495 |
+
# # Sirf tab add karo agar description exist karti hai
|
| 496 |
+
# if config.get('description'):
|
| 497 |
+
# tools_map[provider] = config['description']
|
| 498 |
+
# else:
|
| 499 |
+
# print(f"⚠️ [Router] Skipping {provider} - No Description found.")
|
| 500 |
+
|
| 501 |
+
# # 4. SEMANTIC DECISION (Router)
|
| 502 |
+
# selected_provider = None
|
| 503 |
+
# if tools_map:
|
| 504 |
+
# router = SemanticRouter() # Singleton Instance
|
| 505 |
+
# selected_provider = router.route(message, tools_map)
|
| 506 |
+
# else:
|
| 507 |
+
# print("⚠️ [Router] No active tools with descriptions found.")
|
| 508 |
+
|
| 509 |
+
# response_text = ""
|
| 510 |
+
# provider_name = "general_chat"
|
| 511 |
+
|
| 512 |
+
# # 5. Route to Winner (Tool Execution)
|
| 513 |
+
# if selected_provider:
|
| 514 |
+
# print(f"👉 [Router] Selected Tool: {selected_provider.upper()}")
|
| 515 |
+
# try:
|
| 516 |
+
# if selected_provider == 'sanity':
|
| 517 |
+
# schema = user_settings['sanity'].get('schema_map', {})
|
| 518 |
+
# agent = get_cms_agent(user_id=user_id, schema_map=schema, llm_credentials=llm_creds)
|
| 519 |
+
# res = await agent.ainvoke({"input": message})
|
| 520 |
+
# response_text = str(res.get('output', ''))
|
| 521 |
+
# provider_name = "cms_agent"
|
| 522 |
+
|
| 523 |
+
# elif selected_provider == 'sql':
|
| 524 |
+
# role = "admin" if user_id == '99' else "customer"
|
| 525 |
+
# agent = get_secure_agent(int(user_id), role, user_settings['sql'], llm_credentials=llm_creds)
|
| 526 |
+
# res = await agent.ainvoke({"input": message})
|
| 527 |
+
# response_text = str(res.get('output', ''))
|
| 528 |
+
# provider_name = "sql_agent"
|
| 529 |
+
|
| 530 |
+
# elif selected_provider == 'mongodb':
|
| 531 |
+
# agent = get_nosql_agent(user_id, user_settings['mongodb'], llm_credentials=llm_creds)
|
| 532 |
+
# res = await agent.ainvoke({"input": message})
|
| 533 |
+
# response_text = str(res.get('output', ''))
|
| 534 |
+
# provider_name = "nosql_agent"
|
| 535 |
+
|
| 536 |
+
# # Anti-Hallucination Check
|
| 537 |
+
# if not response_text or "error" in response_text.lower():
|
| 538 |
+
# print(f"⚠️ [Router] Tool {selected_provider} failed/empty. Triggering Fallback.")
|
| 539 |
+
# response_text = "" # Clears response to trigger fallback below
|
| 540 |
+
|
| 541 |
+
# except Exception as e:
|
| 542 |
+
# print(f"❌ [Router] Execution Failed: {e}")
|
| 543 |
+
# response_text = ""
|
| 544 |
+
|
| 545 |
+
# # 6. Fallback / RAG (General Chat)
|
| 546 |
+
# if not response_text:
|
| 547 |
+
# print("👉 [Router] Fallback to RAG/General Chat...")
|
| 548 |
+
# try:
|
| 549 |
+
# llm = get_llm_model(credentials=llm_creds)
|
| 550 |
+
|
| 551 |
+
# # Context from Vector DB
|
| 552 |
+
# context = ""
|
| 553 |
+
# if 'qdrant' in user_settings:
|
| 554 |
+
# try:
|
| 555 |
+
# vector_store = get_vector_store(credentials=user_settings['qdrant'])
|
| 556 |
+
# docs = await vector_store.asimilarity_search(message, k=3)
|
| 557 |
+
# if docs:
|
| 558 |
+
# context = "\n\n".join([d.page_content for d in docs])
|
| 559 |
+
# except Exception as e:
|
| 560 |
+
# print(f"⚠️ RAG Warning: {e}")
|
| 561 |
+
|
| 562 |
+
# # --- DYNAMIC SYSTEM PROMPT (PERSONA) ---
|
| 563 |
+
# system_instruction = f"""
|
| 564 |
+
# IDENTITY: You are '{bot_persona['name']}'.
|
| 565 |
+
# MISSION: {bot_persona['instruction']}
|
| 566 |
+
|
| 567 |
+
# CONTEXT FROM KNOWLEDGE BASE:
|
| 568 |
+
# {context if context else "No specific documents found."}
|
| 569 |
+
|
| 570 |
+
# Answer the user's question based on the context above or your general knowledge if permitted by your mission.
|
| 571 |
+
# """
|
| 572 |
+
|
| 573 |
+
# # History Load
|
| 574 |
+
# history = await get_chat_history(session_id, db)
|
| 575 |
+
# formatted_history = []
|
| 576 |
+
# for chat in history:
|
| 577 |
+
# formatted_history.append(HumanMessage(content=chat.human_message))
|
| 578 |
+
# if chat.ai_message: formatted_history.append(AIMessage(content=chat.ai_message))
|
| 579 |
+
|
| 580 |
+
# # LLM Call
|
| 581 |
+
# prompt = ChatPromptTemplate.from_messages([
|
| 582 |
+
# ("system", system_instruction),
|
| 583 |
+
# MessagesPlaceholder(variable_name="chat_history"),
|
| 584 |
+
# ("human", "{question}")
|
| 585 |
+
# ])
|
| 586 |
+
# chain = prompt | llm
|
| 587 |
+
|
| 588 |
+
# ai_response = await chain.ainvoke({"chat_history": formatted_history, "question": message})
|
| 589 |
+
# response_text = ai_response.content
|
| 590 |
+
# provider_name = "rag_fallback"
|
| 591 |
+
|
| 592 |
+
# except Exception as e:
|
| 593 |
+
# print(f"❌ Fallback Error: {e}")
|
| 594 |
+
# response_text = "I am currently unable to process your request. Please check your AI configuration."
|
| 595 |
+
|
| 596 |
+
# # 7. Save to DB
|
| 597 |
+
# await save_chat_to_db(db, session_id, message, response_text, provider_name)
|
| 598 |
+
# return response_text
|
backend/src/services/connectors/base.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from abc import ABC, abstractmethod
|
| 2 |
+
from typing import List, Dict, Any, Optional
|
| 3 |
+
|
| 4 |
+
class NoSQLConnector(ABC):
    """Contract for universal NoSQL access.

    Every document-store backend (MongoDB, DynamoDB, Firebase, ...) plugs into
    the agent layer by implementing this interface.
    """

    @abstractmethod
    def connect(self):
        """Open a connection to the backing database."""
        ...

    @abstractmethod
    def disconnect(self):
        """Release the connection and any associated resources."""
        ...

    @abstractmethod
    def get_schema_summary(self) -> str:
        """Produce a human-readable listing of collections and their fields.

        The LLM relies on this text to know what it is allowed to query.
        """
        ...

    @abstractmethod
    def find_one(self, collection: str, query: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """Fetch the first document matching `query`, or None."""
        ...

    @abstractmethod
    def find_many(self, collection: str, query: Dict[str, Any], limit: int = 5) -> List[Dict[str, Any]]:
        """Fetch up to `limit` documents matching `query`."""
        ...
|
backend/src/services/connectors/cms_base.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from abc import ABC, abstractmethod
|
| 2 |
+
from typing import Dict, Any, List
|
| 3 |
+
|
| 4 |
+
class CMSBaseConnector(ABC):
    """Contract that every headless-CMS integration must satisfy.

    Concrete connectors (e.g. Sanity) implement connection validation, schema
    introspection, and raw query execution against their backend.
    """

    @abstractmethod
    def connect(self, credentials: Dict[str, str]) -> bool:
        """Validate credentials and establish a connection; True on success."""
        ...

    @abstractmethod
    def fetch_schema_structure(self) -> Dict[str, List[str]]:
        """Introspect the CMS and return available types with their fields.

        Example return: {'product': ['title', 'price'], 'author': ['name']}
        """
        ...

    @abstractmethod
    def execute_query(self, query: str) -> List[Dict[str, Any]]:
        """Run a raw backend query (GROQ, GraphQL, ...) and return JSON rows."""
        ...
|
backend/src/services/connectors/mongo_connector.py
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import pymongo
|
| 3 |
+
from typing import List, Dict, Any, Optional
|
| 4 |
+
from backend.src.services.connectors.base import NoSQLConnector
|
| 5 |
+
|
| 6 |
+
class MongoConnector(NoSQLConnector):
    """NoSQL connector backed by a user-supplied MongoDB cluster."""

    def __init__(self, credentials: Dict[str, str]):
        """Store connection settings taken from user-provided credentials.

        Args:
            credentials: must contain 'url' (full connection string, e.g.
                "mongodb+srv://user:pass@cluster/db"); may contain
                'database_name' to override the name parsed from the URL.

        Raises:
            ValueError: if 'url' is missing.
        """
        self.uri = credentials.get("url")
        if not self.uri:
            raise ValueError("MongoDB connection URL ('url') is missing in credentials.")

        # Prefer an explicit database name; otherwise use the URI's path
        # segment (text after the last '/', before any '?query').
        self.db_name = credentials.get("database_name", self.uri.split("/")[-1].split("?")[0])

        self.client = None
        self.db = None

        # TLS settings for managed clusters such as Atlas.
        # NOTE(review): tlsAllowInvalidCertificates=True disables certificate
        # validation — acceptable in development only; must be False in prod.
        self.connect_args = {
            'tls': True,
            'tlsAllowInvalidCertificates': True
        }

    def connect(self):
        """Lazily open the client; fail fast and stay disconnected on error."""
        if not self.client:
            print(f"🔌 [NoSQL] Connecting to MongoDB Cluster...")
            try:
                # serverSelectionTimeoutMS makes a bad URI fail in ~5s instead
                # of blocking for the driver's default 30s.
                self.client = pymongo.MongoClient(self.uri, serverSelectionTimeoutMS=5000, **self.connect_args)
                # server_info() forces a round trip, verifying the connection.
                self.client.server_info()
                self.db = self.client[self.db_name]
                print("✅ [NoSQL] MongoDB Connection Successful.")
            except pymongo.errors.PyMongoError as e:
                # BUGFIX: reset state so a later connect() retries. Previously
                # only ConnectionFailure was caught and the half-initialized
                # client object was left assigned, so every subsequent call
                # believed the connection was live.
                self.client = None
                self.db = None
                print(f"❌ [NoSQL] MongoDB Connection Failed: {e}")
                raise

    def disconnect(self):
        """Close and drop the client so connect() can re-open later."""
        if self.client:
            self.client.close()
            self.client = None
            self.db = None
            print("🔌 [NoSQL] Disconnected from MongoDB.")

    def get_schema_summary(self) -> str:
        """Describe each collection via the field names of one sample document.

        Returns an error string (rather than raising) so the calling agent can
        surface the problem to the LLM.
        """
        self.connect()
        summary = []
        try:
            for col_name in self.db.list_collection_names():
                sample = self.db[col_name].find_one()
                if sample:
                    sample.pop('_id', None)  # internal id is noise for the LLM
                    summary.append(f"Collection: '{col_name}' -> Fields: {list(sample.keys())}")
        except Exception as e:
            return f"Error fetching schema: {e}"
        return "\n".join(summary)

    def find_one(self, collection: str, query: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """Return one matching document (ObjectId stringified), or None."""
        self.connect()
        try:
            result = self.db[collection].find_one(query)
            if result and '_id' in result:
                result['_id'] = str(result['_id'])  # ObjectId is not JSON-safe
            return result
        except Exception as e:
            # Best-effort read; log instead of silently swallowing the error.
            print(f"⚠️ [NoSQL] find_one failed on '{collection}': {e}")
            return None

    def find_many(self, collection: str, query: Dict[str, Any], limit: int = 5) -> List[Dict[str, Any]]:
        """Return up to `limit` matching documents with stringified ids."""
        self.connect()
        try:
            docs = list(self.db[collection].find(query).limit(limit))
            for doc in docs:
                if '_id' in doc:
                    doc['_id'] = str(doc['_id'])  # ObjectId is not JSON-safe
            return docs
        except Exception as e:
            # Best-effort read; log instead of silently swallowing the error.
            print(f"⚠️ [NoSQL] find_many failed on '{collection}': {e}")
            return []
|
backend/src/services/connectors/sanity_connector.py
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import requests
|
| 3 |
+
import json
|
| 4 |
+
from urllib.parse import quote
|
| 5 |
+
from typing import Dict, List, Any
|
| 6 |
+
from backend.src.services.connectors.cms_base import CMSBaseConnector
|
| 7 |
+
|
| 8 |
+
class SanityConnector(CMSBaseConnector):
|
| 9 |
+
def __init__(self, credentials: Dict[str, str]):
|
| 10 |
+
self.project_id = credentials.get("project_id")
|
| 11 |
+
self.dataset = credentials.get("dataset")
|
| 12 |
+
self.token = credentials.get("token") # Read-only token
|
| 13 |
+
self.api_version = "v2021-10-21"
|
| 14 |
+
|
| 15 |
+
if not all([self.project_id, self.dataset, self.token]):
|
| 16 |
+
raise ValueError("Sanity credentials (project_id, dataset, token) are required.")
|
| 17 |
+
|
| 18 |
+
# Build the base URL for API calls
|
| 19 |
+
self.base_url = f"https://{self.project_id}.api.sanity.io/{self.api_version}/data/query/{self.dataset}"
|
| 20 |
+
self.headers = {"Authorization": f"Bearer {self.token}"}
|
| 21 |
+
|
| 22 |
+
self.is_connected = False
|
| 23 |
+
|
| 24 |
+
def connect(self, credentials: Dict[str, str] = None) -> bool:
|
| 25 |
+
"""Tests the connection by making a simple, non-data-intensive query."""
|
| 26 |
+
if not self.is_connected:
|
| 27 |
+
print(f"🔌 [Sanity] Connecting to Project ID: {self.project_id}...")
|
| 28 |
+
try:
|
| 29 |
+
# Test query to check credentials
|
| 30 |
+
test_query = '*[_type == "sanity.imageAsset"][0...1]'
|
| 31 |
+
response = requests.get(self.base_url, headers=self.headers, params={'query': test_query})
|
| 32 |
+
|
| 33 |
+
if response.status_code == 200:
|
| 34 |
+
self.is_connected = True
|
| 35 |
+
print("✅ [Sanity] Connection Successful.")
|
| 36 |
+
return True
|
| 37 |
+
else:
|
| 38 |
+
print(f"❌ [Sanity] Connection Failed. Status: {response.status_code}, Response: {response.text}")
|
| 39 |
+
return False
|
| 40 |
+
except Exception as e:
|
| 41 |
+
print(f"❌ [Sanity] Connection Failed: {e}")
|
| 42 |
+
return False
|
| 43 |
+
return True
|
| 44 |
+
|
| 45 |
+
def fetch_schema_structure(self) -> Dict[str, Any]:
|
| 46 |
+
"""
|
| 47 |
+
🕵️♂️ DEEP DISCOVERY: Fetches 1 sample of EVERY type to map the full nesting.
|
| 48 |
+
"""
|
| 49 |
+
if not self.is_connected: self.connect()
|
| 50 |
+
|
| 51 |
+
print("🕵️♂️ Starting Deep Schema Discovery...")
|
| 52 |
+
|
| 53 |
+
# Step 1: Get all unique document types (filtering out system types)
|
| 54 |
+
types_query = "array::unique(*[!(_id in path('_.**')) && !(_type match 'sanity.*')]._type)"
|
| 55 |
+
|
| 56 |
+
try:
|
| 57 |
+
response = requests.get(self.base_url, headers=self.headers, params={'query': types_query})
|
| 58 |
+
if response.status_code != 200:
|
| 59 |
+
print(f"❌ Failed to fetch types: {response.text}")
|
| 60 |
+
return {}
|
| 61 |
+
|
| 62 |
+
user_types = response.json().get('result', [])
|
| 63 |
+
print(f"📋 Found Types: {user_types}")
|
| 64 |
+
|
| 65 |
+
schema_map = {}
|
| 66 |
+
|
| 67 |
+
# Step 2: Loop through each type and fetch ONE full document
|
| 68 |
+
for doc_type in user_types:
|
| 69 |
+
# Query: "Give me the first item of this type"
|
| 70 |
+
sample_query = f"*[_type == '{doc_type}'][0]"
|
| 71 |
+
sample_response = requests.get(self.base_url, headers=self.headers, params={'query': sample_query})
|
| 72 |
+
sample_doc = sample_response.json().get('result')
|
| 73 |
+
|
| 74 |
+
if sample_doc:
|
| 75 |
+
# Step 3: Recursively extract structure
|
| 76 |
+
structure = self._extract_structure(sample_doc)
|
| 77 |
+
schema_map[doc_type] = structure
|
| 78 |
+
|
| 79 |
+
print(f"✅ Full Database Map Created.")
|
| 80 |
+
return schema_map
|
| 81 |
+
|
| 82 |
+
except Exception as e:
|
| 83 |
+
print(f"❌ Schema Discovery Error: {e}")
|
| 84 |
+
return {}
|
| 85 |
+
|
| 86 |
+
def _extract_structure(self, doc: Any, depth=0) -> Any:
|
| 87 |
+
"""
|
| 88 |
+
Helper to map nested fields.
|
| 89 |
+
Real Data: {"store": {"price": 20}} -> Map: {"store": {"price": "Number"}}
|
| 90 |
+
"""
|
| 91 |
+
if depth > 3: return "..." # Stop infinite recursion
|
| 92 |
+
|
| 93 |
+
if isinstance(doc, dict):
|
| 94 |
+
structure = {}
|
| 95 |
+
for key, value in doc.items():
|
| 96 |
+
if key.startswith("_"): continue # Skip internal fields
|
| 97 |
+
structure[key] = self._extract_structure(value, depth + 1)
|
| 98 |
+
return structure
|
| 99 |
+
|
| 100 |
+
elif isinstance(doc, list):
|
| 101 |
+
# If list has items, check the first one to know what's inside
|
| 102 |
+
if len(doc) > 0:
|
| 103 |
+
return [self._extract_structure(doc[0], depth + 1)]
|
| 104 |
+
return "List[]"
|
| 105 |
+
|
| 106 |
+
elif isinstance(doc, (int, float)):
|
| 107 |
+
return "Number"
|
| 108 |
+
elif isinstance(doc, bool):
|
| 109 |
+
return "Boolean"
|
| 110 |
+
|
| 111 |
+
return "String"
|
| 112 |
+
|
| 113 |
+
def execute_query(self, query: str) -> List[Dict[str, Any]]:
    """Run a GROQ query against the Sanity HTTP API; always returns a list."""
    if not self.is_connected:
        self.connect()

    print(f"🚀 [Sanity] Executing GROQ Query: {query}")
    try:
        # Percent-encode so special GROQ characters survive the URL.
        encoded_query = quote(query)
        response = requests.get(f"{self.base_url}?query={encoded_query}", headers=self.headers)

        if response.status_code != 200:
            print(f"❌ [Sanity] Query Failed. Status: {response.status_code}, Details: {response.text}")
            return []

        payload = response.json().get('result')
        if payload is None:
            return []
        # Single-document results come back as a dict; normalize to a list.
        return payload if isinstance(payload, list) else [payload]
    except Exception as e:
        print(f"❌ [Sanity] Query execution error: {e}")
        return []
|
backend/src/services/embeddings/factory.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# backend/src/services/embeddings/factory.py
|
| 2 |
+
from langchain_community.embeddings import (
|
| 3 |
+
SentenceTransformerEmbeddings,
|
| 4 |
+
OpenAIEmbeddings,
|
| 5 |
+
)
|
| 6 |
+
from langchain_google_genai import GoogleGenerativeAIEmbeddings
|
| 7 |
+
from backend.src.core.config import settings
|
| 8 |
+
from functools import lru_cache
|
| 9 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
| 10 |
+
|
| 11 |
+
# Ye function cache karega, taake model baar baar load na ho
|
| 12 |
+
@lru_cache()
def get_embedding_model():
    """
    Embedding factory: reads the app config and returns the matching
    embedding model instance. Cached so the model is loaded only once.
    This is the key piece of the modular embedding design.
    """
    provider = settings.EMBEDDING_PROVIDER.lower()
    model_name = settings.EMBEDDING_MODEL_NAME

    print(f"INFO: Loading embedding model from provider: '{provider}' using model '{model_name}'")

    if provider == "local":
        # Runs entirely on the local machine; no API key required.
        # cache_folder="./models_cache" can be passed to pin the download dir.
        return HuggingFaceEmbeddings(
            model_name=model_name,
        )

    if provider == "openai":
        if not settings.OPENAI_API_KEY:
            raise ValueError("OpenAI API key not found in .env file")
        return OpenAIEmbeddings(
            model=model_name,
            openai_api_key=settings.OPENAI_API_KEY
        )

    if provider == "google":
        if not settings.GOOGLE_API_KEY:
            raise ValueError("Google API key not found in .env file")
        return GoogleGenerativeAIEmbeddings(
            model=model_name,
            google_api_key=settings.GOOGLE_API_KEY,
            task_type="retrieval_document"
        )

    raise ValueError(f"Unsupported embedding provider: {provider}")
|
backend/src/services/ingestion/crawler.py
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
import requests
|
| 3 |
+
import numpy as np
|
| 4 |
+
from bs4 import BeautifulSoup
|
| 5 |
+
from urllib.parse import urljoin
|
| 6 |
+
from sqlalchemy.ext.asyncio import AsyncSession
|
| 7 |
+
from backend.src.models.ingestion import IngestionJob, JobStatus
|
| 8 |
+
from backend.src.services.vector_store.qdrant_adapter import get_vector_store
|
| 9 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 10 |
+
from langchain_core.documents import Document
|
| 11 |
+
from qdrant_client.http import models
|
| 12 |
+
|
| 13 |
+
# --- NEW IMPORT ---
|
| 14 |
+
from backend.src.services.ingestion.guardrail_factory import predict_with_model
|
| 15 |
+
|
| 16 |
+
# --- CONFIGURATION ---
|
| 17 |
+
MAX_PAGES_LIMIT = 50
|
| 18 |
+
|
| 19 |
+
class SmartCrawler:
    """
    Breadth-first website crawler with an AI content guardrail.

    Starting from ``root_url`` it fetches pages, rejects pages the NLI
    guardrail classifies as e-commerce, chunks the remaining text and stores
    it in the Qdrant vector store. Crawl progress and errors are persisted
    on the ``IngestionJob`` row identified by ``job_id``.
    """

    def __init__(self, job_id: int, url: str, session_id: str, crawl_type: str, db: AsyncSession):
        self.job_id = job_id
        self.root_url = url
        self.session_id = session_id
        self.crawl_type = crawl_type  # "full_site" follows in-domain links; anything else stops at the root page
        self.db = db
        self.visited = set()  # URLs already queued, so a page is never fetched twice
        self.vector_store = get_vector_store()
        # Guardrail model loading was removed from here (it is lazy-loaded via the factory).

    async def log_status(self, status: str, processed=0, total=0, error=None):
        """Persist crawl status/progress onto the IngestionJob row (best-effort)."""
        try:
            job = await self.db.get(IngestionJob, self.job_id)
            if job:
                job.status = status
                job.pages_processed = processed
                job.total_pages_found = total
                if error:
                    job.error_message = str(error)
                await self.db.commit()
        except Exception as e:
            # Status logging must never kill the crawl; just report the DB problem.
            print(f"DB Log Error: {e}")

    async def is_ai_unsafe(self, text: str, url: str) -> bool:
        """
        Non-blocking AI guardrail check via the factory.

        Returns True when the page looks like an e-commerce product page
        (softmax entailment probability > 0.5), meaning it should be blocked.
        """
        # Sample the start and the middle of the page instead of scoring all text.
        sample_text = text[:300] + " ... " + text[len(text)//2 : len(text)//2 + 300]
        label = "This is an e-commerce product page with price, buy button, or shopping cart."

        # Runs the model in a worker thread, so the server event loop is not blocked.
        scores = await predict_with_model(sample_text, label)

        # Softmax over the raw NLI logits; per the report below the indices are
        # [0]=contradiction, [1]=entailment, [2]=neutral.
        probs = np.exp(scores) / np.sum(np.exp(scores))
        entailment_score = probs[1]

        print("\n" + "="*60)
        print(f"🤖 AI ANALYSIS REPORT for: {url}")
        print("-" * 60)
        print(f"📊 Scores -> Contradiction: {probs[0]:.2f}, Entailment: {probs[1]:.2f}, Neutral: {probs[2]:.2f}")
        print(f"🎯 Target Score (Entailment): {entailment_score:.4f} (Threshold: 0.5)")

        if entailment_score > 0.5:
            print(f"⛔ DECISION: BLOCKED")
            print("="*60 + "\n")
            return True
        else:
            print(f"✅ DECISION: ALLOWED")
            print("="*60 + "\n")
            return False

    async def fetch_page(self, url: str):
        """GET a page in a worker thread; returns the Response or None on any error."""
        try:
            headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
            return await asyncio.to_thread(requests.get, url, headers=headers, timeout=10)
        except Exception:
            return None

    async def clean_existing_data(self):
        """Delete previously ingested vectors for this root URL (re-crawl replaces old data)."""
        print(f"INFO: Cleaning old data for source: {self.root_url}")
        try:
            self.vector_store.client.delete(
                collection_name=self.vector_store.collection_name,
                points_selector=models.FilterSelector(
                    filter=models.Filter(
                        must=[
                            models.FieldCondition(
                                key="metadata.source",
                                match=models.MatchValue(value=self.root_url)
                            )
                        ]
                    )
                )
            )
        except Exception as e:
            print(f"Warning: Clean data failed: {e}")

    async def process_page(self, url: str, soup: BeautifulSoup) -> bool:
        """
        Extract visible text from a parsed page, guardrail-check it, then chunk
        and store it. Returns False when the page is skipped or blocked.
        """
        # Strip non-content elements before extracting text.
        for script in soup(["script", "style", "nav", "footer", "iframe", "noscript", "svg"]):
            script.extract()

        text = soup.get_text(separator=" ", strip=True)

        if len(text) < 200:
            print(f"⚠️ Skipping {url} (Not enough text: {len(text)} chars)")
            return False

        # Awaited so the guardrail inference runs without blocking the event loop.
        if await self.is_ai_unsafe(text, url):
            return False

        splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        docs = [Document(page_content=text, metadata={
            "source": self.root_url,
            "specific_url": url,
            "session_id": self.session_id,
            "type": "web_scrape"
        })]
        split_docs = splitter.split_documents(docs)

        await self.vector_store.aadd_documents(split_docs)
        return True

    async def start(self):
        """Run the BFS crawl until the queue empties or MAX_PAGES_LIMIT pages are stored."""
        try:
            await self.log_status(JobStatus.PROCESSING)
            await self.clean_existing_data()

            queue = [self.root_url]
            self.visited.add(self.root_url)
            total_processed = 0

            while queue and total_processed < MAX_PAGES_LIMIT:
                current_url = queue.pop(0)

                response = await self.fetch_page(current_url)
                if not response or response.status_code != 200:
                    continue

                soup = BeautifulSoup(response.content, 'html.parser')

                success = await self.process_page(current_url, soup)

                if not success:
                    # If the very first page is blocked, the whole site is rejected.
                    if current_url == self.root_url:
                        await self.log_status(JobStatus.FAILED, error="Root URL blocked. Identified as E-commerce.")
                        return
                    continue

                total_processed += 1

                if self.crawl_type == "full_site":
                    # Enqueue unseen links that stay on this site (substring match on root URL).
                    for link in soup.find_all('a', href=True):
                        full_link = urljoin(self.root_url, link['href'])
                        if self.root_url in full_link and full_link not in self.visited:
                            self.visited.add(full_link)
                            queue.append(full_link)

                await self.log_status(JobStatus.PROCESSING, processed=total_processed, total=len(queue)+total_processed)
                await asyncio.sleep(0.5)  # polite crawl delay between pages

            await self.log_status(JobStatus.COMPLETED, processed=total_processed)
            print(f"SUCCESS: Crawling finished. Processed {total_processed} pages.")

        except Exception as e:
            print(f"ERROR: Crawling failed: {e}")
            await self.log_status(JobStatus.FAILED, error=str(e))
|
backend/src/services/ingestion/file_processor.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# backend/src/services/ingestion/file_processor.py
|
| 2 |
+
import os
|
| 3 |
+
import asyncio
|
| 4 |
+
# Specific Stable Loaders
|
| 5 |
+
from langchain_community.document_loaders import (
|
| 6 |
+
TextLoader,
|
| 7 |
+
PyPDFLoader,
|
| 8 |
+
CSVLoader,
|
| 9 |
+
Docx2txtLoader,
|
| 10 |
+
UnstructuredMarkdownLoader
|
| 11 |
+
)
|
| 12 |
+
# Fallback loader (agar upar walon mein se koi na ho)
|
| 13 |
+
from langchain_community.document_loaders import UnstructuredFileLoader
|
| 14 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 15 |
+
from backend.src.services.vector_store.qdrant_adapter import get_vector_store
|
| 16 |
+
|
| 17 |
+
def get_loader(file_path: str):
    """
    Factory: return the most stable document loader for a file, chosen by
    its extension. Unknown formats fall back to the heavyweight
    UnstructuredFileLoader.
    """
    ext = os.path.splitext(file_path)[1].lower()

    # .txt and .md both go through the fast, safe TextLoader
    # (Unstructured's markdown loader can be flaky).
    if ext in (".txt", ".md"):
        return TextLoader(file_path, encoding="utf-8")

    if ext == ".pdf":
        # Pure-python PDF parsing; does not hang.
        return PyPDFLoader(file_path)

    if ext == ".csv":
        return CSVLoader(file_path, encoding="utf-8")

    if ext in (".doc", ".docx"):
        # Docx2txtLoader is lightweight.
        return Docx2txtLoader(file_path)

    # Odd formats: try the heavy 'Unstructured' loader as a last resort.
    print(f"INFO: Unknown format '{ext}', attempting to use UnstructuredFileLoader...")
    return UnstructuredFileLoader(file_path)
|
| 47 |
+
|
| 48 |
+
async def process_file(file_path: str, session_id: str):
    """
    Ingest one uploaded file into the vector DB.
    Supports: TXT, PDF, CSV, DOCX, MD and others.
    Returns the number of chunks stored (0 on any failure).
    """
    print(f"INFO: [Ingestion] Starting processing for file: {file_path}")

    # Pick the right loader and run it in a thread: not every loader has
    # 'aload()', so the synchronous 'load()' is wrapped to keep the server free.
    try:
        loader = get_loader(file_path)
        docs = await asyncio.to_thread(loader.load)
    except Exception as e:
        print(f"ERROR: [Ingestion] Failed to load file {file_path}: {e}")
        return 0

    if not docs:
        print(f"WARNING: [Ingestion] Could not extract any content from {file_path}")
        return 0

    # Split the document into overlapping chunks for embedding.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len
    )
    split_docs = splitter.split_documents(docs)

    # Attach source-tracking metadata to every chunk.
    base_name = os.path.basename(file_path)
    extension = os.path.splitext(file_path)[1].lower()
    for chunk in split_docs:
        chunk.metadata["session_id"] = session_id
        chunk.metadata["file_name"] = base_name
        chunk.metadata["file_type"] = extension  # stored for later filtering

    # Upload the chunks to Qdrant.
    try:
        vector_store = get_vector_store()
        await vector_store.aadd_documents(split_docs)
        print(f"SUCCESS: [Ingestion] Processed {len(split_docs)} chunks from {file_path}")
        return len(split_docs)
    except Exception as e:
        print(f"ERROR: [Ingestion] Failed to upload to Qdrant: {e}")
        return 0
|
backend/src/services/ingestion/guardrail_factory.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sentence_transformers import CrossEncoder
|
| 2 |
+
from functools import lru_cache
|
| 3 |
+
import asyncio
|
| 4 |
+
|
| 5 |
+
# Module-level cache so the guardrail model is loaded once per process.
_model_instance = None

def get_guardrail_model():
    """Return the shared guardrail CrossEncoder, loading it on first use only."""
    global _model_instance
    if _model_instance is not None:
        return _model_instance

    print("⏳ INFO: Loading AI Guardrail Model into RAM (First Time Only)...")
    # 'nli-distilroberta-base' is fairly heavy; on a slow machine consider
    # 'cross-encoder/ms-marco-TinyBERT-L-2' instead.
    _model_instance = CrossEncoder('cross-encoder/nli-distilroberta-base')
    print("✅ INFO: AI Guardrail Model Loaded!")
    return _model_instance
|
| 19 |
+
|
| 20 |
+
async def predict_with_model(text, label):
    """
    Run the guardrail prediction in a background thread so the server
    never hangs, and return the raw scores for the (text, label) pair.
    """
    model = get_guardrail_model()
    # Offload the heavy inference to a worker thread.
    result = await asyncio.to_thread(model.predict, [(text, label)])
    return result[0]
|
backend/src/services/ingestion/web_processor.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
from langchain_community.document_loaders import WebBaseLoader
|
| 3 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 4 |
+
from backend.src.services.vector_store.qdrant_adapter import get_vector_store
|
| 5 |
+
|
| 6 |
+
async def process_url(url: str, session_id: str):
    """
    Scrape a single URL, split its text into chunks and store them in Qdrant.
    Returns the number of chunks stored; re-raises scraping failures so the
    API can report them to the user.
    """
    print(f"INFO: [Ingestion] Starting scraping for URL: {url}")

    try:
        # Run the blocking loader in a worker thread so the event loop stays free.
        def load_data():
            return WebBaseLoader(url).load()

        docs = await asyncio.to_thread(load_data)

        if not docs:
            print(f"WARNING: [Ingestion] No content found at {url}")
            return 0

        print(f"INFO: [Ingestion] Successfully fetched content. Length: {len(docs[0].page_content)} chars.")
    except Exception as e:
        print(f"ERROR: [Ingestion] Failed to scrape URL {url}: {e}")
        raise e  # propagate so the API layer can surface the failure

    # Chunk the scraped text for embedding.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len
    )
    split_docs = splitter.split_documents(docs)

    # Tag every chunk so it can be traced back to its session and origin.
    for chunk in split_docs:
        chunk.metadata["session_id"] = session_id
        chunk.metadata["source"] = url
        chunk.metadata["type"] = "web_scrape"

    # Persist the chunks in Qdrant.
    try:
        vector_store = get_vector_store()
        await vector_store.aadd_documents(split_docs)
        print(f"SUCCESS: [Ingestion] Processed {len(split_docs)} chunks from {url}")
        return len(split_docs)
    except Exception as e:
        print(f"ERROR: [Ingestion] Failed to upload to Qdrant: {e}")
        return 0
|
backend/src/services/ingestion/zip_processor.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import zipfile
|
| 2 |
+
import os
|
| 3 |
+
import shutil
|
| 4 |
+
import asyncio
|
| 5 |
+
from sqlalchemy.ext.asyncio import AsyncSession
|
| 6 |
+
from backend.src.models.ingestion import IngestionJob, JobStatus
|
| 7 |
+
from backend.src.services.ingestion.file_processor import process_file
|
| 8 |
+
from backend.src.services.vector_store.qdrant_adapter import get_vector_store
|
| 9 |
+
from qdrant_client.http import models
|
| 10 |
+
|
| 11 |
+
# --- CONFIGURATION ---
|
| 12 |
+
SUPPORTED_EXTENSIONS = ['.pdf', '.txt', '.md', '.docx', '.csv']
|
| 13 |
+
MAX_FILES_IN_ZIP = 500
|
| 14 |
+
|
| 15 |
+
class SmartZipProcessor:
    """
    Ingests an uploaded ZIP archive for one chat session.

    Flow: inspect the archive (guardrails), delete the session's previously
    ingested vectors, extract to a temp directory, run every supported file
    through ``process_file`` and keep a per-file report plus job status on
    the ``IngestionJob`` row. Temp files are always cleaned up at the end.
    """

    def __init__(self, job_id: int, zip_path: str, session_id: str, db: AsyncSession):
        self.job_id = job_id
        self.zip_path = zip_path
        self.session_id = session_id
        self.db = db
        self.vector_store = get_vector_store()
        self.temp_dir = f"./temp_unzip_{job_id}"  # per-job extraction directory
        self.report = []  # per-file outcome entries: {"file", "status", ...}

    async def log_status(self, status: str, processed=0, total=0, error=None):
        """Update the job's status, progress counters and per-file report in the DB (best-effort)."""
        try:
            job = await self.db.get(IngestionJob, self.job_id)
            if job:
                job.status = status
                job.items_processed = processed
                job.total_items = total
                job.details = self.report  # persist the per-file report as well
                if error:
                    job.error_message = str(error)
                await self.db.commit()
        except Exception as e:
            print(f"DB Log Error: {e}")

    async def clean_existing_data(self):
        """Update semantics: wipe this session's previously ingested vectors first."""
        print(f"INFO: Cleaning old data for session_id: {self.session_id}")
        try:
            self.vector_store.client.delete(
                collection_name=self.vector_store.collection_name,
                points_selector=models.FilterSelector(
                    filter=models.Filter(
                        must=[
                            models.FieldCondition(
                                key="metadata.session_id",
                                match=models.MatchValue(value=self.session_id)
                            )
                        ]
                    )
                )
            )
        except Exception as e:
            print(f"Warning: Clean data failed (maybe first upload): {e}")

    def inspect_zip(self) -> list:
        """Validate the archive without extracting it and return its file entries."""
        with zipfile.ZipFile(self.zip_path, 'r') as zf:
            file_list = zf.infolist()

            # Guardrail 1: cap the number of files per archive.
            if len(file_list) > MAX_FILES_IN_ZIP:
                raise ValueError(f"Zip contains too many files ({len(file_list)}). Max allowed is {MAX_FILES_IN_ZIP}.")

            # Return only real files, not directory entries.
            return [f for f in file_list if not f.is_dir()]

    def extract_zip(self):
        """Extract the archive into the per-job temp directory."""
        os.makedirs(self.temp_dir, exist_ok=True)
        with zipfile.ZipFile(self.zip_path, 'r') as zf:
            zf.extractall(self.temp_dir)

    def cleanup(self):
        """Remove the temp directory and the uploaded archive."""
        if os.path.exists(self.temp_dir):
            shutil.rmtree(self.temp_dir)
        if os.path.exists(self.zip_path):
            os.remove(self.zip_path)

    async def start(self):
        """Main processing loop: inspect -> clean -> extract -> process each file."""
        try:
            # Step 1: Inspect the archive (raises on guardrail violations).
            files_to_process = self.inspect_zip()
            total_files = len(files_to_process)
            await self.log_status(JobStatus.PROCESSING, total=total_files)

            # Step 2: Clean old data (atomic update of the session's knowledge).
            await self.clean_existing_data()

            # Step 3: Extract to the temp directory.
            self.extract_zip()

            # Step 4: Process each file.
            processed_count = 0
            for file_info in files_to_process:
                file_path = os.path.join(self.temp_dir, file_info.filename)

                # Guardrail 2: only ingest supported extensions.
                ext = os.path.splitext(file_path)[1].lower()
                if ext not in SUPPORTED_EXTENSIONS:
                    self.report.append({"file": file_info.filename, "status": "skipped", "reason": "unsupported_type"})
                    continue

                # Process the file.
                try:
                    # Delegate the actual loading/chunking/upload to process_file.
                    chunks_added = await process_file(file_path, self.session_id)
                    if chunks_added > 0:
                        self.report.append({"file": file_info.filename, "status": "success", "chunks": chunks_added})
                    else:
                        raise ValueError("No content extracted")
                except Exception as e:
                    self.report.append({"file": file_info.filename, "status": "failed", "reason": str(e)})

                processed_count += 1
                await self.log_status(JobStatus.PROCESSING, processed=processed_count, total=total_files)
                await asyncio.sleep(0.1)  # brief yield to keep the event loop responsive

            await self.log_status(JobStatus.COMPLETED, processed=processed_count, total=total_files)
            print(f"SUCCESS: Zip processing finished. Processed {processed_count}/{total_files} files.")

        except Exception as e:
            print(f"ERROR: Zip processing failed: {e}")
            await self.log_status(JobStatus.FAILED, error=str(e))
        finally:
            self.cleanup()
|
backend/src/services/llm/factory.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
| 3 |
+
from langchain_openai import ChatOpenAI
|
| 4 |
+
from backend.src.core.config import settings
|
| 5 |
+
|
| 6 |
+
def get_llm_model(credentials: dict = None):
    """
    Universal LLM factory.

    Builds a chat model from global settings, optionally overridden by
    per-user ``credentials`` ({"provider", "model_name", "base_url",
    "api_key"}). Google uses its native client; every other provider goes
    through the OpenAI-compatible client with the matching ``base_url``.
    """

    # --- Default settings (fallback from global config) ---
    llm_provider = settings.LLM_PROVIDER.lower()
    llm_model_name = settings.LLM_MODEL_NAME
    llm_base_url = settings.LLM_BASE_URL
    llm_api_key = settings.LLM_API_KEY
    google_api_key = settings.GOOGLE_API_KEY

    # --- User-specific settings (override the defaults) ---
    if credentials:
        # Prefer the user's settings over globals.
        llm_provider = credentials.get("provider", llm_provider).lower()
        llm_model_name = credentials.get("model_name", llm_model_name)
        llm_base_url = credentials.get("base_url", llm_base_url)
        llm_api_key = credentials.get("api_key", llm_api_key)

        # For Google, the user's key doubles as the Google API key.
        if llm_provider == "google":
            google_api_key = llm_api_key

    # --- Fill in the base URL for known OpenAI-compatible providers ---
    if llm_provider == "groq" and not llm_base_url:
        llm_base_url = "https://api.groq.com/openai/v1"
        # Fall back to the .env Groq key when the user supplied none.
        llm_api_key = llm_api_key or settings.GROQ_API_KEY

    print(f"🤖 Loading AI Model: {llm_provider} -> {llm_model_name}")

    # --- BLOCK 1: GOOGLE GEMINI (native client) ---
    if llm_provider == "google":
        if not google_api_key:
            raise ValueError("Google API key not found.")
        return ChatGoogleGenerativeAI(
            model=llm_model_name,
            google_api_key=google_api_key,
            temperature=0.7,
            convert_system_message_to_human=True
        )

    # --- BLOCK 2: UNIVERSAL OPENAI-COMPATIBLE ---
    # Handles Groq, OpenAI, Ollama and any other OpenAI-compatible endpoint.
    else:
        if not llm_api_key and "localhost" not in (llm_base_url or ""):
            print("⚠️ WARNING: No API Key provided for LLM. Trying global fallback.")
            # Fall back to the global OpenAI key when the provider matches.
            if settings.OPENAI_API_KEY and llm_provider == "openai":
                llm_api_key = settings.OPENAI_API_KEY

        print(f" -> Endpoint URL: {llm_base_url or 'Default OpenAI'}")

        return ChatOpenAI(
            model_name=llm_model_name,
            api_key=llm_api_key or "dummy-key",
            openai_api_base=llm_base_url,
            temperature=0.7
        )
|
backend/src/services/routing/semantic_router.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sentence_transformers import SentenceTransformer
|
| 2 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
| 3 |
+
import numpy as np
|
| 4 |
+
|
| 5 |
+
class SemanticRouter:
    """
    Singleton router that picks the best tool for a user query by cosine
    similarity between the query embedding and each tool description.
    """

    _instance = None
    _model = None

    def __new__(cls):
        # Lazy singleton: load the multilingual model once per process.
        if cls._instance is None:
            cls._instance = super(SemanticRouter, cls).__new__(cls)
            print("🧠 [Router] Loading Multilingual Embedding Model...")
            # This model understands Hindi/Urdu/English alike.
            cls._model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')
            print("✅ [Router] Multilingual Model Loaded.")
        return cls._instance

    def route(self, query: str, tools_map: dict) -> str | None:
        """Return the best-matching tool name, or None when nothing matches."""
        if not tools_map:
            return None

        tool_names = list(tools_map.keys())

        # Encode query and all tool descriptions in one batch.
        embeddings = self._model.encode([query, *tools_map.values()])
        query_vec = embeddings[0].reshape(1, -1)
        tool_vecs = embeddings[1:]

        # Similarity of the query against every tool description.
        scores = cosine_similarity(query_vec, tool_vecs)[0]

        # Debug report of all candidate scores.
        print(f"\n📊 [Router Logic] Query: '{query}'")
        for name, score in zip(tool_names, scores):
            print(f" 🔹 {name}: {score:.4f}")

        best_idx = np.argmax(scores)
        best_score = scores[best_idx]

        # Multilingual/Hinglish matches tend to score low, so the cutoff is
        # deliberately permissive: anything above 0.05 counts as a match.
        if best_score < 0.05:
            print(f"⛔ [Router] Score too low ({best_score:.4f} < 0.05). Fallback.")
            return None

        return tool_names[best_idx]
|
backend/src/services/security/pii_scrubber.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
from typing import Tuple
|
| 3 |
+
|
| 4 |
+
class SecurityException(Exception):
    """Custom exception for security violations like prompt injection."""
|
| 7 |
+
|
| 8 |
+
class PIIScrubber:
    """Redacts common PII patterns from text and screens input for basic
    prompt-injection keywords."""

    # Pre-compiling regex patterns for performance.

    # Email: standard pattern.
    # FIX: the TLD class was previously [A-Z|a-z], which also matched a
    # literal '|' character ('|' has no alternation meaning inside a
    # character class); [A-Za-z] is the intended class.
    EMAIL_REGEX = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b')

    # Phone: matches +1-555-555-5555, (555) 555-5555, 555 555 5555.
    # Logic: digits with common separators, roughly 10-15 digits total.
    PHONE_REGEX = re.compile(r'\b(?:\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b')

    # Credit card: 13-16 digits, either contiguous or dash/space-grouped in 4s.
    CREDIT_CARD_REGEX = re.compile(r'\b(?:\d{4}[-\s]?){3}\d{4}\b|\b\d{13,16}\b')

    # IPv4 address (loose: individual octets are not range-checked).
    IP_REGEX = re.compile(r'\b(?:\d{1,3}\.){3}\d{1,3}\b')

    # Basic injection keywords (lowercased for case-insensitive check).
    INJECTION_KEYWORDS = [
        "ignore all previous instructions",
        "ignore previous instructions",
        "system override",
        "delete database",
        "drop table",
        "you are now",
        "bypass security"
    ]

    @staticmethod
    def scrub(text: str) -> str:
        """
        Sanitizes the input text by replacing PII with placeholders.

        Redaction order matters: email -> phone -> credit card -> IP, so
        earlier placeholders are never re-matched by later patterns.
        """
        if not text:
            return ""

        # Apply redactions sequentially.
        scrubbed_text = text
        scrubbed_text = PIIScrubber.EMAIL_REGEX.sub("[EMAIL_REDACTED]", scrubbed_text)
        scrubbed_text = PIIScrubber.PHONE_REGEX.sub("[PHONE_REDACTED]", scrubbed_text)
        scrubbed_text = PIIScrubber.CREDIT_CARD_REGEX.sub("[CC_REDACTED]", scrubbed_text)
        scrubbed_text = PIIScrubber.IP_REGEX.sub("[IP_REDACTED]", scrubbed_text)

        return scrubbed_text

    @staticmethod
    def check_for_injection(text: str) -> Tuple[bool, str]:
        """
        Checks for basic prompt-injection attempts via keyword matching.

        Returns: (is_safe: bool, reason: str) — reason is empty when safe.
        """
        if not text:
            return True, ""

        lower_text = text.lower()
        for keyword in PIIScrubber.INJECTION_KEYWORDS:
            if keyword in lower_text:
                return False, f"Malicious keyword detected: '{keyword}'"

        return True, ""
|
backend/src/services/tools/cms_agent.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import json
|
| 3 |
+
from langchain.agents import create_agent
|
| 4 |
+
from backend.src.services.llm.factory import get_llm_model
|
| 5 |
+
from backend.src.services.tools.cms_tool import CMSQueryTool
|
| 6 |
+
from typing import Optional, Dict
|
| 7 |
+
|
| 8 |
+
# --- THE CMS EXPERT PROMPT (ANTI-YAP VERSION 🤐) ---
|
| 9 |
+
CMS_SYSTEM_PROMPT = """You are a Sanity GROQ Query Generator.
|
| 10 |
+
Your goal is to query the database based on the user's request.
|
| 11 |
+
|
| 12 |
+
--- KNOWLEDGE BASE (SCHEMA) ---
|
| 13 |
+
{schema_map}
|
| 14 |
+
|
| 15 |
+
--- RULES (READ CAREFULLY) ---
|
| 16 |
+
1. **NO EXPLANATIONS:** Do NOT say "Here is the query" or "I will search for...".
|
| 17 |
+
2. **JUST THE QUERY:** Directly call the 'cms_query_tool' with the GROQ string.
|
| 18 |
+
3. **USE THE SCHEMA:** Look at the schema map above. If `price` is inside `variants`, use `variants[].price`.
|
| 19 |
+
4. **SYNTAX:** `*[_type == "product" && title match "Blue*"]`
|
| 20 |
+
|
| 21 |
+
--- ERROR HANDLING ---
|
| 22 |
+
If the query fails or returns empty, just say: "No products found matching your criteria."
|
| 23 |
+
Do NOT make up fake products from Amazon or other websites.
|
| 24 |
+
|
| 25 |
+
User Input: {input}
|
| 26 |
+
"""
|
| 27 |
+
|
| 28 |
+
# --- AGENT ADAPTER ---
|
| 29 |
+
class AgentAdapter:
    """Adapts a message-based agent to the legacy
    {"input": ...} -> {"output": ...} invocation contract, while nudging
    the model to answer with only the tool call."""

    def __init__(self, agent):
        self.agent = agent

    async def ainvoke(self, input_dict):
        """Invoke the wrapped agent and return its final message content."""
        question = input_dict.get("input", "")
        # Append a strict instruction so the model does not add commentary.
        strict = f"{question} (Return ONLY the GROQ query tool call. Do not explain.)"

        response = await self.agent.ainvoke({"messages": [("user", strict)]})
        final = response["messages"][-1]
        return {"output": final.content}
|
| 43 |
+
|
| 44 |
+
# --- DYNAMIC AGENT FACTORY ---
|
| 45 |
+
def get_cms_agent(
    user_id: str,
    schema_map: dict,
    llm_credentials: Optional[Dict[str, str]] = None
):
    """Build a GROQ-generating CMS agent bound to one user.

    Args:
        user_id: Owner whose Sanity integration the tool will query.
        schema_map: Discovered CMS schema, serialized into the system prompt.
        llm_credentials: Optional per-user LLM keys; factory falls back to
            global configuration when None.

    Returns:
        AgentAdapter wrapping the created agent.
    """
    # 1. Load User's LLM
    llm = get_llm_model(credentials=llm_credentials)

    # 2. Initialize Tool
    tool = CMSQueryTool(user_id=str(user_id))
    tools = [tool]

    # Convert schema to string for prompt embedding.
    schema_str = json.dumps(schema_map, indent=2)

    # 3. Create Agent
    # NOTE: passing input="{input}" re-escapes the {input} placeholder so the
    # formatted prompt still contains a literal "{input}" token.
    agent_runnable = create_agent(
        model=llm,
        tools=tools,
        system_prompt=CMS_SYSTEM_PROMPT.format(schema_map=schema_str, input="{input}")
    )

    return AgentAdapter(agent_runnable)
|
backend/src/services/tools/cms_tool.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import json
|
| 3 |
+
import ast
|
| 4 |
+
from typing import Type
|
| 5 |
+
from pydantic import BaseModel, Field
|
| 6 |
+
from langchain_core.tools import BaseTool
|
| 7 |
+
from sqlalchemy.future import select
|
| 8 |
+
|
| 9 |
+
# Imports for DB access & Connector
|
| 10 |
+
from backend.src.db.session import AsyncSessionLocal
|
| 11 |
+
from backend.src.models.integration import UserIntegration
|
| 12 |
+
# Ab hum Mock nahi, Real use karenge
|
| 13 |
+
from backend.src.services.connectors.sanity_connector import SanityConnector
|
| 14 |
+
|
| 15 |
+
class CMSQueryInput(BaseModel):
    # Tool-call argument schema: a single raw query string.
    query: str = Field(..., description="The query string (GROQ/GraphQL) to execute.")
|
| 17 |
+
|
| 18 |
+
class CMSQueryTool(BaseTool):
    # LangChain tool that executes a GROQ query against the calling user's
    # Sanity project, looked up fresh from the integrations table per call.
    name: str = "cms_query_tool"
    description: str = """
    Use this tool to fetch products, offers, or content from the CMS.
    Input should be a specific query string (e.g., GROQ for Sanity).
    """
    args_schema: Type[BaseModel] = CMSQueryInput
    user_id: str  # owner whose Sanity integration is used

    def _run(self, query: str) -> str:
        # Sync execution is unsupported: the DB lookup below is async-only.
        raise NotImplementedError("Use _arun for async execution")

    async def _arun(self, query: str) -> str:
        """Look up the user's active Sanity integration, run the GROQ query,
        and return pretty-printed JSON (or a human-readable error string)."""
        print(f"🛒 [CMS Tool] Processing Query: {query}")

        try:
            async with AsyncSessionLocal() as db:
                # 1. Fetch Integration
                stmt = select(UserIntegration).where(
                    UserIntegration.user_id == self.user_id,
                    UserIntegration.provider == 'sanity',  # Specifically find Sanity
                    UserIntegration.is_active == True
                )
                result = await db.execute(stmt)
                integration = result.scalars().first()

                if not integration:
                    return "Error: No active Sanity integration found. Please connect first."

                # 2. Parse credentials.
                # NOTE(review): credentials are json.loads'ed directly —
                # presumably stored as plain JSON here; confirm whether a
                # decryption step is expected before parsing.
                creds_dict = {}
                try:
                    creds_str = integration.credentials
                    creds_dict = json.loads(creds_str)
                except Exception as e:
                    print(f"❌ [CMS Tool] Credential parsing failed: {e}")
                    return "Error: Invalid Sanity credentials format in database."

                # 3. Connect & execute with the user's own credentials.
                connector = SanityConnector(credentials=creds_dict)

                if not connector.connect():
                    return "Error: Could not connect to Sanity. Please check your credentials."

                data = connector.execute_query(query)

                if not data:
                    return "No data found matching your query."

                return json.dumps(data, indent=2)

        except Exception as e:
            # Catch-all so the agent receives a string instead of a raised error.
            print(f"❌ [CMS Tool] CRITICAL ERROR: {e}")
            import traceback
            traceback.print_exc()
            return f"Error executing CMS query: {str(e)}"
|
backend/src/services/tools/nosql_agent.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
from langchain.agents import create_agent
|
| 3 |
+
from backend.src.services.llm.factory import get_llm_model
|
| 4 |
+
from backend.src.services.tools.nosql_tool import NoSQLQueryTool
|
| 5 |
+
from typing import Optional, Dict
|
| 6 |
+
|
| 7 |
+
# --- THE CONSTITUTION (Same as before) ---
|
| 8 |
+
NOSQL_SYSTEM_PROMPT = """You are a User Data Assistant with access to a NoSQL Database.
|
| 9 |
+
Your job is to retrieve user profile details and activity logs using the 'nosql_database_tool'.
|
| 10 |
+
|
| 11 |
+
--- CRITICAL RULES FOR QUERYING ---
|
| 12 |
+
1. **DO NOT** include 'user_id' or '_id' in the 'query_json'.
|
| 13 |
+
- The tool AUTOMATICALLY applies the security filter for the current user.
|
| 14 |
+
- If you want to fetch the user's profile, just send an empty query: "{{}}"
|
| 15 |
+
|
| 16 |
+
2. **DO NOT** try to select specific fields in the query_json.
|
| 17 |
+
- Incorrect: {{"fields": ["email"]}}
|
| 18 |
+
- Correct: {{}} (Fetch the whole document, then you extract the email).
|
| 19 |
+
|
| 20 |
+
3. You are acting on behalf of User ID: {user_id}.
|
| 21 |
+
|
| 22 |
+
--- AVAILABLE COLLECTIONS ---
|
| 23 |
+
1. 'users': Contains profile info (name, email, membership_tier).
|
| 24 |
+
2. 'activity_logs': Contains login history and actions.
|
| 25 |
+
|
| 26 |
+
--- EXAMPLES ---
|
| 27 |
+
- User: "Show my profile" -> Tool Input: collection='users', query_json='{{}}'
|
| 28 |
+
- User: "Show my login history" -> Tool Input: collection='activity_logs', query_json='{{"action": "login"}}'
|
| 29 |
+
"""
|
| 30 |
+
|
| 31 |
+
class AgentAdapter:
    """Compatibility shim: exposes the legacy {"input"} -> {"output"}
    calling convention on top of a message-based V1 agent."""

    def __init__(self, agent):
        self.agent = agent

    async def ainvoke(self, input_dict):
        """Forward the user's text to the agent and return the final reply."""
        question = input_dict.get("input", "")
        reply = await self.agent.ainvoke({"messages": [("user", question)]})
        return {"output": reply["messages"][-1].content}
|
| 42 |
+
|
| 43 |
+
# --- DYNAMIC AGENT FACTORY (UPDATED) ---
|
| 44 |
+
def get_nosql_agent(
    user_id: str,
    llm_credentials: Optional[Dict[str, str]] = None
):
    """
    Creates a NoSQL Agent using the user's specific LLM credentials.

    Args:
        user_id: Injected into the system prompt and the tool's row filter.
        llm_credentials: Optional per-user LLM keys; global defaults otherwise.

    Returns:
        AgentAdapter wrapping the created agent.
    """
    # 1. Load User's LLM
    llm = get_llm_model(credentials=llm_credentials)

    # 2. Initialize the tool
    # NOTE(review): NoSQLQueryTool also declares a db_credentials field which
    # is not passed here — confirm the field is optional, otherwise pydantic
    # validation fails at construction time.
    tool = NoSQLQueryTool(user_id=str(user_id))
    tools = [tool]

    # 3. Create Agent
    agent_runnable = create_agent(
        model=llm,
        tools=tools,
        system_prompt=NOSQL_SYSTEM_PROMPT.format(user_id=user_id)
    )

    return AgentAdapter(agent_runnable)
|
backend/src/services/tools/nosql_tool.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import json
|
| 3 |
+
import asyncio
|
| 4 |
+
from typing import Type
|
| 5 |
+
from pydantic import BaseModel, Field
|
| 6 |
+
from langchain_core.tools import BaseTool
|
| 7 |
+
from backend.src.services.connectors.mongo_connector import MongoConnector
|
| 8 |
+
from typing import Dict, Optional
|
| 9 |
+
|
| 10 |
+
# --- NoSQLQueryInput Schema (Same as before) ---
|
| 11 |
+
class NoSQLQueryInput(BaseModel):
    # Argument schema for nosql_database_tool: target collection + JSON filter.
    collection: str = Field(..., description="The name of the collection to query (e.g., 'users', 'activity_logs').")
    query_json: str = Field(..., description="A valid JSON string representing the query filter.")
|
| 14 |
+
|
| 15 |
+
class NoSQLQueryTool(BaseTool):
    """LangChain tool that runs scoped read queries against the user's MongoDB.

    Security: a `user_id` filter is always forced into the parsed query, and
    `$where` / `$function` operators are rejected, so the LLM can neither read
    another user's documents nor execute server-side JavaScript.
    """
    name: str = "nosql_database_tool"
    description: str = """
    Use this tool to query the NoSQL User Database.
    Useful for retrieving User Profiles and Activity Logs.
    """
    args_schema: Type[BaseModel] = NoSQLQueryInput

    # --- DYNAMIC INJECTION ---
    user_id: str
    # FIX: this was a required field, but get_nosql_agent() constructs the tool
    # without credentials, which raised a pydantic validation error at runtime.
    # Optional with a None default keeps both call sites working.
    db_credentials: Optional[Dict[str, str]] = None

    def _run(self, collection: str, query_json: str) -> str:
        """Parse, sanity-check, scope and execute a Mongo find; return text."""
        # 1. Initialize connector with the user's credentials. An empty dict is
        #    passed when none were supplied so the connector can apply its own
        #    fallback configuration.
        connector = MongoConnector(credentials=self.db_credentials or {})

        try:
            # 2. Parse query.
            # NOTE(review): the quote swap tolerates LLMs emitting
            # single-quoted pseudo-JSON, but corrupts legitimate apostrophes
            # inside string values — acceptable for filter-style queries.
            query_dict = json.loads(query_json.replace("'", '"'))

            # 3. Security checks (injection & RBAC)
            query_str = str(query_dict)
            if "$where" in query_str or "$function" in query_str:
                return "⛔ SECURITY ALERT: Malicious operators detected."

            # Force user_id filter so results are always scoped to this user.
            query_dict['user_id'] = self.user_id

            print(f"🔎 [NoSQL Tool] Executing Query on '{collection}': {query_dict}")

            # 4. Execute (read-only, capped at 5 documents)
            results = connector.find_many(collection, query_dict, limit=5)

            if not results:
                return "No records found matching your request."

            return f"Found {len(results)} records:\n{json.dumps(results, indent=2, default=str)}"

        except json.JSONDecodeError:
            return "❌ Error: Invalid JSON query format."
        except Exception as e:
            return f"❌ System Error: {str(e)}"

    async def _arun(self, collection: str, query_json: str):
        """Async wrapper: runs the blocking Mongo call in a worker thread."""
        return await asyncio.to_thread(self._run, collection, query_json)
|
backend/src/services/tools/secure_agent.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
from langchain.agents import create_agent
|
| 3 |
+
from backend.src.services.llm.factory import get_llm_model
|
| 4 |
+
from backend.src.services.tools.sql_tool import get_sql_toolkit # Updated Import
|
| 5 |
+
from typing import Optional, Dict
|
| 6 |
+
|
| 7 |
+
# --- PROMPTS (Same as before) ---
|
| 8 |
+
# Full-access prompt for admin users: no row-level restriction.
ADMIN_PREFIX = "You are a PostgreSQL expert... full access..."
# Row-level-security prompt for customers: every generated query must be
# scoped to the current user's id. {user_id} is filled via str.format().
CUSTOMER_PREFIX = """You are a SQL helper for User ID: {user_id}.
CRITICAL: For every query, you MUST add a "WHERE user_id = {user_id}" clause.
Never show data of other users.
Always present data in a clean MARKDOWN TABLE.
"""
|
| 14 |
+
|
| 15 |
+
# --- AGENT ADAPTER (Same as before) ---
|
| 16 |
+
class AgentAdapter:
    """Bridges the legacy {"input"} -> {"output"} contract onto a
    message-based agent runnable."""

    def __init__(self, agent):
        self.agent = agent

    async def ainvoke(self, input_dict):
        """Send the user's text as a single message; return the last reply."""
        question = input_dict.get("input", "")
        reply = await self.agent.ainvoke({"messages": [("user", question)]})
        return {"output": reply["messages"][-1].content}
|
| 26 |
+
|
| 27 |
+
# --- DYNAMIC AGENT FACTORY ---
|
| 28 |
+
def get_secure_agent(
    user_id: int,
    role: str,
    db_credentials: Dict[str, str],
    llm_credentials: Optional[Dict[str, str]] = None
):
    """
    Creates a Secure SQL Agent using the specific user's databases and LLM.

    Args:
        user_id: Used to scope customer queries via the prompt's WHERE rule.
        role: "admin" gets the full-access prompt; any other role gets the
            row-level-security customer prompt.
        db_credentials: Must contain the SQL connection "url".
        llm_credentials: Optional per-user LLM keys; global defaults otherwise.

    Returns:
        AgentAdapter wrapping the created agent.
    """
    # 1. Load User's LLM (via factory)
    llm = get_llm_model(credentials=llm_credentials)

    # 2. Get User-specific SQL Toolkit
    toolkit = get_sql_toolkit(db_credentials, llm_credentials)
    tools = toolkit.get_tools()  # extract the individual tools from the toolkit

    # 3. Select the right security prompt based on role.
    if role == "admin":
        system_prefix = ADMIN_PREFIX
    else:
        system_prefix = CUSTOMER_PREFIX.format(user_id=user_id)

    # 4. Create the Agent (new V1 'create_agent' syntax)
    agent_runnable = create_agent(
        model=llm,
        tools=tools,
        system_prompt=system_prefix
    )

    return AgentAdapter(agent_runnable)
|
backend/src/services/tools/sql_tool.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
from langchain_community.utilities import SQLDatabase
|
| 3 |
+
from langchain_community.agent_toolkits import SQLDatabaseToolkit
|
| 4 |
+
from backend.src.services.llm.factory import get_llm_model
|
| 5 |
+
from typing import Optional, Dict
|
| 6 |
+
|
| 7 |
+
# --- DYNAMIC FUNCTIONS ---
|
| 8 |
+
|
| 9 |
+
def get_database_connection(db_credentials: Dict[str, str]) -> SQLDatabase:
    """Open a synchronous SQLDatabase from the user's stored connection URL.

    Args:
        db_credentials: Mapping expected to contain a "url" entry.

    Raises:
        ValueError: when no "url" entry is present (or it is empty).
    """
    uri = db_credentials.get("url")
    if not uri:
        raise ValueError("SQL Database URL not found in user's settings.")

    # The LangChain SQLDatabase wrapper is synchronous, so strip the async
    # driver suffix if the stored URL targets asyncpg.
    if "+asyncpg" in uri:
        uri = uri.replace("+asyncpg", "")

    print(f"INFO: [SQL Tool] Connecting to user's SQL DB: {uri[:30]}...")

    # Two sample rows per table keep the schema context small for the LLM.
    return SQLDatabase.from_uri(uri, sample_rows_in_table_info=2)
|
| 29 |
+
|
| 30 |
+
def get_sql_toolkit(
    db_credentials: Dict[str, str],
    llm_credentials: Optional[Dict[str, str]] = None
) -> SQLDatabaseToolkit:
    """Assemble a SQLDatabaseToolkit from the user's own database and LLM.

    Args:
        db_credentials: Must contain the "url" connection string.
        llm_credentials: Optional per-user LLM keys; global defaults otherwise.
    """
    # Connect to the user's database, then pair it with their LLM.
    database = get_database_connection(db_credentials)
    language_model = get_llm_model(credentials=llm_credentials)
    return SQLDatabaseToolkit(db=database, llm=language_model)
|
backend/src/services/vector_store/qdrant_adapter.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import qdrant_client
|
| 3 |
+
from qdrant_client import QdrantClient
|
| 4 |
+
from qdrant_client.http import models
|
| 5 |
+
from langchain_qdrant import QdrantVectorStore
|
| 6 |
+
from backend.src.core.config import settings
|
| 7 |
+
from backend.src.services.embeddings.factory import get_embedding_model
|
| 8 |
+
from typing import Optional, Dict
|
| 9 |
+
|
| 10 |
+
# @lru_cache() HATA DIYA - We can't cache user-specific connections
|
| 11 |
+
def get_vector_store(credentials: Optional[Dict[str, str]] = None):
    """
    Dynamic Vector Store Connector.
    1. If 'credentials' are given, use them (the user's own cloud Qdrant).
    2. Otherwise fall back to the global settings (fallback/admin store).

    Returns a LangChain QdrantVectorStore; creates the collection on first use.

    Raises:
        ValueError: when no Qdrant URL is configured anywhere.
        ConnectionError: when Qdrant is unreachable or access fails.
    """
    embedding_model = get_embedding_model()  # local model — no API keys needed

    # --- DYNAMIC CONFIGURATION LOGIC ---
    if credentials:
        # User-specific Cloud settings
        qdrant_url = credentials.get("url")
        qdrant_api_key = credentials.get("api_key")
        collection_name = credentials.get("collection_name", "user_default_collection")
    else:
        # Global fallback settings
        qdrant_url = settings.QDRANT_URL
        qdrant_api_key = settings.QDRANT_API_KEY
        collection_name = settings.QDRANT_COLLECTION_NAME

    if not qdrant_url:
        raise ValueError("Qdrant URL is not configured for this user or globally.")

    print(f"INFO: [VectorDB] Connecting to Qdrant at '{qdrant_url}'...")

    # 1. Build the Qdrant client (with the user's keys when provided).
    client = QdrantClient(
        url=qdrant_url,
        api_key=qdrant_api_key,
    )

    # 2. CHECK: does the collection exist?
    # try/except also surfaces plain connection errors, not just 404s.
    try:
        # collection_exists is deprecated, use get_collection instead
        client.get_collection(collection_name=collection_name)
        print(f"INFO: [VectorDB] Collection '{collection_name}' already exists.")
    except Exception as e:
        # A "Not found" error means we should create the collection.
        if "404" in str(e) or "Not found" in str(e):
            print(f"INFO: Collection '{collection_name}' not found. Creating it now...")

            # Probe the embedding model once to learn the vector dimension.
            dummy_embedding = embedding_model.embed_query("test")
            vector_size = len(dummy_embedding)

            client.create_collection(
                collection_name=collection_name,
                vectors_config=models.VectorParams(
                    size=vector_size,
                    distance=models.Distance.COSINE
                )
            )
            print(f"SUCCESS: Created collection '{collection_name}' with vector size {vector_size}.")
        else:
            # Any other failure (e.g. connection refused) is fatal.
            raise ConnectionError(f"Failed to connect or access Qdrant: {e}")

    # 3. Build and return the LangChain vector-store wrapper.
    vector_store = QdrantVectorStore(
        client=client,
        collection_name=collection_name,
        embedding=embedding_model,
        content_payload_key="page_content",
        metadata_payload_key="metadata"
    )

    return vector_store
|
backend/src/utils/auth.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from passlib.context import CryptContext
|
| 2 |
+
from datetime import datetime, timedelta
|
| 3 |
+
from jose import jwt
|
| 4 |
+
from backend.src.core.config import settings
|
| 5 |
+
|
| 6 |
+
# Password hasher — passlib context configured for Argon2 (the previous
# comment said "Bcrypt", but the scheme actually in use is argon2).
pwd_context = CryptContext(schemes=["argon2"], deprecated="auto")

# JWT Configuration
ALGORITHM = "HS256"
ACCESS_TOKEN_EXPIRE_MINUTES = 30
|
| 12 |
+
|
| 13 |
+
def verify_password(plain_password, hashed_password):
    """Return True when the plain-text password matches the stored hash."""
    return pwd_context.verify(plain_password, hashed_password)
|
| 16 |
+
|
| 17 |
+
def get_password_hash(password):
    """Hash a password (Argon2) so plain text is never stored in the DB."""
    return pwd_context.hash(password)
|
| 20 |
+
|
| 21 |
+
def create_access_token(data: dict):
    """Create a signed JWT access token for a logged-in user.

    Args:
        data: Claims to embed (e.g. {"sub": email}); copied, never mutated.

    Returns:
        The encoded JWT string, expiring after ACCESS_TOKEN_EXPIRE_MINUTES.
    """
    from datetime import timezone  # local import keeps module-level imports untouched

    to_encode = data.copy()
    # FIX: datetime.utcnow() is deprecated and returns a naive datetime;
    # use an explicitly timezone-aware UTC timestamp instead.
    expire = datetime.now(timezone.utc) + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
    to_encode.update({"exp": expire})

    # The signing secret comes from application settings.
    secret_key = settings.SECRET_KEY
    encoded_jwt = jwt.encode(to_encode, secret_key, algorithm=ALGORITHM)
    return encoded_jwt
|
backend/src/utils/security.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from cryptography.fernet import Fernet
|
| 2 |
+
import base64
|
| 3 |
+
|
| 4 |
+
# --- FIX: A Valid, Consistent 32-byte Base64 Key ---
|
| 5 |
+
# Ye key change nahi hogi, to decryption hamesha chalega.
|
| 6 |
+
# --- A valid, consistent 32-byte base64 Fernet key ---
# WARNING(security): a hardcoded key means anyone with source access can
# decrypt stored secrets. It remains only as a development fallback; set the
# ENCRYPTION_KEY environment variable in production.
DEFAULT_KEY = b'8_sW7x9y2z4A5b6C8d9E0f1G2h3I4j5K6l7M8n9O0pQ='

class SecurityUtils:
    """Symmetric (Fernet) encryption helpers for storing user credentials."""

    @staticmethod
    def get_cipher():
        """Build a Fernet cipher from ENCRYPTION_KEY, or the dev fallback.

        Backward compatible: when the environment variable is unset, the
        original hardcoded key is used and existing data stays decryptable.
        """
        import os
        return Fernet(os.environ.get("ENCRYPTION_KEY") or DEFAULT_KEY)

    @staticmethod
    def encrypt(data: str) -> str:
        """Encrypt a string; empty input round-trips as an empty string."""
        if not data:
            return ""
        cipher = SecurityUtils.get_cipher()
        return cipher.encrypt(data.encode()).decode()

    @staticmethod
    def decrypt(token: str) -> str:
        """Decrypt a token produced by encrypt().

        Raises:
            ValueError: when the key is wrong or the data is corrupted.
        """
        if not token:
            return ""
        cipher = SecurityUtils.get_cipher()
        try:
            return cipher.decrypt(token.encode()).decode()
        except Exception as e:
            print(f"🔐 Decryption Failed: {e}")
            raise ValueError("Invalid Key or Corrupted Data")
|
dummy_cms_data.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"_id": "p1",
|
| 4 |
+
"_type": "product",
|
| 5 |
+
"title": "Classic Blue Denim Jacket",
|
| 6 |
+
"price": 59.99,
|
| 7 |
+
"inStock": true,
|
| 8 |
+
"colors": ["Blue", "Black"],
|
| 9 |
+
"description": "A timeless classic denim jacket for all seasons."
|
| 10 |
+
},
|
| 11 |
+
{
|
| 12 |
+
"_id": "p2",
|
| 13 |
+
"_type": "product",
|
| 14 |
+
"title": "Urban Running Shoes",
|
| 15 |
+
"price": 89.50,
|
| 16 |
+
"inStock": true,
|
| 17 |
+
"colors": ["White", "Grey"],
|
| 18 |
+
"description": "High performance running shoes with foam technology."
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"_id": "p3",
|
| 22 |
+
"_type": "product",
|
| 23 |
+
"title": "Graphic Tee - Retro",
|
| 24 |
+
"price": 25.00,
|
| 25 |
+
"inStock": false,
|
| 26 |
+
"colors": ["Red"],
|
| 27 |
+
"description": "100% Cotton tee with retro print."
|
| 28 |
+
},
|
| 29 |
+
{
|
| 30 |
+
"_id": "offer1",
|
| 31 |
+
"_type": "offer",
|
| 32 |
+
"code": "SUMMER20",
|
| 33 |
+
"discount_percentage": 20,
|
| 34 |
+
"active": true,
|
| 35 |
+
"description": "Get 20% off on all summer wear."
|
| 36 |
+
}
|
| 37 |
+
]
|
requirements.txt
ADDED
|
@@ -0,0 +1,194 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
accelerate==1.12.0
|
| 2 |
+
aiofiles==25.1.0
|
| 3 |
+
aiohappyeyeballs==2.6.1
|
| 4 |
+
aiohttp==3.13.2
|
| 5 |
+
aiosignal==1.4.0
|
| 6 |
+
aiosqlite==0.21.0
|
| 7 |
+
annotated-doc==0.0.4
|
| 8 |
+
annotated-types==0.7.0
|
| 9 |
+
antlr4-python3-runtime==4.9.3
|
| 10 |
+
anyio==4.12.0
|
| 11 |
+
argon2-cffi==25.1.0
|
| 12 |
+
argon2-cffi-bindings==25.1.0
|
| 13 |
+
asyncpg==0.31.0
|
| 14 |
+
attrs==25.4.0
|
| 15 |
+
backoff==2.2.1
|
| 16 |
+
bcrypt==5.0.0
|
| 17 |
+
beautifulsoup4==4.14.3
|
| 18 |
+
cachetools==6.2.2
|
| 19 |
+
certifi==2025.11.12
|
| 20 |
+
cffi==2.0.0
|
| 21 |
+
charset-normalizer==3.4.4
|
| 22 |
+
click==8.3.1
|
| 23 |
+
colorama==0.4.6
|
| 24 |
+
coloredlogs==15.0.1
|
| 25 |
+
contourpy==1.3.3
|
| 26 |
+
cryptography==46.0.3
|
| 27 |
+
cycler==0.12.1
|
| 28 |
+
dataclasses-json==0.6.7
|
| 29 |
+
Deprecated==1.3.1
|
| 30 |
+
distro==1.9.0
|
| 31 |
+
dnspython==2.8.0
|
| 32 |
+
docx2txt==0.9
|
| 33 |
+
ecdsa==0.19.1
|
| 34 |
+
effdet==0.4.1
|
| 35 |
+
email-validator==2.3.0
|
| 36 |
+
emoji==2.15.0
|
| 37 |
+
et_xmlfile==2.0.0
|
| 38 |
+
fastapi==0.124.0
|
| 39 |
+
filelock==3.20.0
|
| 40 |
+
filetype==1.2.0
|
| 41 |
+
flatbuffers==25.9.23
|
| 42 |
+
fonttools==4.61.0
|
| 43 |
+
frozenlist==1.8.0
|
| 44 |
+
fsspec==2025.12.0
|
| 45 |
+
google-api-core==2.28.1
|
| 46 |
+
google-auth==2.43.0
|
| 47 |
+
google-cloud-vision==3.11.0
|
| 48 |
+
google-genai==1.54.0
|
| 49 |
+
googleapis-common-protos==1.72.0
|
| 50 |
+
greenlet==3.3.0
|
| 51 |
+
groq==0.37.1
|
| 52 |
+
grpcio==1.76.0
|
| 53 |
+
grpcio-status==1.76.0
|
| 54 |
+
h11==0.16.0
|
| 55 |
+
h2==4.3.0
|
| 56 |
+
hpack==4.1.0
|
| 57 |
+
html5lib==1.1
|
| 58 |
+
httpcore==1.0.9
|
| 59 |
+
httptools==0.7.1
|
| 60 |
+
httpx==0.28.1
|
| 61 |
+
httpx-sse==0.4.3
|
| 62 |
+
huggingface-hub==0.36.0
|
| 63 |
+
humanfriendly==10.0
|
| 64 |
+
hyperframe==6.1.0
|
| 65 |
+
idna==3.11
|
| 66 |
+
Jinja2==3.1.6
|
| 67 |
+
jiter==0.12.0
|
| 68 |
+
joblib==1.5.2
|
| 69 |
+
jsonpatch==1.33
|
| 70 |
+
jsonpointer==3.0.0
|
| 71 |
+
kiwisolver==1.4.9
|
| 72 |
+
langchain==1.1.3
|
| 73 |
+
langchain-classic==1.0.0
|
| 74 |
+
langchain-community==0.4.1
|
| 75 |
+
langchain-core==1.1.3
|
| 76 |
+
langchain-google-genai==4.0.0
|
| 77 |
+
langchain-groq==1.1.0
|
| 78 |
+
langchain-huggingface==1.1.0
|
| 79 |
+
langchain-openai==1.1.1
|
| 80 |
+
langchain-qdrant==1.1.0
|
| 81 |
+
langchain-text-splitters==1.0.0
|
| 82 |
+
langdetect==1.0.9
|
| 83 |
+
langgraph==1.0.4
|
| 84 |
+
langgraph-checkpoint==3.0.1
|
| 85 |
+
langgraph-prebuilt==1.0.5
|
| 86 |
+
langgraph-sdk==0.2.15
|
| 87 |
+
langsmith==0.4.57
|
| 88 |
+
lxml==6.0.2
|
| 89 |
+
Markdown==3.10
|
| 90 |
+
MarkupSafe==3.0.3
|
| 91 |
+
marshmallow==3.26.1
|
| 92 |
+
matplotlib==3.10.7
|
| 93 |
+
ml_dtypes==0.5.4
|
| 94 |
+
mpmath==1.3.0
|
| 95 |
+
msoffcrypto-tool==5.4.2
|
| 96 |
+
multidict==6.7.0
|
| 97 |
+
mypy_extensions==1.1.0
|
| 98 |
+
networkx==3.6.1
|
| 99 |
+
nltk==3.9.2
|
| 100 |
+
numpy==2.2.6
|
| 101 |
+
olefile==0.47
|
| 102 |
+
omegaconf==2.3.0
|
| 103 |
+
onnx==1.20.0
|
| 104 |
+
onnxruntime==1.23.2
|
| 105 |
+
openai==2.9.0
|
| 106 |
+
opencv-python==4.12.0.88
|
| 107 |
+
openpyxl==3.1.5
|
| 108 |
+
orjson==3.11.5
|
| 109 |
+
ormsgpack==1.12.0
|
| 110 |
+
packaging==25.0
|
| 111 |
+
pandas==2.3.3
|
| 112 |
+
passlib==1.7.4
|
| 113 |
+
pdf2image==1.17.0
|
| 114 |
+
pdfminer.six==20251107
|
| 115 |
+
pi_heif==1.1.1
|
| 116 |
+
pikepdf==10.0.2
|
| 117 |
+
pillow==12.0.0
|
| 118 |
+
portalocker==3.2.0
|
| 119 |
+
propcache==0.4.1
|
| 120 |
+
proto-plus==1.26.1
|
| 121 |
+
protobuf==6.33.2
|
| 122 |
+
psutil==7.1.3
|
| 123 |
+
psycopg2-binary==2.9.11
|
| 124 |
+
pyasn1==0.6.1
|
| 125 |
+
pyasn1_modules==0.4.2
|
| 126 |
+
pycocotools==2.0.10
|
| 127 |
+
pycparser==2.23
|
| 128 |
+
pydantic==2.12.5
|
| 129 |
+
pydantic-settings==2.12.0
|
| 130 |
+
pydantic_core==2.41.5
|
| 131 |
+
pymongo==4.15.5
|
| 132 |
+
pypandoc==1.16.2
|
| 133 |
+
pyparsing==3.2.5
|
| 134 |
+
pypdf==6.4.1
|
| 135 |
+
pypdfium2==5.1.0
|
| 136 |
+
pyreadline3==3.5.4
|
| 137 |
+
python-dateutil==2.9.0.post0
|
| 138 |
+
python-docx==1.2.0
|
| 139 |
+
python-dotenv==1.2.1
|
| 140 |
+
python-iso639==2025.11.16
|
| 141 |
+
python-jose==3.5.0
|
| 142 |
+
python-magic==0.4.27
|
| 143 |
+
python-multipart==0.0.20
|
| 144 |
+
python-oxmsg==0.0.2
|
| 145 |
+
python-pptx==1.0.2
|
| 146 |
+
pytz==2025.2
|
| 147 |
+
pywin32==311
|
| 148 |
+
PyYAML==6.0.3
|
| 149 |
+
qdrant-client==1.16.1
|
| 150 |
+
RapidFuzz==3.14.3
|
| 151 |
+
regex==2025.11.3
|
| 152 |
+
requests==2.32.5
|
| 153 |
+
requests-toolbelt==1.0.0
|
| 154 |
+
rsa==4.9.1
|
| 155 |
+
safetensors==0.7.0
|
| 156 |
+
scikit-learn==1.7.2
|
| 157 |
+
scipy==1.16.3
|
| 158 |
+
sentence-transformers==5.1.2
|
| 159 |
+
setuptools==80.9.0
|
| 160 |
+
six==1.17.0
|
| 161 |
+
sniffio==1.3.1
|
| 162 |
+
soupsieve==2.8
|
| 163 |
+
SQLAlchemy==2.0.45
|
| 164 |
+
starlette==0.50.0
|
| 165 |
+
sympy==1.14.0
|
| 166 |
+
tenacity==9.1.2
|
| 167 |
+
threadpoolctl==3.6.0
|
| 168 |
+
tiktoken==0.12.0
|
| 169 |
+
timm==1.0.22
|
| 170 |
+
tokenizers==0.22.1
|
| 171 |
+
torch==2.9.1
|
| 172 |
+
torchvision==0.24.1
|
| 173 |
+
tqdm==4.67.1
|
| 174 |
+
transformers==4.57.3
|
| 175 |
+
typing-inspect==0.9.0
|
| 176 |
+
typing-inspection==0.4.2
|
| 177 |
+
typing_extensions==4.15.0
|
| 178 |
+
tzdata==2025.2
|
| 179 |
+
unstructured==0.18.21
|
| 180 |
+
unstructured-client==0.42.4
|
| 181 |
+
unstructured.pytesseract==0.3.15
|
| 182 |
+
unstructured_inference==1.1.2
|
| 183 |
+
urllib3==2.6.1
|
| 184 |
+
uuid_utils==0.12.0
|
| 185 |
+
uvicorn==0.38.0
|
| 186 |
+
watchfiles==1.1.1
|
| 187 |
+
webencodings==0.5.1
|
| 188 |
+
websockets==15.0.1
|
| 189 |
+
wrapt==2.0.1
|
| 190 |
+
xlrd==2.0.2
|
| 191 |
+
xlsxwriter==3.2.9
|
| 192 |
+
xxhash==3.6.0
|
| 193 |
+
yarl==1.22.0
|
| 194 |
+
zstandard==0.25.0
|
static/widget.js
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
(function() {
|
| 2 |
+
// ----------------------------------------------------
|
| 3 |
+
// 1. CONFIGURATION: Script Tag se values uthana
|
| 4 |
+
// ----------------------------------------------------
|
| 5 |
+
const scriptTag = document.currentScript;
|
| 6 |
+
|
| 7 |
+
const USER_ID = scriptTag.getAttribute("data-user-id");
|
| 8 |
+
const API_URL = scriptTag.getAttribute("data-api-url");
|
| 9 |
+
const THEME_COLOR = scriptTag.getAttribute("data-theme-color") || "#007bff";
|
| 10 |
+
|
| 11 |
+
if (!USER_ID || !API_URL) {
|
| 12 |
+
console.error("OmniAgent Widget Error: data-user-id or data-api-url is missing!");
|
| 13 |
+
return;
|
| 14 |
+
}
|
| 15 |
+
|
| 16 |
+
// Modern way to generate unique ID (Fixing substr deprecated warning)
|
| 17 |
+
const CHAT_SESSION_ID = "omni_session_" + Math.random().toString(36).slice(2, 11);
|
| 18 |
+
|
| 19 |
+
// ----------------------------------------------------
|
| 20 |
+
// 2. STYLES: UI Design aur Position
|
| 21 |
+
// ----------------------------------------------------
|
| 22 |
+
const style = document.createElement('style');
|
| 23 |
+
style.innerHTML = `
|
| 24 |
+
#omni-widget-container {
|
| 25 |
+
position: fixed; bottom: 20px; right: 20px; z-index: 9999; font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
|
| 26 |
+
transition: all 0.3s;
|
| 27 |
+
}
|
| 28 |
+
#omni-chat-btn {
|
| 29 |
+
background: ${THEME_COLOR}; color: white; border: none; padding: 15px; border-radius: 50%;
|
| 30 |
+
cursor: pointer; box-shadow: 0 4px 12px rgba(0,0,0,0.4); width: 60px; height: 60px; font-size: 24px;
|
| 31 |
+
display: flex; align-items: center; justify-content: center;
|
| 32 |
+
}
|
| 33 |
+
#omni-chat-window {
|
| 34 |
+
display: none; width: 350px; height: 500px; background: white; border-radius: 10px;
|
| 35 |
+
box-shadow: 0 10px 30px rgba(0,0,0,0.5); flex-direction: column; overflow: hidden;
|
| 36 |
+
margin-bottom: 15px; transform-origin: bottom right; animation: fadeIn 0.3s ease-out;
|
| 37 |
+
}
|
| 38 |
+
#omni-header {
|
| 39 |
+
background: ${THEME_COLOR}; color: white; padding: 15px; font-weight: 600; display: flex;
|
| 40 |
+
justify-content: space-between; align-items: center; border-radius: 10px 10px 0 0;
|
| 41 |
+
}
|
| 42 |
+
#omni-messages { flex: 1; padding: 10px; overflow-y: auto; background: #f0f0f0; }
|
| 43 |
+
#omni-input-area { display: flex; border-top: 1px solid #ddd; }
|
| 44 |
+
#omni-input { flex: 1; padding: 12px; border: none; outline: none; font-size: 14px; }
|
| 45 |
+
#omni-send { background: white; border: none; color: ${THEME_COLOR}; font-weight: bold; cursor: pointer; padding: 0 15px; font-size: 18px; }
|
| 46 |
+
.omni-msg { margin: 8px 0; padding: 10px 15px; border-radius: 15px; max-width: 80%; font-size: 14px; line-height: 1.4; }
|
| 47 |
+
.omni-msg.user { background: ${THEME_COLOR}; color: white; margin-left: auto; border-bottom-right-radius: 2px; }
|
| 48 |
+
.omni-msg.bot { background: #e8e8e8; color: #333; margin-right: auto; border-bottom-left-radius: 2px; }
|
| 49 |
+
|
| 50 |
+
@keyframes fadeIn { from { opacity: 0; transform: scale(0.9); } to { opacity: 1; transform: scale(1); } }
|
| 51 |
+
`;
|
| 52 |
+
document.head.appendChild(style);
|
| 53 |
+
|
| 54 |
+
// ----------------------------------------------------
|
| 55 |
+
// 3. HTML Structure Banao
|
| 56 |
+
// ----------------------------------------------------
|
| 57 |
+
const container = document.createElement('div');
|
| 58 |
+
container.id = 'omni-widget-container';
|
| 59 |
+
|
| 60 |
+
const chatWindow = document.createElement('div');
|
| 61 |
+
chatWindow.id = 'omni-chat-window';
|
| 62 |
+
chatWindow.innerHTML = `
|
| 63 |
+
<div id="omni-header">
|
| 64 |
+
<span>Customer Support</span>
|
| 65 |
+
<span style="cursor:pointer; font-size: 18px;" onclick="window.toggleOmniChat()">—</span>
|
| 66 |
+
</div>
|
| 67 |
+
<div id="omni-messages"></div>
|
| 68 |
+
<div id="omni-input-area">
|
| 69 |
+
<input type="text" id="omni-input" placeholder="Type your query..." />
|
| 70 |
+
<button id="omni-send">➤</button>
|
| 71 |
+
</div>
|
| 72 |
+
`;
|
| 73 |
+
|
| 74 |
+
const chatBtn = document.createElement('button');
|
| 75 |
+
chatBtn.id = 'omni-chat-btn';
|
| 76 |
+
chatBtn.innerHTML = '💬';
|
| 77 |
+
|
| 78 |
+
// onClick ko addEventListener se theek kiya
|
| 79 |
+
chatBtn.addEventListener('click', toggleOmniChat);
|
| 80 |
+
|
| 81 |
+
container.appendChild(chatWindow);
|
| 82 |
+
container.appendChild(chatBtn);
|
| 83 |
+
document.body.appendChild(container);
|
| 84 |
+
|
| 85 |
+
// ----------------------------------------------------
|
| 86 |
+
// 4. Logic Functions (Modern Event Listeners)
|
| 87 |
+
// ----------------------------------------------------
|
| 88 |
+
|
| 89 |
+
const inputField = document.getElementById('omni-input');
|
| 90 |
+
const sendButton = document.getElementById('omni-send');
|
| 91 |
+
|
| 92 |
+
window.toggleOmniChat = function() {
|
| 93 |
+
const win = document.getElementById('omni-chat-window');
|
| 94 |
+
const isVisible = win.style.display === 'flex';
|
| 95 |
+
win.style.display = isVisible ? 'none' : 'flex';
|
| 96 |
+
if (!isVisible) {
|
| 97 |
+
inputField.focus();
|
| 98 |
+
}
|
| 99 |
+
};
|
| 100 |
+
|
| 101 |
+
function addMessage(text, sender) {
|
| 102 |
+
const msgs = document.getElementById('omni-messages');
|
| 103 |
+
const div = document.createElement('div');
|
| 104 |
+
div.className = `omni-msg ${sender}`;
|
| 105 |
+
div.innerHTML = text.replace(/(https?:\/\/[^\s]+)/g, '<a href="$1" target="_blank" style="color:white; text-decoration:underline;">$1</a>');
|
| 106 |
+
msgs.appendChild(div);
|
| 107 |
+
msgs.scrollTop = msgs.scrollHeight;
|
| 108 |
+
}
|
| 109 |
+
|
| 110 |
+
async function sendMessage() {
|
| 111 |
+
const originalBtnText = sendButton.innerHTML;
|
| 112 |
+
const text = inputField.value.trim();
|
| 113 |
+
if (!text) return;
|
| 114 |
+
|
| 115 |
+
addMessage(text, 'user');
|
| 116 |
+
inputField.value = '';
|
| 117 |
+
inputField.disabled = true;
|
| 118 |
+
sendButton.innerHTML = '...';
|
| 119 |
+
sendButton.disabled = true;
|
| 120 |
+
|
| 121 |
+
try {
|
| 122 |
+
// Backend API Call
|
| 123 |
+
const response = await fetch(`${API_URL}/api/v1/chat`, {
|
| 124 |
+
method: 'POST',
|
| 125 |
+
headers: { 'Content-Type': 'application/json' },
|
| 126 |
+
body: JSON.stringify({
|
| 127 |
+
message: text,
|
| 128 |
+
session_id: CHAT_SESSION_ID,
|
| 129 |
+
user_id: USER_ID
|
| 130 |
+
})
|
| 131 |
+
});
|
| 132 |
+
const data = await response.json();
|
| 133 |
+
addMessage(data.response, 'bot');
|
| 134 |
+
} catch (error) {
|
| 135 |
+
addMessage("Error: Could not connect to the Agent.", 'bot');
|
| 136 |
+
console.error("OmniAgent API Error:", error);
|
| 137 |
+
} finally {
|
| 138 |
+
inputField.disabled = false;
|
| 139 |
+
sendButton.innerHTML = originalBtnText;
|
| 140 |
+
sendButton.disabled = false;
|
| 141 |
+
inputField.focus();
|
| 142 |
+
}
|
| 143 |
+
}
|
| 144 |
+
|
| 145 |
+
// Modern Event Listeners (Fixing deprecated 'onkeypress')
|
| 146 |
+
sendButton.addEventListener('click', sendMessage);
|
| 147 |
+
inputField.addEventListener('keypress', (e) => {
|
| 148 |
+
if(e.key === 'Enter') {
|
| 149 |
+
sendMessage();
|
| 150 |
+
e.preventDefault(); // Enter key ka default action roko
|
| 151 |
+
}
|
| 152 |
+
});
|
| 153 |
+
})();
|