amitbhatt6075 commited on
Commit
0914e96
·
0 Parent(s):

Complete fresh start - FINAL UPLOAD

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +36 -0
  2. .gitignore +15 -0
  3. Dockerfile +34 -0
  4. README.md +11 -0
  5. api/__init__.py +0 -0
  6. api/main.py +2017 -0
  7. core/__init__.py +0 -0
  8. core/anomaly_detector.py +31 -0
  9. core/creative_chat.py +125 -0
  10. core/document_parser.py +38 -0
  11. core/guardrails/safety.py +23 -0
  12. core/inference/cache.py +30 -0
  13. core/matcher.py +44 -0
  14. core/predictor.py +83 -0
  15. core/rag/store.py +46 -0
  16. core/strategist.py +609 -0
  17. core/support_agent.py +169 -0
  18. core/utils.py +77 -0
  19. embedding_model/1_Pooling/config.json +10 -0
  20. embedding_model/README.md +173 -0
  21. embedding_model/config.json +25 -0
  22. embedding_model/config_sentence_transformers.json +14 -0
  23. embedding_model/model.safetensors +3 -0
  24. embedding_model/modules.json +20 -0
  25. embedding_model/sentence_bert_config.json +4 -0
  26. embedding_model/special_tokens_map.json +37 -0
  27. embedding_model/tokenizer.json +0 -0
  28. embedding_model/tokenizer_config.json +65 -0
  29. embedding_model/vocab.txt +0 -0
  30. knowledge_base/brand/01_campaign_creation.md +21 -0
  31. knowledge_base/brand/02_understanding_status.md +13 -0
  32. knowledge_base/common/first_faq.md +19 -0
  33. knowledge_base/influencer/02_payments.md +32 -0
  34. models/budget_predictor_v1.joblib +3 -0
  35. models/comments_predictor_v1.joblib +3 -0
  36. models/earnings_encoder.joblib +3 -0
  37. models/earnings_model.joblib +3 -0
  38. models/influencer_matcher_v1.joblib +3 -0
  39. models/likes_predictor_v1.joblib +3 -0
  40. models/payout_forecaster_v1.joblib +3 -0
  41. models/performance_predictor_v1.joblib +3 -0
  42. models/performance_scorer_v1.joblib +3 -0
  43. models/revenue_forecaster_v1.joblib +3 -0
  44. requirements.txt +24 -0
  45. scripts/download_embedding_model.py +39 -0
  46. scripts/download_model.py +52 -0
  47. scripts/export_performance_data.py +87 -0
  48. scripts/export_revenue_data.py +71 -0
  49. scripts/export_training_data.py +76 -0
  50. scripts/ingest_data.py +60 -0
.gitattributes ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.gguf filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python cache
2
+ __pycache__/
3
+ *.pyc
4
+
5
+ # Virtual Environments
6
+ .env
7
+ .venv
8
+ venv/
9
+ env/
10
+
11
+ # IGNORE THE ENTIRE LLM FOLDER
12
+ /llm_model/
13
+
14
+ # IGNORE local data files
15
+ /data/
Dockerfile ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Start with a stable Python image
2
+ FROM python:3.11-slim
3
+
4
+ # Set environment variables for non-interactive installs
5
+ ENV PYTHONUNBUFFERED=1
6
+ ENV APP_HOME=/app
7
+
8
+ # Set the working directory
9
+ WORKDIR $APP_HOME
10
+
11
+ # Install system dependencies needed for libraries like llama-cpp-python
12
+ RUN apt-get update && apt-get install -y --no-install-recommends \
13
+ build-essential \
14
+ && apt-get clean \
15
+ && rm -rf /var/lib/apt/lists/*
16
+
17
+ # Copy ONLY the requirements file to leverage Docker's cache
18
+ COPY requirements.txt ./
19
+
20
+ # Install Python dependencies
21
+ # CMAKE_ARGS is needed for llama-cpp-python to build correctly
22
+ ENV CMAKE_ARGS="-DLLAMA_CUBLAS=on"
23
+ RUN pip install --no-cache-dir --upgrade pip && \
24
+ pip install --no-cache-dir -r requirements.txt
25
+
26
+ # Copy the entire application code into the container
27
+ COPY . .
28
+
29
+ # Expose the port the app will run on
30
+ EXPOSE 7860
31
+
32
+ # The command to run your FastAPI application
33
+ # This starts the server when the Docker container launches
34
+ CMD ["uvicorn", "api.main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Reachify Ai Service
3
+ emoji: 🌖
4
+ colorFrom: indigo
5
+ colorTo: indigo
6
+ sdk: docker
7
+ pinned: false
8
+ license: mit
9
+ ---
10
+
11
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
api/__init__.py ADDED
File without changes
api/main.py ADDED
@@ -0,0 +1,2017 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # File Location: ai-service/api/main.py
2
+
3
+ import os
4
+ import sys
5
+ import joblib
6
+ import pandas as pd
7
+ import json
8
+ import re
9
+ import uuid
10
+
11
+ from huggingface_hub import hf_hub_download
12
+
13
+ from fastapi import FastAPI, HTTPException, Depends, BackgroundTasks
14
+ from supabase import Client
15
+
16
+ from pydantic import BaseModel, Field
17
+ from pydantic.config import ConfigDict
18
+
19
+ from typing import List, Optional, Any, Dict
20
+ import traceback
21
+ from llama_cpp import Llama
22
+ from statsmodels.tsa.api import Holt
23
+ from dateutil.relativedelta import relativedelta
24
+ from sklearn.preprocessing import LabelEncoder
25
+ from core.support_agent import SupportAgent
26
+ from core.strategist import AIStrategist
27
+ from core.predictor import rank_influencers_by_match
28
+ from core.utils import get_supabase_client
29
+ from core.anomaly_detector import find_anomalies
30
+ from core.matcher import load_embedding_model, rank_documents_by_similarity
31
+ from core.utils import get_supabase_client, extract_colors_from_url
32
+ from core.document_parser import parse_pdf_from_url
33
+ from core.creative_chat import director
34
+
35
# Optional RAG/caching stack: degrade gracefully when those subpackages are
# absent so the core API can still boot.
try:
    from core.rag.store import VectorStore
    from core.inference.cache import cached_response
except ImportError:
    # No vector store available — RAG endpoints will be disabled at startup.
    VectorStore = None

    # No-op fallback so decorated handlers still work without a cache layer.
    def cached_response(func): return func

# Project root (parent of api/) and the bundled joblib model directory.
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
MODELS_DIR = os.path.join(ROOT_DIR, 'models')

# === FIX #2: Dynamic Model Downloading Logic ===
# The GGUF LLM is fetched at startup instead of being baked into the image.
MODEL_REPO = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
MODEL_FILENAME = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
# Hugging Face Spaces provides a writable directory (WRITABLE_DIR); fall back
# to /tmp when the env var is unset.
MODEL_SAVE_DIRECTORY = os.path.join(os.environ.get("WRITABLE_DIR", "/tmp"), "llm_model")
# Final on-disk path of the model once downloaded.
LLAMA_MODEL_PATH = os.path.join(MODEL_SAVE_DIRECTORY, MODEL_FILENAME)
# ===============================================

# Sentence-transformer used for similarity ranking; prefer the local copy
# shipped in embedding_model/, otherwise pull by hub name.
EMBEDDING_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'
EMBEDDING_MODEL_PATH = os.path.join(ROOT_DIR, 'embedding_model')
# Persistent vector DB location (same writable-dir convention as the LLM).
DB_PATH = os.path.join(os.environ.get("WRITABLE_DIR", "/tmp"), "vector_db_persistent")

FINAL_EMBEDDING_PATH = EMBEDDING_MODEL_PATH if os.path.exists(EMBEDDING_MODEL_PATH) else EMBEDDING_MODEL_NAME

# Process-wide singletons, populated once in startup_event(); None means the
# corresponding feature is unavailable and its endpoint should fail softly.
_llm_instance: Optional[Llama] = None
_vector_store: Optional[Any] = None
_ai_strategist: Optional[AIStrategist] = None
_support_agent: Optional[SupportAgent] = None
_budget_predictor = None
_influencer_matcher = None
_performance_predictor = None
_payout_forecaster = None
_earnings_optimizer = None
_earnings_encoder = None
_likes_predictor = None
_comments_predictor = None
_revenue_forecaster = None
_performance_scorer = None
77
+
78
+ def to_snake(name: str) -> str:
79
+ return re.sub(r'(?<!^)(?=[A-Z])', '_', name).lower()
80
+
81
+ class ChatMessage(BaseModel):
82
+ role: str # "user" or "assistant"
83
+ content: str
84
+
85
+ class CreativeChatRequest(BaseModel):
86
+ message: str
87
+ history: List[ChatMessage]
88
+ task_context: str
89
+
90
+ class FinalizeScriptRequest(BaseModel):
91
+ history: List[ChatMessage]
92
+ task_context: str
93
+
94
+ class FinalScriptResponse(BaseModel):
95
+ hook: str
96
+ script: str
97
+ visuals: List[str]
98
+ tools: List[str]
99
class ChatQuery(BaseModel):
    """Incoming support-chat question with its role and live-data context."""
    question: str = Field(..., min_length=1)
    role: str
    live_data: str
    conversationId: str

class ChatAnswer(BaseModel):
    """Support-chat reply plus the optional retrieval context used."""
    response: str
    context: Optional[str] = None

class ChatResponseRequest(BaseModel):
    """Direct chat request carrying backend-supplied context."""
    prompt: str = Field(..., description="The user's direct question.")
    context: str = Field(..., description="The real-time data context from the backend.")

class ChatResponsePayload(BaseModel):
    """Plain-text chat response."""
    response: str

class CaptionRequest(BaseModel):
    """A caption and the action to apply to it."""
    caption: str
    action: str

class CaptionResponse(BaseModel):
    """Rewritten caption result."""
    new_caption: str

class BudgetRequest(BaseModel):
    """Campaign parameters used to predict a budget."""
    campaign_goal: str
    influencer_count: int
    platform: str
    location: str
    category: str
    final_reach: int

class BudgetResponse(BaseModel):
    """Predicted campaign budget in USD."""
    predicted_budget_usd: float

class MatcherRequest(BaseModel):
    """Campaign/audience features for influencer matching."""
    campaign_description: str
    target_audience_age: str
    target_audience_gender: str
    engagement_rate: float
    followers: int
    country: str
    niche: str

class MatcherResponse(BaseModel):
    """IDs of the suggested influencers, best match first."""
    suggested_influencer_ids: List[int]

class PerformanceRequest(BaseModel):
    """Campaign parameters for performance prediction."""
    budget_usd: float
    influencer_count: int
    platform: str
    location: str
    category: str
    budget: float

class PerformanceResponse(BaseModel):
    """Predicted engagement rate and reach."""
    predicted_engagement_rate: float
    predicted_reach: int
125
+ class StrategyRequest(BaseModel):
126
+ prompt: str
127
+ class StrategyResponse(BaseModel):
128
+ response: str
129
+ class OutlineRequest(BaseModel):
130
+ title: str
131
+ class OutlineResponse(BaseModel):
132
+ outline: str
133
+
134
+ class TaskPrioritizationRequest(BaseModel):
135
+ title: str
136
+ description: Optional[str] = None
137
+
138
+ class TaskPrioritizationResponse(BaseModel):
139
+ priority: str
140
+
141
+ class DashboardInsightsRequest(BaseModel):
142
+ """ The KPI data sent from the main backend. """
143
+ total_revenue_monthly: float
144
+ new_users_weekly: int
145
+ active_campaigns: int
146
+ pending_approvals: int
147
+
148
+ class TimeSeriesDataPoint(BaseModel):
149
+ date: str
150
+ value: float
151
+
152
+ class TimeSeriesForecastRequest(BaseModel):
153
+ data: List[TimeSeriesDataPoint]
154
+ periods_to_predict: int
155
+ business_context: Optional[str] = "No specific context provided."
156
+
157
+ class SmartForecastDataPoint(BaseModel):
158
+ date: str
159
+ predicted_value: float
160
+ trend: str
161
+ commentary: Optional[str] = None
162
+
163
+ class TimeSeriesForecastResponse(BaseModel):
164
+ forecast: List[SmartForecastDataPoint]
165
+
166
+ class HealthKpiRequest(BaseModel):
167
+ platformRevenue: float
168
+ activeCampaigns: int
169
+ totalBrands: int
170
+
171
+ class HealthSummaryResponse(BaseModel):
172
+ summary: str
173
+
174
+ class InfluencerData(BaseModel):
175
+ id: str
176
+ name: Optional[str] = None
177
+ handle: Optional[str] = None
178
+ followers: Optional[int] = 0
179
+ category: Optional[str] = None
180
+ bio: Optional[str] = None
181
+
182
+ class TeamStrategyRequest(BaseModel):
183
+ brand_name: str
184
+ campaign_goal: str
185
+ target_audience: str
186
+ budget_range: str
187
+ influencers: List[InfluencerData]
188
+
189
+ class CreativeBrief(BaseModel):
190
+ title: str
191
+ description: str
192
+ goal_kpi: str
193
+ content_guidelines: List[str]
194
+
195
+ class TeamStrategyResponse(BaseModel):
196
+ success: bool
197
+ strategy: Optional[CreativeBrief] = None
198
+ suggested_influencers: Optional[List[InfluencerData]] = None
199
+ error: Optional[str] = None
200
+
201
+ class AnalyticsInsightsRequest(BaseModel):
202
+ """Data structure for requesting analytics insights."""
203
+ totalReach: Optional[int] = 0
204
+ totalLikes: Optional[int] = 0
205
+ averageEngagementRate: Optional[float] = 0.0
206
+ topPerformingInfluencer: Optional[str] = "N/A"
207
+
208
+ class AnalyticsInsightsResponse(BaseModel):
209
+ """The response containing the generated insights."""
210
+ insights: str
211
+
212
class CampaignDetailsForMatch(BaseModel):
    """Only the campaign details that are needed for matching."""
    description: Optional[str] = ""
    goal_kpi: Optional[str] = ""
    category: Optional[str] = ""

class InfluencerRankRequest(BaseModel):
    """Format of the request coming from the backend."""
    campaign_details: CampaignDetailsForMatch
    influencers: List[InfluencerData]

class InfluencerRankResponse(BaseModel):
    """Format of the response returned by the AI service."""
    ranked_influencers: List[InfluencerData]
226
+
227
+ class WeeklySummaryRequest(BaseModel):
228
+ start_date: str
229
+ end_date: str
230
+ total_ad_spend: float
231
+ total_clicks: int
232
+ new_followers: int
233
+ top_performing_campaign: str
234
+
235
+ class WeeklySummaryResponse(BaseModel):
236
+ summary: str
237
+
238
+ class PayoutForecastInput(BaseModel):
239
+ total_budget_active_campaigns: float = Field(..., description="The sum of budgets for all of a manager's currently active campaigns.")
240
+
241
+ class PayoutForecastOutput(BaseModel):
242
+ forecastedAmount: float
243
+ commentary: str
244
+
245
+ class ContentQualityRequest(BaseModel):
246
+ caption: str = Field(..., description="The caption text to be analyzed.")
247
+
248
+ class ContentQualityScore(BaseModel):
249
+ readability: int
250
+ engagement: int
251
+ call_to_action: int
252
+ hashtag_strategy: int
253
+
254
+ class ContentQualityResponse(BaseModel):
255
+ overall_score: float
256
+ scores: ContentQualityScore
257
+ feedback: str
258
+
259
+ class CampaignForRanking(BaseModel):
260
+ id: int
261
+ description: Optional[str] = ""
262
+
263
+ class InfluencerForRanking(BaseModel):
264
+ id: str
265
+ category: Optional[str] = "Fashion"
266
+ bio: Optional[str] = ""
267
+
268
+ class RankCampaignsRequest(BaseModel):
269
+ influencer: InfluencerForRanking
270
+ campaigns: List[CampaignForRanking]
271
+
272
+ class RankedCampaignResult(BaseModel):
273
+ campaign_id: int
274
+ score: float
275
+
276
+ class RankCampaignsResponse(BaseModel):
277
+ ranked_campaigns: List[RankedCampaignResult]
278
+
279
+ class CaptionAssistRequest(BaseModel):
280
+ caption: str
281
+ action: str = Field(..., description="Action to perform: 'improve', 'hashtags', or 'check_guidelines'")
282
+ guidelines: Optional[str] = None # For the 'check_guidelines' action
283
+
284
+ class CaptionAssistResponse(BaseModel):
285
+ new_text: str
286
+
287
+ class ForecastRequest(BaseModel):
288
+ budget: float
289
+ category: str
290
+ follower_count: int
291
+ engagement_rate: float
292
+
293
+ class PerformanceForecast(BaseModel):
294
+ predicted_engagement_rate: float
295
+ predicted_reach: int
296
+
297
+ class PayoutForecast(BaseModel):
298
+ estimated_earning: float
299
+
300
+ class ForecastResponse(BaseModel):
301
+ performance: PerformanceForecast
302
+ payout: PayoutForecast
303
+
304
+ class InfluencerKpiData(BaseModel):
305
+ totalReach: int
306
+ totalLikes: int
307
+ totalComments: int
308
+ avgEngagementRate: float
309
+ totalSubmissions: int
310
+
311
+ class InfluencerAnalyticsSummaryResponse(BaseModel):
312
+ summary: str
313
+
314
+ class PortfolioOption(BaseModel):
315
+ id: str
316
+ contentUrl: str
317
+ caption: Optional[str] = ""
318
+ likes: Optional[int] = 0 # Likes ko ab hum use karenge
319
+ campaign: dict
320
+
321
+ class CuratePortfolioRequest(BaseModel):
322
+ submissions: List[PortfolioOption]
323
+
324
+ class CuratePortfolioResponse(BaseModel):
325
+ featured_submission_ids: List[str]
326
+
327
+ class EarningOpportunityRequest(BaseModel):
328
+ follower_count: int = Field(..., description="Influencer ke current followers")
329
+
330
+ class Opportunity(BaseModel):
331
+ campaign_niche: str
332
+ content_format: str
333
+ estimated_score: float
334
+ commentary: str
335
+
336
+ class EarningOpportunityResponse(BaseModel):
337
+ opportunities: List[Opportunity]
338
+
339
+ class PostPerformanceRequest(BaseModel):
340
+ follower_count: int
341
+ caption_length: int
342
+ campaign_niche: str
343
+ content_format: str
344
+
345
+ class PostPerformanceResponse(BaseModel):
346
+ predicted_likes: int
347
+ predicted_comments: int
348
+ feedback: str
349
+
350
+ class AnomalyInsight(BaseModel):
351
+ influencer_id: str
352
+ influencer_name: str
353
+ insights: List[str]
354
+
355
+ class RevenueForecastDatapoint(BaseModel):
356
+ month: str
357
+ predicted_revenue: float
358
+ trend: str
359
+
360
+ class RevenueForecastResponse(BaseModel):
361
+ forecast: List[RevenueForecastDatapoint]
362
+ ai_commentary: str
363
+
364
+ class InfluencerPerformanceStats(BaseModel):
365
+ avg_engagement_rate: float
366
+ on_time_submission_rate: float
367
+ avg_brand_rating: float
368
+ monthly_earnings: float
369
+
370
+ class InfluencerPerformanceResponse(BaseModel):
371
+ performance_score: int
372
+
373
+ class MatchDocument(BaseModel):
374
+ id: str
375
+ text: str
376
+ match_score: Optional[int] = None
377
+
378
+ class RankBySimilarityRequest(BaseModel):
379
+ query: str
380
+ documents: List[MatchDocument]
381
+
382
+ class RankBySimilarityResponse(BaseModel):
383
+ ranked_documents: List[MatchDocument]
384
+
385
# NOTE(review): ContentQualityRequest/Score/Response below are byte-for-byte
# duplicates of the definitions earlier in this file (diff lines 245-257);
# this second copy silently shadows the first — consider deleting one.
class ContentQualityRequest(BaseModel):
    caption: str = Field(..., description="The caption text to be analyzed.")

class ContentQualityScore(BaseModel):
    # Each dimension is scored as an integer (scale not stated here — presumably
    # 0-10 or 0-100; confirm against the scoring endpoint's prompt).
    readability: int
    engagement: int
    call_to_action: int
    hashtag_strategy: int

class ContentQualityResponse(BaseModel):
    # Aggregate of the per-dimension scores plus free-text feedback.
    overall_score: float
    scores: ContentQualityScore
    feedback: str
398
+
399
+ class DailyBriefingData(BaseModel):
400
+ roster_size: int
401
+ on_bench_influencers: int
402
+ pending_submissions: int
403
+ revisions_requested: int
404
+ lowest_ai_score: Optional[int] = None
405
+ highest_pending_payout: float
406
+
407
+ class DailyBriefingResponse(BaseModel):
408
+ briefing_text: str
409
+
410
+ class ContractURL(BaseModel):
411
+ pdf_url: str
412
+
413
+ class ContractSummary(BaseModel):
414
+ payment_details: str
415
+ deliverables: str
416
+ deadlines: str
417
+ exclusivity: str
418
+ ownership: str
419
+ summary_points: List[str]
420
+
421
# NOTE(review): this pair duplicates the InfluencerPerformanceStats /
# InfluencerPerformanceResponse defined earlier (diff lines 364-372);
# the redefinition shadows the first copy — consider removing one.
class InfluencerPerformanceStats(BaseModel):
    """
    Data coming from the backend. Matches 'get_real_stats_for_influencer'
    in 'helpers.py'.
    """
    avg_engagement_rate: float
    on_time_submission_rate: float
    avg_brand_rating: float
    monthly_earnings: float

class InfluencerPerformanceResponse(BaseModel):
    """
    Response returned by the AI service: a single score.
    """
    performance_score: int
435
+
436
+
437
class AIGrowthPlanRequest(BaseModel):
    """Data from the backend containing the influencer's live stats."""
    fullName: str
    category: Optional[str] = None
    avgEngagementRate: float
    monthlyEarnings: float
    onTimeSubmissionRate: float
    # Captions of the influencer's best/worst posts, used as LLM context.
    bestPostCaption: Optional[str] = None
    worstPostCaption: Optional[str] = None

class AIGrowthPlanResponse(BaseModel):
    """Response returned by the AI service."""
    insights: List[str]
450
+
451
+ class BrandAssetAnalysisRequest(BaseModel):
452
+ file_url: str = Field(..., description="URL of the logo or brand image")
453
+ asset_type: str = "logo"
454
+
455
+ class BrandAssetAnalysisResponse(BaseModel):
456
+ dominant_colors: List[str]
457
+
458
+
459
+ class ServiceBlueprintRequest(BaseModel):
460
+ service_type: str = Field(..., description="e.g., 'web-dev' or 'growth'")
461
+ requirements: str = Field(..., min_length=10)
462
+
463
+ class ServiceBlueprintResponse(BaseModel):
464
+ title: str
465
+ deliverables: List[str]
466
+ stack: str
467
+ price_est: str
468
+ timeline: str
469
+
470
+ class GrowthPlanRequest(BaseModel):
471
+ platform_handle: str
472
+ goals: str
473
+ challenges: str
474
+
475
+ class AISummaryJobRequest(BaseModel):
476
+ checkin_id: int
477
+ raw_text: str
478
+
479
+ class WeeklyCheckinSummaryResponse(BaseModel):
480
+ wins: List[str]
481
+ challenges: List[str]
482
+ opportunities: List[str]
483
+ sentiment: str
484
+
485
+ class WeeklyPlanContext(BaseModel):
486
+ niche: str
487
+ current_mood: str
488
+ recent_achievements: List[str]
489
+ active_trends: List[Dict[str, str]]
490
+
491
+ class WeeklyPlanRequest(BaseModel):
492
+ context: WeeklyPlanContext
493
+
494
+ class PlanOption(BaseModel):
495
+ type: str
496
+ title: str
497
+ platform: str
498
+ contentType: str
499
+ instructions: str
500
+ reasoning: str
501
+
502
+ class WeeklyPlanResponse(BaseModel):
503
+ options: List[PlanOption]
504
+
505
+ app = FastAPI(title="Reachify AI Service (Deploy-Ready)", version="11.0.0")
506
+
507
+ @app.on_event("startup")
508
+ def startup_event():
509
+ global _llm_instance, _ai_strategist, _support_agent, _vector_store, \
510
+ _budget_predictor, _influencer_matcher, _performance_predictor, _payout_forecaster, \
511
+ _earnings_optimizer, _earnings_encoder, _likes_predictor, _comments_predictor, \
512
+ _revenue_forecaster, _performance_scorer
513
+
514
+ print("--- 🚀 AI Service Starting Up (Hugging Face Mode)... ---")
515
+
516
+ # === FIX #3: The Model Download and Loading Logic ===
517
+ try:
518
+ # Step 1: Download the model if it doesn't exist
519
+ os.makedirs(MODEL_SAVE_DIRECTORY, exist_ok=True)
520
+ if not os.path.exists(LLAMA_MODEL_PATH):
521
+ print(f" - LLM model not found locally. Downloading '{MODEL_FILENAME}'...")
522
+ hf_hub_download(
523
+ repo_id=MODEL_REPO,
524
+ filename=MODEL_FILENAME,
525
+ local_dir=MODEL_SAVE_DIRECTORY,
526
+ local_dir_use_symlinks=False # This is safer for containers
527
+ )
528
+ print(" - ✅ Model downloaded successfully.")
529
+ else:
530
+ print(f" - LLM model found at {LLAMA_MODEL_PATH}. Skipping download.")
531
+
532
+ # Step 2: Now that the file is guaranteed to be there, load it.
533
+ print(" - Loading Llama LLM into memory from downloaded file...")
534
+ _llm_instance = Llama(
535
+ model_path=LLAMA_MODEL_PATH,
536
+ n_gpu_layers=0, # Ensure CPU usage on free tier
537
+ n_ctx=2048,
538
+ verbose=False,
539
+ use_mmap=False
540
+ )
541
+ print(" - ✅ LLM Loaded into Memory on CPU.")
542
+
543
+ except Exception as e:
544
+ print(f" - ❌ FATAL ERROR: Could not download or load LLM model: {e}")
545
+ traceback.print_exc()
546
+ _llm_instance = None # Ensure it is None if it fails
547
+ # =========================================================
548
+
549
+ # --- All the rest of your startup logic remains EXACTLY THE SAME ---
550
+ if VectorStore:
551
+ try:
552
+ _vector_store = VectorStore()
553
+ print(" - ✅ RAG Engine Ready.")
554
+ except Exception:
555
+ _vector_store = None
556
+ else:
557
+ _vector_store = None
558
+
559
+ print(" - Initializing AI Strategist...")
560
+ _ai_strategist = AIStrategist(llm_instance=_llm_instance, store=_vector_store)
561
+ print(" - ✅ AI Strategist ready.")
562
+
563
+ print(" - Initializing Support Agent...")
564
+ _support_agent = SupportAgent(
565
+ llm_instance=_llm_instance,
566
+ embedding_path=EMBEDDING_MODEL_PATH,
567
+ db_path=DB_PATH
568
+ )
569
+ print(" - ✅ Support Agent ready.")
570
+
571
+ print(" - Loading ML models from joblib files...")
572
+ model_paths = {
573
+ 'budget': ('_budget_predictor', 'budget_predictor_v1.joblib'),
574
+ 'matcher': ('_influencer_matcher', 'influencer_matcher_v1.joblib'),
575
+ 'performance': ('_performance_predictor', 'performance_predictor_v1.joblib'),
576
+ 'payout': ('_payout_forecaster', 'payout_forecaster_v1.joblib'),
577
+ 'earnings': ('_earnings_optimizer', 'earnings_model.joblib'),
578
+ 'earnings_encoder': ('_earnings_encoder', 'earnings_encoder.joblib'),
579
+ 'likes_predictor': ('_likes_predictor', 'likes_predictor_v1.joblib'),
580
+ 'comments_predictor': ('_comments_predictor', 'comments_predictor_v1.joblib'),
581
+ 'revenue_forecaster': ('_revenue_forecaster', 'revenue_forecaster_v1.joblib'),
582
+ 'performance_scorer': ('_performance_scorer', 'performance_scorer_v1.joblib'),
583
+ }
584
+ for name, (var, file) in model_paths.items():
585
+ path = os.path.join(MODELS_DIR, file)
586
+ try:
587
+ globals()[var] = joblib.load(path)
588
+ print(f" - Loaded {name} model.")
589
+ except FileNotFoundError:
590
+ globals()[var] = None
591
+ print(f" - ⚠️ WARNING: Model '{name}' not found at {path}. Endpoint disabled.")
592
+
593
+ print(" - Initializing Text Embedding Model...")
594
+ load_embedding_model(EMBEDDING_MODEL_PATH)
595
+
596
+ print("\n--- ✅ AI Service is fully operational! ---")
597
+
598
+ @app.get("/", summary="Health Check")
599
+ def read_root():
600
+ return {"status": "AI Service is running"}
601
+
602
+ def _cleanup_llm_response(data: dict) -> dict:
603
+ """A robust helper to clean common messy JSON outputs from smaller LLMs."""
604
+ cleaned = { "wins": [], "challenges": [], "opportunities": [], "sentiment": "Mixed" } # Default to Mixed
605
+
606
+ # Clean list-based fields
607
+ for key in ["wins", "challenges", "opportunities"]:
608
+ if key in data and isinstance(data[key], list):
609
+ for item in data[key]:
610
+ if isinstance(item, str) and item: # Check if string is not empty
611
+ cleaned[key].append(item.strip())
612
+ elif isinstance(item, dict) and 'text' in item and isinstance(item['text'], str) and item['text']:
613
+ cleaned[key].append(item['text'].strip())
614
+
615
+ # Clean sentiment field
616
+ sentiment_data = data.get("sentiment")
617
+ if isinstance(sentiment_data, str) and sentiment_data:
618
+ # Sometimes model sends "Positive." with a period, strip it.
619
+ cleaned["sentiment"] = sentiment_data.strip().replace('.', '')
620
+ elif isinstance(sentiment_data, dict):
621
+ if sentiment_data.get('positive'): cleaned["sentiment"] = "Positive"
622
+ elif sentiment_data.get('negative'): cleaned["sentiment"] = "Negative"
623
+ else: cleaned["sentiment"] = "Mixed"
624
+
625
+ return cleaned
626
+
627
def process_summary_in_background(checkin_id: int, raw_text: str) -> None:
    """
    Long-running background task: summarize a weekly check-in with the LLM.

    Prompts the local Phi-2 model (using its official "Instruct:/Output:"
    format) to extract wins/challenges/opportunities/sentiment from
    ``raw_text`` as JSON, normalizes the result with
    ``_cleanup_llm_response``, and writes the outcome back to the
    ``influencer_weekly_checkins`` row: status ``completed`` with the
    structured summary on success, ``failed`` with an error message on any
    failure.

    Args:
        checkin_id: Primary key of the check-in row to update.
        raw_text: The influencer's free-form weekly check-in text.
    """
    print(f" - ⚙️ BACKGROUND JOB STARTED for check-in ID: {checkin_id}")

    # Each background task needs its own Supabase client.
    supabase = get_supabase_client()

    if not _llm_instance:
        # Record the failure so the row does not stay stuck in a pending state.
        print(f" - ❌ JOB FAILED ({checkin_id}): LLM instance was not available during background task.")
        supabase.table("influencer_weekly_checkins").update({
            "status": "failed", "error_message": "AI model was not loaded."
        }).eq("id", checkin_id).execute()
        return

    # Official Phi-2 prompt format: the text first, then the instruction,
    # then an "Output:" cue. Doubled braces render as literal { } in the
    # f-string.
    final_prompt = f'''Text: """{raw_text}"""

Instruct: Analyze the text above and extract key points into a single, valid JSON object with the keys "wins", "challenges", "opportunities", and "sentiment".
- "wins" should contain 1-2 positive sentences.
- "challenges" should contain 1-2 negative sentences.
- "opportunities" should contain 1-2 new ideas.
- DO NOT repeat sentences across categories.
- "sentiment" must be ONE word: "Positive", "Negative", or "Mixed".
Your entire response must ONLY be the JSON object, starting with {{ and ending with }}.

Output:
'''

    try:
        # Deterministic decode (temperature 0.0) with stop tokens that match
        # the prompt's section markers, so the model cannot ramble past the JSON.
        output = _llm_instance(
            final_prompt,
            max_tokens=1024,
            temperature=0.0,
            top_p=0.95,
            top_k=40,
            repeat_penalty=1.1,
            stop=["Instruct:", "Text:", "Output:"],  # Strict stop tokens matching the prompt
            echo=False
        )

        raw_response_text = output['choices'][0]['text'].strip()
        print(f" - 🤖 JOB ({checkin_id}): Official Phi-2 Raw Response:\n---\n{raw_response_text}\n---")

        # Tolerant JSON extraction: take the outermost {...} span, ignoring
        # any chatter the model printed around it.
        json_match = re.search(r'\{.*\}', raw_response_text, re.DOTALL)
        if not json_match:
            raise ValueError("No valid JSON object found in the LLM's response. The model may have returned plain text.")

        # Extract the JSON string and parse it.
        clean_json_text = json_match.group(0)
        summary_data_raw = json.loads(clean_json_text)

        # Normalize messy model output into the expected summary schema.
        cleaned_summary = _cleanup_llm_response(summary_data_raw)

        # SUCCESS: persist the structured summary and mark the row completed.
        print(f" - ✅ JOB ({checkin_id}): COMPLETED. Updating database with: {cleaned_summary}")
        supabase.table("influencer_weekly_checkins").update({
            "structured_summary": cleaned_summary,
            "status": "completed"
        }).eq("id", checkin_id).execute()

    except Exception as e:
        # Any failure (LLM call, JSON parse, DB write) marks the row failed
        # with the error text for later inspection.
        error_message = f"AI model failed: {str(e)}"
        print(f" - ❌ JOB FAILED for check-in ID: {checkin_id}. Error: {error_message}")
        import traceback
        traceback.print_exc()
        supabase.table("influencer_weekly_checkins").update({
            "status": "failed",
            "error_message": error_message
        }).eq("id", checkin_id).execute()
705
+
706
+ @app.post("/generate-chat-response", response_model=ChatResponsePayload, summary="Interactive AI Strategist Chat")
707
+ async def generate_chat_response_route(request: ChatResponseRequest):
708
+ print(f"\n✅ Received request on /generate-chat-response")
709
+ if not _ai_strategist:
710
+ raise HTTPException(status_code=503, detail="The AI Strategist is not available.")
711
+ try:
712
+ response_text = _ai_strategist.generate_chat_response(prompt=request.prompt, context=request.context)
713
+ return ChatResponsePayload(response=response_text)
714
+ except Exception as e:
715
+ raise HTTPException(status_code=500, detail=str(e))
716
+
717
+ @app.post("/api/v1/chat", response_model=ChatAnswer, summary="Role-Aware AI Support Agent")
718
+ async def ask_support_agent(query: ChatQuery):
719
+ if not _support_agent: raise HTTPException(status_code=503, detail="AI Support Agent is not available.")
720
+ return _support_agent.answer(payload=query.model_dump(), conversation_id=query.conversationId)
721
+
722
+ @app.post("/api/v1/generate/caption", response_model=CaptionResponse, summary="Generate variations of a caption")
723
+ async def generate_caption_route(request: CaptionRequest):
724
+ if not _support_agent: raise HTTPException(status_code=503, detail="AI Support Agent is not available.")
725
+ new_caption_text = _support_agent.generate_caption_variant(caption=request.caption, action=request.action)
726
+ return CaptionResponse(new_caption=new_caption_text)
727
+
728
+ @app.post("/generate-strategy", response_model=StrategyResponse, summary="Generate a Digital Marketing Strategy")
729
+ async def generate_strategy_route(request: StrategyRequest):
730
+ if not _support_agent:
731
+ raise HTTPException(status_code=503, detail="AI Support Agent is not available.")
732
+ try:
733
+ strategy_text = _support_agent.generate_marketing_strategy(prompt=request.prompt)
734
+ return StrategyResponse(response=strategy_text)
735
+ except Exception as e:
736
+ raise HTTPException(status_code=500, detail=f"An internal error occurred in the AI model: {e}")
737
+
738
+ @app.post("/api/v1/predict/budget", response_model=BudgetResponse, summary="Predict Campaign Budget")
739
+ async def predict_budget(request: BudgetRequest):
740
+ if not _budget_predictor: raise HTTPException(status_code=503, detail="Budget predictor is not available.")
741
+ input_data = pd.DataFrame([request.model_dump()])
742
+ prediction = _budget_predictor.predict(input_data)[0]
743
+ return BudgetResponse(predicted_budget_usd=round(prediction, 2))
744
+
745
+ @app.post("/api/v1/match/influencers", response_model=MatcherResponse, summary="Match Influencers to Campaign")
746
+ async def match_influencers(request: MatcherRequest):
747
+ if not _influencer_matcher: raise HTTPException(status_code=503, detail="Influencer matcher is not available.")
748
+ input_data = pd.DataFrame([request.model_dump()])
749
+ prediction = _influencer_matcher.predict(input_data)
750
+ integer_ids = [int(pid) for pid in prediction]
751
+ return MatcherResponse(suggested_influencer_ids=integer_ids)
752
+
753
+ @app.post("/api/v1/predict/performance", response_model=PerformanceResponse, summary="Predict Campaign Performance")
754
+ async def predict_performance(request: PerformanceRequest):
755
+ if not _performance_predictor: raise HTTPException(status_code=503, detail="Performance predictor is not available.")
756
+ input_data = pd.DataFrame([request.model_dump()])
757
+ prediction_value = _performance_predictor.predict(input_data)[0]
758
+ return PerformanceResponse(predicted_engagement_rate=0.035, predicted_reach=int(prediction_value))
759
+
760
+ @app.post("/generate-outline", response_model=OutlineResponse, summary="Generate a Blog Post Outline")
761
+ async def generate_outline_route(request: OutlineRequest):
762
+ if not _support_agent:
763
+ raise HTTPException(status_code=503, detail="AI Support Agent is not available.")
764
+ try:
765
+ outline_text = _support_agent.generate_content_outline(title=request.title)
766
+ return OutlineResponse(outline=outline_text)
767
+ except Exception as e:
768
+ raise HTTPException(status_code=500, detail=f"An internal error occurred in the AI model: {e}")
769
+
770
+ @app.post("/generate-dashboard-insights", response_model=StrategyResponse, summary="Generate Insights from Dashboard KPIs")
771
+ @cached_response # <--- ✨ NEW: Speed Booster Logic ✨
772
+ async def generate_dashboard_insights_route(request: DashboardInsightsRequest):
773
+ print(f"\n✅ Received request on /generate-dashboard-insights with data: {request.model_dump()}")
774
+ if not _llm_instance:
775
+ raise HTTPException(status_code=503, detail="The Llama model is not available.")
776
+
777
+ # Existing logic remains 100% SAME
778
+ kpis = request.model_dump()
779
+ prompt = f"""
780
+ [SYSTEM]
781
+ You are a senior data analyst at Reachify. You are writing a short, insightful summary for the agency's admin. Identify the most important trends from the week's KPIs. Write 2-3 human-readable bullet points. Be proactive and suggest an action.
782
+
783
+ [THIS WEEK'S KPI DATA]
784
+ - Revenue This Month (so far): ${kpis.get('total_revenue_monthly', 0):.2f}
785
+ - New Users This Week: {kpis.get('new_users_weekly', 0)}
786
+ - Currently Active Campaigns: {kpis.get('active_campaigns', 0)}
787
+ - Items Awaiting Approval: {kpis.get('pending_approvals', 0)}
788
+
789
+ [YOUR INSIGHTFUL BULLET POINTS]
790
+ - """
791
+
792
+ try:
793
+ print("--- Direct Call: Sending composed prompt to LLM...")
794
+ response = _llm_instance(prompt, max_tokens=250, temperature=0.7, stop=["[SYSTEM]", "Human:", "\n\n"], echo=False)
795
+
796
+ insight_text = response['choices'][0]['text'].strip()
797
+
798
+ if not insight_text.startswith('-'):
799
+ insight_text = '- ' + insight_text
800
+
801
+ print("--- Direct Call: Successfully received response from LLM.")
802
+ return StrategyResponse(response=insight_text)
803
+
804
+ except Exception as e:
805
+ print(f"🚨 AN ERROR OCCURRED DIRECTLY IN THE ENDPOINT:")
806
+ traceback.print_exc()
807
+ raise HTTPException(status_code=500, detail=str(e))
808
+
809
+
810
+ @app.get("/", summary="Health Check")
811
+ def read_root():
812
+ return {"status": "Unified AI Service is running"}
813
+
814
+ @app.post("/predict/time-series", response_model=TimeSeriesForecastResponse, summary="Forecast Time Series with Trend Analysis")
815
+ def predict_time_series(request: TimeSeriesForecastRequest):
816
+ print(f"\n✅ Received smart forecast request with context: '{request.business_context}'")
817
+
818
+ if len(request.data) < 5:
819
+ raise HTTPException(status_code=400, detail="Not enough data. At least 5 data points required.")
820
+
821
+ try:
822
+ df = pd.DataFrame([item.model_dump() for item in request.data])
823
+ df['date'] = pd.to_datetime(df['date'])
824
+ df = df.set_index('date').asfreq('MS', method='ffill')
825
+
826
+ model = Holt(df['value'], initialization_method="estimated").fit(optimized=True)
827
+ forecast_result = model.forecast(steps=request.periods_to_predict)
828
+
829
+ smart_forecast_output = []
830
+ last_historical_value = df['value'].iloc[-1]
831
+
832
+ for date, predicted_val in forecast_result.items():
833
+ trend_label = "Stable"
834
+ commentary = None
835
+ percentage_change = ((predicted_val - last_historical_value) / last_historical_value) * 100
836
+
837
+ if percentage_change > 10:
838
+ trend_label = "Strong Growth"
839
+ if "by " in request.business_context:
840
+ reason = request.business_context.split('by ')[-1]
841
+ commentary = f"Strong growth expected, likely driven by {reason}"
842
+ else:
843
+ commentary = "Strong growth expected due to positive trends."
844
+ elif percentage_change > 2:
845
+ trend_label = "Modest Growth"
846
+ elif percentage_change < -5:
847
+ trend_label = "Potential Downturn"
848
+ commentary = "Warning: A potential downturn is detected. This may not account for upcoming campaigns. Review your strategy."
849
+
850
+ smart_forecast_output.append(
851
+ SmartForecastDataPoint(
852
+ date=date.strftime('%Y-%m-%d'),
853
+ predicted_value=round(predicted_val, 2),
854
+ trend=trend_label,
855
+ commentary=commentary
856
+ )
857
+ )
858
+ last_historical_value = predicted_val
859
+
860
+ return TimeSeriesForecastResponse(forecast=smart_forecast_output)
861
+
862
+ except Exception as e:
863
+ traceback.print_exc()
864
+ raise HTTPException(status_code=500, detail=str(e))
865
+
866
+ @app.post("/generate-health-summary", response_model=HealthSummaryResponse, summary="Generates an actionable summary from KPIs")
867
+ def generate_health_summary(request: HealthKpiRequest):
868
+ print(f"\n✅ Received request to generate health summary.")
869
+ if not _llm_instance:
870
+ raise HTTPException(status_code=503, detail="LLM not available for summary.")
871
+
872
+ kpis = request.model_dump()
873
+
874
+ prompt = f"""
875
+ [SYSTEM]
876
+ You are a business analyst. Analyze these KPIs: Platform Revenue (₹{kpis.get('platformRevenue', 0):,.0f}), Active Campaigns ({kpis.get('activeCampaigns', 0)}). Provide one [PROGRESS] point and one [AREA TO WATCH] with a next action. Under 50 words.
877
+ [YOUR ANALYSIS]
878
+ """
879
+
880
+ try:
881
+
882
+ response = _llm_instance(prompt, max_tokens=150, temperature=0.6, stop=["[SYSTEM]"], echo=False)
883
+ summary_text = response['choices'][0]['text'].strip()
884
+ print(f" - ✅ Generated summary: {summary_text}")
885
+ return HealthSummaryResponse(summary=summary_text)
886
+
887
+ except OSError as e:
888
+ print(f"🚨 CRITICAL LLM CRASH CAUGHT (OSError): {e}. Returning a fallback message.")
889
+ traceback.print_exc()
890
+ return HealthSummaryResponse(summary="[AREA TO WATCH]: The AI analyst model is currently unstable and is being reviewed. Manual analysis is recommended.")
891
+ except Exception as e:
892
+ print(f"🚨 An unexpected error occurred during summary generation: {e}")
893
+ traceback.print_exc()
894
+ raise HTTPException(status_code=500, detail=str(e))
895
+
896
+
897
+ @app.post("/generate_team_strategy", response_model=TeamStrategyResponse, summary="Generates a full campaign strategy for the internal team")
898
+ def generate_team_strategy(request: TeamStrategyRequest):
899
+ """
900
+ This endpoint orchestrates the AI/ML logic for the Team Strategist tool.
901
+ It takes campaign details and a list of influencers from the backend.
902
+ """
903
+ print(f"\n✅ Received request on /generate_team_strategy for brand: {request.brand_name}")
904
+
905
+ if not _ai_strategist:
906
+ raise HTTPException(status_code=503, detail="AI Strategist model is not available or failed to load.")
907
+
908
+ try:
909
+ # Step 1: Generate the creative brief using the LLM
910
+ creative_brief_dict = _ai_strategist.generate_campaign_brief(
911
+ brand_name=request.brand_name,
912
+ campaign_goal=request.campaign_goal,
913
+ target_audience=request.target_audience,
914
+ budget_range=request.budget_range
915
+ )
916
+ if "error" in creative_brief_dict:
917
+ raise Exception(f"LLM Error during brief generation: {creative_brief_dict['error']}")
918
+
919
+ # Step 2: Rank the provided influencers using the ML model
920
+ influencer_list_of_dicts = [inf.model_dump() for inf in request.influencers]
921
+ suggested_influencers_list = rank_influencers_by_match(
922
+ influencers=influencer_list_of_dicts,
923
+ campaign_details=request.model_dump(exclude={"influencers"}),
924
+ top_n=3
925
+ )
926
+
927
+ print("✅ Successfully generated brief and ranked influencers.")
928
+ return TeamStrategyResponse(
929
+ success=True,
930
+ strategy=CreativeBrief(**creative_brief_dict),
931
+ suggested_influencers=[InfluencerData(**inf) for inf in suggested_influencers_list]
932
+ )
933
+
934
+ except Exception as e:
935
+ print(f"🚨 An error occurred in /generate_team_strategy endpoint:")
936
+ traceback.print_exc()
937
+ return TeamStrategyResponse(success=False, error=str(e))
938
+
939
+
940
+ @app.post("/strategist/generate-analytics-insights", response_model=AnalyticsInsightsResponse, summary="Generates Actionable Insights from Campaign Analytics")
941
+ async def generate_analytics_insights_route(request: AnalyticsInsightsRequest):
942
+ """
943
+ Receives campaign analytics data and uses the AI Strategist to generate key insights.
944
+ """
945
+ print(f"\n✅ Received request on /strategist/generate-analytics-insights")
946
+ if not _ai_strategist:
947
+ raise HTTPException(status_code=503, detail="The AI Strategist is not available.")
948
+
949
+ try:
950
+ # Pydantic model se data ko dictionary mein convert karein
951
+ analytics_data = request.model_dump()
952
+
953
+ # Naye function ko call karein
954
+ insights_text = _ai_strategist.generate_analytics_insights(analytics_data=analytics_data)
955
+
956
+ return AnalyticsInsightsResponse(insights=insights_text)
957
+
958
+ except Exception as e:
959
+ print(f"🚨 An error occurred in /strategist/generate-analytics-insights endpoint:")
960
+ traceback.print_exc()
961
+ raise HTTPException(status_code=500, detail=str(e))
962
+
963
+ @app.post("/predictor/rank-influencers", response_model=InfluencerRankResponse, summary="Ranks a given list of influencers for a specific campaign")
964
+ async def rank_influencers_route(request: InfluencerRankRequest):
965
+ """
966
+ Backend se campaign details aur sabhi influencers ki list leta hai,
967
+ aur ML model ka istemal karke top 3 ranked influencers wapas bhejta hai.
968
+ """
969
+ print(f"\n✅ Received request on /predictor/rank-influencers for campaign: '{request.campaign_details.description[:30]}...'")
970
+
971
+ # predictor.py se humara model loaded hai, humein use check karne ki zaroorat nahi
972
+ # kyunki wahan pehle se try-except block laga hua hai.
973
+
974
+ try:
975
+ # Step 1: Frontend se aaye Pydantic models ko saaf Python dictionaries mein badlein
976
+ influencers_list = [inf.model_dump() for inf in request.influencers]
977
+ campaign_details_dict = request.campaign_details.model_dump()
978
+
979
+ # Step 2: Humare predictor.py ke function ko call karein
980
+ ranked_list = rank_influencers_by_match(
981
+ influencers=influencers_list,
982
+ campaign_details=campaign_details_dict,
983
+ top_n=5 # Hum top 5 influencers bhejenge
984
+ )
985
+
986
+ # Step 3: Saaf jawab wapas bhejein
987
+ print(f" - ✅ Successfully ranked {len(ranked_list)} influencers.")
988
+ return InfluencerRankResponse(ranked_influencers=ranked_list)
989
+
990
+ except Exception as e:
991
+ print(f"🚨 An error occurred in /predictor/rank-influencers endpoint:")
992
+ traceback.print_exc()
993
+ raise HTTPException(status_code=500, detail=str(e))
994
+
995
+ @app.post("/strategist/generate-weekly-summary", response_model=WeeklySummaryResponse, summary="Generates a Weekly Summary from Metrics")
996
+ def generate_weekly_summary_route(request: WeeklySummaryRequest):
997
+ print(f"\n✅ Received request on the NEW /strategist/generate-weekly-summary endpoint.")
998
+ if not _ai_strategist:
999
+ raise HTTPException(status_code=503, detail="AI Strategist is not initialized.")
1000
+ try:
1001
+ summary_text = _ai_strategist.generate_weekly_summary(metrics=request.model_dump())
1002
+ if not summary_text or "error" in summary_text.lower():
1003
+ raise Exception("AI model failed to generate a valid summary.")
1004
+ return WeeklySummaryResponse(summary=summary_text)
1005
+ except Exception as e:
1006
+ print(f"🚨 An error occurred in /strategist/generate-weekly-summary: {e}")
1007
+ raise HTTPException(status_code=500, detail=str(e))
1008
+
1009
+ @app.post("/predict/payout_forecast", response_model=PayoutForecastOutput, summary="Predicts future influencer payouts for a manager")
1010
+ def predict_payout(data: PayoutForecastInput):
1011
+ """
1012
+ Predicts the estimated influencer payout for the next 30 days
1013
+ based on the total budget of a manager's active campaigns.
1014
+ """
1015
+ print(f"\n✅ Received request on /predict/payout_forecast")
1016
+ if not _payout_forecaster:
1017
+ raise HTTPException(status_code=503, detail="Model is not available. Please train the payout forecaster model first.")
1018
+
1019
+ try:
1020
+ # Prediction ke liye data ko sahi DataFrame format mein convert karo
1021
+ input_df = pd.DataFrame([{'budget': data.total_budget_active_campaigns}])
1022
+
1023
+ # Prediction karo
1024
+ prediction = _payout_forecaster.predict(input_df)[0]
1025
+
1026
+ # Ensure the prediction is never negative
1027
+ forecasted_amount = max(0, float(prediction))
1028
+
1029
+ print(f" - ✅ Generated payout forecast: {forecasted_amount}")
1030
+ return {
1031
+ "forecastedAmount": forecasted_amount,
1032
+ "commentary": "Based on the total budget of your current active campaigns."
1033
+ }
1034
+
1035
+ except Exception as e:
1036
+ print(f"🚨 An error occurred in /predict/payout_forecast endpoint:")
1037
+ traceback.print_exc()
1038
+ raise HTTPException(status_code=500, detail=f"An error occurred during prediction: {str(e)}")
1039
+
1040
+
1041
+ @app.post("/analyze/content_quality", response_model=ContentQualityResponse, summary="Analyzes a caption for a quality score")
1042
+ def analyze_content_quality(request: ContentQualityRequest):
1043
+ """
1044
+ Uses the loaded LLM to analyze a social media caption based on several criteria
1045
+ and returns a quantitative score and qualitative feedback.
1046
+ """
1047
+ print(f"\n✅ Received request on /analyze/content_quality")
1048
+ if not _llm_instance:
1049
+ raise HTTPException(status_code=503, detail="The Llama model is not available.")
1050
+
1051
+ caption = request.caption
1052
+
1053
+ # This is a very structured prompt that asks the LLM to act as a specialist
1054
+ # and return a JSON object, which is easier and more reliable to parse.
1055
+ prompt = f"""
1056
+ [SYSTEM]
1057
+ You are a social media expert. Analyze the following caption based on four criteria: Readability, Engagement, Call to Action (CTA), and Hashtag Strategy.
1058
+ For each criterion, provide a score from 1 (poor) to 10 (excellent).
1059
+ Also, provide a final overall score (average of the four scores) and short, actionable feedback.
1060
+ Respond ONLY with a valid JSON object in the following format:
1061
+ {{
1062
+ "overall_score": <float>,
1063
+ "scores": {{
1064
+ "readability": <int>,
1065
+ "engagement": <int>,
1066
+ "call_to_action": <int>,
1067
+ "hashtag_strategy": <int>
1068
+ }},
1069
+ "feedback": "<string>"
1070
+ }}
1071
+
1072
+ [CAPTION TO ANALYZE]
1073
+ "{caption}"
1074
+
1075
+ [YOUR JSON RESPONSE]
1076
+ """
1077
+
1078
+ try:
1079
+ print("--- Sending caption to LLM for quality analysis...")
1080
+ response = _llm_instance(prompt, max_tokens=512, temperature=0.2, stop=["[SYSTEM]", "\n\n"], echo=False)
1081
+
1082
+ # Extract the JSON part of the response
1083
+ json_text = response['choices'][0]['text'].strip()
1084
+
1085
+ # Find the start and end of the JSON object
1086
+ start_index = json_text.find('{')
1087
+ end_index = json_text.rfind('}') + 1
1088
+ if start_index == -1 or end_index == 0:
1089
+ raise ValueError("LLM did not return a valid JSON object.")
1090
+
1091
+ clean_json_text = json_text[start_index:end_index]
1092
+
1093
+ import json
1094
+ analysis_result = json.loads(clean_json_text)
1095
+
1096
+ print("--- Successfully received and parsed JSON response from LLM.")
1097
+ return ContentQualityResponse(**analysis_result)
1098
+
1099
+ except (json.JSONDecodeError, KeyError, ValueError) as e:
1100
+ print(f"🚨 ERROR parsing LLM response: {e}. Raw response was: {json_text}")
1101
+ raise HTTPException(status_code=500, detail="Failed to parse the analysis from the AI model. The model may have returned an unexpected format.")
1102
+ except Exception as e:
1103
+ print(f"🚨 An unexpected error occurred during content analysis: {e}")
1104
+ traceback.print_exc()
1105
+ raise HTTPException(status_code=500, detail=str(e))
1106
+
1107
+ @app.post("/rank/campaigns-for-influencer", response_model=RankCampaignsResponse, summary="Ranks a list of campaigns for one influencer")
1108
+ async def rank_campaigns_for_influencer_route(request: RankCampaignsRequest):
1109
+ """
1110
+ Takes an influencer's profile and a list of campaigns, uses the ML model
1111
+ to predict a 'match score' for each, and returns the list ranked by that score.
1112
+ """
1113
+ print(f"\n✅ Received request on /rank/campaigns-for-influencer for influencer: {request.influencer.id}")
1114
+
1115
+ # 1. Security Check: Model loaded hai ya nahi?
1116
+ if not _influencer_matcher:
1117
+ raise HTTPException(status_code=503, detail="Influencer Matcher model is not available.")
1118
+ if not request.campaigns:
1119
+ return RankCampaignsResponse(ranked_campaigns=[])
1120
+
1121
+ try:
1122
+ # 2. Data Preparation: Model ke liye DataFrame banayein
1123
+ # Model ko wahi columns chahiye jin par woh train hua tha.
1124
+ df_list = []
1125
+ for campaign in request.campaigns:
1126
+ df_list.append({
1127
+ 'influencer_category': request.influencer.category,
1128
+ 'influencer_bio': request.influencer.bio,
1129
+ 'campaign_description': campaign.description,
1130
+ # Hum woh columns bhi denge jo is context me nahi hain, par model ko chahiye
1131
+ 'followers': 50000, # Ek average value
1132
+ 'engagement_rate': 0.04, # Ek acchi value
1133
+ 'country': 'USA', # Ek default value
1134
+ 'niche': request.influencer.category or 'lifestyle'
1135
+ })
1136
+
1137
+ df_to_predict = pd.DataFrame(df_list)
1138
+
1139
+ # 3. 🔥 AI Prediction (The Missing Part) 🔥
1140
+ # Model se har campaign ke liye ek score predict karwayein
1141
+ print(f" - Predicting scores for {len(df_to_predict)} campaigns...")
1142
+ predicted_scores = _influencer_matcher.predict(df_to_predict)
1143
+
1144
+ # 4. Sorting & Ranking
1145
+ # Campaigns ko unke score ke saath combine karein
1146
+ results_with_scores = zip(request.campaigns, predicted_scores)
1147
+
1148
+ # Unhein score ke hisaab se sort karein (zyada score upar)
1149
+ sorted_results = sorted(results_with_scores, key=lambda x: x[1], reverse=True)
1150
+
1151
+ # 5. Final Jawab (Response) taiyaar karein
1152
+ output = [
1153
+ RankedCampaignResult(campaign_id=camp.id, score=float(score))
1154
+ for camp, score in sorted_results
1155
+ ]
1156
+
1157
+ print(f" - ✅ Successfully scored and ranked campaigns.")
1158
+ return RankCampaignsResponse(ranked_campaigns=output)
1159
+
1160
+ except Exception as e:
1161
+ print(f"🚨 An error occurred during campaign ranking:")
1162
+ traceback.print_exc()
1163
+ raise HTTPException(status_code=500, detail=str(e))
1164
+
1165
+ @app.post("/ai/assist/caption", response_model=CaptionAssistResponse, summary="Assists with writing or improving captions")
1166
+ async def caption_assistant_route(request: CaptionAssistRequest):
1167
+ """
1168
+ Takes a caption and performs an action (improve, suggest hashtags, etc.) using the LLM.
1169
+ """
1170
+ print(f"\n✅ Received request on /ai/assist/caption with action: {request.action}")
1171
+ if not _ai_strategist:
1172
+ raise HTTPException(status_code=503, detail="AI Strategist is not available.")
1173
+
1174
+ try:
1175
+ # _ai_strategist ke andar ek naya function banayenge
1176
+ generated_text = _ai_strategist.get_caption_assistance(
1177
+ caption=request.caption,
1178
+ action=request.action,
1179
+ guidelines=request.guidelines
1180
+ )
1181
+ return CaptionAssistResponse(new_text=generated_text)
1182
+
1183
+ except Exception as e:
1184
+ print(f"🚨 An error occurred in /ai/assist/caption endpoint:")
1185
+ traceback.print_exc()
1186
+ raise HTTPException(status_code=500, detail=str(e))
1187
+
1188
+
1189
+ @app.post("/predict/campaign-outcome", response_model=ForecastResponse, summary="Forecasts influencer performance and earnings for a campaign")
1190
+ async def predict_campaign_outcome(request: ForecastRequest):
1191
+ """
1192
+ Takes campaign and influencer stats and uses ML models to predict
1193
+ performance (reach, engagement) and potential earnings.
1194
+ """
1195
+ print(f"\n✅ Received request on /predict/campaign-outcome")
1196
+
1197
+ if not _performance_predictor or not _payout_forecaster:
1198
+ raise HTTPException(status_code=503, detail="Forecasting models are not available.")
1199
+
1200
+ try:
1201
+ # ✅ THE FIX IS HERE: Create a single 'budget' column.
1202
+ # Column names MUST match the training script's columns.
1203
+ input_data = pd.DataFrame([{
1204
+ 'budget': request.budget,
1205
+ 'category': request.category,
1206
+ 'influencer_count': 1,
1207
+ 'platform': 'instagram',
1208
+ 'location': 'USA',
1209
+ 'followers': request.follower_count,
1210
+ 'engagement_rate': request.engagement_rate
1211
+ }])
1212
+
1213
+ # --- Performance Prediction ---
1214
+ print(" - Predicting performance...")
1215
+ # ✅ THE FIX: Pass the columns the model ACTUALLY needs.
1216
+ performance_model_cols = ['budget', 'influencer_count', 'platform', 'location', 'category']
1217
+ reach_prediction = _performance_predictor.predict(input_data[performance_model_cols])[0]
1218
+ engagement_prediction = request.engagement_rate * 100
1219
+
1220
+ perf_forecast = PerformanceForecast(
1221
+ predicted_reach=int(reach_prediction),
1222
+ predicted_engagement_rate=round(engagement_prediction, 2)
1223
+ )
1224
+
1225
+ # --- Payout Prediction ---
1226
+ print(" - Predicting payout...")
1227
+ # This model only needs 'budget'
1228
+ payout_prediction = _payout_forecaster.predict(input_data[['budget']])[0]
1229
+
1230
+ payout_forecast = PayoutForecast(
1231
+ estimated_earning=max(0, float(payout_prediction))
1232
+ )
1233
+
1234
+ print(" - ✅ Successfully generated forecasts.")
1235
+ return ForecastResponse(performance=perf_forecast, payout=payout_forecast)
1236
+
1237
+ except Exception as e:
1238
+ print(f"🚨 An error occurred during outcome prediction:")
1239
+ traceback.print_exc()
1240
+ raise HTTPException(status_code=500, detail=str(e))
1241
+
1242
+ @app.post("/ai/summarize/influencer-analytics", response_model=InfluencerAnalyticsSummaryResponse, summary="Generates a summary for the influencer's analytics page")
1243
+ async def summarize_influencer_analytics(request: InfluencerKpiData):
1244
+ """
1245
+ Takes an influencer's KPIs and uses the AI strategist to create an actionable summary.
1246
+ """
1247
+ print(f"\n✅ Received request on /ai/summarize/influencer-analytics")
1248
+ if not _ai_strategist:
1249
+ raise HTTPException(status_code=503, detail="AI Strategist is not available.")
1250
+
1251
+ try:
1252
+ # Pass the data as a dictionary to the strategist
1253
+ summary_text = _ai_strategist.generate_influencer_analytics_summary(kpis=request.model_dump())
1254
+ return InfluencerAnalyticsSummaryResponse(summary=summary_text)
1255
+
1256
+ except Exception as e:
1257
+ print(f"🚨 An error occurred in the analytics summary endpoint:")
1258
+ traceback.print_exc()
1259
+ raise HTTPException(status_code=500, detail=str(e))
1260
+
1261
+
1262
+ @app.post("/portfolio/curate-with-ai", response_model=CuratePortfolioResponse)
1263
+ def curate_portfolio_with_ai(request: CuratePortfolioRequest):
1264
+ """
1265
+ Accepts a list of approved submissions, scores them based on simple logic,
1266
+ and returns the IDs of the best ones. THIS VERSION DOES NOT USE THE LLM.
1267
+ """
1268
+ print(f"\n✅✅✅ RUNNING FINAL, NON-LLM VERSION of Portfolio Curation ✅✅✅")
1269
+
1270
+ submissions = request.submissions
1271
+
1272
+ if not submissions:
1273
+ return CuratePortfolioResponse(featured_submission_ids=[])
1274
+
1275
+ scored_submissions = []
1276
+ for sub in submissions:
1277
+ # Step 1: Ek score calculate karein
1278
+ score = 0
1279
+ # Likes ke liye points (sabse zaroori)
1280
+ score += (sub.likes or 0) * 0.7
1281
+
1282
+ # Caption lamba hai to extra points
1283
+ if sub.caption and len(sub.caption) > 100:
1284
+ score += 100 # Ek boost
1285
+
1286
+ # Step 2: Har submission ko uske score ke saath save karein
1287
+ scored_submissions.append({'id': sub.id, 'score': score})
1288
+
1289
+ # Step 3: Sabhi submissions ko score ke hisaab se sort karein
1290
+ sorted_submissions = sorted(scored_submissions, key=lambda x: x['score'], reverse=True)
1291
+
1292
+ # Step 4: Sabse behtareen 5 submissions ko chunein (ya jitne bhi hain)
1293
+ top_submissions = sorted_submissions[:5]
1294
+
1295
+ # Step 5: Sirf unki ID waapis bhejein
1296
+ featured_ids = [sub['id'] for sub in top_submissions]
1297
+
1298
+ print(f" - ✅ Scored and selected {len(featured_ids)} posts: {featured_ids}")
1299
+ return CuratePortfolioResponse(featured_submission_ids=featured_ids)
1300
+
1301
@app.post("/tasks/prioritize", response_model=TaskPrioritizationResponse)
def prioritize_task(request: TaskPrioritizationRequest):
    """
    Assign a priority ("high" / "medium" / "low") to a task title via the LLM.

    Falls back to "medium" whenever the model emits an off-menu label or the
    call fails outright, so the caller always gets a usable priority.
    """
    if not _llm_instance:
        raise HTTPException(status_code=503, detail="LLM model is not available.")

    prompt = f"""
[INST] You are an expert assistant for a social media influencer. Your job is to assign a priority to a new task based on its title. Use these rules:
- If the task mentions "revise", "rejection", "feedback", "contract", or is a deadline, the priority is "high".
- If the task is about a "new invitation", "new opportunity", or "message", the priority is "medium".
- For anything else like "update profile", "explore campaigns", the priority is "low".

Respond ONLY with one of the following words: high, medium, or low.

Task Title: "{request.title}"
[/INST]
"""

    allowed_labels = ('high', 'medium', 'low')
    try:
        print(f" - 🤖 Prioritizing task: '{request.title}'")
        completion = _llm_instance(prompt, max_tokens=10, stop=["[INST]"], echo=False)

        # Normalise whatever the model emitted.
        priority = completion['choices'][0]['text'].strip().lower()

        # Safety net: never trust the model to stay on-menu.
        if priority not in allowed_labels:
            print(f" - ⚠️ LLM returned invalid priority: '{priority}'. Defaulting to 'medium'.")
            priority = 'medium'

        print(f" - ✅ AI assigned priority: '{priority}'")
        return TaskPrioritizationResponse(priority=priority)

    except Exception as e:
        # Prioritization is best-effort; do not fail the request over it.
        print(f" - ❌ An unexpected error occurred during task prioritization: {e}")
        return TaskPrioritizationResponse(priority='medium')
1339
+
1340
+
1341
@app.post("/predict/earning-opportunities", response_model=EarningOpportunityResponse, summary="Finds the best earning opportunities for an influencer")
async def predict_earning_opportunities(request: EarningOpportunityRequest):
    """
    Based on an influencer's follower count, the AI model estimates their
    potential performance score across various campaign types. (SIMPLIFIED)

    Builds every (niche, format) scenario, one-hot encodes the categorical
    columns with the encoder saved at training time, predicts a score per
    scenario, and returns the 5 best.

    Raises:
        HTTPException 503: optimizer model or encoder missing at startup.
        HTTPException 500: any failure while encoding or predicting.
    """
    print(f"\n✅ Received request on /predict/earning-opportunities (SIMPLIFIED)")
    if _earnings_optimizer is None or _earnings_encoder is None:
        raise HTTPException(status_code=503, detail="Earning Optimizer model or encoder is not available.")

    try:
        # === ✨ THE FIX STARTS HERE ✨ ===
        # Step 1: Create scenarios in a DataFrame — the cartesian product of
        # 5 niches x 3 content formats (15 rows), all at the caller's follower count.
        scenarios_list = [
            {'campaign_niche': niche, 'content_format': c_format, 'follower_count': request.follower_count}
            for niche in ['Tech', 'Fashion', 'Food', 'Gaming', 'General']
            for c_format in ['Reel', 'Post', 'Story']
        ]
        df_scenarios = pd.DataFrame(scenarios_list)

        # Step 2: Manually encode the categorical features using the saved encoder.
        # Must mirror the training pipeline exactly — hence the persisted encoder.
        print(" - Manually encoding data using saved encoder...")
        categorical_features = ['campaign_niche', 'content_format']
        encoded_cats = _earnings_encoder.transform(df_scenarios[categorical_features])
        encoded_df = pd.DataFrame(encoded_cats, columns=_earnings_encoder.get_feature_names_out(categorical_features))

        # Step 3: Combine with numerical features.
        # reset_index keeps the rows aligned when concatenating side by side.
        numerical_features = df_scenarios[['follower_count']].reset_index(drop=True)
        X_final_to_predict = pd.concat([encoded_df, numerical_features], axis=1)

        # Step 4: Predict using the simple model
        print(f" - Predicting scores for {len(X_final_to_predict)} scenarios...")
        predicted_scores = _earnings_optimizer.predict(X_final_to_predict)
        # === ✨ THE FIX ENDS HERE ✨ ===

        # Attach a rule-based commentary band to each scenario's score.
        results = []
        for i, scenario in enumerate(scenarios_list):
            score = float(predicted_scores[i])
            comment = "This could be a good opportunity."
            if score > 0.75: comment = "Excellent Opportunity! Focus on this."
            elif score < 0.4: comment = "May not be the best fit for you."
            results.append(Opportunity(
                campaign_niche=scenario['campaign_niche'],
                content_format=scenario['content_format'],
                estimated_score=score,
                commentary=comment
            ))

        # Best-first; only the top 5 scenarios are returned to the caller.
        sorted_results = sorted(results, key=lambda x: x.estimated_score, reverse=True)
        return EarningOpportunityResponse(opportunities=sorted_results[:5])

    except Exception as e:
        print("🚨 An error occurred in /predict/earning-opportunities endpoint:")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
1397
+
1398
@app.post("/predict/post-performance", response_model=PostPerformanceResponse, summary="Predicts likes and comments for a new post")
async def predict_post_performance(request: PostPerformanceRequest):
    """
    Takes details of a potential post and uses two ML models to predict the
    number of likes and comments it might receive, plus rule-based feedback.
    """
    print(f"\n✅ Received request on /predict/post-performance")
    if not _likes_predictor or not _comments_predictor:
        raise HTTPException(status_code=503, detail="Performance prediction models are not available.")

    try:
        # Both models expect a one-row DataFrame shaped like their training data.
        features = pd.DataFrame([request.model_dump()])

        print(" - Predicting likes...")
        likes_estimate = _likes_predictor.predict(features)[0]

        print(" - Predicting comments...")
        comments_estimate = _comments_predictor.predict(features)[0]

        # Regressors can emit negatives; floor both predictions at zero.
        predicted_likes = max(0, int(likes_estimate))
        predicted_comments = max(0, int(comments_estimate))

        # Rule-based coaching tips on caption length and niche targeting.
        tips = []
        if request.caption_length < 50:
            tips.append("Consider writing a slightly longer caption to increase engagement.")
        elif request.caption_length > 800:
            tips.append("This is a long caption! Ensure the first line is very engaging.")
        else:
            tips.append("The caption length is good for engagement.")

        if request.campaign_niche == 'General':
            tips.append("Try to target a more specific niche in the future for better performance.")

        print(" - ✅ Successfully generated performance prediction and feedback.")

        return PostPerformanceResponse(
            predicted_likes=predicted_likes,
            predicted_comments=predicted_comments,
            feedback=" ".join(tips)
        )

    except Exception as e:
        print(f"🚨 An error occurred in the post-performance endpoint:")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
1449
+
1450
+
1451
@app.get("/analyze/performance-anomalies", response_model=List[AnomalyInsight], summary="Finds unusual performance trends for all influencers")
def analyze_anomalies(supabase: Client = Depends(get_supabase_client)):
    """
    Platform-wide anomaly sweep: pulls recent daily stats for every
    influencer, runs the rule-based anomaly detector per influencer, and
    returns only the influencers that produced at least one insight.
    """
    # This endpoint is heavy, so it should have security (e.g., requires an admin API key)
    print("🤖 Running platform-wide Anomaly Detection...")

    try:
        # 1. Fetch historical data for all influencers from our new stats table
        stats_res = supabase.table('daily_influencer_stats').select('*').order('date', desc=True).limit(5000).execute()  # Get last ~5000 entries
        profiles_res = supabase.table('profiles').select('id, full_name').eq('role', 'influencer').execute()

        if not stats_res.data: return []

        all_stats_df = pd.DataFrame(stats_res.data)
        # profile id -> display name, used to label each insight below
        profiles_map = {p['id']: p['full_name'] for p in profiles_res.data}

        all_insights = []

        # 2. Loop through each influencer
        for influencer_id, group in all_stats_df.groupby('profile_id'):
            historical_df = group.sort_values('date')
            # Last row after the ascending sort == most recent day's stats.
            today_stats = historical_df.iloc[-1].to_dict()

            # 3. Call the Anomaly Detector AI
            insights = find_anomalies(influencer_id, historical_df, today_stats)

            if insights:
                all_insights.append(AnomalyInsight(
                    influencer_id=influencer_id,
                    influencer_name=profiles_map.get(influencer_id, 'Unknown Influencer'),
                    insights=insights
                ))

        return all_insights

    except Exception as e:
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
1488
+
1489
+
1490
@app.post("/predict/revenue-forecast", response_model=RevenueForecastResponse, summary="Generates a 3-month revenue forecast")
async def predict_revenue_forecast():
    """
    (FAST VERSION) Uses the trained Holt's model to forecast revenue and adds simple commentary.

    Steps:
        1. Ask the pre-fitted forecaster for the next 3 monthly points.
        2. Label each point's trend relative to the previous value.
        3. Attach rule-based commentary (no LLM call, so this is instant).

    Raises:
        HTTPException 503: the forecasting model was not loaded at startup.
        HTTPException 500: any unexpected failure while forecasting.
    """
    print(f"\n✅ Received request on /predict/revenue-forecast (FAST VERSION)")
    if not _revenue_forecaster:
        raise HTTPException(status_code=503, detail="Revenue forecasting model is not available.")

    try:
        # Step 1: Generate forecast (fast — the model is already fitted)
        forecast_result = _revenue_forecaster.forecast(steps=3)

        # Step 2: Format the output and add trend analysis (also fast)
        forecast_datapoints = []
        # The last observed historical value is the baseline for the first delta.
        last_historical_value = _revenue_forecaster.model.endog[-1]

        for timestamp, predicted_value in forecast_result.items():
            trend_label = "Stable"
            # FIX: guard against a zero baseline, which previously raised
            # ZeroDivisionError; treat it as "no measurable change".
            if last_historical_value:
                percentage_change = ((predicted_value - last_historical_value) / last_historical_value) * 100
            else:
                percentage_change = 0.0
            if percentage_change > 15: trend_label = "Strong Growth"
            elif percentage_change > 5: trend_label = "Modest Growth"
            elif percentage_change < -10: trend_label = "Potential Downturn"

            forecast_datapoints.append(RevenueForecastDatapoint(
                month=timestamp.strftime('%B %Y'),
                predicted_revenue=round(predicted_value, 2),
                trend=trend_label
            ))
            # Each subsequent month's delta is measured against the previous prediction.
            last_historical_value = predicted_value

        # Step 3: Simple, rule-based commentary keyed off the first month's trend.
        first_trend = forecast_datapoints[0].trend if forecast_datapoints else "Stable"
        ai_commentary = "AI Insight: The forecast shows a stable outlook for the coming quarter."
        if "Growth" in first_trend:
            ai_commentary = "AI Insight: The model predicts a positive growth trend for the next quarter."
        elif "Downturn" in first_trend:
            ai_commentary = "AI Insight: A potential slowdown is predicted. It's a good time to review upcoming campaigns."

        print(" - ✅ Successfully generated revenue forecast (fast method).")

        return RevenueForecastResponse(
            forecast=forecast_datapoints,
            ai_commentary=ai_commentary
        )

    except Exception as e:
        print(f"🚨 An error occurred in the revenue forecast endpoint:")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
1540
+
1541
+
1542
@app.post("/predict/influencer-performance", response_model=InfluencerPerformanceResponse, summary="Predicts a holistic performance score for an influencer")
async def predict_influencer_performance(stats: InfluencerPerformanceStats):
    """
    Turn an influencer's key metrics into a single 0-100 performance score
    using the pre-trained scorer model.
    """
    print(f"\n✅ Received request on /predict/influencer-performance")
    if not _performance_scorer:
        raise HTTPException(status_code=503, detail="The Performance Scorer model is not available. Please train it first.")

    try:
        # The model expects a one-row DataFrame shaped like its training data.
        features = pd.DataFrame([stats.model_dump()])
        raw_scores = _performance_scorer.predict(features)

        # Clamp the raw regression output into the 0-100 range.
        clamped_score = min(100, max(0, int(raw_scores[0])))

        print(f" - ✅ Successfully predicted performance score: {clamped_score}")
        return {"performance_score": clamped_score}

    except Exception as e:
        print(f"🚨 An error occurred in the influencer performance endpoint:")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
1569
+
1570
+
1571
@app.post("/v1/match/rank-by-similarity", response_model=RankBySimilarityResponse, summary="Generic endpoint to rank documents by text similarity")
async def rank_by_similarity_endpoint(request: RankBySimilarityRequest):
    """Rank arbitrary documents against a free-text query by embedding similarity."""
    print(f"\n✅ Received request on /v1/match/rank-by-similarity")
    try:
        # Strip unset fields so the matcher only sees what the caller provided.
        docs = [document.model_dump(exclude_unset=True) for document in request.documents]
        ranked = rank_documents_by_similarity(query=request.query, documents=docs)
        print(f" - ✅ Successfully ranked {len(ranked)} documents.")
        return RankBySimilarityResponse(ranked_documents=ranked)
    except Exception as e:
        print(f"🚨 An error occurred in the ranking endpoint:")
        import traceback
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
1584
+
1585
+
1586
@app.post("/analyze/content-quality", response_model=ContentQualityResponse, summary="Analyzes a caption for a quality score")
def analyze_content_quality(request: ContentQualityRequest):
    """
    Uses the loaded LLM to analyze a social media caption and returns a robustly parsed response.

    Raises:
        HTTPException 503: LLM not loaded.
        HTTPException 500: LLM output could not be parsed, or any other failure.
    """
    # FIX: hoisted out of the try block. The except clause below references
    # json.JSONDecodeError; the old inline `import json` made `json` a local
    # name, so an exception raised before that import reached the except
    # clause as an UnboundLocalError.
    import json
    import traceback

    if not _llm_instance:
        raise HTTPException(status_code=503, detail="The Llama model is not available.")

    caption = request.caption

    prompt = f"""
[SYSTEM]
You are a social media expert. Analyze the following caption... Respond ONLY with a valid JSON object in the following format:
{{
"overall_score": <float>,
"scores": {{ "readability": <int>, "engagement": <int>, "call_to_action": <int>, "hashtag_strategy": <int> }},
"feedback": "<string>"
}}

[CAPTION TO ANALYZE]
"{caption}"

[YOUR JSON RESPONSE]
"""

    # FIX: pre-bind so the error log in the except clause cannot hit an
    # unbound name when the LLM call itself raises before assignment.
    json_text = ""
    try:
        print("--- Sending caption to LLM for quality analysis...")
        response = _llm_instance(prompt, max_tokens=512, temperature=0.2, stop=["[SYSTEM]", "\n\n"], echo=False)

        json_text = response['choices'][0]['text'].strip()
        # The model sometimes wraps the JSON in chatter; keep only the {...} span.
        start_index = json_text.find('{')
        end_index = json_text.rfind('}') + 1
        if start_index == -1 or end_index == 0:
            raise ValueError("LLM did not return a valid JSON object.")

        clean_json_text = json_text[start_index:end_index]
        analysis_result_raw = json.loads(clean_json_text)

        # Tolerate the model naming the per-axis block "score" instead of "scores".
        final_result = {
            "overall_score": analysis_result_raw.get("overall_score"),
            "feedback": analysis_result_raw.get("feedback"),
            "scores": analysis_result_raw.get("scores") or analysis_result_raw.get("score")
        }

        print("--- Successfully received and parsed JSON response from LLM.")
        return ContentQualityResponse(**final_result)

    except (json.JSONDecodeError, KeyError, ValueError) as e:
        print(f"🚨 ERROR parsing LLM response: {e}. Raw response was: {json_text}")
        raise HTTPException(status_code=500, detail="Failed to parse analysis from AI model.")
    except Exception as e:
        print(f"🚨 An unexpected error occurred during content analysis:")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
1644
+
1645
+
1646
@app.post("/generate/daily-briefing", response_model=DailyBriefingResponse, summary="Generates a daily action plan for the Talent Manager")
def generate_daily_briefing(data: DailyBriefingData):
    """
    Takes various KPIs from the backend, synthesizes them, and uses the LLM
    to generate a short, actionable daily briefing for a Talent Manager.

    Returns 503 when the model never loaded and 500 if the LLM call fails.
    """
    print(f"\n✅ Received request on /generate/daily-briefing")
    if not _llm_instance:
        raise HTTPException(status_code=503, detail="The Llama model is not available for briefing.")

    # --- ✨ THE FINAL "IDIOT-PROOF" PROMPT FOR TINYLLAMA ---
    # Deliberately terse: small models follow short, concrete instructions best.
    final_prompt = f"""
Summarize these key points into 2-3 direct bullet points for a manager.

DATA:
- Influencers without campaigns: {data.on_bench_influencers}
- Submissions needing review: {data.pending_submissions + data.revisions_requested}
- Total pending money: {data.highest_pending_payout:,.0f} INR

SUMMARY:
- """

    try:
        print("--- Sending briefing data to LLM (Idiot-Proof prompt)...")
        # Temperature 0.1 keeps the model more factual and less creative.
        response = _llm_instance(final_prompt, max_tokens=150, temperature=0.1, stop=["DATA:"], echo=False)

        briefing_text = response['choices'][0]['text'].strip()

        # Add our own header to make it look nice
        final_briefing = f"Here are your top priorities for today:\n- {briefing_text}"

        print("--- Successfully generated daily briefing.")
        return DailyBriefingResponse(briefing_text=final_briefing)

    except Exception as e:
        print(f"🚨 An unexpected error occurred during briefing generation:")
        import traceback
        traceback.print_exc()
        raise HTTPException(status_code=500, detail="Failed to generate AI briefing.")
1686
+
1687
+
1688
@app.post("/summarize-contract", response_model=ContractSummary, summary="Analyzes a PDF contract and extracts key terms")
def summarize_contract(request: ContractURL):
    """
    Download a contract PDF, extract its text, and have the LLM pull out key
    terms as a flat JSON object that FastAPI validates against ContractSummary.

    Raises:
        HTTPException 503: LLM not loaded.
        HTTPException 500: parse failure (specific message) or any other error.
    """
    print(f"\n✅ Received request on /summarize-contract (v3 - ROBUST)")
    if not _llm_instance:
        raise HTTPException(status_code=503, detail="The Llama model is not available.")

    try:
        print(" - 📑 Parsing PDF from URL...")
        contract_text = parse_pdf_from_url(request.pdf_url)
        contract_text = contract_text[:4000]  # Truncate to keep the prompt inside the context window
        print(f" - ✅ PDF parsed successfully. Truncated to {len(contract_text)} chars.")

        final_prompt = f"""
[INST]
You are a legal analysis AI. Your task is to extract specific details from a contract. You MUST respond ONLY with a single, valid JSON object. Do not add any text before or after the JSON.

**RULES FOR THE JSON VALUES:**
1. All values for "payment_details", "deliverables", "deadlines", "exclusivity", and "ownership" MUST be a single, plain string.
2. The value for "summary_points" MUST be a simple list of strings.
3. DO NOT use nested objects. DO NOT use nested lists. Summarize the content into plain text.

[EXAMPLE of a GOOD RESPONSE]
{{
"payment_details": "Client agrees to pay Influencer a total fee of $5,000 USD, payable in two installments.",
"deliverables": "Influencer must create 2 Instagram Reels and 5 Instagram Stories.",
"deadlines": "The deadline for all deliverables is October 30, 2024.",
"exclusivity": "Influencer agrees to an exclusivity period of 30 days post-campaign.",
"ownership": "The Client retains ownership of all created content.",
"summary_points": [
"Total payment is $5,000 USD.",
"Deliverables: 2 Reels, 5 Stories.",
"A 30-day exclusivity period applies after the campaign."
]
}}
[/EXAMPLE]

Now, based on these strict rules, analyze the following text:

[CONTRACT TEXT]
{contract_text}
[/CONTRACT TEXT]

[YOUR JSON RESPONSE]
"""

        print(" - 📞 Calling LLM with the new, stricter prompt...")
        response = _llm_instance(
            final_prompt,
            max_tokens=1024,
            temperature=0.0,  # Set to 0 for maximum factuality
            echo=False
        )

        raw_response_text = response['choices'][0]['text'].strip()

        print(" - ⚙️ Parsing JSON response from LLM...")
        try:
            start_index = raw_response_text.find('{')
            end_index = raw_response_text.rfind('}') + 1
            # FIX: fail loudly when no JSON object is present instead of
            # slicing nonsense and letting json.loads raise a cryptic error.
            if start_index == -1 or end_index == 0:
                raise ValueError("No JSON object found in the LLM response.")
            clean_json_text = raw_response_text[start_index:end_index]
            summary_data = json.loads(clean_json_text)

        except Exception as e:
            print(f"🚨 ERROR parsing LLM response: {e}. Raw response was: '{raw_response_text}'")
            raise HTTPException(status_code=500, detail="Failed to parse analysis from the AI model.")

        print("--- ✅ Successfully generated contract summary from LLM.")

        # We now return the raw dictionary. FastAPI will validate it against our simple ContractSummary model.
        return summary_data

    except HTTPException:
        # FIX: the specific 500 raised in the parse step above was previously
        # swallowed by the generic handler below (HTTPException ⊂ Exception).
        raise
    except Exception as e:
        traceback.print_exc()
        raise HTTPException(status_code=500, detail="An internal server error occurred in the AI.")
1763
+
1764
+
1765
@app.post("/predict/influencer-performance-score", response_model=InfluencerPerformanceResponse, summary="Predicts a holistic performance score for an influencer")
async def predict_influencer_performance_score(stats: InfluencerPerformanceStats):
    """
    Take influencer stats from the backend and return a 0-100 performance
    score computed by the pre-trained scorer model.
    """
    print(f"\n✅ Received request on /predict/influencer-performance-score")

    # Safety check: was the model actually loaded at startup?
    if _performance_scorer is None:
        print(" - ❌ ERROR: The Performance Scorer model (_performance_scorer) is not loaded.")
        raise HTTPException(
            status_code=503,
            detail="The Performance Scorer model is not available. Please ensure 'performance_scorer_v1.joblib' exists and is loaded."
        )

    try:
        # Column names must match the ones used during training exactly.
        features = pd.DataFrame([stats.model_dump()])
        print(f" - Input data for model: \n{features}")

        raw_prediction = _performance_scorer.predict(features)

        # Clamp the regression output into an integer 0-100 score.
        final_score = min(100, max(0, int(raw_prediction[0])))

        print(f" - ✅ Successfully predicted performance score: {final_score}")

        return InfluencerPerformanceResponse(performance_score=final_score)

    except Exception as e:
        print(f"🚨 An error occurred in the /predict/influencer-performance-score endpoint:")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
1803
+
1804
+
1805
@app.post("/ai/coach/generate-growth-plan", response_model=AIGrowthPlanResponse, summary="Generates personalized growth tips for a single influencer")
def generate_growth_plan_route(request: AIGrowthPlanRequest):
    """
    Feed one influencer's live performance data to the AI strategist and
    return its personalized improvement tips.
    """
    print(f"\n✅ Received request on /ai/coach/generate-growth-plan for: {request.fullName}")
    if not _ai_strategist:
        raise HTTPException(status_code=503, detail="AI Strategist is not available.")

    try:
        # The strategist expects a plain dict, not a Pydantic model.
        payload = request.model_dump()
        tips = _ai_strategist.generate_influencer_growth_plan(payload)
        return AIGrowthPlanResponse(insights=tips)

    except Exception as e:
        print(f"🚨 An error occurred in the Growth Plan endpoint: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
1825
+
1826
+
1827
@app.post("/analyze/brand-asset-colors", response_model=BrandAssetAnalysisResponse, summary="Extracts dominant colors from a logo URL")
def analyze_brand_asset_colors(request: BrandAssetAnalysisRequest):
    """
    Download a brand image (logo/product) in memory and extract its dominant
    colors via KMeans clustering. Never fails the caller: on any error a
    single-black fallback palette is returned instead.
    """
    print(f"\n✅ Received request on /analyze/brand-asset-colors")
    try:
        palette = extract_colors_from_url(request.file_url)
        print(f" - ✅ Extracted colors: {palette}")
        return BrandAssetAnalysisResponse(dominant_colors=palette)

    except Exception:
        print(f"🚨 An error occurred during color extraction:")
        traceback.print_exc()
        # Fail gracefully with a neutral default palette.
        return BrandAssetAnalysisResponse(dominant_colors=["#000000"])
1846
+
1847
+
1848
@app.post("/generate/service-blueprint", response_model=ServiceBlueprintResponse, summary="Generates an AI project plan for a service")
async def generate_service_blueprint_route(request: ServiceBlueprintRequest):
    """
    Turn a service type plus free-form user requirements into a structured
    project plan (blueprint) via the AI strategist.
    """
    print(f"\n✅ Received request on /generate/service-blueprint for type: {request.service_type}")
    if not _ai_strategist:
        raise HTTPException(status_code=503, detail="AI Strategist is not available.")

    try:
        blueprint = _ai_strategist.generate_service_blueprint(
            service_type=request.service_type,
            requirements=request.requirements
        )

        # The strategist reports its internal failures via an "error" key.
        if "error" in blueprint:
            raise HTTPException(status_code=500, detail=blueprint["error"])

        return ServiceBlueprintResponse(**blueprint)

    except HTTPException:
        # Known HTTP errors pass through untouched.
        raise
    except Exception:
        print(f"🚨 An unexpected error occurred in the blueprint endpoint:")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail="An internal server error occurred while generating the blueprint.")
1878
+
1879
+
1880
@app.post("/generate/growth-plan", response_model=ServiceBlueprintResponse, summary="Generates an AI management plan for an influencer")
async def generate_growth_plan_route(request: GrowthPlanRequest):
    """
    Takes influencer goals and uses the AI Strategist to generate a growth plan.

    NOTE(review): this redefines the module-level name `generate_growth_plan_route`
    already used by the /ai/coach/generate-growth-plan handler earlier in this
    file. FastAPI keeps both routes (registration happens at decoration time),
    but only this function stays reachable by name — consider renaming one.
    """
    print(f"\n✅ Naya Endpoint Hit: /generate/growth-plan for handle: {request.platform_handle}")
    if not _ai_strategist:
        raise HTTPException(status_code=503, detail="AI Strategist is not available.")

    try:
        # Delegates to the strategist's dedicated growth-plan generator
        # (distinct from generate_service_blueprint).
        blueprint_data = _ai_strategist.generate_growth_plan(
            platform_handle=request.platform_handle,
            goals=request.goals,
            challenges=request.challenges
        )

        # The strategist reports internal failures via an "error" key.
        if "error" in blueprint_data:
            raise HTTPException(status_code=500, detail=blueprint_data["error"])

        return ServiceBlueprintResponse(**blueprint_data)

    except HTTPException as http_exc:
        raise http_exc
    except Exception as e:
        print(f"🚨 Unexpected error in growth plan endpoint: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail="An internal server error occurred.")
1908
+
1909
+
1910
@app.post("/submit_summary_job")
def submit_summary_job(request: AISummaryJobRequest, background_tasks: BackgroundTasks):
    """
    Accept a summarization job and return immediately; the AI work is
    scheduled to run after the response has been sent.
    """
    print(f" - ✅ Job accepted for check-in ID: {request.checkin_id}. Starting in background...")
    # Defer the heavy summarization to FastAPI's background task machinery.
    background_tasks.add_task(
        process_summary_in_background,
        request.checkin_id,
        request.raw_text,
    )
    return {"message": "Job accepted", "checkin_id": request.checkin_id}
1918
+
1919
+
1920
@app.post("/generate/weekly-plan", response_model=WeeklyPlanResponse, summary="Generates 3 content tasks for an influencer")
def generate_weekly_plan_route(request: WeeklyPlanRequest):
    """
    Generate three tailored content options from influencer context
    (mood, niche, trends). Declared sync on purpose so FastAPI runs the
    blocking strategist call in its thread pool.
    """
    print(f"\n✅ Received request on /generate/weekly-plan")
    if not _ai_strategist:
        raise HTTPException(status_code=503, detail="AI Strategist is not available.")

    try:
        # The strategist expects a plain dict rather than a Pydantic model.
        plan = _ai_strategist.generate_weekly_content_plan(request.context.model_dump())
        return WeeklyPlanResponse(**plan)

    except Exception as e:
        print(f"🚨 Error in weekly plan endpoint: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
1942
+
1943
+
1944
@app.post("/chat/creative", response_model=Dict[str, str], summary="Brainstorming chat with AI Creative Director")
def creative_chat_endpoint(request: CreativeChatRequest):
    """
    Relay a user message (plus chat history and task context) to the AI
    Creative Director and return its short, punchy reply (RAG + LLM).
    """
    try:
        # The director expects the chat history as plain dicts.
        past_turns = [turn.model_dump() for turn in request.history]

        reply = director.chat(
            user_message=request.message,
            history=past_turns,
            task_context=request.task_context
        )
        return {"reply": reply}

    except Exception as e:
        print(f"🚨 Creative Chat Error: {e}")
        raise HTTPException(status_code=500, detail="AI Director is busy.")
1963
+
1964
+
1965
@app.post("/generate/final-from-chat", response_model=FinalScriptResponse, summary="Generates final structured script from chat history")
def finalize_script_endpoint(request: FinalizeScriptRequest):
    """
    Summarizes the conversation into a shoot plan.
    This version is ROBUST and handles messy LLM output.

    Parsing strategy: try to pull a JSON object out of the raw LLM text
    first; if that fails, fall back to regex extraction of quoted
    "Hook:"/"Script:" fields; if even that fails, ship the raw text as the
    script with default visuals/tools.
    """
    import json  # FIX: hoisted out of the inner try so the name is always bound

    try:
        history_list = [m.model_dump() for m in request.history]

        raw_text = director.generate_final_plan(
            task_context=request.task_context,
            history=history_list
        )

        print(f" - 🤖 Raw Final Plan from LLM:\n---\n{raw_text}\n---")

        # --- IDIOT-PROOF PARSING LOGIC ---
        plan = { "hook": "", "script": "", "visuals": [], "tools": [] }

        # Attempt 1: the model returned (something containing) a JSON object.
        try:
            json_match = re.search(r'\{.*\}', raw_text, re.DOTALL)
            if json_match:
                parsed = json.loads(json_match.group(0))
                plan["hook"] = parsed.get("hook", "")
                plan["script"] = parsed.get("script", "")
                plan["visuals"] = parsed.get("visuals", [])
                plan["tools"] = parsed.get("tools", [])

                # Good enough if we recovered either a hook or a script.
                if plan["hook"] or plan["script"]:
                    return FinalScriptResponse(**plan)
        # FIX: was a bare `except: pass`, which also swallowed SystemExit and
        # KeyboardInterrupt. Only malformed-JSON / wrong-shape problems should
        # trigger the regex fallback. (json.JSONDecodeError ⊂ ValueError;
        # pydantic's ValidationError is a ValueError too.)
        except (ValueError, TypeError, AttributeError):
            pass  # JSON parsing failed — fall through to regex

        # Attempt 2: regex extraction from quoted "Hook:"/"Script:" lines.
        hook_match = re.search(r"Hook:?\s*\"(.*?)\"", raw_text, re.IGNORECASE)
        script_match = re.search(r"Script:?\s*\"(.*?)\"", raw_text, re.IGNORECASE)

        plan["hook"] = hook_match.group(1) if hook_match else "Start with a bang!"

        # No recognizable script — treat the whole raw text as the script.
        plan["script"] = script_match.group(1) if script_match else raw_text

        plan["visuals"] = ["Close up shot", "Wide shot"]
        plan["tools"] = ["CapCut"]

        return FinalScriptResponse(**plan)

    except Exception as e:
        print(f"🚨 Finalize Script Error: {e}")
        raise HTTPException(status_code=500, detail="Failed to generate final plan.")
core/__init__.py ADDED
File without changes
core/anomaly_detector.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # File: ai-service/core/anomaly_detector.py (NEW FILE)
2
+
3
+ import pandas as pd
4
+ from statsmodels.tsa.seasonal import seasonal_decompose
5
+
6
def find_anomalies(influencer_id: str, historical_data: pd.DataFrame, today_stats: dict) -> list[str]:
    """
    Compare today's stats against an influencer's recent history and return
    human-readable anomaly insights.

    Args:
        influencer_id: Identifier of the influencer (currently unused; kept
            for API compatibility and future per-influencer tuning).
        historical_data: DataFrame with at least 'date', 'avg_engagement_rate'
            and 'follower_count' columns, ordered oldest -> newest.
        today_stats: Dict with today's 'avg_engagement_rate' and
            'follower_count' (missing keys default to 0).

    Returns:
        A list of insight strings (possibly empty).
    """
    insights: list[str] = []
    df = historical_data.copy()

    if len(df) < 30:  # Need at least 30 days of data for meaningful analysis
        return ["Not enough historical data to analyze trends yet."]

    df.set_index('date', inplace=True)

    # Anomaly 1: Performance spikes/dips vs the 90-day average.
    avg_engagement_90d = df['avg_engagement_rate'].tail(90).mean()
    today_engagement = today_stats.get('avg_engagement_rate', 0)

    # Guard against a zero/NaN baseline: with numpy floats, dividing by zero
    # silently yields inf/NaN and would produce nonsense "inf% above" alerts.
    if avg_engagement_90d and not pd.isna(avg_engagement_90d):
        percentage_change = ((today_engagement - avg_engagement_90d) / avg_engagement_90d) * 100
        if percentage_change > 100:
            insights.append(f"🚀 High Performer Alert: Engagement rate spiked to {today_engagement:.2f}%, which is {percentage_change:.0f}% above the 90-day average. A recent post may be going viral.")
        elif percentage_change < -50:
            insights.append(f"⚠️ Performance Dip Alert: Engagement has dropped by {abs(percentage_change):.0f}%. It's worth checking in with this influencer.")

    # Anomaly 2: Follower growth over the last 7 days.
    follower_change = today_stats.get('follower_count', 0) - df['follower_count'].tail(7).iloc[0]
    if follower_change > 5000:  # Example threshold
        insights.append(f"📈 Follower Growth Spike: Gained {follower_change} followers this week. This is unusually high.")

    return insights
core/creative_chat.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ from llama_cpp import Llama
4
+ import json
5
+ import re
6
+
7
+ # Path setup to import VectorStore from the parent directory
8
+ current_dir = os.path.dirname(os.path.abspath(__file__))
9
+ parent_dir = os.path.dirname(current_dir)
10
+ sys.path.append(parent_dir)
11
+
12
+ from core.rag.store import VectorStore
13
+
14
class CreativeDirector:
    """Local-LLM "creative director" that chats about content ideas and
    produces a final shoot plan.

    Wraps a llama.cpp TinyLlama model plus a RAG vector store (the
    "creative_mind" collection).  Loading the GGUF model happens in
    __init__, so construction is slow and requires the model file on disk.
    """

    def __init__(self):
        """Initialize Model and Memory once to save time."""

        # Using TinyLlama as it is faster on CPU
        model_path = os.path.join(parent_dir, "llm_model", "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf")

        if not os.path.exists(model_path):
            raise FileNotFoundError(f"❌ Model not found at: {model_path}. Please check the llm_model folder.")

        print("🧠 Loading AI Director (TinyLlama - SUPER FAST MODE)...")

        # Small context/batch sizes keep CPU inference latency low.
        self.llm = Llama(
            model_path=model_path,
            n_ctx=512,
            n_batch=32,
            n_threads=4,
            verbose=False
        )
        self.memory = VectorStore(collection_name="creative_mind")
        print("✅ AI Director is Online.")

    def chat(self, user_message: str, history: list, task_context: str):
        """Main Chat Logic with RAG, optimized for speed.

        Args:
            user_message: The user's latest message.
            history: Prior chat messages (not used here; kept for interface
                symmetry with generate_final_plan).
            task_context: Short description of the content task.

        Returns:
            A short tip string.  Errors are swallowed and replaced by a
            canned reply, so this never raises.
        """

        print(f" - 🧠 Thinking...")

        # Retrieve at most one snippet and truncate it to 150 chars to keep
        # the prompt (and therefore latency) small.
        retrieved_docs = self.memory.search(user_message, n_results=1)
        expert_knowledge = retrieved_docs[0][:150] if retrieved_docs else "Be creative and direct."

        prompt = f"""Instruction: Act as a Viral Content Expert. Give 1 short tip for "{task_context}".
Context: {expert_knowledge}
User: {user_message}
Response:"""

        try:
            response = self.llm(
                prompt,
                max_tokens=50,
                stop=["Instruction:", "User:", "\n\n"],
                temperature=0.7,
                echo=False
            )

            reply = response['choices'][0]['text'].strip()
            if not reply:
                # Empty completion -> fall back to a generic evergreen tip.
                return "Try showing a 'before vs after' comparison. It always works!"

            print(f" - 🗣️ Reply: {reply}")
            return reply

        except Exception as e:
            print(f" - ❌ AI Chat Error: {e}")
            return "My AI brain is a bit slow today. Please ask again!"

    def generate_final_plan(self, task_context: str, history: list):
        """Generates the final script using simple text fallback.

        Returns:
            A dict with keys "hook", "script" (strings) and "visuals",
            "tools" (lists).  Missing labels in the model output fall back
            to defaults; on any error a placeholder plan is returned.
        """
        print(f" - 🎬 Generating final plan for: {task_context}")

        # Only the last 3 messages are summarized to keep the prompt short.
        conversation_summary = "\n".join([f"- {msg['content']}" for msg in history[-3:]])

        # Ask for labeled plain text instead of JSON -- far more reliable
        # with a small model.
        prompt = f"""Instruction: Create a video script for "{task_context}".
Chat Summary: {conversation_summary}

Format your answer exactly like this:
HOOK: (Write hook here)
SCRIPT: (Write script here)
VISUALS: (Write visuals here)
TOOLS: (Write tools here)

Response:"""

        try:
            response = self.llm(
                prompt,
                max_tokens=300,
                stop=["Instruction:", "Response:"],
                temperature=0.6,
                echo=False
            )

            raw_text = response['choices'][0]['text'].strip()
            print(f" - 🤖 Raw Text: {raw_text}")

            # Robust parsing: extract each labeled section via a regex with
            # a lookahead on the next label.
            hook_match = re.search(r'HOOK:\s*(.*?)(?=\nSCRIPT:)', raw_text, re.DOTALL | re.IGNORECASE)
            script_match = re.search(r'SCRIPT:\s*(.*?)(?=\nVISUALS:)', raw_text, re.DOTALL | re.IGNORECASE)
            visuals_match = re.search(r'VISUALS:\s*(.*?)(?=\nTOOLS:)', raw_text, re.DOTALL | re.IGNORECASE)
            tools_match = re.search(r'TOOLS:\s*(.*)', raw_text, re.DOTALL | re.IGNORECASE)

            # If a label is missing, fall back to defaults (the whole raw
            # text becomes the script).
            return {
                "hook": hook_match.group(1).strip() if hook_match else "Start with a bang!",
                "script": script_match.group(1).strip() if script_match else raw_text,
                "visuals": [visuals_match.group(1).strip()] if visuals_match else ["Talking Head"],
                "tools": [tools_match.group(1).strip()] if tools_match else ["CapCut"]
            }

        except Exception as e:
            print(f" - ❌ Final Plan Generation Error: {e}")
            # Fallback plan so callers always get the expected dict shape.
            return {
                "hook": "Error generating plan.",
                "script": "Please try again later.",
                "visuals": [],
                "tools": []
            }
123
+
124
# Create a single instance to be used by the API.
# NOTE: this loads the GGUF model at import time, so importing this module
# is slow and raises FileNotFoundError if the model file is missing.
director = CreativeDirector()
core/document_parser.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # FILE: ai-service/core/document_parser.py
2
+
3
+ import fitz # PyMuPDF library
4
+ import requests
5
+ import io
6
+
7
def parse_pdf_from_url(pdf_url: str) -> str:
    """
    Downloads a PDF from a URL, extracts all text, and returns it as a single string.

    Args:
        pdf_url: Publicly reachable URL of the PDF file.

    Returns:
        Concatenated text of all pages, each page followed by a blank line.

    Raises:
        ConnectionError: If the file could not be downloaded.
        ValueError: If the downloaded bytes could not be parsed as a PDF.
    """
    print(f" - 📑 Downloading and parsing PDF from URL...")
    try:
        # Step 1: Download the PDF content from the URL
        response = requests.get(pdf_url, timeout=30)
        response.raise_for_status()  # Raise an exception for bad status codes

        pdf_bytes = response.content

        # Step 2: Open the PDF from memory using PyMuPDF
        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
        try:
            # Step 3: Extract text page by page.  Iterating the document
            # directly replaces the manual range(len(doc)) / load_page dance.
            full_text = "".join(page.get_text("text") + "\n\n" for page in doc)
        finally:
            # Always release the document handle -- the original leaked it
            # when text extraction failed mid-parse.
            doc.close()

        print(f" - ✅ PDF parsed successfully. Total characters: {len(full_text)}")
        return full_text

    except requests.exceptions.RequestException as e:
        print(f" - ❌ FAILED to download PDF: {e}")
        raise ConnectionError(f"Could not download the file from the provided URL: {pdf_url}") from e
    except Exception as e:
        print(f" - ❌ FAILED to parse PDF: {e}")
        raise ValueError("The provided file could not be parsed as a valid PDF.") from e
core/guardrails/safety.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class SafetyGuard:
    """Lightweight prompt-safety helper: blacklist screening plus
    whitespace normalization for user-supplied text."""

    # Extend this set over time as new abuse patterns show up.
    BLACKLIST_WORDS = {
        "ignore instructions", "system prompt", "password", "secret key",
        "hack", "bypass"
    }

    @staticmethod
    def validate_input(text: str) -> bool:
        """Return True when *text* is non-blank and contains no blacklisted phrase."""
        if not text or not text.strip():
            return False
        lowered = text.lower()
        # Substring check: any blacklisted phrase anywhere fails validation.
        return not any(phrase in lowered for phrase in SafetyGuard.BLACKLIST_WORDS)

    @staticmethod
    def sanitize(text: str) -> str:
        """Collapse runs of whitespace into single spaces and trim the ends."""
        return " ".join(text.split())
core/inference/cache.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from cachetools import TTLCache
2
+ import functools
3
+
4
+ # Cache up to 100 items for 1 hour (3600 seconds)
5
+ memory_cache = TTLCache(maxsize=100, ttl=3600)
6
+
7
def cached_response(func):
    """Decorator that memoizes *func* results in the module-level TTL cache.

    The cache key is the string form of the FIRST positional argument only
    (keyword arguments are deliberately ignored to keep keys simple), so
    only decorate functions whose output depends on that first argument.
    Calls without positional arguments bypass the cache entirely.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        if not args:
            # Nothing to key on -> don't cache (the original looked up a
            # shared "default" key it never stored, i.e. a guaranteed miss).
            return func(*args, **kwargs)

        # Build the key once; fall back to an uncached call if key
        # construction fails (e.g. a broken __str__ on the argument).
        try:
            cache_key = str(args[0])
        except Exception:
            return func(*args, **kwargs)

        if cache_key in memory_cache:
            # print(f"⚡ Using Cache for: {cache_key[:30]}...")
            return memory_cache[cache_key]

        result = func(*args, **kwargs)
        memory_cache[cache_key] = result
        return result
    return wrapper
core/matcher.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # FILE: ai-service/core/matcher.py (FINAL VERSION)
2
+
3
+ from sentence_transformers import SentenceTransformer, util
4
+ import torch
5
+ from typing import List, Dict, Any
6
+
7
+ _embedding_model = None
8
+
9
def load_embedding_model(model_path: str):
    """Load the sentence-transformer into the module-level singleton.

    Subsequent calls are no-ops; the model goes to GPU when available.
    """
    global _embedding_model
    if _embedding_model is not None:
        return  # Already loaded -- nothing to do.

    print(f" - 🧠 Loading embedding model from: {model_path}")
    target_device = "cuda" if torch.cuda.is_available() else "cpu"
    _embedding_model = SentenceTransformer(model_path, device=target_device)
    print(f" - ✅ Embedding model loaded successfully on '{target_device}'.")
17
+
18
def rank_documents_by_similarity(query: str, documents: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Ranks a list of documents based on their semantic similarity to a query.

    Each document dict gets a 'match_score' key added in place (0-100,
    higher is more similar, negative similarities clamped to 0) and the
    list is returned sorted by that score, descending.

    Raises:
        RuntimeError: If the embedding model has not been loaded yet.
    """
    global _embedding_model
    if _embedding_model is None:
        # This error is critical: without the model nothing can be ranked.
        # RuntimeError (a subclass of Exception, so existing handlers still
        # catch it) instead of a bare generic Exception.
        raise RuntimeError("CRITICAL: Embedding model is not loaded. Please ensure load_embedding_model() is called on startup.")

    if not documents:
        return []

    doc_texts = [doc.get('text', '') for doc in documents]

    query_embedding = _embedding_model.encode(query, convert_to_tensor=True)
    doc_embeddings = _embedding_model.encode(doc_texts, convert_to_tensor=True)

    cosine_scores = util.pytorch_cos_sim(query_embedding, doc_embeddings)[0]

    # Attach the score to every document BEFORE sorting.
    for doc, score in zip(documents, cosine_scores):
        doc['match_score'] = round(max(0, score.item() * 100))

    # Sort the documents, which now all carry the 'match_score' key.
    return sorted(documents, key=lambda d: d.get('match_score', 0), reverse=True)
core/predictor.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # FILE: ai-service/core/predictor.py (REPLACE EVERYTHING IN YOUR FILE WITH THIS)
2
+
3
+ import joblib
4
+ import pandas as pd
5
+ from typing import List, Dict
6
+
7
+ print(">>> Loading ai-service/core/predictor.py (Version: FINAL, COMPLETE)")
8
+
9
# Load the serialized model pipelines once at import time.  A missing model
# file is not fatal here: the pipeline is set to None and the prediction
# functions below degrade gracefully (empty results / fallback values).
try:
    influencer_pipeline = joblib.load('models/influencer_matcher_v1.joblib')
    print("--- Predictor: Influencer Matcher model loaded successfully. ---")
except FileNotFoundError as e:
    print(f"--- Predictor FATAL ERROR: Model file not found: {e}. Predictions will fail. ---")
    influencer_pipeline = None

# The performance predictor is optional as well; predict_performance()
# returns a hard-coded fallback value when it is unavailable.
try:
    performance_pipeline = joblib.load('models/performance_predictor_v1.joblib')
    print("--- Predictor: Performance Predictor model loaded successfully. ---")
except FileNotFoundError:
    performance_pipeline = None
23
+
24
def rank_influencers_by_match(influencers: List[Dict], campaign_details: Dict, top_n: int = 5) -> List[Dict]:
    """
    Rank candidate influencers for a campaign using the matcher model.

    Args:
        influencers: List of influencer dicts; each must provide at least
            'category' and 'bio' (other keys pass through to the result).
        campaign_details: Campaign dict; 'category', 'location', 'followers'
            and 'engagement_rate' are used as model features, with defaults
            when absent.
        top_n: How many of the highest-scoring influencers to return.

    Returns:
        Up to *top_n* influencer dicts sorted by predicted match score,
        or [] when the model is unavailable or prediction fails.
    """
    print(f"--- Predictor Skill: Ranking {len(influencers)} influencers...")

    if not influencers or influencer_pipeline is None:
        return []

    try:
        # Step 1: Build a DataFrame from the influencer list.
        influencer_df = pd.DataFrame(influencers)

        # Step 2: Assemble the feature frame the model expects --
        # influencer-side columns plus campaign-side columns (the latter are
        # identical for every row).

        # 1. Influencer-side features
        features = influencer_df[['category', 'bio']].copy()

        # 2. Campaign-side features, with defaults in case the backend did
        # not send the data.
        features['niche'] = campaign_details.get('category', '')  # Assume campaign category maps to niche
        features['country'] = campaign_details.get('location', 'USA')
        features['followers'] = campaign_details.get('followers', 10000)
        features['engagement_rate'] = campaign_details.get('engagement_rate', 0.03)

        print(f"--- Predictor Skill: Preparing features for model: {features.columns.to_list()}")

        # Step 3: Predict a match score per influencer.
        match_scores = influencer_pipeline.predict(features)
        influencer_df['match_score'] = match_scores

        # Step 4: Sort by score and keep the top N.
        top_influencers_df = influencer_df.sort_values(by='match_score', ascending=False).head(top_n)

        # Step 5: Return a clean result, gracefully skipping any columns
        # that do not exist in the input dicts.
        result_cols = ['id', 'name', 'handle', 'followers', 'category', 'bio']
        final_cols = [col for col in result_cols if col in top_influencers_df.columns]
        results = top_influencers_df[final_cols].to_dict(orient='records')

        print(f"--- Predictor Skill: Successfully ranked and returning top {len(results)} influencers.")
        return results

    except Exception as e:
        print(f"--- Predictor Skill ERROR: Failed during prediction process. Error: {e}")
        import traceback
        traceback.print_exc()
        return []
73
+
74
def predict_performance(data: dict) -> int:
    """Predict 'final_reach' for a campaign from its feature dict.

    Falls back to a fixed estimate when the model could not be loaded.
    """
    if performance_pipeline is None:
        return 800000  # Fallback value when the model is unavailable

    single_row = pd.DataFrame(data, index=[0])
    predicted = performance_pipeline.predict(single_row)
    return int(predicted[0])
core/rag/store.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import chromadb
2
+ from chromadb.config import Settings
3
+ import os
4
+
5
+ # Path to save the database inside ai-service/data
6
+ BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
7
+ DB_PATH = os.path.join(BASE_DIR, "data", "chroma_db")
8
+
9
class VectorStore:
    """Thin wrapper around a persistent ChromaDB collection.

    Offers upsert ('add_text') and similarity search ('search'); failures
    are logged and reported as False / empty list rather than raised.
    """

    def __init__(self, collection_name="platform_knowledge"):
        """Initialize persistent ChromaDB client."""
        # Make sure the storage directory exists before Chroma touches it.
        os.makedirs(DB_PATH, exist_ok=True)

        self.client = chromadb.PersistentClient(path=DB_PATH)
        # Create the collection on first use, reuse it afterwards.
        self.collection = self.client.get_or_create_collection(name=collection_name)

    def add_text(self, text_chunks, metadatas, ids):
        """Upsert text chunks (with metadata) into the collection; True on success."""
        try:
            self.collection.upsert(
                documents=text_chunks,
                metadatas=metadatas,
                ids=ids
            )
        except Exception as e:
            print(f"[RAG Error] Failed to add text: {str(e)}")
            return False
        return True

    def search(self, query, n_results=2):
        """Return up to *n_results* stored texts most similar to *query*."""
        try:
            hits = self.collection.query(
                query_texts=[query],
                n_results=n_results
            )
            # Chroma nests results per query; unwrap the first (only) query.
            if hits['documents']:
                return hits['documents'][0]
            return []
        except Exception as e:
            print(f"[RAG Error] Search failed: {str(e)}")
            return []
core/strategist.py ADDED
@@ -0,0 +1,609 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # FILE: ai-service/core/strategist.py (REPLACE EVERYTHING IN YOUR FILE WITH THIS)
2
+
3
+ import traceback
4
+ from typing import Dict, Any, List
5
+ import json
6
+ import re
7
+ from llama_cpp import Llama
8
+
9
+ try:
10
+ from core.guardrails.safety import SafetyGuard
11
+ except ImportError:
12
+ SafetyGuard = None
13
+ print("⚠️ Safety module not found. Skipping checks.")
14
+
15
+
16
+ class AIStrategist:
17
    # Single clean __init__ (an earlier revision of this class had two).
    def __init__(self, llm_instance: Llama, store=None):
        """
        Args:
            llm_instance: A ready llama.cpp model instance; must not be None.
            store: Optional vector store used for RAG lookups in chat.

        Raises:
            ValueError: If llm_instance is None.
        """
        if llm_instance is None:
            raise ValueError("AIStrategist requires a valid Llama instance.")
        self.llm = llm_instance
        self.store = store  # Vector DB Store
        print("--- AIStrategist initialized successfully (RAG Ready). ---")
24
+
25
+ def generate_campaign_brief(self, brand_name: str, campaign_goal: str, target_audience: str, budget_range: str) -> Dict[str, Any]:
26
+ """
27
+ Generates a structured, JSON-formatted campaign brief and cleans the output.
28
+ """
29
+ print(f"--- Strategist Skill: Generating campaign brief for brand '{brand_name}'.")
30
+
31
+ prompt = f"""
32
+ [SYSTEM]
33
+ You are an expert campaign strategist. Your task is to generate a creative and actionable campaign brief in a valid JSON object format. Do not add any text before or after the JSON object.
34
+
35
+ [CLIENT INPUT]
36
+ - Brand Name: {brand_name}
37
+ - Primary Goal: {campaign_goal}
38
+ - Target Audience: {target_audience}
39
+ - Budget: {budget_range}
40
+
41
+ [YOUR TASK]
42
+ Generate a JSON object with keys: "title", "description", "goal_kpi", and "content_guidelines" (as a list of strings).
43
+ - "title": A short, catchy campaign title.
44
+ - "description": A one-paragraph summary of the campaign's core idea.
45
+ - "goal_kpi": The single most important Key Performance Indicator (KPI) for this goal.
46
+ - "content_guidelines": A list of 3 creative content ideas for influencers.
47
+
48
+ [JSON OUTPUT]
49
+ """
50
+ try:
51
+ response_dict = self.llm(
52
+ prompt,
53
+ max_tokens=700,
54
+ temperature=0.8,
55
+ stop=["[CLIENT INPUT]", "\n\n", "User:"],
56
+ echo=False
57
+ )
58
+ raw_text = response_dict['choices'][0]['text'].strip()
59
+ if '```json' in raw_text:
60
+ raw_text = raw_text.split('```json\n')[1].split('```')[0]
61
+ elif '{' in raw_text:
62
+ raw_text = raw_text[raw_text.find('{'):raw_text.rfind('}') + 1]
63
+
64
+ json_response = json.loads(raw_text)
65
+ print("--- Strategist Skill: Successfully parsed brief from LLM.")
66
+
67
+ if 'goal_kpi' in json_response and isinstance(json_response['goal_kpi'], list):
68
+ print("--- Strategist Skill: Cleaning up 'goal_kpi' field (list -> string).")
69
+ json_response['goal_kpi'] = json_response['goal_kpi'][0] if json_response['goal_kpi'] else 'N/A'
70
+ if 'title' in json_response and isinstance(json_response['title'], list):
71
+ json_response['title'] = json_response['title'][0] if json_response['title'] else 'AI Generated Title'
72
+ if 'description' in json_response and isinstance(json_response['description'], list):
73
+ json_response['description'] = json_response['description'][0] if json_response['description'] else 'AI Generated Description'
74
+
75
+ return json_response
76
+
77
+ except (json.JSONDecodeError, KeyError) as e:
78
+ response_content = locals().get('raw_text', 'No raw text available')
79
+ print(f"--- Strategist Skill FATAL ERROR: Failed to decode/parse JSON from model. Error: {e}. Raw output: '{response_content}'")
80
+ return {"error": "The AI model returned an invalid format. Please try again."}
81
+ except Exception as e:
82
+ print(f"--- Strategist Skill FATAL ERROR in generate_campaign_brief: {e}")
83
+ traceback.print_exc()
84
+ return {"error": "An internal error occurred in the AI model."}
85
+
86
+ def generate_strategy_from_prompt(self, user_prompt: str) -> str:
87
+ """
88
+ Generates a general strategy from a raw prompt.
89
+ """
90
+ print(f"--- Strategist Skill (General): Received prompt: '{user_prompt[:50]}...'")
91
+ try:
92
+ response = self.llm(
93
+ user_prompt,
94
+ max_tokens=750,
95
+ temperature=0.75,
96
+ stop=["User:", "Client:", "System:"],
97
+ )
98
+ generated_text = response['choices'][0]['text'].strip()
99
+ print("--- Strategist Skill (General): Received response from LLM.")
100
+ return generated_text
101
+ except Exception as e:
102
+ print(f"--- Strategist Skill (General) ERROR: {e}")
103
+ traceback.print_exc()
104
+ return "An error occurred in the AI model while generating the strategy."
105
+
106
    def generate_weekly_summary(self, metrics: Dict[str, Any]) -> str:
        """
        Generates a concise, human-readable weekly summary from structured metrics data.

        Args:
            metrics: Expected keys include 'brand_id', 'start_date',
                'end_date', 'total_ad_spend', 'total_clicks', 'new_followers'
                and 'top_performing_campaign' -- all optional (defaults used).

        Returns:
            The summary text, or an error/empty-summary message string.
        """
        print(f"--- Strategist Skill (Summary): Received metrics for brand {metrics.get('brand_id')}")
        prompt_template = f"""
        You are an expert digital marketing analyst writing a weekly summary for a client. Your tone should be positive, encouraging, and easy to understand. Do not use jargon. Focus on the key results and what they mean.

        Client's Performance Data for the week of {metrics.get('start_date')} to {metrics.get('end_date')}:
        - Total Ad Spend: ${metrics.get('total_ad_spend', 0):.2f}
        - Clicks from Ads: {metrics.get('total_clicks', 0)}
        - New Social Media Followers: {metrics.get('new_followers', 0)}
        - Top Performing Campaign this week: "{metrics.get('top_performing_campaign', 'N/A')}"

        Based on this data, write a short summary (about 3-4 sentences). Start with a positive opening and end with an encouraging closing statement.

        Summary:
        """
        print("--- Strategist Skill (Summary): Sending composed prompt to LLM...")
        try:
            # Call the Llama instance directly (positional prompt + kwargs).
            response = self.llm(
                prompt_template,
                max_tokens=250,
                temperature=0.6,
                stop=["Client:", "Data:"],
                echo=False
            )
            summary_text = response['choices'][0]['text'].strip()
            print("--- Strategist Skill (Summary): Received response from LLM.")
            if not summary_text:
                return "The AI model returned an empty summary."
            return summary_text
        except Exception as e:
            print(f"--- Strategist Skill (Summary) ERROR: {e}")
            traceback.print_exc()
            return "An error occurred in the AI model while generating the weekly summary."
143
+
144
    def generate_chat_response(self, prompt: str, context: str) -> str:
        """
        RAG-Enabled Chat Response with Safety Checks.

        Pipeline: (1) reject blacklisted input via SafetyGuard when that
        module is available, (2) best-effort retrieval of one knowledge-base
        snippet from self.store, (3) compose a single prompt and query the LLM.

        Args:
            prompt: The client's question.
            context: Dashboard context injected verbatim into the prompt.

        Returns:
            The model's reply, or a refusal/error message string.
        """
        print(f"--- Strategist Skill (Chat): Processing: '{prompt}'")

        # 1. SAFETY GUARDRAIL (fail-safe: skipped when SafetyGuard is absent)
        if SafetyGuard and not SafetyGuard.validate_input(prompt):
            return "I cannot generate a response as the query contains restricted content."

        # 2. RAG RETRIEVAL (knowledge injection; errors are only logged)
        retrieved_knowledge = ""
        if self.store:
            try:
                print(" - 🔍 Searching knowledge base...")
                # Search DB for relevant context
                kb_docs = self.store.search(prompt, n_results=1)
                if kb_docs:
                    retrieved_knowledge = f"\n[INTERNAL KNOWLEDGE]\n{kb_docs[0]}\n"
            except Exception as e:
                print(f" - ⚠️ RAG Search Warning: {e}")

        # 3. Compose the final prompt: dashboard context + retrieved snippet.
        master_prompt = f"""
        [SYSTEM]
        You are a digital marketing strategist AI. Use the Context and Internal Knowledge below to answer the Client.

        [CONTEXT FROM DASHBOARD]
        {context}

        {retrieved_knowledge}

        [CLIENT'S QUESTION]
        {prompt}

        [YOUR RESPONSE]
        """
        try:
            response = self.llm(
                master_prompt,
                max_tokens=500,
                temperature=0.5,
                stop=["[CLIENT'S QUESTION]", "[SYSTEM]"],
                echo=False
            )
            return response['choices'][0]['text'].strip()
        except Exception as e:
            traceback.print_exc()
            return "Internal error in Chat Module."
192
+
193
    def generate_dashboard_insights(self, kpis: Dict[str, Any]) -> str:
        """
        Turn dashboard KPIs into bullet-point insights.

        NOTE(review): the prompt below is literally truncated ("...") and
        never interpolates *kpis* -- the KPI dict is only printed, not sent
        to the model.  Confirm whether the full prompt was lost in a refactor.
        """
        print(f"--- Strategist Skill (Insights): Received KPIs: {kpis}")
        prompt = f"""
        [SYSTEM]
        You are a senior data analyst at Reachify...

        [YOUR INSIGHTFUL BULLET POINTS]
        - """
        try:
            response = self.llm(prompt, max_tokens=250, temperature=0.7, stop=["[SYSTEM]", "Human:", "\n\n"], echo=False)
            insight_text = response['choices'][0]['text'].strip()
            # Keep the reply shaped as a bullet list even when the model
            # drops the leading dash.
            if not insight_text.startswith('-'):
                insight_text = '- ' + insight_text
            print("--- Strategist Skill (Insights): Successfully received response from LLM.")
            return insight_text
        except Exception as e:
            print(f"--- Strategist Skill (Insights) ERROR: {e}")
            traceback.print_exc()
            return "- Could not generate AI insights due to an internal model error."
212
+
213
    def generate_analytics_insights(self, analytics_data: dict) -> str:
        """
        Takes campaign analytics data and generates 3 actionable insights using the LLM.

        NOTE(review): as written, *analytics_data* is never interpolated into
        the prompt, and the prompt body is a truncated placeholder ("...").
        Confirm whether the full prompt text was lost in a refactor.
        """
        print(f"--- Strategist Skill (Analytics Insights): Received analytics data.")
        prompt = f"""
        [SYSTEM]
        You are an expert Campaign Analyst...

        [YOUR ANALYSIS - 3 ACTIONABLE BULLET POINTS]
        -
        """
        print("--- Strategist Skill (Analytics Insights): Sending composed prompt to LLM...")
        try:
            response = self.llm(prompt, max_tokens=200, temperature=0.6, stop=["[SYSTEM]", "\n\n-"], echo=False)
            insights_text = response['choices'][0]['text'].strip()
            # Normalize to a bullet-list shape.
            if not insights_text.startswith('-'):
                insights_text = '- ' + insights_text
            print("--- Strategist Skill (Analytics Insights): Successfully received and processed response.")
            return insights_text
        except Exception as e:
            print(f"--- Strategist Skill (Analytics Insights) FATAL ERROR: {e}")
            traceback.print_exc()
            return "- AI insights could not be generated due to an internal model error."
237
+
238
+
239
    def get_caption_assistance(self, caption: str, action: str, guidelines: str = None) -> str:
        """
        Provides AI assistance for writing captions based on a specified action.

        Args:
            caption: The influencer's draft caption.
            action: One of 'improve', 'hashtags', or 'check_guidelines'.
            guidelines: Campaign rules; required when action is
                'check_guidelines' (otherwise the action is rejected).

        Returns:
            The model's text, or an explanatory error string for invalid
            actions / model failures.
        """
        print(f"--- Strategist Skill (Caption Assist): Received action: '{action}'")

        system_prompt = "You are a helpful and creative social media marketing assistant for influencers. You are concise and direct."

        # Each supported action maps to its own task-specific user prompt.
        if action == 'improve':
            user_prompt = f"Make the following Instagram caption more engaging and impactful. Keep the core message but enhance the wording.\n\nOriginal:\n---\n{caption}\n\nImproved:"
        elif action == 'hashtags':
            user_prompt = f"Suggest a list of 7 relevant and trending hashtags for the following Instagram post. Provide ONLY the hashtags, starting with # and separated by spaces.\n\nPost Caption:\n---\n{caption}\n\nHashtags:"
        elif action == 'check_guidelines' and guidelines:
            user_prompt = f"Carefully check if the following caption meets ALL the rules in the provided guidelines. Be strict. First, respond with only 'YES' or 'NO'. Then, on a new line, explain which specific rules were broken, or confirm that all rules were followed.\n\nGuidelines:\n---\n{guidelines}\n\nCaption to Check:\n---\n{caption}\n\nAnalysis:"
        else:
            return "Invalid action or missing guidelines provided to the AI assistant."

        full_prompt = f"[SYSTEM]\n{system_prompt}\n\n[USER]\n{user_prompt}\n\n[ASSISTANT]\n"

        try:
            print(f" - Calling LLM for caption assistance (action: {action})...")
            response = self.llm(
                full_prompt,
                max_tokens=256,
                temperature=0.7,
                stop=["[USER]", "[SYSTEM]"],
                echo=False
            )
            generated_text = response['choices'][0]['text'].strip()
            print(f" - ✅ LLM generated response.")
            return generated_text
        except Exception as e:
            print(f"--- Strategist Skill (Caption Assist) ERROR: {e}")
            traceback.print_exc()
            return "An error occurred while getting assistance from the AI."
274
+
275
+
276
    def generate_influencer_analytics_summary(self, kpis: Dict[str, Any]) -> str:
        """
        Takes an influencer's KPIs and generates a short, encouraging, and actionable summary.

        Args:
            kpis: Dict with optional 'avgEngagementRate', 'totalReach' and
                'totalSubmissions' keys (missing values default to zero).

        Returns:
            A one-line summary string, or an apology message on failure.
        """
        print(f"--- Strategist Skill (Influencer Analytics): Received KPIs for analysis.")

        # v2 prompt: much stricter and more directive than the first
        # attempt, to keep the small model from rambling about what KPIs are.
        prompt = f"""
        [SYSTEM]
        You are "Spark", a friendly AI Analyst for social media influencers.
        Your task is to write a 2-sentence summary of the user's performance.
        - Sentence 1: Start with a positive highlight from the data.
        - Sentence 2: Give ONE simple, actionable tip for what to do next.
        - BE CONCISE and encouraging. DO NOT explain what KPIs are. DO NOT use lists.

        [INFLUENCER'S DATA]
        - Engagement Rate: {kpis.get('avgEngagementRate', 0.0):.2f}%
        - Total Reach on approved posts: {kpis.get('totalReach', 0)}
        - Approved Posts: {kpis.get('totalSubmissions', 0)}

        [YOUR 2-SENTENCE SUMMARY]
        """

        try:
            print(" - Calling LLM for influencer analytics summary (v2 prompt)...")
            response = self.llm(
                prompt,
                max_tokens=100,  # We only need a short response
                temperature=0.7,
                stop=["[SYSTEM]", "[USER]", "User:", "System:"],
                echo=False
            )
            summary_text = response['choices'][0]['text'].strip()

            # Keep only the first line to drop any trailing AI chit-chat.
            if "\n" in summary_text:
                summary_text = summary_text.split('\n')[0]

            print(" - ✅ LLM generated summary successfully.")
            return summary_text

        except Exception as e:
            print(f"--- Strategist Skill (Influencer Analytics) ERROR: {e}")
            return "AI summary could not be generated at this time."
320
+
321
+
322
def generate_influencer_growth_plan(self, influencer_data: Dict[str, Any]) -> List[str]:
    """Analyze an influencer's live data and return up to 3 actionable growth tips.

    Args:
        influencer_data: Profile metrics; reads ``fullName``, ``category``,
            ``avgEngagementRate``, ``bestPostCaption`` and ``worstPostCaption``.
            Missing or None captions fall back to 'N/A' so slicing cannot crash.

    Returns:
        A list of at most three tip strings; a one-element fallback list if the
        LLM call raises.
    """
    import re  # needed for robust bullet/number stripping below

    print(f"--- Strategist Skill (Growth Plan): Influencer {influencer_data.get('fullName')} ke liye plan banaya ja raha hai.")

    # Normalize possibly-None captions up front: `get()` then `or 'N/A'`.
    best_caption = influencer_data.get('bestPostCaption') or 'N/A'
    worst_caption = influencer_data.get('worstPostCaption') or 'N/A'

    prompt = f"""
[INST] You are an expert social media coach. Analyze the following data for an influencer named {influencer_data.get('fullName')} and provide ONLY 3 short, actionable tips based on it. Start each tip on a new line.

- Niche: {influencer_data.get('category', 'Not specified')}
- Avg Engagement: {influencer_data.get('avgEngagementRate', 0.0):.2f}%
- Best Post was about: '{best_caption[:50]}'
- Worst Post was about: '{worst_caption[:50]}'

Your 3 tips:
[/INST]
"""

    try:
        print("--- Strategist Skill (Growth Plan): Simplified LLM ko call kiya jaa raha hai...")
        response = self.llm(
            prompt,
            max_tokens=256,
            temperature=0.7,
            stop=["[INST]", "User:", "System:"],
            echo=False
        )
        raw_text = response['choices'][0]['text'].strip()

        # FIX: the previous chain of .lstrip('- ').lstrip('1. ')... calls treated
        # each argument as a *character set*, so tips legitimately starting with
        # digits, dots, dashes or spaces were mangled (e.g. "2023 trends" ->
        # "023 trends"). Strip exactly one leading bullet or "N." / "N)" marker
        # with a regex instead.
        tips = [
            re.sub(r'^\s*(?:[-*]\s*|\d+[.)]\s*)', '', line.strip())
            for line in raw_text.split('\n')
            if line.strip()
        ]

        print(f"--- Strategist Skill (Growth Plan): LLM se tips successfully generate ho gaye: {tips}")
        return tips[:3]

    except Exception as e:
        print(f"--- Strategist Skill (Growth Plan) FATAL ERROR: {e}")
        traceback.print_exc()
        return ["AI Coach is currently unavailable due to a technical error."]
366
+
367
+
368
def generate_service_blueprint(self, service_type: str, requirements: str) -> Dict[str, Any]:
    """Generate a structured project blueprint for a client request via the LLM.

    NOTE(review): a second method with this exact name is defined later in the
    same class and shadows this one at class-creation time, so this version is
    effectively dead code kept for reference.

    Returns:
        Dict with 'title', 'deliverables', 'stack', 'price_est' and 'timeline';
        an error-shaped dict of the same keys if the LLM call fails.
    """
    import re

    print(f"--- Strategist Skill (Blueprint): Generating plan for '{service_type}' request.")

    # Few-shot prompt: a single "perfect example" pins the key::value format.
    prompt = f"""
[SYSTEM]
You are an expert project planner for a top-tier digital agency.
Analyze the client's request below and generate a concise project blueprint.

YOU MUST FOLLOW THE FORMAT OF THE EXAMPLE BELOW EXACTLY.
- For DELIVERABLES, provide a list of 4-5 specific features separated by the "|" pipe character.
- For STACK, PRICE_EST, and TIMELINE, you MUST provide a single, summarized value. DO NOT provide a detailed itemized list for these.

[PERFECT EXAMPLE]
TITLE:: Modern E-Commerce Store for a Fashion Brand
DELIVERABLES:: Dynamic Product Catalog | Secure Shopping Cart & Checkout | User Account & Order History | Admin Dashboard for Managing Products
STACK:: Next.js & TailwindCSS (Frontend), Supabase (Backend)
PRICE_EST:: $8,000 - $12,000
TIMELINE:: 8-10 Weeks
[/PERFECT EXAMPLE]

[CLIENT REQUEST]
- Service Type: {service_type}
- Description: {requirements}

[YOUR BLUEPRINT]
TITLE:: """

    try:
        completion = self.llm(
            prompt,
            max_tokens=400,
            temperature=0.5,
            stop=["[CLIENT REQUEST]", "[SYSTEM]", "[/PERFECT EXAMPLE]"],
            echo=False
        )
        # Re-attach the seeded "TITLE:: " prefix that the prompt consumed.
        raw_text = "TITLE:: " + completion['choices'][0]['text'].strip()
        print(f"--- Strategist Skill (Blueprint): Raw response from LLM:\n---\n{raw_text}\n---")

        # Defaults are returned for any key the model omits.
        blueprint = {
            'title': 'AI Generated Title',
            'deliverables': ['Analysis in progress...'],
            'stack': 'To be determined',
            'price_est': 'Pending',
            'timeline': 'Pending'
        }

        # Summary fields keep only their first line; deliverables may be a list.
        single_line_slots = {
            'TITLE': 'title',
            'STACK': 'stack',
            'PRICE_EST': 'price_est',
            'TIMELINE': 'timeline',
        }
        for raw_key, raw_value in re.findall(r'(\b[A-Z_]+\b)::(.*?)(?=\n\b[A-Z_]+\b::|$)', raw_text, re.DOTALL):
            raw_key, raw_value = raw_key.strip().upper(), raw_value.strip()
            if raw_key in single_line_slots:
                blueprint[single_line_slots[raw_key]] = raw_value.split('\n')[0].strip()
            elif raw_key == 'DELIVERABLES':
                items = [item.strip() for item in raw_value.split('|') if item.strip()]
                if items:
                    blueprint['deliverables'] = items

        print(f"--- Strategist Skill (Blueprint): Successfully parsed with final parser. Result: {blueprint}")
        return blueprint

    except Exception as e:
        error_msg = f"A critical error occurred. Error: {e}"
        print(f"--- Strategist Skill FATAL ERROR: {error_msg}")
        return { 'title': 'Error Generating Plan', 'deliverables': ['AI model failed to respond.'], 'stack': 'N/A', 'price_est': 'N/A', 'timeline': 'N/A' }
451
+
452
+
453
def generate_service_blueprint(self, service_type: str, requirements: str) -> Dict[str, Any]:
    """Generate a structured WEBSITE project blueprint for a client request.

    This definition overrides the earlier method of the same name; only the
    website-specific prompt lives here, while the LLM call and key::value
    parsing are delegated to the shared ``_get_ai_response_and_parse`` helper.
    """
    print(f"--- Strategist Skill (WEBSITE): Generating plan for '{service_type}' request.")

    prompt = f"""
[SYSTEM]
You are an expert project planner for a top-tier digital agency.
Analyze the client's request below and generate a concise project blueprint for a WEBSITE.

YOU MUST FOLLOW THE FORMAT OF THE EXAMPLE BELOW EXACTLY.
- For DELIVERABLES, provide a list of 4-5 specific website features separated by "|".
- For STACK, PRICE_EST, and TIMELINE, you MUST provide a single, summarized value.

[PERFECT EXAMPLE]
TITLE:: Modern E-Commerce Store for a Fashion Brand
DELIVERABLES:: Dynamic Product Catalog | Secure Shopping Cart & Checkout | User Account & Order History | Admin Dashboard
STACK:: Next.js & TailwindCSS (Frontend), Supabase (Backend)
PRICE_EST:: $8,000 - $12,000
TIMELINE:: 8-10 Weeks
[/PERFECT EXAMPLE]

[CLIENT REQUEST]
- Service Type: {service_type}
- Description: {requirements}

[YOUR BLUEPRINT]
TITLE:: """

    # Shared helper performs the LLM call and robust key::value parsing.
    return self._get_ai_response_and_parse(prompt)
486
+
487
+ # --- THIS IS THE NEW, SEPARATE FUNCTION FOR GROWTH MANAGEMENT ---
488
def generate_growth_plan(self, platform_handle: str, goals: str, challenges: str) -> Dict[str, Any]:
    """Create a 3-month influencer management plan from the user's stated inputs.

    FIX: the return annotation previously used the builtin function ``any``
    instead of ``typing.Any``, which type-checkers reject; behavior unchanged.

    Args:
        platform_handle: The influencer's channel/handle being managed.
        goals: Free-text growth goals supplied by the influencer.
        challenges: Free-text pain points supplied by the influencer.

    Returns:
        Parsed blueprint dict from ``_get_ai_response_and_parse``.
    """
    print(f"--- Strategist Skill (GROWTH): Generating plan for '{platform_handle}'.")

    prompt = f"""
[SYSTEM]
You are a Talent Manager for a top influencer agency. Create a 3-month management plan.
Follow the example format exactly. Use "|" to separate monthly services.
Use "Monthly Retainer" for Price Estimate and "Initial Contract Term" for Timeline.

[PERFECT EXAMPLE]
TITLE:: 3-Month YouTube Growth & Monetization Strategy
DELIVERABLES:: Weekly Content Calendar (2 Videos, 5 Shorts) | SEO Title & Description Writing | Proactive Brand Outreach (5 brands/month) | Monthly Performance Analytics Report
STACK:: YouTube Studio, TubeBuddy, Notion
PRICE_EST:: $1,500 / month
TIMELINE:: 3-Month Initial Contract
[/PERFECT EXAMPLE]

[CLIENT REQUEST]
- Platform Handle: {platform_handle}
- Goals: {goals}
- Challenges: {challenges}

[YOUR BLUEPRINT]
TITLE:: """

    # Same reliable call-and-parse helper used by the blueprint generator.
    return self._get_ai_response_and_parse(prompt)
518
+
519
+ # --- THIS IS THE HELPER FUNCTION THAT BOTH METHODS USE ---
520
+ def _get_ai_response_and_parse(self, prompt: str) -> Dict[str, any]:
521
+ """
522
+ Internal helper to call the LLM and parse the key::value format robustly.
523
+ """
524
+ try:
525
+ response_dict = self.llm(
526
+ prompt,
527
+ max_tokens=400,
528
+ temperature=0.5,
529
+ stop=["[CLIENT REQUEST]", "[SYSTEM]", "[/PERFECT EXAMPLE]"],
530
+ echo=False
531
+ )
532
+ raw_text = "TITLE:: " + response_dict['choices'][0]['text'].strip()
533
+ print(f"--- AI Raw Response ---\n{raw_text}\n---")
534
+
535
+ blueprint = {
536
+ 'title': 'AI Generated Plan',
537
+ 'deliverables': ['Analysis in progress...'],
538
+ 'stack': 'To be determined',
539
+ 'price_est': 'Pending',
540
+ 'timeline': 'Pending'
541
+ }
542
+
543
+ pairs = re.findall(r'(\b[A-Z_]+\b)::(.*?)(?=\n\b[A-Z_]+\b::|$)', raw_text, re.DOTALL)
544
+
545
+ for key, value in pairs:
546
+ key, value = key.strip().upper(), value.strip()
547
+ if key == 'TITLE': blueprint['title'] = value.split('\n')[0].strip()
548
+ elif key == 'STACK': blueprint['stack'] = value.split('\n')[0].strip()
549
+ elif key == 'PRICE_EST': blueprint['price_est'] = value.split('\n')[0].strip()
550
+ elif key == 'TIMELINE': blueprint['timeline'] = value.split('\n')[0].strip()
551
+ elif key == 'DELIVERABLES':
552
+ deliverables_list = [d.strip() for d in value.split('|') if d.strip()]
553
+ if deliverables_list: blueprint['deliverables'] = deliverables_list
554
+
555
+ print(f"--- Parser Result ---: {blueprint}")
556
+ return blueprint
557
+
558
+ except Exception as e:
559
+ error_msg = f"A critical error occurred in the AI model or parser. Error: {e}"
560
+ print(f"--- AI FATAL ERROR: {error_msg}")
561
+ return {
562
+ 'title': 'Error Generating Plan',
563
+ 'deliverables': ['AI model failed to respond or there was a system error.'],
564
+ 'stack': 'N/A',
565
+ 'price_est': 'N/A',
566
+ 'timeline': 'N/A'
567
+ }
568
+
569
+
570
def generate_weekly_content_plan(self, context: Dict[str, Any]) -> Dict[str, Any]:
    """Return three canned weekly content options (MOCK MODE — no LLM call).

    Placeholder for immediate responses until server capacity is upgraded.
    Output varies only with ``context['niche']`` and the first active trend
    name (falling back to "General" / "Trending Audio").
    """
    print(f"--- Strategist Skill (Plan): Generating for '{context.get('niche')}'.")

    niche = context.get("niche", "General")
    trend_names = [entry['name'] for entry in context.get("active_trends", [])]
    top_trend = trend_names[0] if trend_names else "Trending Audio"

    # Three fixed archetypes, lightly personalized by niche and trend.
    viral_option = {
        "type": "Viral Bet",
        "title": f"Reel: {top_trend} Challenge",
        "platform": "Instagram",
        "contentType": "Reel",
        "instructions": f"Use the '{top_trend}' audio. Show a quick transition related to {niche}. Keep it under 15s.",
        "reasoning": "High viral potential due to current trend momentum."
    }
    community_option = {
        "type": "Community",
        "title": "Story: Poll of the Day",
        "platform": "Instagram",
        "contentType": "Story",
        "instructions": "Post a 'This or That' poll related to your niche. Engage with replies.",
        "reasoning": "Boosts engagement rate by encouraging direct interaction."
    }
    authority_option = {
        "type": "Niche Authority",
        "title": "Carousel: Top 3 Tips",
        "platform": "Instagram",
        "contentType": "Carousel",
        "instructions": f"Share 3 lesser-known tips about {niche}. Use high-quality photos.",
        "reasoning": "Establishes authority and saves value for followers."
    }
    return {"options": [viral_option, community_option, authority_option]}
core/support_agent.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # FILE: ai-service/core/support_agent.py
2
+
3
+ import traceback
4
+ from typing import Dict, Any
5
+ from llama_cpp import Llama
6
+ from langchain.llms.base import LLM
7
+ from langchain.chains import ConversationalRetrievalChain
8
+ from langchain.memory import ConversationBufferMemory
9
+ from langchain_community.embeddings import HuggingFaceEmbeddings
10
+ from langchain_community.vectorstores import Chroma
11
+ from langchain_core.prompts import PromptTemplate
12
+ from langchain_core.output_parsers import StrOutputParser
13
+ from dotenv import load_dotenv
14
+
15
+ load_dotenv()
16
+
17
class LlamaLangChain(LLM):
    """Minimal LangChain ``LLM`` adapter around an in-process llama.cpp model.

    Wraps an already-loaded ``Llama`` instance so LangChain chains can drive
    it without loading a second copy of the model weights.
    """

    # The shared, already-initialized llama.cpp model (pydantic field).
    llama_instance: Llama

    @property
    def _llm_type(self) -> str:
        """Backend identifier reported to LangChain."""
        return "custom"

    def _call(self, prompt: str, stop: list[str] | None = None, **kwargs) -> str:
        """Run one non-streaming completion and return the generated text."""
        completion = self.llama_instance.create_completion(
            prompt,
            max_tokens=256,
            stop=stop,
            stream=False,
        )
        first_choice = completion["choices"][0]
        return first_choice["text"]
29
+
30
def format_docs(docs):
    """Join retrieved documents' text into one blob, separated by blank lines."""
    contents = (doc.page_content for doc in docs)
    return "\n\n".join(contents)
32
+
33
+
34
class SupportAgent:
    """RAG-backed support chatbot plus small copywriting utilities.

    Reuses an already-loaded llama.cpp model (shared with the rest of the
    service to keep RAM low), a Chroma vector store for retrieval, and one
    ConversationBufferMemory per conversation id.
    """

    def __init__(self, llm_instance: Llama, embedding_path: str, db_path: str):
        """
        Args:
            llm_instance: The shared, already-initialized llama.cpp model.
            embedding_path: Local path/name of the HuggingFace embedding model.
            db_path: Directory of the persisted Chroma vector store.

        Raises:
            ValueError: If *llm_instance* is None.
        """
        print("--- Initializing Support Agent (Optimized for Low RAM) ---")

        if llm_instance is None:
            raise ValueError("SupportAgent received an invalid LLM instance.")

        # Instead of loading a second LlamaCpp model, wrap the already-running
        # instance in a thin LangChain adapter.
        self.langchain_llm_wrapper = LlamaLangChain(llama_instance=llm_instance)

        # FIX: generate_caption_variant / generate_marketing_strategy /
        # generate_content_outline call ``self.llm.invoke(...)`` (and test
        # ``if not self.llm``), but ``self.llm`` was never assigned anywhere,
        # which raised AttributeError at runtime. Alias it to the wrapper,
        # which provides ``invoke`` via the LangChain LLM interface.
        self.llm = self.langchain_llm_wrapper

        self.embeddings = HuggingFaceEmbeddings(model_name=embedding_path)
        self.vector_store = Chroma(persist_directory=db_path, embedding_function=self.embeddings)
        # One ConversationBufferMemory per conversation_id.
        self.conversations = {}

        # The router reuses the same wrapper rather than a separate model.
        router_template = """Classify: 'live_data' or 'general_knowledge'. Question: {question} Classification:"""
        self.router_prompt = PromptTemplate.from_template(router_template)
        self.router_chain = self.router_prompt | self.langchain_llm_wrapper | StrOutputParser()

        print("✅ Agent and core components initialized successfully.")

    def _get_or_create_memory(self, conversation_id: str) -> ConversationBufferMemory:
        """Return the chat memory for *conversation_id*, creating it lazily."""
        if conversation_id not in self.conversations:
            self.conversations[conversation_id] = ConversationBufferMemory(
                memory_key="chat_history", return_messages=True, input_key="question", output_key='answer'
            )
        return self.conversations[conversation_id]

    def answer(self, payload: dict, conversation_id: str) -> dict:
        """Answer a support question via retrieval-augmented generation.

        Args:
            payload: Request body; reads the 'question' key (defaults to "").
            conversation_id: Key used to keep per-user chat memory.

        Returns:
            Dict with 'response' (the answer text) and 'context' (the joined
            source documents, or the error string on failure).
        """
        question = payload.get("question", "")
        memory = self._get_or_create_memory(conversation_id)

        try:
            # RAG: retrieve from Chroma, then answer strictly from the context.
            general_prompt_template = "Answer based on the CONTEXT.\n[CONTEXT]: {context}\n[USER QUESTION]: {question}\n[YOUR ANSWER]:"
            general_prompt = PromptTemplate.from_template(general_prompt_template)

            retriever = self.vector_store.as_retriever()
            qa_chain = ConversationalRetrievalChain.from_llm(
                llm=self.langchain_llm_wrapper,
                retriever=retriever,
                memory=memory,
                combine_docs_chain_kwargs={"prompt": general_prompt}
            )

            result = qa_chain.invoke({"question": question})
            final_answer = result.get("answer", "I don't have information on that topic.").strip()

            return {"response": final_answer, "context": format_docs(result.get('source_documents', []))}
        except Exception as e:
            traceback.print_exc()
            return {"response": "A critical server error occurred in the agent.", "context": str(e)}

    def generate_caption_variant(self, caption: str, action: str) -> str:
        """Rewrite an Instagram caption according to *action*.

        Args:
            caption: The original caption text.
            action: One of 'improve_writing', 'make_punchier' or
                'generate_alternatives'.

        Returns:
            The rewritten caption (or '---'-separated alternatives), or an
            error string for an unknown action / LLM failure.
        """
        print(f"--- 🚀 Received CAPTION generation request for action: '{action}' ---")
        system_prompt = (
            "You are an expert social media copywriter for an influencer marketing agency. "
            "Your task is to rewrite the provided Instagram caption based on a specific instruction. "
            "Your response must be ONLY the rewritten caption. Do not add any introductory phrases like 'Here is the rewritten caption:'."
        )
        if action == 'improve_writing':
            user_instruction = "Improve the writing. Correct any grammar or spelling mistakes, make the language clearer, and give it a more professional and polished tone."
        elif action == 'make_punchier':
            user_instruction = "Make it punchier. Rewrite it to be shorter, more energetic, and highly engaging. Use 2-3 relevant emojis to add personality."
        elif action == 'generate_alternatives':
            user_instruction = "Generate three new, creative, and completely different caption alternatives for the same topic. Separate each alternative with '---'."
        else:
            return "Error: Invalid action specified."
        final_prompt = f"""[SYSTEM INSTRUCTIONS]
{system_prompt}
[USER REQUEST]
{user_instruction}
[ORIGINAL CAPTION]
{caption}
[YOUR REWRITTEN CAPTION]
"""
        try:
            print("--- Invoking LLM for pure text generation... ---")
            response = self.llm.invoke(final_prompt)
            clean_response = response.strip()
            print(f"✅ LLM Response: {clean_response}")
            return clean_response
        except Exception:
            traceback.print_exc()
            return "An error occurred while generating the caption."

    def generate_marketing_strategy(self, prompt: str) -> str:
        """Generate a marketing strategy from a fully pre-built prompt."""
        if not self.llm:
            return "Error: The AI model is not available."
        print("--- SupportAgent: Generating marketing strategy from prompt... ---")
        try:
            response = self.llm.invoke(prompt, max_tokens=750, temperature=0.75)
            clean_response = response.strip()
            print("--- SupportAgent: Strategy generated successfully. ---")
            return clean_response
        except Exception as e:
            traceback.print_exc()
            return f"An error occurred while generating the strategy: {e}"

    def generate_content_outline(self, title: str) -> str:
        """Generate a structured Markdown blog-post outline for *title*.

        Returns:
            The Markdown outline, or an error string if the model is missing
            or the call fails.
        """
        if not self.llm:
            return "Error: The AI model is not available."

        print(f"--- SupportAgent: Generating content outline for title: '{title}' ---")
        prompt = f"""
You are a professional content writer and editor.
Your task is to create a detailed, well-structured blog post outline for the following title.
The outline must be in Markdown format, using headings (#, ##) and bullet points (-).
Include sections for an Introduction, at least 3 main body points with sub-bullets, and a Conclusion.
**Title:** "{title}"
**Your Outline:**
"""

        try:
            response = self.llm.invoke(prompt, max_tokens=1024, temperature=0.7, stop=["User:", "Title:"])
            clean_response = response.strip()
            print("--- SupportAgent: Content outline generated successfully. ---")
            return clean_response
        except Exception as e:
            traceback.print_exc()
            return f"An error occurred while generating the content outline: {e}"
core/utils.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # File: ai-service/core/utils.py
2
+
3
+ import os
4
+ import cv2
5
+ import numpy as np
6
+ from sklearn.cluster import KMeans
7
+ import urllib.request
8
+ import ssl
9
+ from supabase import create_client, Client
10
+ from dotenv import load_dotenv
11
+
12
+ # Load environment variables from the root .env file
13
+ ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
14
+ load_dotenv(dotenv_path=os.path.join(ROOT_DIR, '.env'))
15
+
16
def get_supabase_client() -> Client:
    """Build a Supabase client from SUPABASE_URL / SUPABASE_SERVICE_KEY.

    Raises:
        ValueError: When either credential is missing from the environment.
        Exception: Any client-construction failure is logged and re-raised so
            the application stops rather than running without a database.
    """
    try:
        supabase_url = os.environ.get("SUPABASE_URL")
        # The service key (not the anon key) is required for backend tasks.
        service_key = os.environ.get("SUPABASE_SERVICE_KEY")
        if not supabase_url or not service_key:
            raise ValueError("Supabase credentials (URL or Service Key) are not set.")
        return create_client(supabase_url, service_key)
    except Exception as e:
        print(f"🔴 FATAL: Could not create Supabase client: {e}")
        raise  # Fail fast: the app cannot operate without Supabase.
30
+
31
def extract_colors_from_url(image_url: str, num_colors=4) -> list:
    """Download an image and return its *num_colors* dominant colors as hex codes.

    Best-effort: any failure (network, decode, clustering) is logged and a
    black/gray fallback palette is returned. Returns an empty list when the
    downloaded bytes cannot be decoded as an image.

    Requires: opencv-python-headless, scikit-learn, numpy.
    """
    try:
        print(f"🎨 Extracting colors from: {image_url}")

        # Dev environments often lack proper CA bundles, so skip verification.
        ssl_context = ssl._create_unverified_context()

        # Fetch the raw bytes straight into memory and decode with OpenCV.
        with urllib.request.urlopen(image_url, context=ssl_context) as resp:
            raw = np.asarray(bytearray(resp.read()), dtype=np.uint8)
        decoded = cv2.imdecode(raw, -1)

        if decoded is None:
            return []

        # Drop the alpha channel if present, convert BGR->RGB, shrink for speed.
        if decoded.shape[2] == 4:
            decoded = cv2.cvtColor(decoded, cv2.COLOR_BGRA2BGR)
        decoded = cv2.cvtColor(decoded, cv2.COLOR_BGR2RGB)
        decoded = cv2.resize(decoded, (100, 100), interpolation=cv2.INTER_AREA)

        # Flatten to a pixel list and cluster to find the dominant colors.
        pixels = decoded.reshape((decoded.shape[0] * decoded.shape[1], 3))
        clusterer = KMeans(n_clusters=num_colors, n_init='auto')
        clusterer.fit(pixels)

        return [
            '#{:02x}{:02x}{:02x}'.format(int(center[0]), int(center[1]), int(center[2]))
            for center in clusterer.cluster_centers_
        ]

    except Exception as e:
        print(f"⚠️ Error extracting colors: {str(e)}")
        # Fallback palette so callers always get something renderable.
        return ["#000000", "#808080"]
embedding_model/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 384,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
embedding_model/README.md ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language: en
3
+ license: apache-2.0
4
+ library_name: sentence-transformers
5
+ tags:
6
+ - sentence-transformers
7
+ - feature-extraction
8
+ - sentence-similarity
9
+ - transformers
10
+ datasets:
11
+ - s2orc
12
+ - flax-sentence-embeddings/stackexchange_xml
13
+ - ms_marco
14
+ - gooaq
15
+ - yahoo_answers_topics
16
+ - code_search_net
17
+ - search_qa
18
+ - eli5
19
+ - snli
20
+ - multi_nli
21
+ - wikihow
22
+ - natural_questions
23
+ - trivia_qa
24
+ - embedding-data/sentence-compression
25
+ - embedding-data/flickr30k-captions
26
+ - embedding-data/altlex
27
+ - embedding-data/simple-wiki
28
+ - embedding-data/QQP
29
+ - embedding-data/SPECTER
30
+ - embedding-data/PAQ_pairs
31
+ - embedding-data/WikiAnswers
32
+ pipeline_tag: sentence-similarity
33
+ ---
34
+
35
+
36
+ # all-MiniLM-L6-v2
37
+ This is a [sentence-transformers](https://www.SBERT.net) model: It maps sentences & paragraphs to a 384 dimensional dense vector space and can be used for tasks like clustering or semantic search.
38
+
39
+ ## Usage (Sentence-Transformers)
40
+ Using this model becomes easy when you have [sentence-transformers](https://www.SBERT.net) installed:
41
+
42
+ ```
43
+ pip install -U sentence-transformers
44
+ ```
45
+
46
+ Then you can use the model like this:
47
+ ```python
48
+ from sentence_transformers import SentenceTransformer
49
+ sentences = ["This is an example sentence", "Each sentence is converted"]
50
+
51
+ model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
52
+ embeddings = model.encode(sentences)
53
+ print(embeddings)
54
+ ```
55
+
56
+ ## Usage (HuggingFace Transformers)
57
+ Without [sentence-transformers](https://www.SBERT.net), you can use the model like this: First, you pass your input through the transformer model, then you have to apply the right pooling-operation on-top of the contextualized word embeddings.
58
+
59
+ ```python
60
+ from transformers import AutoTokenizer, AutoModel
61
+ import torch
62
+ import torch.nn.functional as F
63
+
64
+ #Mean Pooling - Take attention mask into account for correct averaging
65
+ def mean_pooling(model_output, attention_mask):
66
+ token_embeddings = model_output[0] #First element of model_output contains all token embeddings
67
+ input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
68
+ return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
69
+
70
+
71
+ # Sentences we want sentence embeddings for
72
+ sentences = ['This is an example sentence', 'Each sentence is converted']
73
+
74
+ # Load model from HuggingFace Hub
75
+ tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
76
+ model = AutoModel.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
77
+
78
+ # Tokenize sentences
79
+ encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
80
+
81
+ # Compute token embeddings
82
+ with torch.no_grad():
83
+ model_output = model(**encoded_input)
84
+
85
+ # Perform pooling
86
+ sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
87
+
88
+ # Normalize embeddings
89
+ sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)
90
+
91
+ print("Sentence embeddings:")
92
+ print(sentence_embeddings)
93
+ ```
94
+
95
+ ------
96
+
97
+ ## Background
98
+
99
+ The project aims to train sentence embedding models on very large sentence level datasets using a self-supervised
100
+ contrastive learning objective. We used the pretrained [`nreimers/MiniLM-L6-H384-uncased`](https://huggingface.co/nreimers/MiniLM-L6-H384-uncased) model and fine-tuned in on a
101
+ 1B sentence pairs dataset. We use a contrastive learning objective: given a sentence from the pair, the model should predict which out of a set of randomly sampled other sentences, was actually paired with it in our dataset.
102
+
103
+ We developed this model during the
104
+ [Community week using JAX/Flax for NLP & CV](https://discuss.huggingface.co/t/open-to-the-community-community-week-using-jax-flax-for-nlp-cv/7104),
105
+ organized by Hugging Face. We developed this model as part of the project:
106
+ [Train the Best Sentence Embedding Model Ever with 1B Training Pairs](https://discuss.huggingface.co/t/train-the-best-sentence-embedding-model-ever-with-1b-training-pairs/7354). We benefited from efficient hardware infrastructure to run the project: 7 TPUs v3-8, as well as intervention from Googles Flax, JAX, and Cloud team member about efficient deep learning frameworks.
107
+
108
+ ## Intended uses
109
+
110
+ Our model is intended to be used as a sentence and short paragraph encoder. Given an input text, it outputs a vector which captures
111
+ the semantic information. The sentence vector may be used for information retrieval, clustering or sentence similarity tasks.
112
+
113
+ By default, input text longer than 256 word pieces is truncated.
114
+
115
+
116
+ ## Training procedure
117
+
118
+ ### Pre-training
119
+
120
+ We use the pretrained [`nreimers/MiniLM-L6-H384-uncased`](https://huggingface.co/nreimers/MiniLM-L6-H384-uncased) model. Please refer to the model card for more detailed information about the pre-training procedure.
121
+
122
+ ### Fine-tuning
123
+
124
+ We fine-tune the model using a contrastive objective. Formally, we compute the cosine similarity from each possible sentence pairs from the batch.
125
+ We then apply the cross entropy loss by comparing with true pairs.
126
+
127
+ #### Hyper parameters
128
+
129
+ We trained our model on a TPU v3-8. We train the model during 100k steps using a batch size of 1024 (128 per TPU core).
130
+ We use a learning rate warm up of 500. The sequence length was limited to 128 tokens. We used the AdamW optimizer with
131
+ a 2e-5 learning rate. The full training script is accessible in this current repository: `train_script.py`.
132
+
133
+ #### Training data
134
+
135
+ We use the concatenation from multiple datasets to fine-tune our model. The total number of sentence pairs is above 1 billion sentences.
136
+ We sampled each dataset given a weighted probability which configuration is detailed in the `data_config.json` file.
137
+
138
+
139
+ | Dataset | Paper | Number of training tuples |
140
+ |--------------------------------------------------------|:----------------------------------------:|:--------------------------:|
141
+ | [Reddit comments (2015-2018)](https://github.com/PolyAI-LDN/conversational-datasets/tree/master/reddit) | [paper](https://arxiv.org/abs/1904.06472) | 726,484,430 |
142
+ | [S2ORC](https://github.com/allenai/s2orc) Citation pairs (Abstracts) | [paper](https://aclanthology.org/2020.acl-main.447/) | 116,288,806 |
143
+ | [WikiAnswers](https://github.com/afader/oqa#wikianswers-corpus) Duplicate question pairs | [paper](https://doi.org/10.1145/2623330.2623677) | 77,427,422 |
144
+ | [PAQ](https://github.com/facebookresearch/PAQ) (Question, Answer) pairs | [paper](https://arxiv.org/abs/2102.07033) | 64,371,441 |
145
+ | [S2ORC](https://github.com/allenai/s2orc) Citation pairs (Titles) | [paper](https://aclanthology.org/2020.acl-main.447/) | 52,603,982 |
146
+ | [S2ORC](https://github.com/allenai/s2orc) (Title, Abstract) | [paper](https://aclanthology.org/2020.acl-main.447/) | 41,769,185 |
147
+ | [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) (Title, Body) pairs | - | 25,316,456 |
148
+ | [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) (Title+Body, Answer) pairs | - | 21,396,559 |
149
+ | [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) (Title, Answer) pairs | - | 21,396,559 |
150
+ | [MS MARCO](https://microsoft.github.io/msmarco/) triplets | [paper](https://doi.org/10.1145/3404835.3462804) | 9,144,553 |
151
+ | [GOOAQ: Open Question Answering with Diverse Answer Types](https://github.com/allenai/gooaq) | [paper](https://arxiv.org/pdf/2104.08727.pdf) | 3,012,496 |
152
+ | [Yahoo Answers](https://www.kaggle.com/soumikrakshit/yahoo-answers-dataset) (Title, Answer) | [paper](https://proceedings.neurips.cc/paper/2015/hash/250cf8b51c773f3f8dc8b4be867a9a02-Abstract.html) | 1,198,260 |
153
+ | [Code Search](https://huggingface.co/datasets/code_search_net) | - | 1,151,414 |
154
+ | [COCO](https://cocodataset.org/#home) Image captions | [paper](https://link.springer.com/chapter/10.1007%2F978-3-319-10602-1_48) | 828,395|
155
+ | [SPECTER](https://github.com/allenai/specter) citation triplets | [paper](https://doi.org/10.18653/v1/2020.acl-main.207) | 684,100 |
156
+ | [Yahoo Answers](https://www.kaggle.com/soumikrakshit/yahoo-answers-dataset) (Question, Answer) | [paper](https://proceedings.neurips.cc/paper/2015/hash/250cf8b51c773f3f8dc8b4be867a9a02-Abstract.html) | 681,164 |
157
+ | [Yahoo Answers](https://www.kaggle.com/soumikrakshit/yahoo-answers-dataset) (Title, Question) | [paper](https://proceedings.neurips.cc/paper/2015/hash/250cf8b51c773f3f8dc8b4be867a9a02-Abstract.html) | 659,896 |
158
+ | [SearchQA](https://huggingface.co/datasets/search_qa) | [paper](https://arxiv.org/abs/1704.05179) | 582,261 |
159
+ | [Eli5](https://huggingface.co/datasets/eli5) | [paper](https://doi.org/10.18653/v1/p19-1346) | 325,475 |
160
+ | [Flickr 30k](https://shannon.cs.illinois.edu/DenotationGraph/) | [paper](https://transacl.org/ojs/index.php/tacl/article/view/229/33) | 317,695 |
161
+ | [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) Duplicate questions (titles) | | 304,525 |
162
+ | AllNLI ([SNLI](https://nlp.stanford.edu/projects/snli/) and [MultiNLI](https://cims.nyu.edu/~sbowman/multinli/) | [paper SNLI](https://doi.org/10.18653/v1/d15-1075), [paper MultiNLI](https://doi.org/10.18653/v1/n18-1101) | 277,230 |
163
+ | [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) Duplicate questions (bodies) | | 250,519 |
164
+ | [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) Duplicate questions (titles+bodies) | | 250,460 |
165
+ | [Sentence Compression](https://github.com/google-research-datasets/sentence-compression) | [paper](https://www.aclweb.org/anthology/D13-1155/) | 180,000 |
166
+ | [Wikihow](https://github.com/pvl/wikihow_pairs_dataset) | [paper](https://arxiv.org/abs/1810.09305) | 128,542 |
167
+ | [Altlex](https://github.com/chridey/altlex/) | [paper](https://aclanthology.org/P16-1135.pdf) | 112,696 |
168
+ | [Quora Question Triplets](https://quoradata.quora.com/First-Quora-Dataset-Release-Question-Pairs) | - | 103,663 |
169
+ | [Simple Wikipedia](https://cs.pomona.edu/~dkauchak/simplification/) | [paper](https://www.aclweb.org/anthology/P11-2117/) | 102,225 |
170
+ | [Natural Questions (NQ)](https://ai.google.com/research/NaturalQuestions) | [paper](https://transacl.org/ojs/index.php/tacl/article/view/1455) | 100,231 |
171
+ | [SQuAD2.0](https://rajpurkar.github.io/SQuAD-explorer/) | [paper](https://aclanthology.org/P18-2124.pdf) | 87,599 |
172
+ | [TriviaQA](https://huggingface.co/datasets/trivia_qa) | - | 73,346 |
173
+ | **Total** | | **1,170,060,424** |
embedding_model/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertModel"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "dtype": "float32",
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 384,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 1536,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 6,
19
+ "pad_token_id": 0,
20
+ "position_embedding_type": "absolute",
21
+ "transformers_version": "4.57.0",
22
+ "type_vocab_size": 2,
23
+ "use_cache": true,
24
+ "vocab_size": 30522
25
+ }
embedding_model/config_sentence_transformers.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "5.1.1",
4
+ "transformers": "4.57.0",
5
+ "pytorch": "2.5.1+cpu"
6
+ },
7
+ "model_type": "SentenceTransformer",
8
+ "prompts": {
9
+ "query": "",
10
+ "document": ""
11
+ },
12
+ "default_prompt_name": null,
13
+ "similarity_fn_name": "cosine"
14
+ }
embedding_model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1377e9af0ca0b016a9f2aa584d6fc71ab3ea6804fae21ef9fb1416e2944057ac
3
+ size 90864192
embedding_model/modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
embedding_model/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 256,
3
+ "do_lower_case": false
4
+ }
embedding_model/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
embedding_model/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
embedding_model/tokenizer_config.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "extra_special_tokens": {},
49
+ "mask_token": "[MASK]",
50
+ "max_length": 128,
51
+ "model_max_length": 256,
52
+ "never_split": null,
53
+ "pad_to_multiple_of": null,
54
+ "pad_token": "[PAD]",
55
+ "pad_token_type_id": 0,
56
+ "padding_side": "right",
57
+ "sep_token": "[SEP]",
58
+ "stride": 0,
59
+ "strip_accents": null,
60
+ "tokenize_chinese_chars": true,
61
+ "tokenizer_class": "BertTokenizer",
62
+ "truncation_side": "right",
63
+ "truncation_strategy": "longest_first",
64
+ "unk_token": "[UNK]"
65
+ }
embedding_model/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
knowledge_base/brand/01_campaign_creation.md ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Question: How do I create a new campaign on Reachify?
2
+
3
+ Answer: Creating a new campaign is easy. Follow these steps:
4
+ 1. First, log in to your Brand Dashboard.
5
+ 2. Navigate to the 'Campaigns' section from the sidebar menu.
6
+ 3. Click on the 'Create New Campaign' button.
7
+ 4. Fill out the campaign creation form with all the required details.
8
+ 5. Click 'Submit for Review' to launch your campaign.
9
+
10
+ ## Question: What information is required to create a campaign?
11
+
12
+ Answer: To create a successful campaign, you will need to provide the following details:
13
+ - **Campaign Name:** A clear and concise name for your campaign.
14
+ - **Campaign Goal:** What you want to achieve (e.g., Brand Awareness, Website Traffic, Sales).
15
+ - **Target Audience:** Details about your ideal customer, like age, gender, location, and interests.
16
+ - **Budget:** Your total budget for the campaign. Our AI can also suggest a budget based on your goals.
17
+ - **Campaign Brief:** A detailed description of what you expect from the influencers.
18
+
19
+ ## Question: What happens after I submit a campaign for review?
20
+
21
+ Answer: After you submit a campaign, our internal team performs a quick review to ensure it meets our platform guidelines. This usually takes less than 24 hours. Once approved, our AI will start matching your campaign with the most suitable influencers.
knowledge_base/brand/02_understanding_status.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Guide for Brands on Reachify
2
+
3
+ ## How to Create a Successful Campaign
4
+ 1. **Define a Clear Goal:** Before creating a campaign, know what you want to achieve. Common goals are Brand Awareness, Lead Generation, or Direct Sales.
5
+ 2. **Set a Realistic Budget:** The AI Planner can help you predict a budget, but always have a range in mind.
6
+ 3. **Write a Detailed Brief:** Your campaign description is the most important part. Clearly explain your product, key messages, and what you expect from influencers.
7
+ 4. **Trust the AI:** Use our AI tools to find the right influencers and forecast performance. It saves you time and improves results.
8
+
9
+ ## Understanding Campaign Status
10
+ - **Pending Approval:** The campaign has been created and is waiting for an Admin to approve it.
11
+ - **Active:** The campaign is live and you can now invite influencers.
12
+ - **Completed:** The campaign duration is over. You can now view analytics.
13
+ - **Rejected:** The campaign did not meet our community guidelines and was rejected by an Admin.
knowledge_base/common/first_faq.md ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Question: What is Reachify?
2
+
3
+ Answer: Reachify is an AI-powered platform that connects brands with influencers to create impactful marketing campaigns. Our technology helps brands find the perfect influencers, predict campaign performance, and manage everything from a single dashboard.
4
+
5
+ ## Question: Is it free to sign up on Reachify?
6
+
7
+ Answer: Yes, signing up and creating a profile is completely free for both Brands and Influencers. For brands, a platform fee is applied only when a campaign is actively running. For influencers, our services are completely free.
8
+
9
+ ## Question: How does the AI influencer matching work?
10
+
11
+ Answer: Our intelligent AI analyzes a brand's campaign goals, target audience, and content brief. It then scans thousands of influencer profiles to find the ones whose audience and content style are the best fit for the campaign, ensuring higher engagement and better results.
12
+
13
+ ## Question: I forgot my password. What should I do?
14
+
15
+ Answer: If you have forgotten your password, simply go to the login page and click on the 'Forgot Password?' link. You will be asked to enter your registered email address, and we will send you a link to reset your password.
16
+
17
+ ## Question: How can I contact customer support?
18
+
19
+ Answer: If you need any help, you can visit the 'Support' or 'Help' section on our website to find answers to common questions. For specific issues, you can raise a support ticket directly from your dashboard or email us at support@reachify-platform.com.
knowledge_base/influencer/02_payments.md ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Payments on Reachify
2
+
3
+ This document explains how payments are processed for both influencers and brands on the Reachify platform.
4
+
5
+ ## For Influencers: Getting Paid
6
+
7
+ ### How do I receive my payment?
8
+ Payments for completed campaigns are processed automatically. Once a campaign you participated in is marked as "Completed" by the brand or an admin, your payment will be initiated.
9
+
10
+ ### When will I get paid?
11
+ You can expect the payment to be credited to your linked bank account or Stripe account within **15 business days** from the campaign completion date.
12
+
13
+ ### What if my payment is delayed?
14
+ If you do not receive your payment after 15 business days, please follow the dispute process outlined below. Common reasons for delays include incorrect bank details or public holidays.
15
+
16
+ ### How to Raise a Payment Dispute
17
+ If you believe there is an error with your payment amount or if it is delayed, you can raise a dispute.
18
+ 1. Go to your **Influencer Dashboard**.
19
+ 2. Navigate to the **"Completed Campaigns"** section.
20
+ 3. Find the relevant campaign and click on the **"Report Payment Issue"** button.
21
+ 4. Fill out the form with the necessary details. Our finance team will review your case and get back to you within 3-5 business days.
22
+
23
+ ## For Brands: Managing Campaign Payments
24
+
25
+ ### How are campaigns funded?
26
+ When you create and activate a campaign, the total budget for influencer payouts is held in an escrow account. This ensures that funds are secured and available for payout once the campaign is successfully completed.
27
+
28
+ ### What happens if a campaign is cancelled?
29
+ If a campaign is cancelled before any influencers are paid, the funds held in escrow will be refunded to your account's credit balance within 5-7 business days.
30
+
31
+ ### Can I get an invoice?
32
+ Yes. For every campaign, an official invoice is generated. You can download all your invoices from the **"Billing"** section in your **Brand Dashboard**.
models/budget_predictor_v1.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93b36da3d08615203eb44526b12a329e9b755542a7492618505ff4585109066c
3
+ size 161957
models/comments_predictor_v1.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:241c23bae4fb40e9ff2aaf309ba722ef4745a294bb834630018e46023b95d2cb
3
+ size 76106
models/earnings_encoder.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7b8e2bcef7146c7b6b762d9135f63706217f3bd74619ee5a1bdf98d4b05e90f
3
+ size 1355
models/earnings_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c92fcebc17ba0ff12ce0c3ea69aa24b189f03c720c7bb8105af7cad5738ffc4
3
+ size 73314
models/influencer_matcher_v1.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8606d072af703ac6c5fa3b004117ebe0854c8162d74b7458ee52b862c595344
3
+ size 94570
models/likes_predictor_v1.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5df899fadc58b0dc3347c7ac876be952c471c0c80e0bab938999711acdab89bc
3
+ size 76102
models/payout_forecaster_v1.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22e71a5b9d00ca73a7963f47a776a6a0f1b1647a9d3e4afb8a4bcd522031f60b
3
+ size 880
models/performance_predictor_v1.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21caf9f705f894b2610bbad455c9fa40056b1bbf6f8c0c2f6f138d20d5c1b3ea
3
+ size 168213
models/performance_scorer_v1.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1120936b44b08499dc7e3a83cc5885efd0e64eaf807a0ef09d500f70cc1e13a7
3
+ size 83089
models/revenue_forecaster_v1.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba36b5be7b1c4b075aa9df8883ef922a65772b729ae8b42a537ff96d287fb665
3
+ size 9132
requirements.txt ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn[standard]
3
+ python-dotenv
4
+ pandas
5
+ scikit-learn
6
+ joblib==1.3.2
7
+ langchain
8
+ langchain-community
9
+ langchain-core
10
+ sentence-transformers
11
+ chromadb
12
+ pydantic<3,>=2
13
+ llama-cpp-python
14
+ diskcache
15
+ statsmodels
16
+ supabase
17
+ requests
18
+ xgboost
19
+ sqlalchemy
20
+ psycopg2-binary
21
+ PyMuPDF
22
+ lark
23
+ opencv-python-headless
24
+ huggingface-hub
scripts/download_embedding_model.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ai-service/scripts/download_embedding_model.py
2
+
3
+ from sentence_transformers import SentenceTransformer
4
+ import os
5
+
6
+ # --- Configuration ---
7
+ # Hum is popular model ko use karenge. Yeh chota aur effective hai.
8
+ MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'
9
+
10
+ # Path jahan model save hoga. Yeh aapke main.py ke `EMBEDDING_MODEL_PATH` se match karna chahiye.
11
+ SAVE_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'embedding_model')
12
+
13
+ # --- Main Logic ---
14
+ def download_model():
15
+ """
16
+ Downloads the sentence-transformer model from Hugging Face and saves it locally.
17
+ """
18
+ print(f"--- 🚀 Starting download for model: {MODEL_NAME} ---")
19
+
20
+ # Check if the path already exists
21
+ if os.path.exists(SAVE_PATH) and len(os.listdir(SAVE_PATH)) > 0:
22
+ print(f"--- ✅ Model directory already exists and is not empty. Skipping download. ---")
23
+ print(f" Path: {SAVE_PATH}")
24
+ return
25
+
26
+ print(f" Saving model to: {SAVE_PATH}")
27
+
28
+ try:
29
+ # Model download aur save karein
30
+ model = SentenceTransformer(MODEL_NAME)
31
+ model.save(SAVE_PATH)
32
+ print(f"--- ✅ Model downloaded and saved successfully! ---")
33
+ except Exception as e:
34
+ print(f"--- 🚨 ERROR: Failed to download or save the model. ---")
35
+ print(f" Error details: {e}")
36
+ print(f" Please check your internet connection and try again.")
37
+
38
+ if __name__ == "__main__":
39
+ download_model()
scripts/download_model.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ai-service/scripts/download_llm_model.py
2
+ import os
3
+ from huggingface_hub import hf_hub_download
4
+
5
+ # --- Configuration for the NEW, FAST Language Model ---
6
+ # Model ka repository on Hugging Face
7
+ REPO_ID = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
8
+ # Model ka file name
9
+ FILENAME = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
10
+
11
+ # Model ko kahan save karna hai ('llm_model' folder mein)
12
+ SAVE_DIRECTORY = os.path.join(os.path.dirname(__file__), '..', 'llm_model')
13
+
14
+ def download_language_model():
15
+ """
16
+ Downloads the specified GGUF language model from Hugging Face.
17
+ """
18
+ print(f"--- Starting download for Language Model: {FILENAME} ---")
19
+
20
+ # Target file ka poora path
21
+ file_path = os.path.join(SAVE_DIRECTORY, FILENAME)
22
+
23
+ # Check karein ki model pehle se download to nahi hai.
24
+ if os.path.exists(file_path):
25
+ print(f"✅ Model '{FILENAME}' already exists at: {SAVE_DIRECTORY}")
26
+ print("Skipping download.")
27
+ return
28
+
29
+ # Folder banayein agar woh मौजूद nahi hai
30
+ os.makedirs(SAVE_DIRECTORY, exist_ok=True)
31
+
32
+ print(f"Downloading model to: {SAVE_DIRECTORY}")
33
+ print("This may take a moment (approx 700-800MB)...")
34
+
35
+ try:
36
+ # Hugging Face se model download karein
37
+ hf_hub_download(
38
+ repo_id=REPO_ID,
39
+ filename=FILENAME,
40
+ local_dir=SAVE_DIRECTORY,
41
+ local_dir_use_symlinks=False # Important for Windows
42
+ )
43
+
44
+ print("\n" + "="*50)
45
+ print(f"✅ Language Model '{FILENAME}' downloaded successfully!")
46
+ print("="*50 + "\n")
47
+
48
+ except Exception as e:
49
+ print(f"🚨 An error occurred during download: {e}")
50
+
51
+ if __name__ == "__main__":
52
+ download_language_model()
scripts/export_performance_data.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # File: ai-service/scripts/export_performance_data.py
2
+
3
+ import pandas as pd
4
+ from sqlalchemy import create_engine, text
5
+ import os
6
+ from dotenv import load_dotenv
7
+ import sys
8
+
9
+ # Root directory ko path mein add karein taaki .env file mil sake
10
+ ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
11
+ sys.path.append(ROOT_DIR)
12
+ load_dotenv(dotenv_path=os.path.join(ROOT_DIR, '.env'))
13
+
14
+ def export_performance_data():
15
+ """
16
+ Connects to the Supabase database, fetches data from approved submissions,
17
+ and saves it to a CSV file for training the performance prediction model.
18
+ """
19
+ print("--- Starting Performance Data Export Process ---")
20
+ db_url = os.getenv("DATABASE_URL")
21
+ if not db_url:
22
+ print("🔴 ERROR: DATABASE_URL not found. Please check your .env file in the ai-service root.")
23
+ return
24
+
25
+ # Yeh SQL query hamare AI model ke liye 'khana' (training data) nikalegi.
26
+ sql_query = """
27
+ SELECT
28
+ cs.likes,
29
+ cs.comments,
30
+ cs.caption,
31
+ ip.follower_count,
32
+ CASE
33
+ WHEN c.title ILIKE '%tech%' OR c.description ILIKE '%tech%' THEN 'Tech'
34
+ WHEN c.title ILIKE '%fashion%' OR c.description ILIKE '%fashion%' THEN 'Fashion'
35
+ WHEN c.title ILIKE '%food%' OR c.description ILIKE '%food%' THEN 'Food'
36
+ WHEN c.title ILIKE '%gaming%' OR c.description ILIKE '%gaming%' THEN 'Gaming'
37
+ ELSE 'General'
38
+ END AS campaign_niche,
39
+ CASE
40
+ WHEN c.content_guidelines ILIKE '%reel%' THEN 'Reel'
41
+ WHEN c.content_guidelines ILIKE '%story%' THEN 'Story'
42
+ ELSE 'Post'
43
+ END AS content_format
44
+ FROM
45
+ public.campaign_submissions cs
46
+ JOIN
47
+ public.campaigns c ON cs.campaign_id = c.id
48
+ JOIN
49
+ public.influencer_profiles ip ON cs.influencer_id = ip.profile_id
50
+ WHERE
51
+ cs.status = 'approved' -- Sirf approved submissions se seekhein
52
+ AND cs.likes IS NOT NULL -- Jin par likes ka data ho
53
+ AND cs.comments IS NOT NULL -- Jin par comments ka data ho
54
+ AND cs.caption IS NOT NULL -- Jin par caption ho
55
+ AND ip.follower_count > 0; -- Jin influencers ke followers pata ho
56
+ """
57
+ try:
58
+ print("Connecting to Supabase to fetch performance data...")
59
+ engine = create_engine(db_url)
60
+
61
+ with engine.connect() as connection:
62
+ df = pd.DataFrame(connection.execute(text(sql_query)))
63
+
64
+ print(f"✅ Fetched {len(df)} approved submission records from the database.")
65
+
66
+ except Exception as e:
67
+ print(f"🔴 ERROR fetching data: {e}")
68
+ return
69
+
70
+ if df.empty:
71
+ print("⚠️ No valid training data found. A blank CSV will be created.")
72
+ else:
73
+ # Feature Engineering: Caption ki lambai (length) ko ek feature banayein
74
+ df['caption_length'] = df['caption'].str.len()
75
+
76
+ # Sirf zaroori columns ko CSV me save karein
77
+ columns_to_save = ['likes', 'comments', 'follower_count', 'caption_length', 'campaign_niche', 'content_format']
78
+ # Agar koi column na ho (khaali df ke case mein), toh use ignore karein
79
+ df_to_save = df.reindex(columns=columns_to_save).fillna(0)
80
+
81
+ # Data ko /data folder mein save karein
82
+ output_path = os.path.join(ROOT_DIR, 'data', 'performance_training_data.csv')
83
+ df_to_save.to_csv(output_path, index=False)
84
+ print(f"🎉 Success! Performance data saved to {output_path}")
85
+
86
+ if __name__ == '__main__':
87
+ export_performance_data()
scripts/export_revenue_data.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # FILE: ai-service/scripts/export_revenue_data.py (NEW FILE)
2
+
3
+ import pandas as pd
4
+ from sqlalchemy import create_engine, text
5
+ import os
6
+ from dotenv import load_dotenv
7
+ import sys
8
+ from pathlib import Path
9
+
10
+ def export_revenue_data():
11
+ """
12
+ Connects to the database, fetches all 'paid' invoices, aggregates
13
+ the revenue by month, and saves it to a time-series CSV file.
14
+ """
15
+ print("--- Starting Revenue Data Export Process ---")
16
+
17
+ # --- Setup to find the .env file in the root directory ---
18
+ try:
19
+ # Assumes the script is in a 'scripts' folder, two levels down from root.
20
+ # E.g. /ai-service/scripts/ -> /ai-service/
21
+ # If your script is elsewhere, adjust the Path().resolve().parents index.
22
+ root_dir = Path(__file__).resolve().parents[1]
23
+ sys.path.append(str(root_dir))
24
+ load_dotenv(dotenv_path=root_dir / '.env')
25
+ db_url = os.getenv("DATABASE_URL")
26
+ if not db_url:
27
+ raise ValueError("DATABASE_URL not found in .env file.")
28
+ except Exception as e:
29
+ print(f"🔴 ERROR setting up environment: {e}")
30
+ return
31
+
32
+ # --- SQL Query to get monthly revenue from paid invoices ---
33
+ # It's important to have an 'updated_at' field that is correctly set when status becomes 'paid'.
34
+ # We assume 'updated_at' is the payment date for this query.
35
+ sql_query = """
36
+ SELECT
37
+ date_trunc('month', updated_at)::date AS month,
38
+ SUM(amount) AS total_revenue
39
+ FROM
40
+ public.invoices
41
+ WHERE
42
+ status = 'paid'
43
+ GROUP BY
44
+ month
45
+ ORDER BY
46
+ month ASC;
47
+ """
48
+ try:
49
+ print("Connecting to Supabase to fetch revenue data...")
50
+ engine = create_engine(db_url)
51
+
52
+ with engine.connect() as connection:
53
+ df = pd.DataFrame(connection.execute(text(sql_query)))
54
+
55
+ print(f"✅ Fetched {len(df)} months of revenue data from the database.")
56
+
57
+ except Exception as e:
58
+ print(f"🔴 ERROR fetching revenue data: {e}")
59
+ return
60
+
61
+ if df.empty:
62
+ print("⚠️ No 'paid' invoices found. An empty CSV will be created.")
63
+ df = pd.DataFrame(columns=['month', 'total_revenue']) # Ensure CSV has correct headers
64
+
65
+ # --- Save the data to the /data folder ---
66
+ output_path = root_dir / 'data' / 'revenue_training_data.csv'
67
+ df.to_csv(output_path, index=False)
68
+ print(f"🎉 Success! Revenue data saved to {output_path}")
69
+
70
+ if __name__ == '__main__':
71
+ export_revenue_data()
scripts/export_training_data.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # File: ai-service/scripts/export_training_data.py (FINAL VERSION)
2
+
3
+ import pandas as pd
4
+ from sqlalchemy import create_engine, text
5
+ import os
6
+ from dotenv import load_dotenv
7
+ import sys
8
+
9
+ # Get the root directory of the ai-service
10
+ ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
11
+ sys.path.append(ROOT_DIR)
12
+ load_dotenv(dotenv_path=os.path.join(ROOT_DIR, '.env'))
13
+
14
+ def export_data():
15
+ """Connects to the database and saves the training data to a CSV file."""
16
+ print("--- Starting Data Export Process ---")
17
+ db_url = os.getenv("DATABASE_URL")
18
+ if not db_url:
19
+ print("🔴 ERROR: DATABASE_URL not found.")
20
+ return
21
+
22
+ # SQL Query waisi hi rahegi
23
+ sql_query = """
24
+ SELECT
25
+ cs.engagement_rate, ip.follower_count, p.amount AS payment_amount,
26
+ CASE
27
+ WHEN c.title ILIKE '%tech%' OR c.description ILIKE '%tech%' THEN 'Tech'
28
+ WHEN c.title ILIKE '%fashion%' OR c.description ILIKE '%fashion%' THEN 'Fashion'
29
+ WHEN c.title ILIKE '%food%' OR c.description ILIKE '%food%' THEN 'Food'
30
+ WHEN c.title ILIKE '%gaming%' OR c.description ILIKE '%gaming%' THEN 'Gaming'
31
+ ELSE 'General'
32
+ END AS campaign_niche,
33
+ CASE
34
+ WHEN c.content_guidelines ILIKE '%reel%' THEN 'Reel'
35
+ WHEN c.content_guidelines ILIKE '%story%' THEN 'Story'
36
+ ELSE 'Post'
37
+ END AS content_format
38
+ FROM public.campaign_submissions cs
39
+ JOIN public.campaigns c ON cs.campaign_id = c.id
40
+ JOIN public.payments p ON c.id = p.campaign_id AND cs.influencer_id = p.influencer_id
41
+ JOIN public.influencer_profiles ip ON cs.influencer_id = ip.profile_id
42
+ WHERE cs.engagement_rate IS NOT NULL AND ip.follower_count > 0 AND p.amount IS NOT NULL;
43
+ """
44
+ try:
45
+ print("Connecting to Supabase to fetch live data...")
46
+ engine = create_engine(db_url)
47
+
48
+ # === ✨ THE FIX IS HERE ✨ ===
49
+ # Hum ab seedhe connection ka istemal karke data nikalenge
50
+ with engine.connect() as connection:
51
+ # `text()` function zaroori hai SQLAlchemy ke naye versions ke liye
52
+ result = connection.execute(text(sql_query))
53
+ rows = result.fetchall() # Saari rows ko ek list mein nikaalo
54
+
55
+ # Agar koi data nahi mila
56
+ if not rows:
57
+ print("⚠️ WARNING: No training data found in the database. An empty CSV will be created.")
58
+ df = pd.DataFrame()
59
+ else:
60
+ # Us list of rows se DataFrame banao
61
+ df = pd.DataFrame(rows, columns=result.keys())
62
+ # === ✨ FIX ENDS HERE ✨ ===
63
+
64
+ print(f"✅ Fetched {len(df)} records from the database.")
65
+
66
+ except Exception as e:
67
+ print(f"🔴 ERROR fetching data: {e}")
68
+ return
69
+
70
+ # Data ko /data folder mein save karo
71
+ output_path = os.path.join(ROOT_DIR, 'data', 'earnings_training_data.csv')
72
+ df.to_csv(output_path, index=False)
73
+ print(f"🎉 Success! Data saved to {output_path}")
74
+
75
+ if __name__ == '__main__':
76
+ export_data()
scripts/ingest_data.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import uuid
4
+
5
+ # Ensure we can import from core
6
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
7
+
8
+ from core.rag.store import VectorStore
9
+
10
+ def ingest_knowledge_base():
11
+ # Initialize DB
12
+ print("🚀 Connecting to Vector Database...")
13
+ try:
14
+ store = VectorStore()
15
+ except Exception as e:
16
+ print(f"❌ Error initializing DB: {e}")
17
+ return
18
+
19
+ base_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "knowledge_base")
20
+
21
+ documents = []
22
+ metadatas = []
23
+ ids = []
24
+
25
+ print(f"📂 Scanning folder: {base_path}")
26
+
27
+ if not os.path.exists(base_path):
28
+ print(f"⚠️ Knowledge base folder not found at {base_path}")
29
+ return
30
+
31
+ # Saari files scan karo recursive tareeke se
32
+ for root, _, files in os.walk(base_path):
33
+ for file in files:
34
+ if file.endswith(".md") or file.endswith(".txt"):
35
+ file_path = os.path.join(root, file)
36
+
37
+ try:
38
+ with open(file_path, "r", encoding="utf-8") as f:
39
+ content = f.read()
40
+ if len(content.strip()) < 10: continue # Skip empty files
41
+
42
+ # Content aur Meta data ready karo
43
+ documents.append(content)
44
+ metadatas.append({"source": file, "category": os.path.basename(root)})
45
+ ids.append(str(uuid.uuid4()))
46
+
47
+ print(f" - Prepared: {file}")
48
+ except Exception as e:
49
+ print(f" - ⚠️ Skipped {file}: {e}")
50
+
51
+ # DB mein daalo
52
+ if documents:
53
+ print(f"💾 Saving {len(documents)} documents to ChromaDB...")
54
+ store.add_text(documents, metadatas, ids)
55
+ print("✅ Knowledge Injection Complete!")
56
+ else:
57
+ print("⚠️ No valid documents found to ingest.")
58
+
59
+ if __name__ == "__main__":
60
+ ingest_knowledge_base()