amitbhatt6075 commited on
Commit
0914e96
·
0 Parent(s):

Complete fresh start - FINAL UPLOAD

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +36 -0
  2. .gitignore +15 -0
  3. Dockerfile +34 -0
  4. README.md +11 -0
  5. api/__init__.py +0 -0
  6. api/main.py +2017 -0
  7. core/__init__.py +0 -0
  8. core/anomaly_detector.py +31 -0
  9. core/creative_chat.py +125 -0
  10. core/document_parser.py +38 -0
  11. core/guardrails/safety.py +23 -0
  12. core/inference/cache.py +30 -0
  13. core/matcher.py +44 -0
  14. core/predictor.py +83 -0
  15. core/rag/store.py +46 -0
  16. core/strategist.py +609 -0
  17. core/support_agent.py +169 -0
  18. core/utils.py +77 -0
  19. embedding_model/1_Pooling/config.json +10 -0
  20. embedding_model/README.md +173 -0
  21. embedding_model/config.json +25 -0
  22. embedding_model/config_sentence_transformers.json +14 -0
  23. embedding_model/model.safetensors +3 -0
  24. embedding_model/modules.json +20 -0
  25. embedding_model/sentence_bert_config.json +4 -0
  26. embedding_model/special_tokens_map.json +37 -0
  27. embedding_model/tokenizer.json +0 -0
  28. embedding_model/tokenizer_config.json +65 -0
  29. embedding_model/vocab.txt +0 -0
  30. knowledge_base/brand/01_campaign_creation.md +21 -0
  31. knowledge_base/brand/02_understanding_status.md +13 -0
  32. knowledge_base/common/first_faq.md +19 -0
  33. knowledge_base/influencer/02_payments.md +32 -0
  34. models/budget_predictor_v1.joblib +3 -0
  35. models/comments_predictor_v1.joblib +3 -0
  36. models/earnings_encoder.joblib +3 -0
  37. models/earnings_model.joblib +3 -0
  38. models/influencer_matcher_v1.joblib +3 -0
  39. models/likes_predictor_v1.joblib +3 -0
  40. models/payout_forecaster_v1.joblib +3 -0
  41. models/performance_predictor_v1.joblib +3 -0
  42. models/performance_scorer_v1.joblib +3 -0
  43. models/revenue_forecaster_v1.joblib +3 -0
  44. requirements.txt +24 -0
  45. scripts/download_embedding_model.py +39 -0
  46. scripts/download_model.py +52 -0
  47. scripts/export_performance_data.py +87 -0
  48. scripts/export_revenue_data.py +71 -0
  49. scripts/export_training_data.py +76 -0
  50. scripts/ingest_data.py +60 -0
.gitattributes ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.gguf filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python cache
2
+ __pycache__/
3
+ *.pyc
4
+
5
+ # Virtual Environments
6
+ .env
7
+ .venv
8
+ venv/
9
+ env/
10
+
11
+ # IGNORE THE ENTIRE LLM FOLDER
12
+ /llm_model/
13
+
14
+ # IGNORE local data files
15
+ /data/
Dockerfile ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Start with a stable Python image
2
+ FROM python:3.11-slim
3
+
4
+ # Set environment variables for non-interactive installs
5
+ ENV PYTHONUNBUFFERED=1
6
+ ENV APP_HOME=/app
7
+
8
+ # Set the working directory
9
+ WORKDIR $APP_HOME
10
+
11
+ # Install system dependencies needed for libraries like llama-cpp-python
12
+ RUN apt-get update && apt-get install -y --no-install-recommends \
13
+ build-essential \
14
+ && apt-get clean \
15
+ && rm -rf /var/lib/apt/lists/*
16
+
17
+ # Copy ONLY the requirements file to leverage Docker's cache
18
+ COPY requirements.txt ./
19
+
20
+ # Install Python dependencies
21
+ # CMAKE_ARGS is needed for llama-cpp-python to build correctly
22
+ ENV CMAKE_ARGS="-DLLAMA_CUBLAS=on"
23
+ RUN pip install --no-cache-dir --upgrade pip && \
24
+ pip install --no-cache-dir -r requirements.txt
25
+
26
+ # Copy the entire application code into the container
27
+ COPY . .
28
+
29
+ # Expose the port the app will run on
30
+ EXPOSE 7860
31
+
32
+ # The command to run your FastAPI application
33
+ # This starts the server when the Docker container launches
34
+ CMD ["uvicorn", "api.main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Reachify Ai Service
3
+ emoji: 🌖
4
+ colorFrom: indigo
5
+ colorTo: indigo
6
+ sdk: docker
7
+ pinned: false
8
+ license: mit
9
+ ---
10
+
11
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
api/__init__.py ADDED
File without changes
api/main.py ADDED
@@ -0,0 +1,2017 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # File Location: ai-service/api/main.py
2
+
3
+ import os
4
+ import sys
5
+ import joblib
6
+ import pandas as pd
7
+ import json
8
+ import re
9
+ import uuid
10
+
11
+ from huggingface_hub import hf_hub_download
12
+
13
+ from fastapi import FastAPI, HTTPException, Depends, BackgroundTasks
14
+ from supabase import Client
15
+
16
+ from pydantic import BaseModel, Field
17
+ from pydantic.config import ConfigDict
18
+
19
+ from typing import List, Optional, Any, Dict
20
+ import traceback
21
+ from llama_cpp import Llama
22
+ from statsmodels.tsa.api import Holt
23
+ from dateutil.relativedelta import relativedelta
24
+ from sklearn.preprocessing import LabelEncoder
25
+ from core.support_agent import SupportAgent
26
+ from core.strategist import AIStrategist
27
+ from core.predictor import rank_influencers_by_match
28
+ from core.utils import get_supabase_client
29
+ from core.anomaly_detector import find_anomalies
30
+ from core.matcher import load_embedding_model, rank_documents_by_similarity
31
+ from core.utils import get_supabase_client, extract_colors_from_url
32
+ from core.document_parser import parse_pdf_from_url
33
+ from core.creative_chat import director
34
+
35
# Optional RAG/caching stack: degrade gracefully when those subpackages are
# absent so the core API can still boot.
try:
    from core.rag.store import VectorStore
    from core.inference.cache import cached_response
except ImportError:
    # No vector store available — RAG endpoints will be disabled at startup.
    VectorStore = None

    # No-op fallback so decorated handlers still work without a cache layer.
    def cached_response(func): return func

# Project root (parent of api/) and the bundled joblib model directory.
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
MODELS_DIR = os.path.join(ROOT_DIR, 'models')

# === FIX #2: Dynamic Model Downloading Logic ===
# The GGUF LLM is fetched at startup instead of being baked into the image.
MODEL_REPO = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
MODEL_FILENAME = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
# Hugging Face Spaces provides a writable directory (WRITABLE_DIR); fall back
# to /tmp when the env var is unset.
MODEL_SAVE_DIRECTORY = os.path.join(os.environ.get("WRITABLE_DIR", "/tmp"), "llm_model")
# Final on-disk path of the model once downloaded.
LLAMA_MODEL_PATH = os.path.join(MODEL_SAVE_DIRECTORY, MODEL_FILENAME)
# ===============================================

# Sentence-transformer used for similarity ranking; prefer the local copy
# shipped in embedding_model/, otherwise pull by hub name.
EMBEDDING_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'
EMBEDDING_MODEL_PATH = os.path.join(ROOT_DIR, 'embedding_model')
# Persistent vector DB location (same writable-dir convention as the LLM).
DB_PATH = os.path.join(os.environ.get("WRITABLE_DIR", "/tmp"), "vector_db_persistent")

FINAL_EMBEDDING_PATH = EMBEDDING_MODEL_PATH if os.path.exists(EMBEDDING_MODEL_PATH) else EMBEDDING_MODEL_NAME

# Process-wide singletons, populated once in startup_event(); None means the
# corresponding feature is unavailable and its endpoint should fail softly.
_llm_instance: Optional[Llama] = None
_vector_store: Optional[Any] = None
_ai_strategist: Optional[AIStrategist] = None
_support_agent: Optional[SupportAgent] = None
_budget_predictor = None
_influencer_matcher = None
_performance_predictor = None
_payout_forecaster = None
_earnings_optimizer = None
_earnings_encoder = None
_likes_predictor = None
_comments_predictor = None
_revenue_forecaster = None
_performance_scorer = None
77
+
78
+ def to_snake(name: str) -> str:
79
+ return re.sub(r'(?<!^)(?=[A-Z])', '_', name).lower()
80
+
81
+ class ChatMessage(BaseModel):
82
+ role: str # "user" or "assistant"
83
+ content: str
84
+
85
+ class CreativeChatRequest(BaseModel):
86
+ message: str
87
+ history: List[ChatMessage]
88
+ task_context: str
89
+
90
+ class FinalizeScriptRequest(BaseModel):
91
+ history: List[ChatMessage]
92
+ task_context: str
93
+
94
+ class FinalScriptResponse(BaseModel):
95
+ hook: str
96
+ script: str
97
+ visuals: List[str]
98
+ tools: List[str]
99
class ChatQuery(BaseModel):
    """Incoming support-chat question with its role and live-data context."""
    question: str = Field(..., min_length=1)
    role: str
    live_data: str
    conversationId: str

class ChatAnswer(BaseModel):
    """Support-chat reply plus the optional retrieval context used."""
    response: str
    context: Optional[str] = None

class ChatResponseRequest(BaseModel):
    """Direct chat request carrying backend-supplied context."""
    prompt: str = Field(..., description="The user's direct question.")
    context: str = Field(..., description="The real-time data context from the backend.")

class ChatResponsePayload(BaseModel):
    """Plain-text chat response."""
    response: str

class CaptionRequest(BaseModel):
    """A caption and the action to apply to it."""
    caption: str
    action: str

class CaptionResponse(BaseModel):
    """Rewritten caption result."""
    new_caption: str

class BudgetRequest(BaseModel):
    """Campaign parameters used to predict a budget."""
    campaign_goal: str
    influencer_count: int
    platform: str
    location: str
    category: str
    final_reach: int

class BudgetResponse(BaseModel):
    """Predicted campaign budget in USD."""
    predicted_budget_usd: float

class MatcherRequest(BaseModel):
    """Campaign/audience features for influencer matching."""
    campaign_description: str
    target_audience_age: str
    target_audience_gender: str
    engagement_rate: float
    followers: int
    country: str
    niche: str

class MatcherResponse(BaseModel):
    """IDs of the suggested influencers, best match first."""
    suggested_influencer_ids: List[int]

class PerformanceRequest(BaseModel):
    """Campaign parameters for performance prediction."""
    budget_usd: float
    influencer_count: int
    platform: str
    location: str
    category: str
    budget: float

class PerformanceResponse(BaseModel):
    """Predicted engagement rate and reach."""
    predicted_engagement_rate: float
    predicted_reach: int
125
+ class StrategyRequest(BaseModel):
126
+ prompt: str
127
+ class StrategyResponse(BaseModel):
128
+ response: str
129
+ class OutlineRequest(BaseModel):
130
+ title: str
131
+ class OutlineResponse(BaseModel):
132
+ outline: str
133
+
134
+ class TaskPrioritizationRequest(BaseModel):
135
+ title: str
136
+ description: Optional[str] = None
137
+
138
+ class TaskPrioritizationResponse(BaseModel):
139
+ priority: str
140
+
141
+ class DashboardInsightsRequest(BaseModel):
142
+ """ The KPI data sent from the main backend. """
143
+ total_revenue_monthly: float
144
+ new_users_weekly: int
145
+ active_campaigns: int
146
+ pending_approvals: int
147
+
148
+ class TimeSeriesDataPoint(BaseModel):
149
+ date: str
150
+ value: float
151
+
152
+ class TimeSeriesForecastRequest(BaseModel):
153
+ data: List[TimeSeriesDataPoint]
154
+ periods_to_predict: int
155
+ business_context: Optional[str] = "No specific context provided."
156
+
157
+ class SmartForecastDataPoint(BaseModel):
158
+ date: str
159
+ predicted_value: float
160
+ trend: str
161
+ commentary: Optional[str] = None
162
+
163
+ class TimeSeriesForecastResponse(BaseModel):
164
+ forecast: List[SmartForecastDataPoint]
165
+
166
+ class HealthKpiRequest(BaseModel):
167
+ platformRevenue: float
168
+ activeCampaigns: int
169
+ totalBrands: int
170
+
171
+ class HealthSummaryResponse(BaseModel):
172
+ summary: str
173
+
174
+ class InfluencerData(BaseModel):
175
+ id: str
176
+ name: Optional[str] = None
177
+ handle: Optional[str] = None
178
+ followers: Optional[int] = 0
179
+ category: Optional[str] = None
180
+ bio: Optional[str] = None
181
+
182
+ class TeamStrategyRequest(BaseModel):
183
+ brand_name: str
184
+ campaign_goal: str
185
+ target_audience: str
186
+ budget_range: str
187
+ influencers: List[InfluencerData]
188
+
189
+ class CreativeBrief(BaseModel):
190
+ title: str
191
+ description: str
192
+ goal_kpi: str
193
+ content_guidelines: List[str]
194
+
195
+ class TeamStrategyResponse(BaseModel):
196
+ success: bool
197
+ strategy: Optional[CreativeBrief] = None
198
+ suggested_influencers: Optional[List[InfluencerData]] = None
199
+ error: Optional[str] = None
200
+
201
+ class AnalyticsInsightsRequest(BaseModel):
202
+ """Data structure for requesting analytics insights."""
203
+ totalReach: Optional[int] = 0
204
+ totalLikes: Optional[int] = 0
205
+ averageEngagementRate: Optional[float] = 0.0
206
+ topPerformingInfluencer: Optional[str] = "N/A"
207
+
208
+ class AnalyticsInsightsResponse(BaseModel):
209
+ """The response containing the generated insights."""
210
+ insights: str
211
+
212
class CampaignDetailsForMatch(BaseModel):
    """Only the campaign details that are needed for matching."""
    description: Optional[str] = ""
    goal_kpi: Optional[str] = ""
    category: Optional[str] = ""

class InfluencerRankRequest(BaseModel):
    """Format of the request coming from the backend."""
    campaign_details: CampaignDetailsForMatch
    influencers: List[InfluencerData]

class InfluencerRankResponse(BaseModel):
    """Format of the response returned by the AI service."""
    ranked_influencers: List[InfluencerData]
226
+
227
+ class WeeklySummaryRequest(BaseModel):
228
+ start_date: str
229
+ end_date: str
230
+ total_ad_spend: float
231
+ total_clicks: int
232
+ new_followers: int
233
+ top_performing_campaign: str
234
+
235
+ class WeeklySummaryResponse(BaseModel):
236
+ summary: str
237
+
238
+ class PayoutForecastInput(BaseModel):
239
+ total_budget_active_campaigns: float = Field(..., description="The sum of budgets for all of a manager's currently active campaigns.")
240
+
241
+ class PayoutForecastOutput(BaseModel):
242
+ forecastedAmount: float
243
+ commentary: str
244
+
245
+ class ContentQualityRequest(BaseModel):
246
+ caption: str = Field(..., description="The caption text to be analyzed.")
247
+
248
+ class ContentQualityScore(BaseModel):
249
+ readability: int
250
+ engagement: int
251
+ call_to_action: int
252
+ hashtag_strategy: int
253
+
254
+ class ContentQualityResponse(BaseModel):
255
+ overall_score: float
256
+ scores: ContentQualityScore
257
+ feedback: str
258
+
259
+ class CampaignForRanking(BaseModel):
260
+ id: int
261
+ description: Optional[str] = ""
262
+
263
+ class InfluencerForRanking(BaseModel):
264
+ id: str
265
+ category: Optional[str] = "Fashion"
266
+ bio: Optional[str] = ""
267
+
268
+ class RankCampaignsRequest(BaseModel):
269
+ influencer: InfluencerForRanking
270
+ campaigns: List[CampaignForRanking]
271
+
272
+ class RankedCampaignResult(BaseModel):
273
+ campaign_id: int
274
+ score: float
275
+
276
+ class RankCampaignsResponse(BaseModel):
277
+ ranked_campaigns: List[RankedCampaignResult]
278
+
279
+ class CaptionAssistRequest(BaseModel):
280
+ caption: str
281
+ action: str = Field(..., description="Action to perform: 'improve', 'hashtags', or 'check_guidelines'")
282
+ guidelines: Optional[str] = None # For the 'check_guidelines' action
283
+
284
+ class CaptionAssistResponse(BaseModel):
285
+ new_text: str
286
+
287
+ class ForecastRequest(BaseModel):
288
+ budget: float
289
+ category: str
290
+ follower_count: int
291
+ engagement_rate: float
292
+
293
+ class PerformanceForecast(BaseModel):
294
+ predicted_engagement_rate: float
295
+ predicted_reach: int
296
+
297
+ class PayoutForecast(BaseModel):
298
+ estimated_earning: float
299
+
300
+ class ForecastResponse(BaseModel):
301
+ performance: PerformanceForecast
302
+ payout: PayoutForecast
303
+
304
+ class InfluencerKpiData(BaseModel):
305
+ totalReach: int
306
+ totalLikes: int
307
+ totalComments: int
308
+ avgEngagementRate: float
309
+ totalSubmissions: int
310
+
311
+ class InfluencerAnalyticsSummaryResponse(BaseModel):
312
+ summary: str
313
+
314
+ class PortfolioOption(BaseModel):
315
+ id: str
316
+ contentUrl: str
317
+ caption: Optional[str] = ""
318
+ likes: Optional[int] = 0 # Likes ko ab hum use karenge
319
+ campaign: dict
320
+
321
+ class CuratePortfolioRequest(BaseModel):
322
+ submissions: List[PortfolioOption]
323
+
324
+ class CuratePortfolioResponse(BaseModel):
325
+ featured_submission_ids: List[str]
326
+
327
+ class EarningOpportunityRequest(BaseModel):
328
+ follower_count: int = Field(..., description="Influencer ke current followers")
329
+
330
+ class Opportunity(BaseModel):
331
+ campaign_niche: str
332
+ content_format: str
333
+ estimated_score: float
334
+ commentary: str
335
+
336
+ class EarningOpportunityResponse(BaseModel):
337
+ opportunities: List[Opportunity]
338
+
339
+ class PostPerformanceRequest(BaseModel):
340
+ follower_count: int
341
+ caption_length: int
342
+ campaign_niche: str
343
+ content_format: str
344
+
345
+ class PostPerformanceResponse(BaseModel):
346
+ predicted_likes: int
347
+ predicted_comments: int
348
+ feedback: str
349
+
350
+ class AnomalyInsight(BaseModel):
351
+ influencer_id: str
352
+ influencer_name: str
353
+ insights: List[str]
354
+
355
+ class RevenueForecastDatapoint(BaseModel):
356
+ month: str
357
+ predicted_revenue: float
358
+ trend: str
359
+
360
+ class RevenueForecastResponse(BaseModel):
361
+ forecast: List[RevenueForecastDatapoint]
362
+ ai_commentary: str
363
+
364
+ class InfluencerPerformanceStats(BaseModel):
365
+ avg_engagement_rate: float
366
+ on_time_submission_rate: float
367
+ avg_brand_rating: float
368
+ monthly_earnings: float
369
+
370
+ class InfluencerPerformanceResponse(BaseModel):
371
+ performance_score: int
372
+
373
+ class MatchDocument(BaseModel):
374
+ id: str
375
+ text: str
376
+ match_score: Optional[int] = None
377
+
378
+ class RankBySimilarityRequest(BaseModel):
379
+ query: str
380
+ documents: List[MatchDocument]
381
+
382
+ class RankBySimilarityResponse(BaseModel):
383
+ ranked_documents: List[MatchDocument]
384
+
385
# NOTE(review): ContentQualityRequest/Score/Response below are byte-for-byte
# duplicates of the definitions earlier in this file (diff lines 245-257);
# this second copy silently shadows the first — consider deleting one.
class ContentQualityRequest(BaseModel):
    caption: str = Field(..., description="The caption text to be analyzed.")

class ContentQualityScore(BaseModel):
    # Each dimension is scored as an integer (scale not stated here — presumably
    # 0-10 or 0-100; confirm against the scoring endpoint's prompt).
    readability: int
    engagement: int
    call_to_action: int
    hashtag_strategy: int

class ContentQualityResponse(BaseModel):
    # Aggregate of the per-dimension scores plus free-text feedback.
    overall_score: float
    scores: ContentQualityScore
    feedback: str
398
+
399
+ class DailyBriefingData(BaseModel):
400
+ roster_size: int
401
+ on_bench_influencers: int
402
+ pending_submissions: int
403
+ revisions_requested: int
404
+ lowest_ai_score: Optional[int] = None
405
+ highest_pending_payout: float
406
+
407
+ class DailyBriefingResponse(BaseModel):
408
+ briefing_text: str
409
+
410
+ class ContractURL(BaseModel):
411
+ pdf_url: str
412
+
413
+ class ContractSummary(BaseModel):
414
+ payment_details: str
415
+ deliverables: str
416
+ deadlines: str
417
+ exclusivity: str
418
+ ownership: str
419
+ summary_points: List[str]
420
+
421
# NOTE(review): this pair duplicates the InfluencerPerformanceStats /
# InfluencerPerformanceResponse defined earlier (diff lines 364-372);
# the redefinition shadows the first copy — consider removing one.
class InfluencerPerformanceStats(BaseModel):
    """
    Data coming from the backend. Matches 'get_real_stats_for_influencer'
    in 'helpers.py'.
    """
    avg_engagement_rate: float
    on_time_submission_rate: float
    avg_brand_rating: float
    monthly_earnings: float

class InfluencerPerformanceResponse(BaseModel):
    """
    Response returned by the AI service: a single score.
    """
    performance_score: int
435
+
436
+
437
class AIGrowthPlanRequest(BaseModel):
    """Data from the backend containing the influencer's live stats."""
    fullName: str
    category: Optional[str] = None
    avgEngagementRate: float
    monthlyEarnings: float
    onTimeSubmissionRate: float
    # Captions of the influencer's best/worst posts, used as LLM context.
    bestPostCaption: Optional[str] = None
    worstPostCaption: Optional[str] = None

class AIGrowthPlanResponse(BaseModel):
    """Response returned by the AI service."""
    insights: List[str]
450
+
451
+ class BrandAssetAnalysisRequest(BaseModel):
452
+ file_url: str = Field(..., description="URL of the logo or brand image")
453
+ asset_type: str = "logo"
454
+
455
+ class BrandAssetAnalysisResponse(BaseModel):
456
+ dominant_colors: List[str]
457
+
458
+
459
+ class ServiceBlueprintRequest(BaseModel):
460
+ service_type: str = Field(..., description="e.g., 'web-dev' or 'growth'")
461
+ requirements: str = Field(..., min_length=10)
462
+
463
+ class ServiceBlueprintResponse(BaseModel):
464
+ title: str
465
+ deliverables: List[str]
466
+ stack: str
467
+ price_est: str
468
+ timeline: str
469
+
470
+ class GrowthPlanRequest(BaseModel):
471
+ platform_handle: str
472
+ goals: str
473
+ challenges: str
474
+
475
+ class AISummaryJobRequest(BaseModel):
476
+ checkin_id: int
477
+ raw_text: str
478
+
479
+ class WeeklyCheckinSummaryResponse(BaseModel):
480
+ wins: List[str]
481
+ challenges: List[str]
482
+ opportunities: List[str]
483
+ sentiment: str
484
+
485
+ class WeeklyPlanContext(BaseModel):
486
+ niche: str
487
+ current_mood: str
488
+ recent_achievements: List[str]
489
+ active_trends: List[Dict[str, str]]
490
+
491
+ class WeeklyPlanRequest(BaseModel):
492
+ context: WeeklyPlanContext
493
+
494
+ class PlanOption(BaseModel):
495
+ type: str
496
+ title: str
497
+ platform: str
498
+ contentType: str
499
+ instructions: str
500
+ reasoning: str
501
+
502
+ class WeeklyPlanResponse(BaseModel):
503
+ options: List[PlanOption]
504
+
505
+ app = FastAPI(title="Reachify AI Service (Deploy-Ready)", version="11.0.0")
506
+
507
+ @app.on_event("startup")
508
+ def startup_event():
509
+ global _llm_instance, _ai_strategist, _support_agent, _vector_store, \
510
+ _budget_predictor, _influencer_matcher, _performance_predictor, _payout_forecaster, \
511
+ _earnings_optimizer, _earnings_encoder, _likes_predictor, _comments_predictor, \
512
+ _revenue_forecaster, _performance_scorer
513
+
514
+ print("--- 🚀 AI Service Starting Up (Hugging Face Mode)... ---")
515
+
516
+ # === FIX #3: The Model Download and Loading Logic ===
517
+ try:
518
+ # Step 1: Download the model if it doesn't exist
519
+ os.makedirs(MODEL_SAVE_DIRECTORY, exist_ok=True)
520
+ if not os.path.exists(LLAMA_MODEL_PATH):
521
+ print(f" - LLM model not found locally. Downloading '{MODEL_FILENAME}'...")
522
+ hf_hub_download(
523
+ repo_id=MODEL_REPO,
524
+ filename=MODEL_FILENAME,
525
+ local_dir=MODEL_SAVE_DIRECTORY,
526
+ local_dir_use_symlinks=False # This is safer for containers
527
+ )
528
+ print(" - ✅ Model downloaded successfully.")
529
+ else:
530
+ print(f" - LLM model found at {LLAMA_MODEL_PATH}. Skipping download.")
531
+
532
+ # Step 2: Now that the file is guaranteed to be there, load it.
533
+ print(" - Loading Llama LLM into memory from downloaded file...")
534
+ _llm_instance = Llama(
535
+ model_path=LLAMA_MODEL_PATH,
536
+ n_gpu_layers=0, # Ensure CPU usage on free tier
537
+ n_ctx=2048,
538
+ verbose=False,
539
+ use_mmap=False
540
+ )
541
+ print(" - ✅ LLM Loaded into Memory on CPU.")
542
+
543
+ except Exception as e:
544
+ print(f" - ❌ FATAL ERROR: Could not download or load LLM model: {e}")
545
+ traceback.print_exc()
546
+ _llm_instance = None # Ensure it is None if it fails
547
+ # =========================================================
548
+
549
+ # --- All the rest of your startup logic remains EXACTLY THE SAME ---
550
+ if VectorStore:
551
+ try:
552
+ _vector_store = VectorStore()
553
+ print(" - ✅ RAG Engine Ready.")
554
+ except Exception:
555
+ _vector_store = None
556
+ else:
557
+ _vector_store = None
558
+
559
+ print(" - Initializing AI Strategist...")
560
+ _ai_strategist = AIStrategist(llm_instance=_llm_instance, store=_vector_store)
561
+ print(" - ✅ AI Strategist ready.")
562
+
563
+ print(" - Initializing Support Agent...")
564
+ _support_agent = SupportAgent(
565
+ llm_instance=_llm_instance,
566
+ embedding_path=EMBEDDING_MODEL_PATH,
567
+ db_path=DB_PATH
568
+ )
569
+ print(" - ✅ Support Agent ready.")
570
+
571
+ print(" - Loading ML models from joblib files...")
572
+ model_paths = {
573
+ 'budget': ('_budget_predictor', 'budget_predictor_v1.joblib'),
574
+ 'matcher': ('_influencer_matcher', 'influencer_matcher_v1.joblib'),
575
+ 'performance': ('_performance_predictor', 'performance_predictor_v1.joblib'),
576
+ 'payout': ('_payout_forecaster', 'payout_forecaster_v1.joblib'),
577
+ 'earnings': ('_earnings_optimizer', 'earnings_model.joblib'),
578
+ 'earnings_encoder': ('_earnings_encoder', 'earnings_encoder.joblib'),
579
+ 'likes_predictor': ('_likes_predictor', 'likes_predictor_v1.joblib'),
580
+ 'comments_predictor': ('_comments_predictor', 'comments_predictor_v1.joblib'),
581
+ 'revenue_forecaster': ('_revenue_forecaster', 'revenue_forecaster_v1.joblib'),
582
+ 'performance_scorer': ('_performance_scorer', 'performance_scorer_v1.joblib'),
583
+ }
584
+ for name, (var, file) in model_paths.items():
585
+ path = os.path.join(MODELS_DIR, file)
586
+ try:
587
+ globals()[var] = joblib.load(path)
588
+ print(f" - Loaded {name} model.")
589
+ except FileNotFoundError:
590
+ globals()[var] = None
591
+ print(f" - ⚠️ WARNING: Model '{name}' not found at {path}. Endpoint disabled.")
592
+
593
+ print(" - Initializing Text Embedding Model...")
594
+ load_embedding_model(EMBEDDING_MODEL_PATH)
595
+
596
+ print("\n--- ✅ AI Service is fully operational! ---")
597
+
598
+ @app.get("/", summary="Health Check")
599
+ def read_root():
600
+ return {"status": "AI Service is running"}
601
+
602
+ def _cleanup_llm_response(data: dict) -> dict:
603
+ """A robust helper to clean common messy JSON outputs from smaller LLMs."""
604
+ cleaned = { "wins": [], "challenges": [], "opportunities": [], "sentiment": "Mixed" } # Default to Mixed
605
+
606
+ # Clean list-based fields
607
+ for key in ["wins", "challenges", "opportunities"]:
608
+ if key in data and isinstance(data[key], list):
609
+ for item in data[key]:
610
+ if isinstance(item, str) and item: # Check if string is not empty
611
+ cleaned[key].append(item.strip())
612
+ elif isinstance(item, dict) and 'text' in item and isinstance(item['text'], str) and item['text']:
613
+ cleaned[key].append(item['text'].strip())
614
+
615
+ # Clean sentiment field
616
+ sentiment_data = data.get("sentiment")
617
+ if isinstance(sentiment_data, str) and sentiment_data:
618
+ # Sometimes model sends "Positive." with a period, strip it.
619
+ cleaned["sentiment"] = sentiment_data.strip().replace('.', '')
620
+ elif isinstance(sentiment_data, dict):
621
+ if sentiment_data.get('positive'): cleaned["sentiment"] = "Positive"
622
+ elif sentiment_data.get('negative'): cleaned["sentiment"] = "Negative"
623
+ else: cleaned["sentiment"] = "Mixed"
624
+
625
+ return cleaned
626
+
627
def process_summary_in_background(checkin_id: int, raw_text: str) -> None:
    """
    Long-running background task: summarize a weekly check-in with the LLM.

    Prompts the local Phi-2 model (using its official "Instruct:/Output:"
    format) to extract wins/challenges/opportunities/sentiment from
    ``raw_text`` as JSON, normalizes the result with
    ``_cleanup_llm_response``, and writes the outcome back to the
    ``influencer_weekly_checkins`` row: status ``completed`` with the
    structured summary on success, ``failed`` with an error message on any
    failure.

    Args:
        checkin_id: Primary key of the check-in row to update.
        raw_text: The influencer's free-form weekly check-in text.
    """
    print(f" - ⚙️ BACKGROUND JOB STARTED for check-in ID: {checkin_id}")

    # Each background task needs its own Supabase client.
    supabase = get_supabase_client()

    if not _llm_instance:
        # Record the failure so the row does not stay stuck in a pending state.
        print(f" - ❌ JOB FAILED ({checkin_id}): LLM instance was not available during background task.")
        supabase.table("influencer_weekly_checkins").update({
            "status": "failed", "error_message": "AI model was not loaded."
        }).eq("id", checkin_id).execute()
        return

    # Official Phi-2 prompt format: the text first, then the instruction,
    # then an "Output:" cue. Doubled braces render as literal { } in the
    # f-string.
    final_prompt = f'''Text: """{raw_text}"""

Instruct: Analyze the text above and extract key points into a single, valid JSON object with the keys "wins", "challenges", "opportunities", and "sentiment".
- "wins" should contain 1-2 positive sentences.
- "challenges" should contain 1-2 negative sentences.
- "opportunities" should contain 1-2 new ideas.
- DO NOT repeat sentences across categories.
- "sentiment" must be ONE word: "Positive", "Negative", or "Mixed".
Your entire response must ONLY be the JSON object, starting with {{ and ending with }}.

Output:
'''

    try:
        # Deterministic decode (temperature 0.0) with stop tokens that match
        # the prompt's section markers, so the model cannot ramble past the JSON.
        output = _llm_instance(
            final_prompt,
            max_tokens=1024,
            temperature=0.0,
            top_p=0.95,
            top_k=40,
            repeat_penalty=1.1,
            stop=["Instruct:", "Text:", "Output:"],  # Strict stop tokens matching the prompt
            echo=False
        )

        raw_response_text = output['choices'][0]['text'].strip()
        print(f" - 🤖 JOB ({checkin_id}): Official Phi-2 Raw Response:\n---\n{raw_response_text}\n---")

        # Tolerant JSON extraction: take the outermost {...} span, ignoring
        # any chatter the model printed around it.
        json_match = re.search(r'\{.*\}', raw_response_text, re.DOTALL)
        if not json_match:
            raise ValueError("No valid JSON object found in the LLM's response. The model may have returned plain text.")

        # Extract the JSON string and parse it.
        clean_json_text = json_match.group(0)
        summary_data_raw = json.loads(clean_json_text)

        # Normalize messy model output into the expected summary schema.
        cleaned_summary = _cleanup_llm_response(summary_data_raw)

        # SUCCESS: persist the structured summary and mark the row completed.
        print(f" - ✅ JOB ({checkin_id}): COMPLETED. Updating database with: {cleaned_summary}")
        supabase.table("influencer_weekly_checkins").update({
            "structured_summary": cleaned_summary,
            "status": "completed"
        }).eq("id", checkin_id).execute()

    except Exception as e:
        # Any failure (LLM call, JSON parse, DB write) marks the row failed
        # with the error text for later inspection.
        error_message = f"AI model failed: {str(e)}"
        print(f" - ❌ JOB FAILED for check-in ID: {checkin_id}. Error: {error_message}")
        import traceback
        traceback.print_exc()
        supabase.table("influencer_weekly_checkins").update({
            "status": "failed",
            "error_message": error_message
        }).eq("id", checkin_id).execute()
705
+
706
+ @app.post("/generate-chat-response", response_model=ChatResponsePayload, summary="Interactive AI Strategist Chat")
707
+ async def generate_chat_response_route(request: ChatResponseRequest):
708
+ print(f"\n✅ Received request on /generate-chat-response")
709
+ if not _ai_strategist:
710
+ raise HTTPException(status_code=503, detail="The AI Strategist is not available.")
711
+ try:
712
+ response_text = _ai_strategist.generate_chat_response(prompt=request.prompt, context=request.context)
713
+ return ChatResponsePayload(response=response_text)
714
+ except Exception as e:
715
+ raise HTTPException(status_code=500, detail=str(e))
716
+
717
+ @app.post("/api/v1/chat", response_model=ChatAnswer, summary="Role-Aware AI Support Agent")
718
+ async def ask_support_agent(query: ChatQuery):
719
+ if not _support_agent: raise HTTPException(status_code=503, detail="AI Support Agent is not available.")
720
+ return _support_agent.answer(payload=query.model_dump(), conversation_id=query.conversationId)
721
+
722
+ @app.post("/api/v1/generate/caption", response_model=CaptionResponse, summary="Generate variations of a caption")
723
+ async def generate_caption_route(request: CaptionRequest):
724
+ if not _support_agent: raise HTTPException(status_code=503, detail="AI Support Agent is not available.")
725
+ new_caption_text = _support_agent.generate_caption_variant(caption=request.caption, action=request.action)
726
+ return CaptionResponse(new_caption=new_caption_text)
727
+
728
+ @app.post("/generate-strategy", response_model=StrategyResponse, summary="Generate a Digital Marketing Strategy")
729
+ async def generate_strategy_route(request: StrategyRequest):
730
+ if not _support_agent:
731
+ raise HTTPException(status_code=503, detail="AI Support Agent is not available.")
732
+ try:
733
+ strategy_text = _support_agent.generate_marketing_strategy(prompt=request.prompt)
734
+ return StrategyResponse(response=strategy_text)
735
+ except Exception as e:
736
+ raise HTTPException(status_code=500, detail=f"An internal error occurred in the AI model: {e}")
737
+
738
+ @app.post("/api/v1/predict/budget", response_model=BudgetResponse, summary="Predict Campaign Budget")
739
+ async def predict_budget(request: BudgetRequest):
740
+ if not _budget_predictor: raise HTTPException(status_code=503, detail="Budget predictor is not available.")
741
+ input_data = pd.DataFrame([request.model_dump()])
742
+ prediction = _budget_predictor.predict(input_data)[0]
743
+ return BudgetResponse(predicted_budget_usd=round(prediction, 2))
744
+
745
+ @app.post("/api/v1/match/influencers", response_model=MatcherResponse, summary="Match Influencers to Campaign")
746
+ async def match_influencers(request: MatcherRequest):
747
+ if not _influencer_matcher: raise HTTPException(status_code=503, detail="Influencer matcher is not available.")
748
+ input_data = pd.DataFrame([request.model_dump()])
749
+ prediction = _influencer_matcher.predict(input_data)
750
+ integer_ids = [int(pid) for pid in prediction]
751
+ return MatcherResponse(suggested_influencer_ids=integer_ids)
752
+
753
+ @app.post("/api/v1/predict/performance", response_model=PerformanceResponse, summary="Predict Campaign Performance")
754
+ async def predict_performance(request: PerformanceRequest):
755
+ if not _performance_predictor: raise HTTPException(status_code=503, detail="Performance predictor is not available.")
756
+ input_data = pd.DataFrame([request.model_dump()])
757
+ prediction_value = _performance_predictor.predict(input_data)[0]
758
+ return PerformanceResponse(predicted_engagement_rate=0.035, predicted_reach=int(prediction_value))
759
+
760
+ @app.post("/generate-outline", response_model=OutlineResponse, summary="Generate a Blog Post Outline")
761
+ async def generate_outline_route(request: OutlineRequest):
762
+ if not _support_agent:
763
+ raise HTTPException(status_code=503, detail="AI Support Agent is not available.")
764
+ try:
765
+ outline_text = _support_agent.generate_content_outline(title=request.title)
766
+ return OutlineResponse(outline=outline_text)
767
+ except Exception as e:
768
+ raise HTTPException(status_code=500, detail=f"An internal error occurred in the AI model: {e}")
769
+
770
+ @app.post("/generate-dashboard-insights", response_model=StrategyResponse, summary="Generate Insights from Dashboard KPIs")
771
+ @cached_response # <--- ✨ NEW: Speed Booster Logic ✨
772
+ async def generate_dashboard_insights_route(request: DashboardInsightsRequest):
773
+ print(f"\n✅ Received request on /generate-dashboard-insights with data: {request.model_dump()}")
774
+ if not _llm_instance:
775
+ raise HTTPException(status_code=503, detail="The Llama model is not available.")
776
+
777
+ # Existing logic remains 100% SAME
778
+ kpis = request.model_dump()
779
+ prompt = f"""
780
+ [SYSTEM]
781
+ You are a senior data analyst at Reachify. You are writing a short, insightful summary for the agency's admin. Identify the most important trends from the week's KPIs. Write 2-3 human-readable bullet points. Be proactive and suggest an action.
782
+
783
+ [THIS WEEK'S KPI DATA]
784
+ - Revenue This Month (so far): ${kpis.get('total_revenue_monthly', 0):.2f}
785
+ - New Users This Week: {kpis.get('new_users_weekly', 0)}
786
+ - Currently Active Campaigns: {kpis.get('active_campaigns', 0)}
787
+ - Items Awaiting Approval: {kpis.get('pending_approvals', 0)}
788
+
789
+ [YOUR INSIGHTFUL BULLET POINTS]
790
+ - """
791
+
792
+ try:
793
+ print("--- Direct Call: Sending composed prompt to LLM...")
794
+ response = _llm_instance(prompt, max_tokens=250, temperature=0.7, stop=["[SYSTEM]", "Human:", "\n\n"], echo=False)
795
+
796
+ insight_text = response['choices'][0]['text'].strip()
797
+
798
+ if not insight_text.startswith('-'):
799
+ insight_text = '- ' + insight_text
800
+
801
+ print("--- Direct Call: Successfully received response from LLM.")
802
+ return StrategyResponse(response=insight_text)
803
+
804
+ except Exception as e:
805
+ print(f"🚨 AN ERROR OCCURRED DIRECTLY IN THE ENDPOINT:")
806
+ traceback.print_exc()
807
+ raise HTTPException(status_code=500, detail=str(e))
808
+
809
+
810
+ @app.get("/", summary="Health Check")
811
+ def read_root():
812
+ return {"status": "Unified AI Service is running"}
813
+
814
+ @app.post("/predict/time-series", response_model=TimeSeriesForecastResponse, summary="Forecast Time Series with Trend Analysis")
815
+ def predict_time_series(request: TimeSeriesForecastRequest):
816
+ print(f"\n✅ Received smart forecast request with context: '{request.business_context}'")
817
+
818
+ if len(request.data) < 5:
819
+ raise HTTPException(status_code=400, detail="Not enough data. At least 5 data points required.")
820
+
821
+ try:
822
+ df = pd.DataFrame([item.model_dump() for item in request.data])
823
+ df['date'] = pd.to_datetime(df['date'])
824
+ df = df.set_index('date').asfreq('MS', method='ffill')
825
+
826
+ model = Holt(df['value'], initialization_method="estimated").fit(optimized=True)
827
+ forecast_result = model.forecast(steps=request.periods_to_predict)
828
+
829
+ smart_forecast_output = []
830
+ last_historical_value = df['value'].iloc[-1]
831
+
832
+ for date, predicted_val in forecast_result.items():
833
+ trend_label = "Stable"
834
+ commentary = None
835
+ percentage_change = ((predicted_val - last_historical_value) / last_historical_value) * 100
836
+
837
+ if percentage_change > 10:
838
+ trend_label = "Strong Growth"
839
+ if "by " in request.business_context:
840
+ reason = request.business_context.split('by ')[-1]
841
+ commentary = f"Strong growth expected, likely driven by {reason}"
842
+ else:
843
+ commentary = "Strong growth expected due to positive trends."
844
+ elif percentage_change > 2:
845
+ trend_label = "Modest Growth"
846
+ elif percentage_change < -5:
847
+ trend_label = "Potential Downturn"
848
+ commentary = "Warning: A potential downturn is detected. This may not account for upcoming campaigns. Review your strategy."
849
+
850
+ smart_forecast_output.append(
851
+ SmartForecastDataPoint(
852
+ date=date.strftime('%Y-%m-%d'),
853
+ predicted_value=round(predicted_val, 2),
854
+ trend=trend_label,
855
+ commentary=commentary
856
+ )
857
+ )
858
+ last_historical_value = predicted_val
859
+
860
+ return TimeSeriesForecastResponse(forecast=smart_forecast_output)
861
+
862
+ except Exception as e:
863
+ traceback.print_exc()
864
+ raise HTTPException(status_code=500, detail=str(e))
865
+
866
+ @app.post("/generate-health-summary", response_model=HealthSummaryResponse, summary="Generates an actionable summary from KPIs")
867
+ def generate_health_summary(request: HealthKpiRequest):
868
+ print(f"\n✅ Received request to generate health summary.")
869
+ if not _llm_instance:
870
+ raise HTTPException(status_code=503, detail="LLM not available for summary.")
871
+
872
+ kpis = request.model_dump()
873
+
874
+ prompt = f"""
875
+ [SYSTEM]
876
+ You are a business analyst. Analyze these KPIs: Platform Revenue (₹{kpis.get('platformRevenue', 0):,.0f}), Active Campaigns ({kpis.get('activeCampaigns', 0)}). Provide one [PROGRESS] point and one [AREA TO WATCH] with a next action. Under 50 words.
877
+ [YOUR ANALYSIS]
878
+ """
879
+
880
+ try:
881
+
882
+ response = _llm_instance(prompt, max_tokens=150, temperature=0.6, stop=["[SYSTEM]"], echo=False)
883
+ summary_text = response['choices'][0]['text'].strip()
884
+ print(f" - ✅ Generated summary: {summary_text}")
885
+ return HealthSummaryResponse(summary=summary_text)
886
+
887
+ except OSError as e:
888
+ print(f"🚨 CRITICAL LLM CRASH CAUGHT (OSError): {e}. Returning a fallback message.")
889
+ traceback.print_exc()
890
+ return HealthSummaryResponse(summary="[AREA TO WATCH]: The AI analyst model is currently unstable and is being reviewed. Manual analysis is recommended.")
891
+ except Exception as e:
892
+ print(f"🚨 An unexpected error occurred during summary generation: {e}")
893
+ traceback.print_exc()
894
+ raise HTTPException(status_code=500, detail=str(e))
895
+
896
+
897
+ @app.post("/generate_team_strategy", response_model=TeamStrategyResponse, summary="Generates a full campaign strategy for the internal team")
898
+ def generate_team_strategy(request: TeamStrategyRequest):
899
+ """
900
+ This endpoint orchestrates the AI/ML logic for the Team Strategist tool.
901
+ It takes campaign details and a list of influencers from the backend.
902
+ """
903
+ print(f"\n✅ Received request on /generate_team_strategy for brand: {request.brand_name}")
904
+
905
+ if not _ai_strategist:
906
+ raise HTTPException(status_code=503, detail="AI Strategist model is not available or failed to load.")
907
+
908
+ try:
909
+ # Step 1: Generate the creative brief using the LLM
910
+ creative_brief_dict = _ai_strategist.generate_campaign_brief(
911
+ brand_name=request.brand_name,
912
+ campaign_goal=request.campaign_goal,
913
+ target_audience=request.target_audience,
914
+ budget_range=request.budget_range
915
+ )
916
+ if "error" in creative_brief_dict:
917
+ raise Exception(f"LLM Error during brief generation: {creative_brief_dict['error']}")
918
+
919
+ # Step 2: Rank the provided influencers using the ML model
920
+ influencer_list_of_dicts = [inf.model_dump() for inf in request.influencers]
921
+ suggested_influencers_list = rank_influencers_by_match(
922
+ influencers=influencer_list_of_dicts,
923
+ campaign_details=request.model_dump(exclude={"influencers"}),
924
+ top_n=3
925
+ )
926
+
927
+ print("✅ Successfully generated brief and ranked influencers.")
928
+ return TeamStrategyResponse(
929
+ success=True,
930
+ strategy=CreativeBrief(**creative_brief_dict),
931
+ suggested_influencers=[InfluencerData(**inf) for inf in suggested_influencers_list]
932
+ )
933
+
934
+ except Exception as e:
935
+ print(f"🚨 An error occurred in /generate_team_strategy endpoint:")
936
+ traceback.print_exc()
937
+ return TeamStrategyResponse(success=False, error=str(e))
938
+
939
+
940
+ @app.post("/strategist/generate-analytics-insights", response_model=AnalyticsInsightsResponse, summary="Generates Actionable Insights from Campaign Analytics")
941
+ async def generate_analytics_insights_route(request: AnalyticsInsightsRequest):
942
+ """
943
+ Receives campaign analytics data and uses the AI Strategist to generate key insights.
944
+ """
945
+ print(f"\n✅ Received request on /strategist/generate-analytics-insights")
946
+ if not _ai_strategist:
947
+ raise HTTPException(status_code=503, detail="The AI Strategist is not available.")
948
+
949
+ try:
950
+ # Pydantic model se data ko dictionary mein convert karein
951
+ analytics_data = request.model_dump()
952
+
953
+ # Naye function ko call karein
954
+ insights_text = _ai_strategist.generate_analytics_insights(analytics_data=analytics_data)
955
+
956
+ return AnalyticsInsightsResponse(insights=insights_text)
957
+
958
+ except Exception as e:
959
+ print(f"🚨 An error occurred in /strategist/generate-analytics-insights endpoint:")
960
+ traceback.print_exc()
961
+ raise HTTPException(status_code=500, detail=str(e))
962
+
963
+ @app.post("/predictor/rank-influencers", response_model=InfluencerRankResponse, summary="Ranks a given list of influencers for a specific campaign")
964
+ async def rank_influencers_route(request: InfluencerRankRequest):
965
+ """
966
+ Backend se campaign details aur sabhi influencers ki list leta hai,
967
+ aur ML model ka istemal karke top 3 ranked influencers wapas bhejta hai.
968
+ """
969
+ print(f"\n✅ Received request on /predictor/rank-influencers for campaign: '{request.campaign_details.description[:30]}...'")
970
+
971
+ # predictor.py se humara model loaded hai, humein use check karne ki zaroorat nahi
972
+ # kyunki wahan pehle se try-except block laga hua hai.
973
+
974
+ try:
975
+ # Step 1: Frontend se aaye Pydantic models ko saaf Python dictionaries mein badlein
976
+ influencers_list = [inf.model_dump() for inf in request.influencers]
977
+ campaign_details_dict = request.campaign_details.model_dump()
978
+
979
+ # Step 2: Humare predictor.py ke function ko call karein
980
+ ranked_list = rank_influencers_by_match(
981
+ influencers=influencers_list,
982
+ campaign_details=campaign_details_dict,
983
+ top_n=5 # Hum top 5 influencers bhejenge
984
+ )
985
+
986
+ # Step 3: Saaf jawab wapas bhejein
987
+ print(f" - ✅ Successfully ranked {len(ranked_list)} influencers.")
988
+ return InfluencerRankResponse(ranked_influencers=ranked_list)
989
+
990
+ except Exception as e:
991
+ print(f"🚨 An error occurred in /predictor/rank-influencers endpoint:")
992
+ traceback.print_exc()
993
+ raise HTTPException(status_code=500, detail=str(e))
994
+
995
+ @app.post("/strategist/generate-weekly-summary", response_model=WeeklySummaryResponse, summary="Generates a Weekly Summary from Metrics")
996
+ def generate_weekly_summary_route(request: WeeklySummaryRequest):
997
+ print(f"\n✅ Received request on the NEW /strategist/generate-weekly-summary endpoint.")
998
+ if not _ai_strategist:
999
+ raise HTTPException(status_code=503, detail="AI Strategist is not initialized.")
1000
+ try:
1001
+ summary_text = _ai_strategist.generate_weekly_summary(metrics=request.model_dump())
1002
+ if not summary_text or "error" in summary_text.lower():
1003
+ raise Exception("AI model failed to generate a valid summary.")
1004
+ return WeeklySummaryResponse(summary=summary_text)
1005
+ except Exception as e:
1006
+ print(f"🚨 An error occurred in /strategist/generate-weekly-summary: {e}")
1007
+ raise HTTPException(status_code=500, detail=str(e))
1008
+
1009
+ @app.post("/predict/payout_forecast", response_model=PayoutForecastOutput, summary="Predicts future influencer payouts for a manager")
1010
+ def predict_payout(data: PayoutForecastInput):
1011
+ """
1012
+ Predicts the estimated influencer payout for the next 30 days
1013
+ based on the total budget of a manager's active campaigns.
1014
+ """
1015
+ print(f"\n✅ Received request on /predict/payout_forecast")
1016
+ if not _payout_forecaster:
1017
+ raise HTTPException(status_code=503, detail="Model is not available. Please train the payout forecaster model first.")
1018
+
1019
+ try:
1020
+ # Prediction ke liye data ko sahi DataFrame format mein convert karo
1021
+ input_df = pd.DataFrame([{'budget': data.total_budget_active_campaigns}])
1022
+
1023
+ # Prediction karo
1024
+ prediction = _payout_forecaster.predict(input_df)[0]
1025
+
1026
+ # Ensure the prediction is never negative
1027
+ forecasted_amount = max(0, float(prediction))
1028
+
1029
+ print(f" - ✅ Generated payout forecast: {forecasted_amount}")
1030
+ return {
1031
+ "forecastedAmount": forecasted_amount,
1032
+ "commentary": "Based on the total budget of your current active campaigns."
1033
+ }
1034
+
1035
+ except Exception as e:
1036
+ print(f"🚨 An error occurred in /predict/payout_forecast endpoint:")
1037
+ traceback.print_exc()
1038
+ raise HTTPException(status_code=500, detail=f"An error occurred during prediction: {str(e)}")
1039
+
1040
+
1041
+ @app.post("/analyze/content_quality", response_model=ContentQualityResponse, summary="Analyzes a caption for a quality score")
1042
+ def analyze_content_quality(request: ContentQualityRequest):
1043
+ """
1044
+ Uses the loaded LLM to analyze a social media caption based on several criteria
1045
+ and returns a quantitative score and qualitative feedback.
1046
+ """
1047
+ print(f"\n✅ Received request on /analyze/content_quality")
1048
+ if not _llm_instance:
1049
+ raise HTTPException(status_code=503, detail="The Llama model is not available.")
1050
+
1051
+ caption = request.caption
1052
+
1053
+ # This is a very structured prompt that asks the LLM to act as a specialist
1054
+ # and return a JSON object, which is easier and more reliable to parse.
1055
+ prompt = f"""
1056
+ [SYSTEM]
1057
+ You are a social media expert. Analyze the following caption based on four criteria: Readability, Engagement, Call to Action (CTA), and Hashtag Strategy.
1058
+ For each criterion, provide a score from 1 (poor) to 10 (excellent).
1059
+ Also, provide a final overall score (average of the four scores) and short, actionable feedback.
1060
+ Respond ONLY with a valid JSON object in the following format:
1061
+ {{
1062
+ "overall_score": <float>,
1063
+ "scores": {{
1064
+ "readability": <int>,
1065
+ "engagement": <int>,
1066
+ "call_to_action": <int>,
1067
+ "hashtag_strategy": <int>
1068
+ }},
1069
+ "feedback": "<string>"
1070
+ }}
1071
+
1072
+ [CAPTION TO ANALYZE]
1073
+ "{caption}"
1074
+
1075
+ [YOUR JSON RESPONSE]
1076
+ """
1077
+
1078
+ try:
1079
+ print("--- Sending caption to LLM for quality analysis...")
1080
+ response = _llm_instance(prompt, max_tokens=512, temperature=0.2, stop=["[SYSTEM]", "\n\n"], echo=False)
1081
+
1082
+ # Extract the JSON part of the response
1083
+ json_text = response['choices'][0]['text'].strip()
1084
+
1085
+ # Find the start and end of the JSON object
1086
+ start_index = json_text.find('{')
1087
+ end_index = json_text.rfind('}') + 1
1088
+ if start_index == -1 or end_index == 0:
1089
+ raise ValueError("LLM did not return a valid JSON object.")
1090
+
1091
+ clean_json_text = json_text[start_index:end_index]
1092
+
1093
+ import json
1094
+ analysis_result = json.loads(clean_json_text)
1095
+
1096
+ print("--- Successfully received and parsed JSON response from LLM.")
1097
+ return ContentQualityResponse(**analysis_result)
1098
+
1099
+ except (json.JSONDecodeError, KeyError, ValueError) as e:
1100
+ print(f"🚨 ERROR parsing LLM response: {e}. Raw response was: {json_text}")
1101
+ raise HTTPException(status_code=500, detail="Failed to parse the analysis from the AI model. The model may have returned an unexpected format.")
1102
+ except Exception as e:
1103
+ print(f"🚨 An unexpected error occurred during content analysis: {e}")
1104
+ traceback.print_exc()
1105
+ raise HTTPException(status_code=500, detail=str(e))
1106
+
1107
+ @app.post("/rank/campaigns-for-influencer", response_model=RankCampaignsResponse, summary="Ranks a list of campaigns for one influencer")
1108
+ async def rank_campaigns_for_influencer_route(request: RankCampaignsRequest):
1109
+ """
1110
+ Takes an influencer's profile and a list of campaigns, uses the ML model
1111
+ to predict a 'match score' for each, and returns the list ranked by that score.
1112
+ """
1113
+ print(f"\n✅ Received request on /rank/campaigns-for-influencer for influencer: {request.influencer.id}")
1114
+
1115
+ # 1. Security Check: Model loaded hai ya nahi?
1116
+ if not _influencer_matcher:
1117
+ raise HTTPException(status_code=503, detail="Influencer Matcher model is not available.")
1118
+ if not request.campaigns:
1119
+ return RankCampaignsResponse(ranked_campaigns=[])
1120
+
1121
+ try:
1122
+ # 2. Data Preparation: Model ke liye DataFrame banayein
1123
+ # Model ko wahi columns chahiye jin par woh train hua tha.
1124
+ df_list = []
1125
+ for campaign in request.campaigns:
1126
+ df_list.append({
1127
+ 'influencer_category': request.influencer.category,
1128
+ 'influencer_bio': request.influencer.bio,
1129
+ 'campaign_description': campaign.description,
1130
+ # Hum woh columns bhi denge jo is context me nahi hain, par model ko chahiye
1131
+ 'followers': 50000, # Ek average value
1132
+ 'engagement_rate': 0.04, # Ek acchi value
1133
+ 'country': 'USA', # Ek default value
1134
+ 'niche': request.influencer.category or 'lifestyle'
1135
+ })
1136
+
1137
+ df_to_predict = pd.DataFrame(df_list)
1138
+
1139
+ # 3. 🔥 AI Prediction (The Missing Part) 🔥
1140
+ # Model se har campaign ke liye ek score predict karwayein
1141
+ print(f" - Predicting scores for {len(df_to_predict)} campaigns...")
1142
+ predicted_scores = _influencer_matcher.predict(df_to_predict)
1143
+
1144
+ # 4. Sorting & Ranking
1145
+ # Campaigns ko unke score ke saath combine karein
1146
+ results_with_scores = zip(request.campaigns, predicted_scores)
1147
+
1148
+ # Unhein score ke hisaab se sort karein (zyada score upar)
1149
+ sorted_results = sorted(results_with_scores, key=lambda x: x[1], reverse=True)
1150
+
1151
+ # 5. Final Jawab (Response) taiyaar karein
1152
+ output = [
1153
+ RankedCampaignResult(campaign_id=camp.id, score=float(score))
1154
+ for camp, score in sorted_results
1155
+ ]
1156
+
1157
+ print(f" - ✅ Successfully scored and ranked campaigns.")
1158
+ return RankCampaignsResponse(ranked_campaigns=output)
1159
+
1160
+ except Exception as e:
1161
+ print(f"🚨 An error occurred during campaign ranking:")
1162
+ traceback.print_exc()
1163
+ raise HTTPException(status_code=500, detail=str(e))
1164
+
1165
+ @app.post("/ai/assist/caption", response_model=CaptionAssistResponse, summary="Assists with writing or improving captions")
1166
+ async def caption_assistant_route(request: CaptionAssistRequest):
1167
+ """
1168
+ Takes a caption and performs an action (improve, suggest hashtags, etc.) using the LLM.
1169
+ """
1170
+ print(f"\n✅ Received request on /ai/assist/caption with action: {request.action}")
1171
+ if not _ai_strategist:
1172
+ raise HTTPException(status_code=503, detail="AI Strategist is not available.")
1173
+
1174
+ try:
1175
+ # _ai_strategist ke andar ek naya function banayenge
1176
+ generated_text = _ai_strategist.get_caption_assistance(
1177
+ caption=request.caption,
1178
+ action=request.action,
1179
+ guidelines=request.guidelines
1180
+ )
1181
+ return CaptionAssistResponse(new_text=generated_text)
1182
+
1183
+ except Exception as e:
1184
+ print(f"🚨 An error occurred in /ai/assist/caption endpoint:")
1185
+ traceback.print_exc()
1186
+ raise HTTPException(status_code=500, detail=str(e))
1187
+
1188
+
1189
+ @app.post("/predict/campaign-outcome", response_model=ForecastResponse, summary="Forecasts influencer performance and earnings for a campaign")
1190
+ async def predict_campaign_outcome(request: ForecastRequest):
1191
+ """
1192
+ Takes campaign and influencer stats and uses ML models to predict
1193
+ performance (reach, engagement) and potential earnings.
1194
+ """
1195
+ print(f"\n✅ Received request on /predict/campaign-outcome")
1196
+
1197
+ if not _performance_predictor or not _payout_forecaster:
1198
+ raise HTTPException(status_code=503, detail="Forecasting models are not available.")
1199
+
1200
+ try:
1201
+ # ✅ THE FIX IS HERE: Create a single 'budget' column.
1202
+ # Column names MUST match the training script's columns.
1203
+ input_data = pd.DataFrame([{
1204
+ 'budget': request.budget,
1205
+ 'category': request.category,
1206
+ 'influencer_count': 1,
1207
+ 'platform': 'instagram',
1208
+ 'location': 'USA',
1209
+ 'followers': request.follower_count,
1210
+ 'engagement_rate': request.engagement_rate
1211
+ }])
1212
+
1213
+ # --- Performance Prediction ---
1214
+ print(" - Predicting performance...")
1215
+ # ✅ THE FIX: Pass the columns the model ACTUALLY needs.
1216
+ performance_model_cols = ['budget', 'influencer_count', 'platform', 'location', 'category']
1217
+ reach_prediction = _performance_predictor.predict(input_data[performance_model_cols])[0]
1218
+ engagement_prediction = request.engagement_rate * 100
1219
+
1220
+ perf_forecast = PerformanceForecast(
1221
+ predicted_reach=int(reach_prediction),
1222
+ predicted_engagement_rate=round(engagement_prediction, 2)
1223
+ )
1224
+
1225
+ # --- Payout Prediction ---
1226
+ print(" - Predicting payout...")
1227
+ # This model only needs 'budget'
1228
+ payout_prediction = _payout_forecaster.predict(input_data[['budget']])[0]
1229
+
1230
+ payout_forecast = PayoutForecast(
1231
+ estimated_earning=max(0, float(payout_prediction))
1232
+ )
1233
+
1234
+ print(" - ✅ Successfully generated forecasts.")
1235
+ return ForecastResponse(performance=perf_forecast, payout=payout_forecast)
1236
+
1237
+ except Exception as e:
1238
+ print(f"🚨 An error occurred during outcome prediction:")
1239
+ traceback.print_exc()
1240
+ raise HTTPException(status_code=500, detail=str(e))
1241
+
1242
+ @app.post("/ai/summarize/influencer-analytics", response_model=InfluencerAnalyticsSummaryResponse, summary="Generates a summary for the influencer's analytics page")
1243
+ async def summarize_influencer_analytics(request: InfluencerKpiData):
1244
+ """
1245
+ Takes an influencer's KPIs and uses the AI strategist to create an actionable summary.
1246
+ """
1247
+ print(f"\n✅ Received request on /ai/summarize/influencer-analytics")
1248
+ if not _ai_strategist:
1249
+ raise HTTPException(status_code=503, detail="AI Strategist is not available.")
1250
+
1251
+ try:
1252
+ # Pass the data as a dictionary to the strategist
1253
+ summary_text = _ai_strategist.generate_influencer_analytics_summary(kpis=request.model_dump())
1254
+ return InfluencerAnalyticsSummaryResponse(summary=summary_text)
1255
+
1256
+ except Exception as e:
1257
+ print(f"🚨 An error occurred in the analytics summary endpoint:")
1258
+ traceback.print_exc()
1259
+ raise HTTPException(status_code=500, detail=str(e))
1260
+
1261
+
1262
+ @app.post("/portfolio/curate-with-ai", response_model=CuratePortfolioResponse)
1263
+ def curate_portfolio_with_ai(request: CuratePortfolioRequest):
1264
+ """
1265
+ Accepts a list of approved submissions, scores them based on simple logic,
1266
+ and returns the IDs of the best ones. THIS VERSION DOES NOT USE THE LLM.
1267
+ """
1268
+ print(f"\n✅✅✅ RUNNING FINAL, NON-LLM VERSION of Portfolio Curation ✅✅✅")
1269
+
1270
+ submissions = request.submissions
1271
+
1272
+ if not submissions:
1273
+ return CuratePortfolioResponse(featured_submission_ids=[])
1274
+
1275
+ scored_submissions = []
1276
+ for sub in submissions:
1277
+ # Step 1: Ek score calculate karein
1278
+ score = 0
1279
+ # Likes ke liye points (sabse zaroori)
1280
+ score += (sub.likes or 0) * 0.7
1281
+
1282
+ # Caption lamba hai to extra points
1283
+ if sub.caption and len(sub.caption) > 100:
1284
+ score += 100 # Ek boost
1285
+
1286
+ # Step 2: Har submission ko uske score ke saath save karein
1287
+ scored_submissions.append({'id': sub.id, 'score': score})
1288
+
1289
+ # Step 3: Sabhi submissions ko score ke hisaab se sort karein
1290
+ sorted_submissions = sorted(scored_submissions, key=lambda x: x['score'], reverse=True)
1291
+
1292
+ # Step 4: Sabse behtareen 5 submissions ko chunein (ya jitne bhi hain)
1293
+ top_submissions = sorted_submissions[:5]
1294
+
1295
+ # Step 5: Sirf unki ID waapis bhejein
1296
+ featured_ids = [sub['id'] for sub in top_submissions]
1297
+
1298
+ print(f" - ✅ Scored and selected {len(featured_ids)} posts: {featured_ids}")
1299
+ return CuratePortfolioResponse(featured_submission_ids=featured_ids)
1300
+
1301
@app.post("/tasks/prioritize", response_model=TaskPrioritizationResponse)
def prioritize_task(request: TaskPrioritizationRequest):
    """
    Assign a priority ("high" / "medium" / "low") to a task title via the LLM.

    Falls back to "medium" whenever the model emits an off-menu label or the
    call fails outright, so the caller always gets a usable priority.
    """
    if not _llm_instance:
        raise HTTPException(status_code=503, detail="LLM model is not available.")

    prompt = f"""
[INST] You are an expert assistant for a social media influencer. Your job is to assign a priority to a new task based on its title. Use these rules:
- If the task mentions "revise", "rejection", "feedback", "contract", or is a deadline, the priority is "high".
- If the task is about a "new invitation", "new opportunity", or "message", the priority is "medium".
- For anything else like "update profile", "explore campaigns", the priority is "low".

Respond ONLY with one of the following words: high, medium, or low.

Task Title: "{request.title}"
[/INST]
"""

    allowed_labels = ('high', 'medium', 'low')
    try:
        print(f" - 🤖 Prioritizing task: '{request.title}'")
        completion = _llm_instance(prompt, max_tokens=10, stop=["[INST]"], echo=False)

        # Normalise whatever the model emitted.
        priority = completion['choices'][0]['text'].strip().lower()

        # Safety net: never trust the model to stay on-menu.
        if priority not in allowed_labels:
            print(f" - ⚠️ LLM returned invalid priority: '{priority}'. Defaulting to 'medium'.")
            priority = 'medium'

        print(f" - ✅ AI assigned priority: '{priority}'")
        return TaskPrioritizationResponse(priority=priority)

    except Exception as e:
        # Prioritization is best-effort; do not fail the request over it.
        print(f" - ❌ An unexpected error occurred during task prioritization: {e}")
        return TaskPrioritizationResponse(priority='medium')
1339
+
1340
+
1341
@app.post("/predict/earning-opportunities", response_model=EarningOpportunityResponse, summary="Finds the best earning opportunities for an influencer")
async def predict_earning_opportunities(request: EarningOpportunityRequest):
    """
    Based on an influencer's follower count, the AI model estimates their
    potential performance score across various campaign types. (SIMPLIFIED)

    Builds every (niche, format) scenario, one-hot encodes the categorical
    columns with the encoder saved at training time, predicts a score per
    scenario, and returns the 5 best.

    Raises:
        HTTPException 503: optimizer model or encoder missing at startup.
        HTTPException 500: any failure while encoding or predicting.
    """
    print(f"\n✅ Received request on /predict/earning-opportunities (SIMPLIFIED)")
    if _earnings_optimizer is None or _earnings_encoder is None:
        raise HTTPException(status_code=503, detail="Earning Optimizer model or encoder is not available.")

    try:
        # === ✨ THE FIX STARTS HERE ✨ ===
        # Step 1: Create scenarios in a DataFrame — the cartesian product of
        # 5 niches x 3 content formats (15 rows), all at the caller's follower count.
        scenarios_list = [
            {'campaign_niche': niche, 'content_format': c_format, 'follower_count': request.follower_count}
            for niche in ['Tech', 'Fashion', 'Food', 'Gaming', 'General']
            for c_format in ['Reel', 'Post', 'Story']
        ]
        df_scenarios = pd.DataFrame(scenarios_list)

        # Step 2: Manually encode the categorical features using the saved encoder.
        # Must mirror the training pipeline exactly — hence the persisted encoder.
        print(" - Manually encoding data using saved encoder...")
        categorical_features = ['campaign_niche', 'content_format']
        encoded_cats = _earnings_encoder.transform(df_scenarios[categorical_features])
        encoded_df = pd.DataFrame(encoded_cats, columns=_earnings_encoder.get_feature_names_out(categorical_features))

        # Step 3: Combine with numerical features.
        # reset_index keeps the rows aligned when concatenating side by side.
        numerical_features = df_scenarios[['follower_count']].reset_index(drop=True)
        X_final_to_predict = pd.concat([encoded_df, numerical_features], axis=1)

        # Step 4: Predict using the simple model
        print(f" - Predicting scores for {len(X_final_to_predict)} scenarios...")
        predicted_scores = _earnings_optimizer.predict(X_final_to_predict)
        # === ✨ THE FIX ENDS HERE ✨ ===

        # Attach a rule-based commentary band to each scenario's score.
        results = []
        for i, scenario in enumerate(scenarios_list):
            score = float(predicted_scores[i])
            comment = "This could be a good opportunity."
            if score > 0.75: comment = "Excellent Opportunity! Focus on this."
            elif score < 0.4: comment = "May not be the best fit for you."
            results.append(Opportunity(
                campaign_niche=scenario['campaign_niche'],
                content_format=scenario['content_format'],
                estimated_score=score,
                commentary=comment
            ))

        # Best-first; only the top 5 scenarios are returned to the caller.
        sorted_results = sorted(results, key=lambda x: x.estimated_score, reverse=True)
        return EarningOpportunityResponse(opportunities=sorted_results[:5])

    except Exception as e:
        print("🚨 An error occurred in /predict/earning-opportunities endpoint:")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
1397
+
1398
@app.post("/predict/post-performance", response_model=PostPerformanceResponse, summary="Predicts likes and comments for a new post")
async def predict_post_performance(request: PostPerformanceRequest):
    """
    Takes details of a potential post and uses two ML models to predict the
    number of likes and comments it might receive, plus rule-based feedback.
    """
    print(f"\n✅ Received request on /predict/post-performance")
    if not _likes_predictor or not _comments_predictor:
        raise HTTPException(status_code=503, detail="Performance prediction models are not available.")

    try:
        # Both models expect a one-row DataFrame shaped like their training data.
        features = pd.DataFrame([request.model_dump()])

        print(" - Predicting likes...")
        likes_estimate = _likes_predictor.predict(features)[0]

        print(" - Predicting comments...")
        comments_estimate = _comments_predictor.predict(features)[0]

        # Regressors can emit negatives; floor both predictions at zero.
        predicted_likes = max(0, int(likes_estimate))
        predicted_comments = max(0, int(comments_estimate))

        # Rule-based coaching tips on caption length and niche targeting.
        tips = []
        if request.caption_length < 50:
            tips.append("Consider writing a slightly longer caption to increase engagement.")
        elif request.caption_length > 800:
            tips.append("This is a long caption! Ensure the first line is very engaging.")
        else:
            tips.append("The caption length is good for engagement.")

        if request.campaign_niche == 'General':
            tips.append("Try to target a more specific niche in the future for better performance.")

        print(" - ✅ Successfully generated performance prediction and feedback.")

        return PostPerformanceResponse(
            predicted_likes=predicted_likes,
            predicted_comments=predicted_comments,
            feedback=" ".join(tips)
        )

    except Exception as e:
        print(f"🚨 An error occurred in the post-performance endpoint:")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
1449
+
1450
+
1451
@app.get("/analyze/performance-anomalies", response_model=List[AnomalyInsight], summary="Finds unusual performance trends for all influencers")
def analyze_anomalies(supabase: Client = Depends(get_supabase_client)):
    """
    Platform-wide anomaly sweep: pulls recent daily stats for every
    influencer, runs the rule-based anomaly detector per influencer, and
    returns only the influencers that produced at least one insight.
    """
    # This endpoint is heavy, so it should have security (e.g., requires an admin API key)
    print("🤖 Running platform-wide Anomaly Detection...")

    try:
        # 1. Fetch historical data for all influencers from our new stats table
        stats_res = supabase.table('daily_influencer_stats').select('*').order('date', desc=True).limit(5000).execute()  # Get last ~5000 entries
        profiles_res = supabase.table('profiles').select('id, full_name').eq('role', 'influencer').execute()

        if not stats_res.data: return []

        all_stats_df = pd.DataFrame(stats_res.data)
        # profile id -> display name, used to label each insight below
        profiles_map = {p['id']: p['full_name'] for p in profiles_res.data}

        all_insights = []

        # 2. Loop through each influencer
        for influencer_id, group in all_stats_df.groupby('profile_id'):
            historical_df = group.sort_values('date')
            # Last row after the ascending sort == most recent day's stats.
            today_stats = historical_df.iloc[-1].to_dict()

            # 3. Call the Anomaly Detector AI
            insights = find_anomalies(influencer_id, historical_df, today_stats)

            if insights:
                all_insights.append(AnomalyInsight(
                    influencer_id=influencer_id,
                    influencer_name=profiles_map.get(influencer_id, 'Unknown Influencer'),
                    insights=insights
                ))

        return all_insights

    except Exception as e:
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
1488
+
1489
+
1490
@app.post("/predict/revenue-forecast", response_model=RevenueForecastResponse, summary="Generates a 3-month revenue forecast")
async def predict_revenue_forecast():
    """
    (FAST VERSION) Uses the trained Holt's model to forecast revenue and adds simple commentary.

    Steps:
        1. Ask the pre-fitted forecaster for the next 3 monthly points.
        2. Label each point's trend relative to the previous value.
        3. Attach rule-based commentary (no LLM call, so this is instant).

    Raises:
        HTTPException 503: the forecasting model was not loaded at startup.
        HTTPException 500: any unexpected failure while forecasting.
    """
    print(f"\n✅ Received request on /predict/revenue-forecast (FAST VERSION)")
    if not _revenue_forecaster:
        raise HTTPException(status_code=503, detail="Revenue forecasting model is not available.")

    try:
        # Step 1: Generate forecast (fast — the model is already fitted)
        forecast_result = _revenue_forecaster.forecast(steps=3)

        # Step 2: Format the output and add trend analysis (also fast)
        forecast_datapoints = []
        # The last observed historical value is the baseline for the first delta.
        last_historical_value = _revenue_forecaster.model.endog[-1]

        for timestamp, predicted_value in forecast_result.items():
            trend_label = "Stable"
            # FIX: guard against a zero baseline, which previously raised
            # ZeroDivisionError; treat it as "no measurable change".
            if last_historical_value:
                percentage_change = ((predicted_value - last_historical_value) / last_historical_value) * 100
            else:
                percentage_change = 0.0
            if percentage_change > 15: trend_label = "Strong Growth"
            elif percentage_change > 5: trend_label = "Modest Growth"
            elif percentage_change < -10: trend_label = "Potential Downturn"

            forecast_datapoints.append(RevenueForecastDatapoint(
                month=timestamp.strftime('%B %Y'),
                predicted_revenue=round(predicted_value, 2),
                trend=trend_label
            ))
            # Each subsequent month's delta is measured against the previous prediction.
            last_historical_value = predicted_value

        # Step 3: Simple, rule-based commentary keyed off the first month's trend.
        first_trend = forecast_datapoints[0].trend if forecast_datapoints else "Stable"
        ai_commentary = "AI Insight: The forecast shows a stable outlook for the coming quarter."
        if "Growth" in first_trend:
            ai_commentary = "AI Insight: The model predicts a positive growth trend for the next quarter."
        elif "Downturn" in first_trend:
            ai_commentary = "AI Insight: A potential slowdown is predicted. It's a good time to review upcoming campaigns."

        print(" - ✅ Successfully generated revenue forecast (fast method).")

        return RevenueForecastResponse(
            forecast=forecast_datapoints,
            ai_commentary=ai_commentary
        )

    except Exception as e:
        print(f"🚨 An error occurred in the revenue forecast endpoint:")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
1540
+
1541
+
1542
@app.post("/predict/influencer-performance", response_model=InfluencerPerformanceResponse, summary="Predicts a holistic performance score for an influencer")
async def predict_influencer_performance(stats: InfluencerPerformanceStats):
    """
    Turn an influencer's key metrics into a single 0-100 performance score
    using the pre-trained scorer model.
    """
    print(f"\n✅ Received request on /predict/influencer-performance")
    if not _performance_scorer:
        raise HTTPException(status_code=503, detail="The Performance Scorer model is not available. Please train it first.")

    try:
        # The model expects a one-row DataFrame shaped like its training data.
        features = pd.DataFrame([stats.model_dump()])
        raw_scores = _performance_scorer.predict(features)

        # Clamp the raw regression output into the 0-100 range.
        clamped_score = min(100, max(0, int(raw_scores[0])))

        print(f" - ✅ Successfully predicted performance score: {clamped_score}")
        return {"performance_score": clamped_score}

    except Exception as e:
        print(f"🚨 An error occurred in the influencer performance endpoint:")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
1569
+
1570
+
1571
@app.post("/v1/match/rank-by-similarity", response_model=RankBySimilarityResponse, summary="Generic endpoint to rank documents by text similarity")
async def rank_by_similarity_endpoint(request: RankBySimilarityRequest):
    """Rank arbitrary documents against a free-text query by embedding similarity."""
    print(f"\n✅ Received request on /v1/match/rank-by-similarity")
    try:
        # Strip unset fields so the matcher only sees what the caller provided.
        docs = [document.model_dump(exclude_unset=True) for document in request.documents]
        ranked = rank_documents_by_similarity(query=request.query, documents=docs)
        print(f" - ✅ Successfully ranked {len(ranked)} documents.")
        return RankBySimilarityResponse(ranked_documents=ranked)
    except Exception as e:
        print(f"🚨 An error occurred in the ranking endpoint:")
        import traceback
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
1584
+
1585
+
1586
@app.post("/analyze/content-quality", response_model=ContentQualityResponse, summary="Analyzes a caption for a quality score")
def analyze_content_quality(request: ContentQualityRequest):
    """
    Uses the loaded LLM to analyze a social media caption and returns a robustly parsed response.

    Raises:
        HTTPException 503: LLM not loaded.
        HTTPException 500: LLM output could not be parsed, or any other failure.
    """
    # FIX: hoisted out of the try block. The except clause below references
    # json.JSONDecodeError; the old inline `import json` made `json` a local
    # name, so an exception raised before that import reached the except
    # clause as an UnboundLocalError.
    import json
    import traceback

    if not _llm_instance:
        raise HTTPException(status_code=503, detail="The Llama model is not available.")

    caption = request.caption

    prompt = f"""
[SYSTEM]
You are a social media expert. Analyze the following caption... Respond ONLY with a valid JSON object in the following format:
{{
"overall_score": <float>,
"scores": {{ "readability": <int>, "engagement": <int>, "call_to_action": <int>, "hashtag_strategy": <int> }},
"feedback": "<string>"
}}

[CAPTION TO ANALYZE]
"{caption}"

[YOUR JSON RESPONSE]
"""

    # FIX: pre-bind so the error log in the except clause cannot hit an
    # unbound name when the LLM call itself raises before assignment.
    json_text = ""
    try:
        print("--- Sending caption to LLM for quality analysis...")
        response = _llm_instance(prompt, max_tokens=512, temperature=0.2, stop=["[SYSTEM]", "\n\n"], echo=False)

        json_text = response['choices'][0]['text'].strip()
        # The model sometimes wraps the JSON in chatter; keep only the {...} span.
        start_index = json_text.find('{')
        end_index = json_text.rfind('}') + 1
        if start_index == -1 or end_index == 0:
            raise ValueError("LLM did not return a valid JSON object.")

        clean_json_text = json_text[start_index:end_index]
        analysis_result_raw = json.loads(clean_json_text)

        # Tolerate the model naming the per-axis block "score" instead of "scores".
        final_result = {
            "overall_score": analysis_result_raw.get("overall_score"),
            "feedback": analysis_result_raw.get("feedback"),
            "scores": analysis_result_raw.get("scores") or analysis_result_raw.get("score")
        }

        print("--- Successfully received and parsed JSON response from LLM.")
        return ContentQualityResponse(**final_result)

    except (json.JSONDecodeError, KeyError, ValueError) as e:
        print(f"🚨 ERROR parsing LLM response: {e}. Raw response was: {json_text}")
        raise HTTPException(status_code=500, detail="Failed to parse analysis from AI model.")
    except Exception as e:
        print(f"🚨 An unexpected error occurred during content analysis:")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
1644
+
1645
+
1646
@app.post("/generate/daily-briefing", response_model=DailyBriefingResponse, summary="Generates a daily action plan for the Talent Manager")
def generate_daily_briefing(data: DailyBriefingData):
    """
    Takes various KPIs from the backend, synthesizes them, and uses the LLM
    to generate a short, actionable daily briefing for a Talent Manager.

    Returns 503 when the model never loaded and 500 if the LLM call fails.
    """
    print(f"\n✅ Received request on /generate/daily-briefing")
    if not _llm_instance:
        raise HTTPException(status_code=503, detail="The Llama model is not available for briefing.")

    # --- ✨ THE FINAL "IDIOT-PROOF" PROMPT FOR TINYLLAMA ---
    # Deliberately terse: small models follow short, concrete instructions best.
    final_prompt = f"""
Summarize these key points into 2-3 direct bullet points for a manager.

DATA:
- Influencers without campaigns: {data.on_bench_influencers}
- Submissions needing review: {data.pending_submissions + data.revisions_requested}
- Total pending money: {data.highest_pending_payout:,.0f} INR

SUMMARY:
- """

    try:
        print("--- Sending briefing data to LLM (Idiot-Proof prompt)...")
        # Temperature 0.1 keeps the model more factual and less creative.
        response = _llm_instance(final_prompt, max_tokens=150, temperature=0.1, stop=["DATA:"], echo=False)

        briefing_text = response['choices'][0]['text'].strip()

        # Add our own header to make it look nice
        final_briefing = f"Here are your top priorities for today:\n- {briefing_text}"

        print("--- Successfully generated daily briefing.")
        return DailyBriefingResponse(briefing_text=final_briefing)

    except Exception as e:
        print(f"🚨 An unexpected error occurred during briefing generation:")
        import traceback
        traceback.print_exc()
        raise HTTPException(status_code=500, detail="Failed to generate AI briefing.")
1686
+
1687
+
1688
@app.post("/summarize-contract", response_model=ContractSummary, summary="Analyzes a PDF contract and extracts key terms")
def summarize_contract(request: ContractURL):
    """
    Download a contract PDF, extract its text, and have the LLM pull out key
    terms as a flat JSON object that FastAPI validates against ContractSummary.

    Raises:
        HTTPException 503: LLM not loaded.
        HTTPException 500: parse failure (specific message) or any other error.
    """
    print(f"\n✅ Received request on /summarize-contract (v3 - ROBUST)")
    if not _llm_instance:
        raise HTTPException(status_code=503, detail="The Llama model is not available.")

    try:
        print(" - 📑 Parsing PDF from URL...")
        contract_text = parse_pdf_from_url(request.pdf_url)
        contract_text = contract_text[:4000]  # Truncate to keep the prompt inside the context window
        print(f" - ✅ PDF parsed successfully. Truncated to {len(contract_text)} chars.")

        final_prompt = f"""
[INST]
You are a legal analysis AI. Your task is to extract specific details from a contract. You MUST respond ONLY with a single, valid JSON object. Do not add any text before or after the JSON.

**RULES FOR THE JSON VALUES:**
1. All values for "payment_details", "deliverables", "deadlines", "exclusivity", and "ownership" MUST be a single, plain string.
2. The value for "summary_points" MUST be a simple list of strings.
3. DO NOT use nested objects. DO NOT use nested lists. Summarize the content into plain text.

[EXAMPLE of a GOOD RESPONSE]
{{
"payment_details": "Client agrees to pay Influencer a total fee of $5,000 USD, payable in two installments.",
"deliverables": "Influencer must create 2 Instagram Reels and 5 Instagram Stories.",
"deadlines": "The deadline for all deliverables is October 30, 2024.",
"exclusivity": "Influencer agrees to an exclusivity period of 30 days post-campaign.",
"ownership": "The Client retains ownership of all created content.",
"summary_points": [
"Total payment is $5,000 USD.",
"Deliverables: 2 Reels, 5 Stories.",
"A 30-day exclusivity period applies after the campaign."
]
}}
[/EXAMPLE]

Now, based on these strict rules, analyze the following text:

[CONTRACT TEXT]
{contract_text}
[/CONTRACT TEXT]

[YOUR JSON RESPONSE]
"""

        print(" - 📞 Calling LLM with the new, stricter prompt...")
        response = _llm_instance(
            final_prompt,
            max_tokens=1024,
            temperature=0.0,  # Set to 0 for maximum factuality
            echo=False
        )

        raw_response_text = response['choices'][0]['text'].strip()

        print(" - ⚙️ Parsing JSON response from LLM...")
        try:
            start_index = raw_response_text.find('{')
            end_index = raw_response_text.rfind('}') + 1
            # FIX: fail loudly when no JSON object is present instead of
            # slicing nonsense and letting json.loads raise a cryptic error.
            if start_index == -1 or end_index == 0:
                raise ValueError("No JSON object found in the LLM response.")
            clean_json_text = raw_response_text[start_index:end_index]
            summary_data = json.loads(clean_json_text)

        except Exception as e:
            print(f"🚨 ERROR parsing LLM response: {e}. Raw response was: '{raw_response_text}'")
            raise HTTPException(status_code=500, detail="Failed to parse analysis from the AI model.")

        print("--- ✅ Successfully generated contract summary from LLM.")

        # We now return the raw dictionary. FastAPI will validate it against our simple ContractSummary model.
        return summary_data

    except HTTPException:
        # FIX: the specific 500 raised in the parse step above was previously
        # swallowed by the generic handler below (HTTPException ⊂ Exception).
        raise
    except Exception as e:
        traceback.print_exc()
        raise HTTPException(status_code=500, detail="An internal server error occurred in the AI.")
1763
+
1764
+
1765
@app.post("/predict/influencer-performance-score", response_model=InfluencerPerformanceResponse, summary="Predicts a holistic performance score for an influencer")
async def predict_influencer_performance_score(stats: InfluencerPerformanceStats):
    """
    Take influencer stats from the backend and return a 0-100 performance
    score computed by the pre-trained scorer model.
    """
    print(f"\n✅ Received request on /predict/influencer-performance-score")

    # Safety check: was the model actually loaded at startup?
    if _performance_scorer is None:
        print(" - ❌ ERROR: The Performance Scorer model (_performance_scorer) is not loaded.")
        raise HTTPException(
            status_code=503,
            detail="The Performance Scorer model is not available. Please ensure 'performance_scorer_v1.joblib' exists and is loaded."
        )

    try:
        # Column names must match the ones used during training exactly.
        features = pd.DataFrame([stats.model_dump()])
        print(f" - Input data for model: \n{features}")

        raw_prediction = _performance_scorer.predict(features)

        # Clamp the regression output into an integer 0-100 score.
        final_score = min(100, max(0, int(raw_prediction[0])))

        print(f" - ✅ Successfully predicted performance score: {final_score}")

        return InfluencerPerformanceResponse(performance_score=final_score)

    except Exception as e:
        print(f"🚨 An error occurred in the /predict/influencer-performance-score endpoint:")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
1803
+
1804
+
1805
@app.post("/ai/coach/generate-growth-plan", response_model=AIGrowthPlanResponse, summary="Generates personalized growth tips for a single influencer")
def generate_growth_plan_route(request: AIGrowthPlanRequest):
    """
    Feed one influencer's live performance data to the AI strategist and
    return its personalized improvement tips.
    """
    print(f"\n✅ Received request on /ai/coach/generate-growth-plan for: {request.fullName}")
    if not _ai_strategist:
        raise HTTPException(status_code=503, detail="AI Strategist is not available.")

    try:
        # The strategist expects a plain dict, not a Pydantic model.
        payload = request.model_dump()
        tips = _ai_strategist.generate_influencer_growth_plan(payload)
        return AIGrowthPlanResponse(insights=tips)

    except Exception as e:
        print(f"🚨 An error occurred in the Growth Plan endpoint: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
1825
+
1826
+
1827
@app.post("/analyze/brand-asset-colors", response_model=BrandAssetAnalysisResponse, summary="Extracts dominant colors from a logo URL")
def analyze_brand_asset_colors(request: BrandAssetAnalysisRequest):
    """
    Download a brand image (logo/product) in memory and extract its dominant
    colors via KMeans clustering. Never fails the caller: on any error a
    single-black fallback palette is returned instead.
    """
    print(f"\n✅ Received request on /analyze/brand-asset-colors")
    try:
        palette = extract_colors_from_url(request.file_url)
        print(f" - ✅ Extracted colors: {palette}")
        return BrandAssetAnalysisResponse(dominant_colors=palette)

    except Exception:
        print(f"🚨 An error occurred during color extraction:")
        traceback.print_exc()
        # Fail gracefully with a neutral default palette.
        return BrandAssetAnalysisResponse(dominant_colors=["#000000"])
1846
+
1847
+
1848
@app.post("/generate/service-blueprint", response_model=ServiceBlueprintResponse, summary="Generates an AI project plan for a service")
async def generate_service_blueprint_route(request: ServiceBlueprintRequest):
    """
    Turn a service type plus free-form user requirements into a structured
    project plan (blueprint) via the AI strategist.
    """
    print(f"\n✅ Received request on /generate/service-blueprint for type: {request.service_type}")
    if not _ai_strategist:
        raise HTTPException(status_code=503, detail="AI Strategist is not available.")

    try:
        blueprint = _ai_strategist.generate_service_blueprint(
            service_type=request.service_type,
            requirements=request.requirements
        )

        # The strategist reports its internal failures via an "error" key.
        if "error" in blueprint:
            raise HTTPException(status_code=500, detail=blueprint["error"])

        return ServiceBlueprintResponse(**blueprint)

    except HTTPException:
        # Known HTTP errors pass through untouched.
        raise
    except Exception:
        print(f"🚨 An unexpected error occurred in the blueprint endpoint:")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail="An internal server error occurred while generating the blueprint.")
1878
+
1879
+
1880
@app.post("/generate/growth-plan", response_model=ServiceBlueprintResponse, summary="Generates an AI management plan for an influencer")
async def generate_growth_plan_route(request: GrowthPlanRequest):
    """
    Takes influencer goals and uses the AI Strategist to generate a growth plan.

    NOTE(review): this redefines the module-level name `generate_growth_plan_route`
    already used by the /ai/coach/generate-growth-plan handler earlier in this
    file. FastAPI keeps both routes (registration happens at decoration time),
    but only this function stays reachable by name — consider renaming one.
    """
    print(f"\n✅ Naya Endpoint Hit: /generate/growth-plan for handle: {request.platform_handle}")
    if not _ai_strategist:
        raise HTTPException(status_code=503, detail="AI Strategist is not available.")

    try:
        # Delegates to the strategist's dedicated growth-plan generator
        # (distinct from generate_service_blueprint).
        blueprint_data = _ai_strategist.generate_growth_plan(
            platform_handle=request.platform_handle,
            goals=request.goals,
            challenges=request.challenges
        )

        # The strategist reports internal failures via an "error" key.
        if "error" in blueprint_data:
            raise HTTPException(status_code=500, detail=blueprint_data["error"])

        return ServiceBlueprintResponse(**blueprint_data)

    except HTTPException as http_exc:
        raise http_exc
    except Exception as e:
        print(f"🚨 Unexpected error in growth plan endpoint: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail="An internal server error occurred.")
1908
+
1909
+
1910
@app.post("/submit_summary_job")
def submit_summary_job(request: AISummaryJobRequest, background_tasks: BackgroundTasks):
    """
    Accept a summarization job and return immediately; the AI work is
    scheduled to run after the response has been sent.
    """
    print(f" - ✅ Job accepted for check-in ID: {request.checkin_id}. Starting in background...")
    # Defer the heavy summarization to FastAPI's background task machinery.
    background_tasks.add_task(
        process_summary_in_background,
        request.checkin_id,
        request.raw_text,
    )
    return {"message": "Job accepted", "checkin_id": request.checkin_id}
1918
+
1919
+
1920
@app.post("/generate/weekly-plan", response_model=WeeklyPlanResponse, summary="Generates 3 content tasks for an influencer")
def generate_weekly_plan_route(request: WeeklyPlanRequest):
    """
    Generate three tailored content options from influencer context
    (mood, niche, trends). Declared sync on purpose so FastAPI runs the
    blocking strategist call in its thread pool.
    """
    print(f"\n✅ Received request on /generate/weekly-plan")
    if not _ai_strategist:
        raise HTTPException(status_code=503, detail="AI Strategist is not available.")

    try:
        # The strategist expects a plain dict rather than a Pydantic model.
        plan = _ai_strategist.generate_weekly_content_plan(request.context.model_dump())
        return WeeklyPlanResponse(**plan)

    except Exception as e:
        print(f"🚨 Error in weekly plan endpoint: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
1942
+
1943
+
1944
@app.post("/chat/creative", response_model=Dict[str, str], summary="Brainstorming chat with AI Creative Director")
def creative_chat_endpoint(request: CreativeChatRequest):
    """
    Relay a user message (plus chat history and task context) to the AI
    Creative Director and return its short, punchy reply (RAG + LLM).
    """
    try:
        # The director expects the chat history as plain dicts.
        past_turns = [turn.model_dump() for turn in request.history]

        reply = director.chat(
            user_message=request.message,
            history=past_turns,
            task_context=request.task_context
        )
        return {"reply": reply}

    except Exception as e:
        print(f"🚨 Creative Chat Error: {e}")
        raise HTTPException(status_code=500, detail="AI Director is busy.")
1963
+
1964
+
1965
@app.post("/generate/final-from-chat", response_model=FinalScriptResponse, summary="Generates final structured script from chat history")
def finalize_script_endpoint(request: FinalizeScriptRequest):
    """
    Summarizes the conversation into a shoot plan.
    This version is ROBUST and handles messy LLM output.

    Parsing strategy: try to pull a JSON object out of the raw LLM text
    first; if that fails, fall back to regex extraction of quoted
    "Hook:"/"Script:" fields; if even that fails, ship the raw text as the
    script with default visuals/tools.
    """
    import json  # FIX: hoisted out of the inner try so the name is always bound

    try:
        history_list = [m.model_dump() for m in request.history]

        raw_text = director.generate_final_plan(
            task_context=request.task_context,
            history=history_list
        )

        print(f" - 🤖 Raw Final Plan from LLM:\n---\n{raw_text}\n---")

        # --- IDIOT-PROOF PARSING LOGIC ---
        plan = { "hook": "", "script": "", "visuals": [], "tools": [] }

        # Attempt 1: the model returned (something containing) a JSON object.
        try:
            json_match = re.search(r'\{.*\}', raw_text, re.DOTALL)
            if json_match:
                parsed = json.loads(json_match.group(0))
                plan["hook"] = parsed.get("hook", "")
                plan["script"] = parsed.get("script", "")
                plan["visuals"] = parsed.get("visuals", [])
                plan["tools"] = parsed.get("tools", [])

                # Good enough if we recovered either a hook or a script.
                if plan["hook"] or plan["script"]:
                    return FinalScriptResponse(**plan)
        # FIX: was a bare `except: pass`, which also swallowed SystemExit and
        # KeyboardInterrupt. Only malformed-JSON / wrong-shape problems should
        # trigger the regex fallback. (json.JSONDecodeError ⊂ ValueError;
        # pydantic's ValidationError is a ValueError too.)
        except (ValueError, TypeError, AttributeError):
            pass  # JSON parsing failed — fall through to regex

        # Attempt 2: regex extraction from quoted "Hook:"/"Script:" lines.
        hook_match = re.search(r"Hook:?\s*\"(.*?)\"", raw_text, re.IGNORECASE)
        script_match = re.search(r"Script:?\s*\"(.*?)\"", raw_text, re.IGNORECASE)

        plan["hook"] = hook_match.group(1) if hook_match else "Start with a bang!"

        # No recognizable script — treat the whole raw text as the script.
        plan["script"] = script_match.group(1) if script_match else raw_text

        plan["visuals"] = ["Close up shot", "Wide shot"]
        plan["tools"] = ["CapCut"]

        return FinalScriptResponse(**plan)

    except Exception as e:
        print(f"🚨 Finalize Script Error: {e}")
        raise HTTPException(status_code=500, detail="Failed to generate final plan.")
core/__init__.py ADDED
File without changes
core/anomaly_detector.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # File: ai-service/core/anomaly_detector.py (NEW FILE)
2
+
3
+ import pandas as pd
4
+ from statsmodels.tsa.seasonal import seasonal_decompose
5
+
6
def find_anomalies(influencer_id: str, historical_data: pd.DataFrame, today_stats: dict) -> list[str]:
    """
    Compare today's stats against an influencer's recent history and return
    human-readable anomaly insights.

    Args:
        influencer_id: Identifier of the influencer (currently unused; kept
            for API compatibility and future per-influencer tuning).
        historical_data: DataFrame with at least 'date', 'avg_engagement_rate'
            and 'follower_count' columns, ordered oldest -> newest.
        today_stats: Dict with today's 'avg_engagement_rate' and
            'follower_count' (missing keys default to 0).

    Returns:
        A list of insight strings (possibly empty).
    """
    insights: list[str] = []
    df = historical_data.copy()

    if len(df) < 30:  # Need at least 30 days of data for meaningful analysis
        return ["Not enough historical data to analyze trends yet."]

    df.set_index('date', inplace=True)

    # Anomaly 1: Performance spikes/dips vs the 90-day average.
    avg_engagement_90d = df['avg_engagement_rate'].tail(90).mean()
    today_engagement = today_stats.get('avg_engagement_rate', 0)

    # Guard against a zero/NaN baseline: with numpy floats, dividing by zero
    # silently yields inf/NaN and would produce nonsense "inf% above" alerts.
    if avg_engagement_90d and not pd.isna(avg_engagement_90d):
        percentage_change = ((today_engagement - avg_engagement_90d) / avg_engagement_90d) * 100
        if percentage_change > 100:
            insights.append(f"🚀 High Performer Alert: Engagement rate spiked to {today_engagement:.2f}%, which is {percentage_change:.0f}% above the 90-day average. A recent post may be going viral.")
        elif percentage_change < -50:
            insights.append(f"⚠️ Performance Dip Alert: Engagement has dropped by {abs(percentage_change):.0f}%. It's worth checking in with this influencer.")

    # Anomaly 2: Follower growth over the last 7 days.
    follower_change = today_stats.get('follower_count', 0) - df['follower_count'].tail(7).iloc[0]
    if follower_change > 5000:  # Example threshold
        insights.append(f"📈 Follower Growth Spike: Gained {follower_change} followers this week. This is unusually high.")

    return insights
core/creative_chat.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ from llama_cpp import Llama
4
+ import json
5
+ import re
6
+
7
+ # Path setup to import VectorStore from the parent directory
8
+ current_dir = os.path.dirname(os.path.abspath(__file__))
9
+ parent_dir = os.path.dirname(current_dir)
10
+ sys.path.append(parent_dir)
11
+
12
+ from core.rag.store import VectorStore
13
+
14
class CreativeDirector:
    """Local-LLM "creative director" that chats about content ideas and
    produces a final shoot plan.

    Wraps a llama.cpp TinyLlama model plus a RAG vector store (the
    "creative_mind" collection).  Loading the GGUF model happens in
    __init__, so construction is slow and requires the model file on disk.
    """

    def __init__(self):
        """Initialize Model and Memory once to save time."""

        # Using TinyLlama as it is faster on CPU
        model_path = os.path.join(parent_dir, "llm_model", "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf")

        if not os.path.exists(model_path):
            raise FileNotFoundError(f"❌ Model not found at: {model_path}. Please check the llm_model folder.")

        print("🧠 Loading AI Director (TinyLlama - SUPER FAST MODE)...")

        # Small context/batch sizes keep CPU inference latency low.
        self.llm = Llama(
            model_path=model_path,
            n_ctx=512,
            n_batch=32,
            n_threads=4,
            verbose=False
        )
        self.memory = VectorStore(collection_name="creative_mind")
        print("✅ AI Director is Online.")

    def chat(self, user_message: str, history: list, task_context: str):
        """Main Chat Logic with RAG, optimized for speed.

        Args:
            user_message: The user's latest message.
            history: Prior chat messages (not used here; kept for interface
                symmetry with generate_final_plan).
            task_context: Short description of the content task.

        Returns:
            A short tip string.  Errors are swallowed and replaced by a
            canned reply, so this never raises.
        """

        print(f" - 🧠 Thinking...")

        # Retrieve at most one snippet and truncate it to 150 chars to keep
        # the prompt (and therefore latency) small.
        retrieved_docs = self.memory.search(user_message, n_results=1)
        expert_knowledge = retrieved_docs[0][:150] if retrieved_docs else "Be creative and direct."

        prompt = f"""Instruction: Act as a Viral Content Expert. Give 1 short tip for "{task_context}".
Context: {expert_knowledge}
User: {user_message}
Response:"""

        try:
            response = self.llm(
                prompt,
                max_tokens=50,
                stop=["Instruction:", "User:", "\n\n"],
                temperature=0.7,
                echo=False
            )

            reply = response['choices'][0]['text'].strip()
            if not reply:
                # Empty completion -> fall back to a generic evergreen tip.
                return "Try showing a 'before vs after' comparison. It always works!"

            print(f" - 🗣️ Reply: {reply}")
            return reply

        except Exception as e:
            print(f" - ❌ AI Chat Error: {e}")
            return "My AI brain is a bit slow today. Please ask again!"

    def generate_final_plan(self, task_context: str, history: list):
        """Generates the final script using simple text fallback.

        Returns:
            A dict with keys "hook", "script" (strings) and "visuals",
            "tools" (lists).  Missing labels in the model output fall back
            to defaults; on any error a placeholder plan is returned.
        """
        print(f" - 🎬 Generating final plan for: {task_context}")

        # Only the last 3 messages are summarized to keep the prompt short.
        conversation_summary = "\n".join([f"- {msg['content']}" for msg in history[-3:]])

        # Ask for labeled plain text instead of JSON -- far more reliable
        # with a small model.
        prompt = f"""Instruction: Create a video script for "{task_context}".
Chat Summary: {conversation_summary}

Format your answer exactly like this:
HOOK: (Write hook here)
SCRIPT: (Write script here)
VISUALS: (Write visuals here)
TOOLS: (Write tools here)

Response:"""

        try:
            response = self.llm(
                prompt,
                max_tokens=300,
                stop=["Instruction:", "Response:"],
                temperature=0.6,
                echo=False
            )

            raw_text = response['choices'][0]['text'].strip()
            print(f" - 🤖 Raw Text: {raw_text}")

            # Robust parsing: extract each labeled section via a regex with
            # a lookahead on the next label.
            hook_match = re.search(r'HOOK:\s*(.*?)(?=\nSCRIPT:)', raw_text, re.DOTALL | re.IGNORECASE)
            script_match = re.search(r'SCRIPT:\s*(.*?)(?=\nVISUALS:)', raw_text, re.DOTALL | re.IGNORECASE)
            visuals_match = re.search(r'VISUALS:\s*(.*?)(?=\nTOOLS:)', raw_text, re.DOTALL | re.IGNORECASE)
            tools_match = re.search(r'TOOLS:\s*(.*)', raw_text, re.DOTALL | re.IGNORECASE)

            # If a label is missing, fall back to defaults (the whole raw
            # text becomes the script).
            return {
                "hook": hook_match.group(1).strip() if hook_match else "Start with a bang!",
                "script": script_match.group(1).strip() if script_match else raw_text,
                "visuals": [visuals_match.group(1).strip()] if visuals_match else ["Talking Head"],
                "tools": [tools_match.group(1).strip()] if tools_match else ["CapCut"]
            }

        except Exception as e:
            print(f" - ❌ Final Plan Generation Error: {e}")
            # Fallback plan so callers always get the expected dict shape.
            return {
                "hook": "Error generating plan.",
                "script": "Please try again later.",
                "visuals": [],
                "tools": []
            }
123
+
124
# Create a single instance to be used by the API.
# NOTE: this loads the GGUF model at import time, so importing this module
# is slow and raises FileNotFoundError if the model file is missing.
director = CreativeDirector()
core/document_parser.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # FILE: ai-service/core/document_parser.py
2
+
3
+ import fitz # PyMuPDF library
4
+ import requests
5
+ import io
6
+
7
def parse_pdf_from_url(pdf_url: str) -> str:
    """
    Downloads a PDF from a URL, extracts all text, and returns it as a single string.

    Args:
        pdf_url: Publicly reachable URL of the PDF file.

    Returns:
        Concatenated text of all pages, each page followed by a blank line.

    Raises:
        ConnectionError: If the file could not be downloaded.
        ValueError: If the downloaded bytes could not be parsed as a PDF.
    """
    print(f" - 📑 Downloading and parsing PDF from URL...")
    try:
        # Step 1: Download the PDF content from the URL
        response = requests.get(pdf_url, timeout=30)
        response.raise_for_status()  # Raise an exception for bad status codes

        pdf_bytes = response.content

        # Step 2: Open the PDF from memory using PyMuPDF
        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
        try:
            # Step 3: Extract text page by page.  Iterating the document
            # directly replaces the manual range(len(doc)) / load_page dance.
            full_text = "".join(page.get_text("text") + "\n\n" for page in doc)
        finally:
            # Always release the document handle -- the original leaked it
            # when text extraction failed mid-parse.
            doc.close()

        print(f" - ✅ PDF parsed successfully. Total characters: {len(full_text)}")
        return full_text

    except requests.exceptions.RequestException as e:
        print(f" - ❌ FAILED to download PDF: {e}")
        raise ConnectionError(f"Could not download the file from the provided URL: {pdf_url}") from e
    except Exception as e:
        print(f" - ❌ FAILED to parse PDF: {e}")
        raise ValueError("The provided file could not be parsed as a valid PDF.") from e
core/guardrails/safety.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class SafetyGuard:
    """Lightweight prompt-safety helper: blacklist screening plus
    whitespace normalization for user-supplied text."""

    # Extend this set over time as new abuse patterns show up.
    BLACKLIST_WORDS = {
        "ignore instructions", "system prompt", "password", "secret key",
        "hack", "bypass"
    }

    @staticmethod
    def validate_input(text: str) -> bool:
        """Return True when *text* is non-blank and contains no blacklisted phrase."""
        if not text or not text.strip():
            return False
        lowered = text.lower()
        # Substring check: any blacklisted phrase anywhere fails validation.
        return not any(phrase in lowered for phrase in SafetyGuard.BLACKLIST_WORDS)

    @staticmethod
    def sanitize(text: str) -> str:
        """Collapse runs of whitespace into single spaces and trim the ends."""
        return " ".join(text.split())
core/inference/cache.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from cachetools import TTLCache
2
+ import functools
3
+
4
+ # Cache up to 100 items for 1 hour (3600 seconds)
5
+ memory_cache = TTLCache(maxsize=100, ttl=3600)
6
+
7
def cached_response(func):
    """Decorator that memoizes *func* results in the module-level TTL cache.

    The cache key is the string form of the FIRST positional argument only
    (keyword arguments are deliberately ignored to keep keys simple), so
    only decorate functions whose output depends on that first argument.
    Calls without positional arguments bypass the cache entirely.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        if not args:
            # Nothing to key on -> don't cache (the original looked up a
            # shared "default" key it never stored, i.e. a guaranteed miss).
            return func(*args, **kwargs)

        # Build the key once; fall back to an uncached call if key
        # construction fails (e.g. a broken __str__ on the argument).
        try:
            cache_key = str(args[0])
        except Exception:
            return func(*args, **kwargs)

        if cache_key in memory_cache:
            # print(f"⚡ Using Cache for: {cache_key[:30]}...")
            return memory_cache[cache_key]

        result = func(*args, **kwargs)
        memory_cache[cache_key] = result
        return result
    return wrapper
core/matcher.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # FILE: ai-service/core/matcher.py (FINAL VERSION)
2
+
3
+ from sentence_transformers import SentenceTransformer, util
4
+ import torch
5
+ from typing import List, Dict, Any
6
+
7
+ _embedding_model = None
8
+
9
def load_embedding_model(model_path: str):
    """Load the sentence-transformer into the module-level singleton.

    Subsequent calls are no-ops; the model goes to GPU when available.
    """
    global _embedding_model
    if _embedding_model is not None:
        return  # Already loaded -- nothing to do.

    print(f" - 🧠 Loading embedding model from: {model_path}")
    target_device = "cuda" if torch.cuda.is_available() else "cpu"
    _embedding_model = SentenceTransformer(model_path, device=target_device)
    print(f" - ✅ Embedding model loaded successfully on '{target_device}'.")
17
+
18
def rank_documents_by_similarity(query: str, documents: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Ranks a list of documents based on their semantic similarity to a query.

    Each document dict gets a 'match_score' key added in place (0-100,
    higher is more similar, negative similarities clamped to 0) and the
    list is returned sorted by that score, descending.

    Raises:
        RuntimeError: If the embedding model has not been loaded yet.
    """
    global _embedding_model
    if _embedding_model is None:
        # This error is critical: without the model nothing can be ranked.
        # RuntimeError (a subclass of Exception, so existing handlers still
        # catch it) instead of a bare generic Exception.
        raise RuntimeError("CRITICAL: Embedding model is not loaded. Please ensure load_embedding_model() is called on startup.")

    if not documents:
        return []

    doc_texts = [doc.get('text', '') for doc in documents]

    query_embedding = _embedding_model.encode(query, convert_to_tensor=True)
    doc_embeddings = _embedding_model.encode(doc_texts, convert_to_tensor=True)

    cosine_scores = util.pytorch_cos_sim(query_embedding, doc_embeddings)[0]

    # Attach the score to every document BEFORE sorting.
    for doc, score in zip(documents, cosine_scores):
        doc['match_score'] = round(max(0, score.item() * 100))

    # Sort the documents, which now all carry the 'match_score' key.
    return sorted(documents, key=lambda d: d.get('match_score', 0), reverse=True)
core/predictor.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # FILE: ai-service/core/predictor.py (REPLACE EVERYTHING IN YOUR FILE WITH THIS)
2
+
3
+ import joblib
4
+ import pandas as pd
5
+ from typing import List, Dict
6
+
7
+ print(">>> Loading ai-service/core/predictor.py (Version: FINAL, COMPLETE)")
8
+
9
# Load the serialized model pipelines once at import time.  A missing model
# file is not fatal here: the pipeline is set to None and the prediction
# functions below degrade gracefully (empty results / fallback values).
try:
    influencer_pipeline = joblib.load('models/influencer_matcher_v1.joblib')
    print("--- Predictor: Influencer Matcher model loaded successfully. ---")
except FileNotFoundError as e:
    print(f"--- Predictor FATAL ERROR: Model file not found: {e}. Predictions will fail. ---")
    influencer_pipeline = None

# The performance predictor is optional as well; predict_performance()
# returns a hard-coded fallback value when it is unavailable.
try:
    performance_pipeline = joblib.load('models/performance_predictor_v1.joblib')
    print("--- Predictor: Performance Predictor model loaded successfully. ---")
except FileNotFoundError:
    performance_pipeline = None
23
+
24
def rank_influencers_by_match(influencers: List[Dict], campaign_details: Dict, top_n: int = 5) -> List[Dict]:
    """
    Rank candidate influencers for a campaign using the matcher model.

    Args:
        influencers: List of influencer dicts; each must provide at least
            'category' and 'bio' (other keys pass through to the result).
        campaign_details: Campaign dict; 'category', 'location', 'followers'
            and 'engagement_rate' are used as model features, with defaults
            when absent.
        top_n: How many of the highest-scoring influencers to return.

    Returns:
        Up to *top_n* influencer dicts sorted by predicted match score,
        or [] when the model is unavailable or prediction fails.
    """
    print(f"--- Predictor Skill: Ranking {len(influencers)} influencers...")

    if not influencers or influencer_pipeline is None:
        return []

    try:
        # Step 1: Build a DataFrame from the influencer list.
        influencer_df = pd.DataFrame(influencers)

        # Step 2: Assemble the feature frame the model expects --
        # influencer-side columns plus campaign-side columns (the latter are
        # identical for every row).

        # 1. Influencer-side features
        features = influencer_df[['category', 'bio']].copy()

        # 2. Campaign-side features, with defaults in case the backend did
        # not send the data.
        features['niche'] = campaign_details.get('category', '')  # Assume campaign category maps to niche
        features['country'] = campaign_details.get('location', 'USA')
        features['followers'] = campaign_details.get('followers', 10000)
        features['engagement_rate'] = campaign_details.get('engagement_rate', 0.03)

        print(f"--- Predictor Skill: Preparing features for model: {features.columns.to_list()}")

        # Step 3: Predict a match score per influencer.
        match_scores = influencer_pipeline.predict(features)
        influencer_df['match_score'] = match_scores

        # Step 4: Sort by score and keep the top N.
        top_influencers_df = influencer_df.sort_values(by='match_score', ascending=False).head(top_n)

        # Step 5: Return a clean result, gracefully skipping any columns
        # that do not exist in the input dicts.
        result_cols = ['id', 'name', 'handle', 'followers', 'category', 'bio']
        final_cols = [col for col in result_cols if col in top_influencers_df.columns]
        results = top_influencers_df[final_cols].to_dict(orient='records')

        print(f"--- Predictor Skill: Successfully ranked and returning top {len(results)} influencers.")
        return results

    except Exception as e:
        print(f"--- Predictor Skill ERROR: Failed during prediction process. Error: {e}")
        import traceback
        traceback.print_exc()
        return []
73
+
74
def predict_performance(data: dict) -> int:
    """Predict 'final_reach' for a campaign from its feature dict.

    Falls back to a fixed estimate when the model could not be loaded.
    """
    if performance_pipeline is None:
        return 800000  # Fallback value when the model is unavailable

    single_row = pd.DataFrame(data, index=[0])
    predicted = performance_pipeline.predict(single_row)
    return int(predicted[0])
core/rag/store.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import chromadb
2
+ from chromadb.config import Settings
3
+ import os
4
+
5
+ # Path to save the database inside ai-service/data
6
+ BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
7
+ DB_PATH = os.path.join(BASE_DIR, "data", "chroma_db")
8
+
9
class VectorStore:
    """Thin wrapper around a persistent ChromaDB collection.

    Offers upsert ('add_text') and similarity search ('search'); failures
    are logged and reported as False / empty list rather than raised.
    """

    def __init__(self, collection_name="platform_knowledge"):
        """Initialize persistent ChromaDB client."""
        # Make sure the storage directory exists before Chroma touches it.
        os.makedirs(DB_PATH, exist_ok=True)

        self.client = chromadb.PersistentClient(path=DB_PATH)
        # Create the collection on first use, reuse it afterwards.
        self.collection = self.client.get_or_create_collection(name=collection_name)

    def add_text(self, text_chunks, metadatas, ids):
        """Upsert text chunks (with metadata) into the collection; True on success."""
        try:
            self.collection.upsert(
                documents=text_chunks,
                metadatas=metadatas,
                ids=ids
            )
        except Exception as e:
            print(f"[RAG Error] Failed to add text: {str(e)}")
            return False
        return True

    def search(self, query, n_results=2):
        """Return up to *n_results* stored texts most similar to *query*."""
        try:
            hits = self.collection.query(
                query_texts=[query],
                n_results=n_results
            )
            # Chroma nests results per query; unwrap the first (only) query.
            if hits['documents']:
                return hits['documents'][0]
            return []
        except Exception as e:
            print(f"[RAG Error] Search failed: {str(e)}")
            return []
core/strategist.py ADDED
@@ -0,0 +1,609 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # FILE: ai-service/core/strategist.py (REPLACE EVERYTHING IN YOUR FILE WITH THIS)
2
+
3
+ import traceback
4
+ from typing import Dict, Any, List
5
+ import json
6
+ import re
7
+ from llama_cpp import Llama
8
+
9
+ try:
10
+ from core.guardrails.safety import SafetyGuard
11
+ except ImportError:
12
+ SafetyGuard = None
13
+ print("⚠️ Safety module not found. Skipping checks.")
14
+
15
+
16
+ class AIStrategist:
17
    # Single clean __init__ (an earlier revision of this class had two).
    def __init__(self, llm_instance: Llama, store=None):
        """
        Args:
            llm_instance: A ready llama.cpp model instance; must not be None.
            store: Optional vector store used for RAG lookups in chat.

        Raises:
            ValueError: If llm_instance is None.
        """
        if llm_instance is None:
            raise ValueError("AIStrategist requires a valid Llama instance.")
        self.llm = llm_instance
        self.store = store  # Vector DB Store
        print("--- AIStrategist initialized successfully (RAG Ready). ---")
24
+
25
+ def generate_campaign_brief(self, brand_name: str, campaign_goal: str, target_audience: str, budget_range: str) -> Dict[str, Any]:
26
+ """
27
+ Generates a structured, JSON-formatted campaign brief and cleans the output.
28
+ """
29
+ print(f"--- Strategist Skill: Generating campaign brief for brand '{brand_name}'.")
30
+
31
+ prompt = f"""
32
+ [SYSTEM]
33
+ You are an expert campaign strategist. Your task is to generate a creative and actionable campaign brief in a valid JSON object format. Do not add any text before or after the JSON object.
34
+
35
+ [CLIENT INPUT]
36
+ - Brand Name: {brand_name}
37
+ - Primary Goal: {campaign_goal}
38
+ - Target Audience: {target_audience}
39
+ - Budget: {budget_range}
40
+
41
+ [YOUR TASK]
42
+ Generate a JSON object with keys: "title", "description", "goal_kpi", and "content_guidelines" (as a list of strings).
43
+ - "title": A short, catchy campaign title.
44
+ - "description": A one-paragraph summary of the campaign's core idea.
45
+ - "goal_kpi": The single most important Key Performance Indicator (KPI) for this goal.
46
+ - "content_guidelines": A list of 3 creative content ideas for influencers.
47
+
48
+ [JSON OUTPUT]
49
+ """
50
+ try:
51
+ response_dict = self.llm(
52
+ prompt,
53
+ max_tokens=700,
54
+ temperature=0.8,
55
+ stop=["[CLIENT INPUT]", "\n\n", "User:"],
56
+ echo=False
57
+ )
58
+ raw_text = response_dict['choices'][0]['text'].strip()
59
+ if '```json' in raw_text:
60
+ raw_text = raw_text.split('```json\n')[1].split('```')[0]
61
+ elif '{' in raw_text:
62
+ raw_text = raw_text[raw_text.find('{'):raw_text.rfind('}') + 1]
63
+
64
+ json_response = json.loads(raw_text)
65
+ print("--- Strategist Skill: Successfully parsed brief from LLM.")
66
+
67
+ if 'goal_kpi' in json_response and isinstance(json_response['goal_kpi'], list):
68
+ print("--- Strategist Skill: Cleaning up 'goal_kpi' field (list -> string).")
69
+ json_response['goal_kpi'] = json_response['goal_kpi'][0] if json_response['goal_kpi'] else 'N/A'
70
+ if 'title' in json_response and isinstance(json_response['title'], list):
71
+ json_response['title'] = json_response['title'][0] if json_response['title'] else 'AI Generated Title'
72
+ if 'description' in json_response and isinstance(json_response['description'], list):
73
+ json_response['description'] = json_response['description'][0] if json_response['description'] else 'AI Generated Description'
74
+
75
+ return json_response
76
+
77
+ except (json.JSONDecodeError, KeyError) as e:
78
+ response_content = locals().get('raw_text', 'No raw text available')
79
+ print(f"--- Strategist Skill FATAL ERROR: Failed to decode/parse JSON from model. Error: {e}. Raw output: '{response_content}'")
80
+ return {"error": "The AI model returned an invalid format. Please try again."}
81
+ except Exception as e:
82
+ print(f"--- Strategist Skill FATAL ERROR in generate_campaign_brief: {e}")
83
+ traceback.print_exc()
84
+ return {"error": "An internal error occurred in the AI model."}
85
+
86
+ def generate_strategy_from_prompt(self, user_prompt: str) -> str:
87
+ """
88
+ Generates a general strategy from a raw prompt.
89
+ """
90
+ print(f"--- Strategist Skill (General): Received prompt: '{user_prompt[:50]}...'")
91
+ try:
92
+ response = self.llm(
93
+ user_prompt,
94
+ max_tokens=750,
95
+ temperature=0.75,
96
+ stop=["User:", "Client:", "System:"],
97
+ )
98
+ generated_text = response['choices'][0]['text'].strip()
99
+ print("--- Strategist Skill (General): Received response from LLM.")
100
+ return generated_text
101
+ except Exception as e:
102
+ print(f"--- Strategist Skill (General) ERROR: {e}")
103
+ traceback.print_exc()
104
+ return "An error occurred in the AI model while generating the strategy."
105
+
106
    def generate_weekly_summary(self, metrics: Dict[str, Any]) -> str:
        """
        Generates a concise, human-readable weekly summary from structured metrics data.

        Args:
            metrics: Expected keys include 'brand_id', 'start_date',
                'end_date', 'total_ad_spend', 'total_clicks', 'new_followers'
                and 'top_performing_campaign' -- all optional (defaults used).

        Returns:
            The summary text, or an error/empty-summary message string.
        """
        print(f"--- Strategist Skill (Summary): Received metrics for brand {metrics.get('brand_id')}")
        prompt_template = f"""
        You are an expert digital marketing analyst writing a weekly summary for a client. Your tone should be positive, encouraging, and easy to understand. Do not use jargon. Focus on the key results and what they mean.

        Client's Performance Data for the week of {metrics.get('start_date')} to {metrics.get('end_date')}:
        - Total Ad Spend: ${metrics.get('total_ad_spend', 0):.2f}
        - Clicks from Ads: {metrics.get('total_clicks', 0)}
        - New Social Media Followers: {metrics.get('new_followers', 0)}
        - Top Performing Campaign this week: "{metrics.get('top_performing_campaign', 'N/A')}"

        Based on this data, write a short summary (about 3-4 sentences). Start with a positive opening and end with an encouraging closing statement.

        Summary:
        """
        print("--- Strategist Skill (Summary): Sending composed prompt to LLM...")
        try:
            # Call the Llama instance directly (positional prompt + kwargs).
            response = self.llm(
                prompt_template,
                max_tokens=250,
                temperature=0.6,
                stop=["Client:", "Data:"],
                echo=False
            )
            summary_text = response['choices'][0]['text'].strip()
            print("--- Strategist Skill (Summary): Received response from LLM.")
            if not summary_text:
                return "The AI model returned an empty summary."
            return summary_text
        except Exception as e:
            print(f"--- Strategist Skill (Summary) ERROR: {e}")
            traceback.print_exc()
            return "An error occurred in the AI model while generating the weekly summary."
143
+
144
    def generate_chat_response(self, prompt: str, context: str) -> str:
        """
        RAG-Enabled Chat Response with Safety Checks.

        Pipeline: (1) reject blacklisted input via SafetyGuard when that
        module is available, (2) best-effort retrieval of one knowledge-base
        snippet from self.store, (3) compose a single prompt and query the LLM.

        Args:
            prompt: The client's question.
            context: Dashboard context injected verbatim into the prompt.

        Returns:
            The model's reply, or a refusal/error message string.
        """
        print(f"--- Strategist Skill (Chat): Processing: '{prompt}'")

        # 1. SAFETY GUARDRAIL (fail-safe: skipped when SafetyGuard is absent)
        if SafetyGuard and not SafetyGuard.validate_input(prompt):
            return "I cannot generate a response as the query contains restricted content."

        # 2. RAG RETRIEVAL (knowledge injection; errors are only logged)
        retrieved_knowledge = ""
        if self.store:
            try:
                print(" - 🔍 Searching knowledge base...")
                # Search DB for relevant context
                kb_docs = self.store.search(prompt, n_results=1)
                if kb_docs:
                    retrieved_knowledge = f"\n[INTERNAL KNOWLEDGE]\n{kb_docs[0]}\n"
            except Exception as e:
                print(f" - ⚠️ RAG Search Warning: {e}")

        # 3. Compose the final prompt: dashboard context + retrieved snippet.
        master_prompt = f"""
        [SYSTEM]
        You are a digital marketing strategist AI. Use the Context and Internal Knowledge below to answer the Client.

        [CONTEXT FROM DASHBOARD]
        {context}

        {retrieved_knowledge}

        [CLIENT'S QUESTION]
        {prompt}

        [YOUR RESPONSE]
        """
        try:
            response = self.llm(
                master_prompt,
                max_tokens=500,
                temperature=0.5,
                stop=["[CLIENT'S QUESTION]", "[SYSTEM]"],
                echo=False
            )
            return response['choices'][0]['text'].strip()
        except Exception as e:
            traceback.print_exc()
            return "Internal error in Chat Module."
192
+
193
    def generate_dashboard_insights(self, kpis: Dict[str, Any]) -> str:
        """
        Turn dashboard KPIs into bullet-point insights.

        NOTE(review): the prompt below is literally truncated ("...") and
        never interpolates *kpis* -- the KPI dict is only printed, not sent
        to the model.  Confirm whether the full prompt was lost in a refactor.
        """
        print(f"--- Strategist Skill (Insights): Received KPIs: {kpis}")
        prompt = f"""
        [SYSTEM]
        You are a senior data analyst at Reachify...

        [YOUR INSIGHTFUL BULLET POINTS]
        - """
        try:
            response = self.llm(prompt, max_tokens=250, temperature=0.7, stop=["[SYSTEM]", "Human:", "\n\n"], echo=False)
            insight_text = response['choices'][0]['text'].strip()
            # Keep the reply shaped as a bullet list even when the model
            # drops the leading dash.
            if not insight_text.startswith('-'):
                insight_text = '- ' + insight_text
            print("--- Strategist Skill (Insights): Successfully received response from LLM.")
            return insight_text
        except Exception as e:
            print(f"--- Strategist Skill (Insights) ERROR: {e}")
            traceback.print_exc()
            return "- Could not generate AI insights due to an internal model error."
212
+
213
    def generate_analytics_insights(self, analytics_data: dict) -> str:
        """
        Takes campaign analytics data and generates 3 actionable insights using the LLM.

        NOTE(review): as written, *analytics_data* is never interpolated into
        the prompt, and the prompt body is a truncated placeholder ("...").
        Confirm whether the full prompt text was lost in a refactor.
        """
        print(f"--- Strategist Skill (Analytics Insights): Received analytics data.")
        prompt = f"""
        [SYSTEM]
        You are an expert Campaign Analyst...

        [YOUR ANALYSIS - 3 ACTIONABLE BULLET POINTS]
        -
        """
        print("--- Strategist Skill (Analytics Insights): Sending composed prompt to LLM...")
        try:
            response = self.llm(prompt, max_tokens=200, temperature=0.6, stop=["[SYSTEM]", "\n\n-"], echo=False)
            insights_text = response['choices'][0]['text'].strip()
            # Normalize to a bullet-list shape.
            if not insights_text.startswith('-'):
                insights_text = '- ' + insights_text
            print("--- Strategist Skill (Analytics Insights): Successfully received and processed response.")
            return insights_text
        except Exception as e:
            print(f"--- Strategist Skill (Analytics Insights) FATAL ERROR: {e}")
            traceback.print_exc()
            return "- AI insights could not be generated due to an internal model error."
237
+
238
+
239
    def get_caption_assistance(self, caption: str, action: str, guidelines: str = None) -> str:
        """
        Provides AI assistance for writing captions based on a specified action.

        Args:
            caption: The influencer's draft caption.
            action: One of 'improve', 'hashtags', or 'check_guidelines'.
            guidelines: Campaign rules; required when action is
                'check_guidelines' (otherwise the action is rejected).

        Returns:
            The model's text, or an explanatory error string for invalid
            actions / model failures.
        """
        print(f"--- Strategist Skill (Caption Assist): Received action: '{action}'")

        system_prompt = "You are a helpful and creative social media marketing assistant for influencers. You are concise and direct."

        # Each supported action maps to its own task-specific user prompt.
        if action == 'improve':
            user_prompt = f"Make the following Instagram caption more engaging and impactful. Keep the core message but enhance the wording.\n\nOriginal:\n---\n{caption}\n\nImproved:"
        elif action == 'hashtags':
            user_prompt = f"Suggest a list of 7 relevant and trending hashtags for the following Instagram post. Provide ONLY the hashtags, starting with # and separated by spaces.\n\nPost Caption:\n---\n{caption}\n\nHashtags:"
        elif action == 'check_guidelines' and guidelines:
            user_prompt = f"Carefully check if the following caption meets ALL the rules in the provided guidelines. Be strict. First, respond with only 'YES' or 'NO'. Then, on a new line, explain which specific rules were broken, or confirm that all rules were followed.\n\nGuidelines:\n---\n{guidelines}\n\nCaption to Check:\n---\n{caption}\n\nAnalysis:"
        else:
            return "Invalid action or missing guidelines provided to the AI assistant."

        full_prompt = f"[SYSTEM]\n{system_prompt}\n\n[USER]\n{user_prompt}\n\n[ASSISTANT]\n"

        try:
            print(f" - Calling LLM for caption assistance (action: {action})...")
            response = self.llm(
                full_prompt,
                max_tokens=256,
                temperature=0.7,
                stop=["[USER]", "[SYSTEM]"],
                echo=False
            )
            generated_text = response['choices'][0]['text'].strip()
            print(f" - ✅ LLM generated response.")
            return generated_text
        except Exception as e:
            print(f"--- Strategist Skill (Caption Assist) ERROR: {e}")
            traceback.print_exc()
            return "An error occurred while getting assistance from the AI."
274
+
275
+
276
    def generate_influencer_analytics_summary(self, kpis: Dict[str, Any]) -> str:
        """
        Takes an influencer's KPIs and generates a short, encouraging, and actionable summary.

        Args:
            kpis: Dict with optional 'avgEngagementRate', 'totalReach' and
                'totalSubmissions' keys (missing values default to zero).

        Returns:
            A one-line summary string, or an apology message on failure.
        """
        print(f"--- Strategist Skill (Influencer Analytics): Received KPIs for analysis.")

        # v2 prompt: much stricter and more directive than the first
        # attempt, to keep the small model from rambling about what KPIs are.
        prompt = f"""
        [SYSTEM]
        You are "Spark", a friendly AI Analyst for social media influencers.
        Your task is to write a 2-sentence summary of the user's performance.
        - Sentence 1: Start with a positive highlight from the data.
        - Sentence 2: Give ONE simple, actionable tip for what to do next.
        - BE CONCISE and encouraging. DO NOT explain what KPIs are. DO NOT use lists.

        [INFLUENCER'S DATA]
        - Engagement Rate: {kpis.get('avgEngagementRate', 0.0):.2f}%
        - Total Reach on approved posts: {kpis.get('totalReach', 0)}
        - Approved Posts: {kpis.get('totalSubmissions', 0)}

        [YOUR 2-SENTENCE SUMMARY]
        """

        try:
            print(" - Calling LLM for influencer analytics summary (v2 prompt)...")
            response = self.llm(
                prompt,
                max_tokens=100,  # We only need a short response
                temperature=0.7,
                stop=["[SYSTEM]", "[USER]", "User:", "System:"],
                echo=False
            )
            summary_text = response['choices'][0]['text'].strip()

            # Keep only the first line to drop any trailing AI chit-chat.
            if "\n" in summary_text:
                summary_text = summary_text.split('\n')[0]

            print(" - ✅ LLM generated summary successfully.")
            return summary_text

        except Exception as e:
            print(f"--- Strategist Skill (Influencer Analytics) ERROR: {e}")
            return "AI summary could not be generated at this time."
320
+
321
+
322
def generate_influencer_growth_plan(self, influencer_data: Dict[str, Any]) -> List[str]:
    """Analyze an influencer's live data and return up to 3 actionable growth tips.

    Args:
        influencer_data: Profile metrics; reads ``fullName``, ``category``,
            ``avgEngagementRate``, ``bestPostCaption`` and ``worstPostCaption``.
            Missing or None captions fall back to 'N/A' so slicing cannot crash.

    Returns:
        A list of at most three tip strings; a one-element fallback list if the
        LLM call raises.
    """
    import re  # needed for robust bullet/number stripping below

    print(f"--- Strategist Skill (Growth Plan): Influencer {influencer_data.get('fullName')} ke liye plan banaya ja raha hai.")

    # Normalize possibly-None captions up front: `get()` then `or 'N/A'`.
    best_caption = influencer_data.get('bestPostCaption') or 'N/A'
    worst_caption = influencer_data.get('worstPostCaption') or 'N/A'

    prompt = f"""
[INST] You are an expert social media coach. Analyze the following data for an influencer named {influencer_data.get('fullName')} and provide ONLY 3 short, actionable tips based on it. Start each tip on a new line.

- Niche: {influencer_data.get('category', 'Not specified')}
- Avg Engagement: {influencer_data.get('avgEngagementRate', 0.0):.2f}%
- Best Post was about: '{best_caption[:50]}'
- Worst Post was about: '{worst_caption[:50]}'

Your 3 tips:
[/INST]
"""

    try:
        print("--- Strategist Skill (Growth Plan): Simplified LLM ko call kiya jaa raha hai...")
        response = self.llm(
            prompt,
            max_tokens=256,
            temperature=0.7,
            stop=["[INST]", "User:", "System:"],
            echo=False
        )
        raw_text = response['choices'][0]['text'].strip()

        # FIX: the previous chain of .lstrip('- ').lstrip('1. ')... calls treated
        # each argument as a *character set*, so tips legitimately starting with
        # digits, dots, dashes or spaces were mangled (e.g. "2023 trends" ->
        # "023 trends"). Strip exactly one leading bullet or "N." / "N)" marker
        # with a regex instead.
        tips = [
            re.sub(r'^\s*(?:[-*]\s*|\d+[.)]\s*)', '', line.strip())
            for line in raw_text.split('\n')
            if line.strip()
        ]

        print(f"--- Strategist Skill (Growth Plan): LLM se tips successfully generate ho gaye: {tips}")
        return tips[:3]

    except Exception as e:
        print(f"--- Strategist Skill (Growth Plan) FATAL ERROR: {e}")
        traceback.print_exc()
        return ["AI Coach is currently unavailable due to a technical error."]
366
+
367
+
368
def generate_service_blueprint(self, service_type: str, requirements: str) -> Dict[str, Any]:
    """Generate a structured project blueprint for a client request via the LLM.

    NOTE(review): a second method with this exact name is defined later in the
    same class and shadows this one at class-creation time, so this version is
    effectively dead code kept for reference.

    Returns:
        Dict with 'title', 'deliverables', 'stack', 'price_est' and 'timeline';
        an error-shaped dict of the same keys if the LLM call fails.
    """
    import re

    print(f"--- Strategist Skill (Blueprint): Generating plan for '{service_type}' request.")

    # Few-shot prompt: a single "perfect example" pins the key::value format.
    prompt = f"""
[SYSTEM]
You are an expert project planner for a top-tier digital agency.
Analyze the client's request below and generate a concise project blueprint.

YOU MUST FOLLOW THE FORMAT OF THE EXAMPLE BELOW EXACTLY.
- For DELIVERABLES, provide a list of 4-5 specific features separated by the "|" pipe character.
- For STACK, PRICE_EST, and TIMELINE, you MUST provide a single, summarized value. DO NOT provide a detailed itemized list for these.

[PERFECT EXAMPLE]
TITLE:: Modern E-Commerce Store for a Fashion Brand
DELIVERABLES:: Dynamic Product Catalog | Secure Shopping Cart & Checkout | User Account & Order History | Admin Dashboard for Managing Products
STACK:: Next.js & TailwindCSS (Frontend), Supabase (Backend)
PRICE_EST:: $8,000 - $12,000
TIMELINE:: 8-10 Weeks
[/PERFECT EXAMPLE]

[CLIENT REQUEST]
- Service Type: {service_type}
- Description: {requirements}

[YOUR BLUEPRINT]
TITLE:: """

    try:
        completion = self.llm(
            prompt,
            max_tokens=400,
            temperature=0.5,
            stop=["[CLIENT REQUEST]", "[SYSTEM]", "[/PERFECT EXAMPLE]"],
            echo=False
        )
        # Re-attach the seeded "TITLE:: " prefix that the prompt consumed.
        raw_text = "TITLE:: " + completion['choices'][0]['text'].strip()
        print(f"--- Strategist Skill (Blueprint): Raw response from LLM:\n---\n{raw_text}\n---")

        # Defaults are returned for any key the model omits.
        blueprint = {
            'title': 'AI Generated Title',
            'deliverables': ['Analysis in progress...'],
            'stack': 'To be determined',
            'price_est': 'Pending',
            'timeline': 'Pending'
        }

        # Summary fields keep only their first line; deliverables may be a list.
        single_line_slots = {
            'TITLE': 'title',
            'STACK': 'stack',
            'PRICE_EST': 'price_est',
            'TIMELINE': 'timeline',
        }
        for raw_key, raw_value in re.findall(r'(\b[A-Z_]+\b)::(.*?)(?=\n\b[A-Z_]+\b::|$)', raw_text, re.DOTALL):
            raw_key, raw_value = raw_key.strip().upper(), raw_value.strip()
            if raw_key in single_line_slots:
                blueprint[single_line_slots[raw_key]] = raw_value.split('\n')[0].strip()
            elif raw_key == 'DELIVERABLES':
                items = [item.strip() for item in raw_value.split('|') if item.strip()]
                if items:
                    blueprint['deliverables'] = items

        print(f"--- Strategist Skill (Blueprint): Successfully parsed with final parser. Result: {blueprint}")
        return blueprint

    except Exception as e:
        error_msg = f"A critical error occurred. Error: {e}"
        print(f"--- Strategist Skill FATAL ERROR: {error_msg}")
        return { 'title': 'Error Generating Plan', 'deliverables': ['AI model failed to respond.'], 'stack': 'N/A', 'price_est': 'N/A', 'timeline': 'N/A' }
451
+
452
+
453
def generate_service_blueprint(self, service_type: str, requirements: str) -> Dict[str, Any]:
    """Generate a structured WEBSITE project blueprint for a client request.

    This definition overrides the earlier method of the same name; only the
    website-specific prompt lives here, while the LLM call and key::value
    parsing are delegated to the shared ``_get_ai_response_and_parse`` helper.
    """
    print(f"--- Strategist Skill (WEBSITE): Generating plan for '{service_type}' request.")

    prompt = f"""
[SYSTEM]
You are an expert project planner for a top-tier digital agency.
Analyze the client's request below and generate a concise project blueprint for a WEBSITE.

YOU MUST FOLLOW THE FORMAT OF THE EXAMPLE BELOW EXACTLY.
- For DELIVERABLES, provide a list of 4-5 specific website features separated by "|".
- For STACK, PRICE_EST, and TIMELINE, you MUST provide a single, summarized value.

[PERFECT EXAMPLE]
TITLE:: Modern E-Commerce Store for a Fashion Brand
DELIVERABLES:: Dynamic Product Catalog | Secure Shopping Cart & Checkout | User Account & Order History | Admin Dashboard
STACK:: Next.js & TailwindCSS (Frontend), Supabase (Backend)
PRICE_EST:: $8,000 - $12,000
TIMELINE:: 8-10 Weeks
[/PERFECT EXAMPLE]

[CLIENT REQUEST]
- Service Type: {service_type}
- Description: {requirements}

[YOUR BLUEPRINT]
TITLE:: """

    # Shared helper performs the LLM call and robust key::value parsing.
    return self._get_ai_response_and_parse(prompt)
486
+
487
+ # --- THIS IS THE NEW, SEPARATE FUNCTION FOR GROWTH MANAGEMENT ---
488
def generate_growth_plan(self, platform_handle: str, goals: str, challenges: str) -> Dict[str, Any]:
    """Create a 3-month influencer management plan from the user's stated inputs.

    FIX: the return annotation previously used the builtin function ``any``
    instead of ``typing.Any``, which type-checkers reject; behavior unchanged.

    Args:
        platform_handle: The influencer's channel/handle being managed.
        goals: Free-text growth goals supplied by the influencer.
        challenges: Free-text pain points supplied by the influencer.

    Returns:
        Parsed blueprint dict from ``_get_ai_response_and_parse``.
    """
    print(f"--- Strategist Skill (GROWTH): Generating plan for '{platform_handle}'.")

    prompt = f"""
[SYSTEM]
You are a Talent Manager for a top influencer agency. Create a 3-month management plan.
Follow the example format exactly. Use "|" to separate monthly services.
Use "Monthly Retainer" for Price Estimate and "Initial Contract Term" for Timeline.

[PERFECT EXAMPLE]
TITLE:: 3-Month YouTube Growth & Monetization Strategy
DELIVERABLES:: Weekly Content Calendar (2 Videos, 5 Shorts) | SEO Title & Description Writing | Proactive Brand Outreach (5 brands/month) | Monthly Performance Analytics Report
STACK:: YouTube Studio, TubeBuddy, Notion
PRICE_EST:: $1,500 / month
TIMELINE:: 3-Month Initial Contract
[/PERFECT EXAMPLE]

[CLIENT REQUEST]
- Platform Handle: {platform_handle}
- Goals: {goals}
- Challenges: {challenges}

[YOUR BLUEPRINT]
TITLE:: """

    # Same reliable call-and-parse helper used by the blueprint generator.
    return self._get_ai_response_and_parse(prompt)
518
+
519
+ # --- THIS IS THE HELPER FUNCTION THAT BOTH METHODS USE ---
520
+ def _get_ai_response_and_parse(self, prompt: str) -> Dict[str, any]:
521
+ """
522
+ Internal helper to call the LLM and parse the key::value format robustly.
523
+ """
524
+ try:
525
+ response_dict = self.llm(
526
+ prompt,
527
+ max_tokens=400,
528
+ temperature=0.5,
529
+ stop=["[CLIENT REQUEST]", "[SYSTEM]", "[/PERFECT EXAMPLE]"],
530
+ echo=False
531
+ )
532
+ raw_text = "TITLE:: " + response_dict['choices'][0]['text'].strip()
533
+ print(f"--- AI Raw Response ---\n{raw_text}\n---")
534
+
535
+ blueprint = {
536
+ 'title': 'AI Generated Plan',
537
+ 'deliverables': ['Analysis in progress...'],
538
+ 'stack': 'To be determined',
539
+ 'price_est': 'Pending',
540
+ 'timeline': 'Pending'
541
+ }
542
+
543
+ pairs = re.findall(r'(\b[A-Z_]+\b)::(.*?)(?=\n\b[A-Z_]+\b::|$)', raw_text, re.DOTALL)
544
+
545
+ for key, value in pairs:
546
+ key, value = key.strip().upper(), value.strip()
547
+ if key == 'TITLE': blueprint['title'] = value.split('\n')[0].strip()
548
+ elif key == 'STACK': blueprint['stack'] = value.split('\n')[0].strip()
549
+ elif key == 'PRICE_EST': blueprint['price_est'] = value.split('\n')[0].strip()
550
+ elif key == 'TIMELINE': blueprint['timeline'] = value.split('\n')[0].strip()
551
+ elif key == 'DELIVERABLES':
552
+ deliverables_list = [d.strip() for d in value.split('|') if d.strip()]
553
+ if deliverables_list: blueprint['deliverables'] = deliverables_list
554
+
555
+ print(f"--- Parser Result ---: {blueprint}")
556
+ return blueprint
557
+
558
+ except Exception as e:
559
+ error_msg = f"A critical error occurred in the AI model or parser. Error: {e}"
560
+ print(f"--- AI FATAL ERROR: {error_msg}")
561
+ return {
562
+ 'title': 'Error Generating Plan',
563
+ 'deliverables': ['AI model failed to respond or there was a system error.'],
564
+ 'stack': 'N/A',
565
+ 'price_est': 'N/A',
566
+ 'timeline': 'N/A'
567
+ }
568
+
569
+
570
def generate_weekly_content_plan(self, context: Dict[str, Any]) -> Dict[str, Any]:
    """Return three canned weekly content options (MOCK MODE — no LLM call).

    Placeholder for immediate responses until server capacity is upgraded.
    Output varies only with ``context['niche']`` and the first active trend
    name (falling back to "General" / "Trending Audio").
    """
    print(f"--- Strategist Skill (Plan): Generating for '{context.get('niche')}'.")

    niche = context.get("niche", "General")
    trend_names = [entry['name'] for entry in context.get("active_trends", [])]
    top_trend = trend_names[0] if trend_names else "Trending Audio"

    # Three fixed archetypes, lightly personalized by niche and trend.
    viral_option = {
        "type": "Viral Bet",
        "title": f"Reel: {top_trend} Challenge",
        "platform": "Instagram",
        "contentType": "Reel",
        "instructions": f"Use the '{top_trend}' audio. Show a quick transition related to {niche}. Keep it under 15s.",
        "reasoning": "High viral potential due to current trend momentum."
    }
    community_option = {
        "type": "Community",
        "title": "Story: Poll of the Day",
        "platform": "Instagram",
        "contentType": "Story",
        "instructions": "Post a 'This or That' poll related to your niche. Engage with replies.",
        "reasoning": "Boosts engagement rate by encouraging direct interaction."
    }
    authority_option = {
        "type": "Niche Authority",
        "title": "Carousel: Top 3 Tips",
        "platform": "Instagram",
        "contentType": "Carousel",
        "instructions": f"Share 3 lesser-known tips about {niche}. Use high-quality photos.",
        "reasoning": "Establishes authority and saves value for followers."
    }
    return {"options": [viral_option, community_option, authority_option]}
core/support_agent.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # FILE: ai-service/core/support_agent.py
2
+
3
+ import traceback
4
+ from typing import Dict, Any
5
+ from llama_cpp import Llama
6
+ from langchain.llms.base import LLM
7
+ from langchain.chains import ConversationalRetrievalChain
8
+ from langchain.memory import ConversationBufferMemory
9
+ from langchain_community.embeddings import HuggingFaceEmbeddings
10
+ from langchain_community.vectorstores import Chroma
11
+ from langchain_core.prompts import PromptTemplate
12
+ from langchain_core.output_parsers import StrOutputParser
13
+ from dotenv import load_dotenv
14
+
15
+ load_dotenv()
16
+
17
class LlamaLangChain(LLM):
    """Minimal LangChain ``LLM`` adapter around an in-process llama.cpp model.

    Wraps an already-loaded ``Llama`` instance so LangChain chains can drive
    it without loading a second copy of the model weights.
    """

    # The shared, already-initialized llama.cpp model (pydantic field).
    llama_instance: Llama

    @property
    def _llm_type(self) -> str:
        """Backend identifier reported to LangChain."""
        return "custom"

    def _call(self, prompt: str, stop: list[str] | None = None, **kwargs) -> str:
        """Run one non-streaming completion and return the generated text."""
        completion = self.llama_instance.create_completion(
            prompt,
            max_tokens=256,
            stop=stop,
            stream=False,
        )
        first_choice = completion["choices"][0]
        return first_choice["text"]
29
+
30
def format_docs(docs):
    """Join retrieved documents' text into one blob, separated by blank lines."""
    contents = (doc.page_content for doc in docs)
    return "\n\n".join(contents)
32
+
33
+
34
class SupportAgent:
    """RAG-backed support chatbot plus small copywriting utilities.

    Reuses an already-loaded llama.cpp model (shared with the rest of the
    service to keep RAM low), a Chroma vector store for retrieval, and one
    ConversationBufferMemory per conversation id.
    """

    def __init__(self, llm_instance: Llama, embedding_path: str, db_path: str):
        """
        Args:
            llm_instance: The shared, already-initialized llama.cpp model.
            embedding_path: Local path/name of the HuggingFace embedding model.
            db_path: Directory of the persisted Chroma vector store.

        Raises:
            ValueError: If *llm_instance* is None.
        """
        print("--- Initializing Support Agent (Optimized for Low RAM) ---")

        if llm_instance is None:
            raise ValueError("SupportAgent received an invalid LLM instance.")

        # Instead of loading a second LlamaCpp model, wrap the already-running
        # instance in a thin LangChain adapter.
        self.langchain_llm_wrapper = LlamaLangChain(llama_instance=llm_instance)

        # FIX: generate_caption_variant / generate_marketing_strategy /
        # generate_content_outline call ``self.llm.invoke(...)`` (and test
        # ``if not self.llm``), but ``self.llm`` was never assigned anywhere,
        # which raised AttributeError at runtime. Alias it to the wrapper,
        # which provides ``invoke`` via the LangChain LLM interface.
        self.llm = self.langchain_llm_wrapper

        self.embeddings = HuggingFaceEmbeddings(model_name=embedding_path)
        self.vector_store = Chroma(persist_directory=db_path, embedding_function=self.embeddings)
        # One ConversationBufferMemory per conversation_id.
        self.conversations = {}

        # The router reuses the same wrapper rather than a separate model.
        router_template = """Classify: 'live_data' or 'general_knowledge'. Question: {question} Classification:"""
        self.router_prompt = PromptTemplate.from_template(router_template)
        self.router_chain = self.router_prompt | self.langchain_llm_wrapper | StrOutputParser()

        print("✅ Agent and core components initialized successfully.")

    def _get_or_create_memory(self, conversation_id: str) -> ConversationBufferMemory:
        """Return the chat memory for *conversation_id*, creating it lazily."""
        if conversation_id not in self.conversations:
            self.conversations[conversation_id] = ConversationBufferMemory(
                memory_key="chat_history", return_messages=True, input_key="question", output_key='answer'
            )
        return self.conversations[conversation_id]

    def answer(self, payload: dict, conversation_id: str) -> dict:
        """Answer a support question via retrieval-augmented generation.

        Args:
            payload: Request body; reads the 'question' key (defaults to "").
            conversation_id: Key used to keep per-user chat memory.

        Returns:
            Dict with 'response' (the answer text) and 'context' (the joined
            source documents, or the error string on failure).
        """
        question = payload.get("question", "")
        memory = self._get_or_create_memory(conversation_id)

        try:
            # RAG: retrieve from Chroma, then answer strictly from the context.
            general_prompt_template = "Answer based on the CONTEXT.\n[CONTEXT]: {context}\n[USER QUESTION]: {question}\n[YOUR ANSWER]:"
            general_prompt = PromptTemplate.from_template(general_prompt_template)

            retriever = self.vector_store.as_retriever()
            qa_chain = ConversationalRetrievalChain.from_llm(
                llm=self.langchain_llm_wrapper,
                retriever=retriever,
                memory=memory,
                combine_docs_chain_kwargs={"prompt": general_prompt}
            )

            result = qa_chain.invoke({"question": question})
            final_answer = result.get("answer", "I don't have information on that topic.").strip()

            return {"response": final_answer, "context": format_docs(result.get('source_documents', []))}
        except Exception as e:
            traceback.print_exc()
            return {"response": "A critical server error occurred in the agent.", "context": str(e)}

    def generate_caption_variant(self, caption: str, action: str) -> str:
        """Rewrite an Instagram caption according to *action*.

        Args:
            caption: The original caption text.
            action: One of 'improve_writing', 'make_punchier' or
                'generate_alternatives'.

        Returns:
            The rewritten caption (or '---'-separated alternatives), or an
            error string for an unknown action / LLM failure.
        """
        print(f"--- 🚀 Received CAPTION generation request for action: '{action}' ---")
        system_prompt = (
            "You are an expert social media copywriter for an influencer marketing agency. "
            "Your task is to rewrite the provided Instagram caption based on a specific instruction. "
            "Your response must be ONLY the rewritten caption. Do not add any introductory phrases like 'Here is the rewritten caption:'."
        )
        if action == 'improve_writing':
            user_instruction = "Improve the writing. Correct any grammar or spelling mistakes, make the language clearer, and give it a more professional and polished tone."
        elif action == 'make_punchier':
            user_instruction = "Make it punchier. Rewrite it to be shorter, more energetic, and highly engaging. Use 2-3 relevant emojis to add personality."
        elif action == 'generate_alternatives':
            user_instruction = "Generate three new, creative, and completely different caption alternatives for the same topic. Separate each alternative with '---'."
        else:
            return "Error: Invalid action specified."
        final_prompt = f"""[SYSTEM INSTRUCTIONS]
{system_prompt}
[USER REQUEST]
{user_instruction}
[ORIGINAL CAPTION]
{caption}
[YOUR REWRITTEN CAPTION]
"""
        try:
            print("--- Invoking LLM for pure text generation... ---")
            response = self.llm.invoke(final_prompt)
            clean_response = response.strip()
            print(f"✅ LLM Response: {clean_response}")
            return clean_response
        except Exception:
            traceback.print_exc()
            return "An error occurred while generating the caption."

    def generate_marketing_strategy(self, prompt: str) -> str:
        """Generate a marketing strategy from a fully pre-built prompt."""
        if not self.llm:
            return "Error: The AI model is not available."
        print("--- SupportAgent: Generating marketing strategy from prompt... ---")
        try:
            response = self.llm.invoke(prompt, max_tokens=750, temperature=0.75)
            clean_response = response.strip()
            print("--- SupportAgent: Strategy generated successfully. ---")
            return clean_response
        except Exception as e:
            traceback.print_exc()
            return f"An error occurred while generating the strategy: {e}"

    def generate_content_outline(self, title: str) -> str:
        """Generate a structured Markdown blog-post outline for *title*.

        Returns:
            The Markdown outline, or an error string if the model is missing
            or the call fails.
        """
        if not self.llm:
            return "Error: The AI model is not available."

        print(f"--- SupportAgent: Generating content outline for title: '{title}' ---")
        prompt = f"""
You are a professional content writer and editor.
Your task is to create a detailed, well-structured blog post outline for the following title.
The outline must be in Markdown format, using headings (#, ##) and bullet points (-).
Include sections for an Introduction, at least 3 main body points with sub-bullets, and a Conclusion.
**Title:** "{title}"
**Your Outline:**
"""

        try:
            response = self.llm.invoke(prompt, max_tokens=1024, temperature=0.7, stop=["User:", "Title:"])
            clean_response = response.strip()
            print("--- SupportAgent: Content outline generated successfully. ---")
            return clean_response
        except Exception as e:
            traceback.print_exc()
            return f"An error occurred while generating the content outline: {e}"
core/utils.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # File: ai-service/core/utils.py
2
+
3
+ import os
4
+ import cv2
5
+ import numpy as np
6
+ from sklearn.cluster import KMeans
7
+ import urllib.request
8
+ import ssl
9
+ from supabase import create_client, Client
10
+ from dotenv import load_dotenv
11
+
12
+ # Load environment variables from the root .env file
13
+ ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
14
+ load_dotenv(dotenv_path=os.path.join(ROOT_DIR, '.env'))
15
+
16
def get_supabase_client() -> Client:
    """Build a Supabase client from SUPABASE_URL / SUPABASE_SERVICE_KEY.

    Raises:
        ValueError: When either credential is missing from the environment.
        Exception: Any client-construction failure is logged and re-raised so
            the application stops rather than running without a database.
    """
    try:
        supabase_url = os.environ.get("SUPABASE_URL")
        # The service key (not the anon key) is required for backend tasks.
        service_key = os.environ.get("SUPABASE_SERVICE_KEY")
        if not supabase_url or not service_key:
            raise ValueError("Supabase credentials (URL or Service Key) are not set.")
        return create_client(supabase_url, service_key)
    except Exception as e:
        print(f"🔴 FATAL: Could not create Supabase client: {e}")
        raise  # Fail fast: the app cannot operate without Supabase.
30
+
31
def extract_colors_from_url(image_url: str, num_colors=4) -> list:
    """Download an image and return its *num_colors* dominant colors as hex codes.

    Best-effort: any failure (network, decode, clustering) is logged and a
    black/gray fallback palette is returned. Returns an empty list when the
    downloaded bytes cannot be decoded as an image.

    Requires: opencv-python-headless, scikit-learn, numpy.
    """
    try:
        print(f"🎨 Extracting colors from: {image_url}")

        # Dev environments often lack proper CA bundles, so skip verification.
        ssl_context = ssl._create_unverified_context()

        # Fetch the raw bytes straight into memory and decode with OpenCV.
        with urllib.request.urlopen(image_url, context=ssl_context) as resp:
            raw = np.asarray(bytearray(resp.read()), dtype=np.uint8)
        decoded = cv2.imdecode(raw, -1)

        if decoded is None:
            return []

        # Drop the alpha channel if present, convert BGR->RGB, shrink for speed.
        if decoded.shape[2] == 4:
            decoded = cv2.cvtColor(decoded, cv2.COLOR_BGRA2BGR)
        decoded = cv2.cvtColor(decoded, cv2.COLOR_BGR2RGB)
        decoded = cv2.resize(decoded, (100, 100), interpolation=cv2.INTER_AREA)

        # Flatten to a pixel list and cluster to find the dominant colors.
        pixels = decoded.reshape((decoded.shape[0] * decoded.shape[1], 3))
        clusterer = KMeans(n_clusters=num_colors, n_init='auto')
        clusterer.fit(pixels)

        return [
            '#{:02x}{:02x}{:02x}'.format(int(center[0]), int(center[1]), int(center[2]))
            for center in clusterer.cluster_centers_
        ]

    except Exception as e:
        print(f"⚠️ Error extracting colors: {str(e)}")
        # Fallback palette so callers always get something renderable.
        return ["#000000", "#808080"]
embedding_model/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 384,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
embedding_model/README.md ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language: en
3
+ license: apache-2.0
4
+ library_name: sentence-transformers
5
+ tags:
6
+ - sentence-transformers
7
+ - feature-extraction
8
+ - sentence-similarity
9
+ - transformers
10
+ datasets:
11
+ - s2orc
12
+ - flax-sentence-embeddings/stackexchange_xml
13
+ - ms_marco
14
+ - gooaq
15
+ - yahoo_answers_topics
16
+ - code_search_net
17
+ - search_qa
18
+ - eli5
19
+ - snli
20
+ - multi_nli
21
+ - wikihow
22
+ - natural_questions
23
+ - trivia_qa
24
+ - embedding-data/sentence-compression
25
+ - embedding-data/flickr30k-captions
26
+ - embedding-data/altlex
27
+ - embedding-data/simple-wiki
28
+ - embedding-data/QQP
29
+ - embedding-data/SPECTER
30
+ - embedding-data/PAQ_pairs
31
+ - embedding-data/WikiAnswers
32
+ pipeline_tag: sentence-similarity
33
+ ---
34
+
35
+
36
+ # all-MiniLM-L6-v2
37
+ This is a [sentence-transformers](https://www.SBERT.net) model: It maps sentences & paragraphs to a 384 dimensional dense vector space and can be used for tasks like clustering or semantic search.
38
+
39
+ ## Usage (Sentence-Transformers)
40
+ Using this model becomes easy when you have [sentence-transformers](https://www.SBERT.net) installed:
41
+
42
+ ```
43
+ pip install -U sentence-transformers
44
+ ```
45
+
46
+ Then you can use the model like this:
47
+ ```python
48
+ from sentence_transformers import SentenceTransformer
49
+ sentences = ["This is an example sentence", "Each sentence is converted"]
50
+
51
+ model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
52
+ embeddings = model.encode(sentences)
53
+ print(embeddings)
54
+ ```
55
+
56
+ ## Usage (HuggingFace Transformers)
57
+ Without [sentence-transformers](https://www.SBERT.net), you can use the model like this: First, you pass your input through the transformer model, then you have to apply the right pooling-operation on-top of the contextualized word embeddings.
58
+
59
+ ```python
60
+ from transformers import AutoTokenizer, AutoModel
61
+ import torch
62
+ import torch.nn.functional as F
63
+
64
+ #Mean Pooling - Take attention mask into account for correct averaging
65
+ def mean_pooling(model_output, attention_mask):
66
+ token_embeddings = model_output[0] #First element of model_output contains all token embeddings
67
+ input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
68
+ return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
69
+
70
+
71
+ # Sentences we want sentence embeddings for
72
+ sentences = ['This is an example sentence', 'Each sentence is converted']
73
+
74
+ # Load model from HuggingFace Hub
75
+ tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
76
+ model = AutoModel.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
77
+
78
+ # Tokenize sentences
79
+ encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
80
+
81
+ # Compute token embeddings
82
+ with torch.no_grad():
83
+ model_output = model(**encoded_input)
84
+
85
+ # Perform pooling
86
+ sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
87
+
88
+ # Normalize embeddings
89
+ sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)
90
+
91
+ print("Sentence embeddings:")
92
+ print(sentence_embeddings)
93
+ ```
94
+
95
+ ------
96
+
97
+ ## Background
98
+
99
+ The project aims to train sentence embedding models on very large sentence level datasets using a self-supervised
100
+ contrastive learning objective. We used the pretrained [`nreimers/MiniLM-L6-H384-uncased`](https://huggingface.co/nreimers/MiniLM-L6-H384-uncased) model and fine-tuned in on a
101
+ 1B sentence pairs dataset. We use a contrastive learning objective: given a sentence from the pair, the model should predict which out of a set of randomly sampled other sentences, was actually paired with it in our dataset.
102
+
103
+ We developed this model during the
104
+ [Community week using JAX/Flax for NLP & CV](https://discuss.huggingface.co/t/open-to-the-community-community-week-using-jax-flax-for-nlp-cv/7104),
105
+ organized by Hugging Face. We developed this model as part of the project:
106
+ [Train the Best Sentence Embedding Model Ever with 1B Training Pairs](https://discuss.huggingface.co/t/train-the-best-sentence-embedding-model-ever-with-1b-training-pairs/7354). We benefited from efficient hardware infrastructure to run the project: 7 TPUs v3-8, as well as intervention from Googles Flax, JAX, and Cloud team member about efficient deep learning frameworks.
107
+
108
+ ## Intended uses
109
+
110
+ Our model is intended to be used as a sentence and short paragraph encoder. Given an input text, it outputs a vector which captures
111
+ the semantic information. The sentence vector may be used for information retrieval, clustering or sentence similarity tasks.
112
+
113
+ By default, input text longer than 256 word pieces is truncated.
114
+
115
+
116
+ ## Training procedure
117
+
118
+ ### Pre-training
119
+
120
+ We use the pretrained [`nreimers/MiniLM-L6-H384-uncased`](https://huggingface.co/nreimers/MiniLM-L6-H384-uncased) model. Please refer to the model card for more detailed information about the pre-training procedure.
121
+
122
+ ### Fine-tuning
123
+
124
+ We fine-tune the model using a contrastive objective. Formally, we compute the cosine similarity from each possible sentence pairs from the batch.
125
+ We then apply the cross entropy loss by comparing with true pairs.
126
+
127
+ #### Hyper parameters
128
+
129
+ We trained our model on a TPU v3-8. We train the model during 100k steps using a batch size of 1024 (128 per TPU core).
130
+ We use a learning rate warm up of 500. The sequence length was limited to 128 tokens. We used the AdamW optimizer with
131
+ a 2e-5 learning rate. The full training script is accessible in this current repository: `train_script.py`.
132
+
133
+ #### Training data
134
+
135
+ We use the concatenation from multiple datasets to fine-tune our model. The total number of sentence pairs is above 1 billion sentences.
136
+ We sampled each dataset given a weighted probability which configuration is detailed in the `data_config.json` file.
137
+
138
+
139
+ | Dataset | Paper | Number of training tuples |
140
+ |--------------------------------------------------------|:----------------------------------------:|:--------------------------:|
141
+ | [Reddit comments (2015-2018)](https://github.com/PolyAI-LDN/conversational-datasets/tree/master/reddit) | [paper](https://arxiv.org/abs/1904.06472) | 726,484,430 |
142
+ | [S2ORC](https://github.com/allenai/s2orc) Citation pairs (Abstracts) | [paper](https://aclanthology.org/2020.acl-main.447/) | 116,288,806 |
143
+ | [WikiAnswers](https://github.com/afader/oqa#wikianswers-corpus) Duplicate question pairs | [paper](https://doi.org/10.1145/2623330.2623677) | 77,427,422 |
144
+ | [PAQ](https://github.com/facebookresearch/PAQ) (Question, Answer) pairs | [paper](https://arxiv.org/abs/2102.07033) | 64,371,441 |
145
+ | [S2ORC](https://github.com/allenai/s2orc) Citation pairs (Titles) | [paper](https://aclanthology.org/2020.acl-main.447/) | 52,603,982 |
146
+ | [S2ORC](https://github.com/allenai/s2orc) (Title, Abstract) | [paper](https://aclanthology.org/2020.acl-main.447/) | 41,769,185 |
147
+ | [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) (Title, Body) pairs | - | 25,316,456 |
148
+ | [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) (Title+Body, Answer) pairs | - | 21,396,559 |
149
+ | [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) (Title, Answer) pairs | - | 21,396,559 |
150
+ | [MS MARCO](https://microsoft.github.io/msmarco/) triplets | [paper](https://doi.org/10.1145/3404835.3462804) | 9,144,553 |
151
+ | [GOOAQ: Open Question Answering with Diverse Answer Types](https://github.com/allenai/gooaq) | [paper](https://arxiv.org/pdf/2104.08727.pdf) | 3,012,496 |
152
+ | [Yahoo Answers](https://www.kaggle.com/soumikrakshit/yahoo-answers-dataset) (Title, Answer) | [paper](https://proceedings.neurips.cc/paper/2015/hash/250cf8b51c773f3f8dc8b4be867a9a02-Abstract.html) | 1,198,260 |
153
+ | [Code Search](https://huggingface.co/datasets/code_search_net) | - | 1,151,414 |
154
+ | [COCO](https://cocodataset.org/#home) Image captions | [paper](https://link.springer.com/chapter/10.1007%2F978-3-319-10602-1_48) | 828,395|
155
+ | [SPECTER](https://github.com/allenai/specter) citation triplets | [paper](https://doi.org/10.18653/v1/2020.acl-main.207) | 684,100 |
156
+ | [Yahoo Answers](https://www.kaggle.com/soumikrakshit/yahoo-answers-dataset) (Question, Answer) | [paper](https://proceedings.neurips.cc/paper/2015/hash/250cf8b51c773f3f8dc8b4be867a9a02-Abstract.html) | 681,164 |
157
+ | [Yahoo Answers](https://www.kaggle.com/soumikrakshit/yahoo-answers-dataset) (Title, Question) | [paper](https://proceedings.neurips.cc/paper/2015/hash/250cf8b51c773f3f8dc8b4be867a9a02-Abstract.html) | 659,896 |
158
+ | [SearchQA](https://huggingface.co/datasets/search_qa) | [paper](https://arxiv.org/abs/1704.05179) | 582,261 |
159
+ | [Eli5](https://huggingface.co/datasets/eli5) | [paper](https://doi.org/10.18653/v1/p19-1346) | 325,475 |
160
+ | [Flickr 30k](https://shannon.cs.illinois.edu/DenotationGraph/) | [paper](https://transacl.org/ojs/index.php/tacl/article/view/229/33) | 317,695 |
161
+ | [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) Duplicate questions (titles) | | 304,525 |
162
+ | AllNLI ([SNLI](https://nlp.stanford.edu/projects/snli/) and [MultiNLI](https://cims.nyu.edu/~sbowman/multinli/) | [paper SNLI](https://doi.org/10.18653/v1/d15-1075), [paper MultiNLI](https://doi.org/10.18653/v1/n18-1101) | 277,230 |
163
+ | [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) Duplicate questions (bodies) | | 250,519 |
164
+ | [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) Duplicate questions (titles+bodies) | | 250,460 |
165
+ | [Sentence Compression](https://github.com/google-research-datasets/sentence-compression) | [paper](https://www.aclweb.org/anthology/D13-1155/) | 180,000 |
166
+ | [Wikihow](https://github.com/pvl/wikihow_pairs_dataset) | [paper](https://arxiv.org/abs/1810.09305) | 128,542 |
167
+ | [Altlex](https://github.com/chridey/altlex/) | [paper](https://aclanthology.org/P16-1135.pdf) | 112,696 |
168
+ | [Quora Question Triplets](https://quoradata.quora.com/First-Quora-Dataset-Release-Question-Pairs) | - | 103,663 |
169
+ | [Simple Wikipedia](https://cs.pomona.edu/~dkauchak/simplification/) | [paper](https://www.aclweb.org/anthology/P11-2117/) | 102,225 |
170
+ | [Natural Questions (NQ)](https://ai.google.com/research/NaturalQuestions) | [paper](https://transacl.org/ojs/index.php/tacl/article/view/1455) | 100,231 |
171
+ | [SQuAD2.0](https://rajpurkar.github.io/SQuAD-explorer/) | [paper](https://aclanthology.org/P18-2124.pdf) | 87,599 |
172
+ | [TriviaQA](https://huggingface.co/datasets/trivia_qa) | - | 73,346 |
173
+ | **Total** | | **1,170,060,424** |
embedding_model/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertModel"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "dtype": "float32",
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 384,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 1536,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 6,
19
+ "pad_token_id": 0,
20
+ "position_embedding_type": "absolute",
21
+ "transformers_version": "4.57.0",
22
+ "type_vocab_size": 2,
23
+ "use_cache": true,
24
+ "vocab_size": 30522
25
+ }
embedding_model/config_sentence_transformers.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "5.1.1",
4
+ "transformers": "4.57.0",
5
+ "pytorch": "2.5.1+cpu"
6
+ },
7
+ "model_type": "SentenceTransformer",
8
+ "prompts": {
9
+ "query": "",
10
+ "document": ""
11
+ },
12
+ "default_prompt_name": null,
13
+ "similarity_fn_name": "cosine"
14
+ }
embedding_model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1377e9af0ca0b016a9f2aa584d6fc71ab3ea6804fae21ef9fb1416e2944057ac
3
+ size 90864192
embedding_model/modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
embedding_model/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 256,
3
+ "do_lower_case": false
4
+ }
embedding_model/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
embedding_model/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
embedding_model/tokenizer_config.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "extra_special_tokens": {},
49
+ "mask_token": "[MASK]",
50
+ "max_length": 128,
51
+ "model_max_length": 256,
52
+ "never_split": null,
53
+ "pad_to_multiple_of": null,
54
+ "pad_token": "[PAD]",
55
+ "pad_token_type_id": 0,
56
+ "padding_side": "right",
57
+ "sep_token": "[SEP]",
58
+ "stride": 0,
59
+ "strip_accents": null,
60
+ "tokenize_chinese_chars": true,
61
+ "tokenizer_class": "BertTokenizer",
62
+ "truncation_side": "right",
63
+ "truncation_strategy": "longest_first",
64
+ "unk_token": "[UNK]"
65
+ }
embedding_model/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
knowledge_base/brand/01_campaign_creation.md ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Question: How do I create a new campaign on Reachify?
2
+
3
+ Answer: Creating a new campaign is easy. Follow these steps:
4
+ 1. First, log in to your Brand Dashboard.
5
+ 2. Navigate to the 'Campaigns' section from the sidebar menu.
6
+ 3. Click on the 'Create New Campaign' button.
7
+ 4. Fill out the campaign creation form with all the required details.
8
+ 5. Click 'Submit for Review' to launch your campaign.
9
+
10
+ ## Question: What information is required to create a campaign?
11
+
12
+ Answer: To create a successful campaign, you will need to provide the following details:
13
+ - **Campaign Name:** A clear and concise name for your campaign.
14
+ - **Campaign Goal:** What you want to achieve (e.g., Brand Awareness, Website Traffic, Sales).
15
+ - **Target Audience:** Details about your ideal customer, like age, gender, location, and interests.
16
+ - **Budget:** Your total budget for the campaign. Our AI can also suggest a budget based on your goals.
17
+ - **Campaign Brief:** A detailed description of what you expect from the influencers.
18
+
19
+ ## Question: What happens after I submit a campaign for review?
20
+
21
+ Answer: After you submit a campaign, our internal team performs a quick review to ensure it meets our platform guidelines. This usually takes less than 24 hours. Once approved, our AI will start matching your campaign with the most suitable influencers.
knowledge_base/brand/02_understanding_status.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Guide for Brands on Reachify
2
+
3
+ ## How to Create a Successful Campaign
4
+ 1. **Define a Clear Goal:** Before creating a campaign, know what you want to achieve. Common goals are Brand Awareness, Lead Generation, or Direct Sales.
5
+ 2. **Set a Realistic Budget:** The AI Planner can help you predict a budget, but always have a range in mind.
6
+ 3. **Write a Detailed Brief:** Your campaign description is the most important part. Clearly explain your product, key messages, and what you expect from influencers.
7
+ 4. **Trust the AI:** Use our AI tools to find the right influencers and forecast performance. It saves you time and improves results.
8
+
9
+ ## Understanding Campaign Status
10
+ - **Pending Approval:** The campaign has been created and is waiting for an Admin to approve it.
11
+ - **Active:** The campaign is live and you can now invite influencers.
12
+ - **Completed:** The campaign duration is over. You can now view analytics.
13
+ - **Rejected:** The campaign did not meet our community guidelines and was rejected by an Admin.
knowledge_base/common/first_faq.md ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Question: What is Reachify?
2
+
3
+ Answer: Reachify is an AI-powered platform that connects brands with influencers to create impactful marketing campaigns. Our technology helps brands find the perfect influencers, predict campaign performance, and manage everything from a single dashboard.
4
+
5
+ ## Question: Is it free to sign up on Reachify?
6
+
7
+ Answer: Yes, signing up and creating a profile is completely free for both Brands and Influencers. For brands, a platform fee is applied only when a campaign is actively running. For influencers, our services are completely free.
8
+
9
+ ## Question: How does the AI influencer matching work?
10
+
11
+ Answer: Our intelligent AI analyzes a brand's campaign goals, target audience, and content brief. It then scans thousands of influencer profiles to find the ones whose audience and content style are the best fit for the campaign, ensuring higher engagement and better results.
12
+
13
+ ## Question: I forgot my password. What should I do?
14
+
15
+ Answer: If you have forgotten your password, simply go to the login page and click on the 'Forgot Password?' link. You will be asked to enter your registered email address, and we will send you a link to reset your password.
16
+
17
+ ## Question: How can I contact customer support?
18
+
19
+ Answer: If you need any help, you can visit the 'Support' or 'Help' section on our website to find answers to common questions. For specific issues, you can raise a support ticket directly from your dashboard or email us at support@reachify-platform.com.
knowledge_base/influencer/02_payments.md ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Payments on Reachify
2
+
3
+ This document explains how payments are processed for both influencers and brands on the Reachify platform.
4
+
5
+ ## For Influencers: Getting Paid
6
+
7
+ ### How do I receive my payment?
8
+ Payments for completed campaigns are processed automatically. Once a campaign you participated in is marked as "Completed" by the brand or an admin, your payment will be initiated.
9
+
10
+ ### When will I get paid?
11
+ You can expect the payment to be credited to your linked bank account or Stripe account within **15 business days** from the campaign completion date.
12
+
13
+ ### What if my payment is delayed?
14
+ If you do not receive your payment after 15 business days, please follow the dispute process outlined below. Common reasons for delays include incorrect bank details or public holidays.
15
+
16
+ ### How to Raise a Payment Dispute
17
+ If you believe there is an error with your payment amount or if it is delayed, you can raise a dispute.
18
+ 1. Go to your **Influencer Dashboard**.
19
+ 2. Navigate to the **"Completed Campaigns"** section.
20
+ 3. Find the relevant campaign and click on the **"Report Payment Issue"** button.
21
+ 4. Fill out the form with the necessary details. Our finance team will review your case and get back to you within 3-5 business days.
22
+
23
+ ## For Brands: Managing Campaign Payments
24
+
25
+ ### How are campaigns funded?
26
+ When you create and activate a campaign, the total budget for influencer payouts is held in an escrow account. This ensures that funds are secured and available for payout once the campaign is successfully completed.
27
+
28
+ ### What happens if a campaign is cancelled?
29
+ If a campaign is cancelled before any influencers are paid, the funds held in escrow will be refunded to your account's credit balance within 5-7 business days.
30
+
31
+ ### Can I get an invoice?
32
+ Yes. For every campaign, an official invoice is generated. You can download all your invoices from the **"Billing"** section in your **Brand Dashboard**.
models/budget_predictor_v1.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93b36da3d08615203eb44526b12a329e9b755542a7492618505ff4585109066c
3
+ size 161957
models/comments_predictor_v1.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:241c23bae4fb40e9ff2aaf309ba722ef4745a294bb834630018e46023b95d2cb
3
+ size 76106
models/earnings_encoder.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7b8e2bcef7146c7b6b762d9135f63706217f3bd74619ee5a1bdf98d4b05e90f
3
+ size 1355
models/earnings_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c92fcebc17ba0ff12ce0c3ea69aa24b189f03c720c7bb8105af7cad5738ffc4
3
+ size 73314
models/influencer_matcher_v1.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8606d072af703ac6c5fa3b004117ebe0854c8162d74b7458ee52b862c595344
3
+ size 94570
models/likes_predictor_v1.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5df899fadc58b0dc3347c7ac876be952c471c0c80e0bab938999711acdab89bc
3
+ size 76102
models/payout_forecaster_v1.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22e71a5b9d00ca73a7963f47a776a6a0f1b1647a9d3e4afb8a4bcd522031f60b
3
+ size 880
models/performance_predictor_v1.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21caf9f705f894b2610bbad455c9fa40056b1bbf6f8c0c2f6f138d20d5c1b3ea
3
+ size 168213
models/performance_scorer_v1.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1120936b44b08499dc7e3a83cc5885efd0e64eaf807a0ef09d500f70cc1e13a7
3
+ size 83089
models/revenue_forecaster_v1.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba36b5be7b1c4b075aa9df8883ef922a65772b729ae8b42a537ff96d287fb665
3
+ size 9132
requirements.txt ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn[standard]
3
+ python-dotenv
4
+ pandas
5
+ scikit-learn
6
+ joblib==1.3.2
7
+ langchain
8
+ langchain-community
9
+ langchain-core
10
+ sentence-transformers
11
+ chromadb
12
+ pydantic<3,>=2
13
+ llama-cpp-python
14
+ diskcache
15
+ statsmodels
16
+ supabase
17
+ requests
18
+ xgboost
19
+ sqlalchemy
20
+ psycopg2-binary
21
+ PyMuPDF
22
+ lark
23
+ opencv-python-headless
24
+ huggingface-hub
scripts/download_embedding_model.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ai-service/scripts/download_embedding_model.py
2
+
3
+ from sentence_transformers import SentenceTransformer
4
+ import os
5
+
6
+ # --- Configuration ---
7
+ # Hum is popular model ko use karenge. Yeh chota aur effective hai.
8
+ MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'
9
+
10
+ # Path jahan model save hoga. Yeh aapke main.py ke `EMBEDDING_MODEL_PATH` se match karna chahiye.
11
+ SAVE_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'embedding_model')
12
+
13
+ # --- Main Logic ---
14
+ def download_model():
15
+ """
16
+ Downloads the sentence-transformer model from Hugging Face and saves it locally.
17
+ """
18
+ print(f"--- 🚀 Starting download for model: {MODEL_NAME} ---")
19
+
20
+ # Check if the path already exists
21
+ if os.path.exists(SAVE_PATH) and len(os.listdir(SAVE_PATH)) > 0:
22
+ print(f"--- ✅ Model directory already exists and is not empty. Skipping download. ---")
23
+ print(f" Path: {SAVE_PATH}")
24
+ return
25
+
26
+ print(f" Saving model to: {SAVE_PATH}")
27
+
28
+ try:
29
+ # Model download aur save karein
30
+ model = SentenceTransformer(MODEL_NAME)
31
+ model.save(SAVE_PATH)
32
+ print(f"--- ✅ Model downloaded and saved successfully! ---")
33
+ except Exception as e:
34
+ print(f"--- 🚨 ERROR: Failed to download or save the model. ---")
35
+ print(f" Error details: {e}")
36
+ print(f" Please check your internet connection and try again.")
37
+
38
+ if __name__ == "__main__":
39
+ download_model()
scripts/download_model.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ai-service/scripts/download_llm_model.py
2
+ import os
3
+ from huggingface_hub import hf_hub_download
4
+
5
+ # --- Configuration for the NEW, FAST Language Model ---
6
+ # Model ka repository on Hugging Face
7
+ REPO_ID = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
8
+ # Model ka file name
9
+ FILENAME = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
10
+
11
+ # Model ko kahan save karna hai ('llm_model' folder mein)
12
+ SAVE_DIRECTORY = os.path.join(os.path.dirname(__file__), '..', 'llm_model')
13
+
14
+ def download_language_model():
15
+ """
16
+ Downloads the specified GGUF language model from Hugging Face.
17
+ """
18
+ print(f"--- Starting download for Language Model: {FILENAME} ---")
19
+
20
+ # Target file ka poora path
21
+ file_path = os.path.join(SAVE_DIRECTORY, FILENAME)
22
+
23
+ # Check karein ki model pehle se download to nahi hai.
24
+ if os.path.exists(file_path):
25
+ print(f"✅ Model '{FILENAME}' already exists at: {SAVE_DIRECTORY}")
26
+ print("Skipping download.")
27
+ return
28
+
29
+ # Folder banayein agar woh मौजूद nahi hai
30
+ os.makedirs(SAVE_DIRECTORY, exist_ok=True)
31
+
32
+ print(f"Downloading model to: {SAVE_DIRECTORY}")
33
+ print("This may take a moment (approx 700-800MB)...")
34
+
35
+ try:
36
+ # Hugging Face se model download karein
37
+ hf_hub_download(
38
+ repo_id=REPO_ID,
39
+ filename=FILENAME,
40
+ local_dir=SAVE_DIRECTORY,
41
+ local_dir_use_symlinks=False # Important for Windows
42
+ )
43
+
44
+ print("\n" + "="*50)
45
+ print(f"✅ Language Model '{FILENAME}' downloaded successfully!")
46
+ print("="*50 + "\n")
47
+
48
+ except Exception as e:
49
+ print(f"🚨 An error occurred during download: {e}")
50
+
51
+ if __name__ == "__main__":
52
+ download_language_model()
scripts/export_performance_data.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # File: ai-service/scripts/export_performance_data.py
2
+
3
+ import pandas as pd
4
+ from sqlalchemy import create_engine, text
5
+ import os
6
+ from dotenv import load_dotenv
7
+ import sys
8
+
9
+ # Root directory ko path mein add karein taaki .env file mil sake
10
+ ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
11
+ sys.path.append(ROOT_DIR)
12
+ load_dotenv(dotenv_path=os.path.join(ROOT_DIR, '.env'))
13
+
14
+ def export_performance_data():
15
+ """
16
+ Connects to the Supabase database, fetches data from approved submissions,
17
+ and saves it to a CSV file for training the performance prediction model.
18
+ """
19
+ print("--- Starting Performance Data Export Process ---")
20
+ db_url = os.getenv("DATABASE_URL")
21
+ if not db_url:
22
+ print("🔴 ERROR: DATABASE_URL not found. Please check your .env file in the ai-service root.")
23
+ return
24
+
25
+ # Yeh SQL query hamare AI model ke liye 'khana' (training data) nikalegi.
26
+ sql_query = """
27
+ SELECT
28
+ cs.likes,
29
+ cs.comments,
30
+ cs.caption,
31
+ ip.follower_count,
32
+ CASE
33
+ WHEN c.title ILIKE '%tech%' OR c.description ILIKE '%tech%' THEN 'Tech'
34
+ WHEN c.title ILIKE '%fashion%' OR c.description ILIKE '%fashion%' THEN 'Fashion'
35
+ WHEN c.title ILIKE '%food%' OR c.description ILIKE '%food%' THEN 'Food'
36
+ WHEN c.title ILIKE '%gaming%' OR c.description ILIKE '%gaming%' THEN 'Gaming'
37
+ ELSE 'General'
38
+ END AS campaign_niche,
39
+ CASE
40
+ WHEN c.content_guidelines ILIKE '%reel%' THEN 'Reel'
41
+ WHEN c.content_guidelines ILIKE '%story%' THEN 'Story'
42
+ ELSE 'Post'
43
+ END AS content_format
44
+ FROM
45
+ public.campaign_submissions cs
46
+ JOIN
47
+ public.campaigns c ON cs.campaign_id = c.id
48
+ JOIN
49
+ public.influencer_profiles ip ON cs.influencer_id = ip.profile_id
50
+ WHERE
51
+ cs.status = 'approved' -- Sirf approved submissions se seekhein
52
+ AND cs.likes IS NOT NULL -- Jin par likes ka data ho
53
+ AND cs.comments IS NOT NULL -- Jin par comments ka data ho
54
+ AND cs.caption IS NOT NULL -- Jin par caption ho
55
+ AND ip.follower_count > 0; -- Jin influencers ke followers pata ho
56
+ """
57
+ try:
58
+ print("Connecting to Supabase to fetch performance data...")
59
+ engine = create_engine(db_url)
60
+
61
+ with engine.connect() as connection:
62
+ df = pd.DataFrame(connection.execute(text(sql_query)))
63
+
64
+ print(f"✅ Fetched {len(df)} approved submission records from the database.")
65
+
66
+ except Exception as e:
67
+ print(f"🔴 ERROR fetching data: {e}")
68
+ return
69
+
70
+ if df.empty:
71
+ print("⚠️ No valid training data found. A blank CSV will be created.")
72
+ else:
73
+ # Feature Engineering: Caption ki lambai (length) ko ek feature banayein
74
+ df['caption_length'] = df['caption'].str.len()
75
+
76
+ # Sirf zaroori columns ko CSV me save karein
77
+ columns_to_save = ['likes', 'comments', 'follower_count', 'caption_length', 'campaign_niche', 'content_format']
78
+ # Agar koi column na ho (khaali df ke case mein), toh use ignore karein
79
+ df_to_save = df.reindex(columns=columns_to_save).fillna(0)
80
+
81
+ # Data ko /data folder mein save karein
82
+ output_path = os.path.join(ROOT_DIR, 'data', 'performance_training_data.csv')
83
+ df_to_save.to_csv(output_path, index=False)
84
+ print(f"🎉 Success! Performance data saved to {output_path}")
85
+
86
+ if __name__ == '__main__':
87
+ export_performance_data()
scripts/export_revenue_data.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # FILE: ai-service/scripts/export_revenue_data.py (NEW FILE)
2
+
3
+ import pandas as pd
4
+ from sqlalchemy import create_engine, text
5
+ import os
6
+ from dotenv import load_dotenv
7
+ import sys
8
+ from pathlib import Path
9
+
10
+ def export_revenue_data():
11
+ """
12
+ Connects to the database, fetches all 'paid' invoices, aggregates
13
+ the revenue by month, and saves it to a time-series CSV file.
14
+ """
15
+ print("--- Starting Revenue Data Export Process ---")
16
+
17
+ # --- Setup to find the .env file in the root directory ---
18
+ try:
19
+ # Assumes the script is in a 'scripts' folder, two levels down from root.
20
+ # E.g. /ai-service/scripts/ -> /ai-service/
21
+ # If your script is elsewhere, adjust the Path().resolve().parents index.
22
+ root_dir = Path(__file__).resolve().parents[1]
23
+ sys.path.append(str(root_dir))
24
+ load_dotenv(dotenv_path=root_dir / '.env')
25
+ db_url = os.getenv("DATABASE_URL")
26
+ if not db_url:
27
+ raise ValueError("DATABASE_URL not found in .env file.")
28
+ except Exception as e:
29
+ print(f"🔴 ERROR setting up environment: {e}")
30
+ return
31
+
32
+ # --- SQL Query to get monthly revenue from paid invoices ---
33
+ # It's important to have an 'updated_at' field that is correctly set when status becomes 'paid'.
34
+ # We assume 'updated_at' is the payment date for this query.
35
+ sql_query = """
36
+ SELECT
37
+ date_trunc('month', updated_at)::date AS month,
38
+ SUM(amount) AS total_revenue
39
+ FROM
40
+ public.invoices
41
+ WHERE
42
+ status = 'paid'
43
+ GROUP BY
44
+ month
45
+ ORDER BY
46
+ month ASC;
47
+ """
48
+ try:
49
+ print("Connecting to Supabase to fetch revenue data...")
50
+ engine = create_engine(db_url)
51
+
52
+ with engine.connect() as connection:
53
+ df = pd.DataFrame(connection.execute(text(sql_query)))
54
+
55
+ print(f"✅ Fetched {len(df)} months of revenue data from the database.")
56
+
57
+ except Exception as e:
58
+ print(f"🔴 ERROR fetching revenue data: {e}")
59
+ return
60
+
61
+ if df.empty:
62
+ print("⚠️ No 'paid' invoices found. An empty CSV will be created.")
63
+ df = pd.DataFrame(columns=['month', 'total_revenue']) # Ensure CSV has correct headers
64
+
65
+ # --- Save the data to the /data folder ---
66
+ output_path = root_dir / 'data' / 'revenue_training_data.csv'
67
+ df.to_csv(output_path, index=False)
68
+ print(f"🎉 Success! Revenue data saved to {output_path}")
69
+
70
+ if __name__ == '__main__':
71
+ export_revenue_data()
scripts/export_training_data.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # File: ai-service/scripts/export_training_data.py (FINAL VERSION)
2
+
3
+ import pandas as pd
4
+ from sqlalchemy import create_engine, text
5
+ import os
6
+ from dotenv import load_dotenv
7
+ import sys
8
+
9
+ # Get the root directory of the ai-service
10
+ ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
11
+ sys.path.append(ROOT_DIR)
12
+ load_dotenv(dotenv_path=os.path.join(ROOT_DIR, '.env'))
13
+
14
+ def export_data():
15
+ """Connects to the database and saves the training data to a CSV file."""
16
+ print("--- Starting Data Export Process ---")
17
+ db_url = os.getenv("DATABASE_URL")
18
+ if not db_url:
19
+ print("🔴 ERROR: DATABASE_URL not found.")
20
+ return
21
+
22
+ # SQL Query waisi hi rahegi
23
+ sql_query = """
24
+ SELECT
25
+ cs.engagement_rate, ip.follower_count, p.amount AS payment_amount,
26
+ CASE
27
+ WHEN c.title ILIKE '%tech%' OR c.description ILIKE '%tech%' THEN 'Tech'
28
+ WHEN c.title ILIKE '%fashion%' OR c.description ILIKE '%fashion%' THEN 'Fashion'
29
+ WHEN c.title ILIKE '%food%' OR c.description ILIKE '%food%' THEN 'Food'
30
+ WHEN c.title ILIKE '%gaming%' OR c.description ILIKE '%gaming%' THEN 'Gaming'
31
+ ELSE 'General'
32
+ END AS campaign_niche,
33
+ CASE
34
+ WHEN c.content_guidelines ILIKE '%reel%' THEN 'Reel'
35
+ WHEN c.content_guidelines ILIKE '%story%' THEN 'Story'
36
+ ELSE 'Post'
37
+ END AS content_format
38
+ FROM public.campaign_submissions cs
39
+ JOIN public.campaigns c ON cs.campaign_id = c.id
40
+ JOIN public.payments p ON c.id = p.campaign_id AND cs.influencer_id = p.influencer_id
41
+ JOIN public.influencer_profiles ip ON cs.influencer_id = ip.profile_id
42
+ WHERE cs.engagement_rate IS NOT NULL AND ip.follower_count > 0 AND p.amount IS NOT NULL;
43
+ """
44
+ try:
45
+ print("Connecting to Supabase to fetch live data...")
46
+ engine = create_engine(db_url)
47
+
48
+ # === ✨ THE FIX IS HERE ✨ ===
49
+ # Hum ab seedhe connection ka istemal karke data nikalenge
50
+ with engine.connect() as connection:
51
+ # `text()` function zaroori hai SQLAlchemy ke naye versions ke liye
52
+ result = connection.execute(text(sql_query))
53
+ rows = result.fetchall() # Saari rows ko ek list mein nikaalo
54
+
55
+ # Agar koi data nahi mila
56
+ if not rows:
57
+ print("⚠️ WARNING: No training data found in the database. An empty CSV will be created.")
58
+ df = pd.DataFrame()
59
+ else:
60
+ # Us list of rows se DataFrame banao
61
+ df = pd.DataFrame(rows, columns=result.keys())
62
+ # === ✨ FIX ENDS HERE ✨ ===
63
+
64
+ print(f"✅ Fetched {len(df)} records from the database.")
65
+
66
+ except Exception as e:
67
+ print(f"🔴 ERROR fetching data: {e}")
68
+ return
69
+
70
+ # Data ko /data folder mein save karo
71
+ output_path = os.path.join(ROOT_DIR, 'data', 'earnings_training_data.csv')
72
+ df.to_csv(output_path, index=False)
73
+ print(f"🎉 Success! Data saved to {output_path}")
74
+
75
+ if __name__ == '__main__':
76
+ export_data()
scripts/ingest_data.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import uuid
4
+
5
+ # Ensure we can import from core
6
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
7
+
8
+ from core.rag.store import VectorStore
9
+
10
+ def ingest_knowledge_base():
11
+ # Initialize DB
12
+ print("🚀 Connecting to Vector Database...")
13
+ try:
14
+ store = VectorStore()
15
+ except Exception as e:
16
+ print(f"❌ Error initializing DB: {e}")
17
+ return
18
+
19
+ base_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "knowledge_base")
20
+
21
+ documents = []
22
+ metadatas = []
23
+ ids = []
24
+
25
+ print(f"📂 Scanning folder: {base_path}")
26
+
27
+ if not os.path.exists(base_path):
28
+ print(f"⚠️ Knowledge base folder not found at {base_path}")
29
+ return
30
+
31
+ # Saari files scan karo recursive tareeke se
32
+ for root, _, files in os.walk(base_path):
33
+ for file in files:
34
+ if file.endswith(".md") or file.endswith(".txt"):
35
+ file_path = os.path.join(root, file)
36
+
37
+ try:
38
+ with open(file_path, "r", encoding="utf-8") as f:
39
+ content = f.read()
40
+ if len(content.strip()) < 10: continue # Skip empty files
41
+
42
+ # Content aur Meta data ready karo
43
+ documents.append(content)
44
+ metadatas.append({"source": file, "category": os.path.basename(root)})
45
+ ids.append(str(uuid.uuid4()))
46
+
47
+ print(f" - Prepared: {file}")
48
+ except Exception as e:
49
+ print(f" - ⚠️ Skipped {file}: {e}")
50
+
51
+ # DB mein daalo
52
+ if documents:
53
+ print(f"💾 Saving {len(documents)} documents to ChromaDB...")
54
+ store.add_text(documents, metadatas, ids)
55
+ print("✅ Knowledge Injection Complete!")
56
+ else:
57
+ print("⚠️ No valid documents found to ingest.")
58
+
59
+ if __name__ == "__main__":
60
+ ingest_knowledge_base()