Spaces:
Running
Running
Commit ·
91de656
1
Parent(s): 93aec95
Fix et migration database
Browse files- app/api/v1/auth.py +125 -125
- app/api/v1/companies.py +49 -48
- app/api/v1/investigate.py +189 -186
- app/api/v1/risk.py +4 -4
- app/api/v1/stats.py +4 -4
- app/core/utils.py +3 -0
- app/services/aggregation.py +72 -70
- app/services/osint_links.py +33 -32
- app/services/risk_engine.py +174 -150
app/api/v1/auth.py
CHANGED
|
@@ -1,125 +1,125 @@
|
|
| 1 |
-
from fastapi import APIRouter, Depends, HTTPException
|
| 2 |
-
from pydantic import BaseModel
|
| 3 |
-
from typing import Optional, List
|
| 4 |
-
|
| 5 |
-
from app.core.supabase_client import get_user_client, get_admin_client
|
| 6 |
-
from app.services.auth_service import get_current_active_user, get_current_admin_user, AuthenticatedUserInfo
|
| 7 |
-
|
| 8 |
-
router = APIRouter()
|
| 9 |
-
|
| 10 |
-
class UserCreate(BaseModel):
|
| 11 |
-
email: str
|
| 12 |
-
password: str
|
| 13 |
-
full_name: str
|
| 14 |
-
is_admin: bool = False
|
| 15 |
-
|
| 16 |
-
class UserUpdate(BaseModel):
|
| 17 |
-
email: Optional[str] = None
|
| 18 |
-
password: Optional[str] = None
|
| 19 |
-
full_name: Optional[str] = None
|
| 20 |
-
is_admin: Optional[bool] = None
|
| 21 |
-
|
| 22 |
-
@router.get("/me")
|
| 23 |
-
async def read_users_me(
|
| 24 |
-
current_user: AuthenticatedUserInfo = Depends(get_current_active_user)
|
| 25 |
-
):
|
| 26 |
-
"""
|
| 27 |
-
Example endpoint showing how to query information on behalf of the user,
|
| 28 |
-
forcing Supabase to apply Row Level Security (RLS) via their JWT.
|
| 29 |
-
"""
|
| 30 |
-
try:
|
| 31 |
-
# Initialize client with the user's JWT
|
| 32 |
-
client = get_user_client(current_user.jwt)
|
| 33 |
-
|
| 34 |
-
# This will securely return only the row matching `auth.uid() = auth_user_id`
|
| 35 |
-
response = (
|
| 36 |
-
client.table("users")
|
| 37 |
-
.select("*")
|
| 38 |
-
.eq("auth_user_id", current_user.user_id)
|
| 39 |
-
.single()
|
| 40 |
-
.execute()
|
| 41 |
-
)
|
| 42 |
-
return response.data
|
| 43 |
-
except Exception as e:
|
| 44 |
-
# Note: If RLS prevents reading, Supabase might return a PostgREST error.
|
| 45 |
-
raise HTTPException(status_code=400, detail=str(e))
|
| 46 |
-
|
| 47 |
-
@router.get("/users")
|
| 48 |
-
async def list_users(current_admin: AuthenticatedUserInfo = Depends(get_current_admin_user)):
|
| 49 |
-
try:
|
| 50 |
-
admin_client = get_admin_client()
|
| 51 |
-
response = admin_client.auth.admin.list_users()
|
| 52 |
-
|
| 53 |
-
users = []
|
| 54 |
-
for u in response:
|
| 55 |
-
metadata = getattr(u, "user_metadata", {}) or {}
|
| 56 |
-
users.append({
|
| 57 |
-
"id": u.id,
|
| 58 |
-
"email": u.email,
|
| 59 |
-
"full_name": metadata.get("full_name", ""),
|
| 60 |
-
"is_admin": metadata.get("is_admin", False),
|
| 61 |
-
"created_at": str(u.created_at)
|
| 62 |
-
})
|
| 63 |
-
return users
|
| 64 |
-
except Exception as e:
|
| 65 |
-
raise HTTPException(status_code=400, detail=str(e))
|
| 66 |
-
|
| 67 |
-
@router.post("/users")
|
| 68 |
-
async def create_user(
|
| 69 |
-
user: UserCreate,
|
| 70 |
-
current_admin: AuthenticatedUserInfo = Depends(get_current_admin_user)
|
| 71 |
-
):
|
| 72 |
-
try:
|
| 73 |
-
admin_client = get_admin_client()
|
| 74 |
-
response = admin_client.auth.admin.create_user({
|
| 75 |
-
"email": user.email,
|
| 76 |
-
"password": user.password,
|
| 77 |
-
"email_confirm": True,
|
| 78 |
-
"user_metadata": {
|
| 79 |
-
"full_name": user.full_name,
|
| 80 |
-
"is_admin": user.is_admin
|
| 81 |
-
}
|
| 82 |
-
})
|
| 83 |
-
return {"id": response.user.id, "email": response.user.email}
|
| 84 |
-
except Exception as e:
|
| 85 |
-
raise HTTPException(status_code=400, detail=str(e))
|
| 86 |
-
|
| 87 |
-
@router.patch("/users/{user_id}")
|
| 88 |
-
async def update_user(
|
| 89 |
-
user_id: str,
|
| 90 |
-
user_update: UserUpdate,
|
| 91 |
-
current_admin: AuthenticatedUserInfo = Depends(get_current_admin_user)
|
| 92 |
-
):
|
| 93 |
-
try:
|
| 94 |
-
admin_client = get_admin_client()
|
| 95 |
-
attributes = {}
|
| 96 |
-
if user_update.email is not None:
|
| 97 |
-
attributes["email"] = user_update.email
|
| 98 |
-
if user_update.password:
|
| 99 |
-
attributes["password"] = user_update.password
|
| 100 |
-
|
| 101 |
-
user_metadata = {}
|
| 102 |
-
if user_update.full_name is not None:
|
| 103 |
-
user_metadata["full_name"] = user_update.full_name
|
| 104 |
-
if user_update.is_admin is not None:
|
| 105 |
-
user_metadata["is_admin"] = user_update.is_admin
|
| 106 |
-
|
| 107 |
-
if user_metadata:
|
| 108 |
-
attributes["user_metadata"] = user_metadata
|
| 109 |
-
|
| 110 |
-
response = admin_client.auth.admin.update_user_by_id(user_id, attributes)
|
| 111 |
-
return {"id": response.user.id}
|
| 112 |
-
except Exception as e:
|
| 113 |
-
raise HTTPException(status_code=400, detail=str(e))
|
| 114 |
-
|
| 115 |
-
@router.delete("/users/{user_id}")
|
| 116 |
-
async def delete_user(
|
| 117 |
-
user_id: str,
|
| 118 |
-
current_admin: AuthenticatedUserInfo = Depends(get_current_admin_user)
|
| 119 |
-
):
|
| 120 |
-
try:
|
| 121 |
-
admin_client = get_admin_client()
|
| 122 |
-
admin_client.auth.admin.delete_user(user_id)
|
| 123 |
-
return {"status": "success"}
|
| 124 |
-
except Exception as e:
|
| 125 |
-
raise HTTPException(status_code=400, detail=str(e))
|
|
|
|
| 1 |
+
from fastapi import APIRouter, Depends, HTTPException
|
| 2 |
+
from pydantic import BaseModel
|
| 3 |
+
from typing import Optional, List
|
| 4 |
+
|
| 5 |
+
from app.core.supabase_client import get_user_client, get_admin_client
|
| 6 |
+
from app.services.auth_service import get_current_active_user, get_current_admin_user, AuthenticatedUserInfo
|
| 7 |
+
|
| 8 |
+
router = APIRouter()
|
| 9 |
+
|
| 10 |
+
class UserCreate(BaseModel):
|
| 11 |
+
email: str
|
| 12 |
+
password: str
|
| 13 |
+
full_name: str
|
| 14 |
+
is_admin: bool = False
|
| 15 |
+
|
| 16 |
+
class UserUpdate(BaseModel):
|
| 17 |
+
email: Optional[str] = None
|
| 18 |
+
password: Optional[str] = None
|
| 19 |
+
full_name: Optional[str] = None
|
| 20 |
+
is_admin: Optional[bool] = None
|
| 21 |
+
|
| 22 |
+
@router.get("/me")
|
| 23 |
+
async def read_users_me(
|
| 24 |
+
current_user: AuthenticatedUserInfo = Depends(get_current_active_user)
|
| 25 |
+
):
|
| 26 |
+
"""
|
| 27 |
+
Example endpoint showing how to query information on behalf of the user,
|
| 28 |
+
forcing Supabase to apply Row Level Security (RLS) via their JWT.
|
| 29 |
+
"""
|
| 30 |
+
try:
|
| 31 |
+
# Initialize client with the user's JWT
|
| 32 |
+
client = get_user_client(current_user.jwt)
|
| 33 |
+
|
| 34 |
+
# This will securely return only the row matching `auth.uid() = auth_user_id`
|
| 35 |
+
response = (
|
| 36 |
+
client.table("users")
|
| 37 |
+
.select("*")
|
| 38 |
+
.eq("auth_user_id", current_user.user_id)
|
| 39 |
+
.single()
|
| 40 |
+
.execute()
|
| 41 |
+
)
|
| 42 |
+
return response.data
|
| 43 |
+
except Exception as e:
|
| 44 |
+
# Note: If RLS prevents reading, Supabase might return a PostgREST error.
|
| 45 |
+
raise HTTPException(status_code=400, detail=str(e))
|
| 46 |
+
|
| 47 |
+
@router.get("/users")
|
| 48 |
+
async def list_users(current_admin: AuthenticatedUserInfo = Depends(get_current_admin_user)):
|
| 49 |
+
try:
|
| 50 |
+
admin_client = get_admin_client()
|
| 51 |
+
response = admin_client.auth.admin.list_users()
|
| 52 |
+
|
| 53 |
+
users = []
|
| 54 |
+
for u in response:
|
| 55 |
+
metadata = getattr(u, "user_metadata", {}) or {}
|
| 56 |
+
users.append({
|
| 57 |
+
"id": u.id,
|
| 58 |
+
"email": u.email,
|
| 59 |
+
"full_name": metadata.get("full_name", ""),
|
| 60 |
+
"is_admin": metadata.get("is_admin", False),
|
| 61 |
+
"created_at": str(u.created_at)
|
| 62 |
+
})
|
| 63 |
+
return users
|
| 64 |
+
except Exception as e:
|
| 65 |
+
raise HTTPException(status_code=400, detail=str(e))
|
| 66 |
+
|
| 67 |
+
@router.post("/users")
|
| 68 |
+
async def create_user(
|
| 69 |
+
user: UserCreate,
|
| 70 |
+
current_admin: AuthenticatedUserInfo = Depends(get_current_admin_user)
|
| 71 |
+
):
|
| 72 |
+
try:
|
| 73 |
+
admin_client = get_admin_client()
|
| 74 |
+
response = admin_client.auth.admin.create_user({
|
| 75 |
+
"email": user.email,
|
| 76 |
+
"password": user.password,
|
| 77 |
+
"email_confirm": True,
|
| 78 |
+
"user_metadata": {
|
| 79 |
+
"full_name": user.full_name,
|
| 80 |
+
"is_admin": user.is_admin
|
| 81 |
+
}
|
| 82 |
+
})
|
| 83 |
+
return {"id": response.user.id, "email": response.user.email}
|
| 84 |
+
except Exception as e:
|
| 85 |
+
raise HTTPException(status_code=400, detail=str(e))
|
| 86 |
+
|
| 87 |
+
@router.patch("/users/{user_id}")
|
| 88 |
+
async def update_user(
|
| 89 |
+
user_id: str,
|
| 90 |
+
user_update: UserUpdate,
|
| 91 |
+
current_admin: AuthenticatedUserInfo = Depends(get_current_admin_user)
|
| 92 |
+
):
|
| 93 |
+
try:
|
| 94 |
+
admin_client = get_admin_client()
|
| 95 |
+
attributes = {}
|
| 96 |
+
if user_update.email is not None:
|
| 97 |
+
attributes["email"] = user_update.email
|
| 98 |
+
if user_update.password:
|
| 99 |
+
attributes["password"] = user_update.password
|
| 100 |
+
|
| 101 |
+
user_metadata = {}
|
| 102 |
+
if user_update.full_name is not None:
|
| 103 |
+
user_metadata["full_name"] = user_update.full_name
|
| 104 |
+
if user_update.is_admin is not None:
|
| 105 |
+
user_metadata["is_admin"] = user_update.is_admin
|
| 106 |
+
|
| 107 |
+
if user_metadata:
|
| 108 |
+
attributes["user_metadata"] = user_metadata
|
| 109 |
+
|
| 110 |
+
response = admin_client.auth.admin.update_user_by_id(user_id, attributes)
|
| 111 |
+
return {"id": response.user.id}
|
| 112 |
+
except Exception as e:
|
| 113 |
+
raise HTTPException(status_code=400, detail=str(e))
|
| 114 |
+
|
| 115 |
+
@router.delete("/users/{user_id}")
|
| 116 |
+
async def delete_user(
|
| 117 |
+
user_id: str,
|
| 118 |
+
current_admin: AuthenticatedUserInfo = Depends(get_current_admin_user)
|
| 119 |
+
):
|
| 120 |
+
try:
|
| 121 |
+
admin_client = get_admin_client()
|
| 122 |
+
admin_client.auth.admin.delete_user(user_id)
|
| 123 |
+
return {"status": "success"}
|
| 124 |
+
except Exception as e:
|
| 125 |
+
raise HTTPException(status_code=400, detail=str(e))
|
app/api/v1/companies.py
CHANGED
|
@@ -1,48 +1,49 @@
|
|
| 1 |
-
from fastapi import APIRouter, Query
|
| 2 |
-
from typing import List, Optional
|
| 3 |
-
from app.services.data_loader import get_companies_df
|
| 4 |
-
from app.models.schemas import Company, CompanyWithLinks
|
| 5 |
-
from app.services.osint_links import get_company_links
|
| 6 |
-
|
| 7 |
-
router = APIRouter()
|
| 8 |
-
|
| 9 |
-
@router.get("/", response_model=List[Company])
|
| 10 |
-
async def list_companies(
|
| 11 |
-
wilaya: Optional[str] = None,
|
| 12 |
-
group: Optional[str] = None,
|
| 13 |
-
type: Optional[str] = None,
|
| 14 |
-
search: Optional[str] = None,
|
| 15 |
-
limit: int = 50
|
| 16 |
-
):
|
| 17 |
-
df = await get_companies_df()
|
| 18 |
-
if df.empty:
|
| 19 |
-
return []
|
| 20 |
-
|
| 21 |
-
if wilaya:
|
| 22 |
-
df = df[df['wilaya'] == wilaya]
|
| 23 |
-
if group:
|
| 24 |
-
df = df[df['activity_group'] == group]
|
| 25 |
-
if type:
|
| 26 |
-
df = df[df['type'] == type]
|
| 27 |
-
if search:
|
| 28 |
-
mask = df['name'].str.contains(search, na=False, case=False) | \
|
| 29 |
-
df['activity_normalized'].str.contains(search, na=False, case=False)
|
| 30 |
-
df = df[mask]
|
| 31 |
-
|
| 32 |
-
return df.head(limit).to_dict(orient='records')
|
| 33 |
-
|
| 34 |
-
@router.get("/{company_id}", response_model=CompanyWithLinks)
|
| 35 |
-
async def read_company(company_id: int):
|
| 36 |
-
df = await get_companies_df()
|
| 37 |
-
company = df[df['id'] == company_id]
|
| 38 |
-
if company.empty:
|
| 39 |
-
from fastapi import HTTPException
|
| 40 |
-
raise HTTPException(status_code=404, detail="Company not found")
|
| 41 |
-
|
| 42 |
-
data = company.iloc[0].to_dict()
|
| 43 |
-
data['osint_links'] = await get_company_links(company_id)
|
| 44 |
-
return data
|
| 45 |
-
|
| 46 |
-
@router.get("/{company_id}/osint_links")
|
| 47 |
-
async def read_company_links(company_id: int):
|
| 48 |
-
return await get_company_links(company_id)
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, Query
|
| 2 |
+
from typing import List, Optional
|
| 3 |
+
from app.services.data_loader import get_companies_df
|
| 4 |
+
from app.models.schemas import Company, CompanyWithLinks
|
| 5 |
+
from app.services.osint_links import get_company_links
|
| 6 |
+
|
| 7 |
+
router = APIRouter()
|
| 8 |
+
|
| 9 |
+
@router.get("/", response_model=List[Company])
|
| 10 |
+
async def list_companies(
|
| 11 |
+
wilaya: Optional[str] = None,
|
| 12 |
+
group: Optional[str] = None,
|
| 13 |
+
type: Optional[str] = None,
|
| 14 |
+
search: Optional[str] = None,
|
| 15 |
+
limit: int = 50
|
| 16 |
+
):
|
| 17 |
+
df = await get_companies_df()
|
| 18 |
+
if df.empty:
|
| 19 |
+
return []
|
| 20 |
+
|
| 21 |
+
if wilaya:
|
| 22 |
+
df = df[df['wilaya'] == wilaya]
|
| 23 |
+
if group:
|
| 24 |
+
df = df[df['activity_group'] == group]
|
| 25 |
+
if type:
|
| 26 |
+
df = df[df['type'] == type]
|
| 27 |
+
if search:
|
| 28 |
+
mask = df['name'].str.contains(search, na=False, case=False) | \
|
| 29 |
+
df['activity_normalized'].str.contains(search, na=False, case=False)
|
| 30 |
+
df = df[mask]
|
| 31 |
+
|
| 32 |
+
return df.head(limit).to_dict(orient='records')
|
| 33 |
+
|
| 34 |
+
@router.get("/{company_id}", response_model=CompanyWithLinks)
|
| 35 |
+
async def read_company(company_id: int):
|
| 36 |
+
df = await get_companies_df()
|
| 37 |
+
company = df[df['id'] == company_id]
|
| 38 |
+
if company.empty:
|
| 39 |
+
from fastapi import HTTPException
|
| 40 |
+
raise HTTPException(status_code=404, detail="Company not found")
|
| 41 |
+
|
| 42 |
+
data = company.iloc[0].to_dict()
|
| 43 |
+
data['osint_links'] = await get_company_links(company_id)
|
| 44 |
+
return data
|
| 45 |
+
|
| 46 |
+
@router.get("/{company_id}/osint_links")
|
| 47 |
+
async def read_company_links(company_id: int):
|
| 48 |
+
return await get_company_links(company_id)
|
| 49 |
+
|
app/api/v1/investigate.py
CHANGED
|
@@ -1,186 +1,189 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Ba7ath Investigation Endpoint
|
| 3 |
-
==============================
|
| 4 |
-
POST /api/v1/investigate/{company_id}
|
| 5 |
-
|
| 6 |
-
Cross-references Ahlya (CSV), JORT (DB), and RNE (DB) data via Local Rule-Based Engine.
|
| 7 |
-
A ajouter de nouvelles sources comme ISIE.tn et marché public pour les appels d'offres publics.
|
| 8 |
-
"""
|
| 9 |
-
|
| 10 |
-
from fastapi import APIRouter, HTTPException, Depends
|
| 11 |
-
from pydantic import BaseModel, Field
|
| 12 |
-
from typing import Optional, List
|
| 13 |
-
from datetime import datetime
|
| 14 |
-
from sqlalchemy.orm import Session
|
| 15 |
-
|
| 16 |
-
from app.core.supabase_client import get_user_client
|
| 17 |
-
from app.services.llm_service import llm_service
|
| 18 |
-
from app.services.data_loader import get_companies_df
|
| 19 |
-
from app.services.auth_service import get_current_user
|
| 20 |
-
from app.core.utils import clean_nans
|
| 21 |
-
|
| 22 |
-
import logging
|
| 23 |
-
|
| 24 |
-
logger = logging.getLogger("ba7ath.investigate")
|
| 25 |
-
|
| 26 |
-
router = APIRouter()
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
# ── Pydantic Response Models ─────────────────────────────────────────────
|
| 30 |
-
|
| 31 |
-
class LLMAnalysis(BaseModel):
|
| 32 |
-
"""The structured output from the analysis engine."""
|
| 33 |
-
match_score: int = Field(0, ge=0, le=100, description="Score de correspondance (0-100)")
|
| 34 |
-
status: str = Field("Pending", description="Verified | Suspicious | Conflict | Pending")
|
| 35 |
-
findings: List[str] = Field(default_factory=list, description="النقاط المتطابقة")
|
| 36 |
-
red_flags: List[str] = Field(default_factory=list, description="التجاوزات المرصودة")
|
| 37 |
-
summary_ar: str = Field("", description="ملخص التحقيق بالعربية")
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
class InvestigationResult(BaseModel):
|
| 41 |
-
"""Full investigation response."""
|
| 42 |
-
company_id: str
|
| 43 |
-
company_name: str
|
| 44 |
-
wilaya: str
|
| 45 |
-
analysis: LLMAnalysis
|
| 46 |
-
sources_used: List[str] = Field(default_factory=list)
|
| 47 |
-
analyzed_at: str
|
| 48 |
-
model_used: str = "rule-based-engine"
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
# ── Helper: Extract Ahlya data from
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
#
|
| 71 |
-
|
| 72 |
-
if
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
if not
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
"
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
"
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
if
|
| 136 |
-
sources_used.append("
|
| 137 |
-
if
|
| 138 |
-
sources_used.append("ال
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Ba7ath Investigation Endpoint
|
| 3 |
+
==============================
|
| 4 |
+
POST /api/v1/investigate/{company_id}
|
| 5 |
+
|
| 6 |
+
Cross-references Ahlya (CSV), JORT (DB), and RNE (DB) data via Local Rule-Based Engine.
|
| 7 |
+
A ajouter de nouvelles sources comme ISIE.tn et marché public pour les appels d'offres publics.
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
from fastapi import APIRouter, HTTPException, Depends
|
| 11 |
+
from pydantic import BaseModel, Field
|
| 12 |
+
from typing import Optional, List
|
| 13 |
+
from datetime import datetime
|
| 14 |
+
from sqlalchemy.orm import Session
|
| 15 |
+
|
| 16 |
+
from app.core.supabase_client import get_user_client
|
| 17 |
+
from app.services.llm_service import llm_service
|
| 18 |
+
from app.services.data_loader import get_companies_df
|
| 19 |
+
from app.services.auth_service import get_current_user
|
| 20 |
+
from app.core.utils import clean_nans
|
| 21 |
+
|
| 22 |
+
import logging
|
| 23 |
+
|
| 24 |
+
logger = logging.getLogger("ba7ath.investigate")
|
| 25 |
+
|
| 26 |
+
router = APIRouter()
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
# ── Pydantic Response Models ─────────────────────────────────────────────
|
| 30 |
+
|
| 31 |
+
class LLMAnalysis(BaseModel):
|
| 32 |
+
"""The structured output from the analysis engine."""
|
| 33 |
+
match_score: int = Field(0, ge=0, le=100, description="Score de correspondance (0-100)")
|
| 34 |
+
status: str = Field("Pending", description="Verified | Suspicious | Conflict | Pending")
|
| 35 |
+
findings: List[str] = Field(default_factory=list, description="النقاط المتطابقة")
|
| 36 |
+
red_flags: List[str] = Field(default_factory=list, description="التجاوزات المرصودة")
|
| 37 |
+
summary_ar: str = Field("", description="ملخص التحقيق بالعربية")
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
class InvestigationResult(BaseModel):
|
| 41 |
+
"""Full investigation response."""
|
| 42 |
+
company_id: str
|
| 43 |
+
company_name: str
|
| 44 |
+
wilaya: str
|
| 45 |
+
analysis: LLMAnalysis
|
| 46 |
+
sources_used: List[str] = Field(default_factory=list)
|
| 47 |
+
analyzed_at: str
|
| 48 |
+
model_used: str = "rule-based-engine"
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
# ── Helper: Extract Ahlya data from CSV ──────────────────────────────────
|
| 52 |
+
|
| 53 |
+
# ── Helper: Extract Ahlya data from DB (Legacy named _get_ahlya_data) ──────────────────────────────────
|
| 54 |
+
|
| 55 |
+
async def _get_ahlya_data(company_id: str, company_name: str) -> Optional[dict]:
|
| 56 |
+
"""Find the company in the database via DataLoader."""
|
| 57 |
+
df = await get_companies_df()
|
| 58 |
+
if df is None or df.empty:
|
| 59 |
+
return None
|
| 60 |
+
|
| 61 |
+
# Match by 'id' if numeric
|
| 62 |
+
try:
|
| 63 |
+
numeric_id = int(company_id)
|
| 64 |
+
match = df[df["id"] == numeric_id]
|
| 65 |
+
if not match.empty:
|
| 66 |
+
return match.iloc[0].to_dict()
|
| 67 |
+
except ValueError:
|
| 68 |
+
pass
|
| 69 |
+
|
| 70 |
+
# Fallback to name matching (matching the logic in enriched_companies)
|
| 71 |
+
normalized_target = company_name.strip().upper()
|
| 72 |
+
# Note: df['name'] is already uppercase if normalized by SQL view, but safety first
|
| 73 |
+
match = df[df["name"].astype(str).str.strip().str.upper() == normalized_target]
|
| 74 |
+
if not match.empty:
|
| 75 |
+
return match.iloc[0].to_dict()
|
| 76 |
+
|
| 77 |
+
return None
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
# ── Main Endpoint ────────────────────────────────────────────────────────
|
| 81 |
+
|
| 82 |
+
@router.post(
|
| 83 |
+
"/{company_id}",
|
| 84 |
+
response_model=InvestigationResult,
|
| 85 |
+
summary="تحليل المقارنة المتقاطعة عبر محرك القواعد"
|
| 86 |
+
)
|
| 87 |
+
async def investigate_company(
|
| 88 |
+
company_id: str
|
| 89 |
+
):
|
| 90 |
+
"""
|
| 91 |
+
Cross-reference a company's data from Ahlya (DB), JORT (DB enrichment),
|
| 92 |
+
and RNE (DB enrichment) using a local Rule-Based comparison engine.
|
| 93 |
+
"""
|
| 94 |
+
logger.info(f"📋 Investigation request for company_id: {company_id}")
|
| 95 |
+
|
| 96 |
+
# ── 1. Retrieve enriched data from Supabase ────────────────────────────
|
| 97 |
+
client = get_user_client() # Public read
|
| 98 |
+
resp = client.table("enriched_companies").select("*").eq("company_id", company_id).execute()
|
| 99 |
+
|
| 100 |
+
# Note: If not in enriched_companies, we'll try to build it from basic tables
|
| 101 |
+
enriched = resp.data[0] if resp.data else {}
|
| 102 |
+
company_name = enriched.get("company_name", "")
|
| 103 |
+
wilaya = enriched.get("wilaya", "")
|
| 104 |
+
enrichment_data = enriched.get("data", {})
|
| 105 |
+
|
| 106 |
+
# ── 2. Retrieve Ahlya/Unified data from DB ──────────────────────────
|
| 107 |
+
ahlya_data = await _get_ahlya_data(company_id, company_name)
|
| 108 |
+
|
| 109 |
+
if not company_name and ahlya_data:
|
| 110 |
+
company_name = ahlya_data.get("name", "")
|
| 111 |
+
if not wilaya and ahlya_data:
|
| 112 |
+
wilaya = ahlya_data.get("wilaya", "")
|
| 113 |
+
|
| 114 |
+
# Extract JORT and RNE from enrichment data or from unified fetch
|
| 115 |
+
jort_data = enrichment_data.get("jort", {})
|
| 116 |
+
if not jort_data and ahlya_data:
|
| 117 |
+
# Fallback to direct JORT data from joined view
|
| 118 |
+
jort_data = {
|
| 119 |
+
"jort_ref": ahlya_data.get("jort_ref"),
|
| 120 |
+
"jort_date": ahlya_data.get("jort_date"),
|
| 121 |
+
"capital_social": ahlya_data.get("jort_capital")
|
| 122 |
+
}
|
| 123 |
+
|
| 124 |
+
rne_data = enrichment_data.get("rne", {})
|
| 125 |
+
if not rne_data and ahlya_data:
|
| 126 |
+
# Fallback to RNE data from joined view
|
| 127 |
+
rne_data = {
|
| 128 |
+
"rne_id": ahlya_data.get("rne_id"),
|
| 129 |
+
"tax_id": ahlya_data.get("rne_tax_id"),
|
| 130 |
+
"capital": ahlya_data.get("rne_capital")
|
| 131 |
+
}
|
| 132 |
+
|
| 133 |
+
# Track which sources were used
|
| 134 |
+
sources_used = []
|
| 135 |
+
if ahlya_data:
|
| 136 |
+
sources_used.append("أهلية (DB)")
|
| 137 |
+
if jort_data and (jort_data.get("jort_ref") or jort_data.get("announcements")):
|
| 138 |
+
sources_used.append("الرائد الرسمي (JORT)")
|
| 139 |
+
if rne_data and (rne_data.get("tax_id") or rne_data.get("capital_social") or rne_data.get("capital")):
|
| 140 |
+
sources_used.append("السجل الوطني (RNE)")
|
| 141 |
+
|
| 142 |
+
if not sources_used:
|
| 143 |
+
raise HTTPException(
|
| 144 |
+
status_code=404,
|
| 145 |
+
detail="لا توجد بيانات كافية لإجراء التحليل المتقاطع"
|
| 146 |
+
)
|
| 147 |
+
|
| 148 |
+
# ── 3. Build the payload for the engine ───────────────────────────────
|
| 149 |
+
ahlya_payload = ahlya_data or {"company_name": company_name, "wilaya": wilaya}
|
| 150 |
+
jort_payload = jort_data
|
| 151 |
+
rne_payload = rne_data
|
| 152 |
+
|
| 153 |
+
# Apply clean_nans (Bouclier Moez Elbey)
|
| 154 |
+
ahlya_payload = clean_nans(ahlya_payload)
|
| 155 |
+
jort_payload = clean_nans(jort_payload)
|
| 156 |
+
rne_payload = clean_nans(rne_payload)
|
| 157 |
+
|
| 158 |
+
# ── 4. Call Local Engine Analysis ─────────────────────────────────────
|
| 159 |
+
logger.info(
|
| 160 |
+
f"🚀 Sending to Local Engine: company='{company_name}', "
|
| 161 |
+
f"sources={sources_used}"
|
| 162 |
+
)
|
| 163 |
+
|
| 164 |
+
raw_analysis = await llm_service.analyze_cross_check(
|
| 165 |
+
ahlya_data=ahlya_payload,
|
| 166 |
+
jort_data=jort_payload,
|
| 167 |
+
rne_data=rne_payload,
|
| 168 |
+
)
|
| 169 |
+
|
| 170 |
+
# Parse into Pydantic model (validates schema)
|
| 171 |
+
analysis = LLMAnalysis(
|
| 172 |
+
match_score=raw_analysis.get("match_score", 0),
|
| 173 |
+
status=raw_analysis.get("status", "Pending"),
|
| 174 |
+
findings=raw_analysis.get("findings", []),
|
| 175 |
+
red_flags=raw_analysis.get("red_flags", []),
|
| 176 |
+
summary_ar=raw_analysis.get("summary_ar", ""),
|
| 177 |
+
)
|
| 178 |
+
|
| 179 |
+
# ── 5. Build response ────────────────────────────────────────────────
|
| 180 |
+
return InvestigationResult(
|
| 181 |
+
company_id=company_id,
|
| 182 |
+
company_name=company_name or "Unknown",
|
| 183 |
+
wilaya=wilaya or "Unknown",
|
| 184 |
+
analysis=analysis,
|
| 185 |
+
sources_used=sources_used,
|
| 186 |
+
analyzed_at=datetime.utcnow().isoformat(),
|
| 187 |
+
model_used="rule-based-engine",
|
| 188 |
+
)
|
| 189 |
+
|
app/api/v1/risk.py
CHANGED
|
@@ -6,9 +6,9 @@ from app.models.schemas import WilayaRisk
|
|
| 6 |
router = APIRouter()
|
| 7 |
|
| 8 |
@router.get("/wilayas", response_model=List[WilayaRisk])
|
| 9 |
-
def list_risks():
|
| 10 |
-
return get_all_risks()
|
| 11 |
|
| 12 |
@router.get("/wilayas/{name}", response_model=WilayaRisk)
|
| 13 |
-
def read_risk(name: str):
|
| 14 |
-
return get_risk_for_wilaya(name)
|
|
|
|
| 6 |
router = APIRouter()
|
| 7 |
|
| 8 |
@router.get("/wilayas", response_model=List[WilayaRisk])
|
| 9 |
+
async def list_risks():
|
| 10 |
+
return await get_all_risks()
|
| 11 |
|
| 12 |
@router.get("/wilayas/{name}", response_model=WilayaRisk)
|
| 13 |
+
async def read_risk(name: str):
|
| 14 |
+
return await get_risk_for_wilaya(name)
|
app/api/v1/stats.py
CHANGED
|
@@ -5,9 +5,9 @@ from app.models.schemas import NationalStats, WilayaStats
|
|
| 5 |
router = APIRouter()
|
| 6 |
|
| 7 |
@router.get("/national", response_model=NationalStats)
|
| 8 |
-
def read_national_stats():
|
| 9 |
-
return get_national_stats()
|
| 10 |
|
| 11 |
@router.get("/wilayas/{name}", response_model=WilayaStats)
|
| 12 |
-
def read_wilaya_stats(name: str):
|
| 13 |
-
return get_wilaya_stats(name)
|
|
|
|
| 5 |
router = APIRouter()
|
| 6 |
|
| 7 |
@router.get("/national", response_model=NationalStats)
|
| 8 |
+
async def read_national_stats():
|
| 9 |
+
return await get_national_stats()
|
| 10 |
|
| 11 |
@router.get("/wilayas/{name}", response_model=WilayaStats)
|
| 12 |
+
async def read_wilaya_stats(name: str):
|
| 13 |
+
return await get_wilaya_stats(name)
|
app/core/utils.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
import math
|
|
|
|
| 2 |
from typing import Any
|
| 3 |
|
| 4 |
def clean_nans(obj: Any) -> Any:
|
|
@@ -11,6 +12,8 @@ def clean_nans(obj: Any) -> Any:
|
|
| 11 |
if math.isnan(obj) or math.isinf(obj):
|
| 12 |
return None
|
| 13 |
return obj
|
|
|
|
|
|
|
| 14 |
elif isinstance(obj, dict):
|
| 15 |
return {k: clean_nans(v) for k, v in obj.items()}
|
| 16 |
elif isinstance(obj, list):
|
|
|
|
| 1 |
import math
|
| 2 |
+
from decimal import Decimal
|
| 3 |
from typing import Any
|
| 4 |
|
| 5 |
def clean_nans(obj: Any) -> Any:
|
|
|
|
| 12 |
if math.isnan(obj) or math.isinf(obj):
|
| 13 |
return None
|
| 14 |
return obj
|
| 15 |
+
elif isinstance(obj, Decimal):
|
| 16 |
+
return float(obj)
|
| 17 |
elif isinstance(obj, dict):
|
| 18 |
return {k: clean_nans(v) for k, v in obj.items()}
|
| 19 |
elif isinstance(obj, list):
|
app/services/aggregation.py
CHANGED
|
@@ -1,70 +1,72 @@
|
|
| 1 |
-
from app.services.data_loader import get_companies_df, get_stats_data
|
| 2 |
-
from app.models.schemas import NationalStats, WilayaStats
|
| 3 |
-
|
| 4 |
-
def _safe_value_counts(df, col, head=None):
|
| 5 |
-
"""Safely get value_counts for a column, returning {} if column doesn't exist."""
|
| 6 |
-
if col not in df.columns:
|
| 7 |
-
return {}
|
| 8 |
-
vc = df[col].dropna().value_counts()
|
| 9 |
-
if head:
|
| 10 |
-
vc = vc.head(head)
|
| 11 |
-
return vc.to_dict()
|
| 12 |
-
|
| 13 |
-
async def get_national_stats():
|
| 14 |
-
stats = get_stats_data()
|
| 15 |
-
df = await get_companies_df()
|
| 16 |
-
|
| 17 |
-
total = stats.get("total", 0)
|
| 18 |
-
wilayas = stats.get("wilayas", {})
|
| 19 |
-
types = stats.get("types", {})
|
| 20 |
-
|
| 21 |
-
if not df.empty:
|
| 22 |
-
top_groups = _safe_value_counts(df, 'activity_group')
|
| 23 |
-
top_activities = _safe_value_counts(df, 'activity_normalized', head=10)
|
| 24 |
-
else:
|
| 25 |
-
top_groups = {}
|
| 26 |
-
top_activities = {}
|
| 27 |
-
|
| 28 |
-
return NationalStats(
|
| 29 |
-
total=total,
|
| 30 |
-
wilayas=wilayas,
|
| 31 |
-
types=types,
|
| 32 |
-
top_activities=top_activities,
|
| 33 |
-
top_groups=top_groups
|
| 34 |
-
)
|
| 35 |
-
|
| 36 |
-
async def get_wilaya_stats(wilaya: str):
|
| 37 |
-
df = await get_companies_df()
|
| 38 |
-
stats = get_stats_data()
|
| 39 |
-
|
| 40 |
-
if df.empty:
|
| 41 |
-
return None
|
| 42 |
-
|
| 43 |
-
wilaya_df = df[df['wilaya'] == wilaya]
|
| 44 |
-
count = len(wilaya_df)
|
| 45 |
-
|
| 46 |
-
total = stats.get("total", 1)
|
| 47 |
-
pct = round((count / total) * 100, 1)
|
| 48 |
-
|
| 49 |
-
# Rank
|
| 50 |
-
sorted_wilayas = sorted(stats.get("wilayas", {}).items(), key=lambda x: x[1], reverse=True)
|
| 51 |
-
rank = next((i for i, (w, c) in enumerate(sorted_wilayas, 1) if w == wilaya), 0)
|
| 52 |
-
|
| 53 |
-
if not wilaya_df.empty:
|
| 54 |
-
top_groups = _safe_value_counts(wilaya_df, 'activity_group')
|
| 55 |
-
top_activities = _safe_value_counts(wilaya_df, 'activity_normalized', head=10)
|
| 56 |
-
types = _safe_value_counts(wilaya_df, 'type')
|
| 57 |
-
else:
|
| 58 |
-
top_groups = {}
|
| 59 |
-
top_activities = {}
|
| 60 |
-
types = {}
|
| 61 |
-
|
| 62 |
-
return WilayaStats(
|
| 63 |
-
wilaya=wilaya,
|
| 64 |
-
count=count,
|
| 65 |
-
pct_national=pct,
|
| 66 |
-
rank=rank,
|
| 67 |
-
types=types,
|
| 68 |
-
top_groups=top_groups,
|
| 69 |
-
top_activities=top_activities
|
| 70 |
-
)
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.services.data_loader import get_companies_df, get_stats_data
|
| 2 |
+
from app.models.schemas import NationalStats, WilayaStats
|
| 3 |
+
|
| 4 |
+
def _safe_value_counts(df, col, head=None):
|
| 5 |
+
"""Safely get value_counts for a column, returning {} if column doesn't exist."""
|
| 6 |
+
if col not in df.columns:
|
| 7 |
+
return {}
|
| 8 |
+
vc = df[col].dropna().value_counts()
|
| 9 |
+
if head:
|
| 10 |
+
vc = vc.head(head)
|
| 11 |
+
return vc.to_dict()
|
| 12 |
+
|
| 13 |
+
async def get_national_stats():
    """Build the national-level statistics payload.

    Combines the precomputed aggregates (total, per-wilaya and per-type
    counts) with top activity groups/activities derived from the loaded
    companies DataFrame.

    Returns:
        NationalStats populated from the stats file and the DataFrame.
    """
    summary = get_stats_data()
    companies = await get_companies_df()

    if companies.empty:
        groups, activities = {}, {}
    else:
        groups = _safe_value_counts(companies, 'activity_group')
        activities = _safe_value_counts(companies, 'activity_normalized', head=10)

    return NationalStats(
        total=summary.get("total", 0),
        wilayas=summary.get("wilayas", {}),
        types=summary.get("types", {}),
        top_activities=activities,
        top_groups=groups,
    )
|
| 35 |
+
|
| 36 |
+
async def get_wilaya_stats(wilaya: str):
    """Build the statistics payload for a single wilaya.

    Args:
        wilaya: wilaya name to aggregate on.

    Returns:
        WilayaStats for the given wilaya, or None when no company data
        is loaded at all.
    """
    companies = await get_companies_df()
    summary = get_stats_data()

    if companies.empty:
        return None

    subset = companies[companies['wilaya'] == wilaya]
    count = len(subset)

    # Share of the national total; default of 1 guards the division.
    national_total = summary.get("total", 1)
    pct = round((count / national_total) * 100, 1)

    # 1-based rank of this wilaya by company count; 0 when not found.
    ranking = sorted(summary.get("wilayas", {}).items(), key=lambda kv: kv[1], reverse=True)
    rank = 0
    for position, (name, _) in enumerate(ranking, 1):
        if name == wilaya:
            rank = position
            break

    if subset.empty:
        groups, activities, types = {}, {}, {}
    else:
        groups = _safe_value_counts(subset, 'activity_group')
        activities = _safe_value_counts(subset, 'activity_normalized', head=10)
        types = _safe_value_counts(subset, 'type')

    return WilayaStats(
        wilaya=wilaya,
        count=count,
        pct_national=pct,
        rank=rank,
        types=types,
        top_groups=groups,
        top_activities=activities,
    )
|
| 71 |
+
|
| 72 |
+
|
app/services/osint_links.py
CHANGED
|
@@ -1,32 +1,33 @@
|
|
| 1 |
-
import urllib.parse
|
| 2 |
-
import os
|
| 3 |
-
from dotenv import load_dotenv
|
| 4 |
-
|
| 5 |
-
load_dotenv()
|
| 6 |
-
|
| 7 |
-
INTERNAL_OSINT_MODE = os.getenv("INTERNAL_OSINT_MODE", "False").lower() == "true"
|
| 8 |
-
|
| 9 |
-
def generate_links(company_name: str, wilaya: str):
|
| 10 |
-
base_name = urllib.parse.quote(company_name)
|
| 11 |
-
|
| 12 |
-
links = {
|
| 13 |
-
"Google": f"https://www.google.com/search?q={base_name} {wilaya} site:tn",
|
| 14 |
-
"Facebook": f"https://www.facebook.com/search/top?q={base_name}"
|
| 15 |
-
}
|
| 16 |
-
|
| 17 |
-
if INTERNAL_OSINT_MODE:
|
| 18 |
-
links["RNE"] = f"https://www.registre-entreprises.tn/search?q={base_name}" # Placeholder
|
| 19 |
-
links["JORT"] = f"http://www.iort.gov.tn/search?q={base_name}" # Placeholder
|
| 20 |
-
|
| 21 |
-
return links
|
| 22 |
-
|
| 23 |
-
async def get_company_links(company_id: int):
|
| 24 |
-
from app.services.data_loader import get_companies_df
|
| 25 |
-
df = await get_companies_df()
|
| 26 |
-
|
| 27 |
-
company = df[df['id'] == company_id]
|
| 28 |
-
if company.empty:
|
| 29 |
-
return {}
|
| 30 |
-
|
| 31 |
-
row = company.iloc[0]
|
| 32 |
-
return generate_links(row['name'], row['wilaya'])
|
|
|
|
|
|
| 1 |
+
import urllib.parse
|
| 2 |
+
import os
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
|
| 5 |
+
load_dotenv()
|
| 6 |
+
|
| 7 |
+
INTERNAL_OSINT_MODE = os.getenv("INTERNAL_OSINT_MODE", "False").lower() == "true"
|
| 8 |
+
|
| 9 |
+
def generate_links(company_name: str, wilaya: str):
    """Build OSINT search URLs for a company.

    Args:
        company_name: raw company name (may contain spaces / Arabic text).
        wilaya: wilaya name used to narrow the Google query.

    Returns:
        Mapping of source label -> fully URL-encoded search link. The
        RNE/JORT placeholder links are only included in internal mode.
    """
    encoded_name = urllib.parse.quote(company_name)

    # Bug fix: the Google link previously interpolated a raw space and the
    # unquoted wilaya into the query string, producing a malformed URL for
    # multi-word or non-ASCII input. Encode the entire query instead.
    google_query = urllib.parse.quote(f"{company_name} {wilaya} site:tn")

    links = {
        "Google": f"https://www.google.com/search?q={google_query}",
        "Facebook": f"https://www.facebook.com/search/top?q={encoded_name}"
    }

    if INTERNAL_OSINT_MODE:
        links["RNE"] = f"https://www.registre-entreprises.tn/search?q={encoded_name}"  # Placeholder
        links["JORT"] = f"http://www.iort.gov.tn/search?q={encoded_name}"  # Placeholder

    return links
|
| 22 |
+
|
| 23 |
+
async def get_company_links(company_id: int):
    """Return the OSINT links for one company, or {} when the id is unknown.

    Args:
        company_id: numeric identifier matched against the 'id' column.
    """
    # Local import keeps module load order flexible (avoids an import cycle).
    from app.services.data_loader import get_companies_df
    companies = await get_companies_df()

    match = companies[companies['id'] == company_id]
    if match.empty:
        return {}

    record = match.iloc[0]
    return generate_links(record['name'], record['wilaya'])
|
| 33 |
+
|
app/services/risk_engine.py
CHANGED
|
@@ -1,150 +1,174 @@
|
|
| 1 |
-
from app.services.data_loader import get_companies_df
|
| 2 |
-
from app.models.schemas import WilayaRisk, Flag
|
| 3 |
-
import numpy as np
|
| 4 |
-
|
| 5 |
-
def generate_risk_commentary(wilaya_data: dict, risk_scores: dict) -> dict:
|
| 6 |
-
"""
|
| 7 |
-
Génère des commentaires éditoriaux en arabe basés sur les scores de risque.
|
| 8 |
-
"""
|
| 9 |
-
s1, s2, s3 = risk_scores['s1'], risk_scores['s2'], risk_scores['s3']
|
| 10 |
-
index = risk_scores['baath_index']
|
| 11 |
-
|
| 12 |
-
# Defaults
|
| 13 |
-
level = "LOW"
|
| 14 |
-
level_ar = "منخفض"
|
| 15 |
-
color = "emerald"
|
| 16 |
-
|
| 17 |
-
if index >= 70:
|
| 18 |
-
level = "HIGH"
|
| 19 |
-
level_ar = "مرتفع"
|
| 20 |
-
color = "red"
|
| 21 |
-
elif index >= 40:
|
| 22 |
-
level = "MEDIUM"
|
| 23 |
-
level_ar = "متوسط"
|
| 24 |
-
color = "amber"
|
| 25 |
-
|
| 26 |
-
comments = []
|
| 27 |
-
|
| 28 |
-
# S1 - Dépendance
|
| 29 |
-
if s1 > 0.6:
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
recommendations.append("ال
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
"
|
| 67 |
-
"
|
| 68 |
-
"
|
| 69 |
-
"
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
#
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.services.data_loader import get_companies_df
|
| 2 |
+
from app.models.schemas import WilayaRisk, Flag
|
| 3 |
+
import numpy as np
|
| 4 |
+
|
| 5 |
+
def generate_risk_commentary(wilaya_data: dict, risk_scores: dict) -> dict:
    """Generate Arabic editorial commentary from a wilaya's risk scores.

    Args:
        wilaya_data: {'groups': {group: count}, 'types': {type: count}}
            as produced by compute_baath_index_v2's `details`.
        risk_scores: {'s1', 's2', 's3', 'baath_index'} component scores.

    Returns:
        dict with keys level, level_ar, color, comment_ar, recommendations,
        ready to be splatted into WilayaRisk.
    """
    s1, s2, s3 = risk_scores['s1'], risk_scores['s2'], risk_scores['s3']
    index = risk_scores['baath_index']

    # Map the composite index onto a traffic-light level (default: LOW).
    level, level_ar, color = "LOW", "منخفض", "emerald"
    if index >= 70:
        level, level_ar, color = "HIGH", "مرتفع", "red"
    elif index >= 40:
        level, level_ar, color = "MEDIUM", "متوسط", "amber"

    comments = []
    # Hoisted: the group total was recomputed for every element inside the
    # comprehension and again in the s2 branch. `or 1` guards division by
    # zero when the mapping is empty.
    group_total = sum(wilaya_data['groups'].values()) or 1

    # S1 — dependency on public-resource sectors.
    if s1 > 0.6:
        dominant_groups = [g for g, count in wilaya_data['groups'].items()
                           if g in ['AGRI_NATUREL', 'ENVIRONNEMENT', 'ENERGIE_MINES']
                           and count / group_total > 0.3]
        if dominant_groups:
            comments.append(f"الولاية تعتمد بشكل كبير على الأنشطة المرتبطة بالموارد العمومية ({', '.join(dominant_groups)})")

    # S2 — sector concentration.
    if s2 > 0.7:
        if wilaya_data['groups']:
            top_group = max(wilaya_data['groups'].items(), key=lambda x: x[1])[0]
            pct = (wilaya_data['groups'][top_group] / group_total) * 100
            comments.append(f"تركيز عالٍ جدا في مجموعة نشاط واحدة ({top_group}: {pct:.0f}%)")
    elif s2 > 0.5:
        comments.append("تركيز ملحوظ في عدد محدود من القطاعات")

    # S3 — governance imbalance (local vs regional companies).
    if s3 > 0.5:
        total_types = sum(wilaya_data['types'].values()) or 1
        local_pct = (wilaya_data['types'].get('محلية', 0) / total_types) * 100
        regional_pct = (wilaya_data['types'].get('جهوية', 0) / total_types) * 100
        comments.append(f"اختلال واضح في الحوكمة: {local_pct:.0f}% محلية مقابل {regional_pct:.0f}% جهوية")

    # Actionable follow-ups, gated on the same thresholds as the comments.
    recommendations = []
    if s1 > 0.6:
        recommendations.append("التحقق من الأراضي الدولية المُسندة (OTD)")
        recommendations.append("البحث في صفقات التطهير والبيئة (TUNEPS)")
    if s2 > 0.7:
        recommendations.append("تحليل الاحتكارات القطاعية المحتملة")
    if s3 > 0.5:
        recommendations.append("مراجعة التوازن بين المحلي والجهوي في تركيبة مجالس الإدارة")
    if index > 70:
        recommendations.append("يُنصح بتحقيق صحفي معمق على هذه الولاية")

    return {
        "level": level,
        "level_ar": level_ar,
        "color": color,
        "comment_ar": " · ".join(comments) if comments else "لا توجد إشارات خطر واضحة في البيانات الحالية",
        "recommendations": recommendations
    }
|
| 72 |
+
|
| 73 |
+
def compute_baath_index_v2(wilaya_df):
    """Compute the Ba7ath Index (0-100) for one wilaya's companies.

    INDEX = 100 * (0.4 * s1 + 0.4 * s2 + 0.2 * s3), where:
      s1: dependency on public-resource sectors (AGRI, ENV, MINES)
      s2: sector concentration (max share of any activity group)
      s3: governance imbalance, abs(% local - % regional)

    Args:
        wilaya_df: companies DataFrame filtered to a single wilaya;
            must expose 'activity_group' and 'type' columns.

    Returns:
        (baath_index, s1, s2, s3, flags, details) — always a 6-tuple.
        `details` carries raw group/type counts for generate_risk_commentary.
    """
    if wilaya_df.empty:
        # Bug fix: this branch used to return only 5 values while the normal
        # path returns 6, so any caller unpacking 6 crashed on empty input.
        return 0.0, 0.0, 0.0, 0.0, [], {'groups': {}, 'types': {}}

    total = len(wilaya_df)
    flags = []

    # --- s1: share of companies in resource-linked activity groups ---
    resource_groups = ['AGRI_NATUREL', 'ENVIRONNEMENT', 'ENERGIE_MINES']
    resource_count = wilaya_df[wilaya_df['activity_group'].isin(resource_groups)].shape[0]
    s1 = resource_count / total if total > 0 else 0.0

    if s1 > 0.6:
        flags.append(Flag(code="RESOURCE_DEPENDENT", severity="high", label_ar="اعتماد كبير على الأنشطة المرتبطة بالموارد العمومية"))

    # --- s2: max share of any single activity group ---
    group_counts = wilaya_df['activity_group'].value_counts(normalize=True)
    s2 = group_counts.max() if not group_counts.empty else 0.0

    if s2 > 0.7:
        flags.append(Flag(code="ULTRA_CONCENTRATION", severity="medium", label_ar="تركيز عالٍ في مجموعة نشاط واحدة"))

    # --- s3: imbalance between local and regional company types ---
    type_counts = wilaya_df['type'].value_counts(normalize=True)
    pct_local = type_counts.get('محلية', 0.0)
    pct_regional = type_counts.get('جهوية', 0.0)
    s3 = abs(pct_local - pct_regional)

    if s3 > 0.5:
        flags.append(Flag(code="GOVERNANCE_IMBALANCE", severity="low", label_ar="اختلال واضح بين الشركات المحلية والجهوية"))

    # --- final weighted score, clamped to 100 ---
    raw_index = 100 * (0.4 * s1 + 0.4 * s2 + 0.2 * s3)
    baath_index = round(min(raw_index, 100), 1)

    # Raw counts consumed by generate_risk_commentary.
    details = {
        'groups': wilaya_df['activity_group'].value_counts().to_dict(),
        'types': wilaya_df['type'].value_counts().to_dict()
    }

    return baath_index, round(s1, 2), round(s2, 2), round(s3, 2), flags, details
|
| 127 |
+
|
| 128 |
+
async def get_risk_for_wilaya(wilaya: str):
    """Compute the risk profile for one wilaya.

    Args:
        wilaya: wilaya name to score.

    Returns:
        WilayaRisk for the wilaya (a neutral profile when it has no
        companies), or None when no data is loaded at all.
    """
    companies = await get_companies_df()
    if companies.empty:
        return None

    subset = companies[companies['wilaya'] == wilaya]
    if subset.empty:
        # No companies registered here: report a neutral, low-risk profile.
        return WilayaRisk(
            wilaya=wilaya, baath_index=0, s1=0, s2=0, s3=0, flags=[],
            level="LOW", level_ar="منخفض", color="emerald",
            comment_ar="لا توجد بيانات كافية", recommendations=[]
        )

    score, s1, s2, s3, flags, details = compute_baath_index_v2(subset)

    # Editorial fields (level, color, comment_ar, recommendations).
    editorial = generate_risk_commentary(details, {
        's1': s1, 's2': s2, 's3': s3, 'baath_index': score
    })

    return WilayaRisk(
        wilaya=wilaya,
        baath_index=score,
        s1=s1,
        s2=s2,
        s3=s3,
        flags=flags,
        **editorial
    )
|
| 158 |
+
|
| 159 |
+
async def get_all_risks():
    """Return risk profiles for every wilaya, highest Ba7ath index first."""
    companies = await get_companies_df()
    if companies.empty:
        return []

    profiles = []
    for name in companies['wilaya'].unique():
        profile = await get_risk_for_wilaya(name)
        if profile:
            profiles.append(profile)

    profiles.sort(key=lambda r: r.baath_index, reverse=True)
    return profiles
|
| 174 |
+
|