Spaces:
Running
Running
Commit ·
91de656
1
Parent(s): 93aec95
Fix et migration database
Browse files- app/api/v1/auth.py +125 -125
- app/api/v1/companies.py +49 -48
- app/api/v1/investigate.py +189 -186
- app/api/v1/risk.py +4 -4
- app/api/v1/stats.py +4 -4
- app/core/utils.py +3 -0
- app/services/aggregation.py +72 -70
- app/services/osint_links.py +33 -32
- app/services/risk_engine.py +174 -150
app/api/v1/auth.py
CHANGED
|
@@ -1,125 +1,125 @@
|
|
| 1 |
-
from fastapi import APIRouter, Depends, HTTPException
|
| 2 |
-
from pydantic import BaseModel
|
| 3 |
-
from typing import Optional, List
|
| 4 |
-
|
| 5 |
-
from app.core.supabase_client import get_user_client, get_admin_client
|
| 6 |
-
from app.services.auth_service import get_current_active_user, get_current_admin_user, AuthenticatedUserInfo
|
| 7 |
-
|
| 8 |
-
router = APIRouter()
|
| 9 |
-
|
| 10 |
-
class UserCreate(BaseModel):
|
| 11 |
-
email: str
|
| 12 |
-
password: str
|
| 13 |
-
full_name: str
|
| 14 |
-
is_admin: bool = False
|
| 15 |
-
|
| 16 |
-
class UserUpdate(BaseModel):
|
| 17 |
-
email: Optional[str] = None
|
| 18 |
-
password: Optional[str] = None
|
| 19 |
-
full_name: Optional[str] = None
|
| 20 |
-
is_admin: Optional[bool] = None
|
| 21 |
-
|
| 22 |
-
@router.get("/me")
|
| 23 |
-
async def read_users_me(
|
| 24 |
-
current_user: AuthenticatedUserInfo = Depends(get_current_active_user)
|
| 25 |
-
):
|
| 26 |
-
"""
|
| 27 |
-
Example endpoint showing how to query information on behalf of the user,
|
| 28 |
-
forcing Supabase to apply Row Level Security (RLS) via their JWT.
|
| 29 |
-
"""
|
| 30 |
-
try:
|
| 31 |
-
# Initialize client with the user's JWT
|
| 32 |
-
client = get_user_client(current_user.jwt)
|
| 33 |
-
|
| 34 |
-
# This will securely return only the row matching `auth.uid() = auth_user_id`
|
| 35 |
-
response = (
|
| 36 |
-
client.table("users")
|
| 37 |
-
.select("*")
|
| 38 |
-
.eq("auth_user_id", current_user.user_id)
|
| 39 |
-
.single()
|
| 40 |
-
.execute()
|
| 41 |
-
)
|
| 42 |
-
return response.data
|
| 43 |
-
except Exception as e:
|
| 44 |
-
# Note: If RLS prevents reading, Supabase might return a PostgREST error.
|
| 45 |
-
raise HTTPException(status_code=400, detail=str(e))
|
| 46 |
-
|
| 47 |
-
@router.get("/users")
|
| 48 |
-
async def list_users(current_admin: AuthenticatedUserInfo = Depends(get_current_admin_user)):
|
| 49 |
-
try:
|
| 50 |
-
admin_client = get_admin_client()
|
| 51 |
-
response = admin_client.auth.admin.list_users()
|
| 52 |
-
|
| 53 |
-
users = []
|
| 54 |
-
for u in response:
|
| 55 |
-
metadata = getattr(u, "user_metadata", {}) or {}
|
| 56 |
-
users.append({
|
| 57 |
-
"id": u.id,
|
| 58 |
-
"email": u.email,
|
| 59 |
-
"full_name": metadata.get("full_name", ""),
|
| 60 |
-
"is_admin": metadata.get("is_admin", False),
|
| 61 |
-
"created_at": str(u.created_at)
|
| 62 |
-
})
|
| 63 |
-
return users
|
| 64 |
-
except Exception as e:
|
| 65 |
-
raise HTTPException(status_code=400, detail=str(e))
|
| 66 |
-
|
| 67 |
-
@router.post("/users")
|
| 68 |
-
async def create_user(
|
| 69 |
-
user: UserCreate,
|
| 70 |
-
current_admin: AuthenticatedUserInfo = Depends(get_current_admin_user)
|
| 71 |
-
):
|
| 72 |
-
try:
|
| 73 |
-
admin_client = get_admin_client()
|
| 74 |
-
response = admin_client.auth.admin.create_user({
|
| 75 |
-
"email": user.email,
|
| 76 |
-
"password": user.password,
|
| 77 |
-
"email_confirm": True,
|
| 78 |
-
"user_metadata": {
|
| 79 |
-
"full_name": user.full_name,
|
| 80 |
-
"is_admin": user.is_admin
|
| 81 |
-
}
|
| 82 |
-
})
|
| 83 |
-
return {"id": response.user.id, "email": response.user.email}
|
| 84 |
-
except Exception as e:
|
| 85 |
-
raise HTTPException(status_code=400, detail=str(e))
|
| 86 |
-
|
| 87 |
-
@router.patch("/users/{user_id}")
|
| 88 |
-
async def update_user(
|
| 89 |
-
user_id: str,
|
| 90 |
-
user_update: UserUpdate,
|
| 91 |
-
current_admin: AuthenticatedUserInfo = Depends(get_current_admin_user)
|
| 92 |
-
):
|
| 93 |
-
try:
|
| 94 |
-
admin_client = get_admin_client()
|
| 95 |
-
attributes = {}
|
| 96 |
-
if user_update.email is not None:
|
| 97 |
-
attributes["email"] = user_update.email
|
| 98 |
-
if user_update.password:
|
| 99 |
-
attributes["password"] = user_update.password
|
| 100 |
-
|
| 101 |
-
user_metadata = {}
|
| 102 |
-
if user_update.full_name is not None:
|
| 103 |
-
user_metadata["full_name"] = user_update.full_name
|
| 104 |
-
if user_update.is_admin is not None:
|
| 105 |
-
user_metadata["is_admin"] = user_update.is_admin
|
| 106 |
-
|
| 107 |
-
if user_metadata:
|
| 108 |
-
attributes["user_metadata"] = user_metadata
|
| 109 |
-
|
| 110 |
-
response = admin_client.auth.admin.update_user_by_id(user_id, attributes)
|
| 111 |
-
return {"id": response.user.id}
|
| 112 |
-
except Exception as e:
|
| 113 |
-
raise HTTPException(status_code=400, detail=str(e))
|
| 114 |
-
|
| 115 |
-
@router.delete("/users/{user_id}")
|
| 116 |
-
async def delete_user(
|
| 117 |
-
user_id: str,
|
| 118 |
-
current_admin: AuthenticatedUserInfo = Depends(get_current_admin_user)
|
| 119 |
-
):
|
| 120 |
-
try:
|
| 121 |
-
admin_client = get_admin_client()
|
| 122 |
-
admin_client.auth.admin.delete_user(user_id)
|
| 123 |
-
return {"status": "success"}
|
| 124 |
-
except Exception as e:
|
| 125 |
-
raise HTTPException(status_code=400, detail=str(e))
|
|
|
|
| 1 |
+
from fastapi import APIRouter, Depends, HTTPException
|
| 2 |
+
from pydantic import BaseModel
|
| 3 |
+
from typing import Optional, List
|
| 4 |
+
|
| 5 |
+
from app.core.supabase_client import get_user_client, get_admin_client
|
| 6 |
+
from app.services.auth_service import get_current_active_user, get_current_admin_user, AuthenticatedUserInfo
|
| 7 |
+
|
| 8 |
+
router = APIRouter()
|
| 9 |
+
|
| 10 |
+
class UserCreate(BaseModel):
|
| 11 |
+
email: str
|
| 12 |
+
password: str
|
| 13 |
+
full_name: str
|
| 14 |
+
is_admin: bool = False
|
| 15 |
+
|
| 16 |
+
class UserUpdate(BaseModel):
|
| 17 |
+
email: Optional[str] = None
|
| 18 |
+
password: Optional[str] = None
|
| 19 |
+
full_name: Optional[str] = None
|
| 20 |
+
is_admin: Optional[bool] = None
|
| 21 |
+
|
| 22 |
+
@router.get("/me")
|
| 23 |
+
async def read_users_me(
|
| 24 |
+
current_user: AuthenticatedUserInfo = Depends(get_current_active_user)
|
| 25 |
+
):
|
| 26 |
+
"""
|
| 27 |
+
Example endpoint showing how to query information on behalf of the user,
|
| 28 |
+
forcing Supabase to apply Row Level Security (RLS) via their JWT.
|
| 29 |
+
"""
|
| 30 |
+
try:
|
| 31 |
+
# Initialize client with the user's JWT
|
| 32 |
+
client = get_user_client(current_user.jwt)
|
| 33 |
+
|
| 34 |
+
# This will securely return only the row matching `auth.uid() = auth_user_id`
|
| 35 |
+
response = (
|
| 36 |
+
client.table("users")
|
| 37 |
+
.select("*")
|
| 38 |
+
.eq("auth_user_id", current_user.user_id)
|
| 39 |
+
.single()
|
| 40 |
+
.execute()
|
| 41 |
+
)
|
| 42 |
+
return response.data
|
| 43 |
+
except Exception as e:
|
| 44 |
+
# Note: If RLS prevents reading, Supabase might return a PostgREST error.
|
| 45 |
+
raise HTTPException(status_code=400, detail=str(e))
|
| 46 |
+
|
| 47 |
+
@router.get("/users")
|
| 48 |
+
async def list_users(current_admin: AuthenticatedUserInfo = Depends(get_current_admin_user)):
|
| 49 |
+
try:
|
| 50 |
+
admin_client = get_admin_client()
|
| 51 |
+
response = admin_client.auth.admin.list_users()
|
| 52 |
+
|
| 53 |
+
users = []
|
| 54 |
+
for u in response:
|
| 55 |
+
metadata = getattr(u, "user_metadata", {}) or {}
|
| 56 |
+
users.append({
|
| 57 |
+
"id": u.id,
|
| 58 |
+
"email": u.email,
|
| 59 |
+
"full_name": metadata.get("full_name", ""),
|
| 60 |
+
"is_admin": metadata.get("is_admin", False),
|
| 61 |
+
"created_at": str(u.created_at)
|
| 62 |
+
})
|
| 63 |
+
return users
|
| 64 |
+
except Exception as e:
|
| 65 |
+
raise HTTPException(status_code=400, detail=str(e))
|
| 66 |
+
|
| 67 |
+
@router.post("/users")
|
| 68 |
+
async def create_user(
|
| 69 |
+
user: UserCreate,
|
| 70 |
+
current_admin: AuthenticatedUserInfo = Depends(get_current_admin_user)
|
| 71 |
+
):
|
| 72 |
+
try:
|
| 73 |
+
admin_client = get_admin_client()
|
| 74 |
+
response = admin_client.auth.admin.create_user({
|
| 75 |
+
"email": user.email,
|
| 76 |
+
"password": user.password,
|
| 77 |
+
"email_confirm": True,
|
| 78 |
+
"user_metadata": {
|
| 79 |
+
"full_name": user.full_name,
|
| 80 |
+
"is_admin": user.is_admin
|
| 81 |
+
}
|
| 82 |
+
})
|
| 83 |
+
return {"id": response.user.id, "email": response.user.email}
|
| 84 |
+
except Exception as e:
|
| 85 |
+
raise HTTPException(status_code=400, detail=str(e))
|
| 86 |
+
|
| 87 |
+
@router.patch("/users/{user_id}")
|
| 88 |
+
async def update_user(
|
| 89 |
+
user_id: str,
|
| 90 |
+
user_update: UserUpdate,
|
| 91 |
+
current_admin: AuthenticatedUserInfo = Depends(get_current_admin_user)
|
| 92 |
+
):
|
| 93 |
+
try:
|
| 94 |
+
admin_client = get_admin_client()
|
| 95 |
+
attributes = {}
|
| 96 |
+
if user_update.email is not None:
|
| 97 |
+
attributes["email"] = user_update.email
|
| 98 |
+
if user_update.password:
|
| 99 |
+
attributes["password"] = user_update.password
|
| 100 |
+
|
| 101 |
+
user_metadata = {}
|
| 102 |
+
if user_update.full_name is not None:
|
| 103 |
+
user_metadata["full_name"] = user_update.full_name
|
| 104 |
+
if user_update.is_admin is not None:
|
| 105 |
+
user_metadata["is_admin"] = user_update.is_admin
|
| 106 |
+
|
| 107 |
+
if user_metadata:
|
| 108 |
+
attributes["user_metadata"] = user_metadata
|
| 109 |
+
|
| 110 |
+
response = admin_client.auth.admin.update_user_by_id(user_id, attributes)
|
| 111 |
+
return {"id": response.user.id}
|
| 112 |
+
except Exception as e:
|
| 113 |
+
raise HTTPException(status_code=400, detail=str(e))
|
| 114 |
+
|
| 115 |
+
@router.delete("/users/{user_id}")
|
| 116 |
+
async def delete_user(
|
| 117 |
+
user_id: str,
|
| 118 |
+
current_admin: AuthenticatedUserInfo = Depends(get_current_admin_user)
|
| 119 |
+
):
|
| 120 |
+
try:
|
| 121 |
+
admin_client = get_admin_client()
|
| 122 |
+
admin_client.auth.admin.delete_user(user_id)
|
| 123 |
+
return {"status": "success"}
|
| 124 |
+
except Exception as e:
|
| 125 |
+
raise HTTPException(status_code=400, detail=str(e))
|
app/api/v1/companies.py
CHANGED
|
@@ -1,48 +1,49 @@
|
|
| 1 |
-
from fastapi import APIRouter, Query
|
| 2 |
-
from typing import List, Optional
|
| 3 |
-
from app.services.data_loader import get_companies_df
|
| 4 |
-
from app.models.schemas import Company, CompanyWithLinks
|
| 5 |
-
from app.services.osint_links import get_company_links
|
| 6 |
-
|
| 7 |
-
router = APIRouter()
|
| 8 |
-
|
| 9 |
-
@router.get("/", response_model=List[Company])
|
| 10 |
-
async def list_companies(
|
| 11 |
-
wilaya: Optional[str] = None,
|
| 12 |
-
group: Optional[str] = None,
|
| 13 |
-
type: Optional[str] = None,
|
| 14 |
-
search: Optional[str] = None,
|
| 15 |
-
limit: int = 50
|
| 16 |
-
):
|
| 17 |
-
df = await get_companies_df()
|
| 18 |
-
if df.empty:
|
| 19 |
-
return []
|
| 20 |
-
|
| 21 |
-
if wilaya:
|
| 22 |
-
df = df[df['wilaya'] == wilaya]
|
| 23 |
-
if group:
|
| 24 |
-
df = df[df['activity_group'] == group]
|
| 25 |
-
if type:
|
| 26 |
-
df = df[df['type'] == type]
|
| 27 |
-
if search:
|
| 28 |
-
mask = df['name'].str.contains(search, na=False, case=False) | \
|
| 29 |
-
df['activity_normalized'].str.contains(search, na=False, case=False)
|
| 30 |
-
df = df[mask]
|
| 31 |
-
|
| 32 |
-
return df.head(limit).to_dict(orient='records')
|
| 33 |
-
|
| 34 |
-
@router.get("/{company_id}", response_model=CompanyWithLinks)
|
| 35 |
-
async def read_company(company_id: int):
|
| 36 |
-
df = await get_companies_df()
|
| 37 |
-
company = df[df['id'] == company_id]
|
| 38 |
-
if company.empty:
|
| 39 |
-
from fastapi import HTTPException
|
| 40 |
-
raise HTTPException(status_code=404, detail="Company not found")
|
| 41 |
-
|
| 42 |
-
data = company.iloc[0].to_dict()
|
| 43 |
-
data['osint_links'] = await get_company_links(company_id)
|
| 44 |
-
return data
|
| 45 |
-
|
| 46 |
-
@router.get("/{company_id}/osint_links")
|
| 47 |
-
async def read_company_links(company_id: int):
|
| 48 |
-
return await get_company_links(company_id)
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, Query
|
| 2 |
+
from typing import List, Optional
|
| 3 |
+
from app.services.data_loader import get_companies_df
|
| 4 |
+
from app.models.schemas import Company, CompanyWithLinks
|
| 5 |
+
from app.services.osint_links import get_company_links
|
| 6 |
+
|
| 7 |
+
router = APIRouter()
|
| 8 |
+
|
| 9 |
+
@router.get("/", response_model=List[Company])
|
| 10 |
+
async def list_companies(
|
| 11 |
+
wilaya: Optional[str] = None,
|
| 12 |
+
group: Optional[str] = None,
|
| 13 |
+
type: Optional[str] = None,
|
| 14 |
+
search: Optional[str] = None,
|
| 15 |
+
limit: int = 50
|
| 16 |
+
):
|
| 17 |
+
df = await get_companies_df()
|
| 18 |
+
if df.empty:
|
| 19 |
+
return []
|
| 20 |
+
|
| 21 |
+
if wilaya:
|
| 22 |
+
df = df[df['wilaya'] == wilaya]
|
| 23 |
+
if group:
|
| 24 |
+
df = df[df['activity_group'] == group]
|
| 25 |
+
if type:
|
| 26 |
+
df = df[df['type'] == type]
|
| 27 |
+
if search:
|
| 28 |
+
mask = df['name'].str.contains(search, na=False, case=False) | \
|
| 29 |
+
df['activity_normalized'].str.contains(search, na=False, case=False)
|
| 30 |
+
df = df[mask]
|
| 31 |
+
|
| 32 |
+
return df.head(limit).to_dict(orient='records')
|
| 33 |
+
|
| 34 |
+
@router.get("/{company_id}", response_model=CompanyWithLinks)
|
| 35 |
+
async def read_company(company_id: int):
|
| 36 |
+
df = await get_companies_df()
|
| 37 |
+
company = df[df['id'] == company_id]
|
| 38 |
+
if company.empty:
|
| 39 |
+
from fastapi import HTTPException
|
| 40 |
+
raise HTTPException(status_code=404, detail="Company not found")
|
| 41 |
+
|
| 42 |
+
data = company.iloc[0].to_dict()
|
| 43 |
+
data['osint_links'] = await get_company_links(company_id)
|
| 44 |
+
return data
|
| 45 |
+
|
| 46 |
+
@router.get("/{company_id}/osint_links")
|
| 47 |
+
async def read_company_links(company_id: int):
|
| 48 |
+
return await get_company_links(company_id)
|
| 49 |
+
|
app/api/v1/investigate.py
CHANGED
|
@@ -1,186 +1,189 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Ba7ath Investigation Endpoint
|
| 3 |
-
==============================
|
| 4 |
-
POST /api/v1/investigate/{company_id}
|
| 5 |
-
|
| 6 |
-
Cross-references Ahlya (CSV), JORT (DB), and RNE (DB) data via Local Rule-Based Engine.
|
| 7 |
-
A ajouter de nouvelles sources comme ISIE.tn et marché public pour les appels d'offres publics.
|
| 8 |
-
"""
|
| 9 |
-
|
| 10 |
-
from fastapi import APIRouter, HTTPException, Depends
|
| 11 |
-
from pydantic import BaseModel, Field
|
| 12 |
-
from typing import Optional, List
|
| 13 |
-
from datetime import datetime
|
| 14 |
-
from sqlalchemy.orm import Session
|
| 15 |
-
|
| 16 |
-
from app.core.supabase_client import get_user_client
|
| 17 |
-
from app.services.llm_service import llm_service
|
| 18 |
-
from app.services.data_loader import get_companies_df
|
| 19 |
-
from app.services.auth_service import get_current_user
|
| 20 |
-
from app.core.utils import clean_nans
|
| 21 |
-
|
| 22 |
-
import logging
|
| 23 |
-
|
| 24 |
-
logger = logging.getLogger("ba7ath.investigate")
|
| 25 |
-
|
| 26 |
-
router = APIRouter()
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
# ── Pydantic Response Models ─────────────────────────────────────────────
|
| 30 |
-
|
| 31 |
-
class LLMAnalysis(BaseModel):
|
| 32 |
-
"""The structured output from the analysis engine."""
|
| 33 |
-
match_score: int = Field(0, ge=0, le=100, description="Score de correspondance (0-100)")
|
| 34 |
-
status: str = Field("Pending", description="Verified | Suspicious | Conflict | Pending")
|
| 35 |
-
findings: List[str] = Field(default_factory=list, description="النقاط المتطابقة")
|
| 36 |
-
red_flags: List[str] = Field(default_factory=list, description="التجاوزات المرصودة")
|
| 37 |
-
summary_ar: str = Field("", description="ملخص التحقيق بالعربية")
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
class InvestigationResult(BaseModel):
|
| 41 |
-
"""Full investigation response."""
|
| 42 |
-
company_id: str
|
| 43 |
-
company_name: str
|
| 44 |
-
wilaya: str
|
| 45 |
-
analysis: LLMAnalysis
|
| 46 |
-
sources_used: List[str] = Field(default_factory=list)
|
| 47 |
-
analyzed_at: str
|
| 48 |
-
model_used: str = "rule-based-engine"
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
# ── Helper: Extract Ahlya data from
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
#
|
| 71 |
-
|
| 72 |
-
if
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
if not
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
"
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
"
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
if
|
| 136 |
-
sources_used.append("
|
| 137 |
-
if
|
| 138 |
-
sources_used.append("ال
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Ba7ath Investigation Endpoint
|
| 3 |
+
==============================
|
| 4 |
+
POST /api/v1/investigate/{company_id}
|
| 5 |
+
|
| 6 |
+
Cross-references Ahlya (CSV), JORT (DB), and RNE (DB) data via Local Rule-Based Engine.
|
| 7 |
+
A ajouter de nouvelles sources comme ISIE.tn et marché public pour les appels d'offres publics.
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
from fastapi import APIRouter, HTTPException, Depends
|
| 11 |
+
from pydantic import BaseModel, Field
|
| 12 |
+
from typing import Optional, List
|
| 13 |
+
from datetime import datetime
|
| 14 |
+
from sqlalchemy.orm import Session
|
| 15 |
+
|
| 16 |
+
from app.core.supabase_client import get_user_client
|
| 17 |
+
from app.services.llm_service import llm_service
|
| 18 |
+
from app.services.data_loader import get_companies_df
|
| 19 |
+
from app.services.auth_service import get_current_user
|
| 20 |
+
from app.core.utils import clean_nans
|
| 21 |
+
|
| 22 |
+
import logging
|
| 23 |
+
|
| 24 |
+
logger = logging.getLogger("ba7ath.investigate")
|
| 25 |
+
|
| 26 |
+
router = APIRouter()
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
# ── Pydantic Response Models ─────────────────────────────────────────────
|
| 30 |
+
|
| 31 |
+
class LLMAnalysis(BaseModel):
|
| 32 |
+
"""The structured output from the analysis engine."""
|
| 33 |
+
match_score: int = Field(0, ge=0, le=100, description="Score de correspondance (0-100)")
|
| 34 |
+
status: str = Field("Pending", description="Verified | Suspicious | Conflict | Pending")
|
| 35 |
+
findings: List[str] = Field(default_factory=list, description="النقاط المتطابقة")
|
| 36 |
+
red_flags: List[str] = Field(default_factory=list, description="التجاوزات المرصودة")
|
| 37 |
+
summary_ar: str = Field("", description="ملخص التحقيق بالعربية")
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
class InvestigationResult(BaseModel):
|
| 41 |
+
"""Full investigation response."""
|
| 42 |
+
company_id: str
|
| 43 |
+
company_name: str
|
| 44 |
+
wilaya: str
|
| 45 |
+
analysis: LLMAnalysis
|
| 46 |
+
sources_used: List[str] = Field(default_factory=list)
|
| 47 |
+
analyzed_at: str
|
| 48 |
+
model_used: str = "rule-based-engine"
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
# ── Helper: Extract Ahlya data from CSV ──────────────────────────────────
|
| 52 |
+
|
| 53 |
+
# ── Helper: Extract Ahlya data from DB (Legacy named _get_ahlya_data) ──────────────────────────────────
|
| 54 |
+
|
| 55 |
+
async def _get_ahlya_data(company_id: str, company_name: str) -> Optional[dict]:
|
| 56 |
+
"""Find the company in the database via DataLoader."""
|
| 57 |
+
df = await get_companies_df()
|
| 58 |
+
if df is None or df.empty:
|
| 59 |
+
return None
|
| 60 |
+
|
| 61 |
+
# Match by 'id' if numeric
|
| 62 |
+
try:
|
| 63 |
+
numeric_id = int(company_id)
|
| 64 |
+
match = df[df["id"] == numeric_id]
|
| 65 |
+
if not match.empty:
|
| 66 |
+
return match.iloc[0].to_dict()
|
| 67 |
+
except ValueError:
|
| 68 |
+
pass
|
| 69 |
+
|
| 70 |
+
# Fallback to name matching (matching the logic in enriched_companies)
|
| 71 |
+
normalized_target = company_name.strip().upper()
|
| 72 |
+
# Note: df['name'] is already uppercase if normalized by SQL view, but safety first
|
| 73 |
+
match = df[df["name"].astype(str).str.strip().str.upper() == normalized_target]
|
| 74 |
+
if not match.empty:
|
| 75 |
+
return match.iloc[0].to_dict()
|
| 76 |
+
|
| 77 |
+
return None
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
# ── Main Endpoint ────────────────────────────────────────────────────────
|
| 81 |
+
|
| 82 |
+
@router.post(
|
| 83 |
+
"/{company_id}",
|
| 84 |
+
response_model=InvestigationResult,
|
| 85 |
+
summary="تحليل المقارنة المتقاطعة عبر محرك القواعد"
|
| 86 |
+
)
|
| 87 |
+
async def investigate_company(
|
| 88 |
+
company_id: str
|
| 89 |
+
):
|
| 90 |
+
"""
|
| 91 |
+
Cross-reference a company's data from Ahlya (DB), JORT (DB enrichment),
|
| 92 |
+
and RNE (DB enrichment) using a local Rule-Based comparison engine.
|
| 93 |
+
"""
|
| 94 |
+
logger.info(f"📋 Investigation request for company_id: {company_id}")
|
| 95 |
+
|
| 96 |
+
# ── 1. Retrieve enriched data from Supabase ────────────────────────────
|
| 97 |
+
client = get_user_client() # Public read
|
| 98 |
+
resp = client.table("enriched_companies").select("*").eq("company_id", company_id).execute()
|
| 99 |
+
|
| 100 |
+
# Note: If not in enriched_companies, we'll try to build it from basic tables
|
| 101 |
+
enriched = resp.data[0] if resp.data else {}
|
| 102 |
+
company_name = enriched.get("company_name", "")
|
| 103 |
+
wilaya = enriched.get("wilaya", "")
|
| 104 |
+
enrichment_data = enriched.get("data", {})
|
| 105 |
+
|
| 106 |
+
# ── 2. Retrieve Ahlya/Unified data from DB ──────────────────────────
|
| 107 |
+
ahlya_data = await _get_ahlya_data(company_id, company_name)
|
| 108 |
+
|
| 109 |
+
if not company_name and ahlya_data:
|
| 110 |
+
company_name = ahlya_data.get("name", "")
|
| 111 |
+
if not wilaya and ahlya_data:
|
| 112 |
+
wilaya = ahlya_data.get("wilaya", "")
|
| 113 |
+
|
| 114 |
+
# Extract JORT and RNE from enrichment data or from unified fetch
|
| 115 |
+
jort_data = enrichment_data.get("jort", {})
|
| 116 |
+
if not jort_data and ahlya_data:
|
| 117 |
+
# Fallback to direct JORT data from joined view
|
| 118 |
+
jort_data = {
|
| 119 |
+
"jort_ref": ahlya_data.get("jort_ref"),
|
| 120 |
+
"jort_date": ahlya_data.get("jort_date"),
|
| 121 |
+
"capital_social": ahlya_data.get("jort_capital")
|
| 122 |
+
}
|
| 123 |
+
|
| 124 |
+
rne_data = enrichment_data.get("rne", {})
|
| 125 |
+
if not rne_data and ahlya_data:
|
| 126 |
+
# Fallback to RNE data from joined view
|
| 127 |
+
rne_data = {
|
| 128 |
+
"rne_id": ahlya_data.get("rne_id"),
|
| 129 |
+
"tax_id": ahlya_data.get("rne_tax_id"),
|
| 130 |
+
"capital": ahlya_data.get("rne_capital")
|
| 131 |
+
}
|
| 132 |
+
|
| 133 |
+
# Track which sources were used
|
| 134 |
+
sources_used = []
|
| 135 |
+
if ahlya_data:
|
| 136 |
+
sources_used.append("أهلية (DB)")
|
| 137 |
+
if jort_data and (jort_data.get("jort_ref") or jort_data.get("announcements")):
|
| 138 |
+
sources_used.append("الرائد الرسمي (JORT)")
|
| 139 |
+
if rne_data and (rne_data.get("tax_id") or rne_data.get("capital_social") or rne_data.get("capital")):
|
| 140 |
+
sources_used.append("السجل الوطني (RNE)")
|
| 141 |
+
|
| 142 |
+
if not sources_used:
|
| 143 |
+
raise HTTPException(
|
| 144 |
+
status_code=404,
|
| 145 |
+
detail="لا توجد بيانات كافية لإجراء التحليل المتقاطع"
|
| 146 |
+
)
|
| 147 |
+
|
| 148 |
+
# ── 3. Build the payload for the engine ───────────────────────────────
|
| 149 |
+
ahlya_payload = ahlya_data or {"company_name": company_name, "wilaya": wilaya}
|
| 150 |
+
jort_payload = jort_data
|
| 151 |
+
rne_payload = rne_data
|
| 152 |
+
|
| 153 |
+
# Apply clean_nans (Bouclier Moez Elbey)
|
| 154 |
+
ahlya_payload = clean_nans(ahlya_payload)
|
| 155 |
+
jort_payload = clean_nans(jort_payload)
|
| 156 |
+
rne_payload = clean_nans(rne_payload)
|
| 157 |
+
|
| 158 |
+
# ── 4. Call Local Engine Analysis ─────────────────────────────────────
|
| 159 |
+
logger.info(
|
| 160 |
+
f"🚀 Sending to Local Engine: company='{company_name}', "
|
| 161 |
+
f"sources={sources_used}"
|
| 162 |
+
)
|
| 163 |
+
|
| 164 |
+
raw_analysis = await llm_service.analyze_cross_check(
|
| 165 |
+
ahlya_data=ahlya_payload,
|
| 166 |
+
jort_data=jort_payload,
|
| 167 |
+
rne_data=rne_payload,
|
| 168 |
+
)
|
| 169 |
+
|
| 170 |
+
# Parse into Pydantic model (validates schema)
|
| 171 |
+
analysis = LLMAnalysis(
|
| 172 |
+
match_score=raw_analysis.get("match_score", 0),
|
| 173 |
+
status=raw_analysis.get("status", "Pending"),
|
| 174 |
+
findings=raw_analysis.get("findings", []),
|
| 175 |
+
red_flags=raw_analysis.get("red_flags", []),
|
| 176 |
+
summary_ar=raw_analysis.get("summary_ar", ""),
|
| 177 |
+
)
|
| 178 |
+
|
| 179 |
+
# ── 5. Build response ────────────────────────────────────────────────
|
| 180 |
+
return InvestigationResult(
|
| 181 |
+
company_id=company_id,
|
| 182 |
+
company_name=company_name or "Unknown",
|
| 183 |
+
wilaya=wilaya or "Unknown",
|
| 184 |
+
analysis=analysis,
|
| 185 |
+
sources_used=sources_used,
|
| 186 |
+
analyzed_at=datetime.utcnow().isoformat(),
|
| 187 |
+
model_used="rule-based-engine",
|
| 188 |
+
)
|
| 189 |
+
|
app/api/v1/risk.py
CHANGED
|
@@ -6,9 +6,9 @@ from app.models.schemas import WilayaRisk
|
|
| 6 |
router = APIRouter()
|
| 7 |
|
| 8 |
@router.get("/wilayas", response_model=List[WilayaRisk])
|
| 9 |
-
def list_risks():
|
| 10 |
-
return get_all_risks()
|
| 11 |
|
| 12 |
@router.get("/wilayas/{name}", response_model=WilayaRisk)
|
| 13 |
-
def read_risk(name: str):
|
| 14 |
-
return get_risk_for_wilaya(name)
|
|
|
|
| 6 |
router = APIRouter()
|
| 7 |
|
| 8 |
@router.get("/wilayas", response_model=List[WilayaRisk])
|
| 9 |
+
async def list_risks():
|
| 10 |
+
return await get_all_risks()
|
| 11 |
|
| 12 |
@router.get("/wilayas/{name}", response_model=WilayaRisk)
|
| 13 |
+
async def read_risk(name: str):
|
| 14 |
+
return await get_risk_for_wilaya(name)
|
app/api/v1/stats.py
CHANGED
|
@@ -5,9 +5,9 @@ from app.models.schemas import NationalStats, WilayaStats
|
|
| 5 |
router = APIRouter()
|
| 6 |
|
| 7 |
@router.get("/national", response_model=NationalStats)
|
| 8 |
-
def read_national_stats():
|
| 9 |
-
return get_national_stats()
|
| 10 |
|
| 11 |
@router.get("/wilayas/{name}", response_model=WilayaStats)
|
| 12 |
-
def read_wilaya_stats(name: str):
|
| 13 |
-
return get_wilaya_stats(name)
|
|
|
|
| 5 |
router = APIRouter()
|
| 6 |
|
| 7 |
@router.get("/national", response_model=NationalStats)
|
| 8 |
+
async def read_national_stats():
|
| 9 |
+
return await get_national_stats()
|
| 10 |
|
| 11 |
@router.get("/wilayas/{name}", response_model=WilayaStats)
|
| 12 |
+
async def read_wilaya_stats(name: str):
|
| 13 |
+
return await get_wilaya_stats(name)
|
app/core/utils.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
import math
|
|
|
|
| 2 |
from typing import Any
|
| 3 |
|
| 4 |
def clean_nans(obj: Any) -> Any:
|
|
@@ -11,6 +12,8 @@ def clean_nans(obj: Any) -> Any:
|
|
| 11 |
if math.isnan(obj) or math.isinf(obj):
|
| 12 |
return None
|
| 13 |
return obj
|
|
|
|
|
|
|
| 14 |
elif isinstance(obj, dict):
|
| 15 |
return {k: clean_nans(v) for k, v in obj.items()}
|
| 16 |
elif isinstance(obj, list):
|
|
|
|
| 1 |
import math
|
| 2 |
+
from decimal import Decimal
|
| 3 |
from typing import Any
|
| 4 |
|
| 5 |
def clean_nans(obj: Any) -> Any:
|
|
|
|
| 12 |
if math.isnan(obj) or math.isinf(obj):
|
| 13 |
return None
|
| 14 |
return obj
|
| 15 |
+
elif isinstance(obj, Decimal):
|
| 16 |
+
return float(obj)
|
| 17 |
elif isinstance(obj, dict):
|
| 18 |
return {k: clean_nans(v) for k, v in obj.items()}
|
| 19 |
elif isinstance(obj, list):
|
app/services/aggregation.py
CHANGED
|
@@ -1,70 +1,72 @@
|
|
| 1 |
-
from app.services.data_loader import get_companies_df, get_stats_data
|
| 2 |
-
from app.models.schemas import NationalStats, WilayaStats
|
| 3 |
-
|
| 4 |
-
def _safe_value_counts(df, col, head=None):
|
| 5 |
-
"""Safely get value_counts for a column, returning {} if column doesn't exist."""
|
| 6 |
-
if col not in df.columns:
|
| 7 |
-
return {}
|
| 8 |
-
vc = df[col].dropna().value_counts()
|
| 9 |
-
if head:
|
| 10 |
-
vc = vc.head(head)
|
| 11 |
-
return vc.to_dict()
|
| 12 |
-
|
| 13 |
-
async def get_national_stats():
|
| 14 |
-
stats = get_stats_data()
|
| 15 |
-
df = await get_companies_df()
|
| 16 |
-
|
| 17 |
-
total = stats.get("total", 0)
|
| 18 |
-
wilayas = stats.get("wilayas", {})
|
| 19 |
-
types = stats.get("types", {})
|
| 20 |
-
|
| 21 |
-
if not df.empty:
|
| 22 |
-
top_groups = _safe_value_counts(df, 'activity_group')
|
| 23 |
-
top_activities = _safe_value_counts(df, 'activity_normalized', head=10)
|
| 24 |
-
else:
|
| 25 |
-
top_groups = {}
|
| 26 |
-
top_activities = {}
|
| 27 |
-
|
| 28 |
-
return NationalStats(
|
| 29 |
-
total=total,
|
| 30 |
-
wilayas=wilayas,
|
| 31 |
-
types=types,
|
| 32 |
-
top_activities=top_activities,
|
| 33 |
-
top_groups=top_groups
|
| 34 |
-
)
|
| 35 |
-
|
| 36 |
-
async def get_wilaya_stats(wilaya: str):
|
| 37 |
-
df = await get_companies_df()
|
| 38 |
-
stats = get_stats_data()
|
| 39 |
-
|
| 40 |
-
if df.empty:
|
| 41 |
-
return None
|
| 42 |
-
|
| 43 |
-
wilaya_df = df[df['wilaya'] == wilaya]
|
| 44 |
-
count = len(wilaya_df)
|
| 45 |
-
|
| 46 |
-
total = stats.get("total", 1)
|
| 47 |
-
pct = round((count / total) * 100, 1)
|
| 48 |
-
|
| 49 |
-
# Rank
|
| 50 |
-
sorted_wilayas = sorted(stats.get("wilayas", {}).items(), key=lambda x: x[1], reverse=True)
|
| 51 |
-
rank = next((i for i, (w, c) in enumerate(sorted_wilayas, 1) if w == wilaya), 0)
|
| 52 |
-
|
| 53 |
-
if not wilaya_df.empty:
|
| 54 |
-
top_groups = _safe_value_counts(wilaya_df, 'activity_group')
|
| 55 |
-
top_activities = _safe_value_counts(wilaya_df, 'activity_normalized', head=10)
|
| 56 |
-
types = _safe_value_counts(wilaya_df, 'type')
|
| 57 |
-
else:
|
| 58 |
-
top_groups = {}
|
| 59 |
-
top_activities = {}
|
| 60 |
-
types = {}
|
| 61 |
-
|
| 62 |
-
return WilayaStats(
|
| 63 |
-
wilaya=wilaya,
|
| 64 |
-
count=count,
|
| 65 |
-
pct_national=pct,
|
| 66 |
-
rank=rank,
|
| 67 |
-
types=types,
|
| 68 |
-
top_groups=top_groups,
|
| 69 |
-
top_activities=top_activities
|
| 70 |
-
)
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.services.data_loader import get_companies_df, get_stats_data
|
| 2 |
+
from app.models.schemas import NationalStats, WilayaStats
|
| 3 |
+
|
| 4 |
+
def _safe_value_counts(df, col, head=None):
|
| 5 |
+
"""Safely get value_counts for a column, returning {} if column doesn't exist."""
|
| 6 |
+
if col not in df.columns:
|
| 7 |
+
return {}
|
| 8 |
+
vc = df[col].dropna().value_counts()
|
| 9 |
+
if head:
|
| 10 |
+
vc = vc.head(head)
|
| 11 |
+
return vc.to_dict()
|
| 12 |
+
|
| 13 |
+
async def get_national_stats():
    """Build the national-level statistics payload.

    Combines the precomputed aggregates (total, per-wilaya and per-type
    counts) with top activity groups/activities derived from the loaded
    companies DataFrame.

    Returns:
        NationalStats populated from the stats file and the DataFrame.
    """
    summary = get_stats_data()
    companies = await get_companies_df()

    if companies.empty:
        groups, activities = {}, {}
    else:
        groups = _safe_value_counts(companies, 'activity_group')
        activities = _safe_value_counts(companies, 'activity_normalized', head=10)

    return NationalStats(
        total=summary.get("total", 0),
        wilayas=summary.get("wilayas", {}),
        types=summary.get("types", {}),
        top_activities=activities,
        top_groups=groups,
    )
|
| 35 |
+
|
| 36 |
+
async def get_wilaya_stats(wilaya: str):
    """Build the statistics payload for a single wilaya.

    Args:
        wilaya: wilaya name to aggregate on.

    Returns:
        WilayaStats for the given wilaya, or None when no company data
        is loaded at all.
    """
    companies = await get_companies_df()
    summary = get_stats_data()

    if companies.empty:
        return None

    subset = companies[companies['wilaya'] == wilaya]
    count = len(subset)

    # Share of the national total; default of 1 guards the division.
    national_total = summary.get("total", 1)
    pct = round((count / national_total) * 100, 1)

    # 1-based rank of this wilaya by company count; 0 when not found.
    ranking = sorted(summary.get("wilayas", {}).items(), key=lambda kv: kv[1], reverse=True)
    rank = 0
    for position, (name, _) in enumerate(ranking, 1):
        if name == wilaya:
            rank = position
            break

    if subset.empty:
        groups, activities, types = {}, {}, {}
    else:
        groups = _safe_value_counts(subset, 'activity_group')
        activities = _safe_value_counts(subset, 'activity_normalized', head=10)
        types = _safe_value_counts(subset, 'type')

    return WilayaStats(
        wilaya=wilaya,
        count=count,
        pct_national=pct,
        rank=rank,
        types=types,
        top_groups=groups,
        top_activities=activities,
    )
|
| 71 |
+
|
| 72 |
+
|
app/services/osint_links.py
CHANGED
|
@@ -1,32 +1,33 @@
|
|
| 1 |
-
import urllib.parse
|
| 2 |
-
import os
|
| 3 |
-
from dotenv import load_dotenv
|
| 4 |
-
|
| 5 |
-
load_dotenv()
|
| 6 |
-
|
| 7 |
-
INTERNAL_OSINT_MODE = os.getenv("INTERNAL_OSINT_MODE", "False").lower() == "true"
|
| 8 |
-
|
| 9 |
-
def generate_links(company_name: str, wilaya: str):
|
| 10 |
-
base_name = urllib.parse.quote(company_name)
|
| 11 |
-
|
| 12 |
-
links = {
|
| 13 |
-
"Google": f"https://www.google.com/search?q={base_name} {wilaya} site:tn",
|
| 14 |
-
"Facebook": f"https://www.facebook.com/search/top?q={base_name}"
|
| 15 |
-
}
|
| 16 |
-
|
| 17 |
-
if INTERNAL_OSINT_MODE:
|
| 18 |
-
links["RNE"] = f"https://www.registre-entreprises.tn/search?q={base_name}" # Placeholder
|
| 19 |
-
links["JORT"] = f"http://www.iort.gov.tn/search?q={base_name}" # Placeholder
|
| 20 |
-
|
| 21 |
-
return links
|
| 22 |
-
|
| 23 |
-
async def get_company_links(company_id: int):
|
| 24 |
-
from app.services.data_loader import get_companies_df
|
| 25 |
-
df = await get_companies_df()
|
| 26 |
-
|
| 27 |
-
company = df[df['id'] == company_id]
|
| 28 |
-
if company.empty:
|
| 29 |
-
return {}
|
| 30 |
-
|
| 31 |
-
row = company.iloc[0]
|
| 32 |
-
return generate_links(row['name'], row['wilaya'])
|
|
|
|
|
|
| 1 |
+
import urllib.parse
|
| 2 |
+
import os
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
|
| 5 |
+
load_dotenv()
|
| 6 |
+
|
| 7 |
+
INTERNAL_OSINT_MODE = os.getenv("INTERNAL_OSINT_MODE", "False").lower() == "true"
|
| 8 |
+
|
| 9 |
+
def generate_links(company_name: str, wilaya: str):
    """Build OSINT search URLs for a company.

    Args:
        company_name: raw company name (may contain spaces / Arabic text).
        wilaya: wilaya name used to narrow the Google query.

    Returns:
        Mapping of source label -> fully URL-encoded search link. The
        RNE/JORT placeholder links are only included in internal mode.
    """
    encoded_name = urllib.parse.quote(company_name)

    # Bug fix: the Google link previously interpolated a raw space and the
    # unquoted wilaya into the query string, producing a malformed URL for
    # multi-word or non-ASCII input. Encode the entire query instead.
    google_query = urllib.parse.quote(f"{company_name} {wilaya} site:tn")

    links = {
        "Google": f"https://www.google.com/search?q={google_query}",
        "Facebook": f"https://www.facebook.com/search/top?q={encoded_name}"
    }

    if INTERNAL_OSINT_MODE:
        links["RNE"] = f"https://www.registre-entreprises.tn/search?q={encoded_name}"  # Placeholder
        links["JORT"] = f"http://www.iort.gov.tn/search?q={encoded_name}"  # Placeholder

    return links
|
| 22 |
+
|
| 23 |
+
async def get_company_links(company_id: int):
    """Return the OSINT links for one company, or {} when the id is unknown.

    Args:
        company_id: numeric identifier matched against the 'id' column.
    """
    # Local import keeps module load order flexible (avoids an import cycle).
    from app.services.data_loader import get_companies_df
    companies = await get_companies_df()

    match = companies[companies['id'] == company_id]
    if match.empty:
        return {}

    record = match.iloc[0]
    return generate_links(record['name'], record['wilaya'])
|
| 33 |
+
|
app/services/risk_engine.py
CHANGED
|
@@ -1,150 +1,174 @@
|
|
| 1 |
-
from app.services.data_loader import get_companies_df
|
| 2 |
-
from app.models.schemas import WilayaRisk, Flag
|
| 3 |
-
import numpy as np
|
| 4 |
-
|
| 5 |
-
def generate_risk_commentary(wilaya_data: dict, risk_scores: dict) -> dict:
|
| 6 |
-
"""
|
| 7 |
-
Génère des commentaires éditoriaux en arabe basés sur les scores de risque.
|
| 8 |
-
"""
|
| 9 |
-
s1, s2, s3 = risk_scores['s1'], risk_scores['s2'], risk_scores['s3']
|
| 10 |
-
index = risk_scores['baath_index']
|
| 11 |
-
|
| 12 |
-
# Defaults
|
| 13 |
-
level = "LOW"
|
| 14 |
-
level_ar = "منخفض"
|
| 15 |
-
color = "emerald"
|
| 16 |
-
|
| 17 |
-
if index >= 70:
|
| 18 |
-
level = "HIGH"
|
| 19 |
-
level_ar = "مرتفع"
|
| 20 |
-
color = "red"
|
| 21 |
-
elif index >= 40:
|
| 22 |
-
level = "MEDIUM"
|
| 23 |
-
level_ar = "متوسط"
|
| 24 |
-
color = "amber"
|
| 25 |
-
|
| 26 |
-
comments = []
|
| 27 |
-
|
| 28 |
-
# S1 - Dépendance
|
| 29 |
-
if s1 > 0.6:
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
recommendations.append("ال
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
"
|
| 67 |
-
"
|
| 68 |
-
"
|
| 69 |
-
"
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
#
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.services.data_loader import get_companies_df
|
| 2 |
+
from app.models.schemas import WilayaRisk, Flag
|
| 3 |
+
import numpy as np
|
| 4 |
+
|
| 5 |
+
def generate_risk_commentary(wilaya_data: dict, risk_scores: dict) -> dict:
    """Generate Arabic editorial commentary from a wilaya's risk scores.

    Args:
        wilaya_data: {'groups': {group: count}, 'types': {type: count}}
            as produced by compute_baath_index_v2's `details`.
        risk_scores: {'s1', 's2', 's3', 'baath_index'} component scores.

    Returns:
        dict with keys level, level_ar, color, comment_ar, recommendations,
        ready to be splatted into WilayaRisk.
    """
    s1, s2, s3 = risk_scores['s1'], risk_scores['s2'], risk_scores['s3']
    index = risk_scores['baath_index']

    # Map the composite index onto a traffic-light level (default: LOW).
    level, level_ar, color = "LOW", "منخفض", "emerald"
    if index >= 70:
        level, level_ar, color = "HIGH", "مرتفع", "red"
    elif index >= 40:
        level, level_ar, color = "MEDIUM", "متوسط", "amber"

    comments = []
    # Hoisted: the group total was recomputed for every element inside the
    # comprehension and again in the s2 branch. `or 1` guards division by
    # zero when the mapping is empty.
    group_total = sum(wilaya_data['groups'].values()) or 1

    # S1 — dependency on public-resource sectors.
    if s1 > 0.6:
        dominant_groups = [g for g, count in wilaya_data['groups'].items()
                           if g in ['AGRI_NATUREL', 'ENVIRONNEMENT', 'ENERGIE_MINES']
                           and count / group_total > 0.3]
        if dominant_groups:
            comments.append(f"الولاية تعتمد بشكل كبير على الأنشطة المرتبطة بالموارد العمومية ({', '.join(dominant_groups)})")

    # S2 — sector concentration.
    if s2 > 0.7:
        if wilaya_data['groups']:
            top_group = max(wilaya_data['groups'].items(), key=lambda x: x[1])[0]
            pct = (wilaya_data['groups'][top_group] / group_total) * 100
            comments.append(f"تركيز عالٍ جدا في مجموعة نشاط واحدة ({top_group}: {pct:.0f}%)")
    elif s2 > 0.5:
        comments.append("تركيز ملحوظ في عدد محدود من القطاعات")

    # S3 — governance imbalance (local vs regional companies).
    if s3 > 0.5:
        total_types = sum(wilaya_data['types'].values()) or 1
        local_pct = (wilaya_data['types'].get('محلية', 0) / total_types) * 100
        regional_pct = (wilaya_data['types'].get('جهوية', 0) / total_types) * 100
        comments.append(f"اختلال واضح في الحوكمة: {local_pct:.0f}% محلية مقابل {regional_pct:.0f}% جهوية")

    # Actionable follow-ups, gated on the same thresholds as the comments.
    recommendations = []
    if s1 > 0.6:
        recommendations.append("التحقق من الأراضي الدولية المُسندة (OTD)")
        recommendations.append("البحث في صفقات التطهير والبيئة (TUNEPS)")
    if s2 > 0.7:
        recommendations.append("تحليل الاحتكارات القطاعية المحتملة")
    if s3 > 0.5:
        recommendations.append("مراجعة التوازن بين المحلي والجهوي في تركيبة مجالس الإدارة")
    if index > 70:
        recommendations.append("يُنصح بتحقيق صحفي معمق على هذه الولاية")

    return {
        "level": level,
        "level_ar": level_ar,
        "color": color,
        "comment_ar": " · ".join(comments) if comments else "لا توجد إشارات خطر واضحة في البيانات الحالية",
        "recommendations": recommendations
    }
|
| 72 |
+
|
| 73 |
+
def compute_baath_index_v2(wilaya_df):
    """Compute the Ba7ath Index (0-100) for one wilaya's companies.

    INDEX = 100 * (0.4 * s1 + 0.4 * s2 + 0.2 * s3), where:
      s1: dependency on public-resource sectors (AGRI, ENV, MINES)
      s2: sector concentration (max share of any activity group)
      s3: governance imbalance, abs(% local - % regional)

    Args:
        wilaya_df: companies DataFrame filtered to a single wilaya;
            must expose 'activity_group' and 'type' columns.

    Returns:
        (baath_index, s1, s2, s3, flags, details) — always a 6-tuple.
        `details` carries raw group/type counts for generate_risk_commentary.
    """
    if wilaya_df.empty:
        # Bug fix: this branch used to return only 5 values while the normal
        # path returns 6, so any caller unpacking 6 crashed on empty input.
        return 0.0, 0.0, 0.0, 0.0, [], {'groups': {}, 'types': {}}

    total = len(wilaya_df)
    flags = []

    # --- s1: share of companies in resource-linked activity groups ---
    resource_groups = ['AGRI_NATUREL', 'ENVIRONNEMENT', 'ENERGIE_MINES']
    resource_count = wilaya_df[wilaya_df['activity_group'].isin(resource_groups)].shape[0]
    s1 = resource_count / total if total > 0 else 0.0

    if s1 > 0.6:
        flags.append(Flag(code="RESOURCE_DEPENDENT", severity="high", label_ar="اعتماد كبير على الأنشطة المرتبطة بالموارد العمومية"))

    # --- s2: max share of any single activity group ---
    group_counts = wilaya_df['activity_group'].value_counts(normalize=True)
    s2 = group_counts.max() if not group_counts.empty else 0.0

    if s2 > 0.7:
        flags.append(Flag(code="ULTRA_CONCENTRATION", severity="medium", label_ar="تركيز عالٍ في مجموعة نشاط واحدة"))

    # --- s3: imbalance between local and regional company types ---
    type_counts = wilaya_df['type'].value_counts(normalize=True)
    pct_local = type_counts.get('محلية', 0.0)
    pct_regional = type_counts.get('جهوية', 0.0)
    s3 = abs(pct_local - pct_regional)

    if s3 > 0.5:
        flags.append(Flag(code="GOVERNANCE_IMBALANCE", severity="low", label_ar="اختلال واضح بين الشركات المحلية والجهوية"))

    # --- final weighted score, clamped to 100 ---
    raw_index = 100 * (0.4 * s1 + 0.4 * s2 + 0.2 * s3)
    baath_index = round(min(raw_index, 100), 1)

    # Raw counts consumed by generate_risk_commentary.
    details = {
        'groups': wilaya_df['activity_group'].value_counts().to_dict(),
        'types': wilaya_df['type'].value_counts().to_dict()
    }

    return baath_index, round(s1, 2), round(s2, 2), round(s3, 2), flags, details
|
| 127 |
+
|
| 128 |
+
async def get_risk_for_wilaya(wilaya: str):
    """Compute the risk profile for one wilaya.

    Args:
        wilaya: wilaya name to score.

    Returns:
        WilayaRisk for the wilaya (a neutral profile when it has no
        companies), or None when no data is loaded at all.
    """
    companies = await get_companies_df()
    if companies.empty:
        return None

    subset = companies[companies['wilaya'] == wilaya]
    if subset.empty:
        # No companies registered here: report a neutral, low-risk profile.
        return WilayaRisk(
            wilaya=wilaya, baath_index=0, s1=0, s2=0, s3=0, flags=[],
            level="LOW", level_ar="منخفض", color="emerald",
            comment_ar="لا توجد بيانات كافية", recommendations=[]
        )

    score, s1, s2, s3, flags, details = compute_baath_index_v2(subset)

    # Editorial fields (level, color, comment_ar, recommendations).
    editorial = generate_risk_commentary(details, {
        's1': s1, 's2': s2, 's3': s3, 'baath_index': score
    })

    return WilayaRisk(
        wilaya=wilaya,
        baath_index=score,
        s1=s1,
        s2=s2,
        s3=s3,
        flags=flags,
        **editorial
    )
|
| 158 |
+
|
| 159 |
+
async def get_all_risks():
    """Return risk profiles for every wilaya, highest Ba7ath index first."""
    companies = await get_companies_df()
    if companies.empty:
        return []

    profiles = []
    for name in companies['wilaya'].unique():
        profile = await get_risk_for_wilaya(name)
        if profile:
            profiles.append(profile)

    profiles.sort(key=lambda r: r.baath_index, reverse=True)
    return profiles
|
| 174 |
+
|