Ba7ath-Project commited on
Commit
7f18aa9
·
0 Parent(s):

Déploiement HF sans base de données, pour de vrai

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitignore +50 -0
  2. README.md +70 -0
  3. backend/.gitignore +18 -0
  4. backend/.python-version +1 -0
  5. backend/ahlya_vs_trovit_fuzzy.py +181 -0
  6. backend/app.py +8 -0
  7. backend/app/api/enrichment.py +529 -0
  8. backend/app/api/v1/auth.py +88 -0
  9. backend/app/api/v1/companies.py +46 -0
  10. backend/app/api/v1/investigate.py +181 -0
  11. backend/app/api/v1/meta.py +28 -0
  12. backend/app/api/v1/risk.py +14 -0
  13. backend/app/api/v1/stats.py +13 -0
  14. backend/app/data/companies.json +0 -0
  15. backend/app/data/stats.json +45 -0
  16. backend/app/database.py +24 -0
  17. backend/app/main.py +91 -0
  18. backend/app/models/enrichment_models.py +77 -0
  19. backend/app/models/schemas.py +74 -0
  20. backend/app/models/user_models.py +12 -0
  21. backend/app/schemas/auth_schemas.py +28 -0
  22. backend/app/services/aggregation.py +71 -0
  23. backend/app/services/auth_service.py +74 -0
  24. backend/app/services/data_loader.py +216 -0
  25. backend/app/services/llm_service.py +201 -0
  26. backend/app/services/osint_links.py +32 -0
  27. backend/app/services/risk_engine.py +168 -0
  28. backend/compare_by_name_fuzzy.py +162 -0
  29. backend/compare_data.py +90 -0
  30. backend/compare_names_with_qwen.py +185 -0
  31. backend/create_admin.py +44 -0
  32. backend/enrich_not_in_trovit.py +71 -0
  33. backend/inspect_db.py +46 -0
  34. backend/readme.md +12 -0
  35. backend/test_auth_flow.py +52 -0
  36. docs/API_Reference.md +103 -0
  37. docs/Authentication_Guide.md +58 -0
  38. docs/Contributing_Guide.md +40 -0
  39. docs/Database_Schema.md +81 -0
  40. docs/Deployment_Guide.md +41 -0
  41. docs/Development_Guide.md +78 -0
  42. docs/Frontend_Architecture.md +59 -0
  43. docs/OSINT_Methodology.md +42 -0
  44. docs/README.md +104 -0
  45. docs/Troubleshooting.md +49 -0
  46. index.html +34 -0
  47. package-lock.json +0 -0
  48. package.json +54 -0
  49. postcss.config.js +6 -0
  50. project_tree.py +16 -0
.gitignore ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
2
+
3
+ # dependencies
4
+ /node_modules
5
+ /.pnp
6
+ .pnp.js
7
+
8
+ # testing
9
+ /coverage
10
+
11
+ # production
12
+ /build
13
+
14
+ # misc
15
+ .DS_Store
16
+ .env.local
17
+ .env.development.local
18
+ .env.test.local
19
+ .env.production.local
20
+ npm-debug.log*
21
+ yarn-debug.log*
22
+ yarn-error.log*
23
+
24
+ # --- Python backend / Ba7ath ---
25
+
26
+ # Environnements virtuels
27
+ venv/
28
+ .env/
29
+ .env.*
31
+ .env
32
+
33
+ # Bytecode / cache
34
+ __pycache__/
35
+ *.py[cod]
36
+ *.pyo
37
+ *.pyd
38
+
39
+ # Bases et données locales
40
+ # *.db
41
+ *.sqlite3
42
+ instance/
43
+
44
+ # Logs
45
+ *.log
46
+ logs/
47
+
48
+ .vercel
49
+ backend/.env
50
+ backend/bulk_test.py
README.md ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Getting Started with Create React App
2
+
3
+ This project was bootstrapped with [Create React App](https://github.com/facebook/create-react-app).
4
+
5
+ ## Available Scripts
6
+
7
+ In the project directory, you can run:
8
+
9
+ ### `npm start`
10
+
11
+ Runs the app in the development mode.\
12
+ Open [http://localhost:3000](http://localhost:3000) to view it in your browser.
13
+
14
+ The page will reload when you make changes.\
15
+ You may also see any lint errors in the console.
16
+
17
+ ### `npm test`
18
+
19
+ Launches the test runner in the interactive watch mode.\
20
+ See the section about [running tests](https://facebook.github.io/create-react-app/docs/running-tests) for more information.
21
+
22
+ ### `npm run build`
23
+
24
+ Builds the app for production to the `build` folder.\
25
+ It correctly bundles React in production mode and optimizes the build for the best performance.
26
+
27
+ The build is minified and the filenames include the hashes.\
28
+ Your app is ready to be deployed!
29
+
30
+ See the section about [deployment](https://facebook.github.io/create-react-app/docs/deployment) for more information.
31
+
32
+ ### `npm run eject`
33
+
34
+ **Note: this is a one-way operation. Once you `eject`, you can't go back!**
35
+
36
+ If you aren't satisfied with the build tool and configuration choices, you can `eject` at any time. This command will remove the single build dependency from your project.
37
+
38
+ Instead, it will copy all the configuration files and the transitive dependencies (webpack, Babel, ESLint, etc) right into your project so you have full control over them. All of the commands except `eject` will still work, but they will point to the copied scripts so you can tweak them. At this point you're on your own.
39
+
40
+ You don't have to ever use `eject`. The curated feature set is suitable for small and middle deployments, and you shouldn't feel obligated to use this feature. However we understand that this tool wouldn't be useful if you couldn't customize it when you are ready for it.
41
+
42
+ ## Learn More
43
+
44
+ You can learn more in the [Create React App documentation](https://facebook.github.io/create-react-app/docs/getting-started).
45
+
46
+ To learn React, check out the [React documentation](https://reactjs.org/).
47
+
48
+ ### Code Splitting
49
+
50
+ This section has moved here: [https://facebook.github.io/create-react-app/docs/code-splitting](https://facebook.github.io/create-react-app/docs/code-splitting)
51
+
52
+ ### Analyzing the Bundle Size
53
+
54
+ This section has moved here: [https://facebook.github.io/create-react-app/docs/analyzing-the-bundle-size](https://facebook.github.io/create-react-app/docs/analyzing-the-bundle-size)
55
+
56
+ ### Making a Progressive Web App
57
+
58
+ This section has moved here: [https://facebook.github.io/create-react-app/docs/making-a-progressive-web-app](https://facebook.github.io/create-react-app/docs/making-a-progressive-web-app)
59
+
60
+ ### Advanced Configuration
61
+
62
+ This section has moved here: [https://facebook.github.io/create-react-app/docs/advanced-configuration](https://facebook.github.io/create-react-app/docs/advanced-configuration)
63
+
64
+ ### Deployment
65
+
66
+ This section has moved here: [https://facebook.github.io/create-react-app/docs/deployment](https://facebook.github.io/create-react-app/docs/deployment)
67
+
68
+ ### `npm run build` fails to minify
69
+
70
+ This section has moved here: [https://facebook.github.io/create-react-app/docs/troubleshooting#npm-run-build-fails-to-minify](https://facebook.github.io/create-react-app/docs/troubleshooting#npm-run-build-fails-to-minify)
backend/.gitignore ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ venv/
2
+ __pycache__/
3
+ # Ignorer tout le dossier des scripts sensibles
4
+ app/scripts/
5
+ force_admin.py
6
+
7
+
8
+ # Ignorer systématiquement les bases de données (Excel et CSV)
9
+ *.xlsx
10
+ *.csv
11
+
12
+ # Ignorer les journaux de progression et fichiers temporaires
13
+ ba7ath_progress.txt
14
+ *.log
15
+ *.txt
16
+ .env
17
+ .env.*
18
+ ../.env*.db
backend/.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.11.13
backend/ahlya_vs_trovit_fuzzy.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from pathlib import Path
3
+
4
+ import pandas as pd
5
+ from rapidfuzz import process, fuzz
6
+
7
# -------- CONFIG --------

# Input CSVs: the Ahlya registry and the Trovit scrape to compare against.
CSV_AHLYA = Path("Ahlya_Total_Feuil1.csv")
CSV_TROVIT = Path("trovit_charikat_ahliya_all.csv")

# Column holding the company name in each file.
COL_NAME_AHLYA = "اسم_الشركة"
COL_NAME_TROVIT = "name"

# Decision thresholds:
# score >= MATCH_THRESHOLD                    => strict match
# MAYBE_THRESHOLD <= score < MATCH_THRESHOLD  => needs manual review
MATCH_THRESHOLD = 95
MAYBE_THRESHOLD = 85

# Output files
OUT_ALL = Path("ahlya_vs_trovit_fuzzy_all.csv")
OUT_NON_MATCH = Path("ahlya_not_in_trovit_fuzzy.csv")
OUT_MATCHES_STRICT = Path("ahlya_matches_stricts.csv")
OUT_MAYBE = Path("ahlya_a_verifier.csv")

# CSV encoding (utf-8 with BOM, Excel-friendly)
ENCODING = "utf-8-sig"

# ------------------------
32
+
33
+
34
+ def normalize_name(s: str) -> str:
35
+ """Normalisation agressive pour comparer des noms arabes proches."""
36
+ if pd.isna(s):
37
+ return ""
38
+ s = str(s).strip()
39
+
40
+ # Unifier quelques lettres arabes fréquentes
41
+ s = s.replace("أ", "ا").replace("إ", "ا").replace("آ", "ا")
42
+ s = s.replace("ى", "ي").replace("ئ", "ي").replace("ؤ", "و")
43
+ s = s.replace("ة", "ه")
44
+
45
+ # Supprimer mots génériques
46
+ generic = [
47
+ "شركة", "الشركة",
48
+ "الاهلية", "الأهلية", "الاهليه",
49
+ "المحلية", "المحليه",
50
+ "الجهوية", "الجهويه",
51
+ ]
52
+ for g in generic:
53
+ s = s.replace(g, "")
54
+
55
+ # Supprimer ponctuation simple et normaliser les espaces
56
+ s = re.sub(r"[^\w\s]", " ", s)
57
+ s = " ".join(s.split())
58
+ return s
59
+
60
+
61
def main():
    """Fuzzy-match each Ahlya company name against the Trovit dataset.

    Writes four CSVs: all rows with match info, strict matches, rows to
    review manually, and non-matches.

    Raises:
        FileNotFoundError: if an input CSV is missing.
        KeyError: if an expected name column is absent.
    """
    if not CSV_AHLYA.exists():
        raise FileNotFoundError(CSV_AHLYA.resolve())
    if not CSV_TROVIT.exists():
        raise FileNotFoundError(CSV_TROVIT.resolve())

    # 1. Load both files
    df_ahlya = pd.read_csv(CSV_AHLYA, encoding=ENCODING)
    df_trovit = pd.read_csv(CSV_TROVIT, encoding=ENCODING)

    if COL_NAME_AHLYA not in df_ahlya.columns:
        raise KeyError(
            f"Colonne '{COL_NAME_AHLYA}' absente dans {CSV_AHLYA.name} : "
            f"{list(df_ahlya.columns)}"
        )
    if COL_NAME_TROVIT not in df_trovit.columns:
        raise KeyError(
            f"Colonne '{COL_NAME_TROVIT}' absente dans {CSV_TROVIT.name} : "
            f"{list(df_trovit.columns)}"
        )

    # 2. Build normalized versions of the names
    df_ahlya["__name_norm__"] = df_ahlya[COL_NAME_AHLYA].apply(normalize_name)
    df_trovit["__name_norm__"] = df_trovit[COL_NAME_TROVIT].apply(normalize_name)

    # Trovit names as a plain list for RapidFuzz; extractOne returns the
    # positional index into this list, which matches df_trovit row order.
    trovit_names = df_trovit["__name_norm__"].tolist()

    best_scores = []
    best_indexes = []

    # 3. For each Ahlya company, find the best Trovit candidate
    for _, row in df_ahlya.iterrows():
        name_a = row["__name_norm__"]

        # Empty normalized name: nothing to match against.
        if not name_a:
            best_scores.append(0)
            best_indexes.append(None)
            continue

        match = process.extractOne(
            name_a,
            trovit_names,
            scorer=fuzz.token_sort_ratio,
        )

        if match is None:
            best_scores.append(0)
            best_indexes.append(None)
        else:
            _, score, idx = match
            best_scores.append(score)
            best_indexes.append(idx)

    df_ahlya["match_score"] = best_scores
    df_ahlya["trovit_index"] = best_indexes
    df_ahlya["has_candidate"] = df_ahlya["trovit_index"].notna()

    # 4. Pull a few Trovit columns over for context (name, wilaya, ids, ...)
    def extract_from_trovit(idx, col):
        # idx may be NaN (no candidate); guard before positional lookup.
        if pd.isna(idx):
            return None
        idx = int(idx)
        if 0 <= idx < len(df_trovit):
            return df_trovit.iloc[idx].get(col)
        return None

    trovit_cols_to_add = [
        COL_NAME_TROVIT,
        "charika_id",
        "tax_id",
        "wilaya",
        "delegation",
        "capital",
        "legal_form",
        "detail_url",
    ]

    for col in trovit_cols_to_add:
        new_col = f"trovit_{col}"
        if col in df_trovit.columns:
            df_ahlya[new_col] = df_ahlya["trovit_index"].apply(
                lambda i: extract_from_trovit(i, col)
            )
        else:
            # Column absent from the Trovit file: keep the slot, fill with None.
            df_ahlya[new_col] = None

    # 5. Tag the match categories
    df_ahlya["matched_strict"] = df_ahlya["match_score"] >= MATCH_THRESHOLD
    df_ahlya["matched_maybe"] = (
        (df_ahlya["match_score"] >= MAYBE_THRESHOLD)
        & (df_ahlya["match_score"] < MATCH_THRESHOLD)
    )

    # 6. Save every row with its match info
    df_ahlya.to_csv(OUT_ALL, index=False, encoding=ENCODING)

    # 7. Derived files (strict / to-review / non-match)
    df_matches = df_ahlya[df_ahlya["matched_strict"]].copy()
    df_maybe = df_ahlya[df_ahlya["matched_maybe"]].copy()
    df_non_match = df_ahlya[~(df_ahlya["matched_strict"] | df_ahlya["matched_maybe"])].copy()

    df_matches.to_csv(OUT_MATCHES_STRICT, index=False, encoding=ENCODING)
    df_maybe.to_csv(OUT_MAYBE, index=False, encoding=ENCODING)
    df_non_match.to_csv(OUT_NON_MATCH, index=False, encoding=ENCODING)

    print(f"[INFO] Lignes Ahlya : {len(df_ahlya)}")
    print(f"[INFO] Matchs stricts (score >= {MATCH_THRESHOLD}) : {len(df_matches)}")
    print(
        f"[INFO] À vérifier ({MAYBE_THRESHOLD} <= score < {MATCH_THRESHOLD}) : "
        f"{len(df_maybe)}"
    )
    print(f"[INFO] Non-concordances (score < {MAYBE_THRESHOLD}) : {len(df_non_match)}")
    print(f"[OK] Fichier complet : {OUT_ALL.resolve()}")
    print(f"[OK] Matchs stricts : {OUT_MATCHES_STRICT.resolve()}")
    print(f"[OK] À vérifier : {OUT_MAYBE.resolve()}")
    print(f"[OK] Non-concordances : {OUT_NON_MATCH.resolve()}")


if __name__ == "__main__":
    main()
backend/app.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
import uvicorn

# Import the FastAPI instance ('app') from app/main.py
from app.main import app

# Hugging Face executes this file, which serves the API on the expected port.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)
backend/app/api/enrichment.py ADDED
@@ -0,0 +1,529 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException, Depends
2
+ from typing import List, Optional
3
+ from pydantic import BaseModel, Field
4
+ from datetime import datetime
5
+ from sqlalchemy.orm import Session
6
+ import uuid
7
+
8
+ from app.database import get_db
9
+ from app.models.enrichment_models import (
10
+ EnrichedCompany as EnrichedCompanyDB,
11
+ InvestigationNote as InvestigationNoteDB
12
+ )
13
+
14
router = APIRouter()

# --- Pydantic Models (Request/Response shapes) ---

class Shareholder(BaseModel):
    """One shareholder entry in the RNE data."""
    name: str
    percentage: float
    role: str

class RneData(BaseModel):
    """Registry (RNE) data plus raw Trovit CSV columns, kept side by side."""
    # Existing fields
    capital_social: float = 0.0
    legal_form: Optional[str] = None  # Made optional for CSV import compatibility
    registration_number: Optional[str] = ""
    registration_date: Optional[str] = ""
    address: Optional[str] = None
    shareholders: List[Shareholder] = []

    # Trovit CSV fields (1:1 mapping)
    charika_type: Optional[str] = None
    charika_id: Optional[str] = None
    name: Optional[str] = None
    delegation: Optional[str] = None
    zipcode_list: Optional[str] = None
    start_date_raw: Optional[str] = None
    capital: Optional[int] = None  # Distinct from capital_social (float), kept for CSV fidelity
    tax_id: Optional[str] = None
    rc_number: Optional[str] = None
    founding_date_iso: Optional[str] = None
    zipcode_detail: Optional[str] = None
    wilaya: Optional[str] = None
    founding_location: Optional[str] = None
    detail_url: Optional[str] = None

class JortAnnouncement(BaseModel):
    """A single JORT (official gazette) announcement."""
    date: str
    type: str
    jort_number: Optional[str] = None
    content: str
    year: Optional[int] = None

class JortData(BaseModel):
    """Collection of JORT announcements for one company."""
    announcements: List[JortAnnouncement] = []

class Contract(BaseModel):
    """A procurement contract record (montant = amount, objet = subject)."""
    date: str
    organisme: str
    type: str
    montant: float
    objet: str

class MarchesData(BaseModel):
    """Collection of procurement contracts for one company."""
    contracts: List[Contract] = []

class EnrichmentData(BaseModel):
    """Full enrichment payload: registry, gazette and procurement sections."""
    rne: RneData
    jort: JortData
    marches: MarchesData
    notes: Optional[str] = None

class RedFlag(BaseModel):
    """A heuristic warning produced by calculate_red_flags."""
    type: str
    severity: str      # "HIGH" or "MEDIUM" in the flags emitted by this module
    message_ar: str    # Arabic user-facing message

class Metrics(BaseModel):
    """Aggregated contract metrics plus detected red flags."""
    total_contracts: int
    total_contracts_value: float
    capital_to_contracts_ratio: float
    red_flags: List[RedFlag] = []

class EnrichedCompanyRequest(BaseModel):
    """Inbound payload to create or update an enriched company profile."""
    company_id: str
    company_name: str
    wilaya: str
    data: EnrichmentData
    enriched_by: str = "Journalist"
    enriched_at: Optional[str] = None  # parsed with datetime.fromisoformat when set

class EnrichedCompanyResponse(BaseModel):
    """Outbound shape of an enriched company profile."""
    company_id: str
    company_name: str
    wilaya: str
    data: dict
    metrics: dict
    enriched_by: str
    enriched_at: Optional[str]

# --- Investigation Notes Pydantic Models ---

class CreateNoteRequest(BaseModel):
    """Payload for creating an investigation note."""
    title: str
    content: str
    created_by: Optional[str] = "Unknown"
    tags: Optional[List[str]] = []

class UpdateNoteRequest(BaseModel):
    """Partial-update payload; None fields are left unchanged."""
    title: Optional[str] = None
    content: Optional[str] = None
    tags: Optional[List[str]] = None
114
+
115
+ # --- Business Logic ---
116
+
117
def calculate_red_flags(data: EnrichmentData) -> Metrics:
    """Derive contract metrics and heuristic red flags from enrichment data."""
    contracts = data.marches.contracts
    total_contracts = len(contracts)
    total_value = sum(c.montant for c in contracts)
    # Guard against division by zero: zero/negative capital counts as 1.
    capital = data.rne.capital_social if data.rne.capital_social > 0 else 1
    ratio = total_value / capital

    flags = []

    # Flag: contract value dwarfs the declared capital (> 10x).
    if ratio > 10:
        flags.append(RedFlag(
            type="FINANCIAL_RATIO",
            severity="HIGH",
            message_ar=f"قيمة الصفقات تتجاوز رأس المال بـ {ratio:.1f} مرة"
        ))

    # Flag: majority of contracts awarded without competition.
    direct_awards = sum(1 for c in contracts if "تراضي" in c.type or "Direct" in c.type)
    if total_contracts and direct_awards / total_contracts > 0.5:
        flags.append(RedFlag(
            type="PROCUREMENT_METHOD",
            severity="HIGH",
            message_ar="أكثر من 50% من الصفقات بالتراضي"
        ))

    # Flag: company has exactly one shareholder.
    if len(data.rne.shareholders) == 1:
        flags.append(RedFlag(
            type="GOVERNANCE",
            severity="MEDIUM",
            message_ar="مساهم وحيد في الشركة"
        ))

    return Metrics(
        total_contracts=total_contracts,
        total_contracts_value=total_value,
        capital_to_contracts_ratio=ratio,
        red_flags=flags,
    )
157
+
158
+
159
def db_company_to_dict(company: EnrichedCompanyDB) -> dict:
    """Serialize an EnrichedCompany ORM row into the frontend-expected dict."""
    # enriched_at is a datetime column; emit ISO-8601 or None.
    timestamp = company.enriched_at.isoformat() if company.enriched_at else None
    return {
        "company_id": company.company_id,
        "company_name": company.company_name,
        "wilaya": company.wilaya,
        "data": company.data,
        "metrics": company.metrics,
        "enriched_by": company.enriched_by,
        "enriched_at": timestamp,
    }
170
+
171
+
172
def db_note_to_dict(note: InvestigationNoteDB) -> dict:
    """Serialize an InvestigationNote ORM row into the frontend-expected dict."""
    # Datetime columns become ISO-8601 strings; missing tags become [].
    created = note.created_at.isoformat() if note.created_at else None
    updated = note.updated_at.isoformat() if note.updated_at else None
    return {
        "id": note.id,
        "title": note.title,
        "content": note.content,
        "created_at": created,
        "updated_at": updated,
        "created_by": note.created_by,
        "tags": note.tags or [],
    }
183
+
184
+
185
+ # --- Enrichment Endpoints ---
186
+
187
@router.post("/manual")
def save_manual_enrichment(payload: EnrichedCompanyRequest, db: Session = Depends(get_db)):
    """Save or update an enriched company profile (upsert on company_id).

    Metrics and red flags are recomputed from the payload on every save.
    Returns the stored record in the frontend dict shape.  A malformed
    enriched_at string raises ValueError from datetime.fromisoformat.
    """
    # Calculate metrics & flags
    metrics = calculate_red_flags(payload.data)

    metrics_dict = {
        "total_contracts": metrics.total_contracts,
        "total_contracts_value": metrics.total_contracts_value,
        "capital_to_contracts_ratio": metrics.capital_to_contracts_ratio,
        "red_flags": [f.dict() for f in metrics.red_flags]
    }

    data_dict = payload.data.dict()
    # Client-supplied timestamp wins; otherwise stamp server-side (UTC, naive).
    enriched_at = datetime.fromisoformat(payload.enriched_at) if payload.enriched_at else datetime.utcnow()

    # Check if company exists (upsert)
    existing = db.query(EnrichedCompanyDB).filter(
        EnrichedCompanyDB.company_id == payload.company_id
    ).first()

    if existing:
        # Update existing record in place
        existing.company_name = payload.company_name
        existing.wilaya = payload.wilaya
        existing.data = data_dict
        existing.metrics = metrics_dict
        existing.enriched_by = payload.enriched_by
        existing.enriched_at = enriched_at
        db.commit()
        db.refresh(existing)
        company_obj = existing
    else:
        # Create new record
        company_obj = EnrichedCompanyDB(
            company_id=payload.company_id,
            company_name=payload.company_name,
            wilaya=payload.wilaya,
            data=data_dict,
            metrics=metrics_dict,
            enriched_by=payload.enriched_by,
            enriched_at=enriched_at,
        )
        db.add(company_obj)
        db.commit()
        db.refresh(company_obj)

    return db_company_to_dict(company_obj)
235
+
236
+
237
@router.get("/profile/{company_id}")
def get_enriched_profile(company_id: str, db: Session = Depends(get_db)):
    """Return one enriched company profile, or 404 if it was never enriched."""
    record = (
        db.query(EnrichedCompanyDB)
        .filter(EnrichedCompanyDB.company_id == company_id)
        .first()
    )
    if record is None:
        raise HTTPException(status_code=404, detail="Profile not enriched yet")
    return db_company_to_dict(record)
248
+
249
+
250
@router.get("/status/{company_id}")
def check_enrichment_status(company_id: str, db: Session = Depends(get_db)):
    """Report whether an enriched record exists for the given company id."""
    record = (
        db.query(EnrichedCompanyDB)
        .filter(EnrichedCompanyDB.company_id == company_id)
        .first()
    )
    return {"company_id": company_id, "is_enriched": record is not None}
261
+
262
+
263
@router.get("/all")
def get_all_enriched(db: Session = Depends(get_db)):
    """Return every enriched company, newest first, without pagination."""
    rows = (
        db.query(EnrichedCompanyDB)
        .order_by(EnrichedCompanyDB.enriched_at.desc())
        .all()
    )
    return [db_company_to_dict(row) for row in rows]
271
+
272
+
273
@router.get("/list")
def list_enriched_companies(
    page: int = 1,
    per_page: int = 12,
    search: Optional[str] = None,
    wilaya: Optional[str] = None,
    has_red_flags: Optional[bool] = None,
    db: Session = Depends(get_db)
):
    """List enriched companies with filters and pagination.

    Filters: case-insensitive name substring (search), exact wilaya, and
    presence/absence of red flags.  Returns a dict with the page of
    companies plus total/page/per_page/total_pages bookkeeping.
    """
    # Clamp pagination inputs: page=0 or negatives would otherwise produce
    # negative slice bounds and silently return the wrong page.
    page = max(page, 1)
    per_page = max(per_page, 1)

    query = db.query(EnrichedCompanyDB)

    # Filter by search (company name, case-insensitive substring)
    if search:
        query = query.filter(EnrichedCompanyDB.company_name.ilike(f"%{search}%"))

    # Filter by wilaya (exact match)
    if wilaya:
        query = query.filter(EnrichedCompanyDB.wilaya == wilaya)

    # Get all matching companies for counting and red flag filtering
    # (SQLite JSON filtering is limited, so we filter in Python for has_red_flags)
    all_companies = query.order_by(EnrichedCompanyDB.enriched_at.desc()).all()

    # Convert to dicts and apply red flag filter if needed.
    companies_dicts = [db_company_to_dict(c) for c in all_companies]

    if has_red_flags is not None:
        # `metrics` may be stored as NULL; `or {}` keeps .get safe.
        def _flagged(c):
            return bool((c.get('metrics') or {}).get('red_flags'))

        if has_red_flags:
            companies_dicts = [c for c in companies_dicts if _flagged(c)]
        else:
            companies_dicts = [c for c in companies_dicts if not _flagged(c)]

    # Pagination over the filtered, in-memory list.
    total = len(companies_dicts)
    start = (page - 1) * per_page
    paginated = companies_dicts[start:start + per_page]

    return {
        "companies": paginated,
        "total": total,
        "page": page,
        "per_page": per_page,
        "total_pages": (total + per_page - 1) // per_page if total > 0 else 1
    }
325
+
326
+
327
+ # --- Investigation Notes Endpoints ---
328
+
329
@router.post("/{company_id}/notes")
def create_note(company_id: str, request: CreateNoteRequest, db: Session = Depends(get_db)):
    """Attach a new investigation note to an enriched company (404 if absent)."""
    parent = (
        db.query(EnrichedCompanyDB)
        .filter(EnrichedCompanyDB.company_id == company_id)
        .first()
    )
    if parent is None:
        raise HTTPException(status_code=404, detail="Company not found")

    timestamp = datetime.utcnow()
    new_note = InvestigationNoteDB(
        id=str(uuid.uuid4()),
        company_id=company_id,
        title=request.title,
        content=request.content,
        created_by=request.created_by or "Unknown",
        tags=request.tags or [],
        created_at=timestamp,
        updated_at=timestamp,
    )
    db.add(new_note)
    db.commit()
    db.refresh(new_note)

    # Report the note count for this company after the insert.
    note_count = (
        db.query(InvestigationNoteDB)
        .filter(InvestigationNoteDB.company_id == company_id)
        .count()
    )
    return {
        "status": "success",
        "note": db_note_to_dict(new_note),
        "total_notes": note_count,
    }
366
+
367
+
368
@router.get("/{company_id}/notes")
def get_notes(company_id: str, db: Session = Depends(get_db)):
    """Return all investigation notes for a company, newest first (404 if absent)."""
    parent = (
        db.query(EnrichedCompanyDB)
        .filter(EnrichedCompanyDB.company_id == company_id)
        .first()
    )
    if parent is None:
        raise HTTPException(status_code=404, detail="Company not found")

    rows = (
        db.query(InvestigationNoteDB)
        .filter(InvestigationNoteDB.company_id == company_id)
        .order_by(InvestigationNoteDB.created_at.desc())
        .all()
    )
    return {
        "company_id": company_id,
        "company_name": parent.company_name,
        "notes": [db_note_to_dict(row) for row in rows],
        "total": len(rows),
    }
388
+
389
+
390
@router.put("/{company_id}/notes/{note_id}")
def update_note(
    company_id: str,
    note_id: str,
    updates: UpdateNoteRequest,
    db: Session = Depends(get_db)
):
    """Partially update a note; fields left as None keep their current value."""
    target = (
        db.query(InvestigationNoteDB)
        .filter(
            InvestigationNoteDB.company_id == company_id,
            InvestigationNoteDB.id == note_id,
        )
        .first()
    )
    if target is None:
        raise HTTPException(status_code=404, detail="Note not found")

    # Copy over only the fields explicitly provided in the request body.
    for field in ("title", "content", "tags"):
        value = getattr(updates, field)
        if value is not None:
            setattr(target, field, value)

    target.updated_at = datetime.utcnow()
    db.commit()
    db.refresh(target)

    return {"status": "success", "note": db_note_to_dict(target)}
422
+
423
+
424
@router.delete("/{company_id}/notes/{note_id}")
def delete_note(company_id: str, note_id: str, db: Session = Depends(get_db)):
    """Delete an investigation note and report how many remain for the company."""
    target = (
        db.query(InvestigationNoteDB)
        .filter(
            InvestigationNoteDB.company_id == company_id,
            InvestigationNoteDB.id == note_id,
        )
        .first()
    )
    if target is None:
        raise HTTPException(status_code=404, detail="Note not found")

    db.delete(target)
    db.commit()

    remaining = (
        db.query(InvestigationNoteDB)
        .filter(InvestigationNoteDB.company_id == company_id)
        .count()
    )
    return {
        "status": "success",
        "deleted_note_id": note_id,
        "total_notes": remaining,
    }
448
+
449
+
450
# --- Watchlist Endpoints ---

class WatchCompanyOut(BaseModel):
    """Response shape for a watchlist entry, mapped from the ORM row."""
    id: str
    name_ar: str
    wilaya: Optional[str]
    delegation: Optional[str]
    activity: Optional[str]
    type: Optional[str]
    date_annonce: Optional[str]
    etat_enregistrement: str            # registration state; see update endpoint
    detected_trovit_at: Optional[datetime]
    detected_trovit_charika_id: Optional[str]
    detected_trovit_url: Optional[str]
    created_at: datetime
    updated_at: datetime

    class Config:
        # Allow building this model directly from SQLAlchemy attributes.
        from_attributes = True

class WatchCompanyUpdate(BaseModel):
    """Partial-update payload for a watched company; None means unchanged."""
    etat_enregistrement: Optional[str] = None
    detected_trovit_charika_id: Optional[str] = None
    detected_trovit_url: Optional[str] = None
474
+
475
+
476
@router.get("/watch-companies", response_model=List[WatchCompanyOut])
def list_watch_companies(
    wilaya: Optional[str] = None,
    etat: Optional[str] = None,
    q: Optional[str] = None,
    db: Session = Depends(get_db)
):
    """List watchlist companies, optionally filtered by wilaya, state or name."""
    from app.models.enrichment_models import WatchCompany

    # Collect the active criteria, then apply them in one filter call.
    criteria = []
    if wilaya:
        criteria.append(WatchCompany.wilaya == wilaya)
    if etat:
        criteria.append(WatchCompany.etat_enregistrement == etat)
    if q:
        criteria.append(WatchCompany.name_ar.ilike(f"%{q}%"))

    query = db.query(WatchCompany)
    if criteria:
        query = query.filter(*criteria)

    # Default sort: newest entries first.
    return query.order_by(WatchCompany.created_at.desc()).all()
499
+
500
+
501
@router.patch("/watch-companies/{company_id}", response_model=WatchCompanyOut)
def update_watch_company(
    company_id: str,
    updates: WatchCompanyUpdate,
    db: Session = Depends(get_db)
):
    """Patch registration state and Trovit detection details of a watched company."""
    from app.models.enrichment_models import WatchCompany

    target = db.query(WatchCompany).filter(WatchCompany.id == company_id).first()
    if target is None:
        raise HTTPException(status_code=404, detail="Watch company not found")

    new_etat = updates.etat_enregistrement
    if new_etat is not None:
        target.etat_enregistrement = new_etat
        # Stamp the first-detection time once, when the state flips to detected.
        if new_etat == "detected_trovit" and not target.detected_trovit_at:
            target.detected_trovit_at = datetime.utcnow()

    if updates.detected_trovit_charika_id is not None:
        target.detected_trovit_charika_id = updates.detected_trovit_charika_id

    if updates.detected_trovit_url is not None:
        target.detected_trovit_url = updates.detected_trovit_url

    target.updated_at = datetime.utcnow()
    db.commit()
    db.refresh(target)

    return target
backend/app/api/v1/auth.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import timedelta
2
+ from fastapi import APIRouter, Depends, HTTPException, status
3
+ from fastapi.security import OAuth2PasswordRequestForm
4
+ from sqlalchemy.orm import Session
5
+
6
+ from app.database import get_db
7
+ from app.models.user_models import User
8
+ from app.schemas.auth_schemas import Token, UserCreate, UserRead, UserUpdate
9
+ from app.services.auth_service import (
10
+ ACCESS_TOKEN_EXPIRE_MINUTES,
11
+ create_access_token,
12
+ get_password_hash,
13
+ verify_password,
14
+ get_current_active_user,
15
+ get_current_admin_user
16
+ )
17
+
18
+ router = APIRouter()
19
+
20
+ @router.post("/login", response_model=Token)
21
+ async def login_for_access_token(form_data: OAuth2PasswordRequestForm = Depends(), db: Session = Depends(get_db)):
22
+ user = db.query(User).filter(User.email == form_data.username).first()
23
+ if not user or not verify_password(form_data.password, user.hashed_password):
24
+ raise HTTPException(
25
+ status_code=status.HTTP_401_UNAUTHORIZED,
26
+ detail="Incorrect username or password",
27
+ headers={"WWW-Authenticate": "Bearer"},
28
+ )
29
+
30
+ access_token_expires = timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
31
+ access_token = create_access_token(
32
+ data={"sub": user.email}, expires_delta=access_token_expires
33
+ )
34
+ return {"access_token": access_token, "token_type": "bearer"}
35
+
36
+ @router.post("/users", response_model=UserRead)
37
+ def create_user(user: UserCreate, db: Session = Depends(get_db)):
38
+ # Check if user exists
39
+ db_user = db.query(User).filter(User.email == user.email).first()
40
+ if db_user:
41
+ raise HTTPException(status_code=400, detail="Email already registered")
42
+
43
+ hashed_password = get_password_hash(user.password)
44
+ new_user = User(
45
+ email=user.email,
46
+ hashed_password=hashed_password,
47
+ full_name=user.full_name,
48
+ is_active=user.is_active,
49
+ is_admin=user.is_admin
50
+ )
51
+ db.add(new_user)
52
+ db.commit()
53
+ db.refresh(new_user)
54
+ return new_user
55
+
56
+ @router.get("/me", response_model=UserRead)
57
+ async def read_users_me(current_user: User = Depends(get_current_active_user)):
58
+ return current_user
59
+
60
+ @router.get("/users", response_model=list[UserRead])
61
+ def read_users(skip: int = 0, limit: int = 100, db: Session = Depends(get_db), current_user: User = Depends(get_current_admin_user)):
62
+ users = db.query(User).offset(skip).limit(limit).all()
63
+ return users
64
+
65
+ @router.patch("/users/{user_id}", response_model=UserRead)
66
+ def update_user(user_id: int, user_update: UserUpdate, db: Session = Depends(get_db), current_user: User = Depends(get_current_admin_user)):
67
+ db_user = db.query(User).filter(User.id == user_id).first()
68
+ if not db_user:
69
+ raise HTTPException(status_code=404, detail="User not found")
70
+
71
+ if user_update.is_active is not None:
72
+ db_user.is_active = user_update.is_active
73
+ if user_update.is_admin is not None:
74
+ db_user.is_admin = user_update.is_admin
75
+
76
+ db.commit()
77
+ db.refresh(db_user)
78
+ return db_user
79
+
80
+ @router.delete("/users/{user_id}", status_code=status.HTTP_204_NO_CONTENT)
81
+ def delete_user(user_id: int, db: Session = Depends(get_db), current_user: User = Depends(get_current_admin_user)):
82
+ db_user = db.query(User).filter(User.id == user_id).first()
83
+ if not db_user:
84
+ raise HTTPException(status_code=404, detail="User not found")
85
+
86
+ db.delete(db_user)
87
+ db.commit()
88
+ return None
backend/app/api/v1/companies.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, Query
2
+ from typing import List, Optional
3
+ from app.services.data_loader import get_companies_df
4
+ from app.models.schemas import Company, CompanyWithLinks
5
+ from app.services.osint_links import get_company_links
6
+
7
+ router = APIRouter()
8
+
9
+ @router.get("/", response_model=List[Company])
10
+ def list_companies(
11
+ wilaya: Optional[str] = None,
12
+ group: Optional[str] = None,
13
+ type: Optional[str] = None,
14
+ search: Optional[str] = None,
15
+ limit: int = 50
16
+ ):
17
+ df = get_companies_df()
18
+ if df.empty:
19
+ return []
20
+
21
+ if wilaya:
22
+ df = df[df['wilaya'] == wilaya]
23
+ if group:
24
+ df = df[df['activity_group'] == group]
25
+ if type:
26
+ df = df[df['type'] == type]
27
+ if search:
28
+ mask = df['name'].str.contains(search, na=False) | df['activity_normalized'].str.contains(search, na=False)
29
+ df = df[mask]
30
+
31
+ return df.head(limit).to_dict(orient='records')
32
+
33
+ @router.get("/{company_id}", response_model=CompanyWithLinks)
34
+ def read_company(company_id: int):
35
+ df = get_companies_df()
36
+ company = df[df['id'] == company_id]
37
+ if company.empty:
38
+ return {} # Should raise 404
39
+
40
+ data = company.iloc[0].to_dict()
41
+ data['osint_links'] = get_company_links(company_id)
42
+ return data
43
+
44
+ @router.get("/{company_id}/osint_links")
45
+ def read_company_links(company_id: int):
46
+ return get_company_links(company_id)
backend/app/api/v1/investigate.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Ba7ath Investigation Endpoint
3
+ ==============================
4
+ POST /api/v1/investigate/{company_id}
5
+
6
+ Cross-references Ahlya (CSV), JORT (DB), and RNE (DB) data via Gemini LLM.
7
+ """
8
+
9
+ from fastapi import APIRouter, HTTPException, Depends
10
+ from pydantic import BaseModel, Field
11
+ from typing import Optional, List
12
+ from datetime import datetime
13
+ from sqlalchemy.orm import Session
14
+
15
+ from app.database import get_db
16
+ from app.models.enrichment_models import EnrichedCompany as EnrichedCompanyDB
17
+ from app.services.llm_service import llm_service
18
+ from app.services.data_loader import get_companies_df
19
+ from app.services.auth_service import get_current_user
20
+
21
+ import logging
22
+
23
+ logger = logging.getLogger("ba7ath.investigate")
24
+
25
+ router = APIRouter()
26
+
27
+
28
+ # ── Pydantic Response Models ─────────────────────────────────────────────
29
+
30
+ class LLMAnalysis(BaseModel):
31
+ """The structured output from Gemini."""
32
+ match_score: int = Field(0, ge=0, le=100, description="Score de correspondance (0-100)")
33
+ status: str = Field("Pending", description="Verified | Suspicious | Conflict | Pending")
34
+ findings: List[str] = Field(default_factory=list, description="النقاط المتطابقة")
35
+ red_flags: List[str] = Field(default_factory=list, description="التجاوزات المرصودة")
36
+ summary_ar: str = Field("", description="ملخص التحقيق بالعربية")
37
+
38
+
39
+ class InvestigationResult(BaseModel):
40
+ """Full investigation response."""
41
+ company_id: str
42
+ company_name: str
43
+ wilaya: str
44
+ analysis: LLMAnalysis
45
+ sources_used: List[str] = Field(default_factory=list)
46
+ analyzed_at: str
47
+ model_used: str = "gemini-1.5-flash"
48
+
49
+
50
+ # ── Helper: Extract Ahlya data from CSV ──────────────────────────────────
51
+
52
+ def _get_ahlya_data(company_id: str, company_name: str) -> Optional[dict]:
53
+ """Find the company in the Ahlya DataFrame by ID or name."""
54
+ df = get_companies_df()
55
+ if df is None or df.empty:
56
+ return None
57
+
58
+ # Try matching by company_id first (if there's an ID column)
59
+ if "company_id" in df.columns:
60
+ match = df[df["company_id"] == company_id]
61
+ if not match.empty:
62
+ return match.iloc[0].to_dict()
63
+
64
+ # Fallback to name matching
65
+ name_col = "name" if "name" in df.columns else None
66
+ if name_col is None:
67
+ for col in df.columns:
68
+ if "name" in col.lower() or "اسم" in col:
69
+ name_col = col
70
+ break
71
+
72
+ if name_col:
73
+ # Normalize for fuzzy matching
74
+ normalized_target = company_name.strip().upper()
75
+ match = df[df[name_col].astype(str).str.strip().str.upper() == normalized_target]
76
+ if not match.empty:
77
+ return match.iloc[0].to_dict()
78
+
79
+ return None
80
+
81
+
82
+ # ── Main Endpoint ────────────────────────────────────────────────────────
83
+
84
+ @router.post(
85
+ "/{company_id}",
86
+ response_model=InvestigationResult,
87
+ summary="تحليل المقارنة المتقاطعة عبر الذكاء الاصطناعي"
88
+ )
89
+ async def investigate_company(
90
+ company_id: str,
91
+ db: Session = Depends(get_db),
92
+ current_user=Depends(get_current_user),
93
+ ):
94
+ """
95
+ Cross-reference a company's data from Ahlya (CSV), JORT (DB enrichment),
96
+ and RNE (DB enrichment) using Gemini 1.5 Flash LLM analysis.
97
+
98
+ Returns a structured investigation report in Arabic (MSA).
99
+ """
100
+ logger.info(f"📋 Investigation request for company_id: {company_id}")
101
+
102
+ # ── 1. Retrieve enriched data from SQLite ────────────────────────────
103
+ enriched = db.query(EnrichedCompanyDB).filter(
104
+ EnrichedCompanyDB.company_id == company_id
105
+ ).first()
106
+
107
+ if not enriched:
108
+ raise HTTPException(
109
+ status_code=404,
110
+ detail=f"الشركة '{company_id}' غير موجودة في قاعدة البيانات المُثرَاة"
111
+ )
112
+
113
+ company_name = enriched.company_name
114
+ wilaya = enriched.wilaya
115
+ enrichment_data = enriched.data or {}
116
+
117
+ # Extract JORT and RNE from enrichment data
118
+ jort_data = enrichment_data.get("jort", {})
119
+ rne_data = enrichment_data.get("rne", {})
120
+
121
+ # ── 2. Retrieve Ahlya data from CSV ──────────────────────────────────
122
+ ahlya_data = _get_ahlya_data(company_id, company_name)
123
+
124
+ # Track which sources were used
125
+ sources_used = []
126
+ if ahlya_data:
127
+ sources_used.append("أهلية (CSV)")
128
+ if jort_data and jort_data.get("announcements"):
129
+ sources_used.append("الرائد الر��مي (JORT)")
130
+ if rne_data and (rne_data.get("capital_social") or rne_data.get("tax_id")):
131
+ sources_used.append("السجل الوطني (RNE)")
132
+
133
+ if not sources_used:
134
+ raise HTTPException(
135
+ status_code=422,
136
+ detail="لا توجد بيانات كافية لإجراء التحليل المتقاطع"
137
+ )
138
+
139
+ # ── 3. Build the payload for Gemini ───────────────────────────────────
140
+ ahlya_payload = ahlya_data or {"company_name": company_name, "wilaya": wilaya}
141
+ jort_payload = jort_data if jort_data.get("announcements") else {}
142
+ rne_payload = rne_data if rne_data.get("capital_social") or rne_data.get("tax_id") else {}
143
+
144
+ # Clean NaN/float values from ahlya DataFrame row
145
+ if ahlya_payload:
146
+ ahlya_payload = {
147
+ k: (None if (isinstance(v, float) and (v != v)) else v)
148
+ for k, v in ahlya_payload.items()
149
+ }
150
+
151
+ # ── 4. Call LLM Analysis ─────────────────────────────────────────────
152
+ logger.info(
153
+ f"🚀 Sending to Gemini: company='{company_name}', "
154
+ f"sources={sources_used}"
155
+ )
156
+
157
+ raw_analysis = await llm_service.analyze_cross_check(
158
+ ahlya_data=ahlya_payload,
159
+ jort_data=jort_payload,
160
+ rne_data=rne_payload,
161
+ )
162
+
163
+ # Parse into Pydantic model (validates schema)
164
+ analysis = LLMAnalysis(
165
+ match_score=raw_analysis.get("match_score", 0),
166
+ status=raw_analysis.get("status", "Pending"),
167
+ findings=raw_analysis.get("findings", []),
168
+ red_flags=raw_analysis.get("red_flags", []),
169
+ summary_ar=raw_analysis.get("summary_ar", ""),
170
+ )
171
+
172
+ # ── 5. Build response ────────────────────────────────────────────────
173
+ return InvestigationResult(
174
+ company_id=company_id,
175
+ company_name=company_name,
176
+ wilaya=wilaya,
177
+ analysis=analysis,
178
+ sources_used=sources_used,
179
+ analyzed_at=datetime.utcnow().isoformat(),
180
+ model_used="gemini-1.5-flash",
181
+ )
backend/app/api/v1/meta.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter
2
+
3
+ router = APIRouter()
4
+
5
+ @router.get("/methodology")
6
+ def methodology():
7
+ return {
8
+ "title": "Methodology",
9
+ "description": "How we process data and compute metrics.",
10
+ "content_ar": """
11
+ تم استخراج البيانات من السجل الوطني للشركات الأهلية (alahlia.tn).
12
+
13
+ مؤشر 'بحث' (Ba7ath Index) هو مؤشر مركب يقيس ثلاث أبعاد رئيسية (0-100):
14
+ 1. الاعتماد على الموارد العمومية (40%): نسبة الشركات في قطاعات الفلاحة، المناجم، والبيئة.
15
+ 2. التركيز القطاعي (40%): مدى هيمنة قطاع واحد على اقتصاد الجهة.
16
+ 3. التوازن المحلي/الجهوي (20%): الفرق بين نسبة الشركات المحلية والجهوية.
17
+
18
+ صيغة الاحتساب: INDEX = 100 * (0.4 * s1 + 0.4 * s2 + 0.2 * s3)
19
+ """
20
+ }
21
+
22
+ @router.get("/sources")
23
+ def sources():
24
+ return [
25
+ {"name": "RNE", "url": "https://www.registre-entreprises.tn", "description_ar": "للتثبت من الوضعية القانونية للشركة."},
26
+ {"name": "JORT", "url": "http://www.iort.gov.tn", "description_ar": "للبحث عن النصوص التأسيسية."},
27
+ {"name": "INS", "url": "http://www.ins.tn", "description_ar": "للمقارنة مع الإحصائيات الرسمية."}
28
+ ]
backend/app/api/v1/risk.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter
2
+ from typing import List
3
+ from app.services.risk_engine import get_risk_for_wilaya, get_all_risks
4
+ from app.models.schemas import WilayaRisk
5
+
6
+ router = APIRouter()
7
+
8
+ @router.get("/wilayas", response_model=List[WilayaRisk])
9
+ def list_risks():
10
+ return get_all_risks()
11
+
12
+ @router.get("/wilayas/{name}", response_model=WilayaRisk)
13
+ def read_risk(name: str):
14
+ return get_risk_for_wilaya(name)
backend/app/api/v1/stats.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter
2
+ from app.services.aggregation import get_national_stats, get_wilaya_stats
3
+ from app.models.schemas import NationalStats, WilayaStats
4
+
5
+ router = APIRouter()
6
+
7
+ @router.get("/national", response_model=NationalStats)
8
+ def read_national_stats():
9
+ return get_national_stats()
10
+
11
+ @router.get("/wilayas/{name}", response_model=WilayaStats)
12
+ def read_wilaya_stats(name: str):
13
+ return get_wilaya_stats(name)
backend/app/data/companies.json ADDED
The diff for this file is too large to render. See raw diff
 
backend/app/data/stats.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "total": 230,
3
+ "wilayas": {
4
+ "باجة": 25,
5
+ "سيدي بوزيد": 22,
6
+ "قفصة": 19,
7
+ "صفاقس": 18,
8
+ "القيروان": 14,
9
+ "زغوان": 11,
10
+ "مدنين": 11,
11
+ "القصرين": 10,
12
+ "سليانة": 10,
13
+ "قبلي": 10,
14
+ "نابل": 10,
15
+ "توزر": 9,
16
+ "جندوبة": 8,
17
+ "المهدية": 7,
18
+ "تطاوين": 7,
19
+ "المنستير": 6,
20
+ "الكاف": 5,
21
+ "بنزرت": 5,
22
+ "سوسة": 5,
23
+ "منوبة": 5,
24
+ "بن عروس": 4,
25
+ "تونس": 4,
26
+ "قابس": 4,
27
+ "أريانة": 1
28
+ },
29
+ "activites_top10": {
30
+ "فلاحة / صيد و الخدمات المتصلة بها": 71,
31
+ "زراعة": 21,
32
+ "تربية الحيوانات": 17,
33
+ "فلاحة/ صيد و الخدمات المتصلة بها": 15,
34
+ "خدمات ملحقة بالنقل": 11,
35
+ "أنشطة ترفيهية و ثقافية و رياضية": 8,
36
+ "حراجة / إستغلال الغابات": 6,
37
+ "أنشطة الخدمات الملحقة بالفلاحة بإستثناء الأنشطة البيطرية": 6,
38
+ "أنشطة ترفيهية": 6,
39
+ "التطهير وتنظيف الطرقات و التصرف في الفضلات": 5
40
+ },
41
+ "types": {
42
+ "محلية": 178,
43
+ "جهوية": 52
44
+ }
45
+ }
backend/app/database.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sqlalchemy import create_engine
2
+ from sqlalchemy.orm import sessionmaker, declarative_base
3
+
4
+ # SQLite database file path (relative to where the server runs)
5
+ SQLALCHEMY_DATABASE_URL = "sqlite:///./ba7ath_enriched.db"
6
+
7
+ # For SQLite with FastAPI, check_same_thread is required
8
+ engine = create_engine(
9
+ SQLALCHEMY_DATABASE_URL,
10
+ connect_args={"check_same_thread": False}
11
+ )
12
+
13
+ SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
14
+
15
+ Base = declarative_base()
16
+
17
+
18
+ def get_db():
19
+ """Dependency that provides a database session per request."""
20
+ db = SessionLocal()
21
+ try:
22
+ yield db
23
+ finally:
24
+ db.close()
backend/app/main.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
2
+ import os
3
+
4
+ # Load environment variables as the very first step
5
+ load_dotenv()
6
+
7
+ from fastapi import FastAPI, Request, Depends
8
+ from fastapi.responses import JSONResponse
9
+ from starlette.middleware.cors import CORSMiddleware
10
+ from app.api.v1 import stats, companies, risk, meta
11
+ from app.api.v1 import investigate as investigate_api
12
+ from app.services.data_loader import load_data
13
+ from app.database import engine, Base
14
+ from app.models import enrichment_models, user_models
15
+ from app.api.v1 import auth
16
+ from app.services.auth_service import get_current_user
17
+
18
+ app = FastAPI(title="Ba7ath OSINT API", version="1.0.0")
19
+
20
+ # ── CORS ──────────────────────────────────────────────────────────────
21
+ # Starlette CORSMiddleware with allow_origins=["*"]
22
+ # NOTE: When allow_origins=["*"], allow_credentials MUST be False.
23
+ # The frontend sends the token in the Authorization header, NOT via cookies,
24
+ # so allow_credentials=False is perfectly fine.
25
+ app.add_middleware(
26
+ CORSMiddleware,
27
+ allow_origins=["*"],
28
+ allow_credentials=False,
29
+ allow_methods=["*"],
30
+ allow_headers=["*"],
31
+ )
32
+
33
+
34
+ # ── Startup ───────────────────────────────────────────────────────────
35
+ @app.on_event("startup")
36
+ async def startup_event():
37
+ print("=" * 60)
38
+ print(" Ba7ath OSINT API - VERSION CORS V4 (allow_origins=[*])")
39
+ print("=" * 60)
40
+ load_data()
41
+ Base.metadata.create_all(bind=engine)
42
+
43
+
44
+ # ── Routers ───────────────────────────────────────────────────────────
45
+ app.include_router(auth.router, prefix="/api/v1/auth", tags=["Auth"])
46
+
47
+ app.include_router(
48
+ stats.router,
49
+ prefix="/api/v1/stats",
50
+ tags=["Stats"],
51
+ dependencies=[Depends(get_current_user)],
52
+ )
53
+ app.include_router(
54
+ companies.router,
55
+ prefix="/api/v1/companies",
56
+ tags=["Companies"],
57
+ dependencies=[Depends(get_current_user)],
58
+ )
59
+
60
+ from app.api import enrichment
61
+
62
+ app.include_router(
63
+ risk.router,
64
+ prefix="/api/v1/risk",
65
+ tags=["Risk"],
66
+ dependencies=[Depends(get_current_user)],
67
+ )
68
+ app.include_router(
69
+ meta.router,
70
+ prefix="/api/v1/meta",
71
+ tags=["Meta"],
72
+ dependencies=[Depends(get_current_user)],
73
+ )
74
+ app.include_router(
75
+ enrichment.router,
76
+ prefix="/api/v1/enrichment",
77
+ tags=["Enrichment"],
78
+ dependencies=[Depends(get_current_user)],
79
+ )
80
+ app.include_router(
81
+ investigate_api.router,
82
+ prefix="/api/v1/investigate",
83
+ tags=["Investigation"],
84
+ dependencies=[Depends(get_current_user)],
85
+ )
86
+
87
+
88
+ @app.get("/")
89
+ def read_root():
90
+ return {"message": "Ba7ath OSINT API is running - VERSION CORS V4"}
91
+
backend/app/models/enrichment_models.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sqlalchemy import Column, String, Float, DateTime, Text, ForeignKey
2
+ from sqlalchemy.orm import relationship
3
+ from sqlalchemy.dialects.sqlite import JSON
4
+ from datetime import datetime
5
+ from app.database import Base
6
+
7
+
8
+ class EnrichedCompany(Base):
9
+ """SQLAlchemy model for enriched company profiles."""
10
+ __tablename__ = "enriched_companies"
11
+
12
+ company_id = Column(String, primary_key=True, index=True)
13
+ company_name = Column(String, index=True, nullable=False)
14
+ wilaya = Column(String, index=True, nullable=False)
15
+
16
+ # Full raw enrichment data (rne, jort, marches, notes) as JSON
17
+ data = Column(JSON, nullable=False)
18
+
19
+ # Computed metrics (total_contracts, total_contracts_value, ratio, red_flags) as JSON
20
+ metrics = Column(JSON, nullable=False)
21
+
22
+ enriched_by = Column(String, nullable=True, default="Journalist")
23
+ enriched_at = Column(DateTime, default=datetime.utcnow)
24
+
25
+ # Relationship to investigation notes
26
+ notes = relationship(
27
+ "InvestigationNote",
28
+ back_populates="company",
29
+ cascade="all, delete-orphan"
30
+ )
31
+
32
+
33
+ class WatchCompany(Base): # Using Base from database.py (SQLAlchemy), NOT Pydantic
34
+ __tablename__ = "watch_companies"
35
+
36
+ id = Column(String, primary_key=True, index=True)
37
+ name_ar = Column(String, index=True, nullable=False)
38
+ wilaya = Column(String, index=True, nullable=True)
39
+ delegation = Column(String, nullable=True)
40
+ activity = Column(String, nullable=True)
41
+ type = Column(String, nullable=True) # jihawiya / mahaliya
42
+ date_annonce = Column(String, nullable=True) # YYYY-MM-DD or raw text
43
+
44
+ # Status: 'watch', 'detected_trovit', 'detected_rne', 'archived'
45
+ etat_enregistrement = Column(String, nullable=False, default="watch", index=True)
46
+
47
+ # Auto-detection fields
48
+ detected_trovit_at = Column(DateTime, nullable=True)
49
+ detected_trovit_charika_id = Column(String, nullable=True)
50
+ detected_trovit_url = Column(String, nullable=True)
51
+
52
+ created_at = Column(DateTime, default=datetime.utcnow)
53
+ updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
54
+
55
+ class InvestigationNote(Base):
56
+ """SQLAlchemy model for investigation notes attached to a company dossier."""
57
+ __tablename__ = "investigation_notes"
58
+
59
+ id = Column(String, primary_key=True, index=True) # UUID as string
60
+ company_id = Column(
61
+ String,
62
+ ForeignKey("enriched_companies.company_id", ondelete="CASCADE"),
63
+ index=True,
64
+ nullable=False
65
+ )
66
+
67
+ title = Column(String, nullable=False)
68
+ content = Column(Text, nullable=False)
69
+ created_at = Column(DateTime, default=datetime.utcnow)
70
+ updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
71
+ created_by = Column(String, nullable=True, default="Unknown")
72
+
73
+ # Tags stored as JSON list of strings
74
+ tags = Column(JSON, nullable=True)
75
+
76
+ # Back-reference to company
77
+ company = relationship("EnrichedCompany", back_populates="notes")
backend/app/models/schemas.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+ from typing import List, Optional, Dict, Any
3
+
4
+ class Company(BaseModel):
5
+ id: Optional[int] = None # Generated ID
6
+ name: str
7
+ wilaya: str
8
+ delegation: Optional[str] = None
9
+ locality: Optional[str] = None
10
+ type: str # محلية / جهوية
11
+ activity_raw: Optional[str] = None
12
+ activity_normalized: Optional[str] = None
13
+ activity_group: Optional[str] = None
14
+
15
+ # Status / Match info
16
+ match_status: Optional[str] = "not_matched" # matched | partial | none
17
+
18
+ # JORT Data
19
+ jort_ref: Optional[str] = None
20
+ jort_date: Optional[str] = None
21
+ jort_capital: Optional[float] = None
22
+ jort_text: Optional[str] = None
23
+
24
+ # RNE/Trovit Data
25
+ rne_id: Optional[str] = None
26
+ rne_tax_id: Optional[str] = None
27
+ rne_rc_number: Optional[str] = None
28
+ rne_founding_date: Optional[str] = None
29
+ rne_capital: Optional[float] = None
30
+ rne_legal_form: Optional[str] = None
31
+ rne_address: Optional[str] = None
32
+ rne_detail_url: Optional[str] = None
33
+
34
+ # Audit Flags
35
+ capital_divergence: Optional[bool] = False
36
+
37
+ class CompanyWithLinks(Company):
38
+ osint_links: Dict[str, str]
39
+
40
+ class WilayaStats(BaseModel):
41
+ wilaya: str
42
+ count: int
43
+ pct_national: float
44
+ rank: int
45
+ types: Dict[str, int]
46
+ top_groups: Dict[str, int]
47
+ top_activities: Dict[str, int]
48
+
49
+ class NationalStats(BaseModel):
50
+ total: int
51
+ wilayas: Dict[str, int]
52
+ types: Dict[str, int]
53
+ top_activities: Dict[str, int]
54
+ top_groups: Dict[str, int]
55
+
56
+ class Flag(BaseModel):
57
+ code: str
58
+ severity: str # "low", "medium", "high"
59
+ label_ar: str
60
+
61
+ class WilayaRisk(BaseModel):
62
+ wilaya: str
63
+ baath_index: float
64
+ s1: float # Dependency on resource sectors
65
+ s2: float # Concentration in one group
66
+ s3: float # Governance imbalance
67
+ flags: List[Flag]
68
+
69
+ # Editorial Enriched Fields
70
+ level: str # LOW | MEDIUM | HIGH
71
+ level_ar: str
72
+ color: str # emerald | amber | red
73
+ comment_ar: str
74
+ recommendations: List[str]
backend/app/models/user_models.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sqlalchemy import Column, Integer, String, Boolean
2
+ from app.database import Base
3
+
4
+ class User(Base):
5
+ __tablename__ = "users"
6
+
7
+ id = Column(Integer, primary_key=True, index=True)
8
+ email = Column(String, unique=True, index=True)
9
+ hashed_password = Column(String)
10
+ full_name = Column(String, nullable=True)
11
+ is_active = Column(Boolean, default=True)
12
+ is_admin = Column(Boolean, default=False)
backend/app/schemas/auth_schemas.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, EmailStr
2
+ from typing import Optional
3
+
4
+ class UserBase(BaseModel):
5
+ email: EmailStr
6
+ full_name: Optional[str] = None
7
+ is_active: Optional[bool] = True
8
+ is_admin: Optional[bool] = False
9
+
10
+ class UserCreate(UserBase):
11
+ password: str
12
+
13
+ class UserRead(UserBase):
14
+ id: int
15
+
16
+ class Config:
17
+ from_attributes = True
18
+
19
+ class UserUpdate(BaseModel):
20
+ is_active: Optional[bool] = None
21
+ is_admin: Optional[bool] = None
22
+
23
+ class Token(BaseModel):
24
+ access_token: str
25
+ token_type: str
26
+
27
+ class TokenData(BaseModel):
28
+ username: Optional[str] = None
backend/app/services/aggregation.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from app.services.data_loader import get_companies_df, get_stats_data
2
+ from app.models.schemas import NationalStats, WilayaStats
3
+
4
+ def _safe_value_counts(df, col, head=None):
5
+ """Safely get value_counts for a column, returning {} if column doesn't exist."""
6
+ if col not in df.columns:
7
+ return {}
8
+ vc = df[col].dropna().value_counts()
9
+ if head:
10
+ vc = vc.head(head)
11
+ return vc.to_dict()
12
+
13
+ def get_national_stats():
14
+ stats = get_stats_data()
15
+ df = get_companies_df()
16
+
17
+ total = stats.get("total", 0)
18
+ wilayas = stats.get("wilayas", {})
19
+ types = stats.get("types", {})
20
+
21
+ if not df.empty:
22
+ top_groups = _safe_value_counts(df, 'activity_group')
23
+ top_activities = _safe_value_counts(df, 'activity_normalized', head=10)
24
+ else:
25
+ top_groups = {}
26
+ top_activities = {}
27
+
28
+ return NationalStats(
29
+ total=total,
30
+ wilayas=wilayas,
31
+ types=types,
32
+ top_activities=top_activities,
33
+ top_groups=top_groups
34
+ )
35
+
36
+ def get_wilaya_stats(wilaya: str):
37
+ df = get_companies_df()
38
+ stats = get_stats_data()
39
+
40
+ if df.empty:
41
+ return None
42
+
43
+ wilaya_df = df[df['wilaya'] == wilaya]
44
+ count = len(wilaya_df)
45
+
46
+ total = stats.get("total", 1)
47
+ pct = round((count / total) * 100, 1)
48
+
49
+ # Rank
50
+ sorted_wilayas = sorted(stats.get("wilayas", {}).items(), key=lambda x: x[1], reverse=True)
51
+ rank = next((i for i, (w, c) in enumerate(sorted_wilayas, 1) if w == wilaya), 0)
52
+
53
+ if not wilaya_df.empty:
54
+ top_groups = _safe_value_counts(wilaya_df, 'activity_group')
55
+ top_activities = _safe_value_counts(wilaya_df, 'activity_normalized', head=10)
56
+ types = _safe_value_counts(wilaya_df, 'type')
57
+ else:
58
+ top_groups = {}
59
+ top_activities = {}
60
+ types = {}
61
+
62
+ return WilayaStats(
63
+ wilaya=wilaya,
64
+ count=count,
65
+ pct_national=pct,
66
+ rank=rank,
67
+ types=types,
68
+ top_groups=top_groups,
69
+ top_activities=top_activities
70
+ )
71
+
backend/app/services/auth_service.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime, timedelta
2
+ from typing import Optional
3
+ from jose import JWTError, jwt
4
+ from passlib.context import CryptContext
5
+ from fastapi import Depends, HTTPException, status
6
+ from fastapi.security import OAuth2PasswordBearer
7
+ from sqlalchemy.orm import Session
8
+ import os
9
+ from dotenv import load_dotenv
10
+
11
+ from app.database import get_db
12
+ from app.models.user_models import User
13
+ from app.schemas.auth_schemas import TokenData
14
+
15
+ load_dotenv()
16
+
17
+ # Config
18
+ SECRET_KEY = os.getenv("SECRET_KEY")
19
+ if not SECRET_KEY:
20
+ raise RuntimeError("SECRET_KEY environment variable is not set")
21
+ ALGORITHM = os.getenv("ALGORITHM", "HS256")
22
+ ACCESS_TOKEN_EXPIRE_MINUTES = int(os.getenv("ACCESS_TOKEN_EXPIRE_MINUTES", 30))
23
+
24
+ pwd_context = CryptContext(schemes=["argon2"], deprecated="auto")
25
+ oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/api/v1/auth/login")
26
+
27
+ def verify_password(plain_password, hashed_password):
28
+ return pwd_context.verify(plain_password, hashed_password)
29
+
30
+ def get_password_hash(password):
31
+ return pwd_context.hash(password)
32
+
33
+ def create_access_token(data: dict, expires_delta: Optional[timedelta] = None):
34
+ to_encode = data.copy()
35
+ if expires_delta:
36
+ expire = datetime.utcnow() + expires_delta
37
+ else:
38
+ expire = datetime.utcnow() + timedelta(minutes=15)
39
+ to_encode.update({"exp": expire})
40
+ encoded_jwt = jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM)
41
+ return encoded_jwt
42
+
43
+ async def get_current_user(token: str = Depends(oauth2_scheme), db: Session = Depends(get_db)):
44
+ credentials_exception = HTTPException(
45
+ status_code=status.HTTP_401_UNAUTHORIZED,
46
+ detail="Could not validate credentials",
47
+ headers={"WWW-Authenticate": "Bearer"},
48
+ )
49
+ try:
50
+ payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
51
+ username: str = payload.get("sub")
52
+ if username is None:
53
+ raise credentials_exception
54
+ token_data = TokenData(username=username)
55
+ except JWTError:
56
+ raise credentials_exception
57
+
58
+ user = db.query(User).filter(User.email == token_data.username).first()
59
+ if user is None:
60
+ raise credentials_exception
61
+ return user
62
+
63
+ async def get_current_active_user(current_user: User = Depends(get_current_user)):
64
+ if not current_user.is_active:
65
+ raise HTTPException(status_code=400, detail="Inactive user")
66
+ return current_user
67
+
68
+ async def get_current_admin_user(current_user: User = Depends(get_current_active_user)):
69
+ if not current_user.is_admin:
70
+ raise HTTPException(
71
+ status_code=status.HTTP_403_FORBIDDEN,
72
+ detail="The user doesn't have enough privileges"
73
+ )
74
+ return current_user
backend/app/services/data_loader.py ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import json
3
+ import os
4
+ import unicodedata
5
+ import re
6
+ from pathlib import Path
7
+ from dotenv import load_dotenv
8
+
9
+ # Load environment variables
10
+ load_dotenv()
11
+
12
# Build paths inside the project like this: BASE_DIR / 'subdir'.
# BASE_DIR resolves to .../backend/app (this module lives in app/services/).
BASE_DIR = Path(__file__).resolve().parent.parent
DATA_DIR = BASE_DIR / "data"

STATS_PATH = DATA_DIR / "stats.json"
COMPANIES_PATH = DATA_DIR / "companies.json"

# CSV paths, overridable via environment variables. Relative values are
# resolved against BASE_DIR.parent (the backend/ directory) inside
# DataLoader.load().
PATH_AHLYA_CSV = os.getenv("PATH_AHLYA_CSV", "Ahlya_Total_Feuil1.csv")
PATH_JORT_CSV = os.getenv("PATH_JORT_CSV", "app/scripts/Base-JORT.csv")
PATH_RNE_CSV = os.getenv("PATH_RNE_CSV", "trovit_charikat_ahliya_all.csv")
24
+
25
def normalize_company_name(name):
    """Build the canonical join key for a company name.

    Uppercase, accent-free (NFKD decomposition with combining marks
    dropped), single-spaced and stripped. Non-string input yields "".
    """
    if not isinstance(name, str):
        return ""

    # Uppercase first, then decompose so accents become combining marks.
    decomposed = unicodedata.normalize('NFKD', name.upper())
    without_accents = "".join(ch for ch in decomposed if not unicodedata.combining(ch))

    # Collapse whitespace runs and trim the ends.
    return re.sub(r'\s+', ' ', without_accents).strip()
48
+
49
class DataLoader:
    """Singleton holder for the merged Ahlya/JORT/RNE dataset.

    load() populates the two attributes read by the module-level accessors:
    - companies_df: pandas DataFrame (Ahlya base, left-joined with JORT/RNE)
    - stats_data: dict parsed from stats.json
    """
    _instance = None
    # None until load() has run; reset to empty values on load failure.
    companies_df = None
    stats_data = None

    def __new__(cls):
        # Classic singleton: every instantiation returns the same object.
        if cls._instance is None:
            cls._instance = super(DataLoader, cls).__new__(cls)
        return cls._instance

    def load(self):
        """Load stats and companies, merge JORT/RNE, flag capital divergence.

        Best-effort by design: any exception is printed and both attributes
        are reset to empty values so the application can still start.
        """
        print(f"Loading data from {DATA_DIR} and CSVs...")
        try:
            # 1. Stats JSON (optional).
            if not STATS_PATH.exists():
                print(f"Warning: Stats file not found at {STATS_PATH}")
                self.stats_data = {}
            else:
                with open(STATS_PATH, 'r', encoding='utf-8') as f:
                    self.stats_data = json.load(f)

            # 2. Base companies (Ahlya): CSV preferred, companies.json fallback.
            # Relative CSV paths resolve against backend/ (BASE_DIR.parent).
            ahlya_path = Path(PATH_AHLYA_CSV)
            if not ahlya_path.is_absolute():
                ahlya_path = BASE_DIR.parent / ahlya_path

            if ahlya_path.exists():
                print(f"Loading Ahlya CSV from {ahlya_path}")
                self.companies_df = pd.read_csv(ahlya_path)
                # Map Arabic/French source headers to canonical column names.
                self.companies_df.rename(columns={
                    "اسم_الشركة": "name",
                    "الولاية": "wilaya",
                    "المعتمدية": "delegation",
                    "المنطقة": "locality",
                    "النوع": "type",
                    "الموضوع / النشاط": "activity_raw",
                    "activité_normalisée": "activity_normalized",
                    "activité_groupe": "activity_group"
                }, inplace=True)
                # Ensure critical columns exist even if the CSV is missing them.
                if 'activity_normalized' not in self.companies_df.columns:
                    self.companies_df['activity_normalized'] = self.companies_df.get('activity_raw', pd.Series(dtype=str))
                if 'activity_group' not in self.companies_df.columns:
                    # Derive from activity_normalized if available.
                    self.companies_df['activity_group'] = self.companies_df.get('activity_normalized', pd.Series(dtype=str))
                print(f" -> Loaded {len(self.companies_df)} companies. Columns: {list(self.companies_df.columns)}")

            elif COMPANIES_PATH.exists():
                print(f"Loading Ahlya from companies.json as fallback")
                with open(COMPANIES_PATH, 'r', encoding='utf-8') as f:
                    companies = json.load(f)
                self.companies_df = pd.DataFrame(companies)
                # Same header mapping as the CSV branch above.
                self.companies_df.rename(columns={
                    "اسم_الشركة": "name",
                    "الولاية": "wilaya",
                    "المعتمدية": "delegation",
                    "المنطقة": "locality",
                    "النوع": "type",
                    "الموضوع / النشاط": "activity_raw",
                    "activité_normalisée": "activity_normalized",
                    "activité_groupe": "activity_group"
                }, inplace=True)
                if 'activity_normalized' not in self.companies_df.columns:
                    self.companies_df['activity_normalized'] = self.companies_df.get('activity_raw', pd.Series(dtype=str))
                if 'activity_group' not in self.companies_df.columns:
                    self.companies_df['activity_group'] = self.companies_df.get('activity_normalized', pd.Series(dtype=str))

            else:
                print("Warning: No Ahlya data found!")
                self.companies_df = pd.DataFrame()

            if not self.companies_df.empty:
                # Join key shared with the JORT/RNE merges below.
                self.companies_df['name_normalized'] = self.companies_df['name'].apply(normalize_company_name)
                # Stable sequential id used by the API layer.
                self.companies_df['id'] = range(1, len(self.companies_df) + 1)

            # 3. JORT gazette data (left join on the normalized name).
            jort_path = Path(PATH_JORT_CSV)
            if not jort_path.is_absolute():
                jort_path = BASE_DIR.parent / jort_path

            if jort_path.exists():
                print(f"Integrating JORT from {jort_path}")
                jort_df = pd.read_csv(jort_path)
                if 'Dénomination' in jort_df.columns:
                    jort_df['name_normalized'] = jort_df['Dénomination'].apply(normalize_company_name)
                    # Keep only the merge columns, renamed with a jort_ prefix.
                    jort_subset = jort_df[['name_normalized', 'Référence JORT', 'Date Annonce', 'Capital (DT)', 'Texte Source Original']].copy()
                    jort_subset.rename(columns={
                        'Référence JORT': 'jort_ref',
                        'Date Annonce': 'jort_date',
                        'Capital (DT)': 'jort_capital',
                        'Texte Source Original': 'jort_text'
                    }, inplace=True)
                    self.companies_df = pd.merge(self.companies_df, jort_subset, on='name_normalized', how='left')

            # 4. RNE registry data (same left-join pattern, rne_ prefix).
            rne_path = Path(PATH_RNE_CSV)
            if not rne_path.is_absolute():
                rne_path = BASE_DIR.parent / rne_path

            if rne_path.exists():
                print(f"Integrating RNE from {rne_path}")
                rne_df = pd.read_csv(rne_path)
                if 'name' in rne_df.columns:
                    rne_df['name_normalized'] = rne_df['name'].apply(normalize_company_name)
                    rne_subset = rne_df[['name_normalized', 'charika_id', 'tax_id', 'rc_number', 'founding_date_iso', 'legal_form', 'address', 'detail_url', 'capital']].copy()
                    rne_subset.rename(columns={
                        'charika_id': 'rne_id',
                        'tax_id': 'rne_tax_id',
                        'rc_number': 'rne_rc_number',
                        'founding_date_iso': 'rne_founding_date',
                        'legal_form': 'rne_legal_form',
                        'address': 'rne_address',
                        'detail_url': 'rne_detail_url',
                        'capital': 'rne_capital'
                    }, inplace=True)
                    self.companies_df = pd.merge(self.companies_df, rne_subset, on='name_normalized', how='left')

            # 5. Capital divergence: flag rows where JORT vs RNE capital differ
            # by more than the configured relative threshold (default 5%).
            threshold = float(os.getenv("CAPITAL_DIVERGENCE_THRESHOLD", 0.05))
            self.companies_df['capital_divergence'] = False

            if 'jort_capital' in self.companies_df.columns and 'rne_capital' in self.companies_df.columns:
                # Coerce to numeric; unparseable values become NaN and are skipped.
                self.companies_df['jort_capital'] = pd.to_numeric(self.companies_df['jort_capital'], errors='coerce')
                self.companies_df['rne_capital'] = pd.to_numeric(self.companies_df['rne_capital'], errors='coerce')

                mask = (self.companies_df['jort_capital'].notna()) & (self.companies_df['rne_capital'].notna()) & (self.companies_df['jort_capital'] > 0)
                diff = abs(self.companies_df.loc[mask, 'jort_capital'] - self.companies_df.loc[mask, 'rne_capital']) / self.companies_df.loc[mask, 'jort_capital']
                self.companies_df.loc[mask, 'capital_divergence'] = diff > threshold
            else:
                # Create empty columns so downstream schema expectations hold.
                if 'jort_capital' not in self.companies_df.columns: self.companies_df['jort_capital'] = pd.NA
                if 'rne_capital' not in self.companies_df.columns: self.companies_df['rne_capital'] = pd.NA
                if 'jort_ref' not in self.companies_df.columns: self.companies_df['jort_ref'] = pd.NA
                if 'jort_date' not in self.companies_df.columns: self.companies_df['jort_date'] = pd.NA
                if 'jort_text' not in self.companies_df.columns: self.companies_df['jort_text'] = pd.NA
                if 'rne_id' not in self.companies_df.columns: self.companies_df['rne_id'] = pd.NA
                if 'rne_tax_id' not in self.companies_df.columns: self.companies_df['rne_tax_id'] = pd.NA
                if 'rne_rc_number' not in self.companies_df.columns: self.companies_df['rne_rc_number'] = pd.NA
                if 'rne_founding_date' not in self.companies_df.columns: self.companies_df['rne_founding_date'] = pd.NA
                if 'rne_legal_form' not in self.companies_df.columns: self.companies_df['rne_legal_form'] = pd.NA
                if 'rne_address' not in self.companies_df.columns: self.companies_df['rne_address'] = pd.NA
                if 'rne_detail_url' not in self.companies_df.columns: self.companies_df['rne_detail_url'] = pd.NA

        except Exception as e:
            # Swallow-and-log by design: a data problem must not kill startup.
            print(f"Error loading combined data: {e}")
            import traceback
            traceback.print_exc()
            self.companies_df = pd.DataFrame()
            self.stats_data = {}
206
+
207
# Module-level singleton; DataLoader.__new__ guarantees a single instance.
data_loader = DataLoader()

def load_data():
    """Populate the shared DataLoader (intended to run at app startup)."""
    data_loader.load()

def get_companies_df():
    """Merged companies DataFrame (None until load_data() has run)."""
    return data_loader.companies_df

def get_stats_data():
    """Pre-aggregated stats dict (None until load_data() has run)."""
    return data_loader.stats_data
backend/app/services/llm_service.py ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Ba7ath LLM Analysis Service
3
+ ============================
4
+ Service d'analyse croisée des données Ahlya/JORT/RNE via Google Gemini.
5
+
6
+ Ce module utilise l'API REST Gemini DIRECTEMENT via httpx (pas le SDK
7
+ google-generativeai) pour forcer l'utilisation de l'endpoint v1 stable
8
+ et éviter le routage automatique vers v1beta qui provoque des erreurs
9
+ 404 sur Render et autres plateformes cloud.
10
+ """
11
+
12
+ import os
13
+ import json
14
+ import logging
15
+ from datetime import datetime
16
+
17
+ import httpx
18
+
19
+ # Configuration du logging spécifique au module Ba7ath
20
+ logger = logging.getLogger("ba7ath.llm")
21
+ logger.setLevel(logging.INFO)
22
+
23
+ # ── Constants ─────────────────────────────────────────────────────────────
24
+
25
+ GEMINI_API_BASE = "https://generativelanguage.googleapis.com/v1beta"
26
+ GEMINI_MODEL = "gemini-2.0-flash"
27
+ GEMINI_ENDPOINT = f"{GEMINI_API_BASE}/models/{GEMINI_MODEL}:generateContent"
28
+
29
+ # ── System Prompt (Expert Investigation) ──────────────────────────────────
30
+
31
+ SYSTEM_PROMPT = """أنت خبير تدقيق محقق في مشروع 'بحث' (Ba7ath). مهمتك هي مقارنة البيانات بدقة متناهية.
32
+
33
+ السياق القانوني:
34
+ - "شركة أهلية" (Entreprise Citoyenne) هي كيان قانوني أُنشئ بموجب القانون عدد 20 لسنة 2022.
35
+ - "الرائد الرسمي للجمهورية التونسية" (JORT) هو المنشور الرسمي الذي يتم فيه الإعلان عن تأسيس الشركات.
36
+ - "السجل الوطني للمؤسسات" (RNE) هو قاعدة البيانات الإدارية الرسمية.
37
+ - "المعرّف الجبائي" (Matricule Fiscal) هو رقم التعريف الضريبي.
38
+ - "الولاية" (Gouvernorat) هي الوحدة الإدارية في تونس (24 ولاية).
39
+
40
+ قواعد صارمة:
41
+ 1. لا تستنتج معلومات غير موجودة في البيانات المقدمة.
42
+ 2. إذا وجد اختلاف بين المصادر، صنفه كـ 'تضارب' (Conflict).
43
+ 3. اللغة المستخدمة في الإجابة هي العربية الرصينة (MSA).
44
+ 4. يجب أن يكون ملخص التحقيق (summary_ar) مهنيًا، مباشرًا، ومبنيًا فقط على الأدلة المقدمة.
45
+ 5. لا تضف نصوصًا تفسيرية خارج هيكل JSON المطلوب."""
46
+
47
+ # ── Fallback response ────────────────────────────────────────────────────
48
+
49
+ def _fallback_response(error_type: str, detail: str = "") -> dict:
50
+ """Génère une réponse JSON de secours en cas d'indisponibilité du LLM."""
51
+ return {
52
+ "match_score": 0,
53
+ "status": "Pending",
54
+ "findings": [],
55
+ "red_flags": [],
56
+ "summary_ar": f"تعذّر إجراء التحليل: {error_type}. {detail}".strip(),
57
+ "_error": error_type,
58
+ "_detail": detail,
59
+ }
60
+
61
+ # ══════════════════════════════════════════════════════════════════════════
62
+ # ██ LLM ANALYSIS SERVICE (Direct REST API — no SDK)
63
+ # ══════════════════════════════════════════════════════════════════════════
64
+
65
class LLMAnalysisService:
    """
    Cross-check analysis service calling the Gemini REST API directly
    with httpx, bypassing the google-generativeai SDK so the endpoint is
    pinned explicitly (see GEMINI_ENDPOINT). Generation is configured for
    determinism (temperature=0, topP=1, topK=1).
    """

    def __init__(self):
        # Key read once at construction; without it every call falls back.
        self.api_key = os.getenv("GEMINI_API_KEY")
        if not self.api_key:
            logger.warning("⚠️ GEMINI_API_KEY not set — LLM analysis will be unavailable")
        else:
            logger.info(f"✅ LLMAnalysisService initialized — model: {GEMINI_MODEL} (REST API direct)")

    @staticmethod
    def _build_prompt(ahlya_data: dict, jort_data: dict, rne_data: dict) -> str:
        """Build the Arabic comparison prompt embedding the three sources as JSON."""

        def fmt(data):
            # Pretty-print as JSON; Arabic "no data" placeholder when absent.
            return json.dumps(data, ensure_ascii=False, indent=2) if data else "لا توجد بيانات"

        return f"""قم بإجراء مقارنة شاملة ودقيقة بين المصادر الثلاثة التالية لهذه الشركة الأهلية التونسية.

═══════════════════════════════════════
المصدر الأول: بيانات أهلية (البيانات التصريحية)
═══════════════════════════════════════
{fmt(ahlya_data)}

═══════════════════════════════════════
المصدر الثاني: الرائد الرسمي (JORT)
═══════════════════════════════════════
{fmt(jort_data)}

═══════════════════════════════════════
المصدر الثالث: السجل الوطني للمؤسسات (RNE)
═══════════════════════════════════════
{fmt(rne_data)}

═══════════════════════════════════════
التعليمات:
═══════════════════════════════════════
1. قارن الاسم التجاري، رأس المال، والولاية.
2. تحقق من تطابق التواريخ والمعرّف الجبائي.
3. حدد أي تضاربات (Conflicts) أو نقاط مشبوهة.
4. أجب بصيغة JSON فقط وفق المخطط التالي بالضبط:

{{
  "match_score": <عدد صحيح من 0 إلى 100>,
  "status": "Verified" أو "Suspicious" أو "Conflict",
  "findings": ["نقطة تطابق 1", "نقطة تطابق 2"],
  "red_flags": ["تجاوز 1", "تجاوز 2"],
  "summary_ar": "ملخص التحقيق هنا"
}}"""

    async def analyze_cross_check(self, ahlya_data: dict, jort_data: dict, rne_data: dict) -> dict:
        """Run the three-way cross-check through the Gemini REST endpoint.

        Always returns a dict: either the model's JSON verdict
        (match_score/status/findings/red_flags/summary_ar) or a
        _fallback_response(...) payload describing the failure.
        """

        company_name = ahlya_data.get("name", "Unknown")

        if not self.api_key:
            logger.error(f"LLM analysis skipped for '{company_name}': no API key")
            return _fallback_response("no_api_key", "GEMINI_API_KEY غير مُعَيَّن")

        logger.info(f"🔍 Starting LLM cross-check for: {company_name}")
        start_time = datetime.now()
        prompt = self._build_prompt(ahlya_data, jort_data, rne_data)

        # Build the REST request body (responseMimeType forces JSON output).
        request_body = {
            "system_instruction": {
                "parts": [{"text": SYSTEM_PROMPT}]
            },
            "contents": [
                {
                    "parts": [{"text": prompt}]
                }
            ],
            "generationConfig": {
                "temperature": 0.0,
                "topP": 1,
                "topK": 1,
                "responseMimeType": "application/json"
            }
        }

        url = f"{GEMINI_ENDPOINT}?key={self.api_key}"

        try:
            async with httpx.AsyncClient(timeout=60.0) as client:
                response = await client.post(
                    url,
                    json=request_body,
                    headers={"Content-Type": "application/json"}
                )

                # Handle HTTP-level errors before trying to parse.
                if response.status_code == 429:
                    logger.warning(f"⚠️ Rate-limit Gemini (429) for '{company_name}'")
                    return _fallback_response("rate_limited", "الخدمة مشغولة حاليًا.")

                if response.status_code != 200:
                    error_detail = response.text[:300]
                    logger.error(f"❌ Gemini API {response.status_code} for '{company_name}': {error_detail}")
                    return _fallback_response(f"http_{response.status_code}", error_detail)

                # The candidate text is itself expected to be a JSON document.
                resp_json = response.json()
                candidates = resp_json.get("candidates", [])
                if not candidates:
                    logger.error(f"❌ No candidates in Gemini response for '{company_name}'")
                    return _fallback_response("no_candidates", "لم يتم الحصول على نتائج من النموذج.")

                text = candidates[0].get("content", {}).get("parts", [{}])[0].get("text", "")
                result = json.loads(text)

                elapsed = (datetime.now() - start_time).total_seconds()
                logger.info(
                    f"✅ Analysis complete for '{company_name}' — "
                    f"score={result.get('match_score')}, status={result.get('status')}, "
                    f"time={elapsed:.1f}s"
                )
                return result

        except json.JSONDecodeError as e:
            logger.error(f"❌ JSONDecodeError for '{company_name}': {e}")
            return _fallback_response("json_parse_error", "تعذّر تحليل استجابة النموذج.")

        except httpx.TimeoutException:
            logger.error(f"❌ Timeout for '{company_name}' (60s limit)")
            return _fallback_response("timeout", "انتهت مهلة الاتصال بالنموذج.")

        except Exception as e:
            logger.error(f"❌ Unexpected error for '{company_name}': {e}")
            return _fallback_response("unexpected_error", str(e))
199
+
200
# Module-level singleton shared by the API layer.
llm_service = LLMAnalysisService()
backend/app/services/osint_links.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import urllib.parse
2
+ import os
3
+ from dotenv import load_dotenv
4
+
5
+ load_dotenv()
6
+
7
# When enabled, also expose internal registry pivot links (placeholders).
INTERNAL_OSINT_MODE = os.getenv("INTERNAL_OSINT_MODE", "False").lower() == "true"

def generate_links(company_name: str, wilaya: str):
    """Build OSINT pivot URLs for a company.

    The Google query combines the name, the wilaya and a site: filter, so
    the whole query string is percent-encoded. (The previous version left
    raw spaces and the un-encoded wilaya in the URL, producing an invalid
    query string.)
    """
    name_q = urllib.parse.quote(company_name)
    google_q = urllib.parse.quote(f"{company_name} {wilaya} site:tn")

    links = {
        "Google": f"https://www.google.com/search?q={google_q}",
        "Facebook": f"https://www.facebook.com/search/top?q={name_q}"
    }

    if INTERNAL_OSINT_MODE:
        links["RNE"] = f"https://www.registre-entreprises.tn/search?q={name_q}"  # Placeholder
        links["JORT"] = f"http://www.iort.gov.tn/search?q={name_q}"  # Placeholder

    return links
22
+
23
def get_company_links(company_id: int):
    """Return OSINT links for the company with internal *company_id*.

    Returns an empty dict when the id is unknown or data is not loaded.
    """
    # Local import avoids a circular dependency with data_loader at import time.
    from app.services.data_loader import get_companies_df
    df = get_companies_df()

    company = df[df['id'] == company_id]
    if company.empty:
        return {}

    row = company.iloc[0]
    return generate_links(row['name'], row['wilaya'])
backend/app/services/risk_engine.py ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from app.services.data_loader import get_companies_df
2
+ from app.models.schemas import WilayaRisk, Flag
3
+ import numpy as np
4
+
5
def generate_risk_commentary(wilaya_data: dict, risk_scores: dict) -> dict:
    """Produce the Arabic editorial layer for one wilaya.

    Maps the three sub-scores (s1 dependency, s2 concentration, s3
    governance) and the composite index to a risk tier, a commentary
    string and a list of investigation recommendations.
    """
    s1 = risk_scores['s1']
    s2 = risk_scores['s2']
    s3 = risk_scores['s3']
    index = risk_scores['baath_index']

    # Risk tier derived from the composite index.
    if index >= 70:
        level, level_ar, color = "HIGH", "مرتفع", "red"
    elif index >= 40:
        level, level_ar, color = "MEDIUM", "متوسط", "amber"
    else:
        level, level_ar, color = "LOW", "منخفض", "emerald"

    comments = []
    group_counts = wilaya_data['groups']
    group_total = sum(group_counts.values()) or 1

    # s1 — dependency on public-resource sectors (named only when one of the
    # resource groups alone exceeds 30% of the wilaya's companies).
    if s1 > 0.6:
        public_resource = ('AGRI_NATUREL', 'ENVIRONNEMENT', 'ENERGIE_MINES')
        dominant_groups = [
            g for g, count in group_counts.items()
            if g in public_resource and count / group_total > 0.3
        ]
        if dominant_groups:
            comments.append(f"الولاية تعتمد بشكل كبير على الأنشطة المرتبطة بالموارد العمومية ({', '.join(dominant_groups)})")

    # s2 — sectoral concentration.
    if s2 > 0.7:
        if group_counts:
            top_group, top_count = max(group_counts.items(), key=lambda kv: kv[1])
            pct = top_count / group_total * 100
            comments.append(f"تركيز عالٍ جدا في مجموعة نشاط واحدة ({top_group}: {pct:.0f}%)")
    elif s2 > 0.5:
        comments.append("تركيز ملحوظ في عدد محدود من القطاعات")

    # s3 — governance imbalance between local and regional companies.
    if s3 > 0.5:
        total_types = sum(wilaya_data['types'].values()) or 1
        local_pct = wilaya_data['types'].get('محلية', 0) / total_types * 100
        regional_pct = wilaya_data['types'].get('جهوية', 0) / total_types * 100
        comments.append(f"اختلال واضح في الحوكمة: {local_pct:.0f}% محلية مقابل {regional_pct:.0f}% جهوية")

    # Actionable follow-ups for investigators, driven by the same thresholds.
    recommendations = []
    if s1 > 0.6:
        recommendations.append("التحقق من الأراضي الدولية المُسندة (OTD)")
        recommendations.append("البحث في صفقات التطهير والبيئة (TUNEPS)")
    if s2 > 0.7:
        recommendations.append("تحليل الاحتكارات القطاعية المحتملة")
    if s3 > 0.5:
        recommendations.append("مراجعة التوازن بين المحلي والجهوي في تركيبة مجالس الإدارة")
    if index > 70:
        recommendations.append("يُنصح بتحقيق صحفي معمق على هذه الولاية")

    return {
        "level": level,
        "level_ar": level_ar,
        "color": color,
        "comment_ar": " · ".join(comments) if comments else "لا توجد إشارات خطر واضحة في البيانات الحالية",
        "recommendations": recommendations
    }
72
+
73
def compute_baath_index_v2(wilaya_df):
    """
    Computes Ba7ath Index (0-100) using continuous formula:
    INDEX = 100 * (0.4 * s1 + 0.4 * s2 + 0.2 * s3)

    s1: Dependency on public-resource sectors (AGRI, ENV, MINES)
    s2: Sector concentration (Max share of any group)
    s3: Governance imbalance (abs(local - regional))

    Returns (baath_index, s1, s2, s3, flags, details) — always a 6-tuple.
    """
    # BUGFIX: the empty case previously returned a 5-tuple (no `details`),
    # which broke callers unpacking six values.
    if wilaya_df.empty:
        return 0.0, 0.0, 0.0, 0.0, [], {'groups': {}, 'types': {}}

    total = len(wilaya_df)
    flags = []

    # --- s1: share of companies in public-resource sectors ---
    resource_groups = ['AGRI_NATUREL', 'ENVIRONNEMENT', 'ENERGIE_MINES']
    resource_count = wilaya_df[wilaya_df['activity_group'].isin(resource_groups)].shape[0]
    s1 = resource_count / total if total > 0 else 0.0

    if s1 > 0.6:
        flags.append(Flag(code="RESOURCE_DEPENDENT", severity="high", label_ar="اعتماد كبير على الأنشطة المرتبطة بالموارد العمومية"))

    # --- s2: max share held by any single activity group ---
    group_counts = wilaya_df['activity_group'].value_counts(normalize=True)
    s2 = group_counts.max() if not group_counts.empty else 0.0

    if s2 > 0.7:
        flags.append(Flag(code="ULTRA_CONCENTRATION", severity="medium", label_ar="تركيز عالٍ في مجموعة نشاط واحدة"))

    # --- s3: |share(local) - share(regional)| company types ---
    type_counts = wilaya_df['type'].value_counts(normalize=True)
    pct_local = type_counts.get('محلية', 0.0)
    pct_regional = type_counts.get('جهوية', 0.0)
    s3 = abs(pct_local - pct_regional)

    if s3 > 0.5:
        flags.append(Flag(code="GOVERNANCE_IMBALANCE", severity="low", label_ar="اختلال واضح بين الشركات المحلية والجهوية"))

    # --- Composite score, capped at 100 ---
    raw_index = 100 * (0.4 * s1 + 0.4 * s2 + 0.2 * s3)
    baath_index = round(min(raw_index, 100), 1)

    # Raw distributions, consumed by generate_risk_commentary().
    details = {
        'groups': wilaya_df['activity_group'].value_counts().to_dict(),
        'types': wilaya_df['type'].value_counts().to_dict()
    }

    return baath_index, round(s1, 2), round(s2, 2), round(s3, 2), flags, details
127
+
128
def get_risk_for_wilaya(wilaya: str):
    """Compute the full WilayaRisk payload (scores + Arabic editorial) for one wilaya.

    Returns None when no data at all is loaded, and a neutral LOW-risk
    payload when the wilaya has no companies.
    """
    # NOTE(review): get_companies_df() may return None before load_data()
    # has run, in which case .empty would raise — confirm startup ordering.
    df = get_companies_df()
    if df.empty:
        return None

    wilaya_df = df[df['wilaya'] == wilaya]
    if wilaya_df.empty:
        # Neutral risk when the wilaya has no companies in the dataset.
        return WilayaRisk(
            wilaya=wilaya, baath_index=0, s1=0, s2=0, s3=0, flags=[],
            level="LOW", level_ar="منخفض", color="emerald",
            comment_ar="لا توجد بيانات كافية", recommendations=[]
        )

    score, s1, s2, s3, flags, details = compute_baath_index_v2(wilaya_df)

    # Editorial layer: level/level_ar/color/comment_ar/recommendations.
    editorial = generate_risk_commentary(details, {
        's1': s1, 's2': s2, 's3': s3, 'baath_index': score
    })

    return WilayaRisk(
        wilaya=wilaya,
        baath_index=score,
        s1=s1,
        s2=s2,
        s3=s3,
        flags=flags,
        **editorial
    )
158
+
159
def get_all_risks():
    """Score every wilaya present in the dataset, highest Ba7ath index first."""
    df = get_companies_df()
    if df.empty:
        return []

    scored = [get_risk_for_wilaya(w) for w in df['wilaya'].unique()]
    scored.sort(key=lambda r: r.baath_index, reverse=True)
    return scored
backend/compare_by_name_fuzzy.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from pathlib import Path
3
+ from rapidfuzz import process, fuzz
4
+
5
+ # ------------- CONFIG À ADAPTER --------------
6
+
7
+ # CSV A : ta "liste politique / terrain"
8
+ CSV_A = Path("liste_270.csv") # ex : Google Sheet complet
9
+
10
+ # CSV B : la liste des stés qui ont un RNE (ex. Trovit / base enrichie)
11
+ CSV_B = Path("liste_rne_ou_trovit.csv")
12
+
13
+ # Nom des colonnes contenant les NOMS à comparer
14
+ # A peut être en arabe, B en français, ou l'inverse.
15
+ # Idéalement, tu rajoutes dans chaque CSV une colonne 'name_canon'
16
+ # (normalisée/ traduite avec Qwen) et tu mets ces noms ici.
17
+ COL_NAME_A = "name" # ex : "Nom société (FR)" ou "الاسم"
18
+ COL_NAME_B = "name" # ex : nom Trovit en arabe
19
+
20
+ # (Optionnel) colonnes de contexte à garder pour l'analyse
21
+ CTX_COLS_A = ["wilaya", "delegation"] # adapte à ton fichier
22
+ CTX_COLS_B = ["wilaya", "delegation"] # idem
23
+
24
+ # Seuils fuzzy
25
+ # score >= HIGH_MATCH -> match sûr
26
+ # LOW_MATCH <= score < HIGH_MATCH -> match douteux (à vérifier à la main / par LLM)
27
+ # score < LOW_MATCH -> considéré comme "non trouvé"
28
+ HIGH_MATCH = 90
29
+ LOW_MATCH = 70
30
+
31
+ # Fichiers de sortie
32
+ OUT_MATCHES = Path("matches_surs.csv")
33
+ OUT_MAYBE = Path("matches_douteux.csv")
34
+ OUT_MISSING = Path("non_trouves_par_nom.csv")
35
+
36
+ # Encodage (UTF‑8 avec BOM fonctionne bien pour arabe + Excel)
37
+ ENC_A = "utf-8-sig"
38
+ ENC_B = "utf-8-sig"
39
+
40
+ # ------------- FONCTIONS ---------------------
41
+
42
+
43
def normalize_name(s: str) -> str:
    """Light cleanup so names from both files can be fuzzy-compared.

    Lowercases, collapses whitespace and strips generic legal/corporate
    words (FR and AR). Terms are removed only as whole space-delimited
    tokens/phrases: the previous version used plain substring replacement,
    which mangled real names (e.g. "poste" -> "po" because of "ste",
    "salim" -> "lim" because of "sa").
    """
    if pd.isna(s):
        return ""
    s = str(s).strip().lower()

    generic_terms = [
        # French legal forms / fillers
        "société anonyme", "société à responsabilité limitée",
        "societe", "société", "ste", "sa", "sarl",
        # Arabic equivalents
        "شركة أهلية", "شركة الاهلية", "شركة الأهلية",
        "شركة", "الشركة", "الاهلية", "الأهلية", "الجهوية", "المحلية",
    ]
    # Longest first, so multi-word phrases are removed before their parts.
    generic_terms.sort(key=len, reverse=True)

    # Pad with spaces so only complete tokens/phrases can match.
    padded = f" {' '.join(s.split())} "
    for term in generic_terms:
        needle = f" {term} "
        while needle in padded:
            padded = padded.replace(needle, " ")

    return padded.strip()
67
+
68
+
69
def load_csv(path: Path, name_col: str, ctx_cols: list, enc: str) -> pd.DataFrame:
    """Load one CSV and prepare it for fuzzy matching.

    Adds two working columns: __name_raw__ (the original value of
    *name_col*) and __name_norm__ (normalize_name applied), then keeps
    only those plus whichever *ctx_cols* actually exist in the file.

    Raises FileNotFoundError / KeyError with actionable messages.
    """
    if not path.exists():
        raise FileNotFoundError(path.resolve())
    df = pd.read_csv(path, encoding=enc)
    if name_col not in df.columns:
        raise KeyError(f"Colonne '{name_col}' absente dans {path.name}.\n"
                       f"Colonnes dispo : {list(df.columns)}")
    df["__name_raw__"] = df[name_col]
    df["__name_norm__"] = df[name_col].apply(normalize_name)

    # Keep the name columns plus any requested context columns present.
    keep_cols = ["__name_raw__", "__name_norm__"]
    for c in ctx_cols:
        if c in df.columns:
            keep_cols.append(c)
    return df[keep_cols].copy()
85
+
86
+
87
def main():
    """End-to-end fuzzy reconciliation: load both CSVs, best-match every
    row of A against B with RapidFuzz, and export sure / doubtful /
    not-found buckets as three CSV files."""
    # 1. Load both CSV files.
    df_a = load_csv(CSV_A, COL_NAME_A, CTX_COLS_A, ENC_A)
    df_b = load_csv(CSV_B, COL_NAME_B, CTX_COLS_B, ENC_B)

    print(f"[INFO] Lignes fichier A : {len(df_a)}")
    print(f"[INFO] Lignes fichier B : {len(df_b)}")

    # 2. Candidate pool for RapidFuzz: normalized names of file B.
    names_b = df_b["__name_norm__"].tolist()

    best_matches = []
    for idx, row in df_a.iterrows():
        name_a_norm = row["__name_norm__"]

        # Empty normalized name cannot match anything.
        if not name_a_norm:
            best_matches.append({"score": 0, "b_index": None})
            continue

        # RapidFuzz: extractOne returns (label, score, index) or None.
        match = process.extractOne(
            name_a_norm,
            names_b,
            scorer=fuzz.token_sort_ratio,
        )
        if match is None:
            best_matches.append({"score": 0, "b_index": None})
        else:
            label_b, score, b_idx = match
            best_matches.append({"score": score, "b_index": b_idx})

    # 3. Build the result DataFrame (file A rows + match metadata).
    res = df_a.copy()
    res["match_score"] = [m["score"] for m in best_matches]
    res["b_index"] = [m["b_index"] for m in best_matches]

    # Join the matched B-side names back in.
    res["name_b_raw"] = res["b_index"].apply(
        lambda i: df_b.loc[i, "__name_raw__"] if pd.notna(i) else None
    )
    res["name_b_norm"] = res["b_index"].apply(
        lambda i: df_b.loc[i, "__name_norm__"] if pd.notna(i) else None
    )

    # Add context columns from B (wilaya, delegation, ...), suffixed "_b".
    # The lambda's reference to `c` is safe: apply() runs eagerly per loop turn.
    for c in CTX_COLS_B:
        if c in df_b.columns:
            col_b = f"{c}_b"
            res[col_b] = res["b_index"].apply(
                lambda i: df_b.loc[i, c] if pd.notna(i) else None
            )

    # 4. Split into the three confidence buckets by score thresholds.
    matches_surs = res[res["match_score"] >= HIGH_MATCH].copy()
    matches_douteux = res[
        (res["match_score"] >= LOW_MATCH) & (res["match_score"] < HIGH_MATCH)
    ].copy()
    non_trouves = res[res["match_score"] < LOW_MATCH].copy()

    print(f"[INFO] Matchs sûrs (score >= {HIGH_MATCH}) : {len(matches_surs)}")
    print(f"[INFO] Matchs douteux ({LOW_MATCH} <= score < {HIGH_MATCH}) : {len(matches_douteux)}")
    print(f"[INFO] Non trouvés (score < {LOW_MATCH}) : {len(non_trouves)}")

    # 5. Export (utf-8-sig keeps Arabic readable in Excel).
    matches_surs.to_csv(OUT_MATCHES, index=False, encoding="utf-8-sig")
    matches_douteux.to_csv(OUT_MAYBE, index=False, encoding="utf-8-sig")
    non_trouves.to_csv(OUT_MISSING, index=False, encoding="utf-8-sig")

    print("[OK] Export :")
    print(" ", OUT_MATCHES.resolve())
    print(" ", OUT_MAYBE.resolve())
    print(" ", OUT_MISSING.resolve())


if __name__ == "__main__":
    main()
backend/compare_data.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# compare_data.py
"""Compare the Trovit CSV (~270 companies) against the enriched SQLite DB.

Outputs a CSV listing companies present in the CSV but absent from the
database, matched on the fiscal identifier ``tax_id``.
"""
import sqlite3
from contextlib import closing
from pathlib import Path

import pandas as pd

# ----------------- CONFIG -----------------

# SQLite database holding the 141 enriched companies
DB_PATH = Path("ba7ath_enriched.db")

# Full CSV of the ~270 Trovit companies
CSV_PATH = Path("trovit_charikat_ahliya_all.csv")

# Table + JSON column inside SQLite
ENRICHED_TABLE = "enriched_companies"
DATA_COLUMN = "data"

# ----------------- CODE -----------------


def normalize_tax_id(series: pd.Series) -> pd.Series:
    """Normalize tax ids into comparable strings.

    pandas may read the CSV column as int/float while ``json_extract``
    returns TEXT; without normalization the merge silently matches nothing.
    """
    return (
        series.astype("string")
        .str.strip()
        .str.replace(r"\.0$", "", regex=True)  # 1234.0 -> 1234 (float parsing)
    )


def main():
    """Load both sources, diff them on ``tax_id`` and export the missing rows."""
    # 1. Load the ~270 companies from the CSV
    if not CSV_PATH.exists():
        raise FileNotFoundError(f"CSV introuvable : {CSV_PATH.resolve()}")

    df_270 = pd.read_csv(CSV_PATH)
    print(f"[INFO] Sociétés dans le CSV Trovit : {len(df_270)}")

    if "tax_id" not in df_270.columns:
        raise KeyError(
            "La colonne 'tax_id' est absente du CSV. "
            "Vérifie l'en-tête de trovit_charikat_ahliya_all.csv."
        )

    # 2. Open the SQLite database (closing() guarantees release on error)
    if not DB_PATH.exists():
        raise FileNotFoundError(f"Base SQLite introuvable : {DB_PATH.resolve()}")

    with closing(sqlite3.connect(DB_PATH)) as conn:
        cur = conn.cursor()

        # 3. Make sure the table actually exists
        cur.execute(
            "SELECT name FROM sqlite_master WHERE type='table' AND name=?",
            (ENRICHED_TABLE,),
        )
        if cur.fetchone() is None:
            tables = [
                r[0]
                for r in cur.execute(
                    "SELECT name FROM sqlite_master WHERE type='table'"
                ).fetchall()
            ]
            raise RuntimeError(
                f"La table '{ENRICHED_TABLE}' n'existe pas dans la base.\n"
                f"Tables disponibles : {tables}"
            )

        # 4. Extract the tax_ids already present under data.rne
        query = f"""
        SELECT DISTINCT
            json_extract({DATA_COLUMN}, '$.rne.tax_id') AS tax_id
        FROM {ENRICHED_TABLE}
        WHERE json_extract({DATA_COLUMN}, '$.rne.tax_id') IS NOT NULL
        """
        df_rne = pd.read_sql(query, conn)

    print(f"[INFO] Sociétés avec tax_id dans la base : {len(df_rne)}")

    # 5. Compare by tax_id, with both sides coerced to the same dtype
    df_270["tax_id"] = normalize_tax_id(df_270["tax_id"])
    df_rne["tax_id"] = normalize_tax_id(df_rne["tax_id"])
    merged = df_270.merge(df_rne, on="tax_id", how="left", indicator=True)

    # 6. Keep the ones absent from the database
    missing = merged[merged["_merge"] == "left_only"].drop(columns=["_merge"])
    print(
        "[INFO] Sociétés présentes dans le CSV mais absentes de la base :",
        len(missing),
    )

    # 7. Save the result
    out_path = Path("trovit_missing_not_in_rne.csv")
    missing.to_csv(out_path, index=False, encoding="utf-8-sig")
    print(f"[OK] Fichier généré : {out_path.resolve()}")


if __name__ == "__main__":
    main()
backend/compare_names_with_qwen.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # compare_names_with_qwen.py
2
+ import csv
3
+ import json
4
+ import time
5
+ import os
6
+ from pathlib import Path
7
+
8
+ import requests
9
+ from dotenv import load_dotenv
10
+
11
+ # Load environment variables
12
+ load_dotenv()
13
+
14
+ # ---------------- CONFIG ----------------
15
+
16
+ OLLAMA_URL = os.getenv("OLLAMA_URL", "http://127.0.0.1:11434/api/chat")
17
+ MODEL_NAME = os.getenv("MODEL_NAME", "qwen2.5:latest")
18
+
19
+ CSV_AR = Path(os.getenv("PATH_AHLYA_CSV", "Ahlya_Total_Feuil1.csv"))
20
+ CSV_FR = Path(os.getenv("PATH_RNE_CSV", "trovit_charikat_ahliya_all.csv"))
21
+
22
+ OUT_MATCHES = Path("matches_qwen.csv")
23
+ OUT_NOT_IN_TROVIT = Path("not_in_trovit_qwen.csv")
24
+
25
+ SLEEP_SECONDS = 0.05 # petite pause entre appels
26
+
27
+ # ----------------------------------------
28
+
29
+
30
def load_names_ar(path: Path):
    """Load company names (Arabic) from the first CSV column.

    Skips the header row plus any empty rows/cells, and returns a list of
    ``{"name_ar": <name>}`` dicts.
    """
    if not path.exists():
        raise FileNotFoundError(path.resolve())
    names = []
    with path.open("r", encoding="utf-8-sig", newline="") as handle:
        records = csv.reader(handle)
        next(records, None)  # drop the header line
        for record in records:
            if record:
                cell = (record[0] or "").strip()
                if cell:
                    names.append({"name_ar": cell})
    print(f"[INFO] Noms AR chargés : {len(names)}")
    return names
47
+
48
+
49
def load_names_fr(path: Path):
    """Load company names (French) from the third CSV column.

    Rows with fewer than three columns or an empty third cell are skipped.
    """
    if not path.exists():
        raise FileNotFoundError(path.resolve())
    collected = []
    with path.open("r", encoding="utf-8-sig", newline="") as handle:
        records = csv.reader(handle)
        next(records, None)  # skip the header row
        for record in records:
            if len(record) >= 3:
                cell = (record[2] or "").strip()
                if cell:
                    collected.append(cell)
    print(f"[INFO] Noms FR chargés (Trovit) : {len(collected)}")
    return collected
66
+
67
+
68
def build_fr_list_for_prompt(names_fr):
    """Render the French names as a 1-based numbered list for the LLM prompt."""
    return "\n".join(
        f"{position}. {label}"
        for position, label in enumerate(names_fr, start=1)
    )
74
+
75
+
76
def ask_qwen_match(name_ar, fr_list_text):
    """Ask Qwen whether the Arabic name matches one or more French names.

    Sends a single chat request to the local Ollama endpoint and parses the
    model's JSON answer.

    Returns:
        (match, indexes, reason): ``match`` is a bool, ``indexes`` a list of
        1-based positions into the French list, ``reason`` a short string.

    Raises:
        ValueError: if the model's reply is not valid JSON.
        requests.HTTPError: on a non-2xx response from Ollama.
    """
    system_prompt = (
        "Tu es un assistant qui fait du rapprochement de noms de sociétés "
        "entre l'arabe et le français.\n"
        "Règles :\n"
        "- Tu dois dire si le nom arabe désigne la même société qu'un ou plusieurs "
        "noms français dans la liste.\n"
        "- Prends en compte le sens, pas la traduction littérale exacte.\n"
        "- Si tu n'es PAS sûr, considère qu'il n'y a PAS de correspondance.\n"
        "- Réponds STRICTEMENT en JSON valide, sans texte autour.\n"
        '  Format : {"match": true/false, "indexes": [liste_entiers], "reason": "texte court"}.\n'
        "- Les indexes commencent à 1 et correspondent à la numérotation de la liste française."
    )

    user_prompt = (
        "Nom de la société en arabe :\n"
        f"{name_ar}\n\n"
        "Liste des noms de sociétés en français :\n"
        f"{fr_list_text}\n\n"
        "Question :\n"
        "- Le nom arabe correspond-il à une ou plusieurs sociétés françaises dans cette liste ?\n"
        "- Si oui, donne les indexes exacts dans le champ \"indexes\".\n"
        "- Si non, renvoie match=false et indexes=[]."
    )

    payload = {
        "model": MODEL_NAME,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        "stream": False,
    }

    resp = requests.post(OLLAMA_URL, json=payload, timeout=300)
    resp.raise_for_status()
    data = resp.json()
    # Chat API puts the text under message.content; older /api/generate
    # style responses use a top-level "response" key.
    content = data.get("message", {}).get("content", "").strip()
    if not content and "response" in data:
        content = data["response"].strip()

    # LLMs often wrap the JSON in Markdown code fences (```json ... ```)
    # despite the instructions: strip them before parsing.
    if content.startswith("```"):
        content = content.strip("`")
        if content.lower().startswith("json"):
            content = content[4:]
        content = content.strip()

    try:
        result = json.loads(content)
    except json.JSONDecodeError:
        raise ValueError(f"Réponse non JSON de Qwen : {content}")

    match = bool(result.get("match", False))
    indexes = result.get("indexes", []) or []
    if not isinstance(indexes, list):
        indexes = []  # defensive: model sometimes returns a scalar here
    reason = str(result.get("reason", "")).strip()

    return match, indexes, reason
130
+
131
+
132
def main():
    """Full pipeline: load both CSVs, query Qwen name by name, then write
    the confirmed matches and the not-found names into two CSV files."""
    rows_ar = load_names_ar(CSV_AR)
    names_fr = load_names_fr(CSV_FR)
    fr_list_text = build_fr_list_for_prompt(names_fr)

    matches = []
    not_found = []
    total = len(rows_ar)

    for position, row in enumerate(rows_ar, start=1):
        name_ar = row["name_ar"]
        print(f"[{position}/{total}] Qwen compare : {name_ar}")

        try:
            match, indexes, reason = ask_qwen_match(name_ar, fr_list_text)
        except Exception as e:
            print(f"   [ERREUR] {e}")
            match, indexes, reason = False, [], f"error: {e}"

        if match and indexes:
            # Keep only indexes that actually fall inside the French list.
            in_range = [idx for idx in indexes if 1 <= idx <= len(names_fr)]
            matches.append({
                "name_ar": name_ar,
                "matched_indexes": ";".join(str(x) for x in indexes),
                "matched_names_fr": " | ".join(names_fr[idx - 1] for idx in in_range),
                "reason": reason,
            })
        else:
            not_found.append({
                "name_ar": name_ar,
                "reason": reason,
            })

        time.sleep(SLEEP_SECONDS)

    # Write out the results
    def write_rows(path, fieldnames, rows):
        with path.open("w", encoding="utf-8-sig", newline="") as handle:
            writer = csv.DictWriter(handle, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(rows)

    write_rows(
        OUT_MATCHES,
        ["name_ar", "matched_indexes", "matched_names_fr", "reason"],
        matches,
    )
    write_rows(OUT_NOT_IN_TROVIT, ["name_ar", "reason"], not_found)

    print(f"[OK] Matchs écrits dans : {OUT_MATCHES.resolve()}")
    print(f"[OK] Non présents (selon Qwen) : {OUT_NOT_IN_TROVIT.resolve()}")
    print(f"[INFO] Total matchs : {len(matches)}, non trouvés : {len(not_found)}")


if __name__ == "__main__":
    main()
backend/create_admin.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from sqlalchemy.orm import Session
from app.database import SessionLocal, engine, Base
from app.models.user_models import User
from app.services.auth_service import get_password_hash
import sys

# Ensure tables exist before inserting the first user.
Base.metadata.create_all(bind=engine)


def create_admin_user(email, password, full_name):
    """Create an active admin user, unless the email is already taken.

    Idempotent: prints a notice and returns if the user already exists.
    Any failure is printed; the session is rolled back and always closed.
    """
    db: Session = SessionLocal()
    try:
        user = db.query(User).filter(User.email == email).first()
        if user:
            print(f"User {email} already exists.")
            return

        new_user = User(
            email=email,
            hashed_password=get_password_hash(password),
            full_name=full_name,
            is_active=True,
            is_admin=True,
        )
        db.add(new_user)
        db.commit()
        db.refresh(new_user)
        print(f"Admin user {email} created successfully.")
    except Exception as e:
        # Leave the session clean if the INSERT/commit failed; otherwise the
        # aborted transaction lingers until close().
        db.rollback()
        print(f"Error creating user: {e}")
    finally:
        db.close()


if __name__ == "__main__":
    if len(sys.argv) < 3:
        print("Usage: python create_admin.py <email> <password> [full_name]")
        sys.exit(1)

    create_admin_user(
        sys.argv[1],
        sys.argv[2],
        sys.argv[3] if len(sys.argv) > 3 else "Admin User",
    )
backend/enrich_not_in_trovit.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# enrich_not_in_trovit.py
"""Enrich the companies absent from Trovit with details from the Ahlya CSV."""
import pandas as pd
from pathlib import Path

# Input files
CSV_NOT_IN = Path("not_in_trovit_qwen.csv")
CSV_AHLYA = Path("Ahlya_Total_Feuil1.csv")

# Output file
CSV_OUT = Path("not_in_trovit_enriched.csv")


def main():
    """Left-join the not-in-Trovit names onto the Ahlya details and export."""
    for required in (CSV_NOT_IN, CSV_AHLYA):
        if not required.exists():
            raise FileNotFoundError(required.resolve())

    # 1. Load both files
    df_not = pd.read_csv(CSV_NOT_IN, encoding="utf-8-sig")
    df_ah = pd.read_csv(CSV_AHLYA, encoding="utf-8-sig")

    # 2. Check the expected columns
    if "name_ar" not in df_not.columns:
        raise KeyError(f"'name_ar' manquant dans {CSV_NOT_IN.name} ; colonnes = {list(df_not.columns)}")

    col_nom_ahlya = "اسم_الشركة"
    if col_nom_ahlya not in df_ah.columns:
        raise KeyError(f"'{col_nom_ahlya}' manquant dans {CSV_AHLYA.name} ; colonnes = {list(df_ah.columns)}")

    # 3. Light name normalization on both sides
    def norm(value):
        return "" if pd.isna(value) else str(value).strip()

    df_not["__key__"] = df_not["name_ar"].map(norm)
    df_ah["__key__"] = df_ah[col_nom_ahlya].map(norm)

    # 4. Detail columns to pull back from Ahlya
    cols_details = [
        col_nom_ahlya,
        "الموضوع / النشاط",
        "العنوان",
        "الولاية",
        "المعتمدية",
        "المنطقة",
        "النوع",
    ]

    # Keep only the useful columns + the join key
    keep_ah = [c for c in cols_details if c in df_ah.columns] + ["__key__"]
    df_ah_small = df_ah[keep_ah].drop_duplicates("__key__")

    # 5. Left merge: every not-in row kept, details taken from Ahlya
    df_merged = df_not.merge(
        df_ah_small,
        on="__key__",
        how="left",
        suffixes=("", "_ahlya"),
    )

    # 6. Drop the technical join key
    df_merged.drop(columns=["__key__"], inplace=True)

    # 7. Save
    df_merged.to_csv(CSV_OUT, index=False, encoding="utf-8-sig")
    print(f"[OK] Fichier enrichi écrit dans : {CSV_OUT.resolve()}")
    print(f"Lignes : {len(df_merged)}")


if __name__ == "__main__":
    main()
backend/inspect_db.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# inspect_db.py
"""Quick diagnostic dump of the SQLite database: attached DBs, tables, schemas."""
import sqlite3
from contextlib import closing
from pathlib import Path

# Try this name first, then adapt (microsite.db, database.sqlite, instance/app.db, etc.)
DB_PATH = Path("ba7ath_enriched.db")


def main():
    """Print database file info, attached databases, tables and their columns."""
    print("=== Inspection de la base SQLite ===")
    print("Chemin supposé :", DB_PATH.resolve())

    if not DB_PATH.exists():
        print("[ERREUR] Fichier introuvable :", DB_PATH.resolve())
        return

    print("Taille fichier (octets) :", DB_PATH.stat().st_size)

    # closing() guarantees the connection is released even if a query fails.
    with closing(sqlite3.connect(DB_PATH)) as conn:
        print("\n=== Bases attachées ===")
        for row in conn.execute("PRAGMA database_list;"):
            # (schema, name, file)
            print(row)

        print("\n=== Tables SQLite ===")
        tables = [
            r[0]
            for r in conn.execute(
                "SELECT name FROM sqlite_master WHERE type='table'"
            ).fetchall()
        ]
        if not tables:
            print("(aucune table utilisateur)")
        for name in tables:
            print("-", name)

        print("\n=== Structure des tables ===")
        for name in tables:
            print(f"\nTable: {name}")
            # Quote the table name: handles names with spaces/special characters.
            for col in conn.execute(f'PRAGMA table_info("{name}")'):
                print("  ", col)


if __name__ == "__main__":
    main()
backend/readme.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Ba7ath OSINT API
3
+ emoji: 🛡️
4
+ colorFrom: green
5
+ colorTo: blue
6
+ sdk: gradio
7
+ app_file: app.py
8
+ pinned: false
9
+ ---
10
+
11
+ # Ba7ath OSINT API
12
+ Backend pour l'investigation et l'analyse de risque.
backend/test_auth_flow.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import requests
import sys

BASE_URL = "http://localhost:8000/api/v1"
EMAIL = "admin@ba7ath.com"
PASSWORD = "admin123"
TIMEOUT = 10  # seconds — prevents the script from hanging forever on a dead backend


def test_auth():
    """Smoke-test the auth flow: login, two protected endpoints, then a
    request without a token that must be rejected with 401."""
    print(f"Testing auth on {BASE_URL}...")

    # 1. Login
    print("\n1. Logging in...")
    try:
        response = requests.post(
            f"{BASE_URL}/auth/login",
            data={"username": EMAIL, "password": PASSWORD},
            timeout=TIMEOUT,
        )
        if response.status_code != 200:
            print(f"Login failed: {response.status_code} - {response.text}")
            return

        token_data = response.json()
        access_token = token_data.get("access_token")
        print(f"Login successful! Token: {access_token[:20]}...")
    except Exception as e:
        print(f"Login failed: {e}")
        return

    # 2. Access protected endpoint (Auth Me)
    print("\n2. Accessing /auth/me (Protected)...")
    headers = {"Authorization": f"Bearer {access_token}"}
    response = requests.get(f"{BASE_URL}/auth/me", headers=headers, timeout=TIMEOUT)
    if response.status_code == 200:
        print(f"Success! User: {response.json().get('email')}")
    else:
        print(f"Failed: {response.status_code} - {response.text}")

    # 3. Access protected endpoint (Stats)
    print("\n3. Accessing /stats/national (Protected)...")
    response = requests.get(f"{BASE_URL}/stats/national", headers=headers, timeout=TIMEOUT)
    if response.status_code == 200:
        print("Success! Stats retrieved.")
    else:
        print(f"Failed: {response.status_code} - {response.text}")

    # 4. Access without token (Expected Failure)
    print("\n4. Accessing /stats/national WITHOUT token...")
    response = requests.get(f"{BASE_URL}/stats/national", timeout=TIMEOUT)
    if response.status_code == 401:
        print("Success! Request rejected as expected (401 Unauthorized).")
    else:
        print(f"Failed! Expected 401, got {response.status_code}")


if __name__ == "__main__":
    test_auth()
docs/API_Reference.md ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 📖 API Reference
2
+
3
+ Tous les endpoints sont préfixés par `/api/v1`.
4
+ **Base URL Production**: `https://ahlya-production.up.railway.app/api/v1`
5
+
6
+ ## 🔐 Authentification
7
+ La plupart des routes nécessitent un token JWT valide.
8
+
9
+ | Header | Valeur |
10
+ | :--- | :--- |
11
+ | `Authorization` | `Bearer <access_token>` |
12
+
13
+ ---
14
+
15
+ ## 🔑 Auth Endpoints
16
+
17
+ ### Login
18
+ `POST /auth/login`
19
+
20
+ Authentification via formulaire standard OAuth2.
21
+
22
+ - **Request Body** (`application/x-www-form-urlencoded`):
23
+ - `username`: Email de l'utilisateur.
24
+ - `password`: Mot de passe.
25
+ - **Success (200)**:
26
+ ```json
27
+ {
28
+ "access_token": "eyJhbG...",
29
+ "token_type": "bearer"
30
+ }
31
+ ```
32
+
33
+ ---
34
+
35
+ ## 📊 Statistiques & Risques
36
+
37
+ ### Statistiques Nationales
38
+ `GET /stats/national` (PROTÉGÉ)
39
+
40
+ Retourne les métriques agrégées pour l'ensemble du pays.
41
+
42
+ - **Exemple de réponse**:
43
+ ```json
44
+ {
45
+ "total_companies": 31000,
46
+ "top_wilayas": ["Tunis", "Sousse", "Sfax"],
47
+ "risk_index": 4.2
48
+ }
49
+ ```
50
+
51
+ ### Risques par Wilaya
52
+ `GET /risk/wilayas` (PROTÉGÉ)
53
+
54
+ Liste les scores de risque pour toutes les wilayas.
55
+
56
+ ---
57
+
58
+ ## 📂 Enrichment (Core Data)
59
+
60
+ ### Liste des sociétés enrichies
61
+ `GET /enrichment/list` (PROTÉGÉ)
62
+
63
+ - **Paramètres**:
64
+ - `page` (int): Par défaut 1.
65
+ - `per_page` (int): Par défaut 12.
66
+ - `search` (str): Recherche par nom.
67
+ - `wilaya` (str): Filtre par wilaya.
68
+ - `has_red_flags` (bool): Filtre les cas critiques.
69
+
70
+ - **Response**:
71
+ ```json
72
+ {
73
+ "companies": [...],
74
+ "total": 150,
75
+ "total_pages": 13
76
+ }
77
+ ```
78
+
79
+ ### Profil complet
80
+ `GET /enrichment/profile/{company_id}` (PROTÉGÉ)
81
+
82
+ Retourne l'intégralité des données (RNE, JORT, Marchés) et les Red Flags calculés.
83
+
84
+ ---
85
+
86
+ ## 🛠️ User Management (Admin Only)
87
+
88
+ ### Liste des utilisateurs
89
+ `GET /auth/users` (PROTECTED ADMIN)
90
+
91
+ Retourne la liste des utilisateurs du système.
92
+
93
+ ### Création d'utilisateur
94
+ `POST /auth/users` (PROTECTED ADMIN)
95
+ - **Body**: `{ "email": "...", "password": "...", "is_admin": true }`
96
+
97
+ ---
98
+
99
+ ## 📝 Exemple Curl
100
+ ```bash
101
+ curl -X GET "https://ahlya-production.up.railway.app/api/v1/enrichment/list" \
102
+ -H "Authorization: Bearer <votre_token>"
103
+ ```
docs/Authentication_Guide.md ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🔐 Authentication Guide
2
+
3
+ Le système utilise une authentification basée sur les **JSON Web Tokens (JWT)** pour sécuriser les données sensibles d'investigation.
4
+
5
+ ## 🔄 Flux d'Authentification
6
+
7
+ ```mermaid
8
+ sequenceDiagram
9
+ participant User as Utilisateur
10
+ participant FE as Frontend (React)
11
+ participant BE as Backend (FastAPI)
12
+ participant DB as SQLite
13
+
14
+ User->>FE: Saisie Email/Password
15
+ FE->>BE: POST /api/v1/auth/login
16
+ BE->>DB: Vérifier User / Argon2 Hash
17
+ DB-->>BE: User Valide
18
+ BE-->>FE: Retourne JWT Access Token
19
+ FE->>FE: Stockage dans localStorage
20
+ FE->>BE: GET /api/v1/enriched (Header Bearer)
21
+ BE->>BE: Validation Signature JWT
22
+ BE-->>FE: Retourne Données
23
+ ```
24
+
25
+ ## 🛠️ Configuration Backend
26
+ Le secret et l'algorithme sont définis dans les variables d'environnement.
27
+
28
+ - **Variables Clés**:
29
+ - `SECRET_KEY`: Utilisée pour signer les tokens (indispensable en prod).
30
+ - `ALGORITHM`: Généralement `HS256`.
31
+ - `ACCESS_TOKEN_EXPIRE_MINUTES`: Durée de validité.
32
+
33
+ ## 💻 Implémentation Frontend (`AuthContext`)
34
+ La gestion de l'état `user` et `token` est centralisée dans `src/context/AuthContext.jsx`.
35
+
36
+ ### Usage dans les services :
37
+ Pour appeler une API protégée, utilisez le helper `authenticatedFetch` dans `src/services/api.js` qui injecte le header `Authorization`.
38
+
39
+ ```javascript
40
+ const getAuthHeaders = () => {
41
+ const token = localStorage.getItem('token');
42
+ return token ? { 'Authorization': `Bearer ${token}` } : {};
43
+ };
44
+ ```
45
+
46
+ ## 🛡️ Rôles et Permissions
47
+ Le système distingue deux niveaux :
48
+ 1. **Utilisateur Actif**: Accès aux données d'investigation.
49
+ 2. **Administrateur** (`is_admin=true`): Accès au dashboard admin et gestion des utilisateurs.
50
+
51
+ ## 👤 Création du Premier Admin
52
+ Si la base de données est vide, utilisez le script utilitaire :
53
+ ```bash
54
+ python create_admin.py <email> <mot_de_passe> [nom_complet]
55
+ ```
56
+ **Exemple d'identifiants** (à remplacer par les vôtres — ne publiez jamais de vrais secrets dans la documentation) :
57
+ - **Email**: `admin@example.com`
58
+ - **Password**: `<mot_de_passe_fort>`
docs/Contributing_Guide.md ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🤝 Contributing Guide
2
+
3
+ Merci de contribuer à la plateforme **Ba7ath** ! Ce document définit les standards et le workflow pour maintenir la qualité du projet.
4
+
5
+ ## 🌿 Workflow Git
6
+ 1. **Branching**: Créez une branche descriptive pour chaque feature ou bugfix.
7
+ - `feat/nom-de-la-feature`
8
+ - `fix/nom-du-bug`
9
+ - `docs/nom-de-la-doc`
10
+ 2. **Pull Requests**:
11
+ - Décrivez clairement les changements effectués.
12
+ - Liez la PR à une issue si elle existe.
13
+ - Assurez-vous que le build passe avant de demander une review.
14
+
15
+ ## 📝 Standards de Code
16
+
17
+ ### Backend (Python)
18
+ - Respectez la **PEP 8**.
19
+ - Utilisez des **type hints** pour toutes les fonctions FastAPI.
20
+ - Commentez les logiques OSINT complexes.
21
+
22
+ ### Frontend (React)
23
+ - Utilisez des **Functional Components** avec hooks.
24
+ - **Tailwind CSS** : Évitez les styles inline ou le CSS personnalisé quand c'est possible.
25
+ - Nommez vos composants en `PascalCase`.
26
+
27
+ ### Architecture
28
+ - Ne jamais coder en dur (hardcode) de secrets ou d'URLs de production.
29
+ - Utilisez toujours `src/services/api.js` pour les appels backend.
30
+
31
+ ## 💬 Messages de Commit
32
+ Suivez la convention **Conventional Commits** :
33
+ - `feat: ajouter la comparaison par wilaya`
34
+ - `fix: corriger le hachage des mots de passe`
35
+ - `docs: mettre à jour l'architecture frontend`
36
+
37
+ ---
38
+
39
+ ## 🛡️ Sécurité
40
+ Si vous découvrez une faille de sécurité, ne créez pas d'issue publique. Contactez directement l'équipe à `ba77ath@proton.me`.
docs/Database_Schema.md ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🗄️ Database Schema
2
+
3
+ Le projet utilise **SQLite** pour sa simplicité de déploiement et ses performances suffisantes pour un outil d'investigation spécialisé.
4
+
5
+ **Fichier**: `backend/ba7ath_enriched.db`
6
+
7
+ ## 📊 Diagramme E-R
8
+
9
+ ```mermaid
10
+ erDiagram
11
+ USER ||--o{ INVESTIGATION_NOTE : creates
12
+ ENRICHED_COMPANY ||--o{ INVESTIGATION_NOTE : has
13
+ WATCH_COMPANY ||--o{ ENRICHED_COMPANY : becomes
14
+
15
+ USER {
16
+ int id PK
17
+ string email UK
18
+ string hashed_password
19
+ string full_name
20
+ boolean is_active
21
+ boolean is_admin
22
+ }
23
+
24
+ ENRICHED_COMPANY {
25
+ string company_id PK
26
+ string company_name
27
+ string wilaya
28
+ json data
29
+ json metrics
30
+ string enriched_by
31
+ datetime enriched_at
32
+ }
33
+
34
+ INVESTIGATION_NOTE {
35
+ string id PK
36
+ string company_id FK
37
+ string title
38
+ text content
39
+ datetime created_at
40
+ string created_by
41
+ json tags
42
+ }
43
+
44
+ WATCH_COMPANY {
45
+ string id PK
46
+ string name_ar
47
+ string wilaya
48
+ string etat_enregistrement
49
+ datetime detected_trovit_at
50
+ }
51
+ ```
52
+
53
+ ---
54
+
55
+ ## 📑 Tables Détail
56
+
57
+ ### 1. `users`
58
+ Stocke les identifiants et les niveaux de privilèges.
59
+ - `hashed_password`: Hachage sécurisé (Argon2).
60
+
61
+ ### 2. `enriched_companies`
62
+ C'est le cœur de la plateforme. Les colonnes `data` et `metrics` sont de type JSON.
63
+ - **data**: Contient les données brutes extraites (RNE, JORT, Marchés).
64
+ - **metrics**: Contient les scores de risque et la liste des Red Flags détectés.
65
+
66
+ ### 3. `investigation_notes`
67
+ Permet aux journalistes d'ajouter des preuves textuelles ou des commentaires sur une société spécifique.
68
+
69
+ ### 4. `watch_companies`
70
+ Liste des sociétés identifiées comme "Ahlia" mais non encore trouvées dans les registres officiels (RNE).
71
+
72
+ ---
73
+
74
+ ## 📁 Migration et Initialisation
75
+ La base de données est automatiquement créée et les tables initialisées lors du démarrage du backend :
76
+ ```python
77
+ # backend/app/main.py
78
+ @app.on_event("startup")
79
+ async def startup_event():
80
+ Base.metadata.create_all(bind=engine)
81
+ ```
docs/Deployment_Guide.md ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🚀 Deployment Guide
2
+
3
+ Le projet est conçu pour un déploiement Cloud moderne et automatisé.
4
+
5
+ ## 📁 Backend : Railway
6
+
7
+ Le backend FastAPI est hébergé sur **Railway**.
8
+
9
+ ### Configuration
10
+ 1. **Repository**: Liez votre repository GitHub à Railway.
11
+ 2. **Volumes** (CRITIQUE) :
12
+ - SQLite nécessite un stockage persistant.
13
+ - Créez un Volume Railway nommé `data` monté sur `/app/data`.
14
+ - Modifiez votre `DATABASE_URL` pour pointer vers `/app/data/ba7ath_enriched.db`.
15
+ 3. **Variables d'environnement** :
16
+ - `SECRET_KEY`: Une chaîne aléatoire longue.
17
+ - `ALGORITHM`: `HS256`.
18
+ - `CORS_ORIGINS`: Liste des domaines autorisés (ex: `https://ahlya-investigations.vercel.app`).
19
+
20
+ ---
21
+
22
+ ## 🎨 Frontend : Vercel
23
+
24
+ Le frontend React est hébergé sur **Vercel**.
25
+
26
+ ### Configuration
27
+ 1. **Framework Preset**: Vite.
28
+ 2. **Build Command**: `npm run build`.
29
+ 3. **Output Directory**: `dist`. (Ou `build` selon votre config `vite.config.js`).
30
+ 4. **Environment Variables**:
31
+ - `VITE_API_URL`: `https://votre-app-backend.up.railway.app/api/v1`.
32
+
33
+ ---
34
+
35
+ ## 🔄 Pipeline CI/CD
36
+ Toute modification poussée sur la branche `main` déclenche automatiquement :
37
+ 1. Un redeploy sur Railway (Backend).
38
+ 2. Un redeploy sur Vercel (Frontend).
39
+
40
+ > [!WARNING]
41
+ > Assurez-vous de migrer les données CSV vers la base SQLite SQL avant le déploiement final pour ne pas avoir une base vide en production.
docs/Development_Guide.md ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🛠️ Development Guide
2
+
3
+ Ce guide détaille comment mettre en place l'environnement de développement local pour contribuer au projet Ba7ath.
4
+
5
+ ## 📋 Prérequis
6
+ - **Python 3.10+**
7
+ - **Node.js 18+**
8
+ - **Git**
9
+
10
+ ---
11
+
12
+ ## 🐍 Backend Setup (FastAPI)
13
+
14
+ 1. **Cloner le repository** :
15
+ ```bash
16
+ git clone <repo_url>
17
+ cd Ba7ath_scripts/Scrap_Ahlya/microsite
18
+ ```
19
+
20
+ 2. **Créer l'environnement virtuel** :
21
+ ```bash
22
+ cd backend
23
+ python -m venv venv
24
+ source venv/bin/activate # Windows: venv\Scripts\activate
25
+ ```
26
+
27
+ 3. **Installer les dépendances** :
28
+ ```bash
29
+ pip install -r requirements.txt
30
+ ```
31
+
32
+ 4. **Variables d'environnement** :
33
+ Créez un fichier `.env` dans `backend/` :
34
+ ```env
35
+ SECRET_KEY=votre_cle_secrete_ultra_securisee
36
+ ALGORITHM=HS256
37
+ ```
38
+
39
+ 5. **Lancer le serveur** :
40
+ ```bash
41
+ uvicorn app.main:app --reload --port 8000
42
+ ```
43
+
44
+ ---
45
+
46
+ ## ⚛️ Frontend Setup (React)
47
+
48
+ 1. **Installer les dépendances** :
49
+ ```bash
50
+ cd microsite
51
+ npm install
52
+ ```
53
+
54
+ 2. **Variables d'environnement** :
55
+ Créez un fichier `.env` dans `microsite/` :
56
+ ```env
57
+ VITE_API_URL=http://localhost:8000/api/v1
58
+ ```
59
+
60
+ 3. **Lancer le serveur de dev** :
61
+ ```bash
62
+ npm run dev
63
+ ```
64
+ L'application sera accessible sur `http://localhost:5173`.
65
+
66
+ ---
67
+
68
+ ## 🚀 Scripts Utilitaires
69
+
70
+ - **`backend/create_admin.py`** : Crée un utilisateur administrateur : `python create_admin.py <email> <mot_de_passe> [nom_complet]`.
71
+ - **`start_all.bat`** (Windows) : Script pour lancer simultanément le backend et le frontend en développement.
72
+
73
+ ## 🧪 Tests Rapides
74
+ Pour vérifier que l'API répond correctement après installation :
75
+ ```bash
76
+ curl http://localhost:8000/
77
+ # Réponse attendue: {"message": "Ba7ath OSINT API is running"}
78
+ ```
docs/Frontend_Architecture.md ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 💻 Frontend Architecture
2
+
3
+ L'application est une **Single Page Application (SPA)** moderne construite avec **React 18** et **Vite**.
4
+
5
+ ## 🏗️ Structure des Dossiers
6
+
7
+ ```text
8
+ microsite/
9
+ ├── public/ # Assets statiques
10
+ ├── src/
11
+ │ ├── components/ # Composants réutilisables (Map, Widgets, Modals)
12
+ │ ├── context/ # AuthContext pour la gestion globale
13
+ │ ├── pages/ # Vues principales (Home, Admin, Enriched)
14
+ │ ├── services/ # Appels API et configuration
15
+ │ ├── App.jsx # Router et layout global
16
+ │ └── index.css # Tailwind et styles globaux
17
+ └── vite.config.js # Configuration de build
18
+ ```
19
+
20
+ ## 🚦 Routing (`App.jsx`)
21
+ Le routage est géré par `react-router-dom`. Les routes sensibles sont protégées.
22
+
23
+ ```jsx
24
+ <Routes>
25
+ <Route path="/login" element={<LoginPage />} />
26
+ <Route element={<ProtectedRoute />}>
27
+ <Route path="/" element={<HomeDashboard />} />
28
+ <Route path="/enriched" element={<EnrichedCompaniesPage />} />
29
+ <Route path="/admin" element={<AdminDashboard />} adminOnly={true} />
30
+ </Route>
31
+ </Routes>
32
+ ```
33
+
34
+ ## 🔐 Gestion de l'État : `AuthContext`
35
+ Un contexte React global gère :
36
+ - L'utilisateur actuel (`user`).
37
+ - La persistance du token (`localStorage`).
38
+ - Les méthodes `login` / `logout`.
39
+
40
+ ## 📦 Composants Clés
41
+
42
+ ### Visualisation
43
+ - **`RegionPanel`**: Affiche les statistiques détaillées d'une wilaya sélectionnée sur la carte.
44
+ - **`SubScoresRadar`**: Graphique radar (Chart.js) montrant les différents axes de risque.
45
+ - **`StatisticalComparisonGrid`**: Grille de comparaison entre wilayas.
46
+
47
+ ### Investigation
48
+ - **`InvestigationWizard`**: Formulaire pas-à-pas pour guider l'analyse.
49
+ - **`ManualEnrichmentWizard`**: Interface de saisie pour ajouter de nouvelles données d'enrichissement.
50
+
51
+ ## 🎨 Design System
52
+ - **Tailwind CSS**: Utilisé pour tout le styling.
53
+ - **Inter / Noto Sans Arabic**: Polices utilisées pour une lisibilité maximale bilingue.
54
+ - **Glassmorphism**: Appliqué sur les modals et les overlays pour un aspect premium.
55
+
56
+ ---
57
+
58
+ ## 🔌 Intégration API
59
+ Tous les appels passent par `src/services/api.js` qui utilise un wrapper `authenticatedFetch` pour garantir que le token est envoyé si disponible.
docs/OSINT_Methodology.md ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🕵️ OSINT Methodology
2
+
3
+ La plateforme Ba7ath ne se contente pas d'afficher des données ; elle les transforme en **renseignements actionnables** grâce à une méthodologie d'enrichissement rigoureuse.
4
+
5
+ ## 📡 Sources de Données
6
+
7
+ 1. **RNE (Registre National des Entreprises)** : Source officielle pour le statut légal, le capital social, l'adresse et les actionnaires.
8
+ 2. **JORT (Journal Officiel de la République Tunisienne)** : Extraction des annonces de création, de modification de capital et de liquidation.
9
+ 3. **Marchés Publics (TUNEPS / Observatoire)** : Données sur les contrats remportés par les sociétés citoyennes.
10
+ 4. **Scraping Web (Trovit / Web)** : Identification précoce des sociétés non encore officiellement enregistrées.
11
+
12
+ ---
13
+
14
+ ## 🚩 Calcul des Red Flags (Signaux d'Alerte)
15
+
16
+ Le système applique des algorithmes automatiques pour détecter des patterns suspects :
17
+
18
+ ### 1. Ratio Financier Critique
19
+ - **Logique**: Si `Valeur totale des contrats / Capital social > 10`.
20
+ - **Interprétation**: Une société avec un capital très faible remportant des marchés massifs peut indiquer une structure "écran" ou un manque de capacité réelle.
21
+ - **Badge**: `FINANCIAL_RATIO` (Severity: HIGH).
22
+
23
+ ### 2. Méthodes de Passation
24
+ - **Logique**: Si `Marchés de gré à gré (Direct) > 50%` du total des contrats.
25
+ - **Interprétation**: Une dépendance excessive aux contrats non-concurrentiels est un indicateur de risque de favoritisme.
26
+ - **Badge**: `PROCUREMENT_METHOD` (Severity: HIGH).
27
+
28
+ ### 3. Gouvernance
29
+ - **Logique**: Détection d'actionnaire unique ou de liens croisés entre sociétés Ahlia d'une même région.
30
+ - **Badge**: `GOVERNANCE` (Severity: MEDIUM).
31
+
32
+ ---
33
+
34
+ ## 🧪 Processus d'Enrichissement Manuel
35
+
36
+ Le **ManualEnrichmentWizard** permet aux journalistes d'ajouter une couche d'analyse humaine :
37
+ 1. **Saisie des données RNE** : Validation des numéros de registre.
38
+ 2. **Ajout de contrats** : Saisie manuelle si TUNEPS n'est pas à jour.
39
+ 3. **Calcul Auto** : Le système recalcule instantanément les scores dès que les données sont enregistrées.
40
+
41
+ ## 📈 Indice de Risque Régional
42
+ Le score d'une wilaya est la moyenne pondérée des scores de risque des sociétés Ahlia qui y sont basées. Cela permet de cartographier les "zones grises" au niveau national.
docs/README.md ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 📂 Ba7ath / Ahlya Investigations
2
+
3
+ > **Ba7ath** (البحث - La Recherche) est une plateforme OSINT de datajournalisme dédiée à l'investigation sur les sociétés citoyennes (Ahlia - أهلية) en Tunisie.
4
+
5
+ [![Status: Functional](https://img.shields.io/badge/Status-Functional-success.svg)](#)
6
+ [![Stack: FastAPI + React](https://img.shields.io/badge/Stack-FastAPI%20%2B%20React-blue.svg)](#)
7
+
8
+ ## 📌 Mission
9
+ Ce projet permet aux journalistes et analystes d'explorer, de cartographier et d'enrichir les données sur les sociétés Ahlia tunisiennes, en identifiant les anomalies financières, les structures de gouvernance suspectes et les signaux de risque OSINT.
10
+
11
+ ---
12
+
13
+ ## 🏗️ Architecture du Système
14
+
15
+ ```mermaid
16
+ graph TD
17
+ subgraph Frontend [React SPA - Vercel]
18
+ UI[Interface Utilisateur]
19
+ State[AuthContext & State]
20
+ Map[Leaflet Map]
21
+ Charts[Chart.js / Radar]
22
+ end
23
+
24
+ subgraph Backend [FastAPI - Railway]
25
+ API[V1 API Endpoints]
26
+ Auth[JWT Service]
27
+ Logic[Business Logic / Red Flags]
28
+ end
29
+
30
+ subgraph Data [Storage]
31
+ DB[(SQLite - ba7ath_enriched.db)]
32
+ Vol[Railway Persistent Volume]
33
+ end
34
+
35
+ UI --> State
36
+ State --> API
37
+ API --> Auth
38
+ API --> Logic
39
+ Logic --> DB
40
+ DB -.-> Vol
41
+ ```
42
+
43
+ ---
44
+
45
+ ## 🛠️ Stack Technique
46
+
47
+ ### Backend
48
+ - **Framework**: FastAPI (Python)
49
+ - **Base de données**: SQLite avec SQLAlchemy ORM.
50
+ - **Authentification**: JWT Bearer avec hachage Argon2.
51
+ - **Service OSINT**: Logique personnalisée de détection de "Red Flags".
52
+
53
+ ### Frontend
54
+ - **Framework**: React 18 (Vite).
55
+ - **Styling**: Tailwind CSS pour une interface premium et responsive.
56
+ - **Cartographie**: React-Leaflet pour la visualisation géographique des risques.
57
+ - **Visualisation**: Chart.js pour les graphiques radar et de comparaison.
58
+
59
+ ---
60
+
61
+ ## 🚀 Quick Start (Local)
62
+
63
+ ### 1. Backend
64
+ ```bash
65
+ cd backend
66
+ python -m venv venv
67
+ source venv/bin/activate # venv\Scripts\activate sur Windows
68
+ pip install -r requirements.txt
69
+ python create_admin.py # Initialiser l'admin par défaut
70
+ uvicorn app.main:app --reload
71
+ ```
72
+
73
+ ### 2. Frontend
74
+ ```bash
75
+ cd microsite
76
+ npm install
77
+ npm run dev
78
+ ```
79
+
80
+ ---
81
+
82
+ ## 📖 Documentation Détaillée
83
+
84
+ 1. [**API Reference**](API_Reference.md) : Détail des endpoints et formats.
85
+ 2. [**Authentication Guide**](Authentication_Guide.md) : Flux JWT et gestion admin.
86
+ 3. [**Frontend Architecture**](Frontend_Architecture.md) : Structure des composants et hooks.
87
+ 4. [**Database Schema**](Database_Schema.md) : Modèles SQLAlchemy et colonnes enrichies.
88
+ 5. [**Deployment Guide**](Deployment_Guide.md) : Procédures Railway/Vercel.
89
+ 6. [**OSINT Methodology**](OSINT_Methodology.md) : Calcul des risques et sources.
90
+ 7. [**Troubleshooting**](Troubleshooting.md) : Problèmes connus et solutions.
91
+ 8. [**Development Guide**](Development_Guide.md) : Workflow de contribution.
92
+
93
+ ---
94
+
95
+ ## 🕵️ Méthodologie OSINT
96
+ La plateforme agrège des données provenant du **RNE** (Registre National des Entreprises), du **JORT** (Journal Officiel) et des données de marchés publics pour générer des scores de risque basés sur :
97
+ - Le ratio Capital / Valeur des contrats.
98
+ - La fréquence des marchés de gré à gré (بالتراضي).
99
+ - La structure de gouvernance (Actionnaire unique, etc.).
100
+
101
+ ---
102
+
103
+ ## ⚖️ Licence
104
+ Projet interne - Tous droits réservés.
docs/Troubleshooting.md ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🔍 Troubleshooting Guide
2
+
3
+ Ce guide recense les erreurs courantes rencontrées lors du développement ou du déploiement de la plateforme Ba7ath.
4
+
5
+ ## 1. Erreurs d'Authentification
6
+
7
+ ### Symptôme : "401 Unauthorized" ou "403 Forbidden"
8
+ - **Cause 1**: Le token JWT a expiré.
9
+ - **Solution**: Se déconnecter et se reconnecter.
10
+ - **Cause 2**: Le frontend n'envoie pas le header `Authorization`.
11
+ - **Diagnostic**: Vérifiez dans l'onglet Network de votre navigateur si le header `Authorization: Bearer <token>` est présent.
12
+ - **Fix**: Assurez-vous que l'appel API utilise `authenticatedFetch`.
13
+
14
+ ### Symptôme : Erreur de signature du token après redémarrage
15
+ - **Cause**: La `SECRET_KEY` n'est pas fixe et change à chaque redémarrage du serveur.
16
+ - **Fix**: Définir une `SECRET_KEY` statique dans les variables d'environnement.
17
+
18
+ ---
19
+
20
+ ## 2. Erreurs de Données (API 404)
21
+
22
+ ### Symptôme : Les données enrichies sont inaccessibles
23
+ - **Diagnostic**: L'URL appelée est incorrecte (ex: `/enrichment/list` au lieu de `/api/v1/enrichment/list`).
24
+ - **Fix**: Centraliser `API_BASE_URL` dans `config.js` et s'assurer qu'il inclut `/api/v1`.
25
+
26
+ ### Symptôme : Les sociétés disparaissent au redéploiement Railway
27
+ - **Cause**: La base SQLite n'est pas sur un volume persistant.
28
+ - **Fix**: Monter un Volume Railway et pointer le chemin de la DB vers ce volume (`/data/ba7ath_enriched.db`).
29
+
30
+ ---
31
+
32
+ ## 3. Erreurs de Build (Frontend)
33
+
34
+ ### Symptôme : `vite:html-inline-proxy` error
35
+ - **Cause**: Présence de blocs `<style>` inline dans `index.html` (bug spécifique à certains environnements Windows).
36
+ - **Fix**: Déplacer les styles vers `index.css` et configurer les polices dans `tailwind.config.js`.
37
+
38
+ ---
39
+
40
+ ## 🛠️ Diagnostics Utiles
41
+
42
+ **Logs Backend** :
43
+ ```bash
44
+ # Sur Railway
45
+ railway logs
46
+ ```
47
+
48
+ **Debugger React** :
49
+ Utilisez les **React DevTools** pour vérifier si `AuthContext` possède bien l'état `user` après le login.
index.html ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="ar" dir="rtl">
3
+
4
+ <head>
5
+ <meta charset="utf-8" />
6
+ <link rel="icon" href="/favicon.ico" />
7
+ <meta name="viewport" content="width=device-width, initial-scale=1" />
8
+ <meta name="theme-color" content="#10B981" />
9
+
10
+ <meta name="description" content="لوحة تفاعلية لقراءة بيانات الشركات الأهلية في تونس حسب الولاية والنشاط." />
11
+
12
+ <link rel="apple-touch-icon" href="/logo192.png" />
13
+
14
+ <!-- خط عربي (اختياري) -->
15
+ <link rel="preconnect" href="https://fonts.googleapis.com" />
16
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
17
+ <link href="https://fonts.googleapis.com/css2?family=Noto+Sans+Arabic:wght@300;400;600;700&display=swap"
18
+ rel="stylesheet" />
19
+
20
+ <!-- ملف manifest لتطبيق الويب -->
21
+ <link rel="manifest" href="/manifest.json" />
22
+
23
+ <title>الشركات الأهلية في تونس</title>
24
+
25
+
26
+ </head>
27
+
28
+ <body>
29
+ <noscript>يجب تفعيل جافاسكريبت لتشغيل هذا التطبيق.</noscript>
30
+ <div id="root"></div>
31
+ <script type="module" src="/src/index.jsx"></script>
32
+ </body>
33
+
34
+ </html>
package-lock.json ADDED
The diff for this file is too large to render. See raw diff
 
package.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "microsite",
3
+ "version": "0.1.0",
4
+ "private": true,
5
+ "dependencies": {
6
+ "@testing-library/dom": "^10.4.1",
7
+ "@testing-library/jest-dom": "^6.9.1",
8
+ "@testing-library/react": "^16.3.2",
9
+ "@testing-library/user-event": "^13.5.0",
10
+ "chart.js": "^4.5.1",
11
+ "framer-motion": "^12.34.3",
12
+ "leaflet": "^1.9.4",
13
+ "lucide-react": "^0.563.0",
14
+ "react": "^19.2.4",
15
+ "react-chartjs-2": "^5.3.1",
16
+ "react-dom": "^19.2.4",
17
+ "react-leaflet": "^5.0.0",
18
+ "react-router-dom": "^7.13.0",
19
+ "recharts": "^3.7.0"
20
+ },
21
+ "scripts": {
22
+ "dev": "vite",
23
+ "start": "vite",
24
+ "build": "vite build",
25
+ "preview": "vite preview",
26
+ "test": "react-scripts test",
27
+ "eject": "react-scripts eject"
28
+ },
29
+ "eslintConfig": {
30
+ "extends": [
31
+ "react-app",
32
+ "react-app/jest"
33
+ ]
34
+ },
35
+ "browserslist": {
36
+ "production": [
37
+ ">0.2%",
38
+ "not dead",
39
+ "not op_mini all"
40
+ ],
41
+ "development": [
42
+ "last 1 chrome version",
43
+ "last 1 firefox version",
44
+ "last 1 safari version"
45
+ ]
46
+ },
47
+ "devDependencies": {
48
+ "@vitejs/plugin-react": "^5.1.3",
49
+ "autoprefixer": "^10.4.24",
50
+ "postcss": "^8.5.6",
51
+ "tailwindcss": "^3.4.19",
52
+ "vite": "^7.3.1"
53
+ }
54
+ }
postcss.config.js ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ module.exports = {
2
+ plugins: {
3
+ tailwindcss: {},
4
+ autoprefixer: {},
5
+ },
6
+ };
project_tree.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+
4
+ def list_files(startpath):
5
+ output = []
6
+ for root, dirs, files in os.walk(startpath):
7
+ level = root.replace(startpath, '').count(os.sep)
8
+ indent = ' ' * 4 * (level)
9
+ output.append('{}{}/'.format(indent, os.path.basename(root)))
10
+ subindent = ' ' * 4 * (level + 1)
11
+ for f in files:
12
+ if not f.startswith("."):
13
+ output.append('{}{}'.format(subindent, f))
14
+ return "\n".join(output)
15
+
16
+ print(list_files('.'))