Spaces:
Sleeping
Sleeping
Commit ·
4898cbf
1
Parent(s): 261b91c
Déploiement propre : racine fixée, frontend et DB retirés
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .gitignore +18 -50
- backend/.python-version → .python-version +0 -0
- README.md +12 -79
- backend/ahlya_vs_trovit_fuzzy.py → ahlya_vs_trovit_fuzzy.py +0 -0
- backend/app.py → app.py +0 -0
- {backend/app → app}/api/enrichment.py +0 -0
- {backend/app → app}/api/v1/auth.py +0 -0
- {backend/app → app}/api/v1/companies.py +0 -0
- {backend/app → app}/api/v1/investigate.py +0 -0
- {backend/app → app}/api/v1/meta.py +0 -0
- {backend/app → app}/api/v1/risk.py +0 -0
- {backend/app → app}/api/v1/stats.py +0 -0
- {backend/app → app}/data/companies.json +0 -0
- {backend/app → app}/data/stats.json +0 -0
- {backend/app → app}/database.py +0 -0
- {backend/app → app}/main.py +0 -0
- {backend/app → app}/models/enrichment_models.py +0 -0
- {backend/app → app}/models/schemas.py +0 -0
- {backend/app → app}/models/user_models.py +0 -0
- {backend/app → app}/schemas/auth_schemas.py +0 -0
- {backend/app → app}/services/aggregation.py +0 -0
- {backend/app → app}/services/auth_service.py +0 -0
- {backend/app → app}/services/data_loader.py +0 -0
- {backend/app → app}/services/llm_service.py +0 -0
- {backend/app → app}/services/osint_links.py +0 -0
- {backend/app → app}/services/risk_engine.py +0 -0
- backend/.gitignore +0 -18
- backend/compare_by_name_fuzzy.py +0 -162
- backend/compare_data.py +0 -90
- backend/compare_names_with_qwen.py +0 -185
- backend/create_admin.py +0 -44
- backend/enrich_not_in_trovit.py +0 -71
- backend/inspect_db.py +0 -46
- backend/readme.md +0 -12
- docs/API_Reference.md +0 -103
- docs/Authentication_Guide.md +0 -58
- docs/Contributing_Guide.md +0 -40
- docs/Database_Schema.md +0 -81
- docs/Deployment_Guide.md +0 -41
- docs/Development_Guide.md +0 -78
- docs/Frontend_Architecture.md +0 -59
- docs/OSINT_Methodology.md +0 -42
- docs/README.md +0 -104
- docs/Troubleshooting.md +0 -49
- index.html +0 -34
- package-lock.json +0 -0
- package.json +0 -54
- postcss.config.js +0 -6
- project_tree.py +0 -16
- public/data/enrich_companies.py +0 -95
.gitignore
CHANGED
|
@@ -1,50 +1,18 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
#
|
| 4 |
-
/
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
#
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
.
|
| 16 |
-
.env
|
| 17 |
-
.env.
|
| 18 |
-
.
|
| 19 |
-
.env.production.local
|
| 20 |
-
npm-debug.log*
|
| 21 |
-
yarn-debug.log*
|
| 22 |
-
yarn-error.log*
|
| 23 |
-
|
| 24 |
-
# --- Python backend / Ba7ath ---
|
| 25 |
-
|
| 26 |
-
# Environnements virtuels
|
| 27 |
-
venv/
|
| 28 |
-
.env/
|
| 29 |
-
.env.*
|
| 30 |
-
.env.*
|
| 31 |
-
.env
|
| 32 |
-
|
| 33 |
-
# Bytecode / cache
|
| 34 |
-
__pycache__/
|
| 35 |
-
*.py[cod]
|
| 36 |
-
*.pyo
|
| 37 |
-
*.pyd
|
| 38 |
-
|
| 39 |
-
# Bases et données locales
|
| 40 |
-
# *.db
|
| 41 |
-
*.sqlite3
|
| 42 |
-
instance/
|
| 43 |
-
|
| 44 |
-
# Logs
|
| 45 |
-
*.log
|
| 46 |
-
logs/
|
| 47 |
-
|
| 48 |
-
.vercel
|
| 49 |
-
backend/.env
|
| 50 |
-
backend/bulk_test.py
|
|
|
|
| 1 |
+
venv/
|
| 2 |
+
__pycache__/
|
| 3 |
+
# Ignorer tout le dossier des scripts sensibles
|
| 4 |
+
app/scripts/
|
| 5 |
+
force_admin.py
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
# Ignorer systématiquement les bases de données (Excel et CSV)
|
| 9 |
+
*.xlsx
|
| 10 |
+
*.csv
|
| 11 |
+
|
| 12 |
+
# Ignorer les journaux de progression et fichiers temporaires
|
| 13 |
+
ba7ath_progress.txt
|
| 14 |
+
*.log
|
| 15 |
+
*.txt
|
| 16 |
+
.env
|
| 17 |
+
.env.*
|
| 18 |
+
../.env*.db
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/.python-version → .python-version
RENAMED
|
File without changes
|
README.md
CHANGED
|
@@ -1,79 +1,12 @@
|
|
| 1 |
-
---
|
| 2 |
-
title:
|
| 3 |
-
emoji: 🛡️
|
| 4 |
-
colorFrom: green
|
| 5 |
-
colorTo: blue
|
| 6 |
-
sdk: gradio
|
| 7 |
-
app_file: app.py
|
| 8 |
-
pinned: false
|
| 9 |
-
---
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
## Available Scripts
|
| 15 |
-
|
| 16 |
-
In the project directory, you can run:
|
| 17 |
-
|
| 18 |
-
### `npm start`
|
| 19 |
-
|
| 20 |
-
Runs the app in the development mode.\
|
| 21 |
-
Open [http://localhost:3000](http://localhost:3000) to view it in your browser.
|
| 22 |
-
|
| 23 |
-
The page will reload when you make changes.\
|
| 24 |
-
You may also see any lint errors in the console.
|
| 25 |
-
|
| 26 |
-
### `npm test`
|
| 27 |
-
|
| 28 |
-
Launches the test runner in the interactive watch mode.\
|
| 29 |
-
See the section about [running tests](https://facebook.github.io/create-react-app/docs/running-tests) for more information.
|
| 30 |
-
|
| 31 |
-
### `npm run build`
|
| 32 |
-
|
| 33 |
-
Builds the app for production to the `build` folder.\
|
| 34 |
-
It correctly bundles React in production mode and optimizes the build for the best performance.
|
| 35 |
-
|
| 36 |
-
The build is minified and the filenames include the hashes.\
|
| 37 |
-
Your app is ready to be deployed!
|
| 38 |
-
|
| 39 |
-
See the section about [deployment](https://facebook.github.io/create-react-app/docs/deployment) for more information.
|
| 40 |
-
|
| 41 |
-
### `npm run eject`
|
| 42 |
-
|
| 43 |
-
**Note: this is a one-way operation. Once you `eject`, you can't go back!**
|
| 44 |
-
|
| 45 |
-
If you aren't satisfied with the build tool and configuration choices, you can `eject` at any time. This command will remove the single build dependency from your project.
|
| 46 |
-
|
| 47 |
-
Instead, it will copy all the configuration files and the transitive dependencies (webpack, Babel, ESLint, etc) right into your project so you have full control over them. All of the commands except `eject` will still work, but they will point to the copied scripts so you can tweak them. At this point you're on your own.
|
| 48 |
-
|
| 49 |
-
You don't have to ever use `eject`. The curated feature set is suitable for small and middle deployments, and you shouldn't feel obligated to use this feature. However we understand that this tool wouldn't be useful if you couldn't customize it when you are ready for it.
|
| 50 |
-
|
| 51 |
-
## Learn More
|
| 52 |
-
|
| 53 |
-
You can learn more in the [Create React App documentation](https://facebook.github.io/create-react-app/docs/getting-started).
|
| 54 |
-
|
| 55 |
-
To learn React, check out the [React documentation](https://reactjs.org/).
|
| 56 |
-
|
| 57 |
-
### Code Splitting
|
| 58 |
-
|
| 59 |
-
This section has moved here: [https://facebook.github.io/create-react-app/docs/code-splitting](https://facebook.github.io/create-react-app/docs/code-splitting)
|
| 60 |
-
|
| 61 |
-
### Analyzing the Bundle Size
|
| 62 |
-
|
| 63 |
-
This section has moved here: [https://facebook.github.io/create-react-app/docs/analyzing-the-bundle-size](https://facebook.github.io/create-react-app/docs/analyzing-the-bundle-size)
|
| 64 |
-
|
| 65 |
-
### Making a Progressive Web App
|
| 66 |
-
|
| 67 |
-
This section has moved here: [https://facebook.github.io/create-react-app/docs/making-a-progressive-web-app](https://facebook.github.io/create-react-app/docs/making-a-progressive-web-app)
|
| 68 |
-
|
| 69 |
-
### Advanced Configuration
|
| 70 |
-
|
| 71 |
-
This section has moved here: [https://facebook.github.io/create-react-app/docs/advanced-configuration](https://facebook.github.io/create-react-app/docs/advanced-configuration)
|
| 72 |
-
|
| 73 |
-
### Deployment
|
| 74 |
-
|
| 75 |
-
This section has moved here: [https://facebook.github.io/create-react-app/docs/deployment](https://facebook.github.io/create-react-app/docs/deployment)
|
| 76 |
-
|
| 77 |
-
### `npm run build` fails to minify
|
| 78 |
-
|
| 79 |
-
This section has moved here: [https://facebook.github.io/create-react-app/docs/troubleshooting#npm-run-build-fails-to-minify](https://facebook.github.io/create-react-app/docs/troubleshooting#npm-run-build-fails-to-minify)
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Ba7ath OSINT API
|
| 3 |
+
emoji: 🛡️
|
| 4 |
+
colorFrom: green
|
| 5 |
+
colorTo: blue
|
| 6 |
+
sdk: gradio
|
| 7 |
+
app_file: app.py
|
| 8 |
+
pinned: false
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
# Ba7ath OSINT API
|
| 12 |
+
Backend pour l'investigation et l'analyse de risque.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/ahlya_vs_trovit_fuzzy.py → ahlya_vs_trovit_fuzzy.py
RENAMED
|
File without changes
|
backend/app.py → app.py
RENAMED
|
File without changes
|
{backend/app → app}/api/enrichment.py
RENAMED
|
File without changes
|
{backend/app → app}/api/v1/auth.py
RENAMED
|
File without changes
|
{backend/app → app}/api/v1/companies.py
RENAMED
|
File without changes
|
{backend/app → app}/api/v1/investigate.py
RENAMED
|
File without changes
|
{backend/app → app}/api/v1/meta.py
RENAMED
|
File without changes
|
{backend/app → app}/api/v1/risk.py
RENAMED
|
File without changes
|
{backend/app → app}/api/v1/stats.py
RENAMED
|
File without changes
|
{backend/app → app}/data/companies.json
RENAMED
|
File without changes
|
{backend/app → app}/data/stats.json
RENAMED
|
File without changes
|
{backend/app → app}/database.py
RENAMED
|
File without changes
|
{backend/app → app}/main.py
RENAMED
|
File without changes
|
{backend/app → app}/models/enrichment_models.py
RENAMED
|
File without changes
|
{backend/app → app}/models/schemas.py
RENAMED
|
File without changes
|
{backend/app → app}/models/user_models.py
RENAMED
|
File without changes
|
{backend/app → app}/schemas/auth_schemas.py
RENAMED
|
File without changes
|
{backend/app → app}/services/aggregation.py
RENAMED
|
File without changes
|
{backend/app → app}/services/auth_service.py
RENAMED
|
File without changes
|
{backend/app → app}/services/data_loader.py
RENAMED
|
File without changes
|
{backend/app → app}/services/llm_service.py
RENAMED
|
File without changes
|
{backend/app → app}/services/osint_links.py
RENAMED
|
File without changes
|
{backend/app → app}/services/risk_engine.py
RENAMED
|
File without changes
|
backend/.gitignore
DELETED
|
@@ -1,18 +0,0 @@
|
|
| 1 |
-
venv/
|
| 2 |
-
__pycache__/
|
| 3 |
-
# Ignorer tout le dossier des scripts sensibles
|
| 4 |
-
app/scripts/
|
| 5 |
-
force_admin.py
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
# Ignorer systématiquement les bases de données (Excel et CSV)
|
| 9 |
-
*.xlsx
|
| 10 |
-
*.csv
|
| 11 |
-
|
| 12 |
-
# Ignorer les journaux de progression et fichiers temporaires
|
| 13 |
-
ba7ath_progress.txt
|
| 14 |
-
*.log
|
| 15 |
-
*.txt
|
| 16 |
-
.env
|
| 17 |
-
.env.*
|
| 18 |
-
../.env*.db
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/compare_by_name_fuzzy.py
DELETED
|
@@ -1,162 +0,0 @@
|
|
| 1 |
-
import pandas as pd
|
| 2 |
-
from pathlib import Path
|
| 3 |
-
from rapidfuzz import process, fuzz
|
| 4 |
-
|
| 5 |
-
# ------------- CONFIG À ADAPTER --------------
|
| 6 |
-
|
| 7 |
-
# CSV A : ta "liste politique / terrain"
|
| 8 |
-
CSV_A = Path("liste_270.csv") # ex : Google Sheet complet
|
| 9 |
-
|
| 10 |
-
# CSV B : la liste des stés qui ont un RNE (ex. Trovit / base enrichie)
|
| 11 |
-
CSV_B = Path("liste_rne_ou_trovit.csv")
|
| 12 |
-
|
| 13 |
-
# Nom des colonnes contenant les NOMS à comparer
|
| 14 |
-
# A peut être en arabe, B en français, ou l'inverse.
|
| 15 |
-
# Idéalement, tu rajoutes dans chaque CSV une colonne 'name_canon'
|
| 16 |
-
# (normalisée/ traduite avec Qwen) et tu mets ces noms ici.
|
| 17 |
-
COL_NAME_A = "name" # ex : "Nom société (FR)" ou "الاسم"
|
| 18 |
-
COL_NAME_B = "name" # ex : nom Trovit en arabe
|
| 19 |
-
|
| 20 |
-
# (Optionnel) colonnes de contexte à garder pour l'analyse
|
| 21 |
-
CTX_COLS_A = ["wilaya", "delegation"] # adapte à ton fichier
|
| 22 |
-
CTX_COLS_B = ["wilaya", "delegation"] # idem
|
| 23 |
-
|
| 24 |
-
# Seuils fuzzy
|
| 25 |
-
# score >= HIGH_MATCH -> match sûr
|
| 26 |
-
# LOW_MATCH <= score < HIGH_MATCH -> match douteux (à vérifier à la main / par LLM)
|
| 27 |
-
# score < LOW_MATCH -> considéré comme "non trouvé"
|
| 28 |
-
HIGH_MATCH = 90
|
| 29 |
-
LOW_MATCH = 70
|
| 30 |
-
|
| 31 |
-
# Fichiers de sortie
|
| 32 |
-
OUT_MATCHES = Path("matches_surs.csv")
|
| 33 |
-
OUT_MAYBE = Path("matches_douteux.csv")
|
| 34 |
-
OUT_MISSING = Path("non_trouves_par_nom.csv")
|
| 35 |
-
|
| 36 |
-
# Encodage (UTF‑8 avec BOM fonctionne bien pour arabe + Excel)
|
| 37 |
-
ENC_A = "utf-8-sig"
|
| 38 |
-
ENC_B = "utf-8-sig"
|
| 39 |
-
|
| 40 |
-
# ------------- FONCTIONS ---------------------
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
def normalize_name(s: str) -> str:
|
| 44 |
-
"""Nettoyage léger pour comparer les noms."""
|
| 45 |
-
if pd.isna(s):
|
| 46 |
-
return ""
|
| 47 |
-
s = str(s).strip()
|
| 48 |
-
|
| 49 |
-
# mettre en minuscules pour la partie latine
|
| 50 |
-
s = s.lower()
|
| 51 |
-
|
| 52 |
-
# enlever quelques termes génériques FR/AR
|
| 53 |
-
generic_fr = [
|
| 54 |
-
"societe", "société", "ste", "sa", "sarl",
|
| 55 |
-
"société anonyme", "société à responsabilité limitée",
|
| 56 |
-
]
|
| 57 |
-
generic_ar = [
|
| 58 |
-
"شركة", "الشركة", "الاهلية", "الأهلية", "الجهوية",
|
| 59 |
-
"المحلية", "شركة أهلية", "شركة الاهلية", "شركة الأهلية",
|
| 60 |
-
]
|
| 61 |
-
for g in generic_fr + generic_ar:
|
| 62 |
-
s = s.replace(g, "")
|
| 63 |
-
|
| 64 |
-
# normaliser les espaces
|
| 65 |
-
s = " ".join(s.split())
|
| 66 |
-
return s
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
def load_csv(path: Path, name_col: str, ctx_cols: list, enc: str) -> pd.DataFrame:
|
| 70 |
-
if not path.exists():
|
| 71 |
-
raise FileNotFoundError(path.resolve())
|
| 72 |
-
df = pd.read_csv(path, encoding=enc)
|
| 73 |
-
if name_col not in df.columns:
|
| 74 |
-
raise KeyError(f"Colonne '{name_col}' absente dans {path.name}.\n"
|
| 75 |
-
f"Colonnes dispo : {list(df.columns)}")
|
| 76 |
-
df["__name_raw__"] = df[name_col]
|
| 77 |
-
df["__name_norm__"] = df[name_col].apply(normalize_name)
|
| 78 |
-
|
| 79 |
-
# garder nom + colonnes utiles pour l'analyse
|
| 80 |
-
keep_cols = ["__name_raw__", "__name_norm__"]
|
| 81 |
-
for c in ctx_cols:
|
| 82 |
-
if c in df.columns:
|
| 83 |
-
keep_cols.append(c)
|
| 84 |
-
return df[keep_cols].copy()
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
def main():
|
| 88 |
-
# 1. Charger les deux CSV
|
| 89 |
-
df_a = load_csv(CSV_A, COL_NAME_A, CTX_COLS_A, ENC_A)
|
| 90 |
-
df_b = load_csv(CSV_B, COL_NAME_B, CTX_COLS_B, ENC_B)
|
| 91 |
-
|
| 92 |
-
print(f"[INFO] Lignes fichier A : {len(df_a)}")
|
| 93 |
-
print(f"[INFO] Lignes fichier B : {len(df_b)}")
|
| 94 |
-
|
| 95 |
-
# 2. Préparer une série de noms B pour RapidFuzz
|
| 96 |
-
names_b = df_b["__name_norm__"].tolist()
|
| 97 |
-
|
| 98 |
-
best_matches = []
|
| 99 |
-
for idx, row in df_a.iterrows():
|
| 100 |
-
name_a_norm = row["__name_norm__"]
|
| 101 |
-
|
| 102 |
-
if not name_a_norm:
|
| 103 |
-
best_matches.append({"score": 0, "b_index": None})
|
| 104 |
-
continue
|
| 105 |
-
|
| 106 |
-
# RapidFuzz: extractOne(label, choices, scorer=...)
|
| 107 |
-
match = process.extractOne(
|
| 108 |
-
name_a_norm,
|
| 109 |
-
names_b,
|
| 110 |
-
scorer=fuzz.token_sort_ratio,
|
| 111 |
-
)
|
| 112 |
-
if match is None:
|
| 113 |
-
best_matches.append({"score": 0, "b_index": None})
|
| 114 |
-
else:
|
| 115 |
-
label_b, score, b_idx = match
|
| 116 |
-
best_matches.append({"score": score, "b_index": b_idx})
|
| 117 |
-
|
| 118 |
-
# 3. Construire un DataFrame résultat
|
| 119 |
-
res = df_a.copy()
|
| 120 |
-
res["match_score"] = [m["score"] for m in best_matches]
|
| 121 |
-
res["b_index"] = [m["b_index"] for m in best_matches]
|
| 122 |
-
|
| 123 |
-
# joindre les infos du fichier B
|
| 124 |
-
res["name_b_raw"] = res["b_index"].apply(
|
| 125 |
-
lambda i: df_b.loc[i, "__name_raw__"] if pd.notna(i) else None
|
| 126 |
-
)
|
| 127 |
-
res["name_b_norm"] = res["b_index"].apply(
|
| 128 |
-
lambda i: df_b.loc[i, "__name_norm__"] if pd.notna(i) else None
|
| 129 |
-
)
|
| 130 |
-
|
| 131 |
-
# Ajout du contexte B (wilaya, delegation, etc.)
|
| 132 |
-
for c in CTX_COLS_B:
|
| 133 |
-
if c in df_b.columns:
|
| 134 |
-
col_b = f"{c}_b"
|
| 135 |
-
res[col_b] = res["b_index"].apply(
|
| 136 |
-
lambda i: df_b.loc[i, c] if pd.notna(i) else None
|
| 137 |
-
)
|
| 138 |
-
|
| 139 |
-
# 4. Séparer en 3 catégories
|
| 140 |
-
matches_surs = res[res["match_score"] >= HIGH_MATCH].copy()
|
| 141 |
-
matches_douteux = res[
|
| 142 |
-
(res["match_score"] >= LOW_MATCH) & (res["match_score"] < HIGH_MATCH)
|
| 143 |
-
].copy()
|
| 144 |
-
non_trouves = res[res["match_score"] < LOW_MATCH].copy()
|
| 145 |
-
|
| 146 |
-
print(f"[INFO] Matchs sûrs (score >= {HIGH_MATCH}) : {len(matches_surs)}")
|
| 147 |
-
print(f"[INFO] Matchs douteux ({LOW_MATCH} <= score < {HIGH_MATCH}) : {len(matches_douteux)}")
|
| 148 |
-
print(f"[INFO] Non trouvés (score < {LOW_MATCH}) : {len(non_trouves)}")
|
| 149 |
-
|
| 150 |
-
# 5. Export CSV
|
| 151 |
-
matches_surs.to_csv(OUT_MATCHES, index=False, encoding="utf-8-sig")
|
| 152 |
-
matches_douteux.to_csv(OUT_MAYBE, index=False, encoding="utf-8-sig")
|
| 153 |
-
non_trouves.to_csv(OUT_MISSING, index=False, encoding="utf-8-sig")
|
| 154 |
-
|
| 155 |
-
print("[OK] Export :")
|
| 156 |
-
print(" ", OUT_MATCHES.resolve())
|
| 157 |
-
print(" ", OUT_MAYBE.resolve())
|
| 158 |
-
print(" ", OUT_MISSING.resolve())
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
if __name__ == "__main__":
|
| 162 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/compare_data.py
DELETED
|
@@ -1,90 +0,0 @@
|
|
| 1 |
-
# compare_data.py
|
| 2 |
-
import sqlite3
|
| 3 |
-
import pandas as pd
|
| 4 |
-
from pathlib import Path
|
| 5 |
-
|
| 6 |
-
# ----------------- CONFIG -----------------
|
| 7 |
-
|
| 8 |
-
# Base SQLite des 141 sociétés enrichies
|
| 9 |
-
DB_PATH = Path("ba7ath_enriched.db")
|
| 10 |
-
|
| 11 |
-
# CSV complet des ~270 sociétés Trovit
|
| 12 |
-
CSV_PATH = Path("trovit_charikat_ahliya_all.csv")
|
| 13 |
-
|
| 14 |
-
# Table + colonne JSON dans SQLite
|
| 15 |
-
ENRICHED_TABLE = "enriched_companies"
|
| 16 |
-
DATA_COLUMN = "data"
|
| 17 |
-
|
| 18 |
-
# ----------------- CODE -----------------
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
def main():
|
| 22 |
-
# 1. Charger les 270 sociétés depuis le CSV
|
| 23 |
-
if not CSV_PATH.exists():
|
| 24 |
-
raise FileNotFoundError(f"CSV introuvable : {CSV_PATH.resolve()}")
|
| 25 |
-
|
| 26 |
-
df_270 = pd.read_csv(CSV_PATH)
|
| 27 |
-
print(f"[INFO] Sociétés dans le CSV Trovit : {len(df_270)}")
|
| 28 |
-
|
| 29 |
-
if "tax_id" not in df_270.columns:
|
| 30 |
-
raise KeyError(
|
| 31 |
-
"La colonne 'tax_id' est absente du CSV. "
|
| 32 |
-
"Vérifie l'en-tête de trovit_charikat_ahliya_all.csv."
|
| 33 |
-
)
|
| 34 |
-
|
| 35 |
-
# 2. Ouvrir la base SQLite
|
| 36 |
-
if not DB_PATH.exists():
|
| 37 |
-
raise FileNotFoundError(f"Base SQLite introuvable : {DB_PATH.resolve()}")
|
| 38 |
-
|
| 39 |
-
conn = sqlite3.connect(DB_PATH)
|
| 40 |
-
cur = conn.cursor()
|
| 41 |
-
|
| 42 |
-
# 3. Vérifier que la table existe bien
|
| 43 |
-
cur.execute(
|
| 44 |
-
"SELECT name FROM sqlite_master WHERE type='table' AND name=?",
|
| 45 |
-
(ENRICHED_TABLE,),
|
| 46 |
-
)
|
| 47 |
-
row = cur.fetchone()
|
| 48 |
-
if row is None:
|
| 49 |
-
tables = [
|
| 50 |
-
r[0]
|
| 51 |
-
for r in cur.execute(
|
| 52 |
-
"SELECT name FROM sqlite_master WHERE type='table'"
|
| 53 |
-
).fetchall()
|
| 54 |
-
]
|
| 55 |
-
conn.close()
|
| 56 |
-
raise RuntimeError(
|
| 57 |
-
f"La table '{ENRICHED_TABLE}' n'existe pas dans la base.\n"
|
| 58 |
-
f"Tables disponibles : {tables}"
|
| 59 |
-
)
|
| 60 |
-
|
| 61 |
-
# 4. Extraire les tax_id déjà présents dans data.rne
|
| 62 |
-
query = f"""
|
| 63 |
-
SELECT DISTINCT
|
| 64 |
-
json_extract({DATA_COLUMN}, '$.rne.tax_id') AS tax_id
|
| 65 |
-
FROM {ENRICHED_TABLE}
|
| 66 |
-
WHERE json_extract({DATA_COLUMN}, '$.rne.tax_id') IS NOT NULL
|
| 67 |
-
"""
|
| 68 |
-
df_rne = pd.read_sql(query, conn)
|
| 69 |
-
conn.close()
|
| 70 |
-
|
| 71 |
-
print(f"[INFO] Sociétés avec tax_id dans la base : {len(df_rne)}")
|
| 72 |
-
|
| 73 |
-
# 5. Comparer par tax_id (270 vs 141)
|
| 74 |
-
merged = df_270.merge(df_rne, on="tax_id", how="left", indicator=True)
|
| 75 |
-
|
| 76 |
-
# 6. Garder celles absentes de la base
|
| 77 |
-
missing = merged[merged["_merge"] == "left_only"].drop(columns=["_merge"])
|
| 78 |
-
print(
|
| 79 |
-
"[INFO] Sociétés présentes dans le CSV mais absentes de la base :",
|
| 80 |
-
len(missing),
|
| 81 |
-
)
|
| 82 |
-
|
| 83 |
-
# 7. Sauvegarder le résultat
|
| 84 |
-
out_path = Path("trovit_missing_not_in_rne.csv")
|
| 85 |
-
missing.to_csv(out_path, index=False, encoding="utf-8-sig")
|
| 86 |
-
print(f"[OK] Fichier généré : {out_path.resolve()}")
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
if __name__ == "__main__":
|
| 90 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/compare_names_with_qwen.py
DELETED
|
@@ -1,185 +0,0 @@
|
|
| 1 |
-
# compare_names_with_qwen.py
|
| 2 |
-
import csv
|
| 3 |
-
import json
|
| 4 |
-
import time
|
| 5 |
-
import os
|
| 6 |
-
from pathlib import Path
|
| 7 |
-
|
| 8 |
-
import requests
|
| 9 |
-
from dotenv import load_dotenv
|
| 10 |
-
|
| 11 |
-
# Load environment variables
|
| 12 |
-
load_dotenv()
|
| 13 |
-
|
| 14 |
-
# ---------------- CONFIG ----------------
|
| 15 |
-
|
| 16 |
-
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://127.0.0.1:11434/api/chat")
|
| 17 |
-
MODEL_NAME = os.getenv("MODEL_NAME", "qwen2.5:latest")
|
| 18 |
-
|
| 19 |
-
CSV_AR = Path(os.getenv("PATH_AHLYA_CSV", "Ahlya_Total_Feuil1.csv"))
|
| 20 |
-
CSV_FR = Path(os.getenv("PATH_RNE_CSV", "trovit_charikat_ahliya_all.csv"))
|
| 21 |
-
|
| 22 |
-
OUT_MATCHES = Path("matches_qwen.csv")
|
| 23 |
-
OUT_NOT_IN_TROVIT = Path("not_in_trovit_qwen.csv")
|
| 24 |
-
|
| 25 |
-
SLEEP_SECONDS = 0.05 # petite pause entre appels
|
| 26 |
-
|
| 27 |
-
# ----------------------------------------
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
def load_names_ar(path: Path):
|
| 31 |
-
"""Charge la 1re colonne (noms en arabe)."""
|
| 32 |
-
if not path.exists():
|
| 33 |
-
raise FileNotFoundError(path.resolve())
|
| 34 |
-
rows = []
|
| 35 |
-
with path.open("r", encoding="utf-8-sig", newline="") as f:
|
| 36 |
-
reader = csv.reader(f)
|
| 37 |
-
header = next(reader, None)
|
| 38 |
-
for line in reader:
|
| 39 |
-
if not line:
|
| 40 |
-
continue
|
| 41 |
-
name_ar = (line[0] or "").strip()
|
| 42 |
-
if not name_ar:
|
| 43 |
-
continue
|
| 44 |
-
rows.append({"name_ar": name_ar})
|
| 45 |
-
print(f"[INFO] Noms AR chargés : {len(rows)}")
|
| 46 |
-
return rows
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
def load_names_fr(path: Path):
|
| 50 |
-
"""Charge la 3e colonne (noms en français)."""
|
| 51 |
-
if not path.exists():
|
| 52 |
-
raise FileNotFoundError(path.resolve())
|
| 53 |
-
names_fr = []
|
| 54 |
-
with path.open("r", encoding="utf-8-sig", newline="") as f:
|
| 55 |
-
reader = csv.reader(f)
|
| 56 |
-
header = next(reader, None)
|
| 57 |
-
for line in reader:
|
| 58 |
-
if len(line) < 3:
|
| 59 |
-
continue
|
| 60 |
-
name_fr = (line[2] or "").strip()
|
| 61 |
-
if not name_fr:
|
| 62 |
-
continue
|
| 63 |
-
names_fr.append(name_fr)
|
| 64 |
-
print(f"[INFO] Noms FR chargés (Trovit) : {len(names_fr)}")
|
| 65 |
-
return names_fr
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
def build_fr_list_for_prompt(names_fr):
|
| 69 |
-
"""Construit une liste numérotée lisible pour le prompt."""
|
| 70 |
-
lines = []
|
| 71 |
-
for i, name in enumerate(names_fr, start=1):
|
| 72 |
-
lines.append(f"{i}. {name}")
|
| 73 |
-
return "\n".join(lines)
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
def ask_qwen_match(name_ar, fr_list_text):
|
| 77 |
-
"""Demande à Qwen si le nom AR correspond à un/plusieurs noms FR."""
|
| 78 |
-
system_prompt = (
|
| 79 |
-
"Tu es un assistant qui fait du rapprochement de noms de sociétés "
|
| 80 |
-
"entre l'arabe et le français.\n"
|
| 81 |
-
"Règles :\n"
|
| 82 |
-
"- Tu dois dire si le nom arabe désigne la même société qu'un ou plusieurs "
|
| 83 |
-
"noms français dans la liste.\n"
|
| 84 |
-
"- Prends en compte le sens, pas la traduction littérale exacte.\n"
|
| 85 |
-
"- Si tu n'es PAS sûr, considère qu'il n'y a PAS de correspondance.\n"
|
| 86 |
-
"- Réponds STRICTEMENT en JSON valide, sans texte autour.\n"
|
| 87 |
-
' Format : {"match": true/false, "indexes": [liste_entiers], "reason": "texte court"}.\n'
|
| 88 |
-
"- Les indexes commencent à 1 et correspondent à la numérotation de la liste française."
|
| 89 |
-
)
|
| 90 |
-
|
| 91 |
-
user_prompt = (
|
| 92 |
-
"Nom de la société en arabe :\n"
|
| 93 |
-
f"{name_ar}\n\n"
|
| 94 |
-
"Liste des noms de sociétés en français :\n"
|
| 95 |
-
f"{fr_list_text}\n\n"
|
| 96 |
-
"Question :\n"
|
| 97 |
-
"- Le nom arabe correspond-il à une ou plusieurs sociétés françaises dans cette liste ?\n"
|
| 98 |
-
"- Si oui, donne les indexes exacts dans le champ \"indexes\".\n"
|
| 99 |
-
"- Si non, renvoie match=false et indexes=[]."
|
| 100 |
-
)
|
| 101 |
-
|
| 102 |
-
payload = {
|
| 103 |
-
"model": MODEL_NAME,
|
| 104 |
-
"messages": [
|
| 105 |
-
{"role": "system", "content": system_prompt},
|
| 106 |
-
{"role": "user", "content": user_prompt},
|
| 107 |
-
],
|
| 108 |
-
"stream": False,
|
| 109 |
-
}
|
| 110 |
-
|
| 111 |
-
resp = requests.post(OLLAMA_URL, json=payload, timeout=300)
|
| 112 |
-
resp.raise_for_status()
|
| 113 |
-
data = resp.json()
|
| 114 |
-
content = data.get("message", {}).get("content", "").strip()
|
| 115 |
-
if not content and "response" in data:
|
| 116 |
-
content = data["response"].strip()
|
| 117 |
-
|
| 118 |
-
try:
|
| 119 |
-
result = json.loads(content)
|
| 120 |
-
except json.JSONDecodeError:
|
| 121 |
-
raise ValueError(f"Réponse non JSON de Qwen : {content}")
|
| 122 |
-
|
| 123 |
-
match = bool(result.get("match", False))
|
| 124 |
-
indexes = result.get("indexes", []) or []
|
| 125 |
-
if not isinstance(indexes, list):
|
| 126 |
-
indexes = []
|
| 127 |
-
reason = str(result.get("reason", "")).strip()
|
| 128 |
-
|
| 129 |
-
return match, indexes, reason
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
def main():
|
| 133 |
-
rows_ar = load_names_ar(CSV_AR)
|
| 134 |
-
names_fr = load_names_fr(CSV_FR)
|
| 135 |
-
fr_list_text = build_fr_list_for_prompt(names_fr)
|
| 136 |
-
|
| 137 |
-
matches = []
|
| 138 |
-
not_found = []
|
| 139 |
-
|
| 140 |
-
for i, row in enumerate(rows_ar, start=1):
|
| 141 |
-
name_ar = row["name_ar"]
|
| 142 |
-
print(f"[{i}/{len(rows_ar)}] Qwen compare : {name_ar}")
|
| 143 |
-
|
| 144 |
-
try:
|
| 145 |
-
match, indexes, reason = ask_qwen_match(name_ar, fr_list_text)
|
| 146 |
-
except Exception as e:
|
| 147 |
-
print(f" [ERREUR] {e}")
|
| 148 |
-
match, indexes, reason = False, [], f"error: {e}"
|
| 149 |
-
|
| 150 |
-
if match and indexes:
|
| 151 |
-
matched_names = [names_fr[idx - 1] for idx in indexes if 1 <= idx <= len(names_fr)]
|
| 152 |
-
matches.append({
|
| 153 |
-
"name_ar": name_ar,
|
| 154 |
-
"matched_indexes": ";".join(str(x) for x in indexes),
|
| 155 |
-
"matched_names_fr": " | ".join(matched_names),
|
| 156 |
-
"reason": reason,
|
| 157 |
-
})
|
| 158 |
-
else:
|
| 159 |
-
not_found.append({
|
| 160 |
-
"name_ar": name_ar,
|
| 161 |
-
"reason": reason,
|
| 162 |
-
})
|
| 163 |
-
|
| 164 |
-
time.sleep(SLEEP_SECONDS)
|
| 165 |
-
|
| 166 |
-
# Écriture des résultats
|
| 167 |
-
with OUT_MATCHES.open("w", encoding="utf-8-sig", newline="") as f:
|
| 168 |
-
fieldnames = ["name_ar", "matched_indexes", "matched_names_fr", "reason"]
|
| 169 |
-
writer = csv.DictWriter(f, fieldnames=fieldnames)
|
| 170 |
-
writer.writeheader()
|
| 171 |
-
writer.writerows(matches)
|
| 172 |
-
|
| 173 |
-
with OUT_NOT_IN_TROVIT.open("w", encoding="utf-8-sig", newline="") as f:
|
| 174 |
-
fieldnames = ["name_ar", "reason"]
|
| 175 |
-
writer = csv.DictWriter(f, fieldnames=fieldnames)
|
| 176 |
-
writer.writeheader()
|
| 177 |
-
writer.writerows(not_found)
|
| 178 |
-
|
| 179 |
-
print(f"[OK] Matchs écrits dans : {OUT_MATCHES.resolve()}")
|
| 180 |
-
print(f"[OK] Non présents (selon Qwen) : {OUT_NOT_IN_TROVIT.resolve()}")
|
| 181 |
-
print(f"[INFO] Total matchs : {len(matches)}, non trouvés : {len(not_found)}")
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
if __name__ == "__main__":
|
| 185 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/create_admin.py
DELETED
|
@@ -1,44 +0,0 @@
|
|
| 1 |
-
from sqlalchemy.orm import Session
|
| 2 |
-
from app.database import SessionLocal, engine, Base
|
| 3 |
-
from app.models.user_models import User
|
| 4 |
-
from app.services.auth_service import get_password_hash
|
| 5 |
-
import sys
|
| 6 |
-
|
| 7 |
-
# Ensure tables exist
|
| 8 |
-
Base.metadata.create_all(bind=engine)
|
| 9 |
-
|
| 10 |
-
def create_admin_user(email, password, full_name):
|
| 11 |
-
db: Session = SessionLocal()
|
| 12 |
-
try:
|
| 13 |
-
user = db.query(User).filter(User.email == email).first()
|
| 14 |
-
if user:
|
| 15 |
-
print(f"User {email} already exists.")
|
| 16 |
-
return
|
| 17 |
-
|
| 18 |
-
hashed_password = get_password_hash(password)
|
| 19 |
-
new_user = User(
|
| 20 |
-
email=email,
|
| 21 |
-
hashed_password=hashed_password,
|
| 22 |
-
full_name=full_name,
|
| 23 |
-
is_active=True,
|
| 24 |
-
is_admin=True
|
| 25 |
-
)
|
| 26 |
-
db.add(new_user)
|
| 27 |
-
db.commit()
|
| 28 |
-
db.refresh(new_user)
|
| 29 |
-
print(f"Admin user {email} created successfully.")
|
| 30 |
-
except Exception as e:
|
| 31 |
-
print(f"Error creating user: {e}")
|
| 32 |
-
finally:
|
| 33 |
-
db.close()
|
| 34 |
-
|
| 35 |
-
if __name__ == "__main__":
|
| 36 |
-
if len(sys.argv) < 3:
|
| 37 |
-
print("Usage: python create_admin.py <email> <password> [full_name]")
|
| 38 |
-
sys.exit(1)
|
| 39 |
-
|
| 40 |
-
email = sys.argv[1]
|
| 41 |
-
password = sys.argv[2]
|
| 42 |
-
full_name = sys.argv[3] if len(sys.argv) > 3 else "Admin User"
|
| 43 |
-
|
| 44 |
-
create_admin_user(email, password, full_name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/enrich_not_in_trovit.py
DELETED
|
@@ -1,71 +0,0 @@
|
|
| 1 |
-
# enrich_not_in_trovit.py
|
| 2 |
-
import pandas as pd
|
| 3 |
-
from pathlib import Path
|
| 4 |
-
|
| 5 |
-
# Fichiers d'entrée
|
| 6 |
-
CSV_NOT_IN = Path("not_in_trovit_qwen.csv")
|
| 7 |
-
CSV_AHLYA = Path("Ahlya_Total_Feuil1.csv")
|
| 8 |
-
|
| 9 |
-
# Fichier de sortie
|
| 10 |
-
CSV_OUT = Path("not_in_trovit_enriched.csv")
|
| 11 |
-
|
| 12 |
-
def main():
|
| 13 |
-
if not CSV_NOT_IN.exists():
|
| 14 |
-
raise FileNotFoundError(CSV_NOT_IN.resolve())
|
| 15 |
-
if not CSV_AHLYA.exists():
|
| 16 |
-
raise FileNotFoundError(CSV_AHLYA.resolve())
|
| 17 |
-
|
| 18 |
-
# 1. Charger les fichiers
|
| 19 |
-
df_not = pd.read_csv(CSV_NOT_IN, encoding="utf-8-sig")
|
| 20 |
-
df_ah = pd.read_csv(CSV_AHLYA, encoding="utf-8-sig")
|
| 21 |
-
|
| 22 |
-
# 2. Vérifier les colonnes attendues
|
| 23 |
-
if "name_ar" not in df_not.columns:
|
| 24 |
-
raise KeyError(f"'name_ar' manquant dans {CSV_NOT_IN.name} ; colonnes = {list(df_not.columns)}")
|
| 25 |
-
|
| 26 |
-
col_nom_ahlya = "اسم_الشركة"
|
| 27 |
-
if col_nom_ahlya not in df_ah.columns:
|
| 28 |
-
raise KeyError(f"'{col_nom_ahlya}' manquant dans {CSV_AHLYA.name} ; colonnes = {list(df_ah.columns)}")
|
| 29 |
-
|
| 30 |
-
# 3. Normalisation légère des noms des deux côtés
|
| 31 |
-
def norm(s):
|
| 32 |
-
if pd.isna(s):
|
| 33 |
-
return ""
|
| 34 |
-
return str(s).strip()
|
| 35 |
-
|
| 36 |
-
df_not["__key__"] = df_not["name_ar"].apply(norm)
|
| 37 |
-
df_ah["__key__"] = df_ah[col_nom_ahlya].apply(norm)
|
| 38 |
-
|
| 39 |
-
# 4. Colonnes à ramener depuis Ahlya
|
| 40 |
-
cols_details = [
|
| 41 |
-
col_nom_ahlya,
|
| 42 |
-
"الموضوع / النشاط",
|
| 43 |
-
"العنوان",
|
| 44 |
-
"الولاية",
|
| 45 |
-
"المعتمدية",
|
| 46 |
-
"المنطقة",
|
| 47 |
-
"النوع",
|
| 48 |
-
]
|
| 49 |
-
|
| 50 |
-
# On garde seulement les colonnes utiles + clé
|
| 51 |
-
keep_ah = [c for c in cols_details if c in df_ah.columns] + ["__key__"]
|
| 52 |
-
df_ah_small = df_ah[keep_ah].drop_duplicates("__key__")
|
| 53 |
-
|
| 54 |
-
# 5. Merge left : toutes les lignes de not_in, détails pris dans Ahlya
|
| 55 |
-
df_merged = df_not.merge(
|
| 56 |
-
df_ah_small,
|
| 57 |
-
on="__key__",
|
| 58 |
-
how="left",
|
| 59 |
-
suffixes=("", "_ahlya"),
|
| 60 |
-
)
|
| 61 |
-
|
| 62 |
-
# 6. Nettoyage : on peut retirer la clé technique si tu veux
|
| 63 |
-
df_merged.drop(columns=["__key__"], inplace=True)
|
| 64 |
-
|
| 65 |
-
# 7. Sauvegarde
|
| 66 |
-
df_merged.to_csv(CSV_OUT, index=False, encoding="utf-8-sig")
|
| 67 |
-
print(f"[OK] Fichier enrichi écrit dans : {CSV_OUT.resolve()}")
|
| 68 |
-
print(f"Lignes : {len(df_merged)}")
|
| 69 |
-
|
| 70 |
-
if __name__ == "__main__":
|
| 71 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/inspect_db.py
DELETED
|
@@ -1,46 +0,0 @@
|
|
| 1 |
-
# inspect_db.py
|
| 2 |
-
import sqlite3
|
| 3 |
-
from pathlib import Path
|
| 4 |
-
|
| 5 |
-
# Essaie d'abord avec ce nom, puis adapte (microsite.db, database.sqlite, instance/app.db, etc.)
|
| 6 |
-
DB_PATH = Path("ba7ath_enriched.db")
|
| 7 |
-
|
| 8 |
-
def main():
|
| 9 |
-
print("=== Inspection de la base SQLite ===")
|
| 10 |
-
print("Chemin supposé :", DB_PATH.resolve())
|
| 11 |
-
|
| 12 |
-
if not DB_PATH.exists():
|
| 13 |
-
print("[ERREUR] Fichier introuvable :", DB_PATH.resolve())
|
| 14 |
-
return
|
| 15 |
-
|
| 16 |
-
print("Taille fichier (octets) :", DB_PATH.stat().st_size)
|
| 17 |
-
|
| 18 |
-
conn = sqlite3.connect(DB_PATH)
|
| 19 |
-
|
| 20 |
-
print("\n=== Bases attachées ===")
|
| 21 |
-
for row in conn.execute("PRAGMA database_list;"):
|
| 22 |
-
# schema, name, file
|
| 23 |
-
print(row)
|
| 24 |
-
|
| 25 |
-
print("\n=== Tables SQLite ===")
|
| 26 |
-
tables = [
|
| 27 |
-
r[0]
|
| 28 |
-
for r in conn.execute(
|
| 29 |
-
"SELECT name FROM sqlite_master WHERE type='table'"
|
| 30 |
-
).fetchall()
|
| 31 |
-
]
|
| 32 |
-
if not tables:
|
| 33 |
-
print("(aucune table utilisateur)")
|
| 34 |
-
for name in tables:
|
| 35 |
-
print("-", name)
|
| 36 |
-
|
| 37 |
-
print("\n=== Structure des tables ===")
|
| 38 |
-
for name in tables:
|
| 39 |
-
print(f"\nTable: {name}")
|
| 40 |
-
for col in conn.execute(f"PRAGMA table_info({name})"):
|
| 41 |
-
print(" ", col)
|
| 42 |
-
|
| 43 |
-
conn.close()
|
| 44 |
-
|
| 45 |
-
if __name__ == "__main__":
|
| 46 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/readme.md
DELETED
|
@@ -1,12 +0,0 @@
|
|
| 1 |
-
---
|
| 2 |
-
title: Ba7ath OSINT API
|
| 3 |
-
emoji: 🛡️
|
| 4 |
-
colorFrom: green
|
| 5 |
-
colorTo: blue
|
| 6 |
-
sdk: gradio
|
| 7 |
-
app_file: app.py
|
| 8 |
-
pinned: false
|
| 9 |
-
---
|
| 10 |
-
|
| 11 |
-
# Ba7ath OSINT API
|
| 12 |
-
Backend pour l'investigation et l'analyse de risque.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
docs/API_Reference.md
DELETED
|
@@ -1,103 +0,0 @@
|
|
| 1 |
-
# 📖 API Reference
|
| 2 |
-
|
| 3 |
-
Tous les endpoints sont préfixés par `/api/v1`.
|
| 4 |
-
**Base URL Production**: `https://ahlya-production.up.railway.app/api/v1`
|
| 5 |
-
|
| 6 |
-
## 🔐 Authentification
|
| 7 |
-
La plupart des routes nécessitent un token JWT valide.
|
| 8 |
-
|
| 9 |
-
| Header | Valeur |
|
| 10 |
-
| :--- | :--- |
|
| 11 |
-
| `Authorization` | `Bearer <access_token>` |
|
| 12 |
-
|
| 13 |
-
---
|
| 14 |
-
|
| 15 |
-
## 🔑 Auth Endpoints
|
| 16 |
-
|
| 17 |
-
### Login
|
| 18 |
-
`POST /auth/login`
|
| 19 |
-
|
| 20 |
-
Authentification via formulaire standard OAuth2.
|
| 21 |
-
|
| 22 |
-
- **Request Body** (`application/x-www-form-urlencoded`):
|
| 23 |
-
- `username`: Email de l'utilisateur.
|
| 24 |
-
- `password`: Mot de passe.
|
| 25 |
-
- **Success (200)**:
|
| 26 |
-
```json
|
| 27 |
-
{
|
| 28 |
-
"access_token": "eyJhbG...",
|
| 29 |
-
"token_type": "bearer"
|
| 30 |
-
}
|
| 31 |
-
```
|
| 32 |
-
|
| 33 |
-
---
|
| 34 |
-
|
| 35 |
-
## 📊 Statistiques & Risques
|
| 36 |
-
|
| 37 |
-
### Statistiques Nationales
|
| 38 |
-
`GET /stats/national` (PROTÉGÉ)
|
| 39 |
-
|
| 40 |
-
Retourne les métriques agrégées pour l'ensemble du pays.
|
| 41 |
-
|
| 42 |
-
- **Exemple de réponse**:
|
| 43 |
-
```json
|
| 44 |
-
{
|
| 45 |
-
"total_companies": 31000,
|
| 46 |
-
"top_wilayas": ["Tunis", "Sousse", "Sfax"],
|
| 47 |
-
"risk_index": 4.2
|
| 48 |
-
}
|
| 49 |
-
```
|
| 50 |
-
|
| 51 |
-
### Risques par Wilaya
|
| 52 |
-
`GET /risk/wilayas` (PROTÉGÉ)
|
| 53 |
-
|
| 54 |
-
Liste les scores de risque pour toutes les wilayas.
|
| 55 |
-
|
| 56 |
-
---
|
| 57 |
-
|
| 58 |
-
## 📂 Enrichment (Core Data)
|
| 59 |
-
|
| 60 |
-
### Liste des sociétés enrichies
|
| 61 |
-
`GET /enrichment/list` (PROTÉGÉ)
|
| 62 |
-
|
| 63 |
-
- **Paramètres**:
|
| 64 |
-
- `page` (int): Par défaut 1.
|
| 65 |
-
- `per_page` (int): Par défaut 12.
|
| 66 |
-
- `search` (str): Recherche par nom.
|
| 67 |
-
- `wilaya` (str): Filtre par wilaya.
|
| 68 |
-
- `has_red_flags` (bool): Filtre les cas critiques.
|
| 69 |
-
|
| 70 |
-
- **Response**:
|
| 71 |
-
```json
|
| 72 |
-
{
|
| 73 |
-
"companies": [...],
|
| 74 |
-
"total": 150,
|
| 75 |
-
"total_pages": 13
|
| 76 |
-
}
|
| 77 |
-
```
|
| 78 |
-
|
| 79 |
-
### Profil complet
|
| 80 |
-
`GET /enrichment/profile/{company_id}` (PROTÉGÉ)
|
| 81 |
-
|
| 82 |
-
Retourne l'intégralité des données (RNE, JORT, Marchés) et les Red Flags calculés.
|
| 83 |
-
|
| 84 |
-
---
|
| 85 |
-
|
| 86 |
-
## 🛠️ User Management (Admin Only)
|
| 87 |
-
|
| 88 |
-
### Liste des utilisateurs
|
| 89 |
-
`GET /auth/users` (PROTECTED ADMIN)
|
| 90 |
-
|
| 91 |
-
Retourne la liste des utilisateurs du système.
|
| 92 |
-
|
| 93 |
-
### Création d'utilisateur
|
| 94 |
-
`POST /auth/users` (PROTECTED ADMIN)
|
| 95 |
-
- **Body**: `{ "email": "...", "password": "...", "is_admin": true }`
|
| 96 |
-
|
| 97 |
-
---
|
| 98 |
-
|
| 99 |
-
## 📝 Exemple Curl
|
| 100 |
-
```bash
|
| 101 |
-
curl -X GET "https://ahlya-production.up.railway.app/api/v1/enrichment/list" \
|
| 102 |
-
-H "Authorization: Bearer <votre_token>"
|
| 103 |
-
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
docs/Authentication_Guide.md
DELETED
|
@@ -1,58 +0,0 @@
|
|
| 1 |
-
# 🔐 Authentication Guide
|
| 2 |
-
|
| 3 |
-
Le système utilise une authentification basée sur les **JSON Web Tokens (JWT)** pour sécuriser les données sensibles d'investigation.
|
| 4 |
-
|
| 5 |
-
## 🔄 Flux d'Authentification
|
| 6 |
-
|
| 7 |
-
```mermaid
|
| 8 |
-
sequenceDiagram
|
| 9 |
-
participant User as Utilisateur
|
| 10 |
-
participant FE as Frontend (React)
|
| 11 |
-
participant BE as Backend (FastAPI)
|
| 12 |
-
participant DB as SQLite
|
| 13 |
-
|
| 14 |
-
User->>FE: Saisie Email/Password
|
| 15 |
-
FE->>BE: POST /api/v1/auth/login
|
| 16 |
-
BE->>DB: Vérifier User / Argon2 Hash
|
| 17 |
-
DB-->>BE: User Valide
|
| 18 |
-
BE-->>FE: Retourne JWT Access Token
|
| 19 |
-
FE->>FE: Stockage dans localStorage
|
| 20 |
-
FE->>BE: GET /api/v1/enriched (Header Bearer)
|
| 21 |
-
BE->>BE: Validation Signature JWT
|
| 22 |
-
BE-->>FE: Retourne Données
|
| 23 |
-
```
|
| 24 |
-
|
| 25 |
-
## 🛠️ Configuration Backend
|
| 26 |
-
Le secret et l'algorithme sont définis dans les variables d'environnement.
|
| 27 |
-
|
| 28 |
-
- **Variables Clés**:
|
| 29 |
-
- `SECRET_KEY`: Utilisée pour signer les tokens (indispensable en prod).
|
| 30 |
-
- `ALGORITHM`: Généralement `HS256`.
|
| 31 |
-
- `ACCESS_TOKEN_EXPIRE_MINUTES`: Durée de validité.
|
| 32 |
-
|
| 33 |
-
## 💻 Implémentation Frontend (`AuthContext`)
|
| 34 |
-
La gestion de l'état `user` et `token` est centralisée dans `src/context/AuthContext.jsx`.
|
| 35 |
-
|
| 36 |
-
### Usage dans les services :
|
| 37 |
-
Pour appeler une API protégée, utilisez le helper `authenticatedFetch` dans `src/services/api.js` qui injecte le header `Authorization`.
|
| 38 |
-
|
| 39 |
-
```javascript
|
| 40 |
-
const getAuthHeaders = () => {
|
| 41 |
-
const token = localStorage.getItem('token');
|
| 42 |
-
return token ? { 'Authorization': `Bearer ${token}` } : {};
|
| 43 |
-
};
|
| 44 |
-
```
|
| 45 |
-
|
| 46 |
-
## 🛡️ Rôles et Permissions
|
| 47 |
-
Le système distingue deux niveaux :
|
| 48 |
-
1. **Utilisateur Actif**: Accès aux données d'investigation.
|
| 49 |
-
2. **Administrateur** (`is_admin=true`): Accès au dashboard admin et gestion des utilisateurs.
|
| 50 |
-
|
| 51 |
-
## 👤 Création du Premier Admin
|
| 52 |
-
Si la base de données est vide, utilisez le script utilitaire :
|
| 53 |
-
```bash
|
| 54 |
-
python create_admin.py
|
| 55 |
-
```
|
| 56 |
-
**Admin par défaut**:
|
| 57 |
-
- **Email**: `ba77ath@proton.me`
|
| 58 |
-
- **Password**: `Apostroph03`
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
docs/Contributing_Guide.md
DELETED
|
@@ -1,40 +0,0 @@
|
|
| 1 |
-
# 🤝 Contributing Guide
|
| 2 |
-
|
| 3 |
-
Merci de contribuer à la plateforme **Ba7ath** ! Ce document définit les standards et le workflow pour maintenir la qualité du projet.
|
| 4 |
-
|
| 5 |
-
## 🌿 Workflow Git
|
| 6 |
-
1. **Branching**: Créez une branche descriptive pour chaque feature ou bugfix.
|
| 7 |
-
- `feat/nom-de-la-feature`
|
| 8 |
-
- `fix/nom-du-bug`
|
| 9 |
-
- `docs/nom-de-la-doc`
|
| 10 |
-
2. **Pull Requests**:
|
| 11 |
-
- Décrivez clairement les changements effectués.
|
| 12 |
-
- Liez la PR à une issue si elle existe.
|
| 13 |
-
- Assurez-vous que le build passe avant de demander une review.
|
| 14 |
-
|
| 15 |
-
## 📝 Standards de Code
|
| 16 |
-
|
| 17 |
-
### Backend (Python)
|
| 18 |
-
- Respectez la **PEP 8**.
|
| 19 |
-
- Utilisez des **type hints** pour toutes les fonctions FastAPI.
|
| 20 |
-
- Commentez les logiques OSINT complexes.
|
| 21 |
-
|
| 22 |
-
### Frontend (React)
|
| 23 |
-
- Utilisez des **Functional Components** avec hooks.
|
| 24 |
-
- **Tailwind CSS** : Évitez les styles inline ou le CSS personnalisé quand c'est possible.
|
| 25 |
-
- Nommez vos composants en `PascalCase`.
|
| 26 |
-
|
| 27 |
-
### Architecture
|
| 28 |
-
- Ne jamais coder en dur (hardcode) de secrets ou d'URLs de production.
|
| 29 |
-
- Utilisez toujours `src/services/api.js` pour les appels backend.
|
| 30 |
-
|
| 31 |
-
## 💬 Messages de Commit
|
| 32 |
-
Suivez la convention **Conventional Commits** :
|
| 33 |
-
- `feat: ajouter la comparaison par wilaya`
|
| 34 |
-
- `fix: corriger le hachage des mots de passe`
|
| 35 |
-
- `docs: mettre à jour l'architecture frontend`
|
| 36 |
-
|
| 37 |
-
---
|
| 38 |
-
|
| 39 |
-
## 🛡️ Sécurité
|
| 40 |
-
Si vous découvrez une faille de sécurité, ne créez pas d'issue publique. Contactez directement l'équipe à `ba77ath@proton.me`.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
docs/Database_Schema.md
DELETED
|
@@ -1,81 +0,0 @@
|
|
| 1 |
-
# 🗄️ Database Schema
|
| 2 |
-
|
| 3 |
-
Le projet utilise **SQLite** pour sa simplicité de déploiement et ses performances suffisantes pour un outil d'investigation spécialisé.
|
| 4 |
-
|
| 5 |
-
**Fichier**: `backend/ba7ath_enriched.db`
|
| 6 |
-
|
| 7 |
-
## 📊 Diagramme E-R
|
| 8 |
-
|
| 9 |
-
```mermaid
|
| 10 |
-
erDiagram
|
| 11 |
-
USER ||--o{ INVESTIGATION_NOTE : creates
|
| 12 |
-
ENRICHED_COMPANY ||--o{ INVESTIGATION_NOTE : has
|
| 13 |
-
WATCH_COMPANY ||--o{ ENRICHED_COMPANY : becomes
|
| 14 |
-
|
| 15 |
-
USER {
|
| 16 |
-
int id PK
|
| 17 |
-
string email UK
|
| 18 |
-
string hashed_password
|
| 19 |
-
string full_name
|
| 20 |
-
boolean is_active
|
| 21 |
-
boolean is_admin
|
| 22 |
-
}
|
| 23 |
-
|
| 24 |
-
ENRICHED_COMPANY {
|
| 25 |
-
string company_id PK
|
| 26 |
-
string company_name
|
| 27 |
-
string wilaya
|
| 28 |
-
json data
|
| 29 |
-
json metrics
|
| 30 |
-
string enriched_by
|
| 31 |
-
datetime enriched_at
|
| 32 |
-
}
|
| 33 |
-
|
| 34 |
-
INVESTIGATION_NOTE {
|
| 35 |
-
string id PK
|
| 36 |
-
string company_id FK
|
| 37 |
-
string title
|
| 38 |
-
text content
|
| 39 |
-
datetime created_at
|
| 40 |
-
string created_by
|
| 41 |
-
json tags
|
| 42 |
-
}
|
| 43 |
-
|
| 44 |
-
WATCH_COMPANY {
|
| 45 |
-
string id PK
|
| 46 |
-
string name_ar
|
| 47 |
-
string wilaya
|
| 48 |
-
string etat_enregistrement
|
| 49 |
-
datetime detected_trovit_at
|
| 50 |
-
}
|
| 51 |
-
```
|
| 52 |
-
|
| 53 |
-
---
|
| 54 |
-
|
| 55 |
-
## 📑 Tables Détail
|
| 56 |
-
|
| 57 |
-
### 1. `users`
|
| 58 |
-
Stocke les identifiants et les niveaux de privilèges.
|
| 59 |
-
- `hashed_password`: Hachage sécurisé (Argon2).
|
| 60 |
-
|
| 61 |
-
### 2. `enriched_companies`
|
| 62 |
-
C'est le cœur de la plateforme. Les colonnes `data` et `metrics` sont de type JSON.
|
| 63 |
-
- **data**: Contient les données brutes extraites (RNE, JORT, Marchés).
|
| 64 |
-
- **metrics**: Contient les scores de risque et la liste des Red Flags détectés.
|
| 65 |
-
|
| 66 |
-
### 3. `investigation_notes`
|
| 67 |
-
Permet aux journalistes d'ajouter des preuves textuelles ou des commentaires sur une société spécifique.
|
| 68 |
-
|
| 69 |
-
### 4. `watch_companies`
|
| 70 |
-
Liste des sociétés identifiées comme "Ahlia" mais non encore trouvées dans les registres officiels (RNE).
|
| 71 |
-
|
| 72 |
-
---
|
| 73 |
-
|
| 74 |
-
## 📁 Migration et Initialisation
|
| 75 |
-
La base de données est automatiquement créée et les tables initialisées lors du démarrage du backend :
|
| 76 |
-
```python
|
| 77 |
-
# backend/app/main.py
|
| 78 |
-
@app.on_event("startup")
|
| 79 |
-
async def startup_event():
|
| 80 |
-
Base.metadata.create_all(bind=engine)
|
| 81 |
-
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
docs/Deployment_Guide.md
DELETED
|
@@ -1,41 +0,0 @@
|
|
| 1 |
-
# 🚀 Deployment Guide
|
| 2 |
-
|
| 3 |
-
Le projet est conçu pour un déploiement Cloud moderne et automatisé.
|
| 4 |
-
|
| 5 |
-
## 📁 Backend : Railway
|
| 6 |
-
|
| 7 |
-
Le backend FastAPI est hébergé sur **Railway**.
|
| 8 |
-
|
| 9 |
-
### Configuration
|
| 10 |
-
1. **Repository**: Liez votre repository GitHub à Railway.
|
| 11 |
-
2. **Volumes** (CRITIQUE) :
|
| 12 |
-
- SQLite nécessite un stockage persistant.
|
| 13 |
-
- Créez un Volume Railway nommé `data` monté sur `/app/data`.
|
| 14 |
-
- Modifiez votre `DATABASE_URL` pour pointer vers `/app/data/ba7ath_enriched.db`.
|
| 15 |
-
3. **Variables d'environnement** :
|
| 16 |
-
- `SECRET_KEY`: Une chaîne aléatoire longue.
|
| 17 |
-
- `ALGORITHM`: `HS256`.
|
| 18 |
-
- `CORS_ORIGINS`: Liste des domaines autorisés (ex: `https://ahlya-investigations.vercel.app`).
|
| 19 |
-
|
| 20 |
-
---
|
| 21 |
-
|
| 22 |
-
## 🎨 Frontend : Vercel
|
| 23 |
-
|
| 24 |
-
Le frontend React est hébergé sur **Vercel**.
|
| 25 |
-
|
| 26 |
-
### Configuration
|
| 27 |
-
1. **Framework Preset**: Vite.
|
| 28 |
-
2. **Build Command**: `npm run build`.
|
| 29 |
-
3. **Output Directory**: `dist`. (Ou `build` selon votre config `vite.config.js`).
|
| 30 |
-
4. **Environment Variables**:
|
| 31 |
-
- `VITE_API_URL`: `https://votre-app-backend.up.railway.app/api/v1`.
|
| 32 |
-
|
| 33 |
-
---
|
| 34 |
-
|
| 35 |
-
## 🔄 Pipeline CI/CD
|
| 36 |
-
Toute modification poussée sur la branche `main` déclenche automatiquement :
|
| 37 |
-
1. Un redeploy sur Railway (Backend).
|
| 38 |
-
2. Un redeploy sur Vercel (Frontend).
|
| 39 |
-
|
| 40 |
-
> [!WARNING]
|
| 41 |
-
> Assurez-vous de migrer les données CSV vers la base SQLite SQL avant le déploiement final pour ne pas avoir une base vide en production.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
docs/Development_Guide.md
DELETED
|
@@ -1,78 +0,0 @@
|
|
| 1 |
-
# 🛠️ Development Guide
|
| 2 |
-
|
| 3 |
-
Ce guide détaille comment mettre en place l'environnement de développement local pour contribuer au projet Ba7ath.
|
| 4 |
-
|
| 5 |
-
## 📋 Prérequis
|
| 6 |
-
- **Python 3.10+**
|
| 7 |
-
- **Node.js 18+**
|
| 8 |
-
- **Git**
|
| 9 |
-
|
| 10 |
-
---
|
| 11 |
-
|
| 12 |
-
## 🐍 Backend Setup (FastAPI)
|
| 13 |
-
|
| 14 |
-
1. **Cloner le repository** :
|
| 15 |
-
```bash
|
| 16 |
-
git clone <repo_url>
|
| 17 |
-
cd Ba7ath_scripts/Scrap_Ahlya/microsite
|
| 18 |
-
```
|
| 19 |
-
|
| 20 |
-
2. **Créer l'environnement virtuel** :
|
| 21 |
-
```bash
|
| 22 |
-
cd backend
|
| 23 |
-
python -m venv venv
|
| 24 |
-
source venv/bin/activate # Windows: venv\Scripts\activate
|
| 25 |
-
```
|
| 26 |
-
|
| 27 |
-
3. **Installer les dépendances** :
|
| 28 |
-
```bash
|
| 29 |
-
pip install -r requirements.txt
|
| 30 |
-
```
|
| 31 |
-
|
| 32 |
-
4. **Variables d'environnement** :
|
| 33 |
-
Créez un fichier `.env` dans `backend/` :
|
| 34 |
-
```env
|
| 35 |
-
SECRET_KEY=votre_cle_secrete_ultra_securisee
|
| 36 |
-
ALGORITHM=HS256
|
| 37 |
-
```
|
| 38 |
-
|
| 39 |
-
5. **Lancer le serveur** :
|
| 40 |
-
```bash
|
| 41 |
-
uvicorn app.main:app --reload --port 8000
|
| 42 |
-
```
|
| 43 |
-
|
| 44 |
-
---
|
| 45 |
-
|
| 46 |
-
## ⚛️ Frontend Setup (React)
|
| 47 |
-
|
| 48 |
-
1. **Installer les dépendances** :
|
| 49 |
-
```bash
|
| 50 |
-
cd microsite
|
| 51 |
-
npm install
|
| 52 |
-
```
|
| 53 |
-
|
| 54 |
-
2. **Variables d'environnement** :
|
| 55 |
-
Créez un fichier `.env` dans `microsite/` :
|
| 56 |
-
```env
|
| 57 |
-
VITE_API_URL=http://localhost:8000/api/v1
|
| 58 |
-
```
|
| 59 |
-
|
| 60 |
-
3. **Lancer le serveur de dev** :
|
| 61 |
-
```bash
|
| 62 |
-
npm run dev
|
| 63 |
-
```
|
| 64 |
-
L'application sera accessible sur `http://localhost:5173`.
|
| 65 |
-
|
| 66 |
-
---
|
| 67 |
-
|
| 68 |
-
## 🚀 Scripts Utilitaires
|
| 69 |
-
|
| 70 |
-
- **`backend/create_admin.py`** : Recrée l'utilisateur administrateur par défaut.
|
| 71 |
-
- **`start_all.bat`** (Windows) : Script pour lancer simultanément le backend et le frontend en développement.
|
| 72 |
-
|
| 73 |
-
## 🧪 Tests Rapides
|
| 74 |
-
Pour vérifier que l'API répond correctement après installation :
|
| 75 |
-
```bash
|
| 76 |
-
curl http://localhost:8000/
|
| 77 |
-
# Réponse attendue: {"message": "Ba7ath OSINT API is running"}
|
| 78 |
-
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
docs/Frontend_Architecture.md
DELETED
|
@@ -1,59 +0,0 @@
|
|
| 1 |
-
# 💻 Frontend Architecture
|
| 2 |
-
|
| 3 |
-
L'application est une **Single Page Application (SPA)** moderne construite avec **React 18** et **Vite**.
|
| 4 |
-
|
| 5 |
-
## 🏗️ Structure des Dossiers
|
| 6 |
-
|
| 7 |
-
```text
|
| 8 |
-
microsite/
|
| 9 |
-
├── public/ # Assets statiques
|
| 10 |
-
├── src/
|
| 11 |
-
│ ├── components/ # Composants réutilisables (Map, Widgets, Modals)
|
| 12 |
-
│ ├── context/ # AuthContext pour la gestion globale
|
| 13 |
-
│ ├── pages/ # Vues principales (Home, Admin, Enriched)
|
| 14 |
-
│ ├── services/ # Appels API et configuration
|
| 15 |
-
│ ├── App.jsx # Router et layout global
|
| 16 |
-
│ └── index.css # Tailwind et styles globaux
|
| 17 |
-
└── vite.config.js # Configuration de build
|
| 18 |
-
```
|
| 19 |
-
|
| 20 |
-
## 🚦 Routing (`App.jsx`)
|
| 21 |
-
Le routage est géré par `react-router-dom`. Les routes sensibles sont protégées.
|
| 22 |
-
|
| 23 |
-
```jsx
|
| 24 |
-
<Routes>
|
| 25 |
-
<Route path="/login" element={<LoginPage />} />
|
| 26 |
-
<Route element={<ProtectedRoute />}>
|
| 27 |
-
<Route path="/" element={<HomeDashboard />} />
|
| 28 |
-
<Route path="/enriched" element={<EnrichedCompaniesPage />} />
|
| 29 |
-
<Route path="/admin" element={<AdminDashboard />} adminOnly={true} />
|
| 30 |
-
</Route>
|
| 31 |
-
</Routes>
|
| 32 |
-
```
|
| 33 |
-
|
| 34 |
-
## 🔐 Gestion de l'État : `AuthContext`
|
| 35 |
-
Un contexte React global gère :
|
| 36 |
-
- L'utilisateur actuel (`user`).
|
| 37 |
-
- La persistance du token (`localStorage`).
|
| 38 |
-
- Les méthodes `login` / `logout`.
|
| 39 |
-
|
| 40 |
-
## 📦 Composants Clés
|
| 41 |
-
|
| 42 |
-
### Visualisation
|
| 43 |
-
- **`RegionPanel`**: Affiche les statistiques détaillées d'une wilaya sélectionnée sur la carte.
|
| 44 |
-
- **`SubScoresRadar`**: Graphique radar (Chart.js) montrant les différents axes de risque.
|
| 45 |
-
- **`StatisticalComparisonGrid`**: Grille de comparaison entre wilayas.
|
| 46 |
-
|
| 47 |
-
### Investigation
|
| 48 |
-
- **`InvestigationWizard`**: Formulaire pas-à-pas pour guider l'analyse.
|
| 49 |
-
- **`ManualEnrichmentWizard`**: Interface de saisie pour ajouter de nouvelles données d'enrichissement.
|
| 50 |
-
|
| 51 |
-
## 🎨 Design System
|
| 52 |
-
- **Tailwind CSS**: Utilisé pour tout le styling.
|
| 53 |
-
- **Inter / Noto Sans Arabic**: Polices utilisées pour une lisibilité maximale bilingue.
|
| 54 |
-
- **Glassmorphism**: Appliqué sur les modals et les overlays pour un aspect premium.
|
| 55 |
-
|
| 56 |
-
---
|
| 57 |
-
|
| 58 |
-
## 🔌 Intégration API
|
| 59 |
-
Tous les appels passent par `src/services/api.js` qui utilise un wrapper `authenticatedFetch` pour garantir que le token est envoyé si disponible.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
docs/OSINT_Methodology.md
DELETED
|
@@ -1,42 +0,0 @@
|
|
| 1 |
-
# 🕵️ OSINT Methodology
|
| 2 |
-
|
| 3 |
-
La plateforme Ba7ath ne se contente pas d'afficher des données ; elle les transforme en **renseignements actionnables** grâce à une méthodologie d'enrichissement rigoureuse.
|
| 4 |
-
|
| 5 |
-
## 📡 Sources de Données
|
| 6 |
-
|
| 7 |
-
1. **RNE (Registre National des Entreprises)** : Source officielle pour le statut légal, le capital social, l'adresse et les actionnaires.
|
| 8 |
-
2. **JORT (Journal Officiel de la République Tunisienne)** : Extraction des annonces de création, de modification de capital et de liquidation.
|
| 9 |
-
3. **Marchés Publics (TUNEPS / Observatoire)** : Données sur les contrats remportés par les sociétés citoyennes.
|
| 10 |
-
4. **Scraping Web (Trovit / Web)** : Identification précoce des sociétés non encore officiellement enregistrées.
|
| 11 |
-
|
| 12 |
-
---
|
| 13 |
-
|
| 14 |
-
## 🚩 Calcul des Red Flags (Signaux d'Alerte)
|
| 15 |
-
|
| 16 |
-
Le système applique des algorithmes automatiques pour détecter des patterns suspects :
|
| 17 |
-
|
| 18 |
-
### 1. Ratio Financier Critiques
|
| 19 |
-
- **Logique**: Si `Valeur totale des contrats / Capital social > 10`.
|
| 20 |
-
- **Interprétation**: Une société avec un capital très faible remportant des marchés massifs peut indiquer une structure "écran" ou un manque de capacité réelle.
|
| 21 |
-
- **Badge**: `FINANCIAL_RATIO` (Severity: HIGH).
|
| 22 |
-
|
| 23 |
-
### 2. Méthodes de Passation
|
| 24 |
-
- **Logique**: Si `Marchés de gré à gré (Direct) > 50%` du total des contrats.
|
| 25 |
-
- **Interprétation**: Une dépendance excessive aux contrats non-concurrentiels est un indicateur de risque de favoritisme.
|
| 26 |
-
- **Badge**: `PROCUREMENT_METHOD` (Severity: HIGH).
|
| 27 |
-
|
| 28 |
-
### 3. Gouvernance
|
| 29 |
-
- **Logique**: Détection d'actionnaire unique ou de liens croisés entre sociétés Ahlia d'une même région.
|
| 30 |
-
- **Badge**: `GOVERNANCE` (Severity: MEDIUM).
|
| 31 |
-
|
| 32 |
-
---
|
| 33 |
-
|
| 34 |
-
## 🧪 Processus d'Enrichissement Manuel
|
| 35 |
-
|
| 36 |
-
Le **ManualEnrichmentWizard** permet aux journalistes d'ajouter une couche d'analyse humaine :
|
| 37 |
-
1. **Saisie des données RNE** : Validation des numéros de registre.
|
| 38 |
-
2. **Ajout de contrats** : Saisie manuelle si TUNEPS n'est pas à jour.
|
| 39 |
-
3. **Calcul Auto** : Le système recalcule instantanément les scores dès que les données sont enregistrées.
|
| 40 |
-
|
| 41 |
-
## 📈 Indice de Risque Régional
|
| 42 |
-
Le score d'une wilaya est la moyenne pondérée des scores de risque des sociétés Ahlia qui y sont basées. Cela permet de cartographier les "zones grises" au niveau national.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
docs/README.md
DELETED
|
@@ -1,104 +0,0 @@
|
|
| 1 |
-
# 📂 Ba7ath / Ahlya Investigations
|
| 2 |
-
|
| 3 |
-
> **Ba7ath** (البحث - La Recherche) est une plateforme OSINT de datajournalisme dédiée à l'investigation sur les sociétés citoyennes (Ahlia - أهلية) en Tunisie.
|
| 4 |
-
|
| 5 |
-
[](#)
|
| 6 |
-
[](#)
|
| 7 |
-
|
| 8 |
-
## 📌 Mission
|
| 9 |
-
Ce projet permet aux journalistes et analystes d'explorer, de cartographier et d'enrichir les données sur les sociétés Ahlia tunisiennes, en identifiant les anomalies financières, les structures de gouvernance suspectes et les signaux de risque OSINT.
|
| 10 |
-
|
| 11 |
-
---
|
| 12 |
-
|
| 13 |
-
## 🏗️ Architecture du Système
|
| 14 |
-
|
| 15 |
-
```mermaid
|
| 16 |
-
graph TD
|
| 17 |
-
subgraph Frontend [React SPA - Vercel]
|
| 18 |
-
UI[Interface Utilisateur]
|
| 19 |
-
State[AuthContext & State]
|
| 20 |
-
Map[Leaflet Map]
|
| 21 |
-
Charts[Chart.js / Radar]
|
| 22 |
-
end
|
| 23 |
-
|
| 24 |
-
subgraph Backend [FastAPI - Railway]
|
| 25 |
-
API[V1 API Endpoints]
|
| 26 |
-
Auth[JWT Auth Service]
|
| 27 |
-
Logic[Business Logic / Red Flags]
|
| 28 |
-
end
|
| 29 |
-
|
| 30 |
-
subgraph Data [Storage]
|
| 31 |
-
DB[(SQLite - ba7ath_enriched.db)]
|
| 32 |
-
Vol[Railway Persistent Volume]
|
| 33 |
-
end
|
| 34 |
-
|
| 35 |
-
UI --> State
|
| 36 |
-
State --> API
|
| 37 |
-
API --> Auth
|
| 38 |
-
API --> Logic
|
| 39 |
-
Logic --> DB
|
| 40 |
-
DB -.-> Vol
|
| 41 |
-
```
|
| 42 |
-
|
| 43 |
-
---
|
| 44 |
-
|
| 45 |
-
## 🛠️ Stack Technique
|
| 46 |
-
|
| 47 |
-
### Backend
|
| 48 |
-
- **Framework**: FastAPI (Python)
|
| 49 |
-
- **Base de données**: SQLite avec SQLAlchemy ORM.
|
| 50 |
-
- **Authentification**: JWT Bearer avec hachage Argon2.
|
| 51 |
-
- **Service OSINT**: Logique personnalisée de détection de "Red Flags".
|
| 52 |
-
|
| 53 |
-
### Frontend
|
| 54 |
-
- **Framework**: React 18 (Vite).
|
| 55 |
-
- **Styling**: Tailwind CSS pour une interface premium et responsive.
|
| 56 |
-
- **Cartographie**: React-Leaflet pour la visualisation géographique des risques.
|
| 57 |
-
- **Visualisation**: Chart.js pour les graphiques radar et de comparaison.
|
| 58 |
-
|
| 59 |
-
---
|
| 60 |
-
|
| 61 |
-
## 🚀 Quick Start (Local)
|
| 62 |
-
|
| 63 |
-
### 1. Backend
|
| 64 |
-
```bash
|
| 65 |
-
cd backend
|
| 66 |
-
python -m venv venv
|
| 67 |
-
source venv/bin/activate # venv\Scripts\activate sur Windows
|
| 68 |
-
pip install -r requirements.txt
|
| 69 |
-
python create_admin.py # Initialiser l'admin par défaut
|
| 70 |
-
uvicorn app.main:app --reload
|
| 71 |
-
```
|
| 72 |
-
|
| 73 |
-
### 2. Frontend
|
| 74 |
-
```bash
|
| 75 |
-
cd microsite
|
| 76 |
-
npm install
|
| 77 |
-
npm run dev
|
| 78 |
-
```
|
| 79 |
-
|
| 80 |
-
---
|
| 81 |
-
|
| 82 |
-
## 📖 Documentation Détaillée
|
| 83 |
-
|
| 84 |
-
1. [**API Reference**](API_Reference.md) : Détail des endpoints et formats.
|
| 85 |
-
2. [**Authentication Guide**](Authentication_Guide.md) : Flux JWT et gestion admin.
|
| 86 |
-
3. [**Frontend Architecture**](Frontend_Architecture.md) : Structure des composants et hooks.
|
| 87 |
-
4. [**Database Schema**](Database_Schema.md) : Modèles SQLAlchemy et colonnes enrichies.
|
| 88 |
-
5. [**Deployment Guide**](Deployment_Guide.md) : Procédures Railway/Vercel.
|
| 89 |
-
6. [**OSINT Methodology**](OSINT_Methodology.md) : Calcul des risques et sources.
|
| 90 |
-
7. [**Troubleshooting**](Troubleshooting.md) : Problèmes connus et solutions.
|
| 91 |
-
8. [**Development Guide**](Development_Guide.md) : Workflow de contribution.
|
| 92 |
-
|
| 93 |
-
---
|
| 94 |
-
|
| 95 |
-
## 🕵️ Méthodologie OSINT
|
| 96 |
-
La plateforme agrège des données provenant du **RNE** (Registre National des Entreprises), du **JORT** (Journal Officiel) et des données de marchés publics pour générer des scores de risque basés sur :
|
| 97 |
-
- Le ratio Capital / Valeur des contrats.
|
| 98 |
-
- La fréquence des marchés de gré à gré (بالتراضي).
|
| 99 |
-
- La structure de gouvernance (Actionnaire unique, etc.).
|
| 100 |
-
|
| 101 |
-
---
|
| 102 |
-
|
| 103 |
-
## ⚖️ Licence
|
| 104 |
-
Projet interne - Tous droits réservés.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
docs/Troubleshooting.md
DELETED
|
@@ -1,49 +0,0 @@
|
|
| 1 |
-
# 🔍 Troubleshooting Guide
|
| 2 |
-
|
| 3 |
-
Ce guide recense les erreurs courantes rencontrées lors du développement ou du déploiement de la plateforme Ba7ath.
|
| 4 |
-
|
| 5 |
-
## 1. Erreurs d'Authentification
|
| 6 |
-
|
| 7 |
-
### Symptôme : "401 Unauthorized" ou "403 Forbidden"
|
| 8 |
-
- **Cause 1**: Le token JWT a expiré.
|
| 9 |
-
- **Solution**: Se déconnecter et se reconnecter.
|
| 10 |
-
- **Cause 2**: Le frontend n'envoie pas le header `Authorization`.
|
| 11 |
-
- **Diagnostic**: Vérifiez dans l'onglet Network de votre navigateur si le header `Authorization: Bearer <token>` est présent.
|
| 12 |
-
- **Fix**: Assurez-vous que l'appel API utilise `authenticatedFetch`.
|
| 13 |
-
|
| 14 |
-
### Symptôme : Erreur de signature du token après redémarrage
|
| 15 |
-
- **Cause**: La `SECRET_KEY` n'est pas fixe et change à chaque redémarrage du serveur.
|
| 16 |
-
- **Fix**: Définir une `SECRET_KEY` statique dans les variables d'environnement.
|
| 17 |
-
|
| 18 |
-
---
|
| 19 |
-
|
| 20 |
-
## 2. Erreurs de Données (API 404)
|
| 21 |
-
|
| 22 |
-
### Symptôme : Les données enrichies sont inaccessibles
|
| 23 |
-
- **Diagnostic**: L'URL appelée est incorrecte (ex: `/enrichment/list` au lieu de `/api/v1/enrichment/list`).
|
| 24 |
-
- **Fix**: Centraliser `API_BASE_URL` dans `config.js` et s'assurer qu'il inclut `/api/v1`.
|
| 25 |
-
|
| 26 |
-
### Symptôme : Les sociétés disparaissent au redéploiement Railway
|
| 27 |
-
- **Cause**: La base SQLite n'est pas sur un volume persistant.
|
| 28 |
-
- **Fix**: Monter un Volume Railway et pointer le chemin de la DB vers ce volume (`/data/ba7ath_enriched.db`).
|
| 29 |
-
|
| 30 |
-
---
|
| 31 |
-
|
| 32 |
-
## 3. Erreurs de Build (Frontend)
|
| 33 |
-
|
| 34 |
-
### Symptôme : `vite:html-inline-proxy` error
|
| 35 |
-
- **Cause**: Présence de blocs `<style>` inline dans `index.html` (bug spécifique à certains environnements Windows).
|
| 36 |
-
- **Fix**: Déplacer les styles vers `index.css` et configurer les polices dans `tailwind.config.js`.
|
| 37 |
-
|
| 38 |
-
---
|
| 39 |
-
|
| 40 |
-
## 🛠️ Diagnostics Utiles
|
| 41 |
-
|
| 42 |
-
**Logs Backend** :
|
| 43 |
-
```bash
|
| 44 |
-
# Sur Railway
|
| 45 |
-
railway logs
|
| 46 |
-
```
|
| 47 |
-
|
| 48 |
-
**Debugger React** :
|
| 49 |
-
Utilisez les **React DevTools** pour vérifier si `AuthContext` possède bien l'état `user` après le login.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
index.html
DELETED
|
@@ -1,34 +0,0 @@
|
|
| 1 |
-
<!DOCTYPE html>
|
| 2 |
-
<html lang="ar" dir="rtl">
|
| 3 |
-
|
| 4 |
-
<head>
|
| 5 |
-
<meta charset="utf-8" />
|
| 6 |
-
<link rel="icon" href="/favicon.ico" />
|
| 7 |
-
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
| 8 |
-
<meta name="theme-color" content="#10B981" />
|
| 9 |
-
|
| 10 |
-
<meta name="description" content="لوحة تفاعلية لقراءة بيانات الشركات الأهلية في تونس حسب الولاية والنشاط." />
|
| 11 |
-
|
| 12 |
-
<link rel="apple-touch-icon" href="/logo192.png" />
|
| 13 |
-
|
| 14 |
-
<!-- خط عربي (اختياري) -->
|
| 15 |
-
<link rel="preconnect" href="https://fonts.googleapis.com" />
|
| 16 |
-
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
|
| 17 |
-
<link href="https://fonts.googleapis.com/css2?family=Noto+Sans+Arabic:wght@300;400;600;700&display=swap"
|
| 18 |
-
rel="stylesheet" />
|
| 19 |
-
|
| 20 |
-
<!-- ملف manifest لتطبيق الويب -->
|
| 21 |
-
<link rel="manifest" href="/manifest.json" />
|
| 22 |
-
|
| 23 |
-
<title>الشركات الأهلية في تونس</title>
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
</head>
|
| 27 |
-
|
| 28 |
-
<body>
|
| 29 |
-
<noscript>يجب تفعيل جافاسكريبت لتشغيل هذا التطبيق.</noscript>
|
| 30 |
-
<div id="root"></div>
|
| 31 |
-
<script type="module" src="/src/index.jsx"></script>
|
| 32 |
-
</body>
|
| 33 |
-
|
| 34 |
-
</html>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
package-lock.json
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
package.json
DELETED
|
@@ -1,54 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"name": "microsite",
|
| 3 |
-
"version": "0.1.0",
|
| 4 |
-
"private": true,
|
| 5 |
-
"dependencies": {
|
| 6 |
-
"@testing-library/dom": "^10.4.1",
|
| 7 |
-
"@testing-library/jest-dom": "^6.9.1",
|
| 8 |
-
"@testing-library/react": "^16.3.2",
|
| 9 |
-
"@testing-library/user-event": "^13.5.0",
|
| 10 |
-
"chart.js": "^4.5.1",
|
| 11 |
-
"framer-motion": "^12.34.3",
|
| 12 |
-
"leaflet": "^1.9.4",
|
| 13 |
-
"lucide-react": "^0.563.0",
|
| 14 |
-
"react": "^19.2.4",
|
| 15 |
-
"react-chartjs-2": "^5.3.1",
|
| 16 |
-
"react-dom": "^19.2.4",
|
| 17 |
-
"react-leaflet": "^5.0.0",
|
| 18 |
-
"react-router-dom": "^7.13.0",
|
| 19 |
-
"recharts": "^3.7.0"
|
| 20 |
-
},
|
| 21 |
-
"scripts": {
|
| 22 |
-
"dev": "vite",
|
| 23 |
-
"start": "vite",
|
| 24 |
-
"build": "vite build",
|
| 25 |
-
"preview": "vite preview",
|
| 26 |
-
"test": "react-scripts test",
|
| 27 |
-
"eject": "react-scripts eject"
|
| 28 |
-
},
|
| 29 |
-
"eslintConfig": {
|
| 30 |
-
"extends": [
|
| 31 |
-
"react-app",
|
| 32 |
-
"react-app/jest"
|
| 33 |
-
]
|
| 34 |
-
},
|
| 35 |
-
"browserslist": {
|
| 36 |
-
"production": [
|
| 37 |
-
">0.2%",
|
| 38 |
-
"not dead",
|
| 39 |
-
"not op_mini all"
|
| 40 |
-
],
|
| 41 |
-
"development": [
|
| 42 |
-
"last 1 chrome version",
|
| 43 |
-
"last 1 firefox version",
|
| 44 |
-
"last 1 safari version"
|
| 45 |
-
]
|
| 46 |
-
},
|
| 47 |
-
"devDependencies": {
|
| 48 |
-
"@vitejs/plugin-react": "^5.1.3",
|
| 49 |
-
"autoprefixer": "^10.4.24",
|
| 50 |
-
"postcss": "^8.5.6",
|
| 51 |
-
"tailwindcss": "^3.4.19",
|
| 52 |
-
"vite": "^7.3.1"
|
| 53 |
-
}
|
| 54 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
postcss.config.js
DELETED
|
@@ -1,6 +0,0 @@
|
|
| 1 |
-
module.exports = {
|
| 2 |
-
plugins: {
|
| 3 |
-
tailwindcss: {},
|
| 4 |
-
autoprefixer: {},
|
| 5 |
-
},
|
| 6 |
-
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
project_tree.py
DELETED
|
@@ -1,16 +0,0 @@
|
|
| 1 |
-
|
| 2 |
-
import os
|
| 3 |
-
|
| 4 |
-
def list_files(startpath):
    """Render the directory tree rooted at *startpath* as an indented string.

    Each directory line ends with '/'; children are indented four spaces
    per level. Files whose name starts with '.' are skipped, but hidden
    directories are still traversed and listed (matching the original
    behavior, which only filters files).
    """
    lines = []
    for root, _dirs, files in os.walk(startpath):
        # Depth = number of path separators below the start path.
        depth = root.replace(startpath, '').count(os.sep)
        lines.append(f"{' ' * 4 * depth}{os.path.basename(root)}/")
        child_indent = ' ' * 4 * (depth + 1)
        lines.extend(child_indent + name for name in files if not name.startswith("."))
    return "\n".join(lines)

print(list_files('.'))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
public/data/enrich_companies.py
DELETED
|
@@ -1,95 +0,0 @@
|
|
| 1 |
-
import json
|
| 2 |
-
|
| 3 |
-
# Load the raw company records (UTF-8 JSON export) into `data`.
with open("companies.json", "r", encoding="utf-8") as fh:
    data = json.load(fh)
|
| 5 |
-
|
| 6 |
-
def normaliser_activite(raw):
    """Normalize a raw Arabic activity label into a canonical category.

    Returns "غير مصنف" (unclassified) for empty/None input, a canonical
    category when a keyword rule matches, and the stripped original text
    as a fallback otherwise. Rules are evaluated in order, so earlier
    (agriculture, transport, ...) categories take precedence.
    """
    if not raw:
        return "غير مصنف"
    text = raw.strip()

    # Light spelling/spacing normalization around the slash separator.
    text = text.replace("فلاحة/ ", "فلاحة / ")
    text = text.replace("فلاحة/صيد", "فلاحة / صيد")
    # NOTE(review): the second replace below is a no-op as written — it was
    # presumably meant to collapse doubled spaces; confirm against raw source.
    text = text.replace(" /", " / ").replace(" ", " ")

    # Agriculture / forestry / fishing — most specific sub-types first.
    if any(k in text for k in ("فلاحة", "صيد", "زراعة", "تربية", "حراجة")):
        if "حراجة" in text:
            return "حراجة و استغلال الغابات"
        if "تربية الدواجن" in text:
            return "تربية الدواجن"
        if "تربية" in text:
            return "تربية الحيوانات"
        if "النباتات الصناعية" in text:
            return "زراعة النباتات الصناعية"
        if "الحبوب" in text:
            return "زراعة الحبوب"
        return "فلاحة و صيد و خدمات فلاحية"

    # Land transport.
    if any(k in text for k in ("نقل", "النقل البرّي")):
        if "منتظم" in text or "المسافرين" in text:
            return "نقل المسافرين"
        if "خدمات ملحقة بالنقل" in text:
            return "خدمات ملحقة بالنقل"
        return "نقل بري و خدماته"

    # Environment / waste management.
    if any(k in text for k in ("الرسكلة", "المستعملة", "التطهير", "الفضلات")):
        if "التطهير" in text:
            return "تطهير و نظافة و تصرف في الفضلات"
        return "رسكلة المواد المستعملة"

    # Energy / mining.
    if any(k in text for k in ("الكهرباء", "الغاز", "الحرارة")):
        return "إنتاج و توزيع الكهرباء و الغاز"
    if "إستخراج الأحجار" in text or "إستخراج" in text:
        return "صناعات إستخراجية"

    # Manufacturing / crafts.
    if any(k in text for k in ("صناعة", "صنع", "تحويل", "القرميد", "الآجر", "المطاط")):
        return "صناعات تحويلية و حرفية"

    # Leisure / tourism / culture / sport.
    if any(k in text for k in ("ترفيهية", "سياحة", "رياضية", "ثقافية")):
        return "أنشطة ترفيهية و ثقافية و سياحية"

    # Commerce / services / social / education.
    if "تجارة" in text:
        return "تجارة"
    if any(k in text for k in ("خدمات جماعية", "إجتماعية", "شخصية")):
        return "خدمات جماعية و إجتماعية"
    if "التعليم" in text:
        return "تعليم"

    # Fallback: keep the original (stripped) text unchanged.
    return text
|
| 68 |
-
|
| 69 |
-
def groupe_activite(norm):
    """Map a normalized activity label to a coarse group code.

    Rules are checked in order and the first keyword match wins, so an
    agriculture keyword takes precedence over transport, and so on.
    Returns "AUTRE" when nothing matches.
    """
    rules = (
        (("فلاحة", "زراعة", "تربية", "حراجة"), "AGRI_NATUREL"),
        (("نقل", "المسافرين"), "TRANSPORT"),
        (("رسكلة", "تطهير", "الفضلات"), "ENVIRONNEMENT"),
        (("الكهرباء", "الغاز", "إستخراج"), "ENERGIE_MINES"),
        (("صناعات تحويلية", "صنع", "صناعة"), "INDUSTRIE"),
        (("تجارة", "خدمات جماعية", "تعليم"), "SERVICES_COM"),
        (("ترفيهية", "سياحية"), "LOISIRS_TOURISME"),
    )
    for keywords, group in rules:
        if any(k in norm for k in keywords):
            return group
    return "AUTRE"
|
| 85 |
-
|
| 86 |
-
# Annotate every record in place with its normalized activity label
# and the coarse group code derived from it.
for record in data:
    label = normaliser_activite(record.get("الموضوع / النشاط", ""))
    record["activité_normalisée"] = label
    record["activité_groupe"] = groupe_activite(label)
|
| 91 |
-
|
| 92 |
-
# Persist the enriched records; ensure_ascii=False keeps Arabic readable.
with open("companies_normalized.json", "w", encoding="utf-8") as fh:
    json.dump(data, fh, ensure_ascii=False, indent=2)

print("✅ companies_normalized.json généré :", len(data), "lignes")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|