Spaces:
Sleeping
Sleeping
cd@bziiit.com committed on
Commit ·
76f1467
1
Parent(s): 93d8b6e
Update data handling and API request parameters for aides
Browse files
- .gitignore +3 -1
- data/aides_territoires.py +5 -2
- data/get_aides_entreprises.py +4 -1
- data/get_les_aides.py +37 -27
- data/importToDb.py +10 -4
.gitignore
CHANGED
|
@@ -1,4 +1,6 @@
|
|
| 1 |
*/__pycache__/*
|
| 2 |
__pycache__
|
| 3 |
|
| 4 |
-
.env
|
|
|
|
|
|
|
|
|
| 1 |
*/__pycache__/*
|
| 2 |
__pycache__
|
| 3 |
|
| 4 |
+
.env
|
| 5 |
+
|
| 6 |
+
/data/*.json
|
data/aides_territoires.py
CHANGED
|
@@ -6,6 +6,9 @@ headersConnexion = {
|
|
| 6 |
'X-AUTH-TOKEN': 'eeb481e42950f1dbfc46dc348e6e32a0c631cc5b94dd7ab874a30c027f9de87c',
|
| 7 |
}
|
| 8 |
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
def connexion():
|
| 11 |
conn.request("POST", "/api/connexion/", '', headersConnexion)
|
|
@@ -43,7 +46,7 @@ def request():
|
|
| 43 |
page = 1
|
| 44 |
|
| 45 |
while True:
|
| 46 |
-
conn.request("GET", f"/api/aids/?page={page}&organization_type_slugs=
|
| 47 |
res = conn.getresponse().read()
|
| 48 |
resData = res.decode("utf-8")
|
| 49 |
data = json.loads(resData)
|
|
@@ -76,5 +79,5 @@ for aide in aides:
|
|
| 76 |
|
| 77 |
|
| 78 |
# print(subventions)
|
| 79 |
-
with open('aides_territoires.json', 'w', encoding='utf-8') as f:
|
| 80 |
json.dump(aides, f, ensure_ascii=False, indent=4)
|
|
|
|
| 6 |
'X-AUTH-TOKEN': 'eeb481e42950f1dbfc46dc348e6e32a0c631cc5b94dd7ab874a30c027f9de87c',
|
| 7 |
}
|
| 8 |
|
| 9 |
+
perimeter_id = '71054-yvelines'
|
| 10 |
+
organization_type_slugs = 'private-sector'
|
| 11 |
+
|
| 12 |
|
| 13 |
def connexion():
|
| 14 |
conn.request("POST", "/api/connexion/", '', headersConnexion)
|
|
|
|
| 46 |
page = 1
|
| 47 |
|
| 48 |
while True:
|
| 49 |
+
conn.request("GET", f"/api/aids/?page={page}&organization_type_slugs={organization_type_slugs}&perimeter_id={perimeter_id}", '', headersRequest)
|
| 50 |
res = conn.getresponse().read()
|
| 51 |
resData = res.decode("utf-8")
|
| 52 |
data = json.loads(resData)
|
|
|
|
| 79 |
|
| 80 |
|
| 81 |
# print(subventions)
|
| 82 |
+
with open('data/aides_territoires.json', 'w', encoding='utf-8') as f:
|
| 83 |
json.dump(aides, f, ensure_ascii=False, indent=4)
|
data/get_aides_entreprises.py
CHANGED
|
@@ -8,10 +8,13 @@ headers = {
|
|
| 8 |
'X-Aidesentreprises-Key': 'waMF2TjO',
|
| 9 |
}
|
| 10 |
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
def request(limit=20, offset=0):
|
| 13 |
print(f"Requesting {limit} subventions from offset {offset}")
|
| 14 |
-
conn.request("GET", f"/v1.1/aides?profils=
|
| 15 |
res = conn.getresponse()
|
| 16 |
data = res.read()
|
| 17 |
return json.loads(data.decode("utf-8"))['data']
|
|
|
|
| 8 |
'X-Aidesentreprises-Key': 'waMF2TjO',
|
| 9 |
}
|
| 10 |
|
| 11 |
+
profils = "10,14" # 10: ETI et grande entreprise, 14: Industrie
|
| 12 |
+
territoire = "103"
|
| 13 |
+
|
| 14 |
|
| 15 |
def request(limit=20, offset=0):
|
| 16 |
print(f"Requesting {limit} subventions from offset {offset}")
|
| 17 |
+
conn.request("GET", f"/v1.1/aides?profils={profils}&territoire={territoire}&limit={limit}&offset={offset}", payload, headers)
|
| 18 |
res = conn.getresponse()
|
| 19 |
data = res.read()
|
| 20 |
return json.loads(data.decode("utf-8"))['data']
|
data/get_les_aides.py
CHANGED
|
@@ -7,12 +7,15 @@ headers = {
|
|
| 7 |
'X-IDC': 'bcfac1828e5ef1b7cab084379a5f2a871e82ee7c',
|
| 8 |
}
|
| 9 |
|
| 10 |
-
|
| 11 |
-
filieres = [ 289, 290 ]
|
| 12 |
domaines = [893,883,877,790,793,798,802,805,862,807,810,813,816,820,818]
|
| 13 |
|
| 14 |
-
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
res = conn.getresponse()
|
| 17 |
data = res.read()
|
| 18 |
data = json.loads(data.decode("utf-8"))
|
|
@@ -25,11 +28,6 @@ def request(filiere, domaines):
|
|
| 25 |
return data['dispositifs']
|
| 26 |
|
| 27 |
|
| 28 |
-
def get_final_type(types):
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
return None # Return None if no matching type is found
|
| 32 |
-
|
| 33 |
|
| 34 |
def getAide(aide):
|
| 35 |
|
|
@@ -38,16 +36,10 @@ def getAide(aide):
|
|
| 38 |
res = conn.getresponse()
|
| 39 |
aide = json.loads(res.read().decode("utf-8"))
|
| 40 |
|
| 41 |
-
# if 'cci' in aide:
|
| 42 |
-
# del aide['cci']
|
| 43 |
-
# if 'url' in aide:
|
| 44 |
-
# del aide['url']
|
| 45 |
-
|
| 46 |
-
|
| 47 |
aide["metadata"] = {
|
| 48 |
-
# "type_aide": get_final_type(aide["prets"]),
|
| 49 |
"lien": aide['uri'],
|
| 50 |
-
"Source": f"https://les-aides.fr/"
|
|
|
|
| 51 |
}
|
| 52 |
|
| 53 |
return aide
|
|
@@ -58,6 +50,7 @@ def getAide(aide):
|
|
| 58 |
|
| 59 |
|
| 60 |
subventions = []
|
|
|
|
| 61 |
|
| 62 |
|
| 63 |
# Split domaines into two sections
|
|
@@ -66,21 +59,39 @@ domaines_section_1 = "[805,862,807,810,813,816,820,818]"
|
|
| 66 |
|
| 67 |
|
| 68 |
# Function to add dispositifs to subventions list without duplication
|
| 69 |
-
def
|
| 70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
|
| 72 |
-
dispositifs = request(
|
| 73 |
for dispositif in dispositifs:
|
| 74 |
-
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
|
| 77 |
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
add_dispositifs(
|
| 81 |
-
add_dispositifs(
|
| 82 |
|
|
|
|
|
|
|
| 83 |
|
|
|
|
| 84 |
print(f"Nb aides : {len(subventions)}")
|
| 85 |
|
| 86 |
for i in range(len(subventions)):
|
|
@@ -88,6 +99,5 @@ for i in range(len(subventions)):
|
|
| 88 |
subventions[i] = getAide(subventions[i])
|
| 89 |
|
| 90 |
|
| 91 |
-
# print(subventions)
|
| 92 |
with open('data/les_aides.json', 'w', encoding='utf-8') as f:
|
| 93 |
json.dump(subventions, f, ensure_ascii=False, indent=4)
|
|
|
|
| 7 |
'X-IDC': 'bcfac1828e5ef1b7cab084379a5f2a871e82ee7c',
|
| 8 |
}
|
| 9 |
|
| 10 |
+
sirets = [ 88889319500014, 89765311900015, 53476514400025, 89125008600016, 98379850500019 ]
|
|
|
|
| 11 |
domaines = [893,883,877,790,793,798,802,805,862,807,810,813,816,820,818]
|
| 12 |
|
| 13 |
+
# region = 11
|
| 14 |
+
# filieres = [ 341 ] # 341: Automobile
|
| 15 |
+
|
| 16 |
+
def request(domaines, siret = None):
|
| 17 |
+
# ape=A&region=75
|
| 18 |
+
conn.request("GET", f"/aides?siret={siret}&domaine={domaines}", payload, headers)
|
| 19 |
res = conn.getresponse()
|
| 20 |
data = res.read()
|
| 21 |
data = json.loads(data.decode("utf-8"))
|
|
|
|
| 28 |
return data['dispositifs']
|
| 29 |
|
| 30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
def getAide(aide):
|
| 33 |
|
|
|
|
| 36 |
res = conn.getresponse()
|
| 37 |
aide = json.loads(res.read().decode("utf-8"))
|
| 38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
aide["metadata"] = {
|
|
|
|
| 40 |
"lien": aide['uri'],
|
| 41 |
+
"Source": f"https://les-aides.fr/",
|
| 42 |
+
"sirets": subventions_sirest[aide['numero']]
|
| 43 |
}
|
| 44 |
|
| 45 |
return aide
|
|
|
|
| 50 |
|
| 51 |
|
| 52 |
subventions = []
|
| 53 |
+
subventions_sirest = {}
|
| 54 |
|
| 55 |
|
| 56 |
# Split domaines into two sections
|
|
|
|
| 59 |
|
| 60 |
|
| 61 |
# Function to add dispositifs to subventions list without duplication
|
| 62 |
+
def is_dispositif_exists(numero):
|
| 63 |
+
"""Check if a dispositif with given numero exists and return it if found."""
|
| 64 |
+
for subvention in subventions:
|
| 65 |
+
if isinstance(subvention, dict) and subvention.get("numero") == numero:
|
| 66 |
+
return subvention
|
| 67 |
+
return None
|
| 68 |
+
|
| 69 |
+
def add_dispositifs(domaines_section, siret=None):
|
| 70 |
+
print(f"Requesting domaines {domaines_section}")
|
| 71 |
|
| 72 |
+
dispositifs = request(domaines_section, siret)
|
| 73 |
for dispositif in dispositifs:
|
| 74 |
+
numero = dispositif.get("numero")
|
| 75 |
+
existing = is_dispositif_exists(numero)
|
| 76 |
+
if not existing:
|
| 77 |
+
subventions.append(dispositif) # Add dispositif to subventions list
|
| 78 |
+
|
| 79 |
+
if numero in subventions_sirest:
|
| 80 |
+
# print(f"Dispositif {numero} already exists for siret {subventions_sirest[numero]}")
|
| 81 |
+
subventions_sirest[numero].append(siret)
|
| 82 |
+
else:
|
| 83 |
+
subventions_sirest[numero] = [siret]
|
| 84 |
|
| 85 |
|
| 86 |
+
for siret in sirets:
|
| 87 |
+
print(f"Requesting siret {siret}")
|
| 88 |
+
add_dispositifs(domaines_section_1, siret)
|
| 89 |
+
add_dispositifs(domaines_section_2, siret)
|
| 90 |
|
| 91 |
+
# Call request function with different parameters
|
| 92 |
+
# for filiere in filieres:
|
| 93 |
|
| 94 |
+
print(f"Nb aides : {subventions_sirest}")
|
| 95 |
print(f"Nb aides : {len(subventions)}")
|
| 96 |
|
| 97 |
for i in range(len(subventions)):
|
|
|
|
| 99 |
subventions[i] = getAide(subventions[i])
|
| 100 |
|
| 101 |
|
|
|
|
| 102 |
with open('data/les_aides.json', 'w', encoding='utf-8') as f:
|
| 103 |
json.dump(subventions, f, ensure_ascii=False, indent=4)
|
data/importToDb.py
CHANGED
|
@@ -87,8 +87,12 @@ def importAideEntreprise(subvention, source):
|
|
| 87 |
if 'url' in subvention:
|
| 88 |
del subvention['url']
|
| 89 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
metadata = {
|
| 91 |
-
**
|
| 92 |
"id_subvention": subvention['numero'],
|
| 93 |
"deadline_date": -1,
|
| 94 |
"id_document": f"aides_{subvention['numero']}"
|
|
@@ -130,10 +134,12 @@ def go():
|
|
| 130 |
if file_name.endswith(".json"):
|
| 131 |
print(file_name)
|
| 132 |
file_path = os.path.join('data', file_name)
|
| 133 |
-
|
| 134 |
-
|
|
|
|
|
|
|
| 135 |
|
| 136 |
-
|
| 137 |
|
| 138 |
if __name__ == "__main__":
|
| 139 |
go()
|
|
|
|
| 87 |
if 'url' in subvention:
|
| 88 |
del subvention['url']
|
| 89 |
|
| 90 |
+
metadata = subvention.get("metadata", {})
|
| 91 |
+
if "sirets" in metadata:
|
| 92 |
+
del metadata["sirets"]
|
| 93 |
+
|
| 94 |
metadata = {
|
| 95 |
+
**metadata,
|
| 96 |
"id_subvention": subvention['numero'],
|
| 97 |
"deadline_date": -1,
|
| 98 |
"id_document": f"aides_{subvention['numero']}"
|
|
|
|
| 134 |
if file_name.endswith(".json"):
|
| 135 |
print(file_name)
|
| 136 |
file_path = os.path.join('data', file_name)
|
| 137 |
+
|
| 138 |
+
if file_name == "les_aides.json":
|
| 139 |
+
with open(file_path, "r", encoding="utf-8") as f:
|
| 140 |
+
data = json.load(f)
|
| 141 |
|
| 142 |
+
loopSubventions(data,source=file_name)
|
| 143 |
|
| 144 |
if __name__ == "__main__":
|
| 145 |
go()
|