Spaces:
Running on Zero
Running on Zero
optimize get ssm counts
Browse files- methods/gdc_api_calls.py +39 -25
methods/gdc_api_calls.py
CHANGED
|
@@ -118,37 +118,51 @@ def get_ssm_id(gene, mutation):
|
|
| 118 |
return ssm_id
|
| 119 |
|
| 120 |
|
| 121 |
-
def get_ssm_counts(ssm_id):
|
| 122 |
# get project level counts of ssm
|
| 123 |
ssm_counts_by_project = {}
|
| 124 |
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
return ssm_counts_by_project
|
| 150 |
|
| 151 |
|
|
|
|
| 152 |
def get_available_cnv_data_for_project(project):
|
| 153 |
case_ssm_endpt = "https://api.gdc.cancer.gov/case_ssms"
|
| 154 |
fields = ["project.project_id", "available_variation_data"]
|
|
|
|
| 118 |
return ssm_id
|
| 119 |
|
| 120 |
|
| 121 |
+
def get_ssm_counts(ssm_id, cancer_entities, timeout=60):
    """Collect, per project, the cases in which a given SSM occurs.

    One request is sent to the GDC ``ssm_occurrences`` endpoint for each entry
    of ``cancer_entities``, filtered on both the mutation id and the project
    id. A failure while handling one project is printed and does not stop the
    remaining projects from being queried.

    Args:
        ssm_id: Value matched against the ``ssm.ssm_id`` field.
        cancer_entities: Iterable of values matched against
            ``case.project.project_id``; one request is made per entry.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A dict keyed by project id. Each value is a dict holding
        ``"case_id_list"`` (the case ids of the returned occurrences) and
        ``"ssm_counts"`` (how many occurrences were returned). Projects for
        which no hit was returned are absent.
    """
    # get project level counts of ssm
    ssm_counts_by_project = {}

    # These request pieces do not depend on the project, so build them once.
    ssm_occurrences_endpt = "https://api.gdc.cancer.gov/ssm_occurrences"
    fields = ",".join(["case.project.project_id", "case.case_id"])
    ssm_filter = {
        "op": "=",
        "content": {"field": "ssm.ssm_id", "value": ssm_id},
    }

    for ce in cancer_entities:
        filters = {
            "op": "and",
            "content": [
                ssm_filter,
                {
                    "op": "=",
                    "content": {"field": "case.project.project_id", "value": ce},
                },
            ],
        }
        # NOTE(review): a single request with size=1000 is made per project;
        # any occurrences beyond the first 1000 hits are not fetched.
        params = {"filters": json.dumps(filters), "fields": fields, "size": 1000}
        try:
            response = requests.get(
                ssm_occurrences_endpt, params=params, timeout=timeout
            )
            # Surface HTTP errors directly instead of as a KeyError on "data".
            response.raise_for_status()
            ssm_counts = json.loads(response.content)
            for item in ssm_counts["data"]["hits"]:
                case = item["case"]
                # Read both fields before touching the result so a malformed
                # hit cannot leave a half-initialised project entry behind.
                project_name = case["project"]["project_id"]
                case_id = case["case_id"]
                project_entry = ssm_counts_by_project.setdefault(
                    project_name, {"case_id_list": [], "ssm_counts": 0}
                )
                project_entry["case_id_list"].append(case_id)
                project_entry["ssm_counts"] += 1
        except Exception as e:
            # Deliberately best-effort: report the failure and move on to the
            # next project rather than aborting the whole lookup.
            print("unable to execute GDC API request {}".format(str(e)))
    return ssm_counts_by_project
|
| 163 |
|
| 164 |
|
| 165 |
+
|
| 166 |
def get_available_cnv_data_for_project(project):
|
| 167 |
case_ssm_endpt = "https://api.gdc.cancer.gov/case_ssms"
|
| 168 |
fields = ["project.project_id", "available_variation_data"]
|