aatu18 committed on
Commit
48cd1b2
·
verified ·
1 Parent(s): 15ea2a4

optimize get ssm counts

Browse files
Files changed (1) hide show
  1. methods/gdc_api_calls.py +39 -25
methods/gdc_api_calls.py CHANGED
@@ -118,37 +118,51 @@ def get_ssm_id(gene, mutation):
118
  return ssm_id
119
 
120
 
121
- def get_ssm_counts(ssm_id):
122
  # get project level counts of ssm
123
  ssm_counts_by_project = {}
124
 
125
- ssm_occurrences_endpt = "https://api.gdc.cancer.gov/ssm_occurrences"
126
- fields = ["case.project.project_id", "case.case_id"]
127
- fields = ",".join(fields)
128
- filters = {"op": "=", "content": {"field": "ssm.ssm_id", "value": ssm_id}}
129
- params = {"filters": json.dumps(filters), "fields": fields, "size": 1000}
130
- try:
131
- response = requests.get(ssm_occurrences_endpt, params=params)
132
- ssm_counts = json.loads(response.content)
133
- for item in ssm_counts["data"]["hits"]:
134
- project_name = item["case"]["project"]["project_id"]
135
- case_id_list = "case_id_list"
136
- if not project_name in ssm_counts_by_project:
137
- ssm_counts_by_project[project_name] = {}
138
- ssm_counts_by_project[project_name][case_id_list] = []
139
- ssm_counts_by_project[project_name][case_id_list].append(
140
- item["case"]["case_id"]
141
- )
142
- ssm_counts_by_project[project_name]["ssm_counts"] = (
143
- ssm_counts_by_project[project_name]["ssm_counts"] + 1
144
- if "ssm_counts" in ssm_counts_by_project[project_name]
145
- else 1
146
- )
147
- except Exception as e:
148
- print("unable to execute GDC API request {}".format(str(e)))
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  return ssm_counts_by_project
150
 
151
 
 
152
  def get_available_cnv_data_for_project(project):
153
  case_ssm_endpt = "https://api.gdc.cancer.gov/case_ssms"
154
  fields = ["project.project_id", "available_variation_data"]
 
118
  return ssm_id
119
 
120
 
121
def get_ssm_counts(ssm_id, cancer_entities):
    """Collect per-project occurrence counts of one simple somatic mutation.

    For every project id in ``cancer_entities`` the GDC ``ssm_occurrences``
    endpoint is queried for occurrences of ``ssm_id`` restricted to that
    project, and the hits are grouped by the project id reported on each hit.

    Args:
        ssm_id: Value matched against the ``ssm.ssm_id`` field.
        cancer_entities: Iterable of values matched against
            ``case.project.project_id``; one query series is issued per value.

    Returns:
        dict mapping project id to
        ``{"case_id_list": [case_id, ...], "ssm_counts": int}``, where
        ``ssm_counts`` is the number of hits recorded for that project.
        Projects without hits, or whose request failed, are absent.

    A failure for one project is reported with ``print`` and does not stop
    the remaining projects from being queried (best-effort behavior).
    """
    # Loop-invariant request settings, built once.
    ssm_occurrences_endpt = "https://api.gdc.cancer.gov/ssm_occurrences"
    fields = ",".join(["case.project.project_id", "case.case_id"])
    page_size = 1000
    request_timeout = 60  # seconds; without it a stalled connection blocks forever

    # get project level counts of ssm
    ssm_counts_by_project = {}

    for ce in cancer_entities:
        filters = {
            "op": "and",
            "content": [
                {
                    "op": "=",
                    "content": {"field": "ssm.ssm_id", "value": ssm_id},
                },
                {
                    "op": "=",
                    "content": {"field": "case.project.project_id", "value": ce},
                },
            ],
        }
        encoded_filters = json.dumps(filters)

        offset = 0
        try:
            # Page through the results: a single request returns at most
            # `page_size` hits, so stopping after the first page would
            # silently undercount projects with more occurrences.
            while True:
                params = {
                    "filters": encoded_filters,
                    "fields": fields,
                    "size": page_size,
                    "from": offset,
                }
                response = requests.get(
                    ssm_occurrences_endpt,
                    params=params,
                    timeout=request_timeout,
                )
                response.raise_for_status()
                hits = response.json()["data"]["hits"]

                for item in hits:
                    # Read both values before touching the result dict so a
                    # malformed hit cannot leave a half-initialised entry.
                    project_name = item["case"]["project"]["project_id"]
                    case_id = item["case"]["case_id"]
                    entry = ssm_counts_by_project.setdefault(
                        project_name, {"case_id_list": [], "ssm_counts": 0}
                    )
                    entry["case_id_list"].append(case_id)
                    entry["ssm_counts"] += 1

                # A short (or empty) page means there is nothing left to fetch.
                if len(hits) < page_size:
                    break
                offset += len(hits)
        except (requests.RequestException, ValueError, KeyError, TypeError) as e:
            # Network/HTTP errors, undecodable JSON, or an unexpected payload
            # shape: report and move on to the next project.
            print("unable to execute GDC API request {}".format(str(e)))

    return ssm_counts_by_project
163
 
164
 
165
+
166
  def get_available_cnv_data_for_project(project):
167
  case_ssm_endpt = "https://api.gdc.cancer.gov/case_ssms"
168
  fields = ["project.project_id", "available_variation_data"]