aatu18 committed on
Commit
fa13c21
·
verified ·
1 Parent(s): 0bd0f4d

add step 5 to logs

Browse files
Files changed (1) hide show
  1. methods/gdc_api_calls.py +5 -8
methods/gdc_api_calls.py CHANGED
@@ -272,6 +272,7 @@ def return_joint_single_cnv_frequency(cnv, cnv_change, cnv_change_5_category):
272
  if not total_number_of_cases_with_cnv_data:
273
  continue
274
 
 
275
  print('total number of cases with CNV data {}'.format(
276
  total_number_of_cases_with_cnv_data))
277
 
@@ -378,10 +379,6 @@ def get_freq_cnv_loss_or_gain(gene_entities, cancer_entities, query, cnv_and_ssm
378
  print("exception: {}".format(str(e)))
379
  continue
380
 
381
- # total_number_of_cases_with_cnv_data = get_available_cnv_data_for_project(ce)
382
- # skip if cannot obtain total # of cnv cases from API
383
- # if not total_number_of_cases_with_cnv_data:
384
- # continue
385
 
386
  if not ce in cnv:
387
  cnv[ce] = {}
@@ -393,11 +390,8 @@ def get_freq_cnv_loss_or_gain(gene_entities, cancer_entities, query, cnv_and_ssm
393
  if item["case"]["case_id"]:
394
  case_id_list.append(item["case"]["case_id"])
395
  number_of_cases_with_cnv_change = len(case_id_list)
396
- # freq = number_of_cases_with_cnv_change / total_number_of_cases_with_cnv_data
397
  cnv[ce][ge]["case_id_list"] = case_id_list
398
- # cnv[ce][ge]["frequency"] = round(freq * 100, 2)
399
 
400
- # print('debug: cnv {}'.format(cnv))
401
  if cnv_and_ssm_flag:
402
  return cnv
403
  else:
@@ -458,6 +452,7 @@ def get_msi_frequency(cancer_entities):
458
  msi_pos = msi_results.count('MSI')
459
  msi_total = len(msi_results)
460
  freq = msi_pos / msi_total
 
461
  print('obtained {} BAM files with MSI tag, out of a total of {} BAM files with MSI information'.format(
462
  msi_pos, msi_total
463
  ))
@@ -588,6 +583,8 @@ def run_cnv_ssm_api(decompose_result, cancer_entities, query):
588
  ssm_result = get_cases_with_ssms_in_a_gene(
589
  project=ce, gene_name=decompose_result["mut_gene"]
590
  )
 
 
591
  # calculate overlap of cases and return freq
592
  print('getting shared cases with CNV and SSMs...')
593
  cases_with_ssm_and_cnvs = [
@@ -595,7 +592,6 @@ def run_cnv_ssm_api(decompose_result, cancer_entities, query):
595
  set(ssm_result["case_id_list"]),
596
  ]
597
  shared_cases = list(reduce(lambda x, y: x & y, cases_with_ssm_and_cnvs))
598
- total_case_count = get_total_variation_data_for_project(project=ce)
599
  print('number of shared_cases {}'.format(len(shared_cases)))
600
  print('total case count {}'.format(total_case_count))
601
  freq = round((len(shared_cases) / total_case_count) * 100, 2)
@@ -634,6 +630,7 @@ def get_top_cases_counts_by_gene(gene_entities, cancer_entities):
634
  "cases_without_mutations"
635
  ] = cases_without_mutations
636
  top_cases_counts_by_gene[ce]["total_case_count"] = total_case_count
 
637
  print('obtained {} cases with mutations and a total case count of {}'.format(
638
  cases_with_mutations, total_case_count
639
  ))
 
272
  if not total_number_of_cases_with_cnv_data:
273
  continue
274
 
275
+ print('\nStep 5: Query GDC and process results\n')
276
  print('total number of cases with CNV data {}'.format(
277
  total_number_of_cases_with_cnv_data))
278
 
 
379
  print("exception: {}".format(str(e)))
380
  continue
381
 
 
 
 
 
382
 
383
  if not ce in cnv:
384
  cnv[ce] = {}
 
390
  if item["case"]["case_id"]:
391
  case_id_list.append(item["case"]["case_id"])
392
  number_of_cases_with_cnv_change = len(case_id_list)
 
393
  cnv[ce][ge]["case_id_list"] = case_id_list
 
394
 
 
395
  if cnv_and_ssm_flag:
396
  return cnv
397
  else:
 
452
  msi_pos = msi_results.count('MSI')
453
  msi_total = len(msi_results)
454
  freq = msi_pos / msi_total
455
+ print('\nStep 5: Query GDC and process results\n')
456
  print('obtained {} BAM files with MSI tag, out of a total of {} BAM files with MSI information'.format(
457
  msi_pos, msi_total
458
  ))
 
583
  ssm_result = get_cases_with_ssms_in_a_gene(
584
  project=ce, gene_name=decompose_result["mut_gene"]
585
  )
586
+ total_case_count = get_total_variation_data_for_project(project=ce)
587
+ print('\nStep 5: Query GDC and process results\n')
588
  # calculate overlap of cases and return freq
589
  print('getting shared cases with CNV and SSMs...')
590
  cases_with_ssm_and_cnvs = [
 
592
  set(ssm_result["case_id_list"]),
593
  ]
594
  shared_cases = list(reduce(lambda x, y: x & y, cases_with_ssm_and_cnvs))
 
595
  print('number of shared_cases {}'.format(len(shared_cases)))
596
  print('total case count {}'.format(total_case_count))
597
  freq = round((len(shared_cases) / total_case_count) * 100, 2)
 
630
  "cases_without_mutations"
631
  ] = cases_without_mutations
632
  top_cases_counts_by_gene[ce]["total_case_count"] = total_case_count
633
+ print('\nStep 5: Query GDC and process results\n')
634
  print('obtained {} cases with mutations and a total case count of {}'.format(
635
  cases_with_mutations, total_case_count
636
  ))