Spaces:
Sleeping
Sleeping
add step 5 to logs
Browse files- methods/gdc_api_calls.py +5 -8
methods/gdc_api_calls.py
CHANGED
|
@@ -272,6 +272,7 @@ def return_joint_single_cnv_frequency(cnv, cnv_change, cnv_change_5_category):
|
|
| 272 |
if not total_number_of_cases_with_cnv_data:
|
| 273 |
continue
|
| 274 |
|
|
|
|
| 275 |
print('total number of cases with CNV data {}'.format(
|
| 276 |
total_number_of_cases_with_cnv_data))
|
| 277 |
|
|
@@ -378,10 +379,6 @@ def get_freq_cnv_loss_or_gain(gene_entities, cancer_entities, query, cnv_and_ssm
|
|
| 378 |
print("exception: {}".format(str(e)))
|
| 379 |
continue
|
| 380 |
|
| 381 |
-
# total_number_of_cases_with_cnv_data = get_available_cnv_data_for_project(ce)
|
| 382 |
-
# skip if cannot obtain total # of cnv cases from API
|
| 383 |
-
# if not total_number_of_cases_with_cnv_data:
|
| 384 |
-
# continue
|
| 385 |
|
| 386 |
if not ce in cnv:
|
| 387 |
cnv[ce] = {}
|
|
@@ -393,11 +390,8 @@ def get_freq_cnv_loss_or_gain(gene_entities, cancer_entities, query, cnv_and_ssm
|
|
| 393 |
if item["case"]["case_id"]:
|
| 394 |
case_id_list.append(item["case"]["case_id"])
|
| 395 |
number_of_cases_with_cnv_change = len(case_id_list)
|
| 396 |
-
# freq = number_of_cases_with_cnv_change / total_number_of_cases_with_cnv_data
|
| 397 |
cnv[ce][ge]["case_id_list"] = case_id_list
|
| 398 |
-
# cnv[ce][ge]["frequency"] = round(freq * 100, 2)
|
| 399 |
|
| 400 |
-
# print('debug: cnv {}'.format(cnv))
|
| 401 |
if cnv_and_ssm_flag:
|
| 402 |
return cnv
|
| 403 |
else:
|
|
@@ -458,6 +452,7 @@ def get_msi_frequency(cancer_entities):
|
|
| 458 |
msi_pos = msi_results.count('MSI')
|
| 459 |
msi_total = len(msi_results)
|
| 460 |
freq = msi_pos / msi_total
|
|
|
|
| 461 |
print('obtained {} BAM files with MSI tag, out of a total of {} BAM files with MSI information'.format(
|
| 462 |
msi_pos, msi_total
|
| 463 |
))
|
|
@@ -588,6 +583,8 @@ def run_cnv_ssm_api(decompose_result, cancer_entities, query):
|
|
| 588 |
ssm_result = get_cases_with_ssms_in_a_gene(
|
| 589 |
project=ce, gene_name=decompose_result["mut_gene"]
|
| 590 |
)
|
|
|
|
|
|
|
| 591 |
# calculate overlap of cases and return freq
|
| 592 |
print('getting shared cases with CNV and SSMs...')
|
| 593 |
cases_with_ssm_and_cnvs = [
|
|
@@ -595,7 +592,6 @@ def run_cnv_ssm_api(decompose_result, cancer_entities, query):
|
|
| 595 |
set(ssm_result["case_id_list"]),
|
| 596 |
]
|
| 597 |
shared_cases = list(reduce(lambda x, y: x & y, cases_with_ssm_and_cnvs))
|
| 598 |
-
total_case_count = get_total_variation_data_for_project(project=ce)
|
| 599 |
print('number of shared_cases {}'.format(len(shared_cases)))
|
| 600 |
print('total case count {}'.format(total_case_count))
|
| 601 |
freq = round((len(shared_cases) / total_case_count) * 100, 2)
|
|
@@ -634,6 +630,7 @@ def get_top_cases_counts_by_gene(gene_entities, cancer_entities):
|
|
| 634 |
"cases_without_mutations"
|
| 635 |
] = cases_without_mutations
|
| 636 |
top_cases_counts_by_gene[ce]["total_case_count"] = total_case_count
|
|
|
|
| 637 |
print('obtained {} cases with mutations and a total case count of {}'.format(
|
| 638 |
cases_with_mutations, total_case_count
|
| 639 |
))
|
|
|
|
| 272 |
if not total_number_of_cases_with_cnv_data:
|
| 273 |
continue
|
| 274 |
|
| 275 |
+
print('\nStep 5: Query GDC and process results\n')
|
| 276 |
print('total number of cases with CNV data {}'.format(
|
| 277 |
total_number_of_cases_with_cnv_data))
|
| 278 |
|
|
|
|
| 379 |
print("exception: {}".format(str(e)))
|
| 380 |
continue
|
| 381 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 382 |
|
| 383 |
if not ce in cnv:
|
| 384 |
cnv[ce] = {}
|
|
|
|
| 390 |
if item["case"]["case_id"]:
|
| 391 |
case_id_list.append(item["case"]["case_id"])
|
| 392 |
number_of_cases_with_cnv_change = len(case_id_list)
|
|
|
|
| 393 |
cnv[ce][ge]["case_id_list"] = case_id_list
|
|
|
|
| 394 |
|
|
|
|
| 395 |
if cnv_and_ssm_flag:
|
| 396 |
return cnv
|
| 397 |
else:
|
|
|
|
| 452 |
msi_pos = msi_results.count('MSI')
|
| 453 |
msi_total = len(msi_results)
|
| 454 |
freq = msi_pos / msi_total
|
| 455 |
+
print('\nStep 5: Query GDC and process results\n')
|
| 456 |
print('obtained {} BAM files with MSI tag, out of a total of {} BAM files with MSI information'.format(
|
| 457 |
msi_pos, msi_total
|
| 458 |
))
|
|
|
|
| 583 |
ssm_result = get_cases_with_ssms_in_a_gene(
|
| 584 |
project=ce, gene_name=decompose_result["mut_gene"]
|
| 585 |
)
|
| 586 |
+
total_case_count = get_total_variation_data_for_project(project=ce)
|
| 587 |
+
print('\nStep 5: Query GDC and process results\n')
|
| 588 |
# calculate overlap of cases and return freq
|
| 589 |
print('getting shared cases with CNV and SSMs...')
|
| 590 |
cases_with_ssm_and_cnvs = [
|
|
|
|
| 592 |
set(ssm_result["case_id_list"]),
|
| 593 |
]
|
| 594 |
shared_cases = list(reduce(lambda x, y: x & y, cases_with_ssm_and_cnvs))
|
|
|
|
| 595 |
print('number of shared_cases {}'.format(len(shared_cases)))
|
| 596 |
print('total case count {}'.format(total_case_count))
|
| 597 |
freq = round((len(shared_cases) / total_case_count) * 100, 2)
|
|
|
|
| 630 |
"cases_without_mutations"
|
| 631 |
] = cases_without_mutations
|
| 632 |
top_cases_counts_by_gene[ce]["total_case_count"] = total_case_count
|
| 633 |
+
print('\nStep 5: Query GDC and process results\n')
|
| 634 |
print('obtained {} cases with mutations and a total case count of {}'.format(
|
| 635 |
cases_with_mutations, total_case_count
|
| 636 |
))
|