initial commit
Browse files- .gitattributes +1 -0
- .gitignore +2 -0
- Dockerfile +21 -0
- app.py +76 -0
- app2.py +165 -0
- cbioportal_study_pmids.csv +733 -0
- index_dir/faiss.index +3 -0
- index_dir/meta.jsonl +3 -0
- mcp_server.py +200 -0
- pdf_semsearch.py +337 -0
- pmc_pdf_downloader.py +245 -0
- pmcids.txt +732 -0
- pmid2pmcid.py +219 -0
- pmid_to_pmcid.csv +733 -0
- pull_pdfs.py +55 -0
- requirements.txt +20 -0
- unfetched_pmcids.tsv +2 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
index_dir/* filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
batch_pdfs/
|
| 2 |
+
Pipfile
|
Dockerfile
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Use an official Python runtime as a parent image
FROM python:3.12-slim

# Set the working directory in the container
WORKDIR /app

# Copy the requirements file on its own first so the dependency layer is
# cached independently of application code changes
COPY requirements.txt ./
# --no-cache-dir keeps pip's download cache out of the image layer
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application code
COPY . .

# Document that the server listens on port 8123. EXPOSE does not publish the
# port by itself; map it with `docker run -p 8123:8123`.
EXPOSE 8123

# Location of the search index, relative to WORKDIR
# (presumably read by mcp_server.py -- confirm)
ENV INDEX_DIR=./index_dir

# Run mcp_server.py when the container launches
CMD ["python", "mcp_server.py", "--host", "0.0.0.0", "--port", "8123"]
|
app.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import os
|
| 3 |
+
from langchain_community.vectorstores import Chroma
|
| 4 |
+
from langchain_openai import OpenAIEmbeddings
|
| 5 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 6 |
+
from langchain_community.document_loaders import PyPDFLoader
|
| 7 |
+
|
| 8 |
+
# Make sure the 'pdfs' directory exists. exist_ok=True replaces the separate
# os.path.exists() test, which could race with another process creating the
# directory between the check and the makedirs() call.
os.makedirs("pdfs", exist_ok=True)
|
| 11 |
+
|
| 12 |
+
def get_pdf_files():
    """Return the names of the PDF files in the 'pdfs' directory.

    The ``.pdf`` extension is matched case-insensitively (so ``REPORT.PDF``
    is included) and the names are returned sorted, because ``os.listdir``
    yields directory entries in arbitrary order.
    """
    return sorted(f for f in os.listdir("pdfs") if f.lower().endswith(".pdf"))
|
| 15 |
+
|
| 16 |
+
def index_pdfs():
    """Index every PDF in the 'pdfs' directory into the Chroma store at './db'.

    Each file is loaded with PyPDFLoader, split into 1000-character chunks
    without overlap, embedded with OpenAIEmbeddings and persisted. Empty
    files are reported as failures, and an error on one file is recorded
    without stopping the remaining files.

    Returns:
        A human-readable status string listing the indexed and failed files.
    """
    pdf_files = get_pdf_files()
    if not pdf_files:
        return "No PDF files found in the 'pdfs' directory."

    # Loop-invariant: a single splitter serves every file.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    # Built lazily inside the try block: a construction failure is still
    # reported per file, but a working client is only created once.
    embeddings = None

    success_files = []
    failed_files = []

    for pdf_file in pdf_files:
        try:
            file_path = os.path.join("pdfs", pdf_file)
            if os.path.getsize(file_path) == 0:
                failed_files.append(f"{pdf_file} (file is empty)")
                continue

            documents = PyPDFLoader(file_path).load()
            texts = text_splitter.split_documents(documents)
            if embeddings is None:
                embeddings = OpenAIEmbeddings()
            db = Chroma.from_documents(texts, embeddings, persist_directory="./db")
            db.persist()
            success_files.append(pdf_file)
        except Exception as e:
            # Deliberate best-effort boundary: one bad PDF must not abort
            # the whole run, so the error is surfaced in the status text.
            failed_files.append(f"{pdf_file} (Error: {e})")

    status = ""
    if success_files:
        status += f"Successfully indexed: {', '.join(success_files)}\n"
    if failed_files:
        status += f"Failed to index: {', '.join(failed_files)}"

    return status or "No files were processed."
|
| 50 |
+
|
| 51 |
+
def search(query):
    """Search the indexed PDFs for chunks similar to *query*.

    Args:
        query: Free-text search string.

    Returns:
        A text block with one "Source/Content" entry per matching chunk, or
        a short message when the query is blank or nothing matches.
    """
    # Reject blank input before any embedding request is made.
    if not query or not query.strip():
        return "Please enter a search query."

    embeddings = OpenAIEmbeddings()
    db = Chroma(persist_directory="./db", embedding_function=embeddings)
    docs = db.similarity_search(query)
    if not docs:
        return "No results found."

    return "".join(
        # .get(): a chunk without a 'source' key must not abort the search
        f"Source: {doc.metadata.get('source', 'unknown')}\n"
        f"Content: {doc.page_content}\n\n"
        for doc in docs
    )
|
| 61 |
+
|
| 62 |
+
# Two-tab UI: one tab triggers indexing, the other runs searches.
with gr.Blocks() as demo:
    gr.Markdown("# Simple Semantic Search App")
    with gr.Tab("Index PDFs"):
        # A callable value is re-evaluated each time the page loads, so PDFs
        # dropped into 'pdfs' after start-up show up without a restart.
        pdf_files_display = gr.Textbox(
            label="Available PDF Files",
            interactive=False,
            value=lambda: "\n".join(get_pdf_files()),
        )
        index_button = gr.Button("Index PDFs")
        index_status = gr.Textbox(label="Indexing Status", interactive=False)
        index_button.click(index_pdfs, inputs=None, outputs=index_status)
    with gr.Tab("Search"):
        search_query = gr.Textbox(label="Search Query")
        search_button = gr.Button("Search")
        search_results = gr.Textbox(label="Search Results", interactive=False)
        search_button.click(search, inputs=search_query, outputs=search_results)

if __name__ == "__main__":
    demo.launch()
|
app2.py
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import os
|
| 3 |
+
import json
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from typing import List, Dict
|
| 6 |
+
|
| 7 |
+
import numpy as np
|
| 8 |
+
from sentence_transformers import SentenceTransformer, CrossEncoder
|
| 9 |
+
import faiss
|
| 10 |
+
|
| 11 |
+
# --- Configuration ---
# INDEX_DIR can be overridden through the environment; the default is the
# previously hard-coded location, so existing setups behave the same.
INDEX_DIR = os.environ.get("INDEX_DIR", "./index_dir")
EMBED_MODEL = "intfloat/e5-base-v2"
RERANKER_MODEL = "cross-encoder/ms-marco-MiniLM-L-6-v2"
FETCH_K = 40  # candidates requested from the FAISS index before reranking
TOP_K = 8  # results kept for display after reranking

# --- Global variables to hold models and data ---
# All four stay None until load_models_and_data() assigns them.
index = None
meta = None
embedder = None
reranker = None
|
| 23 |
+
|
| 24 |
+
# --- Utility Functions (adapted from pdf_semsearch.py) ---
|
| 25 |
+
|
| 26 |
+
def e5_prefix(text: str, is_query: bool, model_name: str) -> str:
    """Prepend the E5 role tag ("query: " or "passage: ") for e5 models.

    The text is returned untouched when *model_name* does not contain "e5"
    (compared case-insensitively).
    """
    if "e5" not in model_name.lower():
        return text
    role = "query" if is_query else "passage"
    return f"{role}: {text}"
|
| 31 |
+
|
| 32 |
+
def read_metadata(meta_path: str) -> List[Dict]:
    """Read metadata records from a JSONL file.

    Args:
        meta_path: Path to a UTF-8 file holding one JSON document per line.

    Returns:
        The decoded records, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(meta_path, "r", encoding="utf-8") as f:
        # Blank lines (e.g. an extra empty line at the end of the file)
        # carry no record and would make json.loads raise, so skip them.
        return [json.loads(line) for line in f if line.strip()]
|
| 39 |
+
|
| 40 |
+
def pretty_snippet(s: str, max_len: int = 320) -> str:
    """Collapse whitespace runs to single spaces and cap the length.

    Text longer than *max_len* is cut to ``max_len - 1`` characters and
    finished with an ellipsis character.
    """
    collapsed = " ".join(s.split())
    if len(collapsed) > max_len:
        return collapsed[: max_len - 1] + "…"
    return collapsed
|
| 44 |
+
|
| 45 |
+
# --- Model and Data Loading ---
|
| 46 |
+
|
| 47 |
+
def load_models_and_data():
    """Populate the module-level index, metadata and model globals.

    Reads 'faiss.index' and 'meta.jsonl' from INDEX_DIR, then instantiates
    the embedding model and the reranker, printing progress along the way.

    Raises:
        FileNotFoundError: If either index file is missing.
    """
    global index, meta, embedder, reranker

    index_path, meta_path = (
        os.path.join(INDEX_DIR, name) for name in ("faiss.index", "meta.jsonl")
    )

    # Both files are required; bail out before loading any model.
    if not (os.path.exists(index_path) and os.path.exists(meta_path)):
        message = (
            f"Index not found in '{INDEX_DIR}'. "
            "Please run the indexing command from pdf_semsearch.py first."
        )
        raise FileNotFoundError(message)

    print(f"[*] Loading FAISS index: {index_path}")
    index = faiss.read_index(index_path)

    print("[*] Loading metadata…")
    meta = read_metadata(meta_path)

    print(f"[*] Loading embedding model: {EMBED_MODEL}")
    embedder = SentenceTransformer(EMBED_MODEL)

    print(f"[*] Loading reranker model: {RERANKER_MODEL}")
    reranker = CrossEncoder(RERANKER_MODEL)

    print("[✓] Models and data loaded.")
|
| 72 |
+
|
| 73 |
+
# --- Search Function ---
|
| 74 |
+
|
| 75 |
+
def search(query: str):
    """Run a semantic search over the loaded index and format the hits.

    The query is embedded, up to FETCH_K nearest chunks are pulled from the
    FAISS index, reranked with the cross-encoder, and the TOP_K best are
    rendered as Markdown.

    Args:
        query: Free-text user query.

    Returns:
        A Markdown string with the results, or a short message when the
        query is blank, the models are not loaded, or nothing matches.
    """
    if not query or not query.strip():
        return "Please enter a search query."

    # Compare against None explicitly: a truthiness test would report a
    # loaded-but-empty metadata list as "not loaded".
    if any(obj is None for obj in (index, meta, embedder, reranker)):
        return "Error: Models or data not loaded. Please check the console."

    # 1. Embed the query
    query_text = e5_prefix(query, is_query=True, model_name=EMBED_MODEL)
    qvec = embedder.encode([query_text], normalize_embeddings=True).astype("float32")

    # 2. Search the FAISS index
    D, I = index.search(qvec, FETCH_K)

    # 3. Retrieve candidates
    candidates = []
    for ann_score, idx in zip(D[0], I[0]):
        # -1 marks an unfilled result slot; an id past the end of the
        # metadata means index and meta.jsonl are out of sync, and is
        # skipped rather than raising IndexError.
        if idx < 0 or idx >= len(meta):
            continue
        rec = dict(meta[idx])  # copy so the cached metadata is not mutated
        rec["ann_score"] = float(ann_score)
        candidates.append(rec)

    if not candidates:
        return "No results found."

    # 4. Rerank the candidates
    pairs = [(query, c["text"]) for c in candidates]
    scores = reranker.predict(pairs)
    for c, s in zip(candidates, scores):
        c["rerank_score"] = float(s)

    candidates.sort(key=lambda x: x["rerank_score"], reverse=True)

    # 5. Format the top results for display
    parts = [f"## Results for: \"{query}\"\n\n"]
    for i, r in enumerate(candidates[:TOP_K], start=1):
        base = Path(r["doc_path"]).name
        score = r.get("rerank_score", r["ann_score"])
        parts.append(
            f"**{i}. {base} (Page: {r['page']}, Score: {score:.3f})**\n\n"
            f"> {pretty_snippet(r['text'])}\n\n"
            # The blank line after the rule keeps it from fusing with the
            # next result's heading, which would break the Markdown.
            "---\n\n"
        )

    return "".join(parts)
|
| 126 |
+
|
| 127 |
+
# --- Gradio App ---
|
| 128 |
+
|
| 129 |
+
def create_gradio_app():
    """Build the Gradio Blocks UI for the search page and return it.

    The page has a query box and a Search button on one row and a Markdown
    area below; clicking the button and submitting the box both call
    ``search``.
    """
    with gr.Blocks(theme=gr.themes.Soft()) as iface:
        gr.Markdown(
            """
# Semantic PDF Search
Enter a query to search through the indexed PDF documents.
The index must be created first using `pdf_semsearch.py`.
"""
        )
        with gr.Row():
            query_input = gr.Textbox(
                label="Search Query",
                placeholder="e.g., KRAS G12C eligibility in lung cancer",
                lines=1,
                scale=4,
            )
            search_button = gr.Button("Search", variant="primary", scale=1)

        results_output = gr.Markdown(label="Search Results")

        # The button click and the Enter key in the textbox share one handler.
        for trigger in (search_button.click, query_input.submit):
            trigger(fn=search, inputs=query_input, outputs=results_output)
    return iface
|
| 161 |
+
|
| 162 |
+
if __name__ == "__main__":
    # search() reads the module-level index and models, so load them before
    # the UI starts serving requests.
    load_models_and_data()
    app = create_gradio_app()
    app.launch()
|
cbioportal_study_pmids.csv
ADDED
|
@@ -0,0 +1,733 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
studyId,pmid
|
| 2 |
+
all_stjude_2015,25730765
|
| 3 |
+
acyc_fmi_2014,24418857
|
| 4 |
+
acyc_mgh_2016,26829750
|
| 5 |
+
appendiceal_msk_2022,36493333
|
| 6 |
+
blca_plasmacytoid_mskcc_2016,26901067
|
| 7 |
+
blca_mskcc_solit_2014,25092538
|
| 8 |
+
blca_nmibc_2017,28583311
|
| 9 |
+
brca_mapk_hp_msk_2021,34795269
|
| 10 |
+
bowel_colitis_msk_2022,36611031
|
| 11 |
+
bladder_columbia_msk_2018,29625057
|
| 12 |
+
bladder_msk_2023,37682528
|
| 13 |
+
bm_nsclc_mskcc_2023,37591896
|
| 14 |
+
cfdna_msk_2019,31768066
|
| 15 |
+
ccrcc_dfci_2019,29301960
|
| 16 |
+
cervix_msk_2023,37643132
|
| 17 |
+
chol_jhu_2013,24185509
|
| 18 |
+
chol_nccs_2013,24185513
|
| 19 |
+
chol_nus_2012,22561520
|
| 20 |
+
coadread_mskcc,25164765
|
| 21 |
+
cllsll_icgc_2011,22158541
|
| 22 |
+
coad_caseccc_2015,25583493
|
| 23 |
+
chol_msk_2018,29848569
|
| 24 |
+
chol_icgc_2017,28667006
|
| 25 |
+
coadread_mskresistance_2022,36355783
|
| 26 |
+
cscc_dfarber_2015,25589618
|
| 27 |
+
ctcl_columbia_2015,26551667
|
| 28 |
+
crc_eo_2020,34405229
|
| 29 |
+
crc_apc_impact_2020,32730818
|
| 30 |
+
crc_nigerian_2020,34819518
|
| 31 |
+
crc_dd_2022,35235413
|
| 32 |
+
difg_msk_2023,37910594
|
| 33 |
+
escc_ucla_2014,24686850
|
| 34 |
+
hcc_msk_venturaa_2018,30052636
|
| 35 |
+
gct_msk_2020,32897884
|
| 36 |
+
egc_msk_tp53_ccr_2022,35377946
|
| 37 |
+
gbc_mskcc_2022,36228155
|
| 38 |
+
gist_msk_2022,36593350
|
| 39 |
+
egc_msk_2023,37699004
|
| 40 |
+
hcc_jcopo_msk_2023,37769223
|
| 41 |
+
histiocytosis_cobi_msk_2019,30867592
|
| 42 |
+
ihch_ismms_2015,25608663
|
| 43 |
+
lgsoc_mapk_msk_2022,35443055
|
| 44 |
+
luad_tsp,18948947
|
| 45 |
+
lymphoma_cellline_msk_2020,33067607
|
| 46 |
+
lung_msk_mind_2020,36038778
|
| 47 |
+
mbc_msk_2021,33863915
|
| 48 |
+
mnm_washu_2016,27959731
|
| 49 |
+
metastatic_solid_tumors_mich_2017,28783718
|
| 50 |
+
mixed_kunga_msk_2022,35585047
|
| 51 |
+
msk_ch_ped_2021,35078859
|
| 52 |
+
mtnn_msk_2022,37078708
|
| 53 |
+
msk_ch_2023,38147626
|
| 54 |
+
nsclc_unito_2016,27346245
|
| 55 |
+
nsclc_ctdx_msk_2022,36357680
|
| 56 |
+
pediatric_dkfz_2017,29489754
|
| 57 |
+
paired_bladder_2022,36543146
|
| 58 |
+
scco_mskcc,24658004
|
| 59 |
+
sarc_mskcc,20601955
|
| 60 |
+
rectal_msk_2019,31591597
|
| 61 |
+
rbl_cfdna_msk_2020,32633890
|
| 62 |
+
rbl_mskcc_2020,33466343
|
| 63 |
+
rms_msk_2023,37315267
|
| 64 |
+
sarcoma_msk_2023,37350195
|
| 65 |
+
skcm_vanderbilt_mskcc_2015,32913971
|
| 66 |
+
soft_tissue_msk_2023,37730754
|
| 67 |
+
ucec_ccr_msk_2022,35849120
|
| 68 |
+
ucec_ancestry_cds_msk_2023,37651310
|
| 69 |
+
ucec_msk_2024,38653864
|
| 70 |
+
urcc_mskcc_2016,27713405
|
| 71 |
+
utuc_mskcc_2015,26278805
|
| 72 |
+
utuc_msk_2019,32332851
|
| 73 |
+
utuc_pdx_msk_2019,32332851
|
| 74 |
+
plmeso_msk_2024,38630790
|
| 75 |
+
pancreas_msk_2024,39214094
|
| 76 |
+
lms_msk_2024,38488807
|
| 77 |
+
prostate_msk_2024,38949888
|
| 78 |
+
panet_msk_2018,30687805
|
| 79 |
+
makeanimpact_ccr_2023,36862133
|
| 80 |
+
acbc_mskcc_2015,26095796
|
| 81 |
+
blca_tcga_pan_can_atlas_2018,29625048
|
| 82 |
+
blca_tcga_pan_can_atlas_2018,29596782
|
| 83 |
+
blca_tcga_pan_can_atlas_2018,29622463
|
| 84 |
+
blca_tcga_pan_can_atlas_2018,29617662
|
| 85 |
+
blca_tcga_pan_can_atlas_2018,29625055
|
| 86 |
+
blca_tcga_pan_can_atlas_2018,29625050
|
| 87 |
+
blca_tcga_pan_can_atlas_2018,29617662
|
| 88 |
+
blca_tcga_pan_can_atlas_2018,32214244
|
| 89 |
+
blca_tcga_pan_can_atlas_2018,29625049
|
| 90 |
+
blca_tcga_pan_can_atlas_2018,29850653
|
| 91 |
+
blca_tcga_pan_can_atlas_2018,36334560
|
| 92 |
+
acc_2019,31483290
|
| 93 |
+
blca_msk_tcga_2020,30290956
|
| 94 |
+
pcnsl_msk_2024,38995739
|
| 95 |
+
msk_ctdna_vte_2024,39147831
|
| 96 |
+
cellline_ccle_broad,22460905
|
| 97 |
+
ccrcc_irc_2014,24487277
|
| 98 |
+
ccrcc_utokyo_2013,23797736
|
| 99 |
+
coadread_genentech,22895193
|
| 100 |
+
cellline_nci60,22802077
|
| 101 |
+
cll_iuopa_2015,26200345
|
| 102 |
+
brca_metabric,27161491
|
| 103 |
+
brca_metabric,30867590
|
| 104 |
+
brca_metabric,22522925
|
| 105 |
+
coadread_dfci_2016,27149842
|
| 106 |
+
cll_broad_2015,26466571
|
| 107 |
+
brca_tcga_pan_can_atlas_2018,29625048
|
| 108 |
+
brca_tcga_pan_can_atlas_2018,29596782
|
| 109 |
+
brca_tcga_pan_can_atlas_2018,29622463
|
| 110 |
+
brca_tcga_pan_can_atlas_2018,29617662
|
| 111 |
+
brca_tcga_pan_can_atlas_2018,29625055
|
| 112 |
+
brca_tcga_pan_can_atlas_2018,29625050
|
| 113 |
+
brca_tcga_pan_can_atlas_2018,29617662
|
| 114 |
+
brca_tcga_pan_can_atlas_2018,30643250
|
| 115 |
+
brca_tcga_pan_can_atlas_2018,32214244
|
| 116 |
+
brca_tcga_pan_can_atlas_2018,29625049
|
| 117 |
+
brca_tcga_pan_can_atlas_2018,29850653
|
| 118 |
+
brca_tcga_pan_can_atlas_2018,36334560
|
| 119 |
+
cesc_tcga_pan_can_atlas_2018,29625048
|
| 120 |
+
cesc_tcga_pan_can_atlas_2018,29596782
|
| 121 |
+
cesc_tcga_pan_can_atlas_2018,29622463
|
| 122 |
+
cesc_tcga_pan_can_atlas_2018,29617662
|
| 123 |
+
cesc_tcga_pan_can_atlas_2018,29625055
|
| 124 |
+
cesc_tcga_pan_can_atlas_2018,29625050
|
| 125 |
+
cesc_tcga_pan_can_atlas_2018,29617662
|
| 126 |
+
cesc_tcga_pan_can_atlas_2018,30643250
|
| 127 |
+
cesc_tcga_pan_can_atlas_2018,32214244
|
| 128 |
+
cesc_tcga_pan_can_atlas_2018,29625049
|
| 129 |
+
cesc_tcga_pan_can_atlas_2018,29850653
|
| 130 |
+
cesc_tcga_pan_can_atlas_2018,36334560
|
| 131 |
+
chol_tcga_pan_can_atlas_2018,29625048
|
| 132 |
+
chol_tcga_pan_can_atlas_2018,29596782
|
| 133 |
+
chol_tcga_pan_can_atlas_2018,29622463
|
| 134 |
+
chol_tcga_pan_can_atlas_2018,29617662
|
| 135 |
+
chol_tcga_pan_can_atlas_2018,29625055
|
| 136 |
+
chol_tcga_pan_can_atlas_2018,29625050
|
| 137 |
+
chol_tcga_pan_can_atlas_2018,29617662
|
| 138 |
+
chol_tcga_pan_can_atlas_2018,32214244
|
| 139 |
+
chol_tcga_pan_can_atlas_2018,29625049
|
| 140 |
+
chol_tcga_pan_can_atlas_2018,29850653
|
| 141 |
+
chol_tcga_pan_can_atlas_2018,36334560
|
| 142 |
+
ccle_broad_2019,31068700
|
| 143 |
+
ccle_broad_2019,31978347
|
| 144 |
+
coad_cptac_2019,31031003
|
| 145 |
+
coadread_cass_2020,32888432
|
| 146 |
+
cll_broad_2022,35927489
|
| 147 |
+
coadread_tcga_pub,22810696
|
| 148 |
+
desm_broad_2015,26343386
|
| 149 |
+
dlbc_broad_2012,22343534
|
| 150 |
+
cscc_hgsc_bcm_2014,25303977
|
| 151 |
+
coadread_tcga_pan_can_atlas_2018,29625048
|
| 152 |
+
coadread_tcga_pan_can_atlas_2018,29596782
|
| 153 |
+
coadread_tcga_pan_can_atlas_2018,29622463
|
| 154 |
+
coadread_tcga_pan_can_atlas_2018,29617662
|
| 155 |
+
coadread_tcga_pan_can_atlas_2018,29625055
|
| 156 |
+
coadread_tcga_pan_can_atlas_2018,29625050
|
| 157 |
+
coadread_tcga_pan_can_atlas_2018,29617662
|
| 158 |
+
coadread_tcga_pan_can_atlas_2018,30643250
|
| 159 |
+
coadread_tcga_pan_can_atlas_2018,32214244
|
| 160 |
+
coadread_tcga_pan_can_atlas_2018,29625049
|
| 161 |
+
coadread_tcga_pan_can_atlas_2018,29850653
|
| 162 |
+
coadread_tcga_pan_can_atlas_2018,36334560
|
| 163 |
+
dlbc_tcga_pan_can_atlas_2018,29625048
|
| 164 |
+
dlbc_tcga_pan_can_atlas_2018,29596782
|
| 165 |
+
dlbc_tcga_pan_can_atlas_2018,29622463
|
| 166 |
+
dlbc_tcga_pan_can_atlas_2018,29617662
|
| 167 |
+
dlbc_tcga_pan_can_atlas_2018,29625055
|
| 168 |
+
dlbc_tcga_pan_can_atlas_2018,29625050
|
| 169 |
+
dlbc_tcga_pan_can_atlas_2018,29617662
|
| 170 |
+
dlbc_tcga_pan_can_atlas_2018,32214244
|
| 171 |
+
dlbc_tcga_pan_can_atlas_2018,29625049
|
| 172 |
+
dlbc_tcga_pan_can_atlas_2018,29850653
|
| 173 |
+
dlbc_tcga_pan_can_atlas_2018,36334560
|
| 174 |
+
difg_glass_2019,31748746
|
| 175 |
+
cscc_ucsf_2021,34272401
|
| 176 |
+
cscc_ranson_2022,35982973
|
| 177 |
+
difg_glass,35649412
|
| 178 |
+
difg_glass,38117484
|
| 179 |
+
es_dfarber_broad_2014,25186949
|
| 180 |
+
esca_tcga_pan_can_atlas_2018,29625048
|
| 181 |
+
esca_tcga_pan_can_atlas_2018,29596782
|
| 182 |
+
esca_tcga_pan_can_atlas_2018,29622463
|
| 183 |
+
esca_tcga_pan_can_atlas_2018,29617662
|
| 184 |
+
esca_tcga_pan_can_atlas_2018,29625055
|
| 185 |
+
esca_tcga_pan_can_atlas_2018,29625050
|
| 186 |
+
esca_tcga_pan_can_atlas_2018,29617662
|
| 187 |
+
esca_tcga_pan_can_atlas_2018,32214244
|
| 188 |
+
esca_tcga_pan_can_atlas_2018,29625049
|
| 189 |
+
esca_tcga_pan_can_atlas_2018,29850653
|
| 190 |
+
esca_tcga_pan_can_atlas_2018,36334560
|
| 191 |
+
gist_msk_2023,37477937
|
| 192 |
+
kirc_tcga_pub,23792563
|
| 193 |
+
hcc_msk_2024,38864854
|
| 194 |
+
laml_tcga_pub,23634996
|
| 195 |
+
luad_mskcc_2023_met_organotropism,37084736
|
| 196 |
+
mbl_sickkids_2016,26760213
|
| 197 |
+
mixed_pipseq_2017,28007021
|
| 198 |
+
mds_mskcc_2020,27276561
|
| 199 |
+
mds_mskcc_2020,30333627
|
| 200 |
+
mds_mskcc_2020,24030381
|
| 201 |
+
mixed_msk_tcga_2021,34635660
|
| 202 |
+
nhl_bcgsc_2011,21796119
|
| 203 |
+
nhl_bcgsc_2013,23699601
|
| 204 |
+
prad_broad_2013,23622249
|
| 205 |
+
prad_mskcc,20579941
|
| 206 |
+
prad_tcga_pub,26544944
|
| 207 |
+
pcpg_tcga_pub,28162975
|
| 208 |
+
pptc_2019,31693904
|
| 209 |
+
prad_cdk12_mskcc_2020,32317181
|
| 210 |
+
prad_pik3r1_msk_2021,35670774
|
| 211 |
+
pog570_bcgsc_2020,35121966
|
| 212 |
+
prad_organoids_msk_2022,35617398
|
| 213 |
+
ptad_msk_2024,38758238
|
| 214 |
+
prad_msk_mdanderson_2023,38488813
|
| 215 |
+
stad_tcga_pub,25079317
|
| 216 |
+
rectal_msk_2022,35970919
|
| 217 |
+
sarcoma_msk_2022,35705558
|
| 218 |
+
hnsc_tcga_pan_can_atlas_2018,29625048
|
| 219 |
+
hnsc_tcga_pan_can_atlas_2018,29596782
|
| 220 |
+
hnsc_tcga_pan_can_atlas_2018,29622463
|
| 221 |
+
hnsc_tcga_pan_can_atlas_2018,29617662
|
| 222 |
+
hnsc_tcga_pan_can_atlas_2018,29625055
|
| 223 |
+
hnsc_tcga_pan_can_atlas_2018,29625050
|
| 224 |
+
hnsc_tcga_pan_can_atlas_2018,29617662
|
| 225 |
+
hnsc_tcga_pan_can_atlas_2018,30643250
|
| 226 |
+
hnsc_tcga_pan_can_atlas_2018,32214244
|
| 227 |
+
hnsc_tcga_pan_can_atlas_2018,29625049
|
| 228 |
+
hnsc_tcga_pan_can_atlas_2018,29850653
|
| 229 |
+
hnsc_tcga_pan_can_atlas_2018,36334560
|
| 230 |
+
kich_tcga_pan_can_atlas_2018,29625048
|
| 231 |
+
kich_tcga_pan_can_atlas_2018,29596782
|
| 232 |
+
kich_tcga_pan_can_atlas_2018,29622463
|
| 233 |
+
kich_tcga_pan_can_atlas_2018,29617662
|
| 234 |
+
kich_tcga_pan_can_atlas_2018,29625055
|
| 235 |
+
kich_tcga_pan_can_atlas_2018,29625050
|
| 236 |
+
kich_tcga_pan_can_atlas_2018,29617662
|
| 237 |
+
kich_tcga_pan_can_atlas_2018,32214244
|
| 238 |
+
kich_tcga_pan_can_atlas_2018,29625049
|
| 239 |
+
kich_tcga_pan_can_atlas_2018,29850653
|
| 240 |
+
kich_tcga_pan_can_atlas_2018,36334560
|
| 241 |
+
kirc_tcga_pan_can_atlas_2018,29625048
|
| 242 |
+
kirc_tcga_pan_can_atlas_2018,29596782
|
| 243 |
+
kirc_tcga_pan_can_atlas_2018,29622463
|
| 244 |
+
kirc_tcga_pan_can_atlas_2018,29617662
|
| 245 |
+
kirc_tcga_pan_can_atlas_2018,29625055
|
| 246 |
+
kirc_tcga_pan_can_atlas_2018,29625050
|
| 247 |
+
kirc_tcga_pan_can_atlas_2018,29617662
|
| 248 |
+
kirc_tcga_pan_can_atlas_2018,30643250
|
| 249 |
+
kirc_tcga_pan_can_atlas_2018,32214244
|
| 250 |
+
kirc_tcga_pan_can_atlas_2018,29625049
|
| 251 |
+
kirc_tcga_pan_can_atlas_2018,29850653
|
| 252 |
+
kirc_tcga_pan_can_atlas_2018,36334560
|
| 253 |
+
kirp_tcga_pan_can_atlas_2018,29625048
|
| 254 |
+
kirp_tcga_pan_can_atlas_2018,29596782
|
| 255 |
+
kirp_tcga_pan_can_atlas_2018,29622463
|
| 256 |
+
kirp_tcga_pan_can_atlas_2018,29617662
|
| 257 |
+
kirp_tcga_pan_can_atlas_2018,29625055
|
| 258 |
+
kirp_tcga_pan_can_atlas_2018,29625050
|
| 259 |
+
kirp_tcga_pan_can_atlas_2018,29617662
|
| 260 |
+
kirp_tcga_pan_can_atlas_2018,30643250
|
| 261 |
+
kirp_tcga_pan_can_atlas_2018,32214244
|
| 262 |
+
kirp_tcga_pan_can_atlas_2018,29625049
|
| 263 |
+
kirp_tcga_pan_can_atlas_2018,29850653
|
| 264 |
+
kirp_tcga_pan_can_atlas_2018,36334560
|
| 265 |
+
laml_tcga_pan_can_atlas_2018,29625048
|
| 266 |
+
laml_tcga_pan_can_atlas_2018,29596782
|
| 267 |
+
laml_tcga_pan_can_atlas_2018,29622463
|
| 268 |
+
laml_tcga_pan_can_atlas_2018,29617662
|
| 269 |
+
laml_tcga_pan_can_atlas_2018,29625055
|
| 270 |
+
laml_tcga_pan_can_atlas_2018,29625050
|
| 271 |
+
laml_tcga_pan_can_atlas_2018,29617662
|
| 272 |
+
laml_tcga_pan_can_atlas_2018,32214244
|
| 273 |
+
laml_tcga_pan_can_atlas_2018,29625049
|
| 274 |
+
laml_tcga_pan_can_atlas_2018,29850653
|
| 275 |
+
laml_tcga_pan_can_atlas_2018,36334560
|
| 276 |
+
lihc_tcga_pan_can_atlas_2018,29625048
|
| 277 |
+
lihc_tcga_pan_can_atlas_2018,29596782
|
| 278 |
+
lihc_tcga_pan_can_atlas_2018,29622463
|
| 279 |
+
lihc_tcga_pan_can_atlas_2018,29617662
|
| 280 |
+
lihc_tcga_pan_can_atlas_2018,29625055
|
| 281 |
+
lihc_tcga_pan_can_atlas_2018,29625050
|
| 282 |
+
lihc_tcga_pan_can_atlas_2018,29617662
|
| 283 |
+
lihc_tcga_pan_can_atlas_2018,30643250
|
| 284 |
+
lihc_tcga_pan_can_atlas_2018,32214244
|
| 285 |
+
lihc_tcga_pan_can_atlas_2018,29625049
|
| 286 |
+
lihc_tcga_pan_can_atlas_2018,29850653
|
| 287 |
+
lihc_tcga_pan_can_atlas_2018,36334560
|
| 288 |
+
luad_tcga_pan_can_atlas_2018,29625048
|
| 289 |
+
luad_tcga_pan_can_atlas_2018,29596782
|
| 290 |
+
luad_tcga_pan_can_atlas_2018,29622463
|
| 291 |
+
luad_tcga_pan_can_atlas_2018,29617662
|
| 292 |
+
luad_tcga_pan_can_atlas_2018,29625055
|
| 293 |
+
luad_tcga_pan_can_atlas_2018,29625050
|
| 294 |
+
luad_tcga_pan_can_atlas_2018,29617662
|
| 295 |
+
luad_tcga_pan_can_atlas_2018,30643250
|
| 296 |
+
luad_tcga_pan_can_atlas_2018,32214244
|
| 297 |
+
luad_tcga_pan_can_atlas_2018,29625049
|
| 298 |
+
luad_tcga_pan_can_atlas_2018,29850653
|
| 299 |
+
luad_tcga_pan_can_atlas_2018,36334560
|
| 300 |
+
lusc_tcga_pan_can_atlas_2018,29625048
|
| 301 |
+
lusc_tcga_pan_can_atlas_2018,29596782
|
| 302 |
+
lusc_tcga_pan_can_atlas_2018,29622463
|
| 303 |
+
lusc_tcga_pan_can_atlas_2018,29617662
|
| 304 |
+
lusc_tcga_pan_can_atlas_2018,29625055
|
| 305 |
+
lusc_tcga_pan_can_atlas_2018,29625050
|
| 306 |
+
lusc_tcga_pan_can_atlas_2018,29617662
|
| 307 |
+
lusc_tcga_pan_can_atlas_2018,30643250
|
| 308 |
+
lusc_tcga_pan_can_atlas_2018,32214244
|
| 309 |
+
lusc_tcga_pan_can_atlas_2018,29625049
|
| 310 |
+
lusc_tcga_pan_can_atlas_2018,29850653
|
| 311 |
+
lusc_tcga_pan_can_atlas_2018,36334560
|
| 312 |
+
meso_tcga_pan_can_atlas_2018,29625048
|
| 313 |
+
meso_tcga_pan_can_atlas_2018,29596782
|
| 314 |
+
meso_tcga_pan_can_atlas_2018,29622463
|
| 315 |
+
meso_tcga_pan_can_atlas_2018,29617662
|
| 316 |
+
meso_tcga_pan_can_atlas_2018,29625055
|
| 317 |
+
meso_tcga_pan_can_atlas_2018,29625050
|
| 318 |
+
meso_tcga_pan_can_atlas_2018,29617662
|
| 319 |
+
meso_tcga_pan_can_atlas_2018,32214244
|
| 320 |
+
meso_tcga_pan_can_atlas_2018,29625049
|
| 321 |
+
meso_tcga_pan_can_atlas_2018,29850653
|
| 322 |
+
meso_tcga_pan_can_atlas_2018,36334560
|
| 323 |
+
ov_tcga_pan_can_atlas_2018,29625048
|
| 324 |
+
ov_tcga_pan_can_atlas_2018,29596782
|
| 325 |
+
ov_tcga_pan_can_atlas_2018,29622463
|
| 326 |
+
ov_tcga_pan_can_atlas_2018,29617662
|
| 327 |
+
ov_tcga_pan_can_atlas_2018,29625055
|
| 328 |
+
ov_tcga_pan_can_atlas_2018,29625050
|
| 329 |
+
ov_tcga_pan_can_atlas_2018,29617662
|
| 330 |
+
ov_tcga_pan_can_atlas_2018,30643250
|
| 331 |
+
ov_tcga_pan_can_atlas_2018,32214244
|
| 332 |
+
ov_tcga_pan_can_atlas_2018,29625049
|
| 333 |
+
ov_tcga_pan_can_atlas_2018,29850653
|
| 334 |
+
ov_tcga_pan_can_atlas_2018,36334560
|
| 335 |
+
paad_tcga_pan_can_atlas_2018,29625048
|
| 336 |
+
paad_tcga_pan_can_atlas_2018,29596782
|
| 337 |
+
paad_tcga_pan_can_atlas_2018,29622463
|
| 338 |
+
paad_tcga_pan_can_atlas_2018,29617662
|
| 339 |
+
paad_tcga_pan_can_atlas_2018,29625055
|
| 340 |
+
paad_tcga_pan_can_atlas_2018,29625050
|
| 341 |
+
paad_tcga_pan_can_atlas_2018,29617662
|
| 342 |
+
paad_tcga_pan_can_atlas_2018,30643250
|
| 343 |
+
paad_tcga_pan_can_atlas_2018,32214244
|
| 344 |
+
paad_tcga_pan_can_atlas_2018,29625049
|
| 345 |
+
paad_tcga_pan_can_atlas_2018,29850653
|
| 346 |
+
paad_tcga_pan_can_atlas_2018,36334560
|
| 347 |
+
pcpg_tcga_pan_can_atlas_2018,29625048
|
| 348 |
+
pcpg_tcga_pan_can_atlas_2018,29596782
|
| 349 |
+
pcpg_tcga_pan_can_atlas_2018,29622463
|
| 350 |
+
pcpg_tcga_pan_can_atlas_2018,29617662
|
| 351 |
+
pcpg_tcga_pan_can_atlas_2018,29625055
|
| 352 |
+
pcpg_tcga_pan_can_atlas_2018,29625050
|
| 353 |
+
pcpg_tcga_pan_can_atlas_2018,29617662
|
| 354 |
+
pcpg_tcga_pan_can_atlas_2018,30643250
|
| 355 |
+
pcpg_tcga_pan_can_atlas_2018,32214244
|
| 356 |
+
pcpg_tcga_pan_can_atlas_2018,29625049
|
| 357 |
+
pcpg_tcga_pan_can_atlas_2018,29850653
|
| 358 |
+
pcpg_tcga_pan_can_atlas_2018,36334560
|
| 359 |
+
prad_tcga_pan_can_atlas_2018,29625048
|
| 360 |
+
prad_tcga_pan_can_atlas_2018,29596782
|
| 361 |
+
prad_tcga_pan_can_atlas_2018,29622463
|
| 362 |
+
prad_tcga_pan_can_atlas_2018,29617662
|
| 363 |
+
prad_tcga_pan_can_atlas_2018,29625055
|
| 364 |
+
prad_tcga_pan_can_atlas_2018,29625050
|
| 365 |
+
prad_tcga_pan_can_atlas_2018,29617662
|
| 366 |
+
prad_tcga_pan_can_atlas_2018,30643250
|
| 367 |
+
prad_tcga_pan_can_atlas_2018,32214244
|
| 368 |
+
prad_tcga_pan_can_atlas_2018,29625049
|
| 369 |
+
prad_tcga_pan_can_atlas_2018,29850653
|
| 370 |
+
prad_tcga_pan_can_atlas_2018,36334560
|
| 371 |
+
sarc_tcga_pan_can_atlas_2018,29625048
|
| 372 |
+
sarc_tcga_pan_can_atlas_2018,29596782
|
| 373 |
+
sarc_tcga_pan_can_atlas_2018,29622463
|
| 374 |
+
sarc_tcga_pan_can_atlas_2018,29617662
|
| 375 |
+
sarc_tcga_pan_can_atlas_2018,29625055
|
| 376 |
+
sarc_tcga_pan_can_atlas_2018,29625050
|
| 377 |
+
sarc_tcga_pan_can_atlas_2018,29617662
|
| 378 |
+
sarc_tcga_pan_can_atlas_2018,32214244
|
| 379 |
+
sarc_tcga_pan_can_atlas_2018,29625049
|
| 380 |
+
sarc_tcga_pan_can_atlas_2018,29850653
|
| 381 |
+
sarc_tcga_pan_can_atlas_2018,36334560
|
| 382 |
+
skcm_tcga_pan_can_atlas_2018,29625048
|
| 383 |
+
skcm_tcga_pan_can_atlas_2018,29596782
|
| 384 |
+
skcm_tcga_pan_can_atlas_2018,29622463
|
| 385 |
+
skcm_tcga_pan_can_atlas_2018,29617662
|
| 386 |
+
skcm_tcga_pan_can_atlas_2018,29625055
|
| 387 |
+
skcm_tcga_pan_can_atlas_2018,29625050
|
| 388 |
+
skcm_tcga_pan_can_atlas_2018,29617662
|
| 389 |
+
skcm_tcga_pan_can_atlas_2018,30643250
|
| 390 |
+
skcm_tcga_pan_can_atlas_2018,32214244
|
| 391 |
+
skcm_tcga_pan_can_atlas_2018,29625049
|
| 392 |
+
skcm_tcga_pan_can_atlas_2018,29850653
|
| 393 |
+
skcm_tcga_pan_can_atlas_2018,36334560
|
| 394 |
+
stad_tcga_pan_can_atlas_2018,29625048
|
| 395 |
+
stad_tcga_pan_can_atlas_2018,29596782
|
| 396 |
+
stad_tcga_pan_can_atlas_2018,29622463
|
| 397 |
+
stad_tcga_pan_can_atlas_2018,29617662
|
| 398 |
+
stad_tcga_pan_can_atlas_2018,29625055
|
| 399 |
+
stad_tcga_pan_can_atlas_2018,29625050
|
| 400 |
+
stad_tcga_pan_can_atlas_2018,29617662
|
| 401 |
+
stad_tcga_pan_can_atlas_2018,32214244
|
| 402 |
+
stad_tcga_pan_can_atlas_2018,29625049
|
| 403 |
+
stad_tcga_pan_can_atlas_2018,29850653
|
| 404 |
+
stad_tcga_pan_can_atlas_2018,36334560
|
| 405 |
+
tgct_tcga_pan_can_atlas_2018,29625048
|
| 406 |
+
tgct_tcga_pan_can_atlas_2018,29596782
|
| 407 |
+
tgct_tcga_pan_can_atlas_2018,29622463
|
| 408 |
+
tgct_tcga_pan_can_atlas_2018,29617662
|
| 409 |
+
tgct_tcga_pan_can_atlas_2018,29625055
|
| 410 |
+
tgct_tcga_pan_can_atlas_2018,29625050
|
| 411 |
+
tgct_tcga_pan_can_atlas_2018,29617662
|
| 412 |
+
tgct_tcga_pan_can_atlas_2018,32214244
|
| 413 |
+
tgct_tcga_pan_can_atlas_2018,29625049
|
| 414 |
+
tgct_tcga_pan_can_atlas_2018,29850653
|
| 415 |
+
tgct_tcga_pan_can_atlas_2018,36334560
|
| 416 |
+
thca_tcga_pan_can_atlas_2018,29625048
|
| 417 |
+
thca_tcga_pan_can_atlas_2018,29596782
|
| 418 |
+
thca_tcga_pan_can_atlas_2018,29622463
|
| 419 |
+
thca_tcga_pan_can_atlas_2018,29617662
|
| 420 |
+
thca_tcga_pan_can_atlas_2018,29625055
|
| 421 |
+
thca_tcga_pan_can_atlas_2018,29625050
|
| 422 |
+
thca_tcga_pan_can_atlas_2018,29617662
|
| 423 |
+
thca_tcga_pan_can_atlas_2018,30643250
|
| 424 |
+
thca_tcga_pan_can_atlas_2018,32214244
|
| 425 |
+
thca_tcga_pan_can_atlas_2018,29625049
|
| 426 |
+
thca_tcga_pan_can_atlas_2018,29850653
|
| 427 |
+
thca_tcga_pan_can_atlas_2018,36334560
|
| 428 |
+
thym_tcga_pan_can_atlas_2018,29625048
|
| 429 |
+
thym_tcga_pan_can_atlas_2018,29596782
|
| 430 |
+
thym_tcga_pan_can_atlas_2018,29622463
|
| 431 |
+
thym_tcga_pan_can_atlas_2018,29617662
|
| 432 |
+
thym_tcga_pan_can_atlas_2018,29625055
|
| 433 |
+
thym_tcga_pan_can_atlas_2018,29625050
|
| 434 |
+
thym_tcga_pan_can_atlas_2018,29617662
|
| 435 |
+
thym_tcga_pan_can_atlas_2018,32214244
|
| 436 |
+
thym_tcga_pan_can_atlas_2018,29625049
|
| 437 |
+
thym_tcga_pan_can_atlas_2018,29850653
|
| 438 |
+
thym_tcga_pan_can_atlas_2018,36334560
|
| 439 |
+
ucec_tcga_pan_can_atlas_2018,29625048
|
| 440 |
+
ucec_tcga_pan_can_atlas_2018,29596782
|
| 441 |
+
ucec_tcga_pan_can_atlas_2018,29622463
|
| 442 |
+
ucec_tcga_pan_can_atlas_2018,29617662
|
| 443 |
+
ucec_tcga_pan_can_atlas_2018,29625055
|
| 444 |
+
ucec_tcga_pan_can_atlas_2018,29625050
|
| 445 |
+
ucec_tcga_pan_can_atlas_2018,29617662
|
| 446 |
+
ucec_tcga_pan_can_atlas_2018,30643250
|
| 447 |
+
ucec_tcga_pan_can_atlas_2018,32214244
|
| 448 |
+
ucec_tcga_pan_can_atlas_2018,29625049
|
| 449 |
+
ucec_tcga_pan_can_atlas_2018,29850653
|
| 450 |
+
ucec_tcga_pan_can_atlas_2018,36334560
|
| 451 |
+
ucs_tcga_pan_can_atlas_2018,29625048
|
| 452 |
+
ucs_tcga_pan_can_atlas_2018,29596782
|
| 453 |
+
ucs_tcga_pan_can_atlas_2018,29622463
|
| 454 |
+
ucs_tcga_pan_can_atlas_2018,29617662
|
| 455 |
+
ucs_tcga_pan_can_atlas_2018,29625055
|
| 456 |
+
ucs_tcga_pan_can_atlas_2018,29625050
|
| 457 |
+
ucs_tcga_pan_can_atlas_2018,29617662
|
| 458 |
+
ucs_tcga_pan_can_atlas_2018,32214244
|
| 459 |
+
ucs_tcga_pan_can_atlas_2018,29625049
|
| 460 |
+
ucs_tcga_pan_can_atlas_2018,29850653
|
| 461 |
+
ucs_tcga_pan_can_atlas_2018,36334560
|
| 462 |
+
uvm_tcga_pan_can_atlas_2018,29625048
|
| 463 |
+
uvm_tcga_pan_can_atlas_2018,29596782
|
| 464 |
+
uvm_tcga_pan_can_atlas_2018,29622463
|
| 465 |
+
uvm_tcga_pan_can_atlas_2018,29617662
|
| 466 |
+
uvm_tcga_pan_can_atlas_2018,29625055
|
| 467 |
+
uvm_tcga_pan_can_atlas_2018,29625050
|
| 468 |
+
uvm_tcga_pan_can_atlas_2018,29617662
|
| 469 |
+
uvm_tcga_pan_can_atlas_2018,32214244
|
| 470 |
+
uvm_tcga_pan_can_atlas_2018,29625049
|
| 471 |
+
uvm_tcga_pan_can_atlas_2018,29850653
|
| 472 |
+
uvm_tcga_pan_can_atlas_2018,36334560
|
| 473 |
+
coad_silu_2022,37202560
|
| 474 |
+
acc_tcga_pan_can_atlas_2018,29625048
|
| 475 |
+
acc_tcga_pan_can_atlas_2018,29596782
|
| 476 |
+
acc_tcga_pan_can_atlas_2018,29622463
|
| 477 |
+
acc_tcga_pan_can_atlas_2018,29617662
|
| 478 |
+
acc_tcga_pan_can_atlas_2018,29625055
|
| 479 |
+
acc_tcga_pan_can_atlas_2018,29625050
|
| 480 |
+
acc_tcga_pan_can_atlas_2018,29617662
|
| 481 |
+
acc_tcga_pan_can_atlas_2018,32214244
|
| 482 |
+
acc_tcga_pan_can_atlas_2018,29625049
|
| 483 |
+
acc_tcga_pan_can_atlas_2018,29850653
|
| 484 |
+
acc_tcga_pan_can_atlas_2018,36334560
|
| 485 |
+
msk_chord_2024,39506116
|
| 486 |
+
pancan_mappyacts_2022,35292802
|
| 487 |
+
msk_met_2021,35120664
|
| 488 |
+
blca_msk_2024,39499893
|
| 489 |
+
brca_fuscc_2020,32719455
|
| 490 |
+
thyroid_gatci_2024,38412093
|
| 491 |
+
braf_msk_impact_2024,38922339
|
| 492 |
+
bcc_unige_2016,26950094
|
| 493 |
+
ampca_bcm_2016,26804919
|
| 494 |
+
blca_dfarber_mskcc_2014,25096233
|
| 495 |
+
blca_mskcc_solit_2012,23897969
|
| 496 |
+
blca_bgi,24121792
|
| 497 |
+
all_stjude_2013,23334668
|
| 498 |
+
acyc_mskcc_2013,23685749
|
| 499 |
+
acyc_jhu_2016,26862087
|
| 500 |
+
acyc_mda_2015,26631609
|
| 501 |
+
acyc_sanger_2013,23778141
|
| 502 |
+
all_stjude_2016,27776115
|
| 503 |
+
angs_project_painter_2018,32042194
|
| 504 |
+
bfn_duke_nus_2015,26437033
|
| 505 |
+
blca_cornell_2016,27749842
|
| 506 |
+
aml_ohsu_2018,30333627
|
| 507 |
+
blca_bcan_hcrn_2022,36333289
|
| 508 |
+
aml_ohsu_2022,35868306
|
| 509 |
+
asclc_msk_2024,39185963
|
| 510 |
+
brca_bccrc_xenograft_2014,25470049
|
| 511 |
+
brca_broad,22722202
|
| 512 |
+
brca_bccrc,22495314
|
| 513 |
+
brca_igr_2015,28027327
|
| 514 |
+
blca_tcga_pub_2017,28988769
|
| 515 |
+
brca_mskcc_2019,31552290
|
| 516 |
+
brca_jup_msk_2020,33263939
|
| 517 |
+
brain_cptac_2020,33242424
|
| 518 |
+
brca_cptac_2020,33212010
|
| 519 |
+
brca_dfci_2020,32404308
|
| 520 |
+
brca_sanger,22722201
|
| 521 |
+
brca_tcga_pub,23000897
|
| 522 |
+
breast_msk_2018,30205045
|
| 523 |
+
breast_alpelisib_2020,32864625
|
| 524 |
+
brca_smc_2018,29713003
|
| 525 |
+
breast_ink4_msk_2021,34544752
|
| 526 |
+
brca_pareja_msk_2020,32220886
|
| 527 |
+
crc_msk_2017,29316426
|
| 528 |
+
pancan_pcawg_2020,32025007
|
| 529 |
+
pdac_msk_2024,39753968
|
| 530 |
+
braf_msk_archer_2024,38922339
|
| 531 |
+
sarcoma_ucla_2024,39305899
|
| 532 |
+
csf_msk_2024,39289779
|
| 533 |
+
normal_skin_fibroblast_2024,39091884
|
| 534 |
+
normal_skin_keratinocytes_2024,39091884
|
| 535 |
+
normal_skin_melanocytes_2024,33029006
|
| 536 |
+
normal_skin_melanocytes_2024,39091884
|
| 537 |
+
normal_skin_melanocytes_2024,38895302
|
| 538 |
+
normal_skin_melanocytes_2024,39975212
|
| 539 |
+
chl_sccc_2023,36723991
|
| 540 |
+
blca_msk_2025,40256659
|
| 541 |
+
esca_broad,23525077
|
| 542 |
+
escc_icgc,24670651
|
| 543 |
+
es_iocurie_2014,25223734
|
| 544 |
+
gbc_shanghai_2014,24997986
|
| 545 |
+
egc_tmucih_2015,25583476
|
| 546 |
+
egc_msk_2017,29122777
|
| 547 |
+
dlbcl_duke_2017,28985567
|
| 548 |
+
dlbcl_dfci_2018,29713087
|
| 549 |
+
gbc_msk_2018,30427539
|
| 550 |
+
egc_trap_msk_2020,32437664
|
| 551 |
+
egc_mskcc_2020,33795256
|
| 552 |
+
egc_trap_ccr_msk_2023,37406106
|
| 553 |
+
hnsc_broad,21798893
|
| 554 |
+
hnc_mskcc_2016,27442865
|
| 555 |
+
hcc_inserm_fr_2015,25822088
|
| 556 |
+
gct_msk_2016,27646943
|
| 557 |
+
hcc_mskimpact_2018,30373752
|
| 558 |
+
glioma_mskcc_2019,31263031
|
| 559 |
+
glioma_msk_2018,30675060
|
| 560 |
+
hccihch_pku_2019,31130341
|
| 561 |
+
hgsoc_msk_2021,34819508
|
| 562 |
+
hcc_meric_2021,35508466
|
| 563 |
+
hcc_clca_2024,38355797
|
| 564 |
+
kirc_bgi,22138691
|
| 565 |
+
kich_tcga_pub,25155756
|
| 566 |
+
hnsc_jhu,21798897
|
| 567 |
+
hnsc_mdanderson_2013,23619168
|
| 568 |
+
ihch_smmu_2014,25526346
|
| 569 |
+
ihch_mskcc_2020,33963001
|
| 570 |
+
ihch_msk_2021,33765338
|
| 571 |
+
lgg_ucsf_2014,24336570
|
| 572 |
+
lgggbm_tcga_pub,26824661
|
| 573 |
+
lihc_amc_prv,24798001
|
| 574 |
+
lihc_riken,22634756
|
| 575 |
+
luad_mskcc_2015,25765070
|
| 576 |
+
luad_broad,22980975
|
| 577 |
+
liad_inserm_fr_2014,24735922
|
| 578 |
+
lcll_broad_2013,23415222
|
| 579 |
+
luad_msk_npjpo_2021,34290393
|
| 580 |
+
luad_cptac_2020,32649874
|
| 581 |
+
lusc_tcga_pub,22960745
|
| 582 |
+
mbl_broad_2012,22820256
|
| 583 |
+
mbl_icgc,22832583
|
| 584 |
+
mbl_pcgp,22722829
|
| 585 |
+
lung_msk_2017,28336552
|
| 586 |
+
luad_mskcc_2020,32791233
|
| 587 |
+
luad_oncosg_2020,32015526
|
| 588 |
+
lung_smc_2016,27634761
|
| 589 |
+
lung_pdx_msk_2021,35440124
|
| 590 |
+
mbl_dkfz_2017,28726821
|
| 591 |
+
lusc_cptac_2021,34358469
|
| 592 |
+
lung_nci_2022,34493867
|
| 593 |
+
mm_broad,24434212
|
| 594 |
+
mcl_idibips_2013,24145436
|
| 595 |
+
mds_tokyo_2011,21909114
|
| 596 |
+
mel_tsam_liang_2017,28373299
|
| 597 |
+
mel_ucla_2016,26997480
|
| 598 |
+
mixed_allen_2018,30150660
|
| 599 |
+
mixed_selpercatinib_2020,35304457
|
| 600 |
+
mixed_cfdna_msk_2020,34059130
|
| 601 |
+
mel_dfci_2019,31792460
|
| 602 |
+
mel_mskimpact_2020,33509808
|
| 603 |
+
mbn_sfu_2023,36201743
|
| 604 |
+
mbn_msk_2024,38497151
|
| 605 |
+
npc_nusingapore,24952746
|
| 606 |
+
nepc_wcm_2016,26855148
|
| 607 |
+
nbl_ucologne_2015,26466568
|
| 608 |
+
nbl_broad_2013,23334666
|
| 609 |
+
mrt_bcgsc_2016,26977886
|
| 610 |
+
mpn_cimr_2013,24325359
|
| 611 |
+
nsclc_mskcc_2015,25765070
|
| 612 |
+
nsclc_mskcc_2018,29657128
|
| 613 |
+
msk_access_2021,34145282
|
| 614 |
+
mng_utoronto_2021,34433969
|
| 615 |
+
mpnst_mskcc,25240281
|
| 616 |
+
nbl_amc_2012,22367537
|
| 617 |
+
nccrcc_genentech_2014,25401301
|
| 618 |
+
ov_tcga_pub,21720365
|
| 619 |
+
paac_jhu_2014,24293293
|
| 620 |
+
paad_icgc,23103869
|
| 621 |
+
paad_utsw_2015,25855536
|
| 622 |
+
nsclc_tcga_broad_2016,27158780
|
| 623 |
+
paad_qcmg_uq_2016,26909576
|
| 624 |
+
pact_jhu_2011,22158988
|
| 625 |
+
nsclc_tracerx_2017,28445112
|
| 626 |
+
nsclc_tracerx_2017,28445469
|
| 627 |
+
nsclc_pd1_msk_2018,29337640
|
| 628 |
+
ntrk_msk_2019,31871300
|
| 629 |
+
pan_origimed_2020,35871175
|
| 630 |
+
paad_cptac_2021,34534465
|
| 631 |
+
nst_nfosi_ntap,32561749
|
| 632 |
+
panet_jhu_2011,21252315
|
| 633 |
+
pcnsl_mayo_2015,25991819
|
| 634 |
+
prad_broad,22610119
|
| 635 |
+
crc_hta11_htan_2021,34910928
|
| 636 |
+
panet_shanghai_2013,24326773
|
| 637 |
+
plmeso_nyu_2015,25488749
|
| 638 |
+
prad_cpcg_2017,28068672
|
| 639 |
+
panet_arcnet_2017,28199314
|
| 640 |
+
past_dkfz_heidelberg_2013,23817572
|
| 641 |
+
prad_eururol_2017,28927585
|
| 642 |
+
prad_fhcrc,26928463
|
| 643 |
+
prad_mich,22722839
|
| 644 |
+
prad_mskcc_2014,25024180
|
| 645 |
+
prad_su2c_2015,26000489
|
| 646 |
+
prad_mskcc_2017,28825054
|
| 647 |
+
prad_p1000,29610475
|
| 648 |
+
prad_su2c_2019,31061129
|
| 649 |
+
prostate_dkfz_2018,30537516
|
| 650 |
+
prad_msk_2019,31564440
|
| 651 |
+
prad_mskcc_cheny1_organoids_2014,25201530
|
| 652 |
+
prad_mcspc_mskcc_2020,32220891
|
| 653 |
+
prad_msk_stopsack_2021,34667026
|
| 654 |
+
prostate_pcbm_swiss_2019,35504881
|
| 655 |
+
sclc_clcgp,22941188
|
| 656 |
+
sclc_jhu,22941189
|
| 657 |
+
skcm_broad,22817889
|
| 658 |
+
rms_nih_2014,24436047
|
| 659 |
+
sarc_tcga_pub,29100075
|
| 660 |
+
sclc_cancercell_gardner_2017,28196596
|
| 661 |
+
sclc_ucologne_2015,26168399
|
| 662 |
+
sarcoma_mskcc_2022,35705560
|
| 663 |
+
skcm_broad_dfarber,22622578
|
| 664 |
+
skcm_yale,22842228
|
| 665 |
+
stad_pfizer_uhongkong,24816253
|
| 666 |
+
skcm_broad_brafresist_2012,24265153
|
| 667 |
+
skcm_mskcc_2014,25409260
|
| 668 |
+
skcm_tcga_pub_2015,26091043
|
| 669 |
+
skcm_dfci_2015,26359337
|
| 670 |
+
stad_uhongkong,22037554
|
| 671 |
+
stad_utokyo,24816255
|
| 672 |
+
tet_nci_2014,24974848
|
| 673 |
+
thyroid_mskcc_2016,26878173
|
| 674 |
+
stes_tcga_pub,28052061
|
| 675 |
+
summit_2018,29420467
|
| 676 |
+
stmyec_wcm_2022,36577525
|
| 677 |
+
ucs_jhu_2014,25233892
|
| 678 |
+
ucec_tcga_pub,23636398
|
| 679 |
+
um_qimr_2016,26683228
|
| 680 |
+
ucec_msk_2018,30068706
|
| 681 |
+
uccc_nih_2017,28485815
|
| 682 |
+
tmb_mskcc_2018,30643254
|
| 683 |
+
ucec_cptac_2020,32059776
|
| 684 |
+
ucec_ccr_cfdna_msk_2022,36007103
|
| 685 |
+
vsc_cuk_2018,29422544
|
| 686 |
+
utuc_cornell_baylor_mdacc_2019,31278255
|
| 687 |
+
usarc_msk_2020,32299819
|
| 688 |
+
utuc_igbmc_2021,33397444
|
| 689 |
+
lgg_tcga_pan_can_atlas_2018,29625048
|
| 690 |
+
lgg_tcga_pan_can_atlas_2018,29596782
|
| 691 |
+
lgg_tcga_pan_can_atlas_2018,29622463
|
| 692 |
+
lgg_tcga_pan_can_atlas_2018,29617662
|
| 693 |
+
lgg_tcga_pan_can_atlas_2018,29625055
|
| 694 |
+
lgg_tcga_pan_can_atlas_2018,29625050
|
| 695 |
+
lgg_tcga_pan_can_atlas_2018,29617662
|
| 696 |
+
lgg_tcga_pan_can_atlas_2018,30643250
|
| 697 |
+
lgg_tcga_pan_can_atlas_2018,32214244
|
| 698 |
+
lgg_tcga_pan_can_atlas_2018,29625049
|
| 699 |
+
lgg_tcga_pan_can_atlas_2018,29850653
|
| 700 |
+
lgg_tcga_pan_can_atlas_2018,36334560
|
| 701 |
+
crc_orion_2024,39386479
|
| 702 |
+
brca_aurora_2023,36585450
|
| 703 |
+
schw_ctf_synodos_2025,33025139
|
| 704 |
+
ovary_geomx_gray_foundation_2024,39386723
|
| 705 |
+
brca_tcga_pub2015,26451490
|
| 706 |
+
hnsc_tcga_pub,25631445
|
| 707 |
+
luad_tcga_pub,25079552
|
| 708 |
+
thca_tcga_pub,25417114
|
| 709 |
+
blca_tcga_pub,24476821
|
| 710 |
+
msk_ch_2020,33106634
|
| 711 |
+
msk_spectrum_tme_2022,36517593
|
| 712 |
+
pancan_mimsi_msk_2024,39746944
|
| 713 |
+
mel_iatlas_riaz_nivolumab_2017,29033130
|
| 714 |
+
stad_oncosg_2018,29670109
|
| 715 |
+
gbm_tcga_pub,18772890
|
| 716 |
+
gbm_tcga_pub2013,24120142
|
| 717 |
+
odg_msk_2017,28472509
|
| 718 |
+
gbm_tcga_pan_can_atlas_2018,29625048
|
| 719 |
+
gbm_tcga_pan_can_atlas_2018,29596782
|
| 720 |
+
gbm_tcga_pan_can_atlas_2018,29622463
|
| 721 |
+
gbm_tcga_pan_can_atlas_2018,29617662
|
| 722 |
+
gbm_tcga_pan_can_atlas_2018,29625055
|
| 723 |
+
gbm_tcga_pan_can_atlas_2018,29625050
|
| 724 |
+
gbm_tcga_pan_can_atlas_2018,29617662
|
| 725 |
+
gbm_tcga_pan_can_atlas_2018,30643250
|
| 726 |
+
gbm_tcga_pan_can_atlas_2018,32214244
|
| 727 |
+
gbm_tcga_pan_can_atlas_2018,29625049
|
| 728 |
+
gbm_tcga_pan_can_atlas_2018,29850653
|
| 729 |
+
gbm_tcga_pan_can_atlas_2018,36334560
|
| 730 |
+
gbm_mayo_pdx_sarkaria_2019,31852831
|
| 731 |
+
gbm_columbia_2019,30742119
|
| 732 |
+
gbm_cptac_2021,33577785
|
| 733 |
+
msk_impact_2017,28481359
|
index_dir/faiss.index
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5bf5bafdb8fd3f9bbf8bcc66f81d773d6831262a9de7f72a9eba16985cf24a7c
|
| 3 |
+
size 115104813
|
index_dir/meta.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2ef5e13e32fdc66c13c2bf468a9601f371d7a2538430af05be29c8d4a91e242b
|
| 3 |
+
size 31852250
|
mcp_server.py
ADDED
|
@@ -0,0 +1,200 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Minimal MCP server using fastmcp to expose the PDF search script as a tool.
|
| 4 |
+
Uses argparse to configure the server (host, port).
|
| 5 |
+
Uses environment variables for the index config (INDEX_DIR, etc.).
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import argparse # <-- Added this import
|
| 9 |
+
import json
|
| 10 |
+
import os
|
| 11 |
+
from contextlib import asynccontextmanager
|
| 12 |
+
from pathlib import Path
|
| 13 |
+
from typing import Annotated, Dict, List, Optional
|
| 14 |
+
|
| 15 |
+
# --- Vector index ---
|
| 16 |
+
import faiss
|
| 17 |
+
|
| 18 |
+
# --- NLP / embeddings ---
|
| 19 |
+
import numpy as np
|
| 20 |
+
|
| 21 |
+
# --- MCP Server ---
|
| 22 |
+
from fastmcp import Context, FastMCP
|
| 23 |
+
from sentence_transformers import CrossEncoder, SentenceTransformer
|
| 24 |
+
|
| 25 |
+
# ---------------------------
|
| 26 |
+
# Configuration (from Environment Variables)
|
| 27 |
+
# ---------------------------
|
| 28 |
+
# Directory expected to contain "faiss.index" and "meta.jsonl" (see lifespan()).
INDEX_DIR = Path(os.environ.get("INDEX_DIR", "./index_dir"))
# Sentence-transformers model name used to embed incoming queries.
EMBED_MODEL = os.environ.get("EMBED_MODEL", "intfloat/e5-base-v2")
# Cross-encoder model name used for the optional reranking step.
RERANKER_MODEL = os.environ.get("RERANKER_MODEL", "cross-encoder/ms-marco-MiniLM-L-6-v2")
# Number of nearest-neighbour candidates pulled from FAISS per query,
# before optional reranking and truncation to top_k.
FETCH_K = int(os.environ.get("FETCH_K", 40))
|
| 32 |
+
|
| 33 |
+
# ---------------------------
|
| 34 |
+
# Global state to hold models
|
| 35 |
+
# ---------------------------
|
| 36 |
+
models = {}
|
| 37 |
+
|
| 38 |
+
# ---------------------------
|
| 39 |
+
# Copied Utilities
|
| 40 |
+
# ---------------------------
|
| 41 |
+
def read_metadata(meta_path: str) -> List[Dict]:
    """
    Load chunk metadata from a JSON Lines file.

    Each non-blank line is parsed as one JSON document; records are returned
    in file order, so a record's list position matches its line order in the
    file.

    Args:
        meta_path: Path to the UTF-8 encoded ``.jsonl`` file.

    Returns:
        The parsed records, one per non-blank line.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    records = []
    with open(meta_path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            # Blank lines (e.g. a trailing newline at end of file) are not
            # records; feeding them to json.loads would raise.
            if not line:
                continue
            records.append(json.loads(line))
    return records
|
| 47 |
+
|
| 48 |
+
def e5_prefix(text: str, is_query: bool, model_name: str) -> str:
    """
    Prepend the "query: " / "passage: " marker when the model name contains "e5".

    For any other model name the text is returned unchanged.
    """
    if "e5" not in model_name.lower():
        return text
    role = "query" if is_query else "passage"
    return f"{role}: {text}"
|
| 52 |
+
|
| 53 |
+
# ---------------------------
|
| 54 |
+
# Server Startup & Shutdown
|
| 55 |
+
# ---------------------------
|
| 56 |
+
@asynccontextmanager
async def lifespan(mcp: FastMCP):
    """
    Load the search assets on startup and release them on shutdown.

    On entry, looks for "faiss.index" and "meta.jsonl" under INDEX_DIR. When
    both exist, the FAISS index, the metadata records, the embedding model and
    the reranker are stored in the module-level ``models`` dict. When either
    file is missing, an error is printed and ``models`` is left empty, which
    the search tool detects via the absent "index" key.

    On exit, ``models`` is cleared. The cleanup sits in a ``finally`` block so
    it also runs when the server stops because of an exception.
    """
    print("[*] Server starting... Loading models...")
    index_path = INDEX_DIR / "faiss.index"
    meta_path = INDEX_DIR / "meta.jsonl"

    if not index_path.exists() or not meta_path.exists():
        print(f"[!] ERROR: Index files not found in '{INDEX_DIR}'.")
    else:
        print(f"[*] Loading FAISS index: {index_path}")
        models["index"] = faiss.read_index(str(index_path))

        print(f"[*] Loading metadata: {meta_path}")
        models["meta"] = read_metadata(str(meta_path))

        print(f"[*] Loading embedding model: {EMBED_MODEL}")
        models["embedder"] = SentenceTransformer(EMBED_MODEL)

        print(f"[*] Loading reranker model: {RERANKER_MODEL}")
        models["reranker"] = CrossEncoder(RERANKER_MODEL)

        # Only announce readiness when everything was actually loaded.
        print("[✓] All models and index loaded. Server is ready.")

    try:
        yield
    finally:
        print("[*] Server shutting down... Clearing models.")
        models.clear()
|
| 86 |
+
|
| 87 |
+
# ---------------------------
|
| 88 |
+
# Create the MCP Server
|
| 89 |
+
# ---------------------------
|
| 90 |
+
# Server instance; the lifespan context manager is passed in so that the
# index and models are loaded at startup and cleared at shutdown.
mcp = FastMCP(
    name="PDF Corpus Search Tool",
    lifespan=lifespan
)
|
| 94 |
+
|
| 95 |
+
# ---------------------------
|
| 96 |
+
# The MCP Tool
|
| 97 |
+
# ---------------------------
|
| 98 |
+
# Annotated parameter types for the search tool. The extra strings are plain
# Annotated metadata; presumably fastmcp surfaces them as parameter
# descriptions in the tool schema -- TODO confirm against the fastmcp docs.
Query = Annotated[str, "The semantic search query to run against the documents."]
TopK = Annotated[int, "The final number of results to return.", "default: 5"]
Rerank = Annotated[bool, "Whether to use a cross-encoder to rerank results. Default: true.", "default: true"]
|
| 101 |
+
|
| 102 |
+
# NOTE(review): the docstring is kept verbatim because it is presumably what
# the MCP framework exposes to clients as the tool description -- confirm
# before rewording it.
#
# Parameters:
#   query  -- free-text search query.
#   top_k  -- number of results to return; values <= 0 yield an empty list.
#   rerank -- when true, candidates are rescored with the cross-encoder.
# Returns a list of dicts with keys "doc_path", "page", "score" and "text",
# or a single {"error": ...} dict when the index was not loaded at startup.
@mcp.tool()
def search_pdf_corpus(
    query: Query,
    top_k: TopK = 5,
    rerank: Rerank = True
) -> List[Dict]:
    """
    Searches a private corpus of PDF documents for relevant text chunks.
    Use this to answer questions about specific topics found in the user's files.
    """
    if "index" not in models:
        return [{"error": "Index is not loaded. Check server logs."}]

    # A non-positive top_k cannot produce results; a negative value would
    # otherwise turn the final slice into "drop the last |top_k| hits".
    if top_k <= 0:
        return []

    # 1. Get pre-loaded assets
    index = models["index"]
    meta = models["meta"]
    embedder = models["embedder"]

    # 2. Embed Query
    query_text = e5_prefix(query, is_query=True, model_name=EMBED_MODEL)
    qvec = embedder.encode([query_text], normalize_embeddings=True).astype("float32")

    # 3. FAISS Search -- fetch at least top_k candidates so that a request
    # larger than FETCH_K can still be satisfied.
    fetch_k = max(FETCH_K, top_k)
    D, I = index.search(qvec, fetch_k)

    # 4. Get Candidates
    candidates = []
    for ann_score, idx in zip(D[0], I[0]):
        # FAISS pads with -1 when fewer than fetch_k vectors are available.
        if idx == -1:
            continue
        rec = dict(meta[idx])  # copy so the cached metadata is never mutated
        rec["ann_score"] = float(ann_score)
        candidates.append(rec)

    if not candidates:
        return []

    # 5. Optional Reranking
    if rerank:
        # The reranker sees the raw query, without the embedding prefix.
        pairs = [(query, c["text"]) for c in candidates]
        scores = models["reranker"].predict(pairs)
        for c, s in zip(candidates, scores):
            c["rerank_score"] = float(s)
        candidates.sort(key=lambda x: x["rerank_score"], reverse=True)
    else:
        candidates.sort(key=lambda x: x["ann_score"], reverse=True)

    # 6. Format and return top_k results
    return [
        {
            "doc_path": r["doc_path"],
            "page": r["page"],
            "score": r.get("rerank_score", r["ann_score"]),
            "text": r["text"],
        }
        for r in candidates[:top_k]
    ]
|
| 160 |
+
|
| 161 |
+
# ---------------------------
|
| 162 |
+
# Run the Server
|
| 163 |
+
# ---------------------------
|
| 164 |
+
if __name__ == "__main__":
    # Server settings (host, port, transport) come from the command line;
    # index and model settings come from environment variables.
    parser = argparse.ArgumentParser(description="Run the PDF Search MCP Server")

    # Add arguments for server configuration
    parser.add_argument(
        "--host",
        type=str,
        default="localhost",
        # Help text now matches the actual default; pass --host 0.0.0.0
        # explicitly to listen on all interfaces.
        help="Host to bind the server to (default: localhost)"
    )
    parser.add_argument(
        "--port",
        type=int,
        default=8123,
        help="Port to run the server on (default: 8123)"
    )
    parser.add_argument(
        "--transport",
        type=str,
        default="http",
        choices=["http"],  # fastmcp primarily uses http
        help="Server transport protocol (default: http)"
    )

    args = parser.parse_args()

    print(f"--- Starting PDF Search MCP Server on {args.transport}://{args.host}:{args.port} ---")
    print(f"--- Using INDEX_DIR: {INDEX_DIR.resolve()} ---")

    # Pass the parsed arguments to mcp.run()
    mcp.run(
        transport=args.transport,
        host=args.host,
        port=args.port
    )
|
pdf_semsearch.py
ADDED
|
@@ -0,0 +1,337 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Tiny CLI for open-source semantic search over PDFs.
|
| 4 |
+
- Index: extract → chunk → embed → FAISS
|
| 5 |
+
- Search: embed query → ANN → (optional) rerank
|
| 6 |
+
|
| 7 |
+
Examples:
|
| 8 |
+
# Index all PDFs in ./pdfs into ./index_dir
|
| 9 |
+
python pdf_semsearch.py index --pdf-dir ./pdfs --index-dir ./index_dir
|
| 10 |
+
|
| 11 |
+
# Search with reranking
|
| 12 |
+
python pdf_semsearch.py search --index-dir ./index_dir -q "KRAS G12C eligibility in lung cancer" --top-k 5 --rerank
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
import os
|
| 16 |
+
import sys
|
| 17 |
+
import json
|
| 18 |
+
import argparse
|
| 19 |
+
import hashlib
|
| 20 |
+
from pathlib import Path
|
| 21 |
+
from typing import List, Dict, Tuple, Optional
|
| 22 |
+
|
| 23 |
+
import numpy as np
|
| 24 |
+
from tqdm import tqdm
|
| 25 |
+
|
| 26 |
+
# --- PDF parsing / OCR ---
|
| 27 |
+
import pdfplumber
|
| 28 |
+
|
| 29 |
+
# OCR is optional; only imported if --ocr is used or needed
|
| 30 |
+
try:
|
| 31 |
+
from pdf2image import convert_from_path # requires poppler
|
| 32 |
+
import pytesseract # requires tesseract runtime
|
| 33 |
+
_OCR_AVAILABLE = True
|
| 34 |
+
except Exception:
|
| 35 |
+
_OCR_AVAILABLE = False
|
| 36 |
+
|
| 37 |
+
# --- NLP / embeddings ---
|
| 38 |
+
import spacy
|
| 39 |
+
from sentence_transformers import SentenceTransformer, CrossEncoder
|
| 40 |
+
|
| 41 |
+
# --- Vector index ---
|
| 42 |
+
import faiss
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
# ---------------------------
|
| 46 |
+
# Utilities
|
| 47 |
+
# ---------------------------
|
| 48 |
+
def sha1_16(s: str) -> str:
    """Return the first 16 hex characters of the SHA-1 digest of *s* (UTF-8 encoded)."""
    digest = hashlib.sha1(s.encode("utf-8"))
    hex_digest = digest.hexdigest()
    return hex_digest[:16]
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def ensure_dir(p: str):
    """Create directory *p*, including missing parents; do nothing if it already exists."""
    target = Path(p)
    target.mkdir(parents=True, exist_ok=True)
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def load_spacy(model: str = "en_core_web_sm"):
    """
    Load and return the spaCy pipeline named *model*.

    If spaCy raises OSError, an install hint is printed and the same
    exception is re-raised.
    """
    try:
        pipeline = spacy.load(model)
    except OSError as err:
        hint = (
            f"[!] spaCy model '{model}' not found. Install it once with:\n"
            f"    python -m spacy download {model}\n"
        )
        print(hint)
        raise err
    return pipeline
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def e5_prefix(text: str, is_query: bool, model_name: str) -> str:
    """
    Add the E5-style role prefix when *model_name* contains "e5".

    Queries get "query: ", everything else gets "passage: ". Text for other
    models (BGE & others usually don't need prefixes) is returned unchanged.
    """
    uses_e5 = "e5" in model_name.lower()
    if not uses_e5:
        return text
    role = "query" if is_query else "passage"
    return f"{role}: {text}"
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
def chunk_sentences(nlp, text: str, target_chars: int = 900, overlap: int = 120) -> List[str]:
    """Split ``text`` into sentence-aligned chunks of roughly ``target_chars`` characters.

    Args:
        nlp: Callable returning an object whose ``.sents`` iterates items with a
            ``.text`` attribute (a spaCy pipeline in this file).
        text: Raw text to split.
        target_chars: Soft upper bound on chunk length. A single sentence longer
            than this is still emitted whole as its own chunk.
        overlap: Number of trailing characters of the previous chunk that are
            prepended to the next one; ``<= 0`` disables overlap.

    Returns:
        The list of chunk strings, empty when ``text`` is blank.
    """
    doc = nlp(text)
    sents = [s.text.strip() for s in doc.sents if s.text.strip()]
    chunks: List[str] = []
    cur: List[str] = []
    cur_len = 0  # always equals len(" ".join(cur))
    for s in sents:
        # Count the joining space so a chunk of several sentences does not
        # silently overshoot target_chars by one character per sentence.
        projected = cur_len + 1 + len(s) if cur else len(s)
        if cur and projected > target_chars:
            chunk = " ".join(cur)
            chunks.append(chunk)
            tail = chunk[-overlap:] if overlap > 0 else ""
            cur = [tail, s] if tail else [s]
            cur_len = len(" ".join(cur))
        else:
            cur.append(s)
            cur_len = projected
    if cur:
        chunks.append(" ".join(cur))
    # Fallback if text had no sentence boundaries: cut fixed-size windows over
    # the whole text instead of keeping only the first target_chars characters.
    if not chunks and text.strip():
        step = max(1, target_chars)
        windows = (text[i:i + step] for i in range(0, len(text), step))
        chunks = [w for w in windows if w.strip()]
    return chunks
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def extract_pdf_text(pdf_path: str) -> List[Tuple[int, str]]:
    """Return ``[(page_num, text)]`` for every page, using pdfplumber only.

    Page numbers start at 1. A page for which ``extract_text()`` returns a falsy
    value is reported with an empty string. No OCR is attempted here.
    """
    with pdfplumber.open(pdf_path) as pdf:
        return [
            (page_no, page.extract_text() or "")
            for page_no, page in enumerate(pdf.pages, start=1)
        ]
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
def extract_pdf_text_with_ocr(pdf_path: str, dpi: int = 300, min_len: int = 20) -> List[Tuple[int, str]]:
    """Return ``[(page_num, text)]`` using pdfplumber, with OCR for text-poor pages.

    A page whose stripped pdfplumber text has at least ``min_len`` characters is
    used as-is. Any other page is rendered at ``dpi`` with pdf2image and read
    with pytesseract (English). Requires poppler & tesseract installed.

    Raises:
        RuntimeError: if the optional OCR imports failed at module load.
    """
    if not _OCR_AVAILABLE:
        raise RuntimeError("OCR requested but pdf2image/pytesseract not available.")

    out = []
    # Open the document once; re-opening it for every page re-parses the whole
    # file each time and makes extraction quadratic in the page count.
    with pdfplumber.open(pdf_path) as pdf:
        for i, page in enumerate(pdf.pages, start=1):
            txt = (page.extract_text() or "").strip()

            if len(txt) >= min_len:
                out.append((i, txt))
                continue

            # OCR fallback for this page only
            images = convert_from_path(pdf_path, first_page=i, last_page=i, dpi=dpi)
            if not images:
                # Nothing was rendered: keep whatever little text we have rather
                # than failing the whole document with an IndexError.
                out.append((i, txt))
                continue
            ocr_txt = pytesseract.image_to_string(images[0], lang="eng")
            out.append((i, ocr_txt or ""))

    return out
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
def build_corpus(pdf_dir: str, use_ocr: bool, nlp, chunk_chars: int, overlap: int, min_text_len_for_ocr: int) -> List[Dict]:
    """Read every ``*.pdf`` under ``pdf_dir`` (recursively) and return chunk records.

    Each record is a dict with ``doc_path``, ``page``, ``chunk_id`` (the chunk's
    position within its page) and ``text``. A PDF that raises while being read is
    reported on stdout and skipped; blank pages produce no records.
    """
    records = []
    for pdf_file in tqdm(sorted(Path(pdf_dir).glob("**/*.pdf")), desc="Reading PDFs"):
        path_str = str(pdf_file)
        try:
            if use_ocr:
                pages = extract_pdf_text_with_ocr(path_str, min_len=min_text_len_for_ocr)
            else:
                pages = extract_pdf_text(path_str)
        except Exception as e:
            print(f"[!] Failed to read {pdf_file}: {e}")
            continue

        for page_num, txt in pages:
            if not (txt and txt.strip()):
                continue
            pieces = chunk_sentences(nlp, txt, target_chars=chunk_chars, overlap=overlap)
            records.extend(
                {
                    "doc_path": path_str,
                    "page": page_num,
                    "chunk_id": idx,
                    "text": chunk,
                }
                for idx, chunk in enumerate(pieces)
            )
    return records
|
| 157 |
+
|
| 158 |
+
|
| 159 |
+
def write_metadata(meta_path: str, corpus: List[Dict]):
    """Write ``corpus`` to ``meta_path`` as UTF-8 JSON Lines, one record per line."""
    with open(meta_path, "w", encoding="utf-8") as f:
        f.writelines(
            json.dumps(rec, ensure_ascii=False) + "\n"
            for rec in corpus
        )
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
def read_metadata(meta_path: str) -> List[Dict]:
    """Read a UTF-8 JSON Lines file and return its records in file order.

    Blank or whitespace-only lines are skipped, so a stray empty line (for
    example a trailing newline added by an editor) does not abort loading.
    """
    out = []
    with open(meta_path, "r", encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                continue
            out.append(json.loads(line))
    return out
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
# ---------------------------
|
| 174 |
+
# Indexing
|
| 175 |
+
# ---------------------------
|
| 176 |
+
def cmd_index(args):
    """Handle the ``index`` sub-command: chunk PDFs, embed them, write index + metadata.

    Reads ``pdf_dir``, ``index_dir``, ``embed_model``, ``batch_size``,
    ``chunk_chars``, ``overlap``, ``ocr`` and ``ocr_min_text_len`` from ``args``.
    Exits with status 2 when OCR was requested but its dependencies are missing,
    and with status 1 when no text could be extracted.
    """
    # Check prerequisites before touching the filesystem.
    if args.ocr and not _OCR_AVAILABLE:
        print("[!] --ocr requested but OCR deps not available. Install poppler, tesseract, pdf2image, pytesseract.")
        sys.exit(2)

    ensure_dir(args.index_dir)

    print("[*] Loading spaCy...")
    nlp = load_spacy("en_core_web_sm")

    print("[*] Building corpus from PDFs...")
    corpus = build_corpus(
        pdf_dir=args.pdf_dir,
        use_ocr=args.ocr,
        nlp=nlp,
        chunk_chars=args.chunk_chars,
        overlap=args.overlap,
        min_text_len_for_ocr=args.ocr_min_text_len
    )
    if not corpus:
        print("[!] No text found. Are your PDFs scanned? Try --ocr.")
        sys.exit(1)

    print(f"[*] Loading embedding model: {args.embed_model}")
    embedder = SentenceTransformer(args.embed_model)

    texts = [e5_prefix(rec["text"], is_query=False, model_name=args.embed_model) for rec in corpus]

    print("[*] Encoding chunks...")
    embeddings = embedder.encode(
        texts,
        batch_size=args.batch_size,
        normalize_embeddings=True,
        show_progress_bar=True
    ).astype("float32")

    dim = embeddings.shape[1]
    index = faiss.IndexFlatIP(dim)  # cosine via normalized vectors + inner product
    index.add(embeddings)

    # Persist both artefacts only once encoding has succeeded. Writing the
    # metadata earlier could leave a fresh meta.jsonl next to a stale
    # faiss.index if model loading or encoding failed during a re-index.
    meta_path = os.path.join(args.index_dir, "meta.jsonl")
    write_metadata(meta_path, corpus)
    print(f"[*] Wrote metadata for {len(corpus)} chunks to {meta_path}")

    index_path = os.path.join(args.index_dir, "faiss.index")
    faiss.write_index(index, index_path)
    print(f"[*] Wrote FAISS index to {index_path}")

    print("[✓] Indexing complete.")
|
| 225 |
+
|
| 226 |
+
|
| 227 |
+
# ---------------------------
|
| 228 |
+
# Searching
|
| 229 |
+
# ---------------------------
|
| 230 |
+
def pretty_snippet(s: str, max_len: int = 320) -> str:
    """Collapse all whitespace runs in ``s`` to single spaces and cap the length.

    A result longer than ``max_len`` is cut to ``max_len - 1`` characters and
    terminated with an ellipsis character.
    """
    collapsed = " ".join(s.split())
    if len(collapsed) <= max_len:
        return collapsed
    return collapsed[: max_len - 1] + "…"
|
| 233 |
+
|
| 234 |
+
|
| 235 |
+
def cmd_search(args):
    """Handle the ``search`` sub-command: embed the query, search FAISS, print hits.

    Reads ``index_dir``, ``query``, ``top_k``, ``fetch_k``, ``embed_model``,
    ``rerank``, ``reranker_model`` and ``jsonl`` from ``args``. Exits with status
    1 when the index files are missing or inconsistent with each other, and with
    status 0 when there is nothing to return.
    """
    index_path = os.path.join(args.index_dir, "faiss.index")
    meta_path = os.path.join(args.index_dir, "meta.jsonl")
    if not os.path.exists(index_path) or not os.path.exists(meta_path):
        print("[!] Index not found. Run 'index' first.")
        sys.exit(1)

    print(f"[*] Loading FAISS index: {index_path}")
    index = faiss.read_index(index_path)

    print("[*] Loading metadata…")
    meta = read_metadata(meta_path)

    # Vector i must describe metadata record i; a size mismatch means the two
    # files come from different runs and every hit would show the wrong text.
    if index.ntotal != len(meta):
        print(
            f"[!] Index/metadata mismatch: {index.ntotal} vectors vs "
            f"{len(meta)} metadata records. Re-run 'index'."
        )
        sys.exit(1)

    # Never fetch fewer candidates than will be shown, nor more than exist.
    fetch_k = min(max(args.fetch_k, args.top_k), index.ntotal)
    if fetch_k <= 0:
        print("[!] No results.")
        sys.exit(0)

    print(f"[*] Loading embedding model: {args.embed_model}")
    embedder = SentenceTransformer(args.embed_model)

    query_text = e5_prefix(args.query, is_query=True, model_name=args.embed_model)
    qvec = embedder.encode([query_text], normalize_embeddings=True).astype("float32")
    D, I = index.search(qvec, fetch_k)

    candidates = []
    for j, idx in enumerate(I[0]):
        if idx == -1:  # this slot holds no hit
            continue
        rec = dict(meta[int(idx)])  # copy so the scores below do not touch meta
        rec["ann_score"] = float(D[0][j])
        candidates.append(rec)

    if not candidates:
        print("[!] No results.")
        sys.exit(0)

    # Optional reranking
    if args.rerank:
        print(f"[*] Reranking top {len(candidates)} with {args.reranker_model}…")
        reranker = CrossEncoder(args.reranker_model)
        pairs = [(args.query, c["text"]) for c in candidates]
        scores = reranker.predict(pairs)
        for c, s in zip(candidates, scores):
            c["rerank_score"] = float(s)
        candidates.sort(key=lambda x: x["rerank_score"], reverse=True)
    else:
        candidates.sort(key=lambda x: x["ann_score"], reverse=True)

    results = candidates[: args.top_k]

    # Print nicely
    print("\n=== Results ===\n")
    for i, r in enumerate(results, start=1):
        base = Path(r["doc_path"]).name
        score = r.get("rerank_score", r["ann_score"])
        print(f"{i}. {base} p.{r['page']} score={score:.3f}")
        print(f" {pretty_snippet(r['text'])}\n")

    if args.jsonl:
        out = [
            {
                "doc_path": r["doc_path"],
                "page": r["page"],
                "score": r.get("rerank_score", r["ann_score"]),
                "text": r["text"],
            }
            for r in results
        ]
        print(json.dumps(out, ensure_ascii=False, indent=2))
|
| 299 |
+
|
| 300 |
+
|
| 301 |
+
# ---------------------------
|
| 302 |
+
# Main (argparse)
|
| 303 |
+
# ---------------------------
|
| 304 |
+
def main():
    """Parse the command line and dispatch to the ``index`` or ``search`` handler."""
    cli = argparse.ArgumentParser(description="Tiny CLI for semantic PDF search (FAISS + Sentence-Transformers)")
    commands = cli.add_subparsers(dest="cmd", required=True)

    # index
    index_cmd = commands.add_parser("index", help="Index PDFs into a FAISS index")
    index_options = (
        (("--pdf-dir",), dict(required=True, help="Folder with PDFs")),
        (("--index-dir",), dict(required=True, help="Folder to write index & metadata")),
        (("--embed-model",), dict(default="intfloat/e5-base-v2", help="Sentence-Transformers model name")),
        (("--batch-size",), dict(type=int, default=64, help="Embedding batch size")),
        (("--chunk-chars",), dict(type=int, default=900, help="Target characters per chunk")),
        (("--overlap",), dict(type=int, default=120, help="Overlap characters between chunks")),
        (("--ocr",), dict(action="store_true", help="Enable OCR fallback for scan-like pages")),
        (("--ocr-min-text-len",), dict(type=int, default=20, help="If page text < N chars, OCR that page")),
    )
    for flags, options in index_options:
        index_cmd.add_argument(*flags, **options)
    index_cmd.set_defaults(func=cmd_index)

    # search
    search_cmd = commands.add_parser("search", help="Search an existing index")
    search_options = (
        (("--index-dir",), dict(required=True, help="Folder with faiss.index and meta.jsonl")),
        (("-q", "--query"), dict(required=True, help="Search query")),
        (("--top-k",), dict(type=int, default=8, help="How many results to show")),
        (("--fetch-k",), dict(type=int, default=40, help="First-stage ANN fetch depth (before rerank)")),
        (("--embed-model",), dict(default="intfloat/e5-base-v2", help="Sentence-Transformers model name")),
        (("--rerank",), dict(action="store_true", help="Enable CrossEncoder reranking")),
        (("--reranker-model",), dict(default="cross-encoder/ms-marco-MiniLM-L-6-v2", help="CrossEncoder name")),
        (("--jsonl",), dict(action="store_true", help="Also print results as JSON to stdout")),
    )
    for flags, options in search_options:
        search_cmd.add_argument(*flags, **options)
    search_cmd.set_defaults(func=cmd_search)

    namespace = cli.parse_args()
    namespace.func(namespace)


if __name__ == "__main__":
    main()
|
pmc_pdf_downloader.py
ADDED
|
@@ -0,0 +1,245 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Download NIH/PMC PDFs in batches from a CSV produced by pmid2pmcid.py.
|
| 4 |
+
|
| 5 |
+
Input CSV schema (minimum):
|
| 6 |
+
pmid,pmcid,doi,status,errmsg
|
| 7 |
+
Only rows with a non-empty PMCID are attempted (NIH/PMC full text).
|
| 8 |
+
|
| 9 |
+
Features
|
| 10 |
+
- Batch processing with per-batch delay
|
| 11 |
+
- Concurrency (threaded) with polite throttling
|
| 12 |
+
- Robust URL strategy (handles /pdf/ and /pdf/<PMCID>.pdf)
|
| 13 |
+
- Retries with backoff for 429/5xx
|
| 14 |
+
- Resume: skips already-downloaded files unless --overwrite
|
| 15 |
+
- Manifest CSV of successes/failures
|
| 16 |
+
|
| 17 |
+
Examples
|
| 18 |
+
python pmc_pdf_downloader.py --in pmid_to_pmcid.csv --out-dir ./pmc_pdfs \
|
| 19 |
+
--batch-size 40 --concurrency 4 --delay 1.0 --email you@org.edu
|
| 20 |
+
|
| 21 |
+
# Overwrite existing PDFs and be extra slow/polite
|
| 22 |
+
python pmc_pdf_downloader.py --in map.csv --out-dir ./pmc_pdfs --overwrite \
|
| 23 |
+
--batch-size 20 --concurrency 2 --delay 2.0
|
| 24 |
+
"""
|
| 25 |
+
from __future__ import annotations
|
| 26 |
+
|
| 27 |
+
import argparse
|
| 28 |
+
import csv
|
| 29 |
+
import os
|
| 30 |
+
import time
|
| 31 |
+
import sys
|
| 32 |
+
import math
|
| 33 |
+
import re
|
| 34 |
+
import pathlib
|
| 35 |
+
from typing import List, Dict, Optional, Tuple
|
| 36 |
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 37 |
+
|
| 38 |
+
import requests
|
| 39 |
+
|
| 40 |
+
# User-Agent string sent with every download request.
UA = "pmc-pdf-downloader/1.0 (+https://example.org)"

# Base URLs tried in order for each article.
PMC_HOSTS = ["https://www.ncbi.nlm.nih.gov", "https://pmc.ncbi.nlm.nih.gov"]

# Strategies we try in order for each host
PMC_PATH_PATTERNS = [
    # canonical "directory" that serves main PDF
    "/pmc/articles/{pmcid}/pdf/",
    # explicit filename
    "/pmc/articles/{pmcid}/pdf/{pmcid}.pdf",
]
|
| 50 |
+
|
| 51 |
+
def read_rows(csv_path: str) -> List[Dict[str, str]]:
    """Read a headered CSV file into a list of ``{column: stripped value}`` dicts.

    The file is decoded as UTF-8 and a leading byte-order mark, if present, is
    dropped so the first column keeps its real name. Cells missing from a short
    row become ``""``; cells beyond the header in a long row are ignored.

    Raises:
        ValueError: if the file has no header row.
    """
    rows = []
    # "utf-8-sig" strips a BOM; plain "utf-8" would turn the first header into
    # "\ufeffpmid" and every lookup of that column would come back empty.
    with open(csv_path, newline="", encoding="utf-8-sig") as f:
        rd = csv.DictReader(f)
        if not rd.fieldnames:
            raise ValueError("CSV appears to have no header row.")
        for r in rd:
            # DictReader stores surplus cells as a list under the key None;
            # skip that entry because a list has no .strip().
            rows.append({k: (v or "").strip() for k, v in r.items() if k is not None})
    return rows
|
| 60 |
+
|
| 61 |
+
def valid_pmcid(pmcid: str) -> bool:
    """Return True when ``pmcid`` is exactly "PMC" (any letter case) followed by digits.

    A falsy value such as ``""`` or ``None`` is treated as invalid.
    """
    candidate = pmcid or ""
    return re.fullmatch(r"(?i)PMC\d+", candidate) is not None
|
| 64 |
+
|
| 65 |
+
def ensure_dir(path: str):
    """Create ``path`` and any missing parent directories; a no-op if it exists."""
    directory = pathlib.Path(path)
    directory.mkdir(parents=True, exist_ok=True)
|
| 67 |
+
|
| 68 |
+
def sanitize_filename(name: str) -> str:
    """Replace each run of characters outside ``A-Za-z0-9._-`` with one underscore."""
    unsafe_run = re.compile(r"[^A-Za-z0-9._\-]+")
    return unsafe_run.sub("_", name)
|
| 70 |
+
|
| 71 |
+
def pick_filename(pmcid: str, pmid: str = "", doi: str = "") -> str:
    """Return the sanitized ``<ID>.pdf`` file name for an article.

    The upper-cased ``pmcid`` is preferred, then ``pmid``, then the literal
    ``"UNKNOWN"``. ``doi`` is accepted but not used.
    """
    if pmcid:
        base = pmcid.upper()
    else:
        base = pmid or "UNKNOWN"
    return sanitize_filename(f"{base}.pdf")
|
| 74 |
+
|
| 75 |
+
def stream_download(url: str, dest_path: str, timeout: int, session: "requests.Session") -> Tuple[bool, str]:
    """Stream ``url`` to ``dest_path``; returns (ok, message).

    The download is accepted only for HTTP 200 with a Content-Type containing
    ``application/pdf``; otherwise nothing is written and the message carries the
    status code and content type.

    The body is always written to ``dest_path`` itself. A file name offered in
    Content-Disposition is deliberately ignored: callers record and later
    re-check ``dest_path`` (resume / "already downloaded"), so writing anywhere
    else would make them report a path that does not exist. Data goes to a
    uniquely named temporary sibling first and is renamed into place only when
    complete, so an interrupted transfer never leaves a truncated PDF behind.
    """
    import uuid

    tmp_path = f"{dest_path}.{uuid.uuid4().hex}.part"
    with session.get(url, stream=True, timeout=timeout) as r:
        content_type = r.headers.get("Content-Type", "")
        if r.status_code != 200 or "application/pdf" not in content_type.lower():
            return False, f"HTTP {r.status_code} CT={r.headers.get('Content-Type')}"
        try:
            with open(tmp_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=1024 * 256):
                    if chunk:
                        f.write(chunk)
            os.replace(tmp_path, dest_path)
        except BaseException:
            # Best-effort cleanup of the partial file; the original error is
            # what the caller needs to see, so removal failures are ignored.
            try:
                os.remove(tmp_path)
            except OSError:
                pass
            raise
    return True, "ok"
|
| 93 |
+
|
| 94 |
+
def try_download_pmc_pdf(pmcid: str, out_dir: str, timeout: int, session: "requests.Session") -> Tuple[bool, str, str]:
    """Try each host/path combination in order until one yields a PDF.

    Returns ``(True, "<host> <message>", target_path)`` for the first successful
    download, or ``(False, "no_pdf_found", "")`` when every combination fails.
    """
    target = os.path.join(out_dir, pick_filename(pmcid))
    attempts = (
        (host, f"{host}{pattern.format(pmcid=pmcid)}")
        for host in PMC_HOSTS
        for pattern in PMC_PATH_PATTERNS
    )
    for host, url in attempts:
        ok, msg = stream_download(url, target, timeout=timeout, session=session)
        if ok:
            return True, f"{host} {msg}", target
    return False, "no_pdf_found", ""
|
| 106 |
+
|
| 107 |
+
def polite_retry(fn, *, retries=4, backoff=1.5, initial_delay=0.0, on_retry=None):
    """Wrap ``fn`` so that ``requests.RequestException`` triggers a delayed retry.

    Args:
        fn: Callable to wrap.
        retries: Maximum number of retries after the first failed call; the
            exception from the final attempt propagates.
        backoff: Factor applied to the delay after every retry.
        initial_delay: Delay before the first retry, floored at 0.25 seconds.
        on_retry: Optional ``on_retry(attempt, exc)`` hook called before sleeping.

    Returns:
        A wrapper with the same call signature and metadata as ``fn``.
    """
    import functools

    @functools.wraps(fn)
    def wrapper(*args, **kwargs):
        delay = initial_delay
        attempt = 0
        while True:
            try:
                return fn(*args, **kwargs)
            except requests.RequestException as e:
                attempt += 1
                if attempt > retries:
                    raise
                if on_retry:
                    on_retry(attempt, e)
                # Apply the floor BEFORE scaling: with initial_delay == 0 the
                # old "sleep(max(0.25, delay)); delay *= backoff" kept delay at
                # zero forever, so the wait never actually backed off.
                delay = max(0.25, delay)
                time.sleep(delay)
                delay *= backoff
    return wrapper
|
| 123 |
+
|
| 124 |
+
def worker(row: Dict[str, str],
           out_dir: str,
           timeout: int,
           overwrite: bool,
           email: Optional[str]) -> Dict[str, str]:
    """Download the PDF for one CSV row and return its manifest record.

    The record carries ``pmid``, ``pmcid``, ``doi``, ``status``, ``message`` and
    ``file``. ``status`` is ``skip`` (invalid PMCID), ``ok_cached`` (file already
    present and ``overwrite`` is false), ``ok``, ``fail`` (no URL served a PDF)
    or ``error`` (an exception escaped the retry wrapper).
    """
    pmid, pmcid, doi = (row.get(key, "") for key in ("pmid", "pmcid", "doi"))
    result = dict(pmid=pmid, pmcid=pmcid, doi=doi, status="", message="", file="")

    if not valid_pmcid(pmcid):
        result.update(status="skip", message="no_pmcid")
        return result

    target_path = os.path.join(out_dir, pick_filename(pmcid, pmid, doi))
    if not overwrite and os.path.exists(target_path):
        result.update(status="ok_cached", file=target_path)
        return result

    headers = {"User-Agent": UA}
    if email:
        headers["From"] = email  # be nice; some servers log contact

    def _on_retry(attempt, exc):
        # print minimal retry info
        sys.stderr.write(f"[retry] {pmcid} attempt {attempt}: {exc}\n")

    with requests.Session() as s:
        s.headers.update(headers)

        safe_download = polite_retry(
            lambda: try_download_pmc_pdf(pmcid, out_dir, timeout, s),
            retries=4, backoff=1.8, initial_delay=0.5, on_retry=_on_retry
        )

        try:
            ok, msg, final_path = safe_download()
        except Exception as e:
            result["status"] = "error"
            result["message"] = f"{type(e).__name__}: {e}"
        else:
            result["message"] = msg
            if ok:
                result["status"] = "ok"
                result["file"] = final_path
            else:
                result["status"] = "fail"

    return result
|
| 182 |
+
|
| 183 |
+
def write_manifest(path: str, rows: List[Dict[str, str]]):
    """Write ``rows`` to ``path`` as a UTF-8 CSV with a fixed header.

    Columns are ``pmid, pmcid, doi, status, message, file`` in that order.
    """
    columns = ["pmid", "pmcid", "doi", "status", "message", "file"]
    with open(path, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=columns)
        writer.writeheader()
        writer.writerows(rows)
|
| 190 |
+
|
| 191 |
+
def chunked(seq, size):
    """Yield consecutive slices of ``seq`` holding at most ``size`` items each."""
    yield from (seq[start:start + size] for start in range(0, len(seq), size))
|
| 194 |
+
|
| 195 |
+
def main():
    """Command-line entry point: download PMC PDFs batch by batch, then write a manifest.

    Rows without a valid PMCID are ignored and repeated PMCIDs are downloaded
    only once. The manifest is written even when the run is interrupted, so
    completed work is always recorded.
    """
    ap = argparse.ArgumentParser(description="Download NIH/PMC PDFs in batches from a pmid→pmcid CSV.")
    ap.add_argument("--in", dest="in_csv", required=True, help="Input CSV (from pmid2pmcid.py)")
    ap.add_argument("--out-dir", required=True, help="Directory to write PDFs")
    ap.add_argument("--manifest", default="pmc_download_manifest.csv", help="CSV manifest output (default: pmc_download_manifest.csv)")
    ap.add_argument("--batch-size", type=int, default=40, help="Items per batch (default: 40)")
    ap.add_argument("--concurrency", type=int, default=4, help="Concurrent downloads per batch (default: 4)")
    ap.add_argument("--delay", type=float, default=1.0, help="Seconds to sleep between batches (default: 1.0)")
    ap.add_argument("--timeout", type=int, default=60, help="Per-request timeout seconds (default: 60)")
    ap.add_argument("--overwrite", action="store_true", help="Overwrite existing PDFs")
    ap.add_argument("--email", help="Contact email (sent in headers)")
    args = ap.parse_args()

    # Reject values that would otherwise surface as an opaque ValueError from
    # range() / ThreadPoolExecutor deep inside the download loop.
    if args.batch_size < 1:
        ap.error("--batch-size must be >= 1")
    if args.concurrency < 1:
        ap.error("--concurrency must be >= 1")

    ensure_dir(args.out_dir)
    rows = read_rows(args.in_csv)

    # keep only rows with PMCID, and only the first row for each PMCID: two
    # workers fetching the same article would write the same file concurrently.
    todo = []
    seen = set()
    duplicates = 0
    for r in rows:
        pmcid = r.get("pmcid", "")
        if not valid_pmcid(pmcid):
            continue
        key = pmcid.upper()
        if key in seen:
            duplicates += 1
            continue
        seen.add(key)
        todo.append(r)

    results: List[Dict[str, str]] = []
    total = len(todo)
    if total == 0:
        print("No rows with a valid PMCID found. Nothing to download.")
        sys.exit(0)

    print(f"Found {total} entries with PMCID. Starting downloads…")
    if duplicates:
        print(f"Ignoring {duplicates} duplicate PMCID row(s).")

    batches = list(chunked(todo, args.batch_size))
    try:
        for batch_num, batch in enumerate(batches, start=1):
            print(f"Batch {batch_num}: {len(batch)} items")
            with ThreadPoolExecutor(max_workers=args.concurrency) as ex:
                futs = [ex.submit(worker, r, args.out_dir, args.timeout, args.overwrite, args.email) for r in batch]
                for fut in as_completed(futs):
                    res = fut.result()
                    results.append(res)
                    if res["status"].startswith("ok"):
                        print(f" ✓ {res['pmcid']} → {os.path.basename(res['file'])}")
                    elif res["status"] == "skip":
                        print(f" - {res['pmcid']} skipped ({res['message']})")
                    else:
                        print(f" ✗ {res['pmcid']} ({res['message']})")
            # polite pause between batches (pointless after the last one)
            if batch_num < len(batches):
                time.sleep(max(0.0, args.delay))
    finally:
        # Record whatever finished, even if the run was interrupted.
        write_manifest(args.manifest, results)

    ok = sum(1 for r in results if r["status"].startswith("ok"))
    fail = sum(1 for r in results if r["status"] in ("fail", "error"))
    skip = sum(1 for r in results if r["status"] == "skip")
    print(f"\nDone. ok={ok}, fail={fail}, skip={skip}. Manifest: {args.manifest}")


if __name__ == "__main__":
    main()
|
pmcids.txt
ADDED
|
@@ -0,0 +1,732 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
PMC4553269
|
| 2 |
+
PMC11834963
|
| 3 |
+
PMC4767593
|
| 4 |
+
PMC10043565
|
| 5 |
+
PMC4827439
|
| 6 |
+
PMC4312739
|
| 7 |
+
PMC6007852
|
| 8 |
+
PMC8602441
|
| 9 |
+
PMC9825391
|
| 10 |
+
PMC5890941
|
| 11 |
+
PMC11233068
|
| 12 |
+
PMC10435547
|
| 13 |
+
PMC7061455
|
| 14 |
+
PMC6035749
|
| 15 |
+
PMC10644000
|
| 16 |
+
PMC4013720
|
| 17 |
+
PMC12468851
|
| 18 |
+
PMC12468851
|
| 19 |
+
PMC4189196
|
| 20 |
+
PMC12541443
|
| 21 |
+
PMC4313860
|
| 22 |
+
PMC6642361
|
| 23 |
+
PMC5628134
|
| 24 |
+
PMC9827113
|
| 25 |
+
PMC4359951
|
| 26 |
+
PMC4878831
|
| 27 |
+
PMC8634406
|
| 28 |
+
PMC7680360
|
| 29 |
+
PMC8613248
|
| 30 |
+
PMC8906458
|
| 31 |
+
PMC10841595
|
| 32 |
+
PMC4070589
|
| 33 |
+
PMC6063411
|
| 34 |
+
PMC7685753
|
| 35 |
+
PMC9197876
|
| 36 |
+
PMC9772093
|
| 37 |
+
PMC9807588
|
| 38 |
+
PMC10852615
|
| 39 |
+
PMC10581608
|
| 40 |
+
PMC6438729
|
| 41 |
+
PMC12463604
|
| 42 |
+
PMC9582036
|
| 43 |
+
PMC2694412
|
| 44 |
+
PMC8057264
|
| 45 |
+
PMC9586871
|
| 46 |
+
PMC8052452
|
| 47 |
+
PMC5217532
|
| 48 |
+
PMC5995337
|
| 49 |
+
PMC9117241
|
| 50 |
+
PMC9983778
|
| 51 |
+
PMC10480533
|
| 52 |
+
PMC10875331
|
| 53 |
+
PMC12433863
|
| 54 |
+
PMC10338177
|
| 55 |
+
PMC12541558
|
| 56 |
+
PMC9882421
|
| 57 |
+
PMC5699446
|
| 58 |
+
PMC2911503
|
| 59 |
+
PMC7385919
|
| 60 |
+
PMC7476838
|
| 61 |
+
PMC7796332
|
| 62 |
+
PMC10309566
|
| 63 |
+
PMC10756077
|
| 64 |
+
PMC7446400
|
| 65 |
+
PMC10511463
|
| 66 |
+
PMC9529954
|
| 67 |
+
PMC11149479
|
| 68 |
+
PMC11108776
|
| 69 |
+
PMC5059781
|
| 70 |
+
PMC4675454
|
| 71 |
+
PMC7181640
|
| 72 |
+
PMC7181640
|
| 73 |
+
PMC11216861
|
| 74 |
+
PMC11419252
|
| 75 |
+
PMC11096044
|
| 76 |
+
PMC11371520
|
| 77 |
+
PMC6345401
|
| 78 |
+
PMC10330105
|
| 79 |
+
PMC4676955
|
| 80 |
+
PMC5957518
|
| 81 |
+
PMC6075717
|
| 82 |
+
PMC6028190
|
| 83 |
+
PMC5916809
|
| 84 |
+
PMC6066282
|
| 85 |
+
PMC6070353
|
| 86 |
+
PMC5916809
|
| 87 |
+
PMC7500457
|
| 88 |
+
PMC5916814
|
| 89 |
+
PMC5972025
|
| 90 |
+
PMC12390932
|
| 91 |
+
PMC6763222
|
| 92 |
+
PMC6339572
|
| 93 |
+
PMC11398981
|
| 94 |
+
PMC11405286
|
| 95 |
+
PMC3320027
|
| 96 |
+
PMC4636053
|
| 97 |
+
PMC12533196
|
| 98 |
+
PMC3690621
|
| 99 |
+
PMC3399763
|
| 100 |
+
PMC12487679
|
| 101 |
+
PMC4866047
|
| 102 |
+
PMC6647838
|
| 103 |
+
PMC3440846
|
| 104 |
+
PMC4850357
|
| 105 |
+
PMC4815041
|
| 106 |
+
PMC5957518
|
| 107 |
+
PMC6075717
|
| 108 |
+
PMC6028190
|
| 109 |
+
PMC5916809
|
| 110 |
+
PMC6066282
|
| 111 |
+
PMC6070353
|
| 112 |
+
PMC5916809
|
| 113 |
+
PMC12521747
|
| 114 |
+
PMC7500457
|
| 115 |
+
PMC5916814
|
| 116 |
+
PMC5972025
|
| 117 |
+
PMC12390932
|
| 118 |
+
PMC5957518
|
| 119 |
+
PMC6075717
|
| 120 |
+
PMC6028190
|
| 121 |
+
PMC5916809
|
| 122 |
+
PMC6066282
|
| 123 |
+
PMC6070353
|
| 124 |
+
PMC5916809
|
| 125 |
+
PMC12521747
|
| 126 |
+
PMC7500457
|
| 127 |
+
PMC5916814
|
| 128 |
+
PMC5972025
|
| 129 |
+
PMC12390932
|
| 130 |
+
PMC5957518
|
| 131 |
+
PMC6075717
|
| 132 |
+
PMC6028190
|
| 133 |
+
PMC5916809
|
| 134 |
+
PMC6066282
|
| 135 |
+
PMC6070353
|
| 136 |
+
PMC5916809
|
| 137 |
+
PMC7500457
|
| 138 |
+
PMC5916814
|
| 139 |
+
PMC5972025
|
| 140 |
+
PMC12390932
|
| 141 |
+
PMC6697103
|
| 142 |
+
PMC7339254
|
| 143 |
+
PMC6768830
|
| 144 |
+
PMC12545938
|
| 145 |
+
PMC10084830
|
| 146 |
+
PMC3401966
|
| 147 |
+
PMC4589486
|
| 148 |
+
PMC3309757
|
| 149 |
+
PMC4367811
|
| 150 |
+
PMC5957518
|
| 151 |
+
PMC6075717
|
| 152 |
+
PMC6028190
|
| 153 |
+
PMC5916809
|
| 154 |
+
PMC6066282
|
| 155 |
+
PMC6070353
|
| 156 |
+
PMC5916809
|
| 157 |
+
PMC12521747
|
| 158 |
+
PMC7500457
|
| 159 |
+
PMC5916814
|
| 160 |
+
PMC5972025
|
| 161 |
+
PMC12390932
|
| 162 |
+
PMC5957518
|
| 163 |
+
PMC6075717
|
| 164 |
+
PMC6028190
|
| 165 |
+
PMC5916809
|
| 166 |
+
PMC6066282
|
| 167 |
+
PMC6070353
|
| 168 |
+
PMC5916809
|
| 169 |
+
PMC7500457
|
| 170 |
+
PMC5916814
|
| 171 |
+
PMC5972025
|
| 172 |
+
PMC12390932
|
| 173 |
+
PMC6897368
|
| 174 |
+
PMC8285521
|
| 175 |
+
PMC9379253
|
| 176 |
+
PMC9189056
|
| 177 |
+
PMC10911804
|
| 178 |
+
PMC12552549
|
| 179 |
+
PMC5957518
|
| 180 |
+
PMC6075717
|
| 181 |
+
PMC6028190
|
| 182 |
+
PMC5916809
|
| 183 |
+
PMC6066282
|
| 184 |
+
PMC6070353
|
| 185 |
+
PMC5916809
|
| 186 |
+
PMC7500457
|
| 187 |
+
PMC5916814
|
| 188 |
+
PMC5972025
|
| 189 |
+
PMC12390932
|
| 190 |
+
PMC11095631
|
| 191 |
+
PMC3771322
|
| 192 |
+
PMC11326964
|
| 193 |
+
PMC3767041
|
| 194 |
+
PMC10391526
|
| 195 |
+
PMC4936195
|
| 196 |
+
PMC5180407
|
| 197 |
+
PMC4979995
|
| 198 |
+
PMC6280667
|
| 199 |
+
PMC3837510
|
| 200 |
+
PMC8505423
|
| 201 |
+
PMC3210554
|
| 202 |
+
PMC3744992
|
| 203 |
+
PMC3690918
|
| 204 |
+
PMC3198787
|
| 205 |
+
PMC4695400
|
| 206 |
+
PMC5643159
|
| 207 |
+
PMC6880934
|
| 208 |
+
PMC7572747
|
| 209 |
+
PMC9438279
|
| 210 |
+
PMC12504664
|
| 211 |
+
PMC9299269
|
| 212 |
+
PMC11101347
|
| 213 |
+
PMC11094415
|
| 214 |
+
PMC4170219
|
| 215 |
+
PMC9801308
|
| 216 |
+
PMC9200814
|
| 217 |
+
PMC5957518
|
| 218 |
+
PMC6075717
|
| 219 |
+
PMC6028190
|
| 220 |
+
PMC5916809
|
| 221 |
+
PMC6066282
|
| 222 |
+
PMC6070353
|
| 223 |
+
PMC5916809
|
| 224 |
+
PMC12521747
|
| 225 |
+
PMC7500457
|
| 226 |
+
PMC5916814
|
| 227 |
+
PMC5972025
|
| 228 |
+
PMC12390932
|
| 229 |
+
PMC5957518
|
| 230 |
+
PMC6075717
|
| 231 |
+
PMC6028190
|
| 232 |
+
PMC5916809
|
| 233 |
+
PMC6066282
|
| 234 |
+
PMC6070353
|
| 235 |
+
PMC5916809
|
| 236 |
+
PMC7500457
|
| 237 |
+
PMC5916814
|
| 238 |
+
PMC5972025
|
| 239 |
+
PMC12390932
|
| 240 |
+
PMC5957518
|
| 241 |
+
PMC6075717
|
| 242 |
+
PMC6028190
|
| 243 |
+
PMC5916809
|
| 244 |
+
PMC6066282
|
| 245 |
+
PMC6070353
|
| 246 |
+
PMC5916809
|
| 247 |
+
PMC12521747
|
| 248 |
+
PMC7500457
|
| 249 |
+
PMC5916814
|
| 250 |
+
PMC5972025
|
| 251 |
+
PMC12390932
|
| 252 |
+
PMC5957518
|
| 253 |
+
PMC6075717
|
| 254 |
+
PMC6028190
|
| 255 |
+
PMC5916809
|
| 256 |
+
PMC6066282
|
| 257 |
+
PMC6070353
|
| 258 |
+
PMC5916809
|
| 259 |
+
PMC12521747
|
| 260 |
+
PMC7500457
|
| 261 |
+
PMC5916814
|
| 262 |
+
PMC5972025
|
| 263 |
+
PMC12390932
|
| 264 |
+
PMC5957518
|
| 265 |
+
PMC6075717
|
| 266 |
+
PMC6028190
|
| 267 |
+
PMC5916809
|
| 268 |
+
PMC6066282
|
| 269 |
+
PMC6070353
|
| 270 |
+
PMC5916809
|
| 271 |
+
PMC7500457
|
| 272 |
+
PMC5916814
|
| 273 |
+
PMC5972025
|
| 274 |
+
PMC12390932
|
| 275 |
+
PMC5957518
|
| 276 |
+
PMC6075717
|
| 277 |
+
PMC6028190
|
| 278 |
+
PMC5916809
|
| 279 |
+
PMC6066282
|
| 280 |
+
PMC6070353
|
| 281 |
+
PMC5916809
|
| 282 |
+
PMC12521747
|
| 283 |
+
PMC7500457
|
| 284 |
+
PMC5916814
|
| 285 |
+
PMC5972025
|
| 286 |
+
PMC12390932
|
| 287 |
+
PMC5957518
|
| 288 |
+
PMC6075717
|
| 289 |
+
PMC6028190
|
| 290 |
+
PMC5916809
|
| 291 |
+
PMC6066282
|
| 292 |
+
PMC6070353
|
| 293 |
+
PMC5916809
|
| 294 |
+
PMC12521747
|
| 295 |
+
PMC7500457
|
| 296 |
+
PMC5916814
|
| 297 |
+
PMC5972025
|
| 298 |
+
PMC12390932
|
| 299 |
+
PMC5957518
|
| 300 |
+
PMC6075717
|
| 301 |
+
PMC6028190
|
| 302 |
+
PMC5916809
|
| 303 |
+
PMC6066282
|
| 304 |
+
PMC6070353
|
| 305 |
+
PMC5916809
|
| 306 |
+
PMC12521747
|
| 307 |
+
PMC7500457
|
| 308 |
+
PMC5916814
|
| 309 |
+
PMC5972025
|
| 310 |
+
PMC12390932
|
| 311 |
+
PMC5957518
|
| 312 |
+
PMC6075717
|
| 313 |
+
PMC6028190
|
| 314 |
+
PMC5916809
|
| 315 |
+
PMC6066282
|
| 316 |
+
PMC6070353
|
| 317 |
+
PMC5916809
|
| 318 |
+
PMC7500457
|
| 319 |
+
PMC5916814
|
| 320 |
+
PMC5972025
|
| 321 |
+
PMC12390932
|
| 322 |
+
PMC5957518
|
| 323 |
+
PMC6075717
|
| 324 |
+
PMC6028190
|
| 325 |
+
PMC5916809
|
| 326 |
+
PMC6066282
|
| 327 |
+
PMC6070353
|
| 328 |
+
PMC5916809
|
| 329 |
+
PMC12521747
|
| 330 |
+
PMC7500457
|
| 331 |
+
PMC5916814
|
| 332 |
+
PMC5972025
|
| 333 |
+
PMC12390932
|
| 334 |
+
PMC5957518
|
| 335 |
+
PMC6075717
|
| 336 |
+
PMC6028190
|
| 337 |
+
PMC5916809
|
| 338 |
+
PMC6066282
|
| 339 |
+
PMC6070353
|
| 340 |
+
PMC5916809
|
| 341 |
+
PMC12521747
|
| 342 |
+
PMC7500457
|
| 343 |
+
PMC5916814
|
| 344 |
+
PMC5972025
|
| 345 |
+
PMC12390932
|
| 346 |
+
PMC5957518
|
| 347 |
+
PMC6075717
|
| 348 |
+
PMC6028190
|
| 349 |
+
PMC5916809
|
| 350 |
+
PMC6066282
|
| 351 |
+
PMC6070353
|
| 352 |
+
PMC5916809
|
| 353 |
+
PMC12521747
|
| 354 |
+
PMC7500457
|
| 355 |
+
PMC5916814
|
| 356 |
+
PMC5972025
|
| 357 |
+
PMC12390932
|
| 358 |
+
PMC5957518
|
| 359 |
+
PMC6075717
|
| 360 |
+
PMC6028190
|
| 361 |
+
PMC5916809
|
| 362 |
+
PMC6066282
|
| 363 |
+
PMC6070353
|
| 364 |
+
PMC5916809
|
| 365 |
+
PMC12521747
|
| 366 |
+
PMC7500457
|
| 367 |
+
PMC5916814
|
| 368 |
+
PMC5972025
|
| 369 |
+
PMC12390932
|
| 370 |
+
PMC5957518
|
| 371 |
+
PMC6075717
|
| 372 |
+
PMC6028190
|
| 373 |
+
PMC5916809
|
| 374 |
+
PMC6066282
|
| 375 |
+
PMC6070353
|
| 376 |
+
PMC5916809
|
| 377 |
+
PMC7500457
|
| 378 |
+
PMC5916814
|
| 379 |
+
PMC5972025
|
| 380 |
+
PMC12390932
|
| 381 |
+
PMC5957518
|
| 382 |
+
PMC6075717
|
| 383 |
+
PMC6028190
|
| 384 |
+
PMC5916809
|
| 385 |
+
PMC6066282
|
| 386 |
+
PMC6070353
|
| 387 |
+
PMC5916809
|
| 388 |
+
PMC12521747
|
| 389 |
+
PMC7500457
|
| 390 |
+
PMC5916814
|
| 391 |
+
PMC5972025
|
| 392 |
+
PMC12390932
|
| 393 |
+
PMC5957518
|
| 394 |
+
PMC6075717
|
| 395 |
+
PMC6028190
|
| 396 |
+
PMC5916809
|
| 397 |
+
PMC6066282
|
| 398 |
+
PMC6070353
|
| 399 |
+
PMC5916809
|
| 400 |
+
PMC7500457
|
| 401 |
+
PMC5916814
|
| 402 |
+
PMC5972025
|
| 403 |
+
PMC12390932
|
| 404 |
+
PMC5957518
|
| 405 |
+
PMC6075717
|
| 406 |
+
PMC6028190
|
| 407 |
+
PMC5916809
|
| 408 |
+
PMC6066282
|
| 409 |
+
PMC6070353
|
| 410 |
+
PMC5916809
|
| 411 |
+
PMC7500457
|
| 412 |
+
PMC5916814
|
| 413 |
+
PMC5972025
|
| 414 |
+
PMC12390932
|
| 415 |
+
PMC5957518
|
| 416 |
+
PMC6075717
|
| 417 |
+
PMC6028190
|
| 418 |
+
PMC5916809
|
| 419 |
+
PMC6066282
|
| 420 |
+
PMC6070353
|
| 421 |
+
PMC5916809
|
| 422 |
+
PMC12521747
|
| 423 |
+
PMC7500457
|
| 424 |
+
PMC5916814
|
| 425 |
+
PMC5972025
|
| 426 |
+
PMC12390932
|
| 427 |
+
PMC5957518
|
| 428 |
+
PMC6075717
|
| 429 |
+
PMC6028190
|
| 430 |
+
PMC5916809
|
| 431 |
+
PMC6066282
|
| 432 |
+
PMC6070353
|
| 433 |
+
PMC5916809
|
| 434 |
+
PMC7500457
|
| 435 |
+
PMC5916814
|
| 436 |
+
PMC5972025
|
| 437 |
+
PMC12390932
|
| 438 |
+
PMC5957518
|
| 439 |
+
PMC6075717
|
| 440 |
+
PMC6028190
|
| 441 |
+
PMC5916809
|
| 442 |
+
PMC6066282
|
| 443 |
+
PMC6070353
|
| 444 |
+
PMC5916809
|
| 445 |
+
PMC12521747
|
| 446 |
+
PMC7500457
|
| 447 |
+
PMC5916814
|
| 448 |
+
PMC5972025
|
| 449 |
+
PMC12390932
|
| 450 |
+
PMC5957518
|
| 451 |
+
PMC6075717
|
| 452 |
+
PMC6028190
|
| 453 |
+
PMC5916809
|
| 454 |
+
PMC6066282
|
| 455 |
+
PMC6070353
|
| 456 |
+
PMC5916809
|
| 457 |
+
PMC7500457
|
| 458 |
+
PMC5916814
|
| 459 |
+
PMC5972025
|
| 460 |
+
PMC12390932
|
| 461 |
+
PMC5957518
|
| 462 |
+
PMC6075717
|
| 463 |
+
PMC6028190
|
| 464 |
+
PMC5916809
|
| 465 |
+
PMC6066282
|
| 466 |
+
PMC6070353
|
| 467 |
+
PMC5916809
|
| 468 |
+
PMC7500457
|
| 469 |
+
PMC5916814
|
| 470 |
+
PMC5972025
|
| 471 |
+
PMC12390932
|
| 472 |
+
PMC10202816
|
| 473 |
+
PMC5957518
|
| 474 |
+
PMC6075717
|
| 475 |
+
PMC6028190
|
| 476 |
+
PMC5916809
|
| 477 |
+
PMC6066282
|
| 478 |
+
PMC6070353
|
| 479 |
+
PMC5916809
|
| 480 |
+
PMC7500457
|
| 481 |
+
PMC5916814
|
| 482 |
+
PMC5972025
|
| 483 |
+
PMC12390932
|
| 484 |
+
PMC11655358
|
| 485 |
+
PMC9394403
|
| 486 |
+
PMC9147702
|
| 487 |
+
PMC12088707
|
| 488 |
+
PMC8027015
|
| 489 |
+
PMC11077417
|
| 490 |
+
PMC11371517
|
| 491 |
+
PMC12448954
|
| 492 |
+
PMC4982376
|
| 493 |
+
PMC4238969
|
| 494 |
+
PMC3753703
|
| 495 |
+
PMC7512009
|
| 496 |
+
PMC3919793
|
| 497 |
+
PMC3708595
|
| 498 |
+
PMC4818686
|
| 499 |
+
PMC4807116
|
| 500 |
+
PMC3999050
|
| 501 |
+
PMC5144107
|
| 502 |
+
PMC12534164
|
| 503 |
+
PMC12458290
|
| 504 |
+
PMC5549141
|
| 505 |
+
PMC6280667
|
| 506 |
+
PMC9636269
|
| 507 |
+
PMC9378589
|
| 508 |
+
PMC11726019
|
| 509 |
+
PMC4864027
|
| 510 |
+
PMC4148686
|
| 511 |
+
PMC3863681
|
| 512 |
+
PMC5189935
|
| 513 |
+
PMC5687509
|
| 514 |
+
PMC6757060
|
| 515 |
+
PMC7869928
|
| 516 |
+
PMC8143193
|
| 517 |
+
PMC8077737
|
| 518 |
+
PMC8815415
|
| 519 |
+
PMC3428862
|
| 520 |
+
PMC3465532
|
| 521 |
+
PMC6327853
|
| 522 |
+
PMC7450824
|
| 523 |
+
PMC5928087
|
| 524 |
+
PMC8831444
|
| 525 |
+
PMC7367727
|
| 526 |
+
PMC5765991
|
| 527 |
+
PMC7025898
|
| 528 |
+
PMC11835752
|
| 529 |
+
PMC11371517
|
| 530 |
+
PMC12318355
|
| 531 |
+
PMC11406943
|
| 532 |
+
PMC11291049
|
| 533 |
+
PMC11291049
|
| 534 |
+
PMC7581540
|
| 535 |
+
PMC11291049
|
| 536 |
+
PMC11185634
|
| 537 |
+
PMC11839034
|
| 538 |
+
PMC10150291
|
| 539 |
+
PMC12008543
|
| 540 |
+
PMC3678719
|
| 541 |
+
PMC12524419
|
| 542 |
+
PMC4264969
|
| 543 |
+
PMC12363069
|
| 544 |
+
PMC4313862
|
| 545 |
+
PMC5813492
|
| 546 |
+
PMC5659841
|
| 547 |
+
PMC6613387
|
| 548 |
+
PMC6636637
|
| 549 |
+
PMC8229851
|
| 550 |
+
PMC8228505
|
| 551 |
+
PMC10502449
|
| 552 |
+
PMC3415217
|
| 553 |
+
PMC5253129
|
| 554 |
+
PMC4587544
|
| 555 |
+
PMC5477828
|
| 556 |
+
PMC6689131
|
| 557 |
+
PMC6753053
|
| 558 |
+
PMC6457907
|
| 559 |
+
PMC8317046
|
| 560 |
+
PMC8613272
|
| 561 |
+
PMC9068765
|
| 562 |
+
PMC12548999
|
| 563 |
+
PMC12432380
|
| 564 |
+
PMC4160352
|
| 565 |
+
PMC3162986
|
| 566 |
+
PMC3858325
|
| 567 |
+
PMC12468851
|
| 568 |
+
PMC8282702
|
| 569 |
+
PMC8713028
|
| 570 |
+
PMC3998672
|
| 571 |
+
PMC4754110
|
| 572 |
+
PMC12452113
|
| 573 |
+
PMC12529571
|
| 574 |
+
PMC4993154
|
| 575 |
+
PMC3557932
|
| 576 |
+
PMC12261305
|
| 577 |
+
PMC3575604
|
| 578 |
+
PMC8295366
|
| 579 |
+
PMC7373300
|
| 580 |
+
PMC3466113
|
| 581 |
+
PMC3413789
|
| 582 |
+
PMC3662966
|
| 583 |
+
PMC3412905
|
| 584 |
+
PMC5482929
|
| 585 |
+
PMC7704768
|
| 586 |
+
PMC12443039
|
| 587 |
+
PMC10937974
|
| 588 |
+
PMC9018685
|
| 589 |
+
PMC5905700
|
| 590 |
+
PMC8475722
|
| 591 |
+
PMC8432745
|
| 592 |
+
PMC4241387
|
| 593 |
+
PMC3831489
|
| 594 |
+
PMC12550706
|
| 595 |
+
PMC5378171
|
| 596 |
+
PMC4808437
|
| 597 |
+
PMC6119118
|
| 598 |
+
PMC8933489
|
| 599 |
+
PMC8165771
|
| 600 |
+
PMC6898788
|
| 601 |
+
PMC8046739
|
| 602 |
+
PMC10023728
|
| 603 |
+
PMC11215372
|
| 604 |
+
PMC12468275
|
| 605 |
+
PMC4777652
|
| 606 |
+
PMC4881306
|
| 607 |
+
PMC3682833
|
| 608 |
+
PMC5094835
|
| 609 |
+
PMC3966280
|
| 610 |
+
PMC4993154
|
| 611 |
+
PMC5953836
|
| 612 |
+
PMC8213710
|
| 613 |
+
PMC11604310
|
| 614 |
+
PMC4249650
|
| 615 |
+
PMC12508281
|
| 616 |
+
PMC4489427
|
| 617 |
+
PMC3163504
|
| 618 |
+
PMC4048021
|
| 619 |
+
PMC3530898
|
| 620 |
+
PMC4403382
|
| 621 |
+
PMC4884143
|
| 622 |
+
PMC12553238
|
| 623 |
+
PMC3248495
|
| 624 |
+
PMC12553238
|
| 625 |
+
PMC5812436
|
| 626 |
+
PMC6075848
|
| 627 |
+
PMC7124988
|
| 628 |
+
PMC9308789
|
| 629 |
+
PMC8654574
|
| 630 |
+
PMC7305302
|
| 631 |
+
PMC3144496
|
| 632 |
+
PMC4558226
|
| 633 |
+
PMC3673022
|
| 634 |
+
PMC8941949
|
| 635 |
+
PMC12056210
|
| 636 |
+
PMC12404184
|
| 637 |
+
PMC12513462
|
| 638 |
+
PMC12533323
|
| 639 |
+
PMC3951336
|
| 640 |
+
PMC12508145
|
| 641 |
+
PMC5045679
|
| 642 |
+
PMC3396711
|
| 643 |
+
PMC4121784
|
| 644 |
+
PMC4484602
|
| 645 |
+
PMC5558263
|
| 646 |
+
PMC6107367
|
| 647 |
+
PMC6561293
|
| 648 |
+
PMC7444093
|
| 649 |
+
PMC6949382
|
| 650 |
+
PMC4237931
|
| 651 |
+
PMC7334067
|
| 652 |
+
PMC8776579
|
| 653 |
+
PMC9065149
|
| 654 |
+
PMC4915822
|
| 655 |
+
PMC3557461
|
| 656 |
+
PMC3600117
|
| 657 |
+
PMC4462130
|
| 658 |
+
PMC5693358
|
| 659 |
+
PMC5313262
|
| 660 |
+
PMC4861069
|
| 661 |
+
PMC9200818
|
| 662 |
+
PMC3367798
|
| 663 |
+
PMC3432702
|
| 664 |
+
PMC12468435
|
| 665 |
+
PMC3947264
|
| 666 |
+
PMC4315319
|
| 667 |
+
PMC4580370
|
| 668 |
+
PMC5054517
|
| 669 |
+
PMC12489181
|
| 670 |
+
PMC12523349
|
| 671 |
+
PMC5705185
|
| 672 |
+
PMC4767360
|
| 673 |
+
PMC5651175
|
| 674 |
+
PMC5808581
|
| 675 |
+
PMC9808553
|
| 676 |
+
PMC4354107
|
| 677 |
+
PMC3704730
|
| 678 |
+
PMC4826231
|
| 679 |
+
PMC6279519
|
| 680 |
+
PMC5587124
|
| 681 |
+
PMC6365097
|
| 682 |
+
PMC7233456
|
| 683 |
+
PMC9852004
|
| 684 |
+
PMC5903820
|
| 685 |
+
PMC6611775
|
| 686 |
+
PMC7367750
|
| 687 |
+
PMC7780630
|
| 688 |
+
PMC5957518
|
| 689 |
+
PMC6075717
|
| 690 |
+
PMC6028190
|
| 691 |
+
PMC5916809
|
| 692 |
+
PMC6066282
|
| 693 |
+
PMC6070353
|
| 694 |
+
PMC5916809
|
| 695 |
+
PMC12521747
|
| 696 |
+
PMC7500457
|
| 697 |
+
PMC5916814
|
| 698 |
+
PMC5972025
|
| 699 |
+
PMC12390932
|
| 700 |
+
PMC11463659
|
| 701 |
+
PMC9886551
|
| 702 |
+
PMC7785562
|
| 703 |
+
PMC11463462
|
| 704 |
+
PMC4603750
|
| 705 |
+
PMC4311405
|
| 706 |
+
PMC4231481
|
| 707 |
+
PMC4243044
|
| 708 |
+
PMC3962515
|
| 709 |
+
PMC7891089
|
| 710 |
+
PMC9771812
|
| 711 |
+
PMC11696176
|
| 712 |
+
PMC5685550
|
| 713 |
+
PMC5906695
|
| 714 |
+
PMC2671642
|
| 715 |
+
PMC3910500
|
| 716 |
+
PMC5596171
|
| 717 |
+
PMC5957518
|
| 718 |
+
PMC6075717
|
| 719 |
+
PMC6028190
|
| 720 |
+
PMC5916809
|
| 721 |
+
PMC6066282
|
| 722 |
+
PMC6070353
|
| 723 |
+
PMC5916809
|
| 724 |
+
PMC12521747
|
| 725 |
+
PMC7500457
|
| 726 |
+
PMC5916814
|
| 727 |
+
PMC5972025
|
| 728 |
+
PMC12390932
|
| 729 |
+
PMC7056576
|
| 730 |
+
PMC6810613
|
| 731 |
+
PMC8044053
|
| 732 |
+
PMC5461196
|
pmid2pmcid.py
ADDED
|
@@ -0,0 +1,219 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
import argparse, csv, sys, time, requests
|
| 3 |
+
from typing import Iterable, List, Dict, Optional, Tuple
|
| 4 |
+
try:
|
| 5 |
+
from Bio import Entrez
|
| 6 |
+
except Exception:
|
| 7 |
+
Entrez = None
|
| 8 |
+
|
| 9 |
+
IDCONV_URL = "https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/"
|
| 10 |
+
ELINK_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi"
|
| 11 |
+
UA = "pmid2pmcid-cli/1.2 (+https://example.org)"
|
| 12 |
+
|
| 13 |
+
# -------------------- IO --------------------
|
| 14 |
+
def read_study_pmids_from_csv(path: str) -> List[Dict[str, str]]:
    """
    Read (studyId, pmid) pairs from a CSV file that has a header row.

    The PMID column is the first of pmid/PMID/id/Id/ID found in the header.
    The study column is used only when it is named exactly 'studyId';
    otherwise every returned studyId is ''.
    Rows whose PMID cell is empty (after stripping) are skipped.

    The file is decoded as 'utf-8-sig' so that a leading byte-order mark does
    not become part of the first header name (which would make the column
    lookup below fail). Plain UTF-8 files are read exactly as before.

    Returns list of dicts: {'studyId': <str or ''>, 'pmid': <str>}
    Raises ValueError when there is no header row or no pmid-like column.
    """
    rows: List[Dict[str, str]] = []
    with open(path, newline="", encoding="utf-8-sig") as f:
        reader = csv.DictReader(f)
        if not reader.fieldnames:
            raise ValueError("No header row found in CSV.")

        # Discover columns
        study_col = "studyId" if "studyId" in reader.fieldnames else None

        pmid_col = None
        for c in ("pmid", "PMID", "id", "Id", "ID"):
            if c in reader.fieldnames:
                pmid_col = c
                break
        if not pmid_col:
            raise ValueError(f"No pmid-like column in {reader.fieldnames}")

        for row in reader:
            # Short rows yield None for missing cells, hence the `or ""`.
            pmid = (row.get(pmid_col) or "").strip()
            if not pmid:
                continue
            study = (row.get(study_col) or "").strip() if study_col else ""
            rows.append({"studyId": study, "pmid": pmid})
    return rows
|
| 46 |
+
|
| 47 |
+
def normalize_pmid(p: str) -> str:
    """Return *p* as a stripped string; a value starting with 'pmid' (any case) is reduced to its digits."""
    text = str(p).strip()
    if text.lower().startswith("pmid"):
        # Prefixed form such as "PMID: 123" -> keep only the digit characters.
        return "".join(filter(str.isdigit, text))
    # Covers both the empty string and an already-bare identifier.
    return text
|
| 54 |
+
|
| 55 |
+
def unique_pmids(rows: List[Dict[str, str]]) -> List[str]:
    """Return the distinct, non-empty normalized PMIDs of *rows* in first-seen order."""
    normalized = (normalize_pmid(row["pmid"]) for row in rows)
    # dict keys keep insertion order, so this de-duplicates without reordering.
    return list(dict.fromkeys(pmid for pmid in normalized if pmid))
|
| 64 |
+
|
| 65 |
+
# -------------------- NIH resolvers --------------------
|
| 66 |
+
def idconv_batch(pmids: List[str], email: Optional[str], verbose: bool) -> Dict[str, Dict]:
    """Query the ID Converter once and map each PMID to its record.

    Returns {pmid(str): {pmid, pmcid, doi, status, errmsg}}. Every PMID passed
    in is guaranteed a key in the result; those absent from the response get
    an all-empty record. Raises whatever ``raise_for_status`` raises on a
    non-success HTTP response.
    """
    query = {
        "ids": ",".join(pmids),
        "format": "json",
        "tool": "pmid2pmcid-cli",
    }
    if email:
        query["email"] = email

    response = requests.get(IDCONV_URL, params=query, timeout=60,
                            headers={"User-Agent": UA})
    response.raise_for_status()
    records = response.json().get("records", [])
    if verbose:
        print("[idconv] records:", len(records))

    resolved: Dict[str, Dict] = {}
    for record in records:
        # Key by string so lookups match the normalized (string) inputs;
        # fall back to the echoed request id when no pmid field is present.
        key = str(record.get("pmid") or record.get("requested-id") or "").strip()
        resolved[key] = {
            "pmid": key,
            "pmcid": record.get("pmcid") or "",
            "doi": record.get("doi") or "",
            "status": record.get("status") or "ok",
            "errmsg": record.get("errmsg") or "",
        }

    # Back-fill inputs the service did not return a record for.
    for requested in pmids:
        key = str(requested).strip()
        if key not in resolved:
            resolved[key] = {"pmid": key, "pmcid": "", "doi": "", "status": "", "errmsg": ""}
    return resolved
|
| 100 |
+
|
| 101 |
+
def resolve_idconv_all(pmids: List[str], email: Optional[str], sleep=0.34, verbose=False) -> Dict[str, Dict]:
    """Resolve *pmids* through the ID Converter in batches and merge the results.

    Pauses for *sleep* seconds after every batch request (including the last).
    """
    batch_size = 200  # NIH allows up to 200 per request
    merged: Dict[str, Dict] = {}
    start = 0
    while start < len(pmids):
        chunk = pmids[start:start + batch_size]
        merged.update(idconv_batch(chunk, email=email, verbose=verbose))
        time.sleep(sleep)
        start += batch_size
    return merged
|
| 110 |
+
|
| 111 |
+
def elink_pubmed_to_pmc(pmid: str, email: Optional[str], api_key: Optional[str]) -> str:
    """Resolve one PMID to the PMCID of the same article via E-utilities ELink.

    Only the ``pubmed_pmc`` link is requested and accepted. A pubmed -> pmc
    ELink query can return several link sets; taking whichever comes first can
    yield an unrelated article (for example ``pubmed_pmc_refs``, which lists
    PMC articles that cite the PMID) when the article itself is not in PMC.

    Uses Biopython's ``Entrez`` when it was importable, otherwise a plain HTTP
    request to ``ELINK_URL``.

    Args:
        pmid: PubMed identifier to look up.
        email: Contact address forwarded to NCBI; mandatory on the Biopython path.
        api_key: Optional NCBI API key, forwarded on both paths.

    Returns:
        ``"PMC<id>"`` for the first ``pubmed_pmc`` link, or ``""`` when there
        is none or the response has an unexpected shape.

    Raises:
        ValueError: Biopython is installed and *email* is empty.
        Exception: Transport/HTTP errors from the underlying client propagate.
    """
    link_name = "pubmed_pmc"
    unexpected_shape = (KeyError, IndexError, TypeError, AttributeError)

    if Entrez:
        if not email:
            raise ValueError("E-utilities elink requires --email when Biopython is installed.")
        Entrez.email = email
        if api_key:
            Entrez.api_key = api_key
        h = Entrez.elink(dbfrom="pubmed", db="pmc", id=pmid,
                         linkname=link_name, retmode="xml")
        try:
            recs = Entrez.read(h)
        finally:
            # Release the handle even when parsing fails.
            h.close()
        try:
            for link_set_db in recs[0]["LinkSetDb"]:
                # Defensive filter in case other link sets are still returned.
                if str(link_set_db["LinkName"]) != link_name:
                    continue
                links = link_set_db["Link"]
                if links:
                    return "PMC" + str(links[0]["Id"])
        except unexpected_shape:
            return ""
        return ""
    else:
        params = {"dbfrom": "pubmed", "db": "pmc", "id": pmid, "linkname": link_name,
                  "retmode": "json", "tool": "pmid2pmcid-cli"}
        if email:
            params["email"] = email
        if api_key:
            # Previously dropped on this path, so the key had no effect without Biopython.
            params["api_key"] = api_key
        r = requests.get(ELINK_URL, params=params, timeout=30, headers={"User-Agent": UA})
        r.raise_for_status()
        j = r.json()
        try:
            for link_set_db in j["linksets"][0].get("linksetdbs", []):
                if link_set_db.get("linkname") != link_name:
                    continue
                links = link_set_db.get("links") or []
                if links:
                    return "PMC" + str(links[0])
        except unexpected_shape:
            return ""
        return ""
|
| 141 |
+
|
| 142 |
+
def resolve_pmids(pmids: List[str], email: Optional[str], force_elink: bool,
|
| 143 |
+
fallback: bool, api_key: Optional[str], verbose: bool) -> Dict[str, Dict]:
|
| 144 |
+
"""Return mapping pmid(str) -> resolved fields."""
|
| 145 |
+
pmids = [normalize_pmid(p) for p in pmids if normalize_pmid(p)]
|
| 146 |
+
mapping: Dict[str, Dict] = {p: {"pmid": p, "pmcid": "", "doi": "", "status": "", "errmsg": ""} for p in pmids}
|
| 147 |
+
|
| 148 |
+
if not force_elink:
|
| 149 |
+
idc = resolve_idconv_all(pmids, email=email, verbose=verbose)
|
| 150 |
+
mapping.update(idc)
|
| 151 |
+
|
| 152 |
+
if force_elink or fallback:
|
| 153 |
+
for p in pmids:
|
| 154 |
+
need = force_elink or not mapping[p]["pmcid"]
|
| 155 |
+
if need:
|
| 156 |
+
try:
|
| 157 |
+
pmcid = elink_pubmed_to_pmc(p, email=email, api_key=api_key)
|
| 158 |
+
except Exception as e:
|
| 159 |
+
pmcid = ""
|
| 160 |
+
if verbose:
|
| 161 |
+
print(f"[elink] {p} error: {e}")
|
| 162 |
+
if pmcid:
|
| 163 |
+
mapping[p]["pmcid"] = pmcid
|
| 164 |
+
mapping[p]["status"] = (mapping[p]["status"] + ";elink").strip(";")
|
| 165 |
+
|
| 166 |
+
return mapping
|
| 167 |
+
|
| 168 |
+
# -------------------- CLI --------------------
|
| 169 |
+
def main():
    """Command-line entry point: resolve PMIDs and write one CSV row per input row.

    Input comes from either --pmids (comma-separated) or --in-csv. Unique PMIDs
    are resolved once, then the results are expanded back so every input row
    (and its studyId, when present) appears in the output. Exits with status 1
    when the input yields no rows.
    """
    ap = argparse.ArgumentParser(description="Convert PMID → PMCID/DOI and keep studyId in the output when provided.")
    g_in = ap.add_mutually_exclusive_group(required=True)
    g_in.add_argument("--pmids", help="Comma-separated PMIDs, e.g. 29625048,37261122 (no studyId)")
    g_in.add_argument("--in-csv", help="CSV with columns 'studyId' and 'pmid' (at minimum)")
    ap.add_argument("--out", default="pmid_to_pmcid.csv", help="Output CSV path")
    ap.add_argument("--email", help="Your email (recommended; passed to NIH)")
    ap.add_argument("--api-key", help="NCBI API key (optional)")
    ap.add_argument("--fallback-elink", action="store_true", help="If ID Converter has no PMCID, try E-utilities")
    ap.add_argument("--force-elink", action="store_true", help="Use E-utilities for ALL IDs (skip ID Converter)")
    ap.add_argument("--verbose", action="store_true", help="Print raw API info summary")
    args = ap.parse_args()

    # Build an input list with optional studyId.
    # Test against None rather than truthiness: `--pmids ""` must be treated as
    # "given but empty" instead of falling through to open a CSV path of None.
    if args.pmids is not None:
        input_rows = [{"studyId": "", "pmid": p.strip()} for p in args.pmids.split(",") if p.strip()]
    else:
        input_rows = read_study_pmids_from_csv(args.in_csv)

    if not input_rows:
        print("No input rows found.", file=sys.stderr)
        sys.exit(1)

    # Resolve unique PMIDs once
    pmids = unique_pmids(input_rows)
    pmid_map = resolve_pmids(pmids, email=args.email, force_elink=args.force_elink,
                             fallback=args.fallback_elink, api_key=args.api_key, verbose=args.verbose)

    # Re-expand to one row per studyId from input
    out_rows = []
    for r in input_rows:
        p = normalize_pmid(r["pmid"])
        res = pmid_map.get(p, {"pmcid": "", "doi": "", "status": "", "errmsg": ""})
        out_rows.append({
            "studyId": r.get("studyId", ""),
            "pmid": p,
            "pmcid": res.get("pmcid", ""),
            "doi": res.get("doi", ""),
            "status": res.get("status", ""),
            "errmsg": res.get("errmsg", ""),
        })

    with open(args.out, "w", newline="", encoding="utf-8") as f:
        w = csv.DictWriter(f, fieldnames=["studyId", "pmid", "pmcid", "doi", "status", "errmsg"])
        w.writeheader()
        w.writerows(out_rows)

    print(f"[✓] Wrote {len(out_rows)} rows to {args.out}")


if __name__ == "__main__":
    main()
|
pmid_to_pmcid.csv
ADDED
|
@@ -0,0 +1,733 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
studyId,pmid,pmcid,doi,status,errmsg
|
| 2 |
+
all_stjude_2015,25730765,PMC4553269,10.1038/ng.3230,ok,
|
| 3 |
+
acyc_fmi_2014,24418857,PMC11834963,,error;elink,Identifier not found in PMC
|
| 4 |
+
acyc_mgh_2016,26829750,PMC4767593,10.1038/ng.3502,ok,
|
| 5 |
+
appendiceal_msk_2022,36493333,PMC10043565,10.1200/JCO.22.01392,ok,
|
| 6 |
+
blca_plasmacytoid_mskcc_2016,26901067,PMC4827439,10.1038/ng.3503,ok,
|
| 7 |
+
blca_mskcc_solit_2014,25092538,PMC4312739,10.1016/j.eururo.2014.06.050,ok,
|
| 8 |
+
blca_nmibc_2017,28583311,PMC6007852,10.1016/j.eururo.2017.05.032,ok,
|
| 9 |
+
brca_mapk_hp_msk_2021,34795269,PMC8602441,10.1038/s41467-021-27093-y,ok,
|
| 10 |
+
bowel_colitis_msk_2022,36611031,PMC9825391,10.1038/s41467-022-35592-9,ok,
|
| 11 |
+
bladder_columbia_msk_2018,29625057,PMC5890941,10.1016/j.cell.2018.03.017,ok,
|
| 12 |
+
bladder_msk_2023,37682528,PMC11233068,10.1158/1078-0432.CCR-23-1283,ok,
|
| 13 |
+
bm_nsclc_mskcc_2023,37591896,PMC10435547,10.1038/s41467-023-40793-x,ok,
|
| 14 |
+
cfdna_msk_2019,31768066,PMC7061455,10.1038/s41591-019-0652-7,ok,
|
| 15 |
+
ccrcc_dfci_2019,29301960,PMC6035749,10.1126/science.aan5951,ok,
|
| 16 |
+
cervix_msk_2023,37643132,PMC10644000,10.1158/1078-0432.CCR-23-1078,ok,
|
| 17 |
+
chol_jhu_2013,24185509,PMC4013720,10.1038/ng.2813,ok,
|
| 18 |
+
chol_nccs_2013,24185513,PMC12468851,,error;elink,Identifier not found in PMC
|
| 19 |
+
chol_nus_2012,22561520,PMC12468851,,error;elink,Identifier not found in PMC
|
| 20 |
+
coadread_mskcc,25164765,PMC4189196,10.1186/s13059-014-0454-7,ok,
|
| 21 |
+
cllsll_icgc_2011,22158541,PMC12541443,,error;elink,Identifier not found in PMC
|
| 22 |
+
coad_caseccc_2015,25583493,PMC4313860,10.1073/pnas.1417064112,ok,
|
| 23 |
+
chol_msk_2018,29848569,PMC6642361,10.1158/1078-0432.CCR-18-0078,ok,
|
| 24 |
+
chol_icgc_2017,28667006,PMC5628134,10.1158/2159-8290.CD-17-0368,ok,
|
| 25 |
+
coadread_mskresistance_2022,36355783,PMC9827113,10.1158/2159-8290.CD-22-0405,ok,
|
| 26 |
+
cscc_dfarber_2015,25589618,PMC4359951,10.1158/1078-0432.CCR-14-1773,ok,
|
| 27 |
+
ctcl_columbia_2015,26551667,PMC4878831,10.1038/ng.3442,ok,
|
| 28 |
+
crc_eo_2020,34405229,PMC8634406,10.1093/jnci/djab124,ok,
|
| 29 |
+
crc_apc_impact_2020,32730818,PMC7680360,10.1053/j.gastro.2020.07.041,ok,
|
| 30 |
+
crc_nigerian_2020,34819518,PMC8613248,10.1038/s41467-021-27106-w,ok,
|
| 31 |
+
crc_dd_2022,35235413,PMC8906458,10.1200/PO.21.00365,ok,
|
| 32 |
+
difg_msk_2023,37910594,PMC10841595,10.1158/1078-0432.CCR-23-1180,ok,
|
| 33 |
+
escc_ucla_2014,24686850,PMC4070589,10.1038/ng.2935,ok,
|
| 34 |
+
hcc_msk_venturaa_2018,30052636,PMC6063411,10.1371/journal.pone.0200776,ok,
|
| 35 |
+
gct_msk_2020,32897884,PMC7685753,10.1172/JCI139682,ok,
|
| 36 |
+
egc_msk_tp53_ccr_2022,35377946,PMC9197876,10.1158/1078-0432.CCR-21-4016,ok,
|
| 37 |
+
gbc_mskcc_2022,36228155,PMC9772093,10.1158/1078-0432.CCR-22-1954,ok,
|
| 38 |
+
gist_msk_2022,36593350,PMC9807588,10.1038/s41698-022-00342-z,ok,
|
| 39 |
+
egc_msk_2023,37699004,PMC10852615,10.1093/jnci/djad186,ok,
|
| 40 |
+
hcc_jcopo_msk_2023,37769223,PMC10581608,10.1200/PO.23.00272,ok,
|
| 41 |
+
histiocytosis_cobi_msk_2019,30867592,PMC6438729,10.1038/s41586-019-1012-y,ok,
|
| 42 |
+
ihch_ismms_2015,25608663,PMC12463604,,error;elink,Identifier not found in PMC
|
| 43 |
+
lgsoc_mapk_msk_2022,35443055,PMC9582036,10.1158/1078-0432.CCR-21-4183,ok,
|
| 44 |
+
luad_tsp,18948947,PMC2694412,10.1038/nature07423,ok,
|
| 45 |
+
lymphoma_cellline_msk_2020,33067607,PMC8057264,10.1182/blood.2020008017,ok,
|
| 46 |
+
lung_msk_mind_2020,36038778,PMC9586871,10.1038/s43018-022-00416-8,ok,
|
| 47 |
+
mbc_msk_2021,33863915,PMC8052452,10.1038/s41523-021-00250-8,ok,
|
| 48 |
+
mnm_washu_2016,27959731,PMC5217532,10.1056/NEJMoa1605949,ok,
|
| 49 |
+
metastatic_solid_tumors_mich_2017,28783718,PMC5995337,10.1038/nature23306,ok,
|
| 50 |
+
mixed_kunga_msk_2022,35585047,PMC9117241,10.1038/s41467-022-30233-7,ok,
|
| 51 |
+
msk_ch_ped_2021,35078859,PMC9983778,10.1158/1078-0432.CCR-21-2451,ok,
|
| 52 |
+
mtnn_msk_2022,37078708,PMC10480533,10.1182/bloodadvances.2023009953,ok,
|
| 53 |
+
msk_ch_2023,38147626,PMC10875331,10.1182/bloodadvances.2023011262,ok,
|
| 54 |
+
nsclc_unito_2016,27346245,PMC12433863,,error;elink,Identifier not found in PMC
|
| 55 |
+
nsclc_ctdx_msk_2022,36357680,PMC10338177,10.1038/s41591-022-02047-z,ok,
|
| 56 |
+
pediatric_dkfz_2017,29489754,PMC12541558,,error;elink,Identifier not found in PMC
|
| 57 |
+
paired_bladder_2022,36543146,PMC9882421,10.1016/j.celrep.2022.111859,ok,
|
| 58 |
+
scco_mskcc,24658004,PMC5699446,10.1038/ng.2922,ok,
|
| 59 |
+
sarc_mskcc,20601955,PMC2911503,10.1038/ng.619,ok,
|
| 60 |
+
rectal_msk_2019,31591597,PMC7385919,10.1038/s41591-019-0584-2,ok,
|
| 61 |
+
rbl_cfdna_msk_2020,32633890,PMC7476838,10.1002/cam4.3144,ok,
|
| 62 |
+
rbl_mskcc_2020,33466343,PMC7796332,10.3390/cancers13010149,ok,
|
| 63 |
+
rms_msk_2023,37315267,PMC10309566,10.1200/PO.22.00705,ok,
|
| 64 |
+
sarcoma_msk_2023,37350195,PMC10756077,10.1002/path.6137,ok,
|
| 65 |
+
skcm_vanderbilt_mskcc_2015,32913971,PMC7446400,10.1200/PO.16.00054,ok,
|
| 66 |
+
soft_tissue_msk_2023,37730754,PMC10511463,10.1038/s41698-023-00445-1,ok,
|
| 67 |
+
ucec_ccr_msk_2022,35849120,PMC9529954,10.1158/1078-0432.CCR-22-0713,ok,
|
| 68 |
+
ucec_ancestry_cds_msk_2023,37651310,PMC11149479,10.1158/2159-8290.CD-23-0546,ok,
|
| 69 |
+
ucec_msk_2024,38653864,PMC11108776,10.1038/s41591-024-02942-7,ok,
|
| 70 |
+
urcc_mskcc_2016,27713405,PMC5059781,10.1038/ncomms13131,ok,
|
| 71 |
+
utuc_mskcc_2015,26278805,PMC4675454,10.1016/j.eururo.2015.07.039,ok,
|
| 72 |
+
utuc_msk_2019,32332851,PMC7181640,10.1038/s41467-020-15885-7,ok,
|
| 73 |
+
utuc_pdx_msk_2019,32332851,PMC7181640,10.1038/s41467-020-15885-7,ok,
|
| 74 |
+
plmeso_msk_2024,38630790,PMC11216861,10.1158/1078-0432.CCR-24-0085,ok,
|
| 75 |
+
pancreas_msk_2024,39214094,PMC11419252,10.1016/j.ccell.2024.08.002,ok,
|
| 76 |
+
lms_msk_2024,38488807,PMC11096044,10.1158/1078-0432.CCR-24-0148,ok,
|
| 77 |
+
prostate_msk_2024,38949888,PMC11371520,10.1158/1078-0432.CCR-23-3403,ok,
|
| 78 |
+
panet_msk_2018,30687805,PMC6345401,10.1200/PO.17.00267,ok,
|
| 79 |
+
makeanimpact_ccr_2023,36862133,PMC10330105,10.1158/1078-0432.CCR-22-3247,ok,
|
| 80 |
+
acbc_mskcc_2015,26095796,PMC4676955,10.1002/path.4573,ok,
|
| 81 |
+
blca_tcga_pan_can_atlas_2018,29625048,PMC5957518,10.1016/j.cell.2018.03.022,ok,
|
| 82 |
+
blca_tcga_pan_can_atlas_2018,29596782,PMC6075717,10.1016/j.cels.2018.03.002,ok,
|
| 83 |
+
blca_tcga_pan_can_atlas_2018,29622463,PMC6028190,10.1016/j.ccell.2018.03.007,ok,
|
| 84 |
+
blca_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 85 |
+
blca_tcga_pan_can_atlas_2018,29625055,PMC6066282,10.1016/j.cell.2018.02.052,ok,
|
| 86 |
+
blca_tcga_pan_can_atlas_2018,29625050,PMC6070353,10.1016/j.cell.2018.03.035,ok,
|
| 87 |
+
blca_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 88 |
+
blca_tcga_pan_can_atlas_2018,32214244,PMC7500457,10.1038/s41586-020-2095-1,ok,
|
| 89 |
+
blca_tcga_pan_can_atlas_2018,29625049,PMC5916814,10.1016/j.cell.2018.03.033,ok,
|
| 90 |
+
blca_tcga_pan_can_atlas_2018,29850653,PMC5972025,10.1200/PO.17.00073,ok,
|
| 91 |
+
blca_tcga_pan_can_atlas_2018,36334560,PMC12390932,,error;elink,Identifier not found in PMC
|
| 92 |
+
acc_2019,31483290,PMC6763222,10.1172/JCI128227,ok,
|
| 93 |
+
blca_msk_tcga_2020,30290956,PMC6339572,10.1016/j.eururo.2018.09.002,ok,
|
| 94 |
+
pcnsl_msk_2024,38995739,PMC11398981,10.1158/1078-0432.CCR-24-0605,ok,
|
| 95 |
+
msk_ctdna_vte_2024,39147831,PMC11405286,10.1038/s41591-024-03195-0,ok,
|
| 96 |
+
cellline_ccle_broad,22460905,PMC3320027,10.1038/nature11003,ok,
|
| 97 |
+
ccrcc_irc_2014,24487277,PMC4636053,10.1038/ng.2891,ok,
|
| 98 |
+
ccrcc_utokyo_2013,23797736,PMC12533196,,error;elink,Identifier not found in PMC
|
| 99 |
+
coadread_genentech,22895193,PMC3690621,10.1038/nature11282,ok,
|
| 100 |
+
cellline_nci60,22802077,PMC3399763,10.1158/0008-5472.CAN-12-1370,ok,
|
| 101 |
+
cll_iuopa_2015,26200345,PMC12487679,,error;elink,Identifier not found in PMC
|
| 102 |
+
brca_metabric,27161491,PMC4866047,10.1038/ncomms11479,ok,
|
| 103 |
+
brca_metabric,30867590,PMC6647838,10.1038/s41586-019-1007-8,ok,
|
| 104 |
+
brca_metabric,22522925,PMC3440846,10.1038/nature10983,ok,
|
| 105 |
+
coadread_dfci_2016,27149842,PMC4850357,10.1016/j.celrep.2016.03.075,ok,
|
| 106 |
+
cll_broad_2015,26466571,PMC4815041,10.1038/nature15395,ok,
|
| 107 |
+
brca_tcga_pan_can_atlas_2018,29625048,PMC5957518,10.1016/j.cell.2018.03.022,ok,
|
| 108 |
+
brca_tcga_pan_can_atlas_2018,29596782,PMC6075717,10.1016/j.cels.2018.03.002,ok,
|
| 109 |
+
brca_tcga_pan_can_atlas_2018,29622463,PMC6028190,10.1016/j.ccell.2018.03.007,ok,
|
| 110 |
+
brca_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 111 |
+
brca_tcga_pan_can_atlas_2018,29625055,PMC6066282,10.1016/j.cell.2018.02.052,ok,
|
| 112 |
+
brca_tcga_pan_can_atlas_2018,29625050,PMC6070353,10.1016/j.cell.2018.03.035,ok,
|
| 113 |
+
brca_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 114 |
+
brca_tcga_pan_can_atlas_2018,30643250,PMC12521747,,error;elink,Identifier not found in PMC
|
| 115 |
+
brca_tcga_pan_can_atlas_2018,32214244,PMC7500457,10.1038/s41586-020-2095-1,ok,
|
| 116 |
+
brca_tcga_pan_can_atlas_2018,29625049,PMC5916814,10.1016/j.cell.2018.03.033,ok,
|
| 117 |
+
brca_tcga_pan_can_atlas_2018,29850653,PMC5972025,10.1200/PO.17.00073,ok,
|
| 118 |
+
brca_tcga_pan_can_atlas_2018,36334560,PMC12390932,,error;elink,Identifier not found in PMC
|
| 119 |
+
cesc_tcga_pan_can_atlas_2018,29625048,PMC5957518,10.1016/j.cell.2018.03.022,ok,
|
| 120 |
+
cesc_tcga_pan_can_atlas_2018,29596782,PMC6075717,10.1016/j.cels.2018.03.002,ok,
|
| 121 |
+
cesc_tcga_pan_can_atlas_2018,29622463,PMC6028190,10.1016/j.ccell.2018.03.007,ok,
|
| 122 |
+
cesc_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 123 |
+
cesc_tcga_pan_can_atlas_2018,29625055,PMC6066282,10.1016/j.cell.2018.02.052,ok,
|
| 124 |
+
cesc_tcga_pan_can_atlas_2018,29625050,PMC6070353,10.1016/j.cell.2018.03.035,ok,
|
| 125 |
+
cesc_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 126 |
+
cesc_tcga_pan_can_atlas_2018,30643250,PMC12521747,,error;elink,Identifier not found in PMC
|
| 127 |
+
cesc_tcga_pan_can_atlas_2018,32214244,PMC7500457,10.1038/s41586-020-2095-1,ok,
|
| 128 |
+
cesc_tcga_pan_can_atlas_2018,29625049,PMC5916814,10.1016/j.cell.2018.03.033,ok,
|
| 129 |
+
cesc_tcga_pan_can_atlas_2018,29850653,PMC5972025,10.1200/PO.17.00073,ok,
|
| 130 |
+
cesc_tcga_pan_can_atlas_2018,36334560,PMC12390932,,error;elink,Identifier not found in PMC
|
| 131 |
+
chol_tcga_pan_can_atlas_2018,29625048,PMC5957518,10.1016/j.cell.2018.03.022,ok,
|
| 132 |
+
chol_tcga_pan_can_atlas_2018,29596782,PMC6075717,10.1016/j.cels.2018.03.002,ok,
|
| 133 |
+
chol_tcga_pan_can_atlas_2018,29622463,PMC6028190,10.1016/j.ccell.2018.03.007,ok,
|
| 134 |
+
chol_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 135 |
+
chol_tcga_pan_can_atlas_2018,29625055,PMC6066282,10.1016/j.cell.2018.02.052,ok,
|
| 136 |
+
chol_tcga_pan_can_atlas_2018,29625050,PMC6070353,10.1016/j.cell.2018.03.035,ok,
|
| 137 |
+
chol_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 138 |
+
chol_tcga_pan_can_atlas_2018,32214244,PMC7500457,10.1038/s41586-020-2095-1,ok,
|
| 139 |
+
chol_tcga_pan_can_atlas_2018,29625049,PMC5916814,10.1016/j.cell.2018.03.033,ok,
|
| 140 |
+
chol_tcga_pan_can_atlas_2018,29850653,PMC5972025,10.1200/PO.17.00073,ok,
|
| 141 |
+
chol_tcga_pan_can_atlas_2018,36334560,PMC12390932,,error;elink,Identifier not found in PMC
|
| 142 |
+
ccle_broad_2019,31068700,PMC6697103,10.1038/s41586-019-1186-3,ok,
|
| 143 |
+
ccle_broad_2019,31978347,PMC7339254,10.1016/j.cell.2019.12.023,ok,
|
| 144 |
+
coad_cptac_2019,31031003,PMC6768830,10.1016/j.cell.2019.03.030,ok,
|
| 145 |
+
coadread_cass_2020,32888432,PMC12545938,,error;elink,Identifier not found in PMC
|
| 146 |
+
cll_broad_2022,35927489,PMC10084830,10.1038/s41588-022-01140-w,ok,
|
| 147 |
+
coadread_tcga_pub,22810696,PMC3401966,10.1038/nature11252,ok,
|
| 148 |
+
desm_broad_2015,26343386,PMC4589486,10.1038/ng.3382,ok,
|
| 149 |
+
dlbc_broad_2012,22343534,PMC3309757,10.1073/pnas.1121343109,ok,
|
| 150 |
+
cscc_hgsc_bcm_2014,25303977,PMC4367811,10.1158/1078-0432.CCR-14-1768,ok,
|
| 151 |
+
coadread_tcga_pan_can_atlas_2018,29625048,PMC5957518,10.1016/j.cell.2018.03.022,ok,
|
| 152 |
+
coadread_tcga_pan_can_atlas_2018,29596782,PMC6075717,10.1016/j.cels.2018.03.002,ok,
|
| 153 |
+
coadread_tcga_pan_can_atlas_2018,29622463,PMC6028190,10.1016/j.ccell.2018.03.007,ok,
|
| 154 |
+
coadread_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 155 |
+
coadread_tcga_pan_can_atlas_2018,29625055,PMC6066282,10.1016/j.cell.2018.02.052,ok,
|
| 156 |
+
coadread_tcga_pan_can_atlas_2018,29625050,PMC6070353,10.1016/j.cell.2018.03.035,ok,
|
| 157 |
+
coadread_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 158 |
+
coadread_tcga_pan_can_atlas_2018,30643250,PMC12521747,,error;elink,Identifier not found in PMC
|
| 159 |
+
coadread_tcga_pan_can_atlas_2018,32214244,PMC7500457,10.1038/s41586-020-2095-1,ok,
|
| 160 |
+
coadread_tcga_pan_can_atlas_2018,29625049,PMC5916814,10.1016/j.cell.2018.03.033,ok,
|
| 161 |
+
coadread_tcga_pan_can_atlas_2018,29850653,PMC5972025,10.1200/PO.17.00073,ok,
|
| 162 |
+
coadread_tcga_pan_can_atlas_2018,36334560,PMC12390932,,error;elink,Identifier not found in PMC
|
| 163 |
+
dlbc_tcga_pan_can_atlas_2018,29625048,PMC5957518,10.1016/j.cell.2018.03.022,ok,
|
| 164 |
+
dlbc_tcga_pan_can_atlas_2018,29596782,PMC6075717,10.1016/j.cels.2018.03.002,ok,
|
| 165 |
+
dlbc_tcga_pan_can_atlas_2018,29622463,PMC6028190,10.1016/j.ccell.2018.03.007,ok,
|
| 166 |
+
dlbc_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 167 |
+
dlbc_tcga_pan_can_atlas_2018,29625055,PMC6066282,10.1016/j.cell.2018.02.052,ok,
|
| 168 |
+
dlbc_tcga_pan_can_atlas_2018,29625050,PMC6070353,10.1016/j.cell.2018.03.035,ok,
|
| 169 |
+
dlbc_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 170 |
+
dlbc_tcga_pan_can_atlas_2018,32214244,PMC7500457,10.1038/s41586-020-2095-1,ok,
|
| 171 |
+
dlbc_tcga_pan_can_atlas_2018,29625049,PMC5916814,10.1016/j.cell.2018.03.033,ok,
|
| 172 |
+
dlbc_tcga_pan_can_atlas_2018,29850653,PMC5972025,10.1200/PO.17.00073,ok,
|
| 173 |
+
dlbc_tcga_pan_can_atlas_2018,36334560,PMC12390932,,error;elink,Identifier not found in PMC
|
| 174 |
+
difg_glass_2019,31748746,PMC6897368,10.1038/s41586-019-1775-1,ok,
|
| 175 |
+
cscc_ucsf_2021,34272401,PMC8285521,10.1038/s41525-021-00226-4,ok,
|
| 176 |
+
cscc_ranson_2022,35982973,PMC9379253,10.3389/fonc.2022.919118,ok,
|
| 177 |
+
difg_glass,35649412,PMC9189056,10.1016/j.cell.2022.04.038,ok,
|
| 178 |
+
difg_glass,38117484,PMC10911804,10.1158/0008-5472.CAN-23-2093,ok,
|
| 179 |
+
es_dfarber_broad_2014,25186949,PMC12552549,,error;elink,Identifier not found in PMC
|
| 180 |
+
esca_tcga_pan_can_atlas_2018,29625048,PMC5957518,10.1016/j.cell.2018.03.022,ok,
|
| 181 |
+
esca_tcga_pan_can_atlas_2018,29596782,PMC6075717,10.1016/j.cels.2018.03.002,ok,
|
| 182 |
+
esca_tcga_pan_can_atlas_2018,29622463,PMC6028190,10.1016/j.ccell.2018.03.007,ok,
|
| 183 |
+
esca_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 184 |
+
esca_tcga_pan_can_atlas_2018,29625055,PMC6066282,10.1016/j.cell.2018.02.052,ok,
|
| 185 |
+
esca_tcga_pan_can_atlas_2018,29625050,PMC6070353,10.1016/j.cell.2018.03.035,ok,
|
| 186 |
+
esca_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 187 |
+
esca_tcga_pan_can_atlas_2018,32214244,PMC7500457,10.1038/s41586-020-2095-1,ok,
|
| 188 |
+
esca_tcga_pan_can_atlas_2018,29625049,PMC5916814,10.1016/j.cell.2018.03.033,ok,
|
| 189 |
+
esca_tcga_pan_can_atlas_2018,29850653,PMC5972025,10.1200/PO.17.00073,ok,
|
| 190 |
+
esca_tcga_pan_can_atlas_2018,36334560,PMC12390932,,error;elink,Identifier not found in PMC
|
| 191 |
+
gist_msk_2023,37477937,PMC11095631,10.1158/1078-0432.CCR-23-1184,ok,
|
| 192 |
+
kirc_tcga_pub,23792563,PMC3771322,10.1038/nature12222,ok,
|
| 193 |
+
hcc_msk_2024,38864854,PMC11326964,10.1158/1078-0432.CCR-24-0657,ok,
|
| 194 |
+
laml_tcga_pub,23634996,PMC3767041,10.1056/NEJMoa1301689,ok,
|
| 195 |
+
luad_mskcc_2023_met_organotropism,37084736,PMC10391526,10.1016/j.ccell.2023.03.018,ok,
|
| 196 |
+
mbl_sickkids_2016,26760213,PMC4936195,10.1038/nature16478,ok,
|
| 197 |
+
mixed_pipseq_2017,28007021,PMC5180407,10.1186/s13073-016-0389-6,ok,
|
| 198 |
+
mds_mskcc_2020,27276561,PMC4979995,10.1056/NEJMoa1516192,ok,
|
| 199 |
+
mds_mskcc_2020,30333627,PMC6280667,10.1038/s41586-018-0623-z,ok,
|
| 200 |
+
mds_mskcc_2020,24030381,PMC3837510,10.1182/blood-2013-08-518886,ok,
|
| 201 |
+
mixed_msk_tcga_2021,34635660,PMC8505423,10.1038/s41523-021-00339-0,ok,
|
| 202 |
+
nhl_bcgsc_2011,21796119,PMC3210554,10.1038/nature10351,ok,
|
| 203 |
+
nhl_bcgsc_2013,23699601,PMC3744992,10.1182/blood-2013-02-483727,ok,
|
| 204 |
+
prad_broad_2013,23622249,PMC3690918,10.1016/j.cell.2013.03.021,ok,
|
| 205 |
+
prad_mskcc,20579941,PMC3198787,10.1016/j.ccr.2010.05.026,ok,
|
| 206 |
+
prad_tcga_pub,26544944,PMC4695400,10.1016/j.cell.2015.10.025,ok,
|
| 207 |
+
pcpg_tcga_pub,28162975,PMC5643159,10.1016/j.ccell.2017.01.001,ok,
|
| 208 |
+
pptc_2019,31693904,PMC6880934,10.1016/j.celrep.2019.09.071,ok,
|
| 209 |
+
prad_cdk12_mskcc_2020,32317181,PMC7572747,10.1016/j.eururo.2020.03.024,ok,
|
| 210 |
+
prad_pik3r1_msk_2021,35670774,PMC9438279,10.1158/1078-0432.CCR-21-4272,ok,
|
| 211 |
+
pog570_bcgsc_2020,35121966,PMC12504664,,error;elink,Identifier not found in PMC
|
| 212 |
+
prad_organoids_msk_2022,35617398,PMC9299269,10.1126/science.abe1505,ok,
|
| 213 |
+
ptad_msk_2024,38758238,PMC11101347,10.1007/s00401-024-02736-8,ok,
|
| 214 |
+
prad_msk_mdanderson_2023,38488813,PMC11094415,10.1158/1078-0432.CCR-23-2438,ok,
|
| 215 |
+
stad_tcga_pub,25079317,PMC4170219,10.1038/nature13480,ok,
|
| 216 |
+
rectal_msk_2022,35970919,PMC9801308,10.1038/s41591-022-01930-z,ok,
|
| 217 |
+
sarcoma_msk_2022,35705558,PMC9200814,10.1038/s41467-022-30496-0,ok,
|
| 218 |
+
hnsc_tcga_pan_can_atlas_2018,29625048,PMC5957518,10.1016/j.cell.2018.03.022,ok,
|
| 219 |
+
hnsc_tcga_pan_can_atlas_2018,29596782,PMC6075717,10.1016/j.cels.2018.03.002,ok,
|
| 220 |
+
hnsc_tcga_pan_can_atlas_2018,29622463,PMC6028190,10.1016/j.ccell.2018.03.007,ok,
|
| 221 |
+
hnsc_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 222 |
+
hnsc_tcga_pan_can_atlas_2018,29625055,PMC6066282,10.1016/j.cell.2018.02.052,ok,
|
| 223 |
+
hnsc_tcga_pan_can_atlas_2018,29625050,PMC6070353,10.1016/j.cell.2018.03.035,ok,
|
| 224 |
+
hnsc_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 225 |
+
hnsc_tcga_pan_can_atlas_2018,30643250,PMC12521747,,error;elink,Identifier not found in PMC
|
| 226 |
+
hnsc_tcga_pan_can_atlas_2018,32214244,PMC7500457,10.1038/s41586-020-2095-1,ok,
|
| 227 |
+
hnsc_tcga_pan_can_atlas_2018,29625049,PMC5916814,10.1016/j.cell.2018.03.033,ok,
|
| 228 |
+
hnsc_tcga_pan_can_atlas_2018,29850653,PMC5972025,10.1200/PO.17.00073,ok,
|
| 229 |
+
hnsc_tcga_pan_can_atlas_2018,36334560,PMC12390932,,error;elink,Identifier not found in PMC
|
| 230 |
+
kich_tcga_pan_can_atlas_2018,29625048,PMC5957518,10.1016/j.cell.2018.03.022,ok,
|
| 231 |
+
kich_tcga_pan_can_atlas_2018,29596782,PMC6075717,10.1016/j.cels.2018.03.002,ok,
|
| 232 |
+
kich_tcga_pan_can_atlas_2018,29622463,PMC6028190,10.1016/j.ccell.2018.03.007,ok,
|
| 233 |
+
kich_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 234 |
+
kich_tcga_pan_can_atlas_2018,29625055,PMC6066282,10.1016/j.cell.2018.02.052,ok,
|
| 235 |
+
kich_tcga_pan_can_atlas_2018,29625050,PMC6070353,10.1016/j.cell.2018.03.035,ok,
|
| 236 |
+
kich_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 237 |
+
kich_tcga_pan_can_atlas_2018,32214244,PMC7500457,10.1038/s41586-020-2095-1,ok,
|
| 238 |
+
kich_tcga_pan_can_atlas_2018,29625049,PMC5916814,10.1016/j.cell.2018.03.033,ok,
|
| 239 |
+
kich_tcga_pan_can_atlas_2018,29850653,PMC5972025,10.1200/PO.17.00073,ok,
|
| 240 |
+
kich_tcga_pan_can_atlas_2018,36334560,PMC12390932,,error;elink,Identifier not found in PMC
|
| 241 |
+
kirc_tcga_pan_can_atlas_2018,29625048,PMC5957518,10.1016/j.cell.2018.03.022,ok,
|
| 242 |
+
kirc_tcga_pan_can_atlas_2018,29596782,PMC6075717,10.1016/j.cels.2018.03.002,ok,
|
| 243 |
+
kirc_tcga_pan_can_atlas_2018,29622463,PMC6028190,10.1016/j.ccell.2018.03.007,ok,
|
| 244 |
+
kirc_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 245 |
+
kirc_tcga_pan_can_atlas_2018,29625055,PMC6066282,10.1016/j.cell.2018.02.052,ok,
|
| 246 |
+
kirc_tcga_pan_can_atlas_2018,29625050,PMC6070353,10.1016/j.cell.2018.03.035,ok,
|
| 247 |
+
kirc_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 248 |
+
kirc_tcga_pan_can_atlas_2018,30643250,PMC12521747,,error;elink,Identifier not found in PMC
|
| 249 |
+
kirc_tcga_pan_can_atlas_2018,32214244,PMC7500457,10.1038/s41586-020-2095-1,ok,
|
| 250 |
+
kirc_tcga_pan_can_atlas_2018,29625049,PMC5916814,10.1016/j.cell.2018.03.033,ok,
|
| 251 |
+
kirc_tcga_pan_can_atlas_2018,29850653,PMC5972025,10.1200/PO.17.00073,ok,
|
| 252 |
+
kirc_tcga_pan_can_atlas_2018,36334560,PMC12390932,,error;elink,Identifier not found in PMC
|
| 253 |
+
kirp_tcga_pan_can_atlas_2018,29625048,PMC5957518,10.1016/j.cell.2018.03.022,ok,
|
| 254 |
+
kirp_tcga_pan_can_atlas_2018,29596782,PMC6075717,10.1016/j.cels.2018.03.002,ok,
|
| 255 |
+
kirp_tcga_pan_can_atlas_2018,29622463,PMC6028190,10.1016/j.ccell.2018.03.007,ok,
|
| 256 |
+
kirp_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 257 |
+
kirp_tcga_pan_can_atlas_2018,29625055,PMC6066282,10.1016/j.cell.2018.02.052,ok,
|
| 258 |
+
kirp_tcga_pan_can_atlas_2018,29625050,PMC6070353,10.1016/j.cell.2018.03.035,ok,
|
| 259 |
+
kirp_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 260 |
+
kirp_tcga_pan_can_atlas_2018,30643250,PMC12521747,,error;elink,Identifier not found in PMC
|
| 261 |
+
kirp_tcga_pan_can_atlas_2018,32214244,PMC7500457,10.1038/s41586-020-2095-1,ok,
|
| 262 |
+
kirp_tcga_pan_can_atlas_2018,29625049,PMC5916814,10.1016/j.cell.2018.03.033,ok,
|
| 263 |
+
kirp_tcga_pan_can_atlas_2018,29850653,PMC5972025,10.1200/PO.17.00073,ok,
|
| 264 |
+
kirp_tcga_pan_can_atlas_2018,36334560,PMC12390932,,error;elink,Identifier not found in PMC
|
| 265 |
+
laml_tcga_pan_can_atlas_2018,29625048,PMC5957518,10.1016/j.cell.2018.03.022,ok,
|
| 266 |
+
laml_tcga_pan_can_atlas_2018,29596782,PMC6075717,10.1016/j.cels.2018.03.002,ok,
|
| 267 |
+
laml_tcga_pan_can_atlas_2018,29622463,PMC6028190,10.1016/j.ccell.2018.03.007,ok,
|
| 268 |
+
laml_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 269 |
+
laml_tcga_pan_can_atlas_2018,29625055,PMC6066282,10.1016/j.cell.2018.02.052,ok,
|
| 270 |
+
laml_tcga_pan_can_atlas_2018,29625050,PMC6070353,10.1016/j.cell.2018.03.035,ok,
|
| 271 |
+
laml_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 272 |
+
laml_tcga_pan_can_atlas_2018,32214244,PMC7500457,10.1038/s41586-020-2095-1,ok,
|
| 273 |
+
laml_tcga_pan_can_atlas_2018,29625049,PMC5916814,10.1016/j.cell.2018.03.033,ok,
|
| 274 |
+
laml_tcga_pan_can_atlas_2018,29850653,PMC5972025,10.1200/PO.17.00073,ok,
|
| 275 |
+
laml_tcga_pan_can_atlas_2018,36334560,PMC12390932,,error;elink,Identifier not found in PMC
|
| 276 |
+
lihc_tcga_pan_can_atlas_2018,29625048,PMC5957518,10.1016/j.cell.2018.03.022,ok,
|
| 277 |
+
lihc_tcga_pan_can_atlas_2018,29596782,PMC6075717,10.1016/j.cels.2018.03.002,ok,
|
| 278 |
+
lihc_tcga_pan_can_atlas_2018,29622463,PMC6028190,10.1016/j.ccell.2018.03.007,ok,
|
| 279 |
+
lihc_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 280 |
+
lihc_tcga_pan_can_atlas_2018,29625055,PMC6066282,10.1016/j.cell.2018.02.052,ok,
|
| 281 |
+
lihc_tcga_pan_can_atlas_2018,29625050,PMC6070353,10.1016/j.cell.2018.03.035,ok,
|
| 282 |
+
lihc_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 283 |
+
lihc_tcga_pan_can_atlas_2018,30643250,PMC12521747,,error;elink,Identifier not found in PMC
|
| 284 |
+
lihc_tcga_pan_can_atlas_2018,32214244,PMC7500457,10.1038/s41586-020-2095-1,ok,
|
| 285 |
+
lihc_tcga_pan_can_atlas_2018,29625049,PMC5916814,10.1016/j.cell.2018.03.033,ok,
|
| 286 |
+
lihc_tcga_pan_can_atlas_2018,29850653,PMC5972025,10.1200/PO.17.00073,ok,
|
| 287 |
+
lihc_tcga_pan_can_atlas_2018,36334560,PMC12390932,,error;elink,Identifier not found in PMC
|
| 288 |
+
luad_tcga_pan_can_atlas_2018,29625048,PMC5957518,10.1016/j.cell.2018.03.022,ok,
|
| 289 |
+
luad_tcga_pan_can_atlas_2018,29596782,PMC6075717,10.1016/j.cels.2018.03.002,ok,
|
| 290 |
+
luad_tcga_pan_can_atlas_2018,29622463,PMC6028190,10.1016/j.ccell.2018.03.007,ok,
|
| 291 |
+
luad_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 292 |
+
luad_tcga_pan_can_atlas_2018,29625055,PMC6066282,10.1016/j.cell.2018.02.052,ok,
|
| 293 |
+
luad_tcga_pan_can_atlas_2018,29625050,PMC6070353,10.1016/j.cell.2018.03.035,ok,
|
| 294 |
+
luad_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 295 |
+
luad_tcga_pan_can_atlas_2018,30643250,PMC12521747,,error;elink,Identifier not found in PMC
|
| 296 |
+
luad_tcga_pan_can_atlas_2018,32214244,PMC7500457,10.1038/s41586-020-2095-1,ok,
|
| 297 |
+
luad_tcga_pan_can_atlas_2018,29625049,PMC5916814,10.1016/j.cell.2018.03.033,ok,
|
| 298 |
+
luad_tcga_pan_can_atlas_2018,29850653,PMC5972025,10.1200/PO.17.00073,ok,
|
| 299 |
+
luad_tcga_pan_can_atlas_2018,36334560,PMC12390932,,error;elink,Identifier not found in PMC
|
| 300 |
+
lusc_tcga_pan_can_atlas_2018,29625048,PMC5957518,10.1016/j.cell.2018.03.022,ok,
|
| 301 |
+
lusc_tcga_pan_can_atlas_2018,29596782,PMC6075717,10.1016/j.cels.2018.03.002,ok,
|
| 302 |
+
lusc_tcga_pan_can_atlas_2018,29622463,PMC6028190,10.1016/j.ccell.2018.03.007,ok,
|
| 303 |
+
lusc_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 304 |
+
lusc_tcga_pan_can_atlas_2018,29625055,PMC6066282,10.1016/j.cell.2018.02.052,ok,
|
| 305 |
+
lusc_tcga_pan_can_atlas_2018,29625050,PMC6070353,10.1016/j.cell.2018.03.035,ok,
|
| 306 |
+
lusc_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 307 |
+
lusc_tcga_pan_can_atlas_2018,30643250,PMC12521747,,error;elink,Identifier not found in PMC
|
| 308 |
+
lusc_tcga_pan_can_atlas_2018,32214244,PMC7500457,10.1038/s41586-020-2095-1,ok,
|
| 309 |
+
lusc_tcga_pan_can_atlas_2018,29625049,PMC5916814,10.1016/j.cell.2018.03.033,ok,
|
| 310 |
+
lusc_tcga_pan_can_atlas_2018,29850653,PMC5972025,10.1200/PO.17.00073,ok,
|
| 311 |
+
lusc_tcga_pan_can_atlas_2018,36334560,PMC12390932,,error;elink,Identifier not found in PMC
|
| 312 |
+
meso_tcga_pan_can_atlas_2018,29625048,PMC5957518,10.1016/j.cell.2018.03.022,ok,
|
| 313 |
+
meso_tcga_pan_can_atlas_2018,29596782,PMC6075717,10.1016/j.cels.2018.03.002,ok,
|
| 314 |
+
meso_tcga_pan_can_atlas_2018,29622463,PMC6028190,10.1016/j.ccell.2018.03.007,ok,
|
| 315 |
+
meso_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 316 |
+
meso_tcga_pan_can_atlas_2018,29625055,PMC6066282,10.1016/j.cell.2018.02.052,ok,
|
| 317 |
+
meso_tcga_pan_can_atlas_2018,29625050,PMC6070353,10.1016/j.cell.2018.03.035,ok,
|
| 318 |
+
meso_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 319 |
+
meso_tcga_pan_can_atlas_2018,32214244,PMC7500457,10.1038/s41586-020-2095-1,ok,
|
| 320 |
+
meso_tcga_pan_can_atlas_2018,29625049,PMC5916814,10.1016/j.cell.2018.03.033,ok,
|
| 321 |
+
meso_tcga_pan_can_atlas_2018,29850653,PMC5972025,10.1200/PO.17.00073,ok,
|
| 322 |
+
meso_tcga_pan_can_atlas_2018,36334560,PMC12390932,,error;elink,Identifier not found in PMC
|
| 323 |
+
ov_tcga_pan_can_atlas_2018,29625048,PMC5957518,10.1016/j.cell.2018.03.022,ok,
|
| 324 |
+
ov_tcga_pan_can_atlas_2018,29596782,PMC6075717,10.1016/j.cels.2018.03.002,ok,
|
| 325 |
+
ov_tcga_pan_can_atlas_2018,29622463,PMC6028190,10.1016/j.ccell.2018.03.007,ok,
|
| 326 |
+
ov_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 327 |
+
ov_tcga_pan_can_atlas_2018,29625055,PMC6066282,10.1016/j.cell.2018.02.052,ok,
|
| 328 |
+
ov_tcga_pan_can_atlas_2018,29625050,PMC6070353,10.1016/j.cell.2018.03.035,ok,
|
| 329 |
+
ov_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 330 |
+
ov_tcga_pan_can_atlas_2018,30643250,PMC12521747,,error;elink,Identifier not found in PMC
|
| 331 |
+
ov_tcga_pan_can_atlas_2018,32214244,PMC7500457,10.1038/s41586-020-2095-1,ok,
|
| 332 |
+
ov_tcga_pan_can_atlas_2018,29625049,PMC5916814,10.1016/j.cell.2018.03.033,ok,
|
| 333 |
+
ov_tcga_pan_can_atlas_2018,29850653,PMC5972025,10.1200/PO.17.00073,ok,
|
| 334 |
+
ov_tcga_pan_can_atlas_2018,36334560,PMC12390932,,error;elink,Identifier not found in PMC
|
| 335 |
+
paad_tcga_pan_can_atlas_2018,29625048,PMC5957518,10.1016/j.cell.2018.03.022,ok,
|
| 336 |
+
paad_tcga_pan_can_atlas_2018,29596782,PMC6075717,10.1016/j.cels.2018.03.002,ok,
|
| 337 |
+
paad_tcga_pan_can_atlas_2018,29622463,PMC6028190,10.1016/j.ccell.2018.03.007,ok,
|
| 338 |
+
paad_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 339 |
+
paad_tcga_pan_can_atlas_2018,29625055,PMC6066282,10.1016/j.cell.2018.02.052,ok,
|
| 340 |
+
paad_tcga_pan_can_atlas_2018,29625050,PMC6070353,10.1016/j.cell.2018.03.035,ok,
|
| 341 |
+
paad_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 342 |
+
paad_tcga_pan_can_atlas_2018,30643250,PMC12521747,,error;elink,Identifier not found in PMC
|
| 343 |
+
paad_tcga_pan_can_atlas_2018,32214244,PMC7500457,10.1038/s41586-020-2095-1,ok,
|
| 344 |
+
paad_tcga_pan_can_atlas_2018,29625049,PMC5916814,10.1016/j.cell.2018.03.033,ok,
|
| 345 |
+
paad_tcga_pan_can_atlas_2018,29850653,PMC5972025,10.1200/PO.17.00073,ok,
|
| 346 |
+
paad_tcga_pan_can_atlas_2018,36334560,PMC12390932,,error;elink,Identifier not found in PMC
|
| 347 |
+
pcpg_tcga_pan_can_atlas_2018,29625048,PMC5957518,10.1016/j.cell.2018.03.022,ok,
|
| 348 |
+
pcpg_tcga_pan_can_atlas_2018,29596782,PMC6075717,10.1016/j.cels.2018.03.002,ok,
|
| 349 |
+
pcpg_tcga_pan_can_atlas_2018,29622463,PMC6028190,10.1016/j.ccell.2018.03.007,ok,
|
| 350 |
+
pcpg_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 351 |
+
pcpg_tcga_pan_can_atlas_2018,29625055,PMC6066282,10.1016/j.cell.2018.02.052,ok,
|
| 352 |
+
pcpg_tcga_pan_can_atlas_2018,29625050,PMC6070353,10.1016/j.cell.2018.03.035,ok,
|
| 353 |
+
pcpg_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 354 |
+
pcpg_tcga_pan_can_atlas_2018,30643250,PMC12521747,,error;elink,Identifier not found in PMC
|
| 355 |
+
pcpg_tcga_pan_can_atlas_2018,32214244,PMC7500457,10.1038/s41586-020-2095-1,ok,
|
| 356 |
+
pcpg_tcga_pan_can_atlas_2018,29625049,PMC5916814,10.1016/j.cell.2018.03.033,ok,
|
| 357 |
+
pcpg_tcga_pan_can_atlas_2018,29850653,PMC5972025,10.1200/PO.17.00073,ok,
|
| 358 |
+
pcpg_tcga_pan_can_atlas_2018,36334560,PMC12390932,,error;elink,Identifier not found in PMC
|
| 359 |
+
prad_tcga_pan_can_atlas_2018,29625048,PMC5957518,10.1016/j.cell.2018.03.022,ok,
|
| 360 |
+
prad_tcga_pan_can_atlas_2018,29596782,PMC6075717,10.1016/j.cels.2018.03.002,ok,
|
| 361 |
+
prad_tcga_pan_can_atlas_2018,29622463,PMC6028190,10.1016/j.ccell.2018.03.007,ok,
|
| 362 |
+
prad_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 363 |
+
prad_tcga_pan_can_atlas_2018,29625055,PMC6066282,10.1016/j.cell.2018.02.052,ok,
|
| 364 |
+
prad_tcga_pan_can_atlas_2018,29625050,PMC6070353,10.1016/j.cell.2018.03.035,ok,
|
| 365 |
+
prad_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 366 |
+
prad_tcga_pan_can_atlas_2018,30643250,PMC12521747,,error;elink,Identifier not found in PMC
|
| 367 |
+
prad_tcga_pan_can_atlas_2018,32214244,PMC7500457,10.1038/s41586-020-2095-1,ok,
|
| 368 |
+
prad_tcga_pan_can_atlas_2018,29625049,PMC5916814,10.1016/j.cell.2018.03.033,ok,
|
| 369 |
+
prad_tcga_pan_can_atlas_2018,29850653,PMC5972025,10.1200/PO.17.00073,ok,
|
| 370 |
+
prad_tcga_pan_can_atlas_2018,36334560,PMC12390932,,error;elink,Identifier not found in PMC
|
| 371 |
+
sarc_tcga_pan_can_atlas_2018,29625048,PMC5957518,10.1016/j.cell.2018.03.022,ok,
|
| 372 |
+
sarc_tcga_pan_can_atlas_2018,29596782,PMC6075717,10.1016/j.cels.2018.03.002,ok,
|
| 373 |
+
sarc_tcga_pan_can_atlas_2018,29622463,PMC6028190,10.1016/j.ccell.2018.03.007,ok,
|
| 374 |
+
sarc_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 375 |
+
sarc_tcga_pan_can_atlas_2018,29625055,PMC6066282,10.1016/j.cell.2018.02.052,ok,
|
| 376 |
+
sarc_tcga_pan_can_atlas_2018,29625050,PMC6070353,10.1016/j.cell.2018.03.035,ok,
|
| 377 |
+
sarc_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 378 |
+
sarc_tcga_pan_can_atlas_2018,32214244,PMC7500457,10.1038/s41586-020-2095-1,ok,
|
| 379 |
+
sarc_tcga_pan_can_atlas_2018,29625049,PMC5916814,10.1016/j.cell.2018.03.033,ok,
|
| 380 |
+
sarc_tcga_pan_can_atlas_2018,29850653,PMC5972025,10.1200/PO.17.00073,ok,
|
| 381 |
+
sarc_tcga_pan_can_atlas_2018,36334560,PMC12390932,,error;elink,Identifier not found in PMC
|
| 382 |
+
skcm_tcga_pan_can_atlas_2018,29625048,PMC5957518,10.1016/j.cell.2018.03.022,ok,
|
| 383 |
+
skcm_tcga_pan_can_atlas_2018,29596782,PMC6075717,10.1016/j.cels.2018.03.002,ok,
|
| 384 |
+
skcm_tcga_pan_can_atlas_2018,29622463,PMC6028190,10.1016/j.ccell.2018.03.007,ok,
|
| 385 |
+
skcm_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 386 |
+
skcm_tcga_pan_can_atlas_2018,29625055,PMC6066282,10.1016/j.cell.2018.02.052,ok,
|
| 387 |
+
skcm_tcga_pan_can_atlas_2018,29625050,PMC6070353,10.1016/j.cell.2018.03.035,ok,
|
| 388 |
+
skcm_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 389 |
+
skcm_tcga_pan_can_atlas_2018,30643250,PMC12521747,,error;elink,Identifier not found in PMC
|
| 390 |
+
skcm_tcga_pan_can_atlas_2018,32214244,PMC7500457,10.1038/s41586-020-2095-1,ok,
|
| 391 |
+
skcm_tcga_pan_can_atlas_2018,29625049,PMC5916814,10.1016/j.cell.2018.03.033,ok,
|
| 392 |
+
skcm_tcga_pan_can_atlas_2018,29850653,PMC5972025,10.1200/PO.17.00073,ok,
|
| 393 |
+
skcm_tcga_pan_can_atlas_2018,36334560,PMC12390932,,error;elink,Identifier not found in PMC
|
| 394 |
+
stad_tcga_pan_can_atlas_2018,29625048,PMC5957518,10.1016/j.cell.2018.03.022,ok,
|
| 395 |
+
stad_tcga_pan_can_atlas_2018,29596782,PMC6075717,10.1016/j.cels.2018.03.002,ok,
|
| 396 |
+
stad_tcga_pan_can_atlas_2018,29622463,PMC6028190,10.1016/j.ccell.2018.03.007,ok,
|
| 397 |
+
stad_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 398 |
+
stad_tcga_pan_can_atlas_2018,29625055,PMC6066282,10.1016/j.cell.2018.02.052,ok,
|
| 399 |
+
stad_tcga_pan_can_atlas_2018,29625050,PMC6070353,10.1016/j.cell.2018.03.035,ok,
|
| 400 |
+
stad_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 401 |
+
stad_tcga_pan_can_atlas_2018,32214244,PMC7500457,10.1038/s41586-020-2095-1,ok,
|
| 402 |
+
stad_tcga_pan_can_atlas_2018,29625049,PMC5916814,10.1016/j.cell.2018.03.033,ok,
|
| 403 |
+
stad_tcga_pan_can_atlas_2018,29850653,PMC5972025,10.1200/PO.17.00073,ok,
|
| 404 |
+
stad_tcga_pan_can_atlas_2018,36334560,PMC12390932,,error;elink,Identifier not found in PMC
|
| 405 |
+
tgct_tcga_pan_can_atlas_2018,29625048,PMC5957518,10.1016/j.cell.2018.03.022,ok,
|
| 406 |
+
tgct_tcga_pan_can_atlas_2018,29596782,PMC6075717,10.1016/j.cels.2018.03.002,ok,
|
| 407 |
+
tgct_tcga_pan_can_atlas_2018,29622463,PMC6028190,10.1016/j.ccell.2018.03.007,ok,
|
| 408 |
+
tgct_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 409 |
+
tgct_tcga_pan_can_atlas_2018,29625055,PMC6066282,10.1016/j.cell.2018.02.052,ok,
|
| 410 |
+
tgct_tcga_pan_can_atlas_2018,29625050,PMC6070353,10.1016/j.cell.2018.03.035,ok,
|
| 411 |
+
tgct_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 412 |
+
tgct_tcga_pan_can_atlas_2018,32214244,PMC7500457,10.1038/s41586-020-2095-1,ok,
|
| 413 |
+
tgct_tcga_pan_can_atlas_2018,29625049,PMC5916814,10.1016/j.cell.2018.03.033,ok,
|
| 414 |
+
tgct_tcga_pan_can_atlas_2018,29850653,PMC5972025,10.1200/PO.17.00073,ok,
|
| 415 |
+
tgct_tcga_pan_can_atlas_2018,36334560,PMC12390932,,error;elink,Identifier not found in PMC
|
| 416 |
+
thca_tcga_pan_can_atlas_2018,29625048,PMC5957518,10.1016/j.cell.2018.03.022,ok,
|
| 417 |
+
thca_tcga_pan_can_atlas_2018,29596782,PMC6075717,10.1016/j.cels.2018.03.002,ok,
|
| 418 |
+
thca_tcga_pan_can_atlas_2018,29622463,PMC6028190,10.1016/j.ccell.2018.03.007,ok,
|
| 419 |
+
thca_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 420 |
+
thca_tcga_pan_can_atlas_2018,29625055,PMC6066282,10.1016/j.cell.2018.02.052,ok,
|
| 421 |
+
thca_tcga_pan_can_atlas_2018,29625050,PMC6070353,10.1016/j.cell.2018.03.035,ok,
|
| 422 |
+
thca_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 423 |
+
thca_tcga_pan_can_atlas_2018,30643250,PMC12521747,,error;elink,Identifier not found in PMC
|
| 424 |
+
thca_tcga_pan_can_atlas_2018,32214244,PMC7500457,10.1038/s41586-020-2095-1,ok,
|
| 425 |
+
thca_tcga_pan_can_atlas_2018,29625049,PMC5916814,10.1016/j.cell.2018.03.033,ok,
|
| 426 |
+
thca_tcga_pan_can_atlas_2018,29850653,PMC5972025,10.1200/PO.17.00073,ok,
|
| 427 |
+
thca_tcga_pan_can_atlas_2018,36334560,PMC12390932,,error;elink,Identifier not found in PMC
|
| 428 |
+
thym_tcga_pan_can_atlas_2018,29625048,PMC5957518,10.1016/j.cell.2018.03.022,ok,
|
| 429 |
+
thym_tcga_pan_can_atlas_2018,29596782,PMC6075717,10.1016/j.cels.2018.03.002,ok,
|
| 430 |
+
thym_tcga_pan_can_atlas_2018,29622463,PMC6028190,10.1016/j.ccell.2018.03.007,ok,
|
| 431 |
+
thym_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 432 |
+
thym_tcga_pan_can_atlas_2018,29625055,PMC6066282,10.1016/j.cell.2018.02.052,ok,
|
| 433 |
+
thym_tcga_pan_can_atlas_2018,29625050,PMC6070353,10.1016/j.cell.2018.03.035,ok,
|
| 434 |
+
thym_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 435 |
+
thym_tcga_pan_can_atlas_2018,32214244,PMC7500457,10.1038/s41586-020-2095-1,ok,
|
| 436 |
+
thym_tcga_pan_can_atlas_2018,29625049,PMC5916814,10.1016/j.cell.2018.03.033,ok,
|
| 437 |
+
thym_tcga_pan_can_atlas_2018,29850653,PMC5972025,10.1200/PO.17.00073,ok,
|
| 438 |
+
thym_tcga_pan_can_atlas_2018,36334560,PMC12390932,,error;elink,Identifier not found in PMC
|
| 439 |
+
ucec_tcga_pan_can_atlas_2018,29625048,PMC5957518,10.1016/j.cell.2018.03.022,ok,
|
| 440 |
+
ucec_tcga_pan_can_atlas_2018,29596782,PMC6075717,10.1016/j.cels.2018.03.002,ok,
|
| 441 |
+
ucec_tcga_pan_can_atlas_2018,29622463,PMC6028190,10.1016/j.ccell.2018.03.007,ok,
|
| 442 |
+
ucec_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 443 |
+
ucec_tcga_pan_can_atlas_2018,29625055,PMC6066282,10.1016/j.cell.2018.02.052,ok,
|
| 444 |
+
ucec_tcga_pan_can_atlas_2018,29625050,PMC6070353,10.1016/j.cell.2018.03.035,ok,
|
| 445 |
+
ucec_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 446 |
+
ucec_tcga_pan_can_atlas_2018,30643250,PMC12521747,,error;elink,Identifier not found in PMC
|
| 447 |
+
ucec_tcga_pan_can_atlas_2018,32214244,PMC7500457,10.1038/s41586-020-2095-1,ok,
|
| 448 |
+
ucec_tcga_pan_can_atlas_2018,29625049,PMC5916814,10.1016/j.cell.2018.03.033,ok,
|
| 449 |
+
ucec_tcga_pan_can_atlas_2018,29850653,PMC5972025,10.1200/PO.17.00073,ok,
|
| 450 |
+
ucec_tcga_pan_can_atlas_2018,36334560,PMC12390932,,error;elink,Identifier not found in PMC
|
| 451 |
+
ucs_tcga_pan_can_atlas_2018,29625048,PMC5957518,10.1016/j.cell.2018.03.022,ok,
|
| 452 |
+
ucs_tcga_pan_can_atlas_2018,29596782,PMC6075717,10.1016/j.cels.2018.03.002,ok,
|
| 453 |
+
ucs_tcga_pan_can_atlas_2018,29622463,PMC6028190,10.1016/j.ccell.2018.03.007,ok,
|
| 454 |
+
ucs_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 455 |
+
ucs_tcga_pan_can_atlas_2018,29625055,PMC6066282,10.1016/j.cell.2018.02.052,ok,
|
| 456 |
+
ucs_tcga_pan_can_atlas_2018,29625050,PMC6070353,10.1016/j.cell.2018.03.035,ok,
|
| 457 |
+
ucs_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 458 |
+
ucs_tcga_pan_can_atlas_2018,32214244,PMC7500457,10.1038/s41586-020-2095-1,ok,
|
| 459 |
+
ucs_tcga_pan_can_atlas_2018,29625049,PMC5916814,10.1016/j.cell.2018.03.033,ok,
|
| 460 |
+
ucs_tcga_pan_can_atlas_2018,29850653,PMC5972025,10.1200/PO.17.00073,ok,
|
| 461 |
+
ucs_tcga_pan_can_atlas_2018,36334560,PMC12390932,,error;elink,Identifier not found in PMC
|
| 462 |
+
uvm_tcga_pan_can_atlas_2018,29625048,PMC5957518,10.1016/j.cell.2018.03.022,ok,
|
| 463 |
+
uvm_tcga_pan_can_atlas_2018,29596782,PMC6075717,10.1016/j.cels.2018.03.002,ok,
|
| 464 |
+
uvm_tcga_pan_can_atlas_2018,29622463,PMC6028190,10.1016/j.ccell.2018.03.007,ok,
|
| 465 |
+
uvm_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 466 |
+
uvm_tcga_pan_can_atlas_2018,29625055,PMC6066282,10.1016/j.cell.2018.02.052,ok,
|
| 467 |
+
uvm_tcga_pan_can_atlas_2018,29625050,PMC6070353,10.1016/j.cell.2018.03.035,ok,
|
| 468 |
+
uvm_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 469 |
+
uvm_tcga_pan_can_atlas_2018,32214244,PMC7500457,10.1038/s41586-020-2095-1,ok,
|
| 470 |
+
uvm_tcga_pan_can_atlas_2018,29625049,PMC5916814,10.1016/j.cell.2018.03.033,ok,
|
| 471 |
+
uvm_tcga_pan_can_atlas_2018,29850653,PMC5972025,10.1200/PO.17.00073,ok,
|
| 472 |
+
uvm_tcga_pan_can_atlas_2018,36334560,PMC12390932,,error;elink,Identifier not found in PMC
|
| 473 |
+
coad_silu_2022,37202560,PMC10202816,10.1038/s41591-023-02324-5,ok,
|
| 474 |
+
acc_tcga_pan_can_atlas_2018,29625048,PMC5957518,10.1016/j.cell.2018.03.022,ok,
|
| 475 |
+
acc_tcga_pan_can_atlas_2018,29596782,PMC6075717,10.1016/j.cels.2018.03.002,ok,
|
| 476 |
+
acc_tcga_pan_can_atlas_2018,29622463,PMC6028190,10.1016/j.ccell.2018.03.007,ok,
|
| 477 |
+
acc_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 478 |
+
acc_tcga_pan_can_atlas_2018,29625055,PMC6066282,10.1016/j.cell.2018.02.052,ok,
|
| 479 |
+
acc_tcga_pan_can_atlas_2018,29625050,PMC6070353,10.1016/j.cell.2018.03.035,ok,
|
| 480 |
+
acc_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 481 |
+
acc_tcga_pan_can_atlas_2018,32214244,PMC7500457,10.1038/s41586-020-2095-1,ok,
|
| 482 |
+
acc_tcga_pan_can_atlas_2018,29625049,PMC5916814,10.1016/j.cell.2018.03.033,ok,
|
| 483 |
+
acc_tcga_pan_can_atlas_2018,29850653,PMC5972025,10.1200/PO.17.00073,ok,
|
| 484 |
+
acc_tcga_pan_can_atlas_2018,36334560,PMC12390932,,error;elink,Identifier not found in PMC
|
| 485 |
+
msk_chord_2024,39506116,PMC11655358,10.1038/s41586-024-08167-5,ok,
|
| 486 |
+
pancan_mappyacts_2022,35292802,PMC9394403,10.1158/2159-8290.CD-21-1136,ok,
|
| 487 |
+
msk_met_2021,35120664,PMC9147702,10.1016/j.cell.2022.01.003,ok,
|
| 488 |
+
blca_msk_2024,39499893,PMC12088707,10.1200/PO.24.00287,ok,
|
| 489 |
+
brca_fuscc_2020,32719455,PMC8027015,10.1038/s41422-020-0375-9,ok,
|
| 490 |
+
thyroid_gatci_2024,38412093,PMC11077417,10.1016/j.celrep.2024.113826,ok,
|
| 491 |
+
braf_msk_impact_2024,38922339,PMC11371517,10.1158/1078-0432.CCR-23-3981,ok,
|
| 492 |
+
bcc_unige_2016,26950094,PMC12448954,,error;elink,Identifier not found in PMC
|
| 493 |
+
ampca_bcm_2016,26804919,PMC4982376,10.1016/j.celrep.2015.12.005,ok,
|
| 494 |
+
blca_dfarber_mskcc_2014,25096233,PMC4238969,10.1158/2159-8290.CD-14-0623,ok,
|
| 495 |
+
blca_mskcc_solit_2012,23897969,PMC3753703,10.1200/JCO.2012.46.5740,ok,
|
| 496 |
+
blca_bgi,24121792,PMC7512009,10.1038/ng.2798,ok,
|
| 497 |
+
all_stjude_2013,23334668,PMC3919793,10.1038/ng.2532,ok,
|
| 498 |
+
acyc_mskcc_2013,23685749,PMC3708595,10.1038/ng.2643,ok,
|
| 499 |
+
acyc_jhu_2016,26862087,PMC4818686,10.1158/1940-6207.CAPR-15-0316,ok,
|
| 500 |
+
acyc_mda_2015,26631609,PMC4807116,10.1158/1078-0432.CCR-15-2867-T,ok,
|
| 501 |
+
acyc_sanger_2013,23778141,PMC3999050,10.1172/JCI67201,ok,
|
| 502 |
+
all_stjude_2016,27776115,PMC5144107,10.1038/ng.3691,ok,
|
| 503 |
+
angs_project_painter_2018,32042194,PMC12534164,,error;elink,Identifier not found in PMC
|
| 504 |
+
bfn_duke_nus_2015,26437033,PMC12458290,,error;elink,Identifier not found in PMC
|
| 505 |
+
blca_cornell_2016,27749842,PMC5549141,10.1038/ng.3692,ok,
|
| 506 |
+
aml_ohsu_2018,30333627,PMC6280667,10.1038/s41586-018-0623-z,ok,
|
| 507 |
+
blca_bcan_hcrn_2022,36333289,PMC9636269,10.1038/s41467-022-33980-9,ok,
|
| 508 |
+
aml_ohsu_2022,35868306,PMC9378589,10.1016/j.ccell.2022.07.002,ok,
|
| 509 |
+
asclc_msk_2024,39185963,PMC11726019,10.1158/2159-8290.CD-24-0286,ok,
|
| 510 |
+
brca_bccrc_xenograft_2014,25470049,PMC4864027,10.1038/nature13952,ok,
|
| 511 |
+
brca_broad,22722202,PMC4148686,10.1038/nature11154,ok,
|
| 512 |
+
brca_bccrc,22495314,PMC3863681,10.1038/nature10933,ok,
|
| 513 |
+
brca_igr_2015,28027327,PMC5189935,10.1371/journal.pmed.1002201,ok,
|
| 514 |
+
blca_tcga_pub_2017,28988769,PMC5687509,10.1016/j.cell.2017.09.007,ok,
|
| 515 |
+
brca_mskcc_2019,31552290,PMC6757060,10.1038/s41523-019-0126-6,ok,
|
| 516 |
+
brca_jup_msk_2020,33263939,PMC7869928,10.1002/cjp2.190,ok,
|
| 517 |
+
brain_cptac_2020,33242424,PMC8143193,10.1016/j.cell.2020.10.044,ok,
|
| 518 |
+
brca_cptac_2020,33212010,PMC8077737,10.1016/j.cell.2020.10.036,ok,
|
| 519 |
+
brca_dfci_2020,32404308,PMC8815415,10.1158/2159-8290.CD-19-1390,ok,
|
| 520 |
+
brca_sanger,22722201,PMC3428862,10.1038/nature11017,ok,
|
| 521 |
+
brca_tcga_pub,23000897,PMC3465532,10.1038/nature11412,ok,
|
| 522 |
+
breast_msk_2018,30205045,PMC6327853,10.1016/j.ccell.2018.08.008,ok,
|
| 523 |
+
breast_alpelisib_2020,32864625,PMC7450824,10.1038/s43018-020-0047-1,ok,
|
| 524 |
+
brca_smc_2018,29713003,PMC5928087,10.1038/s41467-018-04129-4,ok,
|
| 525 |
+
breast_ink4_msk_2021,34544752,PMC8831444,10.1158/2159-8290.CD-20-1726,ok,
|
| 526 |
+
brca_pareja_msk_2020,32220886,PMC7367727,10.1158/1078-0432.CCR-19-2563,ok,
|
| 527 |
+
crc_msk_2017,29316426,PMC5765991,10.1016/j.ccell.2017.12.004,ok,
|
| 528 |
+
pancan_pcawg_2020,32025007,PMC7025898,10.1038/s41586-020-1969-6,ok,
|
| 529 |
+
pdac_msk_2024,39753968,PMC11835752,10.1038/s41591-024-03362-3,ok,
|
| 530 |
+
braf_msk_archer_2024,38922339,PMC11371517,10.1158/1078-0432.CCR-23-3981,ok,
|
| 531 |
+
sarcoma_ucla_2024,39305899,PMC12318355,10.1016/j.stem.2024.08.010,ok,
|
| 532 |
+
csf_msk_2024,39289779,PMC11406943,10.1186/s40478-024-01846-4,ok,
|
| 533 |
+
normal_skin_fibroblast_2024,39091884,PMC11291049,10.1101/2024.07.23.604673,ok,
|
| 534 |
+
normal_skin_keratinocytes_2024,39091884,PMC11291049,10.1101/2024.07.23.604673,ok,
|
| 535 |
+
normal_skin_melanocytes_2024,33029006,PMC7581540,10.1038/s41586-020-2785-8,ok,
|
| 536 |
+
normal_skin_melanocytes_2024,39091884,PMC11291049,10.1101/2024.07.23.604673,ok,
|
| 537 |
+
normal_skin_melanocytes_2024,38895302,PMC11185634,10.1101/2024.06.04.597225,ok,
|
| 538 |
+
normal_skin_melanocytes_2024,39975212,PMC11839034,10.1101/2025.02.07.637114,ok,
|
| 539 |
+
chl_sccc_2023,36723991,PMC10150291,10.1158/2643-3230.BCD-22-0128,ok,
|
| 540 |
+
blca_msk_2025,40256659,PMC12008543,10.1016/j.euros.2025.03.009,ok,
|
| 541 |
+
esca_broad,23525077,PMC3678719,10.1038/ng.2591,ok,
|
| 542 |
+
escc_icgc,24670651,PMC12524419,,error;elink,Identifier not found in PMC
|
| 543 |
+
es_iocurie_2014,25223734,PMC4264969,10.1158/2159-8290.CD-14-0622,ok,
|
| 544 |
+
gbc_shanghai_2014,24997986,PMC12363069,,error;elink,Identifier not found in PMC
|
| 545 |
+
egc_tmucih_2015,25583476,PMC4313862,10.1073/pnas.1422640112,ok,
|
| 546 |
+
egc_msk_2017,29122777,PMC5813492,10.1158/2159-8290.CD-17-0787,ok,
|
| 547 |
+
dlbcl_duke_2017,28985567,PMC5659841,10.1016/j.cell.2017.09.027,ok,
|
| 548 |
+
dlbcl_dfci_2018,29713087,PMC6613387,10.1038/s41591-018-0016-8,ok,
|
| 549 |
+
gbc_msk_2018,30427539,PMC6636637,10.1002/cncr.31850,ok,
|
| 550 |
+
egc_trap_msk_2020,32437664,PMC8229851,10.1016/S1470-2045(20)30169-8,ok,
|
| 551 |
+
egc_mskcc_2020,33795256,PMC8228505,10.1158/1078-0432.CCR-20-4707,ok,
|
| 552 |
+
egc_trap_ccr_msk_2023,37406106,PMC10502449,10.1158/1078-0432.CCR-22-3769,ok,
|
| 553 |
+
hnsc_broad,21798893,PMC3415217,10.1126/science.1208130,ok,
|
| 554 |
+
hnc_mskcc_2016,27442865,PMC5253129,10.1001/jamaoncol.2016.1790,ok,
|
| 555 |
+
hcc_inserm_fr_2015,25822088,PMC4587544,10.1038/ng.3252,ok,
|
| 556 |
+
gct_msk_2016,27646943,PMC5477828,10.1200/JCO.2016.68.7798,ok,
|
| 557 |
+
hcc_mskimpact_2018,30373752,PMC6689131,10.1158/1078-0432.CCR-18-2293,ok,
|
| 558 |
+
glioma_mskcc_2019,31263031,PMC6753053,10.1158/1078-0432.CCR-19-0032,ok,
|
| 559 |
+
glioma_msk_2018,30675060,PMC6457907,10.1038/s41586-019-0882-3,ok,
|
| 560 |
+
hccihch_pku_2019,31130341,PMC8317046,10.1016/j.ccell.2019.04.007,ok,
|
| 561 |
+
hgsoc_msk_2021,34819508,PMC8613272,10.1038/s41525-021-00259-9,ok,
|
| 562 |
+
hcc_meric_2021,35508466,PMC9068765,10.1038/s41467-022-29960-8,ok,
|
| 563 |
+
hcc_clca_2024,38355797,PMC12548999,,error;elink,Identifier not found in PMC
|
| 564 |
+
kirc_bgi,22138691,PMC12432380,,error;elink,Identifier not found in PMC
|
| 565 |
+
kich_tcga_pub,25155756,PMC4160352,10.1016/j.ccr.2014.07.014,ok,
|
| 566 |
+
hnsc_jhu,21798897,PMC3162986,10.1126/science.1206923,ok,
|
| 567 |
+
hnsc_mdanderson_2013,23619168,PMC3858325,10.1158/2159-8290.CD-12-0537,ok,
|
| 568 |
+
ihch_smmu_2014,25526346,PMC12468851,,error;elink,Identifier not found in PMC
|
| 569 |
+
ihch_mskcc_2020,33963001,PMC8282702,10.1158/1078-0432.CCR-21-0412,ok,
|
| 570 |
+
ihch_msk_2021,33765338,PMC8713028,10.1002/hep.31829,ok,
|
| 571 |
+
lgg_ucsf_2014,24336570,PMC3998672,10.1126/science.1239947,ok,
|
| 572 |
+
lgggbm_tcga_pub,26824661,PMC4754110,10.1016/j.cell.2015.12.028,ok,
|
| 573 |
+
lihc_amc_prv,24798001,PMC12452113,,error;elink,Identifier not found in PMC
|
| 574 |
+
lihc_riken,22634756,PMC12529571,,error;elink,Identifier not found in PMC
|
| 575 |
+
luad_mskcc_2015,25765070,PMC4993154,10.1126/science.aaa1348,ok,
|
| 576 |
+
luad_broad,22980975,PMC3557932,10.1016/j.cell.2012.08.029,ok,
|
| 577 |
+
liad_inserm_fr_2014,24735922,PMC12261305,,error;elink,Identifier not found in PMC
|
| 578 |
+
lcll_broad_2013,23415222,PMC3575604,10.1016/j.cell.2013.01.019,ok,
|
| 579 |
+
luad_msk_npjpo_2021,34290393,PMC8295366,10.1038/s41698-021-00210-2,ok,
|
| 580 |
+
luad_cptac_2020,32649874,PMC7373300,10.1016/j.cell.2020.06.013,ok,
|
| 581 |
+
lusc_tcga_pub,22960745,PMC3466113,10.1038/nature11404,ok,
|
| 582 |
+
mbl_broad_2012,22820256,PMC3413789,10.1038/nature11329,ok,
|
| 583 |
+
mbl_icgc,22832583,PMC3662966,10.1038/nature11284,ok,
|
| 584 |
+
mbl_pcgp,22722829,PMC3412905,10.1038/nature11213,ok,
|
| 585 |
+
lung_msk_2017,28336552,PMC5482929,10.1158/2159-8290.CD-16-1337,ok,
|
| 586 |
+
luad_mskcc_2020,32791233,PMC7704768,10.1016/j.jtho.2020.08.005,ok,
|
| 587 |
+
luad_oncosg_2020,32015526,PMC12443039,,error;elink,Identifier not found in PMC
|
| 588 |
+
lung_smc_2016,27634761,PMC10937974,,error;elink,Identifier not found in PMC
|
| 589 |
+
lung_pdx_msk_2021,35440124,PMC9018685,10.1038/s41467-022-29794-4,ok,
|
| 590 |
+
mbl_dkfz_2017,28726821,PMC5905700,10.1038/nature22973,ok,
|
| 591 |
+
lusc_cptac_2021,34358469,PMC8475722,10.1016/j.cell.2021.07.016,ok,
|
| 592 |
+
lung_nci_2022,34493867,PMC8432745,10.1038/s41588-021-00920-0,ok,
|
| 593 |
+
mm_broad,24434212,PMC4241387,10.1016/j.ccr.2013.12.015,ok,
|
| 594 |
+
mcl_idibips_2013,24145436,PMC3831489,10.1073/pnas.1314608110,ok,
|
| 595 |
+
mds_tokyo_2011,21909114,PMC12550706,,error;elink,Identifier not found in PMC
|
| 596 |
+
mel_tsam_liang_2017,28373299,PMC5378171,10.1101/gr.213348.116,ok,
|
| 597 |
+
mel_ucla_2016,26997480,PMC4808437,10.1016/j.cell.2016.02.065,ok,
|
| 598 |
+
mixed_allen_2018,30150660,PMC6119118,10.1038/s41588-018-0200-2,ok,
|
| 599 |
+
mixed_selpercatinib_2020,35304457,PMC8933489,10.1038/s41467-022-28848-x,ok,
|
| 600 |
+
mixed_cfdna_msk_2020,34059130,PMC8165771,10.1186/s13073-021-00898-8,ok,
|
| 601 |
+
mel_dfci_2019,31792460,PMC6898788,10.1038/s41591-019-0654-5,ok,
|
| 602 |
+
mel_mskimpact_2020,33509808,PMC8046739,10.1158/1078-0432.CCR-20-4189,ok,
|
| 603 |
+
mbn_sfu_2023,36201743,PMC10023728,10.1182/blood.2022016534,ok,
|
| 604 |
+
mbn_msk_2024,38497151,PMC11215372,10.3324/haematol.2023.284565,ok,
|
| 605 |
+
npc_nusingapore,24952746,PMC12468275,,error;elink,Identifier not found in PMC
|
| 606 |
+
nepc_wcm_2016,26855148,PMC4777652,10.1038/nm.4045,ok,
|
| 607 |
+
nbl_ucologne_2015,26466568,PMC4881306,10.1038/nature14980,ok,
|
| 608 |
+
nbl_broad_2013,23334666,PMC3682833,10.1038/ng.2529,ok,
|
| 609 |
+
mrt_bcgsc_2016,26977886,PMC5094835,10.1016/j.ccell.2016.02.009,ok,
|
| 610 |
+
mpn_cimr_2013,24325359,PMC3966280,10.1056/NEJMoa1312542,ok,
|
| 611 |
+
nsclc_mskcc_2015,25765070,PMC4993154,10.1126/science.aaa1348,ok,
|
| 612 |
+
nsclc_mskcc_2018,29657128,PMC5953836,10.1016/j.ccell.2018.03.018,ok,
|
| 613 |
+
msk_access_2021,34145282,PMC8213710,10.1038/s41467-021-24109-5,ok,
|
| 614 |
+
mng_utoronto_2021,34433969,PMC11604310,10.1038/s41586-021-03850-3,ok,
|
| 615 |
+
mpnst_mskcc,25240281,PMC4249650,10.1038/ng.3095,ok,
|
| 616 |
+
nbl_amc_2012,22367537,PMC12508281,,error;elink,Identifier not found in PMC
|
| 617 |
+
nccrcc_genentech_2014,25401301,PMC4489427,10.1038/ng.3146,ok,
|
| 618 |
+
ov_tcga_pub,21720365,PMC3163504,10.1038/nature10166,ok,
|
| 619 |
+
paac_jhu_2014,24293293,PMC4048021,10.1002/path.4310,ok,
|
| 620 |
+
paad_icgc,23103869,PMC3530898,10.1038/nature11547,ok,
|
| 621 |
+
paad_utsw_2015,25855536,PMC4403382,10.1038/ncomms7744,ok,
|
| 622 |
+
nsclc_tcga_broad_2016,27158780,PMC4884143,10.1038/ng.3564,ok,
|
| 623 |
+
paad_qcmg_uq_2016,26909576,PMC12553238,,error;elink,Identifier not found in PMC
|
| 624 |
+
pact_jhu_2011,22158988,PMC3248495,10.1073/pnas.1118046108,ok,
|
| 625 |
+
nsclc_tracerx_2017,28445112,PMC12553238,,error;elink,Identifier not found in PMC
|
| 626 |
+
nsclc_tracerx_2017,28445469,PMC5812436,10.1038/nature22364,ok,
|
| 627 |
+
nsclc_pd1_msk_2018,29337640,PMC6075848,10.1200/JCO.2017.75.3384,ok,
|
| 628 |
+
ntrk_msk_2019,31871300,PMC7124988,10.1158/1078-0432.CCR-19-3165,ok,
|
| 629 |
+
pan_origimed_2020,35871175,PMC9308789,10.1038/s41467-022-31780-9,ok,
|
| 630 |
+
paad_cptac_2021,34534465,PMC8654574,10.1016/j.cell.2021.08.023,ok,
|
| 631 |
+
nst_nfosi_ntap,32561749,PMC7305302,10.1038/s41597-020-0508-5,ok,
|
| 632 |
+
panet_jhu_2011,21252315,PMC3144496,10.1126/science.1200609,ok,
|
| 633 |
+
pcnsl_mayo_2015,25991819,PMC4558226,10.1158/1078-0432.CCR-14-2116,ok,
|
| 634 |
+
prad_broad,22610119,PMC3673022,10.1038/ng.2279,ok,
|
| 635 |
+
crc_hta11_htan_2021,34910928,PMC8941949,10.1016/j.cell.2021.11.031,ok,
|
| 636 |
+
panet_shanghai_2013,24326773,PMC12056210,,error;elink,Identifier not found in PMC
|
| 637 |
+
plmeso_nyu_2015,25488749,PMC12404184,,error;elink,Identifier not found in PMC
|
| 638 |
+
prad_cpcg_2017,28068672,PMC12513462,,error;elink,Identifier not found in PMC
|
| 639 |
+
panet_arcnet_2017,28199314,PMC12533323,,error;elink,Identifier not found in PMC
|
| 640 |
+
past_dkfz_heidelberg_2013,23817572,PMC3951336,10.1038/ng.2682,ok,
|
| 641 |
+
prad_eururol_2017,28927585,PMC12508145,,error;elink,Identifier not found in PMC
|
| 642 |
+
prad_fhcrc,26928463,PMC5045679,10.1038/nm.4053,ok,
|
| 643 |
+
prad_mich,22722839,PMC3396711,10.1038/nature11125,ok,
|
| 644 |
+
prad_mskcc_2014,25024180,PMC4121784,10.1073/pnas.1411446111,ok,
|
| 645 |
+
prad_su2c_2015,26000489,PMC4484602,10.1016/j.cell.2015.05.001,ok,
|
| 646 |
+
prad_mskcc_2017,28825054,PMC5558263,10.1200/PO.17.00029,ok,
|
| 647 |
+
prad_p1000,29610475,PMC6107367,10.1038/s41588-018-0078-z,ok,
|
| 648 |
+
prad_su2c_2019,31061129,PMC6561293,10.1073/pnas.1902651116,ok,
|
| 649 |
+
prostate_dkfz_2018,30537516,PMC7444093,10.1016/j.ccell.2018.10.016,ok,
|
| 650 |
+
prad_msk_2019,31564440,PMC6949382,10.1016/j.cmet.2019.08.024,ok,
|
| 651 |
+
prad_mskcc_cheny1_organoids_2014,25201530,PMC4237931,10.1016/j.cell.2014.08.016,ok,
|
| 652 |
+
prad_mcspc_mskcc_2020,32220891,PMC7334067,10.1158/1078-0432.CCR-20-0168,ok,
|
| 653 |
+
prad_msk_stopsack_2021,34667026,PMC8776579,10.1158/1078-0432.CCR-21-2577,ok,
|
| 654 |
+
prostate_pcbm_swiss_2019,35504881,PMC9065149,10.1038/s41467-022-30003-5,ok,
|
| 655 |
+
sclc_clcgp,22941188,PMC4915822,10.1038/ng.2396,ok,
|
| 656 |
+
sclc_jhu,22941189,PMC3557461,10.1038/ng.2405,ok,
|
| 657 |
+
skcm_broad,22817889,PMC3600117,10.1016/j.cell.2012.06.024,ok,
|
| 658 |
+
rms_nih_2014,24436047,PMC4462130,10.1158/2159-8290.CD-13-0639,ok,
|
| 659 |
+
sarc_tcga_pub,29100075,PMC5693358,10.1016/j.cell.2017.10.014,ok,
|
| 660 |
+
sclc_cancercell_gardner_2017,28196596,PMC5313262,10.1016/j.ccell.2017.01.006,ok,
|
| 661 |
+
sclc_ucologne_2015,26168399,PMC4861069,10.1038/nature14664,ok,
|
| 662 |
+
sarcoma_mskcc_2022,35705560,PMC9200818,10.1038/s41467-022-30453-x,ok,
|
| 663 |
+
skcm_broad_dfarber,22622578,PMC3367798,10.1038/nature11071,ok,
|
| 664 |
+
skcm_yale,22842228,PMC3432702,10.1038/ng.2359,ok,
|
| 665 |
+
stad_pfizer_uhongkong,24816253,PMC12468435,,error;elink,Identifier not found in PMC
|
| 666 |
+
skcm_broad_brafresist_2012,24265153,PMC3947264,10.1158/2159-8290.CD-13-0617,ok,
|
| 667 |
+
skcm_mskcc_2014,25409260,PMC4315319,10.1056/NEJMoa1406498,ok,
|
| 668 |
+
skcm_tcga_pub_2015,26091043,PMC4580370,10.1016/j.cell.2015.05.044,ok,
|
| 669 |
+
skcm_dfci_2015,26359337,PMC5054517,10.1126/science.aad0095,ok,
|
| 670 |
+
stad_uhongkong,22037554,PMC12489181,,error;elink,Identifier not found in PMC
|
| 671 |
+
stad_utokyo,24816255,PMC12523349,,error;elink,Identifier not found in PMC
|
| 672 |
+
tet_nci_2014,24974848,PMC5705185,10.1038/ng.3016,ok,
|
| 673 |
+
thyroid_mskcc_2016,26878173,PMC4767360,10.1172/JCI85271,ok,
|
| 674 |
+
stes_tcga_pub,28052061,PMC5651175,10.1038/nature20805,ok,
|
| 675 |
+
summit_2018,29420467,PMC5808581,10.1038/nature25475,ok,
|
| 676 |
+
stmyec_wcm_2022,36577525,PMC9808553,10.1101/mcs.a006227,ok,
|
| 677 |
+
ucs_jhu_2014,25233892,PMC4354107,10.1038/ncomms6006,ok,
|
| 678 |
+
ucec_tcga_pub,23636398,PMC3704730,10.1038/nature12113,ok,
|
| 679 |
+
um_qimr_2016,26683228,PMC4826231,10.18632/oncotarget.6614,ok,
|
| 680 |
+
ucec_msk_2018,30068706,PMC6279519,10.1158/1078-0432.CCR-18-0412,ok,
|
| 681 |
+
uccc_nih_2017,28485815,PMC5587124,10.1002/cncr.30745,ok,
|
| 682 |
+
tmb_mskcc_2018,30643254,PMC6365097,10.1038/s41588-018-0312-8,ok,
|
| 683 |
+
ucec_cptac_2020,32059776,PMC7233456,10.1016/j.cell.2020.01.026,ok,
|
| 684 |
+
ucec_ccr_cfdna_msk_2022,36007103,PMC9852004,10.1158/1078-0432.CCR-22-1134,ok,
|
| 685 |
+
vsc_cuk_2018,29422544,PMC5903820,10.1038/emm.2017.265,ok,
|
| 686 |
+
utuc_cornell_baylor_mdacc_2019,31278255,PMC6611775,10.1038/s41467-019-10873-y,ok,
|
| 687 |
+
usarc_msk_2020,32299819,PMC7367750,10.1158/1078-0432.CCR-19-3959,ok,
|
| 688 |
+
utuc_igbmc_2021,33397444,PMC7780630,10.1186/s13059-020-02230-w,ok,
|
| 689 |
+
lgg_tcga_pan_can_atlas_2018,29625048,PMC5957518,10.1016/j.cell.2018.03.022,ok,
|
| 690 |
+
lgg_tcga_pan_can_atlas_2018,29596782,PMC6075717,10.1016/j.cels.2018.03.002,ok,
|
| 691 |
+
lgg_tcga_pan_can_atlas_2018,29622463,PMC6028190,10.1016/j.ccell.2018.03.007,ok,
|
| 692 |
+
lgg_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 693 |
+
lgg_tcga_pan_can_atlas_2018,29625055,PMC6066282,10.1016/j.cell.2018.02.052,ok,
|
| 694 |
+
lgg_tcga_pan_can_atlas_2018,29625050,PMC6070353,10.1016/j.cell.2018.03.035,ok,
|
| 695 |
+
lgg_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 696 |
+
lgg_tcga_pan_can_atlas_2018,30643250,PMC12521747,,error;elink,Identifier not found in PMC
|
| 697 |
+
lgg_tcga_pan_can_atlas_2018,32214244,PMC7500457,10.1038/s41586-020-2095-1,ok,
|
| 698 |
+
lgg_tcga_pan_can_atlas_2018,29625049,PMC5916814,10.1016/j.cell.2018.03.033,ok,
|
| 699 |
+
lgg_tcga_pan_can_atlas_2018,29850653,PMC5972025,10.1200/PO.17.00073,ok,
|
| 700 |
+
lgg_tcga_pan_can_atlas_2018,36334560,PMC12390932,,error;elink,Identifier not found in PMC
|
| 701 |
+
crc_orion_2024,39386479,PMC11463659,10.1101/2024.09.24.614701,ok,
|
| 702 |
+
brca_aurora_2023,36585450,PMC9886551,10.1038/s43018-022-00491-x,ok,
|
| 703 |
+
schw_ctf_synodos_2025,33025139,PMC7785562,10.1007/s00401-020-02230-x,ok,
|
| 704 |
+
ovary_geomx_gray_foundation_2024,39386723,PMC11463462,10.1101/2024.09.25.615007,ok,
|
| 705 |
+
brca_tcga_pub2015,26451490,PMC4603750,10.1016/j.cell.2015.09.033,ok,
|
| 706 |
+
hnsc_tcga_pub,25631445,PMC4311405,10.1038/nature14129,ok,
|
| 707 |
+
luad_tcga_pub,25079552,PMC4231481,10.1038/nature13385,ok,
|
| 708 |
+
thca_tcga_pub,25417114,PMC4243044,10.1016/j.cell.2014.09.050,ok,
|
| 709 |
+
blca_tcga_pub,24476821,PMC3962515,10.1038/nature12965,ok,
|
| 710 |
+
msk_ch_2020,33106634,PMC7891089,10.1038/s41588-020-00710-0,ok,
|
| 711 |
+
msk_spectrum_tme_2022,36517593,PMC9771812,10.1038/s41586-022-05496-1,ok,
|
| 712 |
+
pancan_mimsi_msk_2024,39746944,PMC11696176,10.1038/s41467-024-54970-z,ok,
|
| 713 |
+
mel_iatlas_riaz_nivolumab_2017,29033130,PMC5685550,10.1016/j.cell.2017.09.028,ok,
|
| 714 |
+
stad_oncosg_2018,29670109,PMC5906695,10.1038/s41467-018-03828-2,ok,
|
| 715 |
+
gbm_tcga_pub,18772890,PMC2671642,10.1038/nature07385,ok,
|
| 716 |
+
gbm_tcga_pub2013,24120142,PMC3910500,10.1016/j.cell.2013.09.034,ok,
|
| 717 |
+
odg_msk_2017,28472509,PMC5596171,10.1093/neuonc/nox086,ok,
|
| 718 |
+
gbm_tcga_pan_can_atlas_2018,29625048,PMC5957518,10.1016/j.cell.2018.03.022,ok,
|
| 719 |
+
gbm_tcga_pan_can_atlas_2018,29596782,PMC6075717,10.1016/j.cels.2018.03.002,ok,
|
| 720 |
+
gbm_tcga_pan_can_atlas_2018,29622463,PMC6028190,10.1016/j.ccell.2018.03.007,ok,
|
| 721 |
+
gbm_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 722 |
+
gbm_tcga_pan_can_atlas_2018,29625055,PMC6066282,10.1016/j.cell.2018.02.052,ok,
|
| 723 |
+
gbm_tcga_pan_can_atlas_2018,29625050,PMC6070353,10.1016/j.cell.2018.03.035,ok,
|
| 724 |
+
gbm_tcga_pan_can_atlas_2018,29617662,PMC5916809,10.1016/j.celrep.2018.03.050,ok,
|
| 725 |
+
gbm_tcga_pan_can_atlas_2018,30643250,PMC12521747,,error;elink,Identifier not found in PMC
|
| 726 |
+
gbm_tcga_pan_can_atlas_2018,32214244,PMC7500457,10.1038/s41586-020-2095-1,ok,
|
| 727 |
+
gbm_tcga_pan_can_atlas_2018,29625049,PMC5916814,10.1016/j.cell.2018.03.033,ok,
|
| 728 |
+
gbm_tcga_pan_can_atlas_2018,29850653,PMC5972025,10.1200/PO.17.00073,ok,
|
| 729 |
+
gbm_tcga_pan_can_atlas_2018,36334560,PMC12390932,,error;elink,Identifier not found in PMC
|
| 730 |
+
gbm_mayo_pdx_sarkaria_2019,31852831,PMC7056576,10.1158/1078-0432.CCR-19-0909,ok,
|
| 731 |
+
gbm_columbia_2019,30742119,PMC6810613,10.1038/s41591-019-0349-y,ok,
|
| 732 |
+
gbm_cptac_2021,33577785,PMC8044053,10.1016/j.ccell.2021.01.006,ok,
|
| 733 |
+
msk_impact_2017,28481359,PMC5461196,10.1038/nm.4333,ok,
|
pull_pdfs.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
import csv, sys, time, requests
|
| 3 |
+
|
| 4 |
+
# Base URL of the cBioPortal REST API; endpoint paths (e.g. "/studies") are appended to it.
BASE = "https://www.cbioportal.org/api"
|
| 5 |
+
# Headers sent with each API request; "Accept" asks the server for a JSON response.
HEADERS = {"Accept": "application/json"} # add 'X-API-KEY' here if your instance needs it
|
| 6 |
+
|
| 7 |
+
def get_all_studies(page_size=500):
    """Return every study record exposed by the ``/studies`` endpoint.

    Pages are requested one after another (``pageSize`` / ``pageNumber``
    query parameters, starting at page 0) until the server answers with an
    empty page.

    Args:
        page_size: Number of studies requested per page.

    Returns:
        A list with the decoded JSON items of all pages, in the order received.

    Raises:
        Whatever ``raise_for_status()`` raises when a response reports an
        HTTP error status.
    """
    collected = []
    page_number = 0
    fetching = True
    while fetching:
        response = requests.get(
            f"{BASE}/studies",
            headers=HEADERS,
            params={"pageSize": page_size, "pageNumber": page_number},
            timeout=60,
        )
        response.raise_for_status()
        page_items = response.json()
        if page_items:
            collected.extend(page_items)
            page_number += 1
            # Short pause between pages so the API is not hammered.
            time.sleep(0.2)
        else:
            # An empty page means there is nothing left to fetch.
            fetching = False
    return collected
|
| 23 |
+
|
| 24 |
+
def to_list(x):
    """Normalize a value to a list.

    * A list is returned unchanged (the same object).
    * ``None`` becomes an empty list.
    * Anything else is converted with ``str()`` and split on commas; each
      piece is stripped of surrounding whitespace and empty pieces are dropped.
    """
    if isinstance(x, list):
        return x
    if x is None:
        return []
    # Some portals store several values as one comma-separated string.
    tokens = []
    for piece in str(x).split(","):
        cleaned = piece.strip()
        if cleaned:
            tokens.append(cleaned)
    return tokens
|
| 31 |
+
|
| 32 |
+
def main(out_csv="cbioportal_study_pmids.csv"):
    """Write a CSV with one ``studyId,pmid`` row per PubMed ID of every study.

    All studies are fetched with ``get_all_studies()``. Each study's ``pmid``
    field is normalized with ``to_list()`` and expanded into one row per
    PubMed ID. Studies without a PMID contribute no rows.

    Args:
        out_csv: Path of the CSV file to create (overwritten if it exists).
    """
    # The CSV columns must match the keys of the row dicts exactly:
    # csv.DictWriter raises ValueError on any row key missing from fieldnames.
    fieldnames = ["studyId", "pmid"]

    studies = get_all_studies()
    # fields commonly present: studyId, name, shortName, cancerTypeId, description, citation, pmid, etc.
    rows = []
    for s in studies:
        for pmid in to_list(s.get("pmid")):
            rows.append({"studyId": s.get("studyId"), "pmid": pmid})

    # newline="" lets the csv module control line endings itself.
    with open(out_csv, "w", newline="", encoding="utf-8") as f:
        w = csv.DictWriter(f, fieldnames=fieldnames)
        w.writeheader()
        w.writerows(rows)
    print(f"wrote {len(rows)} rows to {out_csv}")
|
| 52 |
+
|
| 53 |
+
if __name__ == "__main__":
    # Optional first command-line argument: the output CSV path.
    cli_args = sys.argv[1:]
    if cli_args:
        main(cli_args[0])
    else:
        main("cbioportal_study_pmids.csv")
|
requirements.txt
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio
|
| 2 |
+
langchain
|
| 3 |
+
sqlalchemy
|
| 4 |
+
langchain-community
|
| 5 |
+
langchain-openai
|
| 6 |
+
pypdf
|
| 7 |
+
tiktoken
|
| 8 |
+
openai
|
| 9 |
+
langchain-text-splitters
|
| 10 |
+
pdfplumber
|
| 11 |
+
pillow
|
| 12 |
+
sentence-transformers
|
| 13 |
+
faiss-cpu
|
| 14 |
+
spacy
|
| 15 |
+
tqdm
|
| 16 |
+
fastmcp
|
| 17 |
+
|
| 18 |
+
# Only needed if you use --ocr
|
| 19 |
+
pdf2image
|
| 20 |
+
pytesseract
|
unfetched_pmcids.tsv
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
PMC12088707 PMC12088707
|
| 2 |
+
PMC12404184 PMC12404184
|