File size: 2,509 Bytes
e7fe90a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#!/usr/bin/env python3
"""

extract_chain_seq.py



Extracts the 1-letter amino acid sequence for a specified chain

from a PDB or mmCIF file.



Usage:

    python extract_chain_seq.py <input_file> <chain_id>



Outputs:

    The amino acid sequence on stdout (no header).

"""

import sys
import gemmi
from Bio.PDB import PDBParser
# --- Robust import for three_to_one across Biopython versions ---
try:
    from Bio.PDB.Polypeptide import three_to_one
except ImportError:
    try:
        from Bio.SeqUtils import seq1 as three_to_one
    except ImportError:
        from Bio.PDB.Polypeptide import protein_letters_3to1

        def three_to_one(resname):
            resname = (resname or "").strip().upper()
            if resname == "MSE":
                resname = "MET"
            return protein_letters_3to1.get(resname.title(), "X")

if len(sys.argv) != 3:
    print("Usage: python extract_chain_seq.py <input_file> <chain_id>")
    sys.exit(1)

path = sys.argv[1]
chain_id = sys.argv[2]
ext = path.lower().split(".")[-1]


def seq_from_pdb(pdb_path, chain):
    parser = PDBParser(QUIET=True)
    try:
        structure = parser.get_structure("x", pdb_path)
    except Exception as e:
        sys.stderr.write(f"Error reading PDB: {e}\n")
        return ""

    for model in structure:
        for c in model:
            if c.id == chain:
                seq = ""
                for res in c:
                    try:
                        seq += three_to_one(res.get_resname())
                    except KeyError:
                        pass
                return seq
    return ""


def seq_from_cif(cif_path, chain):
    try:
        doc = gemmi.cif.read_file(cif_path)
        structure = gemmi.make_structure_from_block(doc.sole_block())
    except Exception as e:
        sys.stderr.write(f"Error reading CIF: {e}\n")
        return ""

    for model in structure:
        for c in model:
            if c.name == chain:
                seq = ""
                for res in c:
                    if res.is_amino_acid():
                        seq += gemmi.residue_name_3to1.get(res.name, "")
                return seq
    return ""


# Select correct parser
if ext == "pdb":
    seq = seq_from_pdb(path, chain_id)
elif ext in ("cif", "mmcif", "mcif"):
    seq = seq_from_cif(path, chain_id)
else:
    sys.stderr.write(f"Unsupported file extension: {ext}\n")
    seq = ""

# Output sequence
print(seq)