File size: 3,282 Bytes
fcee306
 
 
 
 
ec0e654
fcee306
 
 
 
 
 
 
 
 
 
 
 
 
 
6727dbb
fcee306
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ec0e654
fcee306
 
 
 
 
 
 
 
 
 
ec0e654
fcee306
 
ec0e654
 
 
 
 
fcee306
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import os
import shutil
import zipfile
import subprocess
import requests
import stat
import sys

from git import Repo
import gdown


def run(cmd, cwd=None):
    """Echo a command to stdout, execute it, and fail loudly on error.

    Args:
        cmd: The command to execute, passed straight to ``subprocess.run``.
        cwd: Optional working directory for the child process.

    Raises:
        subprocess.CalledProcessError: If the command exits with a
            non-zero status.
    """
    print("Running: {}".format(cmd))
    completed = subprocess.run(cmd, cwd=cwd)
    # Equivalent to passing check=True: raises on a non-zero exit code.
    completed.check_returncode()


def check_needs_building():
    """Report whether any expected build artifact is missing.

    The paths are resolved relative to the current working directory.

    Returns:
        True if ``fast_align`` or ``atools`` does not exist, or if the
        Okapi folder or the ``fast_align_config`` folder is not a
        directory; False when all four are present.
    """
    # The two binaries only need to exist.
    for binary in ("fast_align", "atools"):
        if not os.path.exists(binary):
            return True

    # The two unpacked archives must be actual directories.
    for folder in ("okapi-apps_gtk2-linux-x86_64_1.47.0", "fast_align_config"):
        if not os.path.isdir(folder):
            return True

    return False


def _remove_quietly(path):
    """Delete *path* if it exists; a missing file is not an error."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def _build_fast_align():
    """Clone fast_align, compile it, and copy its binaries into the CWD."""
    src_dir = "fast_align_src"
    build_dir = os.path.join(src_dir, "build")

    # A checkout left over from an earlier failed run would make the clone
    # fail, because git refuses to clone into a non-empty directory.
    shutil.rmtree(src_dir, ignore_errors=True)

    print("Cloning fast_align repository...")
    try:
        Repo.clone_from("https://github.com/clab/fast_align.git", src_dir)

        os.makedirs(build_dir, exist_ok=True)

        print("Running CMake...")
        run(["cmake", "-DCMAKE_POLICY_VERSION_MINIMUM=3.5", ".."], cwd=build_dir)

        print("Running make...")
        run(["make"], cwd=build_dir)

        # Copy binaries next to this script's working directory.
        for binary in ("fast_align", "atools"):
            shutil.copy(os.path.join(build_dir, binary), ".")
    finally:
        # Always drop the checkout, even when the build fails, so that a
        # retry starts from a clean state. Errors are ignored here so they
        # cannot mask the original build failure.
        shutil.rmtree(src_dir, ignore_errors=True)


def _download_fast_align_config():
    """Download the fast_align config archive and unpack it into the CWD."""
    archive = "fast_align_config.zip"

    print("Downloading fast_align_config.zip using gdown...")
    try:
        downloaded = gdown.download(
            "https://drive.google.com/uc?id=1OS-qbYLAgLJ2n4cpk9usNdTcNARjNaSr",
            archive,
            quiet=False,
        )
        # Some gdown versions signal failure by returning None rather than
        # raising; fail here instead of with a confusing zip error below.
        if downloaded is None:
            raise RuntimeError("Failed to download fast_align_config.zip")

        print("Unzipping config...")
        with zipfile.ZipFile(archive, "r") as z:
            z.extractall(".")
    finally:
        _remove_quietly(archive)


def _install_okapi_tikal():
    """Download Okapi Tikal, unpack it, and make ``tikal.sh`` executable."""
    url = "https://okapiframework.org/binaries/main/1.47.0/okapi-apps_gtk2-linux-x86_64_1.47.0.zip"
    outfile = "okapi-apps_gtk2-linux-x86_64_1.47.0.zip"
    outfolder = "okapi-apps_gtk2-linux-x86_64_1.47.0"

    print("Downloading Okapi Tikal...")
    try:
        # Without a timeout, requests waits forever on a stalled connection.
        with requests.get(url, stream=True, timeout=60) as r:
            r.raise_for_status()
            with open(outfile, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)

        print("Unzipping Okapi Tikal...")
        with zipfile.ZipFile(outfile, "r") as z:
            z.extractall(outfolder)
    finally:
        _remove_quietly(outfile)

    tikal_file = os.path.join(outfolder, "tikal.sh")
    current_permissions = os.stat(tikal_file).st_mode

    # Add execute permissions for user, group, and others
    os.chmod(tikal_file, current_permissions | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)


def _install_spacy_model():
    """Download the ``xx_ent_wiki_sm`` spaCy model with the current interpreter."""
    print("Downloading spaCy model...")
    run([sys.executable, "-m", "spacy", "download", "xx_ent_wiki_sm"])


def main():
    """Build and download every external tool, in order.

    Steps, all relative to the current working directory:

    1. Clone and compile fast_align, leaving ``fast_align`` and ``atools``.
    2. Download and unpack the fast_align config archive.
    3. Download and unpack Okapi Tikal and mark ``tikal.sh`` executable.
    4. Install the spaCy model ``xx_ent_wiki_sm``.

    Temporary artifacts (the git checkout and downloaded zip files) are
    removed even when a step fails, so the function can be re-run.

    Raises:
        subprocess.CalledProcessError: If cmake, make, or the spaCy
            download exits with a non-zero status.
        requests.HTTPError: If the Okapi download returns an error status.
        RuntimeError: If gdown reports that the config download failed.
    """
    # -------------------------------------------------------------------
    # 1. Clone and compile fast_align
    # -------------------------------------------------------------------
    _build_fast_align()

    # -------------------------------------------------------------------
    # 2. Download fast_align config files
    # -------------------------------------------------------------------
    _download_fast_align_config()

    # -------------------------------------------------------------------
    # 3. Download Okapi Tikal
    # -------------------------------------------------------------------
    _install_okapi_tikal()

    # -------------------------------------------------------------------
    # 4. Install spaCy model
    # -------------------------------------------------------------------
    _install_spacy_model()

    print("\n All building tasks completed successfully!")


# Script entry point: run the full build/download sequence when this file
# is executed directly rather than imported.
if __name__ == '__main__':
    main()