fromozu committed on
Commit
5eb186c
·
verified ·
1 Parent(s): 6fb3872

Upload hf_backend/config.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. hf_backend/config.py +171 -0
hf_backend/config.py ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from dataclasses import dataclass
5
+ from pathlib import Path
6
+
7
+
8
@dataclass
class AppConfig:
    """Typed container for the backend's runtime settings.

    Instances are built by ``load_config`` from environment variables; the
    class itself performs no validation or I/O.

    NOTE(review): the dataclass-generated ``__repr__`` includes every field,
    including the token/key fields below, so avoid logging whole instances.
    """

    # Hugging Face dataset access and the shared run token.
    dataset_repo_id: str
    hf_token: str
    shared_token: str

    # E-mail delivery settings.
    resend_api_key: str
    resend_api_base: str
    from_email: str
    kindle_email_default: str

    # Translation defaults and EPUB processing knobs.
    translation_model: str
    default_language: str
    batch_size: int
    epub_accumulated_num: int
    epub_backmatter_skip_after_percent: int
    epub_backmatter_titles: str  # comma-separated list of titles

    # Review thresholds.
    gemini_review_mode: str
    gemini_review_min_chinese_ratio: float
    gemini_review_length_ratio_min: float
    gemini_review_length_ratio_max: float

    # Translation endpoint, converter binary and provider credentials.
    api_base: str
    ebook_convert_bin: str
    custom_api: str
    gemini_key: str
    claude_key: str
    deepl_key: str
    caiyun_key: str
    openai_key: str

    # Job bookkeeping paths/prefixes and the local working directory.
    manifest_path: str
    inbox_prefix: str
    done_prefix: str
    work_dir: Path

    # Job scheduling / resume limits.
    max_jobs_per_run: int
    lock_timeout_minutes: int
    auto_resume_stale_minutes: int
    max_auto_resume_attempts: int
    epub_checkpoint_interval: int

    # Weekly source templates (contain a ``{date}`` placeholder by default).
    weekly_url_template: str
    weekly_filename_template: str

    # HTTP fetch settings and book-source endpoints.
    fetch_user_agent: str
    fetch_timeout_seconds: int
    standard_ebooks_search_url: str
    project_gutenberg_search_url: str
    internet_archive_advancedsearch_url: str
    internet_archive_metadata_url_template: str
    src_a_search_url: str
    src_a_base_url: str
    src_b_base_url: str

    @property
    def repo_root(self) -> Path:
        """Return the parent of the directory that contains this module."""
        return Path(__file__).resolve().parent.parent

    @property
    def make_book_script(self) -> Path:
        """Return the path ``<repo_root>/bilingual_book_maker/make_book.py``."""
        return self.repo_root / "bilingual_book_maker" / "make_book.py"

    def resolve_model(self, preferred: str | None = None) -> str:
        """Return the translation model name to use.

        Args:
            preferred: Optional override. ``None``, an empty string, or a
                whitespace-only string all fall back to ``translation_model``.

        Returns:
            The stripped model name, with ``"google"`` mapped to ``"gemini"``.
        """
        # Strip *before* falling back so a blank override cannot win over the
        # configured default and produce an empty model name.
        model = (preferred or "").strip() or self.translation_model.strip()
        if model == "google":
            return "gemini"
        return model

    def resolve_language(self, preferred: str | None = None) -> str:
        """Return the target language to use.

        Args:
            preferred: Optional override. ``None``, an empty string, or a
                whitespace-only string all fall back to ``default_language``.

        Returns:
            The stripped language code.
        """
        # Same blank-override handling as resolve_model.
        return (preferred or "").strip() or self.default_language.strip()
72
+
73
+
74
def _env_str(name: str, default: str = "") -> str:
    """Return environment variable *name* (or *default*) with whitespace stripped."""
    return os.getenv(name, default).strip()


def _env_int(name: str, default: int) -> int:
    """Return environment variable *name* as an int; unset or blank yields *default*."""
    raw = os.getenv(name, "").strip()
    if not raw:
        return default
    try:
        return int(raw)
    except ValueError as err:
        raise ValueError(f"{name} must be an integer, got {raw!r}") from err


def _env_float(name: str, default: float) -> float:
    """Return environment variable *name* as a float; unset or blank yields *default*."""
    raw = os.getenv(name, "").strip()
    if not raw:
        return default
    try:
        return float(raw)
    except ValueError as err:
        raise ValueError(f"{name} must be a number, got {raw!r}") from err


def load_config() -> AppConfig:
    """Build an ``AppConfig`` from environment variables.

    Side effect: the working directory (``WORK_DIR``, default
    ``/tmp/ebook-work``) is created if it does not exist.

    Returns:
        The populated configuration object.

    Raises:
        RuntimeError: If ``HF_DATASET_REPO_ID``, ``HF_TOKEN`` or
            ``RUN_SHARED_TOKEN`` is unset or blank.
        ValueError: If a numeric variable holds a non-numeric value; the
            message names the offending variable. A numeric variable that is
            set but blank falls back to its default instead of failing.
    """
    dataset_repo_id = _env_str("HF_DATASET_REPO_ID")
    hf_token = _env_str("HF_TOKEN")
    shared_token = _env_str("RUN_SHARED_TOKEN")
    if not dataset_repo_id:
        raise RuntimeError("HF_DATASET_REPO_ID is required")
    if not hf_token:
        raise RuntimeError("HF_TOKEN is required")
    if not shared_token:
        raise RuntimeError("RUN_SHARED_TOKEN is required")

    work_dir = Path(os.getenv("WORK_DIR", "/tmp/ebook-work")).resolve()
    work_dir.mkdir(parents=True, exist_ok=True)

    # The router key takes precedence; the Google key is only a fallback.
    gemini_key = _env_str("BBM_GEMINI_ROUTER_API_KEY") or _env_str(
        "BBM_GOOGLE_GEMINI_KEY"
    )

    # Collect (and thereby validate) every value before constructing the
    # config object, so a bad variable is reported before anything is built.
    settings = {
        "dataset_repo_id": dataset_repo_id,
        "hf_token": hf_token,
        "shared_token": shared_token,
        "resend_api_key": _env_str("RESEND_API_KEY"),
        "resend_api_base": _env_str("RESEND_API_BASE", "https://api.resend.com"),
        "from_email": _env_str("FROM_EMAIL"),
        "kindle_email_default": _env_str("KINDLE_EMAIL_DEFAULT"),
        "translation_model": _env_str("TRANSLATION_MODEL", "gemini"),
        "default_language": _env_str("DEFAULT_LANGUAGE", "zh-hans"),
        "batch_size": _env_int("TRANSLATION_BATCH_SIZE", 10),
        "epub_accumulated_num": _env_int("EPUB_ACCUMULATED_NUM", 2200),
        "epub_backmatter_skip_after_percent": _env_int(
            "EPUB_BACKMATTER_SKIP_AFTER_PERCENT", 50
        ),
        "epub_backmatter_titles": _env_str(
            "EPUB_BACKMATTER_TITLES",
            "about the author,notes about the author,notes on the author,biography,biographical note,credits,acknowledgments,acknowledgements,copyright,bibliography,index,endnotes,notes,further reading,also by the author,about this book,about this ebook",
        ),
        "gemini_review_mode": _env_str("GEMINI_REVIEW_MODE", "suspicious_only"),
        "gemini_review_min_chinese_ratio": _env_float(
            "GEMINI_REVIEW_MIN_CHINESE_RATIO", 0.2
        ),
        "gemini_review_length_ratio_min": _env_float(
            "GEMINI_REVIEW_LENGTH_RATIO_MIN", 0.35
        ),
        "gemini_review_length_ratio_max": _env_float(
            "GEMINI_REVIEW_LENGTH_RATIO_MAX", 2.5
        ),
        "api_base": _env_str("TRANSLATION_API_BASE"),
        "ebook_convert_bin": _env_str("EBOOK_CONVERT_BIN", "ebook-convert"),
        "custom_api": _env_str("BBM_CUSTOM_API"),
        "gemini_key": gemini_key,
        "claude_key": _env_str("BBM_CLAUDE_API_KEY"),
        "deepl_key": _env_str("BBM_DEEPL_API_KEY"),
        "caiyun_key": _env_str("BBM_CAIYUN_API_KEY"),
        "openai_key": _env_str("BBM_OPENAI_API_KEY"),
        "manifest_path": _env_str("HF_MANIFEST_PATH", "jobs/index.json"),
        "inbox_prefix": _env_str("HF_INBOX_PREFIX", "inbox"),
        "done_prefix": _env_str("HF_DONE_PREFIX", "done"),
        "work_dir": work_dir,
        "max_jobs_per_run": _env_int("MAX_JOBS_PER_RUN", 1),
        "lock_timeout_minutes": _env_int("LOCK_TIMEOUT_MINUTES", 2400),
        "auto_resume_stale_minutes": _env_int("AUTO_RESUME_STALE_MINUTES", 20),
        "max_auto_resume_attempts": _env_int("MAX_AUTO_RESUME_ATTEMPTS", 30),
        "epub_checkpoint_interval": _env_int("EPUB_CHECKPOINT_INTERVAL", 50),
        "weekly_url_template": _env_str(
            "WEEKLY_SOURCE_URL_TEMPLATE",
            "https://github.com/hehonghui/awesome-english-ebooks/raw/refs/heads/master/01_economist/te_{date}/TheEconomist.{date}.epub",
        ),
        "weekly_filename_template": _env_str(
            "WEEKLY_SOURCE_FILENAME_TEMPLATE",
            "TheEconomist.{date}.epub",
        ),
        "fetch_user_agent": _env_str("FETCH_USER_AGENT", "ebook-fetcher/1.0"),
        "fetch_timeout_seconds": _env_int("FETCH_TIMEOUT_SECONDS", 30),
        "standard_ebooks_search_url": _env_str(
            "STANDARD_EBOOKS_SEARCH_URL",
            "https://standardebooks.org/ebooks",
        ),
        "project_gutenberg_search_url": _env_str(
            "PROJECT_GUTENBERG_SEARCH_URL",
            "https://www.gutenberg.org/ebooks/search/",
        ),
        "internet_archive_advancedsearch_url": _env_str(
            "INTERNET_ARCHIVE_ADVANCEDSEARCH_URL",
            "https://archive.org/advancedsearch.php",
        ),
        "internet_archive_metadata_url_template": _env_str(
            "INTERNET_ARCHIVE_METADATA_URL_TEMPLATE",
            "https://archive.org/metadata/{identifier}",
        ),
        "src_a_search_url": _env_str("SRC_A_SEARCH_URL"),
        "src_a_base_url": _env_str("SRC_A_BASE_URL"),
        "src_b_base_url": _env_str("SRC_B_BASE_URL"),
    }
    return AppConfig(**settings)