gordonchan committed on
Commit
8efb4df
·
verified ·
1 Parent(s): e2f2d2f

Delete crazy_functions

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. crazy_functions/CodeInterpreter.py +0 -232
  2. crazy_functions/Langchain知识库.py +0 -106
  3. crazy_functions/Latex全文润色.py +0 -245
  4. crazy_functions/Latex全文翻译.py +0 -176
  5. crazy_functions/Latex输出PDF结果.py +0 -306
  6. crazy_functions/__init__.py +0 -0
  7. crazy_functions/agent_fns/auto_agent.py +0 -23
  8. crazy_functions/agent_fns/echo_agent.py +0 -19
  9. crazy_functions/agent_fns/general.py +0 -134
  10. crazy_functions/agent_fns/persistent.py +0 -16
  11. crazy_functions/agent_fns/pipe.py +0 -194
  12. crazy_functions/agent_fns/watchdog.py +0 -28
  13. crazy_functions/chatglm微调工具.py +0 -141
  14. crazy_functions/crazy_functions_test.py +0 -231
  15. crazy_functions/crazy_utils.py +0 -606
  16. crazy_functions/game_fns/game_ascii_art.py +0 -42
  17. crazy_functions/game_fns/game_interactive_story.py +0 -212
  18. crazy_functions/game_fns/game_utils.py +0 -35
  19. crazy_functions/gen_fns/gen_fns_shared.py +0 -70
  20. crazy_functions/ipc_fns/mp.py +0 -37
  21. crazy_functions/json_fns/pydantic_io.py +0 -111
  22. crazy_functions/latex_fns/latex_actions.py +0 -467
  23. crazy_functions/latex_fns/latex_toolbox.py +0 -562
  24. crazy_functions/latex_utils.py +0 -788
  25. crazy_functions/live_audio/aliyunASR.py +0 -261
  26. crazy_functions/live_audio/audio_io.py +0 -51
  27. crazy_functions/multi_stage/multi_stage_utils.py +0 -93
  28. crazy_functions/pdf_fns/breakdown_txt.py +0 -125
  29. crazy_functions/pdf_fns/parse_pdf.py +0 -171
  30. crazy_functions/pdf_fns/report_gen_html.py +0 -58
  31. crazy_functions/pdf_fns/report_template.html +0 -0
  32. crazy_functions/test_project/cpp/cppipc/buffer.cpp +0 -87
  33. crazy_functions/test_project/cpp/cppipc/ipc.cpp +0 -701
  34. crazy_functions/test_project/cpp/cppipc/policy.h +0 -25
  35. crazy_functions/test_project/cpp/cppipc/pool_alloc.cpp +0 -17
  36. crazy_functions/test_project/cpp/cppipc/prod_cons.h +0 -433
  37. crazy_functions/test_project/cpp/cppipc/queue.h +0 -216
  38. crazy_functions/test_project/cpp/cppipc/shm.cpp +0 -103
  39. crazy_functions/test_project/cpp/cppipc/waiter.h +0 -83
  40. crazy_functions/test_project/cpp/cppipc/来源 +0 -3
  41. crazy_functions/test_project/cpp/libJPG/jpgd.cpp +0 -3276
  42. crazy_functions/test_project/cpp/libJPG/jpgd.h +0 -316
  43. crazy_functions/test_project/cpp/libJPG/jpge.cpp +0 -1049
  44. crazy_functions/test_project/cpp/libJPG/jpge.h +0 -172
  45. crazy_functions/test_project/cpp/libJPG/来源 +0 -3
  46. crazy_functions/test_project/cpp/longcode/jpgd.cpp +0 -3276
  47. crazy_functions/test_project/cpp/longcode/jpge.cpp +0 -1049
  48. crazy_functions/test_project/cpp/longcode/prod_cons.h +0 -433
  49. crazy_functions/test_project/latex/attention/background.tex +0 -58
  50. crazy_functions/test_project/latex/attention/introduction.tex +0 -18
crazy_functions/CodeInterpreter.py DELETED
@@ -1,232 +0,0 @@
1
- from collections.abc import Callable, Iterable, Mapping
2
- from typing import Any
3
- from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc
4
- from toolbox import promote_file_to_downloadzone, get_log_folder
5
- from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
6
- from .crazy_utils import input_clipping, try_install_deps
7
- from multiprocessing import Process, Pipe
8
- import os
9
- import time
10
-
11
- templete = """
12
- ```python
13
- import ... # Put dependencies here, e.g. import numpy as np
14
-
15
- class TerminalFunction(object): # Do not change the name of the class, The name of the class must be `TerminalFunction`
16
-
17
- def run(self, path): # The name of the function must be `run`, it takes only a positional argument.
18
- # rewrite the function you have just written here
19
- ...
20
- return generated_file_path
21
- ```
22
- """
23
-
24
def inspect_dependency(chatbot, history):
    """
    Refresh the UI once and report success.

    This is a generator: it delegates to `update_ui` and its return value
    (always True) is delivered to a caller that uses `yield from`.
    """
    ui_refresh = update_ui(chatbot=chatbot, history=history)  # refresh the UI
    yield from ui_refresh
    return True
27
-
28
def get_code_block(reply):
    """
    Extract the generated code from a markdown-formatted model reply.

    The reply is scanned for fenced blocks (triple backticks). If exactly one
    block exists it is returned; otherwise the first block containing
    `class TerminalFunction` is returned. A leading `python` language tag is
    removed from the returned block.

    Args:
        reply: full text of the model reply.

    Returns:
        The content of the selected code block, without the language tag.

    Raises:
        RuntimeError: if no suitable code block is found.
    """
    import re

    def _drop_language_tag(block):
        # NOTE: str.strip('python') must not be used here. Its argument is a
        # *set of characters* removed from both ends, so it would also eat
        # trailing code characters (e.g. "math.sqrt" -> "math.sqr").
        tag = 'python'
        if block.startswith(tag):
            return block[len(tag):]
        return block

    pattern = r"```([\s\S]*?)```"          # non-greedy match of fenced code blocks
    matches = re.findall(pattern, reply)   # all fenced blocks, in order
    if len(matches) == 1:
        return _drop_language_tag(matches[0])
    for match in matches:
        if 'class TerminalFunction' in match:
            return _drop_language_tag(match)
    raise RuntimeError("GPT is not generating proper code.")
38
-
39
def gpt_interact_multi_step(txt, file_type, llm_kwargs, chatbot, history):
    """
    Run a two-round conversation with the model to obtain runnable code.

    Round one asks for a single Python function that takes the path of a
    `file_type` file and performs the task described by `txt`. Round two asks
    the model to rewrite that function to fit the module-level `templete`.

    `history` is extended in place with the prompt/reply pair of each round.

    Returns:
        A 7-tuple: (raw reply of round two, installation advice (always ""),
        txt, file_type, llm_kwargs, chatbot, history).
    """
    programmer_prompt = r"You are a programmer."

    # ---- Round 1: first draft, sent without any previous conversation ----
    first_request = (
        f'Your job:\n'
        f'1. write a single Python function, which takes a path of a `{file_type}` file as the only argument and returns a `string` containing the result of analysis or the path of generated files. \n'
        + "2. You should write this function to perform following task: " + txt + "\n"
        + "3. Wrap the output python function with markdown codeblock."
    )
    first_reply = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=first_request,
        inputs_show_user=first_request,
        llm_kwargs=llm_kwargs,
        chatbot=chatbot,
        history=[],
        sys_prompt=programmer_prompt,
    )
    history.extend([first_request, first_reply])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    # ---- Round 2: rewrite the draft so that it matches the template ----
    second_request = (
        "If previous stage is successful, rewrite the function you have just written to satisfy following templete: \n"
        + templete
    )
    second_request_shown = "If previous stage is successful, rewrite the function you have just written to satisfy executable templete. "
    second_reply = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=second_request,
        inputs_show_user=second_request_shown,
        llm_kwargs=llm_kwargs,
        chatbot=chatbot,
        history=history,
        sys_prompt=programmer_prompt,
    )
    history.extend([second_request, second_reply])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    # A third round (asking the model how to install the required packages)
    # is not performed, so the installation advice is always empty.
    installation_advance = ""

    return second_reply, installation_advance, txt, file_type, llm_kwargs, chatbot, history
93
-
94
def make_module(code):
    """
    Save generated code as a module file in the log folder.

    The file is named `gpt_fn_<timestamp>.py`. The name of the first class in
    `code` declared as `class Name(` is looked up with a regular expression.

    Returns:
        A locator string of the form `<dotted log folder>.<module>-><class>`,
        the format consumed by `init_module_instance`.
    """
    import re

    module_name = 'gpt_fn_' + gen_time_str().replace('-', '_')
    target_path = f'{get_log_folder()}/{module_name}.py'
    with open(target_path, 'w', encoding='utf8') as handle:
        handle.write(code)

    # Raises AttributeError if the code contains no `class Name(` declaration
    class_name = re.search(r'class (\w+)\(', code).group(1)
    package_path = get_log_folder().replace('/', '.')
    return f"{package_path}.{module_name}->{class_name}"
107
-
108
def init_module_instance(module):
    """
    Import a module and instantiate one of its classes.

    Args:
        module: locator string of the form `dotted.module.path->ClassName`.

    Returns:
        A new instance of the class, created with no arguments.
    """
    from importlib import import_module

    module_path, class_name = module.split('->')
    loaded_module = import_module(module_path)
    factory = getattr(loaded_module, class_name)
    return factory()
113
-
114
def for_immediate_show_off_when_possible(file_type, fp, chatbot):
    """
    Append an inline image preview to the chat when the file is a picture.

    For `png` / `jpg` files one message is appended to `chatbot`, containing
    the absolute path of `fp` and an HTML `<img>` tag pointing at it. Any other
    file type leaves `chatbot` untouched.

    Returns:
        The same `chatbot` object that was passed in.
    """
    if file_type not in ['png', 'jpg']:
        return chatbot

    image_path = os.path.abspath(fp)
    address_html = f'本地文件地址: <br/>`{image_path}`<br/>'
    preview_html = f'本地文件预览: <br/><div align="center"><img src="file={image_path}"></div>'
    chatbot.append(['这是一张图片, 展示如下:', address_html + preview_html])
    return chatbot
122
-
123
def subprocess_worker(instance, file_path, return_dict):
    """
    Run `instance.run(file_path)` and store the outcome under `return_dict['result']`.

    Used as the target of a child process; the result is handed back through
    the mapping instead of a return value.
    """
    outcome = instance.run(file_path)
    return_dict['result'] = outcome
125
-
126
def have_any_recent_upload_files(chatbot):
    """
    Tell whether a file was uploaded within the last five minutes.

    Reads the `most_recent_uploaded` entry of `chatbot._cookies` and compares
    its `time` field with the current time.

    Returns:
        False when `chatbot` is falsy, when there is no upload record, or when
        the record is five minutes old or more; True otherwise.
    """
    if not chatbot:
        return False  # no chatbot at all

    latest_upload = chatbot._cookies.get("most_recent_uploaded", None)
    if not latest_upload:
        return False  # nothing has been uploaded

    freshness_window = 5 * 60  # seconds
    age = time.time() - latest_upload["time"]
    if age < freshness_window:
        return True   # the upload is recent
    return False      # the upload is too old
133
-
134
def get_recent_file_prompt_support(chatbot):
    """
    Return the `path` field of the `most_recent_uploaded` record in `chatbot._cookies`.

    The record must exist; a missing record makes the lookup fail with TypeError.
    """
    upload_record = chatbot._cookies.get("most_recent_uploaded", None)
    return upload_record['path']
138
-
139
@CatchException
def 虚空终端CodeInterpreter(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """
    Plugin entry point of the open-source "CodeInterpreter" (currently disabled).

    The body starts with `raise NotImplementedError`, so everything after that
    statement is unreachable. Because the function still contains `yield`
    expressions it is a generator, and the exception is raised on first iteration.

    txt             text typed by the user in the input box, e.g. a passage to translate, or a path containing files to process
    llm_kwargs      GPT model parameters such as temperature and top_p; usually passed through unchanged
    plugin_kwargs   parameters of the plugin
    chatbot         handle of the chat display box, used to show content to the user
    history         chat history (previous context)
    system_prompt   silent system prompt given to GPT
    web_port        port the application is currently running on
    """
    raise NotImplementedError

    # NOTE(review): all code below is unreachable. It also references
    # `clear_file_downloadzone` and `is_the_upload_folder`, which are not among
    # the imports visible in this file -- confirm before re-enabling.

    # Clear the history to avoid overflowing the model input
    history = []; clear_file_downloadzone(chatbot)

    # Basic information: what the plugin does
    chatbot.append([
        "函数插件功能?",
        "CodeInterpreter开源版, 此插件处于开发阶段, 建议暂时不要使用, 插件初始化中 ..."
    ])
    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI

    if have_any_recent_upload_files(chatbot):
        file_path = get_recent_file_prompt_support(chatbot)
    else:
        chatbot.append(["文件检索", "没有发现任何近期上传的文件。"])
        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI

    # Read the file
    # NOTE(review): `file_path` chosen above is overwritten here, and
    # `recently_uploaded_files` may be None, which would make `[-1]` fail.
    if ("recently_uploaded_files" in plugin_kwargs) and (plugin_kwargs["recently_uploaded_files"] == ""): plugin_kwargs.pop("recently_uploaded_files")
    recently_uploaded_files = plugin_kwargs.get("recently_uploaded_files", None)
    file_path = recently_uploaded_files[-1]
    file_type = file_path.split('.')[-1]

    # Sanity check: the user left the upload folder path in the input box instead of a request
    if is_the_upload_folder(txt):
        chatbot.append([
            "...",
            f"请在输入框内填写需求,然后再次点击该插件(文件路径 {file_path} 已经被记忆)"
        ])
        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
        return

    # Main work: generate code, write it to a module and instantiate it
    for j in range(5):  # retry at most 5 times
        try:
            code, installation_advance, txt, file_type, llm_kwargs, chatbot, history = \
                yield from gpt_interact_multi_step(txt, file_type, llm_kwargs, chatbot, history)
            code = get_code_block(code)
            res = make_module(code)
            instance = init_module_instance(res)
            break
        except Exception as e:
            chatbot.append([f"第{j}次代码生成尝试,失败了", f"错误追踪\n```\n{trimmed_format_exc()}\n```\n"])
            yield from update_ui(chatbot=chatbot, history=history) # refresh the UI

    # Code generation finished, start execution
    # NOTE(review): if all five attempts failed, `instance` is unbound; the
    # resulting NameError is caught by the handler below.
    try:
        import multiprocessing
        manager = multiprocessing.Manager()
        return_dict = manager.dict()

        p = multiprocessing.Process(target=subprocess_worker, args=(instance, file_path, return_dict))
        # the child process only has 10 seconds to run
        p.start(); p.join(timeout=10)
        if p.is_alive(): p.terminate(); p.join()
        p.close()
        res = return_dict['result']
        # in-process alternative: res = instance.run(file_path)
    except Exception as e:
        chatbot.append(["执行失败了", f"错误追踪\n```\n{trimmed_format_exc()}\n```\n"])
        # (disabled) show `installation_advance` as a hint for missing dependencies
        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
        return

    # Finished successfully, wrap up
    res = str(res)
    if os.path.exists(res):
        chatbot.append(["执行成功了,结果是一个有效文件", "结果:" + res])
        new_file_path = promote_file_to_downloadzone(res, chatbot=chatbot)
        chatbot = for_immediate_show_off_when_possible(file_type, new_file_path, chatbot)
        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
    else:
        chatbot.append(["执行成功了,结果是一个字符串", "结果:" + res])
        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
225
-
226
- """
227
- 测试:
228
- 裁剪图像,保留下半部分
229
- 交换图像的蓝色通道和红色通道
230
- 将图像转为灰度图像
231
- 将csv文件转excel表格
232
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/Langchain知识库.py DELETED
@@ -1,106 +0,0 @@
1
- from toolbox import CatchException, update_ui, ProxyNetworkActivate, update_ui_lastest_msg
2
- from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, get_files_from_everything
3
-
4
-
5
-
6
@CatchException
def 知识库问答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """
    Collect files found via `txt` and feed them into a knowledge archive.

    The archive id is taken from `plugin_kwargs["advanced_arg"]` and falls back
    to 'default' when the argument is missing or empty. If the required
    packages cannot be imported, an installation is attempted and the function
    returns so that the user can retry.

    txt             text typed by the user in the input box, e.g. a passage to translate, or a path containing files to process
    llm_kwargs      GPT model parameters such as temperature and top_p; usually passed through unchanged
    plugin_kwargs   parameters of the plugin
    chatbot         handle of the chat display box, used to show content to the user
    history         chat history (previous context)
    system_prompt   silent system prompt given to GPT
    web_port        port the application is currently running on
    """
    history = []    # clear the history to avoid overflowing the model input

    # < -------------------- read the arguments --------------- >
    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
    kai_id = plugin_kwargs.get("advanced_arg", 'default')

    chatbot.append((f"向`{kai_id}`知识库中添加文件。", "[Local Message] 从一批文件(txt, md, tex)中读取数据构建知识库, 然后进行问答。"))
    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI

    # resolve deps: the imports double as an availability check
    try:
        from zh_langchain import construct_vector_store
        from langchain.embeddings.huggingface import HuggingFaceEmbeddings
        from .crazy_utils import knowledge_archive_interface
    except Exception as e:
        chatbot.append(["依赖不足", "导入依赖失败。正在尝试自动安装,请查看终端的输出或耐心等待..."])
        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
        from .crazy_utils import try_install_deps
        try_install_deps(['zh_langchain==0.2.1', 'pypinyin'], reload_m=['pypinyin', 'zh_langchain'])
        yield from update_ui_lastest_msg("安装完成,您可以再次重试。", chatbot, history)
        return

    # < -------------------- collect the files --------------- >
    file_manifest = []
    spl = ["txt", "doc", "docx", "email", "epub", "html", "json", "md", "msg", "pdf", "ppt", "pptx", "rtf"]
    for sp in spl:
        # only the second element of the returned triple is used here
        _, file_manifest_tmp, _ = get_files_from_everything(txt, type=f'.{sp}')
        file_manifest += file_manifest_tmp

    if len(file_manifest) == 0:
        chatbot.append(["没有找到任何可读取文件", "当前支持的格式包括: txt, md, docx, pptx, pdf, json等"])
        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
        return

    # < ------------------- warm up the text embedding model --------------- >
    chatbot.append(['<br/>'.join(file_manifest), "正在预热文本向量化模组, 如果是第一次运行, 将消耗较长时间下载中文向量化模型..."])
    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
    print('Checking Text2vec ...')
    from langchain.embeddings.huggingface import HuggingFaceEmbeddings
    with ProxyNetworkActivate('Download_LLM'):    # temporarily activate the proxy network
        HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese")

    # < ------------------- build the knowledge archive --------------- >
    chatbot.append(['<br/>'.join(file_manifest), "正在构建知识库..."])
    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
    print('Establishing knowledge archive ...')
    with ProxyNetworkActivate('Download_LLM'):    # temporarily activate the proxy network
        kai = knowledge_archive_interface()
        kai.feed_archive(file_manifest=file_manifest, id=kai_id)
    # NOTE(review): the extent of the `with` block above was reconstructed from
    # a flattened listing -- confirm against the original file.
    kai_files = kai.get_loaded_file()
    kai_files = '<br/>'.join(kai_files)
    # (disabled) storing the archive id in the cookies and locking the UI onto
    # the answering plugin
    chatbot.append(['构建完成', f"当前知识库内的有效文件:\n\n---\n\n{kai_files}\n\n---\n\n请切换至“知识库问答”插件进行知识库访问, 或者使用此插件继续上传更多文件。"])
    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
75
-
76
@CatchException
def 读取知识库作答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port=-1):
    """
    Answer the question `txt` with the help of a knowledge archive.

    The archive id comes from `plugin_kwargs["advanced_arg"]` ('default' when
    missing or empty). The archive turns the question into a prompt, the prompt
    is sent to the model, and `history` is extended in place with the prompt
    and the reply. If the required packages cannot be imported, an installation
    is attempted and the function returns so that the user can retry.
    """
    # The imports double as an availability check for the dependencies
    try:
        from zh_langchain import construct_vector_store
        from langchain.embeddings.huggingface import HuggingFaceEmbeddings
        from .crazy_utils import knowledge_archive_interface
    except Exception:
        chatbot.append(["依赖不足", "导入依赖失败。正在尝试自动安装,请查看终端的输出或耐心等待..."])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        from .crazy_utils import try_install_deps
        try_install_deps(['zh_langchain==0.2.1', 'pypinyin'], reload_m=['pypinyin', 'zh_langchain'])
        yield from update_ui_lastest_msg("安装完成,您可以再次重试。", chatbot, history)
        return

    archive = knowledge_archive_interface()

    # An empty advanced argument counts as "not given"
    if plugin_kwargs.get("advanced_arg") == "":
        plugin_kwargs.pop("advanced_arg")
    archive_id = plugin_kwargs.get("advanced_arg", 'default')
    _, prompt = archive.answer_with_archive_by_id(txt, archive_id)

    # Show the prompt right away, since the model request takes a while
    chatbot.append((txt, f'[知识库 {archive_id}] ' + prompt))
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=prompt,
        inputs_show_user=txt,
        llm_kwargs=llm_kwargs,
        chatbot=chatbot,
        history=[],
        sys_prompt=system_prompt,
    )
    history.extend((prompt, gpt_say))
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/Latex全文润色.py DELETED
@@ -1,245 +0,0 @@
1
- from toolbox import update_ui, trimmed_format_exc, promote_file_to_downloadzone, get_log_folder
2
- from toolbox import CatchException, report_exception, write_history_to_file, zip_folder
3
-
4
-
5
class PaperFileGroup():
    """
    A group of LaTeX files together with their token-limited fragments.

    `file_paths` / `file_contents` hold the whole files. The `sp_*` lists hold
    one entry per fragment: its text, the index of the file it came from, and
    a display tag. `sp_file_result` must be assigned by the caller before
    `merge_result` is used.
    """

    def __init__(self):
        # whole files
        self.file_paths = []
        self.file_contents = []
        # fragments produced by run_file_split
        self.sp_file_contents = []
        self.sp_file_index = []
        self.sp_file_tag = []

        # token counter based on the gpt-3.5-turbo tokenizer
        from request_llms.bridge_all import model_info
        tokenizer = model_info["gpt-3.5-turbo"]['tokenizer']

        def count_tokens(txt):
            return len(tokenizer.encode(txt, disallowed_special=()))

        self.get_token_num = count_tokens

    def run_file_split(self, max_token_limit=1900):
        """
        Split every file whose token count reaches `max_token_limit` into fragments.
        """
        for index, file_content in enumerate(self.file_contents):
            if self.get_token_num(file_content) >= max_token_limit:
                # too long: break the text down and register every piece
                from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
                pieces = breakdown_text_to_satisfy_token_limit(file_content, max_token_limit)
                for part_no, piece in enumerate(pieces):
                    self.sp_file_contents.append(piece)
                    self.sp_file_index.append(index)
                    self.sp_file_tag.append(self.file_paths[index] + f".part-{part_no}.tex")
                continue

            # short enough: the file is its own single fragment
            self.sp_file_contents.append(file_content)
            self.sp_file_index.append(index)
            self.sp_file_tag.append(self.file_paths[index])

        print('Segmentation: done')

    def merge_result(self):
        """
        Concatenate the fragment results back into one result string per file.
        """
        self.file_result = [""] * len(self.file_paths)
        for fragment_result, owner in zip(self.sp_file_result, self.sp_file_index):
            self.file_result[owner] += fragment_result

    def write_result(self):
        """
        Write every merged result to `<path>.polish.tex` and return the written paths.
        """
        manifest = []
        for path, res in zip(self.file_paths, self.file_result):
            target = path + '.polish.tex'
            with open(target, 'w', encoding='utf8') as out:
                manifest.append(target)
                out.write(res)
        return manifest

    def zip_result(self):
        """
        Zip the folder of the first file into the log folder as `<timestamp>-polished.zip`.
        """
        import os
        import time
        folder = os.path.dirname(self.file_paths[0])
        stamp = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
        zip_folder(folder, get_log_folder(), f'{stamp}-polished.zip')
55
-
56
-
57
def 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en', mode='polish'):
    """
    Polish or proofread a set of LaTeX files with the model.

    Steps: read every file and strip LaTeX comments; split long files into
    fragments; send all fragments to the model in parallel; merge the replies
    into `<file>.polish.tex` files and zip the project; finally write a
    markdown report and offer it for download.

    Args:
        file_manifest: paths of the .tex files to process.
        project_folder, plugin_kwargs, system_prompt: accepted for interface
            compatibility; not used by this function.
        llm_kwargs: model parameters, passed through to the request helper.
        chatbot: chat display handle.
        history: replaced locally by the collected prompts/replies.
        language: 'en' or 'zh', selects the prompt language.
        mode: 'polish' for polishing, anything else for proofreading.

    Raises:
        ValueError: if `language` is neither 'en' nor 'zh'.
    """
    import time
    import re
    from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency

    # <-------- read the LaTeX files and remove all comments ---------->
    pfg = PaperFileGroup()
    # a `%` that is not escaped as `\%` starts a comment running to the end of the line
    comment_pattern = re.compile(r'(?<!\\)%.*')
    for fp in file_manifest:
        with open(fp, 'r', encoding='utf-8', errors='replace') as f:
            file_content = f.read()
        pfg.file_paths.append(fp)
        pfg.file_contents.append(comment_pattern.sub('', file_content))

    # <-------- split LaTeX files that are too long ---------->
    pfg.run_file_split(max_token_limit=1024)
    n_split = len(pfg.sp_file_contents)

    # <-------- build the prompts ---------->
    # Raw strings are used wherever a literal contains LaTeX commands: `\s`
    # and `\c` are invalid escape sequences in ordinary string literals.
    if language == 'en':
        if mode == 'polish':
            instruction = ("Below is a section from an academic paper, polish this section to meet the academic standard, " +
                           r"improve the grammar, clarity and overall readability, do not modify any latex command such as \section, \cite and equations:")
        else:
            instruction = (r"Below is a section from an academic paper, proofread this section." +
                           r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " +
                           r"Answer me only with the revised text:")
        inputs_show_user_array = [f"Polish {f}" for f in pfg.sp_file_tag]
        sys_prompt_array = ["You are a professional academic paper writer." for _ in range(n_split)]
    elif language == 'zh':
        if mode == 'polish':
            instruction = r"以下是一篇学术论文中的一段内容,请将此部分润色以满足学术标准,提高语法、清晰度和整体可读性,不要修改任何LaTeX命令,例如\section,\cite和方程式:"
        else:
            instruction = r"以下是一篇学术论文中的一段内容,请对这部分内容进行语法矫正。不要修改任何LaTeX命令,例如\section,\cite和方程式:"
        inputs_show_user_array = [f"润色 {f}" for f in pfg.sp_file_tag]
        sys_prompt_array = ["你是一位专业的中文学术论文作家。" for _ in range(n_split)]
    else:
        # previously this fell through and failed later on an unbound variable
        raise ValueError(f"unsupported language: {language!r} (expected 'en' or 'zh')")
    inputs_array = [instruction + f"\n\n{frag}" for frag in pfg.sp_file_contents]

    # <-------- multi-threaded polishing ---------->
    gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
        inputs_array=inputs_array,
        inputs_show_user_array=inputs_show_user_array,
        llm_kwargs=llm_kwargs,
        chatbot=chatbot,
        history_array=[[""] for _ in range(n_split)],
        sys_prompt_array=sys_prompt_array,
        scroller_max_len=80
    )

    # <-------- reassemble the fragments into complete tex files and zip them ---------->
    # Best effort: a failure here is logged and must not prevent the report below.
    try:
        # the collection alternates prompt, reply, prompt, reply, ...
        pfg.sp_file_result = list(gpt_response_collection[1::2])
        pfg.merge_result()
        pfg.write_result()
        pfg.zip_result()
    except Exception:
        print(trimmed_format_exc())

    # <-------- write the report and finish ---------->
    create_report_file_name = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + "-chatgpt.polish.md"
    res = write_history_to_file(gpt_response_collection, file_basename=create_report_file_name)
    promote_file_to_downloadzone(res, chatbot=chatbot)

    history = gpt_response_collection
    # `fp` is the last file of the manifest here
    chatbot.append((f"{fp}完成了吗?", res))
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
135
-
136
-
137
@CatchException
def Latex英文润色(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """
    Plugin entry point: polish every .tex file of a LaTeX project in English.

    Args:
        txt: path of the local project folder, as typed by the user.
        llm_kwargs, plugin_kwargs, system_prompt: passed through to `多文件润色`.
        chatbot: chat display handle.
        history: chat history; cleared before processing to avoid input overflow.
        web_port: not used by this function.
    """
    # Basic information: what the plugin does and who contributed it
    chatbot.append([
        "函数插件功能?",
        "对整个Latex项目进行润色。函数插件贡献者: Binary-Husky。(注意,此插件不调用Latex,如果有Latex环境,请使用“Latex英文纠错+高亮”插件)"])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    # Probe the dependency; give installation advice when it is missing.
    # `except Exception` (not a bare except) so that KeyboardInterrupt,
    # SystemExit and GeneratorExit are never swallowed here.
    try:
        import tiktoken  # noqa: F401  (availability check only)
    except Exception:
        report_exception(chatbot, history,
                         a=f"解析项目: {txt}",
                         b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    history = []    # clear the history to avoid overflowing the model input
    import glob
    import os
    if not os.path.exists(txt):
        if txt == "": txt = '空空如也的输入栏'
        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return
    project_folder = txt

    # glob.glob already returns a list; no extra copy is needed
    file_manifest = glob.glob(f'{project_folder}/**/*.tex', recursive=True)
    if len(file_manifest) == 0:
        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.tex文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return
    yield from 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en')
169
-
170
-
171
-
172
-
173
-
174
-
175
@CatchException
def Latex中文润色(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """
    Plugin entry point: polish every .tex file of a LaTeX project in Chinese.

    Args:
        txt: path of the local project folder, as typed by the user.
        llm_kwargs, plugin_kwargs, system_prompt: passed through to `多文件润色`.
        chatbot: chat display handle.
        history: chat history; cleared before processing to avoid input overflow.
        web_port: not used by this function.
    """
    # Basic information: what the plugin does and who contributed it
    chatbot.append([
        "函数插件功能?",
        "对整个Latex项目进行润色。函数插件贡献者: Binary-Husky"])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    # Probe the dependency; give installation advice when it is missing.
    # `except Exception` (not a bare except) so that KeyboardInterrupt,
    # SystemExit and GeneratorExit are never swallowed here.
    try:
        import tiktoken  # noqa: F401  (availability check only)
    except Exception:
        report_exception(chatbot, history,
                         a=f"解析项目: {txt}",
                         b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    history = []    # clear the history to avoid overflowing the model input
    import glob
    import os
    if not os.path.exists(txt):
        if txt == "": txt = '空空如也的输入栏'
        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return
    project_folder = txt

    # glob.glob already returns a list; no extra copy is needed
    file_manifest = glob.glob(f'{project_folder}/**/*.tex', recursive=True)
    if len(file_manifest) == 0:
        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.tex文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return
    yield from 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='zh')
207
-
208
-
209
-
210
-
211
@CatchException
def Latex英文纠错(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """
    Plugin entry point: proofread every .tex file of a LaTeX project in English.

    Args:
        txt: path of the local project folder, as typed by the user.
        llm_kwargs, plugin_kwargs, system_prompt: passed through to `多文件润色`.
        chatbot: chat display handle.
        history: chat history; cleared before processing to avoid input overflow.
        web_port: not used by this function.
    """
    # Basic information: what the plugin does and who contributed it
    chatbot.append([
        "函数插件功能?",
        "对整个Latex项目进行纠错。函数插件贡献者: Binary-Husky"])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    # Probe the dependency; give installation advice when it is missing.
    # `except Exception` (not a bare except) so that KeyboardInterrupt,
    # SystemExit and GeneratorExit are never swallowed here.
    try:
        import tiktoken  # noqa: F401  (availability check only)
    except Exception:
        report_exception(chatbot, history,
                         a=f"解析项目: {txt}",
                         b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    history = []    # clear the history to avoid overflowing the model input
    import glob
    import os
    if not os.path.exists(txt):
        if txt == "": txt = '空空如也的输入栏'
        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return
    project_folder = txt

    # glob.glob already returns a list; no extra copy is needed
    file_manifest = glob.glob(f'{project_folder}/**/*.tex', recursive=True)
    if len(file_manifest) == 0:
        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.tex文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return
    yield from 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en', mode='proofread')
243
-
244
-
245
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/Latex全文翻译.py DELETED
@@ -1,176 +0,0 @@
1
- from toolbox import update_ui, promote_file_to_downloadzone
2
- from toolbox import CatchException, report_exception, write_history_to_file
3
- fast_debug = False
4
-
5
class PaperFileGroup():
    """
    Holds the files of a paper together with their token-limited fragments.

    file_paths and file_contents carry one entry per input file. After
    run_file_split, the three sp_file_* lists carry one entry per fragment:
    the fragment text, the index of the file it came from, and a tag (the
    file path, suffixed with ".part-<j>.tex" when the file had to be split).
    """
    def __init__(self):
        self.file_paths, self.file_contents = [], []
        self.sp_file_contents, self.sp_file_index, self.sp_file_tag = [], [], []

        # Token counter built on the tokenizer registered for gpt-3.5-turbo
        from request_llms.bridge_all import model_info
        tokenizer = model_info["gpt-3.5-turbo"]['tokenizer']
        self.get_token_num = lambda txt: len(tokenizer.encode(txt, disallowed_special=()))

    def run_file_split(self, max_token_limit=1900):
        """
        Break up long texts.

        Contents whose token count is below max_token_limit are kept whole;
        longer ones are cut by breakdown_text_to_satisfy_token_limit and each
        piece is recorded separately.
        """
        for file_no, content in enumerate(self.file_contents):
            if self.get_token_num(content) < max_token_limit:
                # short enough: one fragment, tagged with the plain file path
                self.sp_file_contents.append(content)
                self.sp_file_index.append(file_no)
                self.sp_file_tag.append(self.file_paths[file_no])
                continue

            from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
            pieces = breakdown_text_to_satisfy_token_limit(content, max_token_limit)
            for part_no, piece in enumerate(pieces):
                self.sp_file_contents.append(piece)
                self.sp_file_index.append(file_no)
                self.sp_file_tag.append(self.file_paths[file_no] + f".part-{part_no}.tex")

        print('Segmentation: done')
37
-
38
def 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en'):
    """
    Translate a set of LaTeX files fragment by fragment and publish a report.

    Args:
        file_manifest: Paths of the .tex files to translate.
        project_folder: Not used by this function.
        llm_kwargs: Forwarded to the multi-threaded request helper.
        plugin_kwargs: Not used by this function.
        chatbot: Chat display handle; a completion row is appended at the end.
        history: Not read; rebound to the collected responses before the final refresh.
        system_prompt: Not used by this function.
        language: Translation direction, 'en->zh' or 'zh->en'.

    Yields:
        Whatever the request helper and update_ui yield.

    Raises:
        ValueError: if language is not a supported direction. This includes the
            default 'en', which previously failed later with UnboundLocalError.
    """
    # Raw strings keep the backslashes of \section and \cite without invalid-escape warnings.
    prompt_heads = {
        'en->zh': r"Below is a section from an English academic paper, translate it into Chinese, do not modify any latex command such as \section, \cite and equations:",
        'zh->en': r"Below is a section from a Chinese academic paper, translate it into English, do not modify any latex command such as \section, \cite and equations:",
    }
    # Validate before doing any work so a bad direction fails fast with a clear message.
    if language not in prompt_heads:
        raise ValueError(f"Unsupported translation direction {language!r}; expected 'en->zh' or 'zh->en'")

    import time, re
    from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency

    # <-------- read the LaTeX files and strip all comments ---------->
    pfg = PaperFileGroup()
    # A '%' that is not preceded by a backslash starts a comment running to end of line
    comment_pattern = re.compile(r'(?<!\\)%.*')
    fp = None  # last file read; it is named in the completion message below
    for fp in file_manifest:
        with open(fp, 'r', encoding='utf-8', errors='replace') as f:
            file_content = f.read()
        # keep the comment-free text
        pfg.file_paths.append(fp)
        pfg.file_contents.append(comment_pattern.sub('', file_content))

    # <-------- split over-long LaTeX files ---------->
    pfg.run_file_split(max_token_limit=1024)
    n_split = len(pfg.sp_file_contents)

    # <-------- build one request per fragment ---------->
    prompt_head = prompt_heads[language]
    inputs_array = [prompt_head + f"\n\n{frag}" for frag in pfg.sp_file_contents]
    inputs_show_user_array = [f"翻译 {f}" for f in pfg.sp_file_tag]
    sys_prompt_array = ["You are a professional academic paper translator." for _ in range(n_split)]

    gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
        inputs_array=inputs_array,
        inputs_show_user_array=inputs_show_user_array,
        llm_kwargs=llm_kwargs,
        chatbot=chatbot,
        history_array=[[""] for _ in range(n_split)],
        sys_prompt_array=sys_prompt_array,
        scroller_max_len = 80
    )

    # <-------- collect the results and finish ---------->
    create_report_file_name = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + "-chatgpt.polish.md"
    res = write_history_to_file(gpt_response_collection, create_report_file_name)
    promote_file_to_downloadzone(res, chatbot=chatbot)
    history = gpt_response_collection
    chatbot.append((f"{fp}完成了吗?", res))
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
103
-
104
-
105
-
106
-
107
-
108
@CatchException
def Latex英译中(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """
    Plugin entry point: translate a local LaTeX project from English to Chinese.

    Args:
        txt: Text from the input box; must be a path that exists locally.
        llm_kwargs: Forwarded unchanged to 多文件翻译.
        plugin_kwargs: Forwarded unchanged to 多文件翻译.
        chatbot: Chat display handle; status and error rows are appended to it.
        history: Incoming history; replaced by an empty list before processing.
        system_prompt: Forwarded unchanged to 多文件翻译.
        web_port: Not used by this function.

    Yields:
        Whatever update_ui and 多文件翻译 yield.
    """
    # Basic information: what the plugin does and who contributed it
    chatbot.append([
        "函数插件功能?",
        "对整个Latex项目进行翻译。函数插件贡献者: Binary-Husky"])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    # Probe the optional dependency; if it is missing, show an installation hint.
    # Only ImportError is handled here so that KeyboardInterrupt/SystemExit are not swallowed.
    try:
        import tiktoken  # noqa: F401 -- imported only to check availability
    except ImportError:
        report_exception(chatbot, history,
                         a=f"解析项目: {txt}",
                         b="导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    history = []  # clear the history so the input does not overflow
    import glob, os
    if not os.path.exists(txt):
        if txt == "": txt = '空空如也的输入栏'
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return
    project_folder = txt

    # Collect every .tex file below the project folder
    file_manifest = glob.glob(f'{project_folder}/**/*.tex', recursive=True)
    if not file_manifest:
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return
    yield from 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en->zh')
140
-
141
-
142
-
143
-
144
-
145
@CatchException
def Latex中译英(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """
    Plugin entry point: translate a local LaTeX project from Chinese to English.

    Args:
        txt: Text from the input box; must be a path that exists locally.
        llm_kwargs: Forwarded unchanged to 多文件翻译.
        plugin_kwargs: Forwarded unchanged to 多文件翻译.
        chatbot: Chat display handle; status and error rows are appended to it.
        history: Incoming history; replaced by an empty list before processing.
        system_prompt: Forwarded unchanged to 多文件翻译.
        web_port: Not used by this function.

    Yields:
        Whatever update_ui and 多文件翻译 yield.
    """
    # Basic information: what the plugin does and who contributed it
    chatbot.append([
        "函数插件功能?",
        "对整个Latex项目进行翻译。函数插件贡献者: Binary-Husky"])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    # Probe the optional dependency; if it is missing, show an installation hint.
    # Only ImportError is handled here so that KeyboardInterrupt/SystemExit are not swallowed.
    try:
        import tiktoken  # noqa: F401 -- imported only to check availability
    except ImportError:
        report_exception(chatbot, history,
                         a=f"解析项目: {txt}",
                         b="导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    history = []  # clear the history so the input does not overflow
    import glob, os
    if not os.path.exists(txt):
        if txt == "": txt = '空空如也的输入栏'
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return
    project_folder = txt

    # Collect every .tex file below the project folder
    file_manifest = glob.glob(f'{project_folder}/**/*.tex', recursive=True)
    if not file_manifest:
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return
    yield from 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='zh->en')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/Latex输出PDF结果.py DELETED
@@ -1,306 +0,0 @@
1
- from toolbox import update_ui, trimmed_format_exc, get_conf, get_log_folder, promote_file_to_downloadzone
2
- from toolbox import CatchException, report_exception, update_ui_lastest_msg, zip_result, gen_time_str
3
- from functools import partial
4
- import glob, os, requests, time
5
- pj = os.path.join
6
- ARXIV_CACHE_DIR = os.path.expanduser(f"~/arxiv_cache/")
7
-
8
- # =================================== 工具函数 ===============================================
9
- # 专业词汇声明 = 'If the term "agent" is used in this section, it should be translated to "智能体". '
10
def switch_prompt(pfg, mode, more_requirement):
    """
    Generate prompts and system prompts based on the mode for proofreading or translating.

    Args:
    - pfg: Object whose `sp_file_contents` attribute lists the text fragments to process.
    - mode: A string specifying the mode, either 'proofread_en' or 'translate_zh'.
    - more_requirement: Extra instruction text spliced into every prompt (may be empty).

    Returns:
    - inputs_array: A list of strings, one prompt per fragment.
    - sys_prompt_array: A list of strings, one system prompt per fragment.

    Raises:
    - AssertionError: if mode is not one of the two supported values.
    """
    fragments = pfg.sp_file_contents
    if mode == 'proofread_en':
        prompt_head = (r"Below is a section from an academic paper, proofread this section." +
                       r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " + more_requirement +
                       r"Answer me only with the revised text:")
        sys_prompt = "You are a professional academic paper writer."
    elif mode == 'translate_zh':
        prompt_head = (r"Below is a section from an English academic paper, translate it into Chinese. " + more_requirement +
                       r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " +
                       r"Answer me only with the translated text:")
        sys_prompt = "You are a professional translator."
    else:
        # Explicit raise instead of `assert False`: asserts are stripped under `python -O`,
        # which would let execution fall through to an UnboundLocalError.
        raise AssertionError("未知指令")

    inputs_array = [prompt_head + f"\n\n{frag}" for frag in fragments]
    sys_prompt_array = [sys_prompt for _ in fragments]
    return inputs_array, sys_prompt_array
37
-
38
def desend_to_extracted_folder_if_exist(project_folder):
    """
    Descend into the extracted folder if it exists, otherwise return the original folder.

    Every direct sub-directory is inspected in sorted order, so the result does
    not depend on the arbitrary order in which glob lists directory entries.

    Args:
    - project_folder: A string specifying the folder path.

    Returns:
    - A string specifying the path to the first sub-directory whose name ends with
      '.extract', or the original folder if there is no such sub-directory.
    """
    sub_dirs = sorted(f for f in glob.glob(f'{project_folder}/*') if os.path.isdir(f))
    for sub_dir in sub_dirs:
        if sub_dir.endswith('.extract'):
            return sub_dir
    return project_folder
52
-
53
def move_project(project_folder, arxiv_id=None):
    """
    Create a new work folder and copy the project folder to it.

    Args:
    - project_folder: A string specifying the folder path of the project.
    - arxiv_id: Optional id string. When given, the work folder is
      ARXIV_CACHE_DIR/<arxiv_id>/workfolder; otherwise it is a time-stamped
      folder below get_log_folder().

    Returns:
    - A string specifying the path to the new work folder.
    """
    import shutil, time
    time.sleep(2)  # avoid time string conflict
    if arxiv_id is not None:
        new_workfolder = pj(ARXIV_CACHE_DIR, arxiv_id, 'workfolder')
    else:
        new_workfolder = f'{get_log_folder()}/{gen_time_str()}'

    # Best-effort removal of a stale work folder. ignore_errors keeps the original
    # "never fail here" intent without a bare except that would also swallow
    # KeyboardInterrupt/SystemExit.
    shutil.rmtree(new_workfolder, ignore_errors=True)

    # align subfolder if there is a folder wrapper: when the project has no .tex file
    # at its top level and exactly one entry (ignoring __MACOSX), descend into it
    items = [item for item in glob.glob(pj(project_folder, '*')) if os.path.basename(item) != '__MACOSX']
    has_top_level_tex = bool(glob.glob(pj(project_folder, '*.tex')))
    if not has_top_level_tex and len(items) == 1 and os.path.isdir(items[0]):
        project_folder = items[0]

    shutil.copytree(src=project_folder, dst=new_workfolder)
    return new_workfolder
82
-
83
def arxiv_download(chatbot, history, txt, allow_cache=True):
    """
    Resolve `txt` to LaTeX sources, downloading them from arXiv when it names a paper.

    This is a generator; its result is delivered as the generator return value
    (use `yield from`).

    Args:
    - chatbot: Chat display handle; progress rows are appended to / updated on it.
    - history: Chat history passed through to the UI refresh helpers.
    - txt: A bare arXiv id (e.g. "1707.06690" or "1707.06690v2"), an
      https://arxiv.org/abs/... URL, or anything else (returned untouched).
    - allow_cache: When true, a previously translated PDF found in the cache
      short-circuits the download.

    Returns:
    - (txt, None) when txt is not an arxiv.org reference.
    - (error_message, None) when txt is an arxiv.org URL that is not in /abs/ form.
    - (path_to_cached_pdf, arxiv_id) on a cache hit with allow_cache.
    - (extract_folder, arxiv_id) after the sources were downloaded/extracted.

    Raises:
    - requests.HTTPError: if the e-print download returns an error status.
    """
    def check_cached_translation_pdf(arxiv_id):
        # Returns the cached translated PDF path (also promoting it, and the comparison
        # PDF if present, to the download zone) or False when nothing is cached.
        translation_dir = pj(ARXIV_CACHE_DIR, arxiv_id, 'translation')
        os.makedirs(translation_dir, exist_ok=True)
        target_file = pj(translation_dir, 'translate_zh.pdf')
        if not os.path.exists(target_file):
            return False
        promote_file_to_downloadzone(target_file, rename_file=None, chatbot=chatbot)
        target_file_compare = pj(translation_dir, 'comparison.pdf')
        if os.path.exists(target_file_compare):
            promote_file_to_downloadzone(target_file_compare, rename_file=None, chatbot=chatbot)
        return target_file

    def is_float(s):
        try:
            float(s)
            return True
        except ValueError:
            return False

    # A dotted, slash-free numeric string is treated as a bare arXiv id;
    # the second test accepts ids with a version suffix by looking at the first 10 chars.
    if ('.' in txt) and ('/' not in txt) and is_float(txt):
        txt = 'https://arxiv.org/abs/' + txt.strip()
    if ('.' in txt) and ('/' not in txt) and is_float(txt[:10]):
        txt = 'https://arxiv.org/abs/' + txt[:10]
    if not txt.startswith('https://arxiv.org'):
        return txt, None

    # <-------------- inspect format ------------->
    chatbot.append(["检测到arxiv文档连接", '尝试下载 ...'])
    yield from update_ui(chatbot=chatbot, history=history)
    time.sleep(1)  # give the UI time to refresh

    url_ = txt  # https://arxiv.org/abs/1707.06690
    if not txt.startswith('https://arxiv.org/abs/'):
        msg = f"解析arxiv网址失败, 期望格式例如: https://arxiv.org/abs/1707.06690。实际得到格式: {url_}。"
        yield from update_ui_lastest_msg(msg, chatbot=chatbot, history=history)  # refresh the UI
        return msg, None

    # <-------------- set format ------------->
    arxiv_id = url_.split('/abs/')[-1]
    if 'v' in arxiv_id: arxiv_id = arxiv_id[:10]
    cached_translation_pdf = check_cached_translation_pdf(arxiv_id)
    if cached_translation_pdf and allow_cache: return cached_translation_pdf, arxiv_id

    url_tar = url_.replace('/abs/', '/e-print/')
    translation_dir = pj(ARXIV_CACHE_DIR, arxiv_id, 'e-print')
    extract_dst = pj(ARXIV_CACHE_DIR, arxiv_id, 'extract')
    os.makedirs(translation_dir, exist_ok=True)

    # <-------------- download arxiv source file ------------->
    dst = pj(translation_dir, arxiv_id+'.tar')
    if os.path.exists(dst):
        yield from update_ui_lastest_msg("调用缓存", chatbot=chatbot, history=history)  # refresh the UI
    else:
        yield from update_ui_lastest_msg("开始下载", chatbot=chatbot, history=history)  # refresh the UI
        proxies = get_conf('proxies')
        r = requests.get(url_tar, proxies=proxies)
        # Fail before writing: otherwise an HTTP error page would be stored as the .tar
        # and silently reused as "cache" on every later run.
        r.raise_for_status()
        with open(dst, 'wb') as f:
            f.write(r.content)

    # <-------------- extract file ------------->
    yield from update_ui_lastest_msg("下载完成", chatbot=chatbot, history=history)  # refresh the UI
    from toolbox import extract_archive
    extract_archive(file_path=dst, dest_dir=extract_dst)
    return extract_dst, arxiv_id
145
- # ========================================= 插件主程序1 =====================================================
146
-
147
-
148
@CatchException
def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """
    Plugin entry point: proofread a local LaTeX project and compile the result to PDF.

    The project is copied to a fresh work folder, processed in 'proofread_en'
    mode unless merge_proofread_en.tex already exists there, compiled, zipped,
    and the archive is offered for download whether or not compilation succeeded.

    Args:
        txt: Text from the input box; must be a path that exists locally.
        llm_kwargs, plugin_kwargs, system_prompt: Forwarded to Latex精细分解与转化.
            plugin_kwargs["advanced_arg"], if non-empty, is spliced into every prompt.
        chatbot: Chat display handle; status rows are appended to it.
        history: Incoming history; replaced by an empty list before processing.
        web_port: Not used by this function.

    Returns (as generator return value):
        The success value reported by 编译Latex.
    """
    # <-------------- information about this plugin ------------->
    chatbot.append([ "函数插件功能?",
        "对整个Latex项目进行纠错, 用latex编译为PDF对修正处做高亮。函数插件贡献者: Binary-Husky。注意事项: 目前仅支持GPT3.5/GPT4,其他模型转化效果未知。目前对机器学习类文献转化效果最好,其他类型文献转化效果未知。仅在Windows系统进行了测试,其他操作系统表现未知。"])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    # <-------------- more requirements ------------->
    # an empty advanced_arg is treated as absent
    if plugin_kwargs.get("advanced_arg") == "":
        plugin_kwargs.pop("advanced_arg")
    extra_requirement = plugin_kwargs.get("advanced_arg", "")
    _switch_prompt_ = partial(switch_prompt, more_requirement=extra_requirement)

    # <-------------- check deps ------------->
    try:
        import glob, os, time, subprocess
        subprocess.Popen(['pdflatex', '-version'])
        from .latex_fns.latex_actions import Latex精细分解与转化, 编译Latex
    except Exception:
        chatbot.append([ f"解析项目: {txt}",
            f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    # <-------------- clear history and read input ------------->
    history = []
    if not os.path.exists(txt):
        if txt == "": txt = '空空如也的输入栏'
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return
    project_folder = txt

    file_manifest = glob.glob(f'{project_folder}/**/*.tex', recursive=True)
    if not file_manifest:
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    # <-------------- if is a zip/tar file ------------->
    project_folder = desend_to_extracted_folder_if_exist(project_folder)

    # <-------------- move latex project away from temp folder ------------->
    project_folder = move_project(project_folder, arxiv_id=None)

    # <-------------- if merge_proofread_en is already generated, skip gpt req ------------->
    already_proofread = os.path.exists(project_folder + '/merge_proofread_en.tex')
    if not already_proofread:
        yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
                                chatbot, history, system_prompt, mode='proofread_en', switch_prompt=_switch_prompt_)

    # <-------------- compile PDF ------------->
    success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_proofread_en',
                                work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder)

    # <-------------- zip PDF ------------->
    zip_res = zip_result(project_folder)
    if success:
        outcome_row = ("成功啦", '请查收结果(压缩包)...')
    else:
        outcome_row = ("失败了", '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...')
    # Both outcomes report, pause briefly, then publish the archive
    chatbot.append(outcome_row)
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
    time.sleep(1)
    promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)

    # <-------------- we are done ------------->
    return success
220
-
221
- # ========================================= 插件主程序2 =====================================================
222
-
223
@CatchException
def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """
    Plugin entry point: translate a LaTeX project (local or arXiv) to Chinese and compile a PDF.

    Args:
        txt: Text from the input box: a local path, an arXiv id, or an arXiv /abs/ URL.
        llm_kwargs, plugin_kwargs, system_prompt: Forwarded to Latex精细分解与转化.
            plugin_kwargs["advanced_arg"], if non-empty, is spliced into every prompt.
            A leading "--no-cache" in it disables reuse of a cached translated PDF
            and is removed before the text is used as a prompt requirement.
        chatbot: Chat display handle; status rows are appended to it.
        history: Incoming history; replaced by an empty list before processing.
        web_port: Not used by this function.

    Returns (as generator return value):
        The success value reported by 编译Latex, or None when the run stops early.
    """
    # <-------------- information about this plugin ------------->
    chatbot.append([
        "函数插件功能?",
        "对整个Latex项目进行翻译, 生成中文PDF。函数插件贡献者: Binary-Husky。注意事项: 此插件Windows支持最佳,Linux下必须使用Docker安装,详见项目主README.md。目前仅支持GPT3.5/GPT4,其他模型转化效果未知。目前对机器学习类文献转化效果最好,其他类型文献转化效果未知。"])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    # <-------------- more requirements ------------->
    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
    more_req = plugin_kwargs.get("advanced_arg", "")
    no_cache_flag = "--no-cache"
    no_cache = more_req.startswith(no_cache_flag)
    if no_cache:
        # Strings are immutable and str.lstrip removes a character *set*, not a prefix,
        # so the flag has to be sliced off and the result assigned back.
        more_req = more_req[len(no_cache_flag):].lstrip()
    allow_cache = not no_cache
    _switch_prompt_ = partial(switch_prompt, more_requirement=more_req)

    # <-------------- check deps ------------->
    try:
        import glob, os, time, subprocess
        subprocess.Popen(['pdflatex', '-version'])
        from .latex_fns.latex_actions import Latex精细分解与转化, 编译Latex
    except Exception:
        chatbot.append([ f"解析项目: {txt}",
            f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    # <-------------- clear history and read input ------------->
    history = []
    txt, arxiv_id = yield from arxiv_download(chatbot, history, txt, allow_cache)
    if txt.endswith('.pdf'):
        # arxiv_download handed back a cached translated PDF: nothing left to do
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = "发现已经存在翻译好的PDF文档")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    if not os.path.exists(txt):
        if txt == "": txt = '空空如也的输入栏'
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无法处理: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return
    project_folder = txt

    file_manifest = glob.glob(f'{project_folder}/**/*.tex', recursive=True)
    if not file_manifest:
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    # <-------------- if is a zip/tar file ------------->
    project_folder = desend_to_extracted_folder_if_exist(project_folder)

    # <-------------- move latex project away from temp folder ------------->
    project_folder = move_project(project_folder, arxiv_id)

    # <-------------- if merge_translate_zh is already generated, skip gpt req ------------->
    if not os.path.exists(project_folder + '/merge_translate_zh.tex'):
        yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
                                chatbot, history, system_prompt, mode='translate_zh', switch_prompt=_switch_prompt_)

    # <-------------- compile PDF ------------->
    success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_translate_zh', mode='translate_zh',
                                work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder)

    # <-------------- zip PDF ------------->
    zip_res = zip_result(project_folder)
    if success:
        chatbot.append(("成功啦", '请查收结果(压缩包)...'))
    else:
        chatbot.append(("失败了", '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 您可以到Github Issue区, 用该压缩包进行反馈。如系统是Linux,请检查系统字体(见Github wiki) ...'))
    yield from update_ui(chatbot=chatbot, history=history); time.sleep(1)  # refresh the UI
    promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)

    # <-------------- we are done ------------->
    return success
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/__init__.py DELETED
File without changes
crazy_functions/agent_fns/auto_agent.py DELETED
@@ -1,23 +0,0 @@
1
- from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc, ProxyNetworkActivate
2
- from toolbox import report_exception, get_log_folder, update_ui_lastest_msg, Singleton
3
- from crazy_functions.agent_fns.pipe import PluginMultiprocessManager, PipeCom
4
- from crazy_functions.agent_fns.general import AutoGenGeneral
5
-
6
-
7
-
8
class AutoGenMath(AutoGenGeneral):
    """AutoGen plugin variant that pairs one assistant agent with one user-proxy agent."""

    def define_agents(self):
        """
        Return the agent specifications for this plugin.

        Each specification is a dict holding the agent's "name", its class
        under "cls", and any further keyword arguments for that class.
        """
        from autogen import AssistantAgent, UserProxyAgent

        assistant_spec = {
            "name": "assistant",        # name of the agent.
            "cls": AssistantAgent,      # class of the agent.
        }
        user_proxy_spec = {
            "name": "user_proxy",           # name of the agent.
            "cls": UserProxyAgent,          # class of the agent.
            "human_input_mode": "ALWAYS",   # always ask for human input.
            "llm_config": False,            # disables llm-based auto reply.
        }
        return [assistant_spec, user_proxy_spec]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/agent_fns/echo_agent.py DELETED
@@ -1,19 +0,0 @@
1
- from crazy_functions.agent_fns.pipe import PluginMultiprocessManager, PipeCom
2
-
3
class EchoDemo(PluginMultiprocessManager):
    """Demo worker that echoes each user input back through the pipe."""

    def subprocess_worker(self, child_conn):
        """
        Message loop of the worker (runs in the subprocess).

        A "user_input" message is echoed back as a "show" message, after which
        the worker waits for further user feedback and stops if that wait does
        not succeed. A "terminate" message is acknowledged with "done" and
        ends the loop. Other commands are ignored.
        """
        # ⭐⭐ subprocess
        self.child_conn = child_conn
        keep_running = True
        while keep_running:
            msg = self.child_conn.recv()  # PipeCom
            if msg.cmd == "terminate":
                self.child_conn.send(PipeCom("done", ""))
                keep_running = False
            elif msg.cmd == "user_input":
                # echo, then wait for further user input
                self.child_conn.send(PipeCom("show", msg.content))
                feedback_ok = self.subprocess_worker_wait_user_feedback(wait_msg="我准备好处理下一个问题了.")
                if not feedback_ok:
                    # wait timeout, terminate this subprocess_worker
                    keep_running = False
        print('[debug] subprocess_worker terminated')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/agent_fns/general.py DELETED
@@ -1,134 +0,0 @@
1
- from toolbox import trimmed_format_exc, get_conf, ProxyNetworkActivate
2
- from crazy_functions.agent_fns.pipe import PluginMultiprocessManager, PipeCom
3
- from request_llms.bridge_all import predict_no_ui_long_connection
4
- import time
5
-
6
def gpt_academic_generate_oai_reply(
    self,
    messages,
    sender,
    config,
):
    """
    Reply function that answers through predict_no_ui_long_connection.

    It is written to be installed in place of an agent's 'generate_oai_reply'
    entry, so `self` is the agent the reply is generated for.

    Args:
        self: Agent object; `llm_config`, `_oai_messages` and `_oai_system_message` are read.
        messages: List of message dicts with a 'content' key, or None to use
            the messages stored for `sender`.
        sender: Key into `self._oai_messages` when `messages` is None.
        config: LLM settings overriding `self.llm_config`; None means use the agent's.

    Returns:
        (False, None) when the effective config is False (LLM replies disabled),
        otherwise (True, reply_text).
    """
    llm_config = self.llm_config if config is None else config
    if llm_config is False:
        return False, None
    if messages is None:
        messages = self._oai_messages[sender]

    # The last message is the prompt; everything before it becomes the history
    inputs = messages[-1]['content']
    history = [message['content'] for message in messages[:-1]]
    # 'context' is a reserved field that is not implemented; it is removed from the message
    context = messages[-1].pop("context", None)
    assert context is None, "预留参数 context 未实现"

    reply = predict_no_ui_long_connection(
        inputs=inputs,
        llm_kwargs=llm_config,
        history=history,
        sys_prompt=self._oai_system_message[0]['content'],
        console_slience=True
    )
    return True, reply
34
-
35
class AutoGenGeneral(PluginMultiprocessManager):
    """
    Base class that runs an AutoGen conversation inside the plugin subprocess.

    Subclasses provide the agents through define_agents(). Agent output and
    requests for human input are relayed to the parent process as PipeCom
    messages over `self.child_conn`.
    """

    def gpt_academic_print_override(self, user_proxy, message, sender):
        """Send a received message to the parent as a "show" command (`user_proxy` is unused)."""
        # ⭐⭐ run in subprocess
        self.child_conn.send(PipeCom("show", sender.name + "\n\n---\n\n" + message["content"]))

    def gpt_academic_get_human_input(self, user_proxy, message):
        """
        Ask the parent process for user input and wait for the answer.

        Sends an "interact" command, then polls the pipe every 0.5 s for up to
        300 s. On timeout a "done" command is sent and TimeoutError is raised.
        """
        # ⭐⭐ run in subprocess
        patience = 300
        begin_waiting_time = time.time()
        self.child_conn.send(PipeCom("interact", message))
        while True:
            time.sleep(0.5)
            if self.child_conn.poll():
                return self.child_conn.recv().content
            if time.time() - begin_waiting_time > patience:
                self.child_conn.send(PipeCom("done", ""))
                raise TimeoutError("等待用户输入超时")

    def define_agents(self):
        """Return a list of agent specification dicts ('name', 'cls', plus constructor kwargs)."""
        raise NotImplementedError

    def exe_autogen(self, input):
        """
        Build the agents and start a chat from the user proxy to the assistant.

        Args:
            input: Message object whose `content` is used as the opening chat message.

        Any exception raised while chatting is reported to the parent as a
        "done" command carrying the traceback.
        """
        # ⭐⭐ run in subprocess
        task = input.content
        with ProxyNetworkActivate("AutoGen"):
            code_execution_config = {"work_dir": self.autogen_work_dir, "use_docker": self.use_docker}
            user_proxy = None
            assistant = None
            for agent_spec in self.define_agents():
                # Work on a copy so the dicts returned by define_agents are not mutated
                agent_kwargs = dict(agent_spec)
                agent_cls = agent_kwargs.pop('cls')
                kwargs = {
                    'llm_config': self.llm_kwargs,
                    'code_execution_config': code_execution_config
                }
                kwargs.update(agent_kwargs)
                agent_handle = agent_cls(**kwargs)
                # Defaults bind the current loop values now (avoids late-binding closures)
                agent_handle._print_received_message = \
                    lambda a, b, _kw=agent_kwargs: self.gpt_academic_print_override(_kw, a, b)
                # Route LLM replies through gpt_academic instead of AutoGen's own client
                for d in agent_handle._reply_func_list:
                    if hasattr(d['reply_func'], '__name__') and d['reply_func'].__name__ == 'generate_oai_reply':
                        d['reply_func'] = gpt_academic_generate_oai_reply
                if agent_kwargs['name'] == 'user_proxy':
                    user_proxy = agent_handle
                    agent_handle.get_human_input = \
                        lambda a, _agent=agent_handle: self.gpt_academic_get_human_input(_agent, a)
                if agent_kwargs['name'] == 'assistant':
                    assistant = agent_handle
            try:
                if user_proxy is None or assistant is None:
                    raise Exception("用户代理或助理代理未定义")
                user_proxy.initiate_chat(assistant, message=task)
            except Exception:
                tb_str = '```\n' + trimmed_format_exc() + '```'
                self.child_conn.send(PipeCom("done", "AutoGen 执行失败: \n\n" + tb_str))

    def subprocess_worker(self, child_conn):
        """Worker loop: run one AutoGen session for every message received on the pipe."""
        # ⭐⭐ run in subprocess
        self.child_conn = child_conn
        while True:
            msg = self.child_conn.recv()  # PipeCom
            self.exe_autogen(msg)
99
-
100
-
101
class AutoGenGroupChat(AutoGenGeneral):
    """AutoGen variant that puts all defined agents into one group chat run by a manager."""

    def exe_autogen(self, input):
        """
        Build the agents, wrap them in a GroupChat and start the chat from the user proxy.

        Args:
            input: Message object whose `content` is used as the opening chat message.

        Any exception raised while setting up or chatting (including a missing
        'user_proxy' agent) is reported to the parent as a "done" command
        carrying the traceback.
        """
        # ⭐⭐ run in subprocess
        import autogen

        task = input.content
        with ProxyNetworkActivate("AutoGen"):
            code_execution_config = {"work_dir": self.autogen_work_dir, "use_docker": self.use_docker}
            agents_instances = []
            # Initialised up front so the "not defined" check below can actually fire
            # instead of failing with UnboundLocalError
            user_proxy = None
            agent_kwargs = None
            for agent_spec in self.define_agents():
                # Work on a copy so the dicts returned by define_agents are not mutated
                agent_kwargs = dict(agent_spec)
                agent_cls = agent_kwargs.pop("cls")
                kwargs = {"code_execution_config": code_execution_config}
                kwargs.update(agent_kwargs)
                agent_handle = agent_cls(**kwargs)
                # Defaults bind the current loop values now (avoids late-binding closures)
                agent_handle._print_received_message = \
                    lambda a, b, _kw=agent_kwargs: self.gpt_academic_print_override(_kw, a, b)
                agents_instances.append(agent_handle)
                if agent_kwargs["name"] == "user_proxy":
                    user_proxy = agent_handle
                    user_proxy.get_human_input = \
                        lambda a, _agent=agent_handle: self.gpt_academic_get_human_input(_agent, a)
            try:
                groupchat = autogen.GroupChat(agents=agents_instances, messages=[], max_round=50)
                manager = autogen.GroupChatManager(groupchat=groupchat, **self.define_group_chat_manager_config())
                manager._print_received_message = \
                    lambda a, b, _kw=agent_kwargs: self.gpt_academic_print_override(_kw, a, b)
                manager.get_human_input = lambda a: self.gpt_academic_get_human_input(manager, a)
                if user_proxy is None:
                    raise Exception("user_proxy is not defined")
                user_proxy.initiate_chat(manager, message=task)
            except Exception:
                tb_str = "```\n" + trimmed_format_exc() + "```"
                self.child_conn.send(PipeCom("done", "AutoGen exe failed: \n\n" + tb_str))

    def define_group_chat_manager_config(self):
        """Return the keyword arguments passed to autogen.GroupChatManager."""
        raise NotImplementedError
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/agent_fns/persistent.py DELETED
@@ -1,16 +0,0 @@
1
- from toolbox import Singleton
2
@Singleton
class GradioMultiuserManagerForPersistentClasses():
    """Registry that stores one object per key and can report whether it is alive."""

    def __init__(self):
        self.mapping = dict()

    def already_alive(self, key):
        """Return False for an unknown key, otherwise the stored object's is_alive() result."""
        if key not in self.mapping:
            return False
        return self.mapping[key].is_alive()

    def set(self, key, x):
        """Store x under key and return the stored object."""
        self.mapping[key] = x
        return self.mapping[key]

    def get(self, key):
        """Return the object stored under key (KeyError if there is none)."""
        return self.mapping[key]
16
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/agent_fns/pipe.py DELETED
@@ -1,194 +0,0 @@
1
- from toolbox import get_log_folder, update_ui, gen_time_str, get_conf, promote_file_to_downloadzone
2
- from crazy_functions.agent_fns.watchdog import WatchDog
3
- import time, os
4
-
5
class PipeCom:
    """One message sent through the pipe: a command tag and its payload."""

    def __init__(self, cmd, content) -> None:
        # cmd: tag such as "user_input", "show", "interact" or "done"
        # content: the payload that accompanies the tag
        self.cmd, self.content = cmd, content
9
-
10
-
11
class PluginMultiprocessManager:
    """Run a plugin worker in a subprocess and relay its messages to the chat UI.

    The main process talks to the worker over a ``multiprocessing.Pipe`` using
    ``PipeCom`` messages. A heartbeat watchdog terminates the instance when it
    has not been fed for five minutes.

    Subclasses must override ``subprocess_worker``. Note that
    ``subprocess_worker_wait_user_feedback`` reads ``self.child_conn``, which
    this class never assigns — presumably the subclass stores it inside
    ``subprocess_worker`` (confirm against the subclasses).
    """

    def __init__(self, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
        # ⭐ run in main process
        # each instance works in its own time-stamped folder below the "autogen" log folder
        self.autogen_work_dir = os.path.join(get_log_folder("autogen"), gen_time_str())
        # file path -> last seen modification time, used to detect new/changed files
        self.previous_work_dir_files = {}
        self.llm_kwargs = llm_kwargs
        self.plugin_kwargs = plugin_kwargs
        self.chatbot = chatbot
        self.history = history
        self.system_prompt = system_prompt
        # NOTE: web_port is accepted for signature compatibility but not stored
        self.alive = True
        self.use_docker = get_conf("AUTOGEN_USE_DOCKER")
        self.last_user_input = ""
        # watchdog thread: terminate the instance if there is no heartbeat for a long time
        timeout_seconds = 5 * 60
        self.heartbeat_watchdog = WatchDog(timeout=timeout_seconds, bark_fn=self.terminate, interval=5)
        self.heartbeat_watchdog.begin_watch()

    def feed_heartbeat_watchdog(self):
        """Feed the watchdog so that it does not bark (barking terminates the instance)."""
        self.heartbeat_watchdog.feed()

    def is_alive(self):
        """Return ``True`` until ``terminate`` has been called."""
        return self.alive

    def launch_subprocess_with_pipe(self):
        """Start ``subprocess_worker`` in a daemon process; return the parent end of the pipe."""
        # ⭐ run in main process
        from multiprocessing import Process, Pipe

        parent_conn, child_conn = Pipe()
        self.p = Process(target=self.subprocess_worker, args=(child_conn,))
        self.p.daemon = True
        self.p.start()
        return parent_conn

    def terminate(self):
        """Stop the worker process (if one was launched) and mark the instance dead."""
        # The watchdog may call this before launch_subprocess_with_pipe() ever
        # created self.p, so the attribute must not be assumed to exist.
        process = getattr(self, "p", None)
        if process is not None:
            process.terminate()
        self.alive = False
        print("[debug] instance terminated")

    def subprocess_worker(self, child_conn):
        """Worker entry point executed in the subprocess; must be overridden."""
        # ⭐⭐ run in subprocess
        raise NotImplementedError

    def send_command(self, cmd):
        """Send the user's input to the worker.

        An input identical to the previous one is replaced by an empty string
        before being sent.

        Returns:
            ``(repeated, cmd)`` where ``repeated`` tells whether the input was
            a repeat and ``cmd`` is the text that was actually sent.
        """
        # ⭐ run in main process
        repeated = cmd == self.last_user_input
        if repeated:
            cmd = ""
        else:
            self.last_user_input = cmd
        self.parent_conn.send(PipeCom("user_input", cmd))
        return repeated, cmd

    def immediate_showoff_when_possible(self, fp):
        """Show ``fp`` inline in the chat when it is a png/jpg image (generator)."""
        # ⭐ run in main process
        # decide by the real file extension, so that a file that is merely
        # *named* "png" (no dot) is not treated as an image
        extension = os.path.splitext(fp)[1].lower()
        if extension in ('.png', '.jpg'):
            image_path = os.path.abspath(fp)
            self.chatbot.append([
                '检测到新生图像:',
                f'本地文件预览: <br/><div align="center"><img src="file={image_path}"></div>'
            ])
            yield from update_ui(chatbot=self.chatbot, history=self.history)

    def overwatch_workdir_file_change(self):
        """Report files in the work dir that are new or modified since the last scan.

        Generator: yields UI updates while announcing the files and returns
        the list of changed paths as the generator's return value.
        """
        # ⭐ run in main process; watches the (Docker-mounted) work folder
        change_list = []
        # compare every file against the recorded modification time; a missing
        # record (new file) or a different time (changed file) counts as a change
        for root, _dirs, files in os.walk(self.autogen_work_dir):
            for file in files:
                file_path = os.path.join(root, file)
                last_modified_time = os.stat(file_path).st_mtime
                if self.previous_work_dir_files.get(file_path) != last_modified_time:
                    self.previous_work_dir_files[file_path] = last_modified_time
                    change_list.append(file_path)
        if change_list:
            file_links = ""
            for changed in change_list:
                res = promote_file_to_downloadzone(changed)
                file_links += f'<br/><a href="file={res}" target="_blank">{res}</a>'
                yield from self.immediate_showoff_when_possible(changed)

            self.chatbot.append(['检测到新生文档.', f'文档清单如下: {file_links}'])
            yield from update_ui(chatbot=self.chatbot, history=self.history)
        return change_list

    def _last_reply(self):
        """Return the reply text of the last chat row, or ``""`` when there is none."""
        if not self.chatbot:
            return ""
        return self.chatbot[-1][-1] or ""

    def main_process_ui_control(self, txt, create_or_resume) -> str:
        """Drive the UI while the worker runs (generator).

        Sends ``txt`` to the worker (launching it first when
        ``create_or_resume == 'create'``), then polls the pipe every 0.5 s and
        mirrors incoming messages into the chatbot.

        Returns (as the generator's return value):
            ``"terminate"`` when the session ended, or ``"wait_feedback"``
            when the worker is waiting for further user input.
        """
        # ⭐ run in main process
        waiting_marker = "[GPT-Academic] 等待中"
        if create_or_resume == 'create':
            self.cnt = 1
            self.parent_conn = self.launch_subprocess_with_pipe()  # ⭐⭐⭐
        repeated, cmd_to_autogen = self.send_command(txt)
        if txt == 'exit':
            self.chatbot.append(["结束", "结束信号已明确,终止AutoGen程序。"])
            yield from update_ui(chatbot=self.chatbot, history=self.history)
            self.terminate()
            return "terminate"

        while True:
            time.sleep(0.5)
            if not self.alive:
                # the heartbeat watchdog might have it killed
                self.terminate()
                return "terminate"
            if self.parent_conn.poll():
                self.feed_heartbeat_watchdog()
                # drop the transient "waiting" / "awaiting instruction" rows;
                # _last_reply() keeps this safe when the chatbot runs empty
                for marker in (waiting_marker, "等待您的进一步指令", waiting_marker):
                    if marker in self._last_reply():
                        self.chatbot.pop(-1)
                msg = self.parent_conn.recv()  # PipeCom
                if msg.cmd == "done":
                    self.chatbot.append(["结束", msg.content])
                    self.cnt += 1
                    yield from update_ui(chatbot=self.chatbot, history=self.history)
                    self.terminate()
                    break
                if msg.cmd == "show":
                    yield from self.overwatch_workdir_file_change()
                    notice = "(自动忽略重复的输入)" if repeated else ""
                    self.chatbot.append([f"运行阶段-{self.cnt}(上次用户反馈输入为: 「{cmd_to_autogen}」{notice}", msg.content])
                    self.cnt += 1
                    yield from update_ui(chatbot=self.chatbot, history=self.history)
                if msg.cmd == "interact":
                    yield from self.overwatch_workdir_file_change()
                    self.chatbot.append(["程序抵达用户反馈节点.", msg.content +
                                         "\n\n等待您的进一步指令." +
                                         "\n\n(1) 一般情况下您不需要说什么, 清空输入区, 然后直接点击“提交”以继续. " +
                                         "\n\n(2) 如果您需要补充些什么, 输入要反馈的内容, 直接点击“提交”以继续. " +
                                         "\n\n(3) 如果您想终止程序, 输入exit, 直接点击“提交”以终止AutoGen并解锁. "
                                         ])
                    yield from update_ui(chatbot=self.chatbot, history=self.history)
                    # do not terminate here, leave the subprocess_worker instance alive
                    return "wait_feedback"
            else:
                self.feed_heartbeat_watchdog()
                if waiting_marker not in self._last_reply():
                    self.chatbot.append(["[GPT-Academic] 等待AutoGen执行结果 ...", "[GPT-Academic] 等待中"])
                # grow the trailing dots so the user sees that we are still waiting
                self.chatbot[-1] = [self.chatbot[-1][0], self.chatbot[-1][1].replace("[GPT-Academic] 等待中", "[GPT-Academic] 等待中.")]
                yield from update_ui(chatbot=self.chatbot, history=self.history)

        self.terminate()
        return "terminate"

    def subprocess_worker_wait_user_feedback(self, wait_msg="wait user feedback"):
        """Ask the main process for user feedback and wait up to five minutes for it.

        Sends an "interact" message, then polls ``self.child_conn`` every
        0.5 s. On timeout a "done" message with empty content is sent.

        Returns:
            ``True`` when something arrived on the pipe, ``False`` on timeout.
        """
        # ⭐⭐ run in subprocess
        patience = 5 * 60
        begin_waiting_time = time.time()
        self.child_conn.send(PipeCom("interact", wait_msg))
        while True:
            time.sleep(0.5)
            if self.child_conn.poll():
                return True
            if time.time() - begin_waiting_time > patience:
                self.child_conn.send(PipeCom("done", ""))
                return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/agent_fns/watchdog.py DELETED
@@ -1,28 +0,0 @@
1
- import threading, time
2
-
3
class WatchDog():
    """Call ``bark_fn`` once when the dog has not been fed for ``timeout`` seconds.

    ``begin_watch`` starts a daemon thread that checks every ``interval``
    seconds. Setting ``kill_dog`` to ``True`` makes the loop exit without
    barking on its next check.
    """

    def __init__(self, timeout, bark_fn, interval=3, msg="") -> None:
        self.last_feed = None       # time.time() of the latest feed; None until watching begins
        self.timeout = timeout      # seconds without feeding before the dog barks
        self.bark_fn = bark_fn      # zero-argument callable invoked on timeout
        self.interval = interval    # seconds between two checks
        self.msg = msg              # printed right before barking when non-empty
        self.kill_dog = False       # set to True to stop watching

    def watch(self):
        """Check loop; returns after barking once or once ``kill_dog`` is set."""
        # watch() may be run directly without begin_watch(); start the clock
        # here in that case instead of failing on `time.time() - None`
        if self.last_feed is None:
            self.last_feed = time.time()
        while not self.kill_dog:
            if time.time() - self.last_feed > self.timeout:
                if len(self.msg) > 0:
                    print(self.msg)
                self.bark_fn()
                break
            time.sleep(self.interval)

    def begin_watch(self):
        """Reset the feed time and start ``watch`` in a daemon thread."""
        self.last_feed = time.time()
        th = threading.Thread(target=self.watch)
        th.daemon = True
        th.start()

    def feed(self):
        """Record that the dog was fed just now."""
        self.last_feed = time.time()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/chatglm微调工具.py DELETED
@@ -1,141 +0,0 @@
1
- from toolbox import CatchException, update_ui, promote_file_to_downloadzone
2
- from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
3
- import datetime, json
4
-
5
def fetch_items(list_of_items, batch_size):
    """Yield consecutive slices of ``list_of_items`` holding at most ``batch_size`` items each."""
    total = len(list_of_items)
    yield from (
        list_of_items[start:start + batch_size]
        for start in range(0, total, batch_size)
    )
8
-
9
def string_to_options(arguments):
    """Parse a command-line style option string into an ``argparse.Namespace``.

    ``arguments`` is split with ``shlex.split`` (so quoting works as in a
    shell) and parsed against the options declared below; options that are
    not given keep their defaults. As usual for ``argparse``, an unknown or
    malformed option makes ``parse_args`` exit via ``SystemExit``.
    """
    import argparse
    import shlex

    parser = argparse.ArgumentParser()

    # options for dataset generation
    parser.add_argument("--llm_to_learn", type=str, help="LLM model to learn", default="gpt-3.5-turbo")
    parser.add_argument("--prompt_prefix", type=str, help="Prompt prefix", default='')
    parser.add_argument("--system_prompt", type=str, help="System prompt", default='')
    # help text used to be a copy-paste of "System prompt"
    parser.add_argument("--batch", type=int, help="Batch size", default=50)
    # options for launching the fine-tuning run
    parser.add_argument("--pre_seq_len", type=int, help="pre_seq_len", default=50)
    parser.add_argument("--learning_rate", type=float, help="learning_rate", default=2e-2)
    parser.add_argument("--num_gpus", type=int, help="num_gpus", default=1)
    parser.add_argument("--json_dataset", type=str, help="json_dataset", default="")
    parser.add_argument("--ptuning_directory", type=str, help="ptuning_directory", default="")

    return parser.parse_args(shlex.split(arguments))
33
-
34
@CatchException
def 微调数据集生成(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Generate a fine-tuning dataset by asking an LLM to answer every record of a JSONL file.

    txt             path of the input file: one JSON object per line, each with a "content" field
    llm_kwargs      LLM parameters (temperature, top_p, ...); "llm_model" is overwritten here
    plugin_kwargs   plugin parameters; "advanced_arg" holds the option string for string_to_options
    chatbot         handle of the chat display, used to show messages to the user
    history         chat history (discarded by this plugin)
    system_prompt   silent system prompt (unused here; --system_prompt from the options is used)
    web_port        port the application runs on (unused here)

    Results are appended to ``txt + '.generated.json'`` as JSON lines with
    the keys "content" and "summary", and the file is promoted to the
    download zone at the end.
    """
    history = []    # clear the history so that the input cannot overflow
    chatbot.append(("这是什么功能?", "[Local Message] 微调数据集生成"))
    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
    args = plugin_kwargs.get("advanced_arg", None)
    if args is None:
        chatbot.append(("没给定指令", "退出"))
        yield from update_ui(chatbot=chatbot, history=history)
        return
    arguments = string_to_options(arguments=args)

    dat = []
    with open(txt, 'r', encoding='utf8') as f:
        for line in f:
            # JSONL files commonly end with (or contain) blank lines;
            # json.loads would raise on them, so skip them
            if not line.strip():
                continue
            dat.append(json.loads(line)["content"])

    llm_kwargs['llm_model'] = arguments.llm_to_learn
    for batch in fetch_items(dat, arguments.batch):
        res = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
            inputs_array=[f"{arguments.prompt_prefix}\n\n{b}" for b in batch],
            inputs_show_user_array=["Show Nothing" for _ in batch],
            llm_kwargs=llm_kwargs,
            chatbot=chatbot,
            history_array=[[] for _ in batch],
            sys_prompt_array=[arguments.system_prompt for _ in batch],
            max_workers=10  # upper bound on parallel requests
        )

        # res[1::2]: every second entry is paired with the inputs — presumably
        # the helper returns [input, reply, input, reply, ...]; confirm in crazy_utils
        with open(txt+'.generated.json', 'a+', encoding='utf8') as f:
            for b, r in zip(batch, res[1::2]):
                f.write(json.dumps({"content":b, "summary":r}, ensure_ascii=False)+'\n')

    promote_file_to_downloadzone(txt+'.generated.json', rename_file='generated.json', chatbot=chatbot)
    return
79
-
80
-
81
-
82
@CatchException
def 启动微调(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Launch a ChatGLM2 p-tuning run with ``torchrun`` and wait for it (at most 24 hours).

    txt             text from the input box (unused here)
    llm_kwargs      LLM parameters (unused here)
    plugin_kwargs   plugin parameters; "advanced_arg" holds the option string for string_to_options
    chatbot         handle of the chat display, used to show messages to the user
    history         chat history (discarded by this plugin)
    system_prompt   silent system prompt (unused here)
    web_port        port the application runs on (unused here)

    The training command runs inside ``--ptuning_directory``.
    """
    import subprocess
    history = []    # clear the history so that the input cannot overflow
    # the banner used to be a copy-paste of the dataset-generation plugin's text
    chatbot.append(("这是什么功能?", "[Local Message] 启动微调"))
    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
    args = plugin_kwargs.get("advanced_arg", None)
    if args is None:
        chatbot.append(("没给定指令", "退出"))
        yield from update_ui(chatbot=chatbot, history=history)
        return
    arguments = string_to_options(arguments=args)

    pre_seq_len = arguments.pre_seq_len                 # e.g. 128
    learning_rate = arguments.learning_rate             # e.g. 2e-2
    num_gpus = arguments.num_gpus                       # e.g. 1
    json_dataset = arguments.json_dataset               # e.g. 't_code.json'
    ptuning_directory = arguments.ptuning_directory     # e.g. '/home/hmp/ChatGLM2-6B/ptuning'

    # The values above come from the user's input box. Pass them as separate
    # argv entries (no shell) so they cannot inject shell syntax.
    command = [
        "torchrun", "--standalone", "--nnodes=1", f"--nproc-per-node={num_gpus}", "main.py",
        "--do_train",
        "--train_file", f"AdvertiseGen/{json_dataset}",
        "--validation_file", f"AdvertiseGen/{json_dataset}",
        "--preprocessing_num_workers", "20",
        "--prompt_column", "content",
        "--response_column", "summary",
        "--overwrite_cache",
        "--model_name_or_path", "THUDM/chatglm2-6b",
        "--output_dir", f"output/clothgen-chatglm2-6b-pt-{pre_seq_len}-{learning_rate}",
        "--overwrite_output_dir",
        "--max_source_length", "256",
        "--max_target_length", "256",
        "--per_device_train_batch_size", "1",
        "--per_device_eval_batch_size", "1",
        "--gradient_accumulation_steps", "16",
        "--predict_with_generate",
        "--max_steps", "100",
        "--logging_steps", "10",
        "--save_steps", "20",
        "--learning_rate", f"{learning_rate}",
        "--pre_seq_len", f"{pre_seq_len}",
        "--quantization_bit", "4",
    ]

    process = subprocess.Popen(command, cwd=ptuning_directory)
    try:
        process.communicate(timeout=3600*24)
    except subprocess.TimeoutExpired:
        process.kill()
        # reap the killed child so it does not linger as a zombie
        process.communicate()
    return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/crazy_functions_test.py DELETED
@@ -1,231 +0,0 @@
1
- """
2
- 这是什么?
3
- 这个文件用于函数插件的单元测试
4
- 运行方法 python crazy_functions/crazy_functions_test.py
5
- """
6
-
7
- # ==============================================================================================================================
8
-
9
def validate_path():
    """Make the parent of this file's directory the working directory and importable.

    The script lives one level below the project root; after this call the
    process runs from that root and the root has been appended to
    ``sys.path``.
    """
    import os, sys
    # Resolve __file__ to an absolute path first: when it is a bare relative
    # name (as for a script started from its own directory before
    # Python 3.9), dirname() is "" and `"" + "/.."` would point at the
    # filesystem root.
    here = os.path.dirname(os.path.abspath(__file__))
    root_dir_assume = os.path.abspath(os.path.join(here, '..'))
    os.chdir(root_dir_assume)
    sys.path.append(root_dir_assume)
15
- validate_path() # validate path so you can run from base directory
16
-
17
- # ==============================================================================================================================
18
-
19
- from colorful import *
20
- from toolbox import get_conf, ChatBotWithCookies
21
- import contextlib
22
- import os
23
- import sys
24
- from functools import wraps
25
# Settings read from the project configuration.
(proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT,
 AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY) = get_conf(
    'proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT',
    'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY')

# Shared arguments that every test below passes to the plugin it exercises.
llm_kwargs = dict(
    api_key=API_KEY,
    llm_model=LLM_MODEL,
    top_p=1.0,
    max_length=None,
    temperature=1.0,
)
plugin_kwargs = dict()
chatbot = ChatBotWithCookies(llm_kwargs)
history = list()
system_prompt = "Serve me as a writing and programming assistant."
web_port = 1024
40
-
41
- # ==============================================================================================================================
42
-
43
def silence_stdout(func):
    """Wrap generator function ``func`` so that whatever it prints is discarded.

    While ``func`` is running, ``sys.stdout`` points at ``os.devnull``; each
    time a value is handed to the consumer the original stdout is put back,
    so the consumer can print normally between items.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        original_stdout = sys.stdout
        # one devnull handle for the whole run (the old code opened a fresh
        # one after every item and only ever closed the last)
        with open(os.devnull, 'w') as devnull:
            sys.stdout = devnull
            try:
                for item in func(*args, **kwargs):
                    sys.stdout = original_stdout
                    yield item
                    sys.stdout = devnull
            finally:
                # also restore when func raises or the consumer stops early
                sys.stdout = original_stdout
    return wrapper
55
-
56
class CLI_Printer():
    """Print a growing chat transcript to the terminal, emitting only what is new."""

    def __init__(self) -> None:
        # transcript text as rendered by the previous print() call
        self.pre_buf = ""

    def print(self, buf):
        """Render ``buf`` (a sequence of ``(question, answer)`` pairs) and print it.

        When the new rendering merely extends the previous one, only the
        added tail is printed; otherwise the whole transcript is printed
        again after a run of blank lines.
        """
        rendered = '\n'.join(
            sprint亮靛('[Me]:' + question) + '\n' + '[GPT]:' + answer
            for question, answer in buf
        )

        extends_previous = self.pre_buf != "" and rendered.startswith(self.pre_buf)
        if extends_previous:
            print(rendered[len(self.pre_buf):], end='')
        else:
            print('\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n'+rendered, end='')
        self.pre_buf = rendered
        return
75
-
76
- cli_printer = CLI_Printer()
77
- # ==============================================================================================================================
78
def test_解析一个Python项目():
    """Run the Python-project analysis plugin on the bundled sample project and print each chat state."""
    from crazy_functions.解析项目源代码 import 解析一个Python项目
    txt = "crazy_functions/test_project/python/dqn"
    updates = 解析一个Python项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
    for _cookies, cb, _hist, _msg in updates:
        print(cb)
83
-
84
def test_解析一个Cpp项目():
    """Run the C/C++-project analysis plugin on the bundled cppipc sample and print each chat state."""
    from crazy_functions.解析项目源代码 import 解析一个C项目
    txt = "crazy_functions/test_project/cpp/cppipc"
    updates = 解析一个C项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
    for _cookies, cb, _hist, _msg in updates:
        print(cb)
89
-
90
def test_Latex英文润色():
    """Run the LaTeX English-polishing plugin on the bundled sample paper and print each chat state."""
    from crazy_functions.Latex全文润色 import Latex英文润色
    txt = "crazy_functions/test_project/latex/attention"
    updates = Latex英文润色(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
    for _cookies, cb, _hist, _msg in updates:
        print(cb)
95
-
96
def test_Markdown中译英():
    """Run the Markdown Chinese-to-English plugin on README.md and print each chat state."""
    from crazy_functions.批量Markdown翻译 import Markdown中译英
    txt = "README.md"
    updates = Markdown中译英(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
    for _cookies, cb, _hist, _msg in updates:
        print(cb)
101
-
102
def test_批量翻译PDF文档():
    """Run the batch PDF translation plugin on the sample folder and print each chat state."""
    from crazy_functions.批量翻译PDF文档_多线程 import 批量翻译PDF文档
    txt = "crazy_functions/test_project/pdf_and_word"
    updates = 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
    for _cookies, cb, _hist, _msg in updates:
        print(cb)
107
-
108
def test_谷歌检索小助手():
    """Run the Google Scholar helper plugin on a fixed search URL and print each chat state."""
    from crazy_functions.谷歌检索小助手 import 谷歌检索小助手
    txt = "https://scholar.google.com/scholar?hl=en&as_sdt=0%2C5&q=auto+reinforcement+learning&btnG="
    updates = 谷歌检索小助手(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
    for _cookies, cb, _hist, _msg in updates:
        print(cb)
113
-
114
def test_总结word文档():
    """Run the Word-document summary plugin on the sample folder and print each chat state."""
    from crazy_functions.总结word文档 import 总结word文档
    txt = "crazy_functions/test_project/pdf_and_word"
    updates = 总结word文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
    for _cookies, cb, _hist, _msg in updates:
        print(cb)
119
-
120
def test_下载arxiv论文并翻译摘要():
    """Run the arXiv download-and-translate-abstract plugin on a fixed paper id and print each chat state."""
    from crazy_functions.下载arxiv论文翻译摘要 import 下载arxiv论文并翻译摘要
    txt = "1812.10695"
    updates = 下载arxiv论文并翻译摘要(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
    for _cookies, cb, _hist, _msg in updates:
        print(cb)
125
-
126
def test_联网回答问题():
    """Run the web-search question-answering plugin on a fixed question.

    Prints the latest answer (newlines flattened) for every update, then the
    whole conversation in colour once the plugin has finished.
    """
    from crazy_functions.联网的ChatGPT import 连接网络回答问题
    # Other questions that were tried with this test:
    #   "谁是应急食品?"
    #   "道路千万条,安全第一条。后面两句是?"
    #   "You should have gone for the head. What does that mean?"
    txt = "AutoGPT是什么?"
    updates = 连接网络回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
    for _cookies, cb, _hist, _msg in updates:
        latest_answer = cb[-1][-1].replace("\n"," ")
        print("当前问答:", latest_answer)
    for row in cb:
        print亮蓝(row[0])
        print亮黄(row[1])
138
-
139
def test_解析ipynb文件():
    """Run the Jupyter-notebook analysis plugin on the sample folder and print each chat state."""
    from crazy_functions.解析JupyterNotebook import 解析ipynb文件
    txt = "crazy_functions/test_samples"
    updates = 解析ipynb文件(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
    for _cookies, cb, _hist, _msg in updates:
        print(cb)
144
-
145
-
146
def test_数学动画生成manim():
    """Run the manim animation plugin on a fixed prompt and print each chat state."""
    from crazy_functions.数学动画生成manim import 动画生成
    txt = "A ball split into 2, and then split into 4, and finally split into 8."
    updates = 动画生成(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
    for _cookies, cb, _hist, _msg in updates:
        print(cb)
151
-
152
-
153
-
154
def test_Markdown多语言():
    """Translate README.md into several languages in turn, printing each chat state."""
    from crazy_functions.批量Markdown翻译 import Markdown翻译指定语言
    txt = "README.md"
    history = []
    target_languages = ["English", "French", "Japanese", "Korean", "Russian", "Italian", "German", "Portuguese", "Arabic"]
    for lang in target_languages:
        # the plugin reads the target language from "advanced_arg"
        plugin_kwargs = {"advanced_arg": lang}
        updates = Markdown翻译指定语言(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
        for _cookies, cb, _hist, _msg in updates:
            print(cb)
162
-
163
def test_Langchain知识库():
    """Build a knowledge base from the current directory, then ask it one question.

    Both plugin calls run with their own stdout silenced; the chat state is
    shown through ``cli_printer``. The cookies produced by the first call are
    used to create the chatbot for the second.
    """
    from crazy_functions.Langchain知识库 import 知识库问答
    txt = "./"
    chatbot = ChatBotWithCookies(llm_kwargs)
    build_updates = silence_stdout(知识库问答)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
    for cookies, cb, _hist, _msg in build_updates:
        cli_printer.print(cb)

    chatbot = ChatBotWithCookies(cookies)
    from crazy_functions.Langchain知识库 import 读取知识库作答
    txt = "What is the installation method?"
    answer_updates = silence_stdout(读取知识库作答)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
    for cookies, cb, _hist, _msg in answer_updates:
        cli_printer.print(cb)
175
-
176
def test_Langchain知识库读取():
    """Ask the knowledge-base answering plugin one question, with its stdout silenced."""
    from crazy_functions.Langchain知识库 import 读取知识库作答
    txt = "远程云服务器部署?"
    updates = silence_stdout(读取知识库作答)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
    for _cookies, cb, _hist, _msg in updates:
        cli_printer.print(cb)
181
-
182
def test_Latex():
    """Run the translate-to-Chinese-and-recompile LaTeX plugin on one arXiv paper."""
    from crazy_functions.Latex输出PDF结果 import Latex英文纠错加PDF对比, Latex翻译中文并重新编译PDF

    # Alternative inputs that were used with this test:
    #   r"https://arxiv.org/abs/1706.03762"
    #   r"https://arxiv.org/abs/1902.03185"
    #   r"https://arxiv.org/abs/2305.18290"
    #   r"https://arxiv.org/abs/2305.17608"
    #   r"https://arxiv.org/abs/2211.16068"  # ACE
    #   r"C:\Users\x\arxiv_cache\2211.16068\workfolder"  # ACE
    #   r"https://arxiv.org/abs/2002.09253"
    #   r"https://arxiv.org/abs/2306.07831"
    #   r"https://arxiv.org/abs/2212.10156"
    #   r"https://arxiv.org/abs/2211.11559"
    #   r"https://arxiv.org/abs/2303.08774"
    #   r"C:\Users\fuqingxu\arxiv_cache\2303.12712\workfolder"
    txt = r"https://arxiv.org/abs/2303.12712"

    updates = Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
    for _cookies, cb, _hist, _msg in updates:
        cli_printer.print(cb)
202
-
203
-
204
-
205
- # txt = "2302.02948.tar"
206
- # print(txt)
207
- # main_tex, work_folder = Latex预处理(txt)
208
- # print('main tex:', main_tex)
209
- # res = 编译Latex(main_tex, work_folder)
210
- # # for cookies, cb, hist, msg in silence_stdout(编译Latex)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
211
- # cli_printer.print(cb) # print(cb)
212
-
213
-
214
-
215
- # test_解析一个Python项目()
216
- # test_Latex英文润色()
217
- # test_Markdown中译英()
218
- # test_批量翻译PDF文档()
219
- # test_谷歌检索小助手()
220
- # test_总结word文档()
221
- # test_下载arxiv论文并翻译摘要()
222
- # test_解析一个Cpp项目()
223
- # test_联网回答问题()
224
- # test_解析ipynb文件()
225
- # test_数学动画生成manim()
226
- # test_Langchain知识库()
227
- # test_Langchain知识库读取()
228
# Script entry point: run the selected test, then wait for Enter before exiting.
if "__main__" == __name__:
    test_Latex()
    input("程序完成,回车退出。")
    print("退出。")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/crazy_utils.py DELETED
@@ -1,606 +0,0 @@
1
- from toolbox import update_ui, get_conf, trimmed_format_exc, get_max_token, Singleton
2
- import threading
3
- import os
4
- import logging
5
-
6
def input_clipping(inputs, history, max_token_limit):
    """
    Clip `inputs` and `history` until their joined token count fits the budget.

    Token counts are measured with the tokenizer registered for "gpt-3.5-turbo"
    in `model_info`.

    Args:
        inputs (str): the current user input.
        history (list[str]): the previous conversation messages.
        max_token_limit (int): the token budget for inputs plus history.

    Returns:
        tuple: `(inputs, history)` after clipping. `inputs` is returned
        untouched when only the history had to be clipped.
    """
    from request_llms.bridge_all import model_info
    enc = model_info["gpt-3.5-turbo"]['tokenizer']

    def get_token_num(txt):
        return len(enc.encode(txt, disallowed_special=()))

    mode = 'input-and-history'
    # When the input takes less than half of the budget, only the history is
    # clipped and the budget is reduced by the size of the input.
    input_token_num = get_token_num(inputs)
    if input_token_num < max_token_limit//2:
        mode = 'only-history'
        max_token_limit = max_token_limit - input_token_num

    everything = [inputs] if mode == 'input-and-history' else ['']
    everything.extend(history)
    n_token = get_token_num('\n'.join(everything))
    everything_token = [get_token_num(e) for e in everything]
    delta = max(everything_token) // 16  # clipping granularity, in tokens

    while n_token > max_token_limit:
        # Always shorten the piece that currently holds the most tokens
        # (first one wins on ties).
        longest = max(everything_token)
        if longest == 0:
            # Every piece is already empty: nothing more can be removed, so
            # stop instead of spinning forever on an unreachable budget.
            break
        where = everything_token.index(longest)
        encoded = enc.encode(everything[where], disallowed_special=())
        clipped_encoded = encoded[:len(encoded)-delta]
        # Drop the last character too: decoding a cut token sequence may end
        # with an illegal (partial) character.
        everything[where] = enc.decode(clipped_encoded)[:-1]
        everything_token[where] = get_token_num(everything[where])
        n_token = get_token_num('\n'.join(everything))

    if mode == 'input-and-history':
        inputs = everything[0]
    history = everything[1:]
    return inputs, history
39
-
40
def request_gpt_model_in_new_thread_with_ui_alive(
        inputs, inputs_show_user, llm_kwargs,
        chatbot, history, sys_prompt, refresh_interval=0.2,
        handle_token_exceed=True,
        retry_times_at_unknown_error=2,
        ):
    """
    Request the LLM in a worker thread while keeping the user interface alive.

    This is a generator: it yields UI refreshes while the request is running
    and returns the final answer (use `result = yield from ...`).

    Args:
        inputs (str): the real input sent to the model.
        inputs_show_user (str): the input shown in the chat window (lets the
            report hide a verbose real input).
        llm_kwargs: model parameters, forwarded to the request function.
        chatbot: chat window handle; a new `[inputs_show_user, ""]` entry is
            appended and then updated with the streamed answer.
        history (list): conversation history sent to the model.
        sys_prompt (str): system prompt sent to the model.
        refresh_interval (float, optional): seconds between UI refreshes.
        handle_token_exceed (bool, optional): when True, a token overflow is
            handled by clipping the input/history and retrying.
        retry_times_at_unknown_error (int, optional): number of retries after
            an unknown error.

    Returns:
        str: the model answer, or the accumulated warning/traceback text when
        the request was given up.
    """
    import time
    from concurrent.futures import ThreadPoolExecutor
    from request_llms.bridge_all import predict_no_ui_long_connection
    # feedback to the user
    chatbot.append([inputs_show_user, ""])
    yield from update_ui(chatbot=chatbot, history=[])  # refresh the UI
    executor = ThreadPoolExecutor(max_workers=16)
    # shared with the worker: [streamed text, last watchdog feed time, unused]
    mutable = ["", time.time(), ""]
    # watchdog patience, in seconds
    watch_dog_patience = 5

    # the task executed in the worker thread
    def _req_gpt(inputs, history, sys_prompt):
        retry_op = retry_times_at_unknown_error
        exceeded_cnt = 0
        while True:
            # watchdog: the UI loop stopped feeding us, so the caller is gone
            if len(mutable) >= 2 and (time.time()-mutable[1]) > watch_dog_patience:
                raise RuntimeError("检测到程序终止。")
            try:
                # case 1: success
                result = predict_no_ui_long_connection(
                    inputs=inputs, llm_kwargs=llm_kwargs,
                    history=history, sys_prompt=sys_prompt, observe_window=mutable)
                return result
            except ConnectionAbortedError as token_exceeded_error:
                # case 2: token overflow
                if not handle_token_exceed:
                    # give up
                    tb_str = '```\n' + trimmed_format_exc() + '```'
                    mutable[0] += f"[Local Message] 警告,在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
                    return mutable[0]
                exceeded_cnt += 1
                # clip while keeping as much text as possible; the allowance
                # grows with every overflow
                from toolbox import get_reduce_token_percent
                _, n_exceed = get_reduce_token_percent(str(token_exceeded_error))
                MAX_TOKEN = get_max_token(llm_kwargs)
                EXCEED_ALLO = 512 + 512 * exceeded_cnt
                inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN-EXCEED_ALLO)
                mutable[0] += f'[Local Message] 警告,文本过长将进行截断,Token溢出数:{n_exceed}。\n\n'
                continue  # retry
            except Exception:
                # case 3: any other error, retry a few times
                tb_str = '```\n' + trimmed_format_exc() + '```'
                print(tb_str)
                mutable[0] += f"[Local Message] 警告,在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
                if retry_op > 0:
                    retry_op -= 1
                    mutable[0] += f"[Local Message] 重试中,请稍等 {retry_times_at_unknown_error-retry_op}/{retry_times_at_unknown_error}:\n\n"
                    if ("Rate limit reached" in tb_str) or ("Too Many Requests" in tb_str):
                        time.sleep(30)
                    time.sleep(5)
                    continue  # retry
                time.sleep(5)
                return mutable[0]  # give up

    try:
        # submit the task
        future = executor.submit(_req_gpt, inputs, history, sys_prompt)
        while True:
            # yield once per interval so the front end gets refreshed
            time.sleep(refresh_interval)
            # feed the watchdog
            mutable[1] = time.time()
            if future.done():
                break
            chatbot[-1] = [chatbot[-1][0], mutable[0]]
            yield from update_ui(chatbot=chatbot, history=[])  # refresh the UI

        final_result = future.result()
        chatbot[-1] = [chatbot[-1][0], final_result]
        # on success this replaces the intermediate error messages
        yield from update_ui(chatbot=chatbot, history=[])
        return final_result
    finally:
        # Release the pool even when the generator is closed early; do not
        # wait, the worker stops by itself once the watchdog expires.
        executor.shutdown(wait=False)
137
-
138
def can_multi_process(llm):
    """
    Tell whether requests to the model named `llm` may run in several threads.

    Returns True when the name starts with one of the known prefixes,
    False otherwise.
    """
    multi_thread_prefixes = ('gpt-', 'api2d-', 'azure-', 'spark', 'zhipuai')
    return any(llm.startswith(prefix) for prefix in multi_thread_prefixes)
145
-
146
def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
        inputs_array, inputs_show_user_array, llm_kwargs,
        chatbot, history_array, sys_prompt_array,
        refresh_interval=0.2, max_workers=-1, scroller_max_len=30,
        handle_token_exceed=True, show_user_at_complete=False,
        retry_times_at_unknown_error=2,
        ):
    """
    Request the LLM for many sub-tasks with a thread pool, keeping the UI alive.

    This is a generator: it yields UI refreshes while the workers run and
    returns the collected results (use `result = yield from ...`).
    Features:
        - streams the tail of every worker's answer to the UI in real time
        - uses a thread pool whose size can be limited to avoid rate limits
        - workers stop by themselves when the UI loop stops feeding them
        - on errors, the traceback and the text received so far are put into
          the output of the failed sub-task

    Args (every `*_array` argument is a list with one item per sub-task):
        inputs_array (list): the input of every sub-task.
        inputs_show_user_array (list): the input shown in the report for every
            sub-task (lets the report hide a verbose real input).
        llm_kwargs: model parameters; `llm_kwargs['llm_model']` decides
            whether multi-threading is allowed.
        chatbot: chat window handle, used to visualize the data flow.
        history_array (list): one conversation history (a list) per sub-task.
        sys_prompt_array (list): one system prompt per sub-task.
        refresh_interval (float, optional): seconds between UI refreshes.
        max_workers (int, optional): maximum number of threads; -1 reads
            `DEFAULT_WORKER_NUM` from the configuration.
        scroller_max_len (int, optional): how many trailing characters of the
            streamed answer are displayed per worker.
        handle_token_exceed (bool, optional): when True, a token overflow is
            handled by clipping the input/history and retrying.
        show_user_at_complete (bool, optional): when True, every input/output
            pair is appended to the chat window at the end.
        retry_times_at_unknown_error (int, optional): number of retries of a
            sub-task after an unknown error.

    Returns:
        list: `[shown_input_0, answer_0, shown_input_1, answer_1, ...]`. When
        a sub-task failed, its answer carries the traceback text.
    """
    import time, random
    from concurrent.futures import ThreadPoolExecutor
    from request_llms.bridge_all import predict_no_ui_long_connection
    assert len(inputs_array) == len(history_array)
    assert len(inputs_array) == len(sys_prompt_array)
    if max_workers == -1:  # read the configuration
        try: max_workers = get_conf('DEFAULT_WORKER_NUM')
        except Exception: max_workers = 8
    # a non-positive worker count falls back to 3
    if max_workers <= 0: max_workers = 3
    # models that cannot be queried concurrently get a single worker
    if not can_multi_process(llm_kwargs['llm_model']):
        max_workers = 1

    executor = ThreadPoolExecutor(max_workers=max_workers)
    try:
        n_frag = len(inputs_array)
        # feedback to the user
        chatbot.append(["请开始多线程操作。", ""])
        yield from update_ui(chatbot=chatbot, history=[])  # refresh the UI
        # shared with the workers, one entry per sub-task:
        # [streamed text, last watchdog feed time, status text]
        mutable = [["", time.time(), "等待中"] for _ in range(n_frag)]

        # watchdog patience, in seconds
        watch_dog_patience = 5

        # the task executed by every worker thread
        def _req_gpt(index, inputs, history, sys_prompt):
            gpt_say = ""
            retry_op = retry_times_at_unknown_error
            exceeded_cnt = 0
            mutable[index][2] = "执行中"
            detect_timeout = lambda: len(mutable[index]) >= 2 and (time.time()-mutable[index][1]) > watch_dog_patience
            while True:
                # watchdog: the UI loop stopped feeding us, so the caller is gone
                if detect_timeout(): raise RuntimeError("检测到程序终止。")
                try:
                    # case 1: success
                    gpt_say = predict_no_ui_long_connection(
                        inputs=inputs, llm_kwargs=llm_kwargs, history=history,
                        sys_prompt=sys_prompt, observe_window=mutable[index], console_slience=True
                    )
                    mutable[index][2] = "已成功"
                    return gpt_say
                except ConnectionAbortedError as token_exceeded_error:
                    # case 2: token overflow
                    if not handle_token_exceed:
                        # give up
                        tb_str = '```\n' + trimmed_format_exc() + '```'
                        gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
                        if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
                        mutable[index][2] = "输入过长已放弃"
                        return gpt_say
                    exceeded_cnt += 1
                    # clip while keeping as much text as possible; the
                    # allowance grows with every overflow
                    from toolbox import get_reduce_token_percent
                    _, n_exceed = get_reduce_token_percent(str(token_exceeded_error))
                    MAX_TOKEN = get_max_token(llm_kwargs)
                    EXCEED_ALLO = 512 + 512 * exceeded_cnt
                    inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN-EXCEED_ALLO)
                    gpt_say += f'[Local Message] 警告,文本过长将进行截断,Token溢出数:{n_exceed}。\n\n'
                    mutable[index][2] = f"截断重试"
                    continue  # retry
                except Exception:
                    # case 3: any other error
                    if detect_timeout(): raise RuntimeError("检测到程序终止。")
                    tb_str = '```\n' + trimmed_format_exc() + '```'
                    print(tb_str)
                    gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
                    if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
                    if retry_op > 0:
                        retry_op -= 1
                        wait = random.randint(5, 20)
                        if ("Rate limit reached" in tb_str) or ("Too Many Requests" in tb_str):
                            wait = wait * 3
                            fail_info = "OpenAI绑定信用卡可解除频率限制 "
                        else:
                            fail_info = ""
                        # things may get better after waiting a few seconds
                        for i in range(wait):
                            mutable[index][2] = f"{fail_info}等待重试 {wait-i}"; time.sleep(1)
                        # start the retry
                        if detect_timeout(): raise RuntimeError("检测到程序终止。")
                        mutable[index][2] = f"重试中 {retry_times_at_unknown_error-retry_op}/{retry_times_at_unknown_error}"
                        continue  # retry
                    mutable[index][2] = "已失败"
                    time.sleep(5)
                    return gpt_say  # give up

        # start the asynchronous tasks
        futures = [executor.submit(_req_gpt, index, inputs, history, sys_prompt) for index, inputs, history, sys_prompt in zip(
            range(len(inputs_array)), inputs_array, history_array, sys_prompt_array)]
        cnt = 0
        while True:
            # yield once per interval so the front end gets refreshed
            time.sleep(refresh_interval)
            cnt += 1
            worker_done = [h.done() for h in futures]
            # feed the watchdog of every worker
            for thread_index, _ in enumerate(worker_done):
                mutable[thread_index][1] = time.time()
            # show the tail of every worker's stream, with characters that
            # would disturb the rendering replaced by dots
            observe_win = []
            for thread_index, _ in enumerate(worker_done):
                print_something_really_funny = "[ ...`"+mutable[thread_index][0][-scroller_max_len:].\
                    replace('\n', '').replace('`', '.').replace(
                        ' ', '.').replace('<br/>', '.....').replace('$', '.')+"`... ]"
                observe_win.append(print_something_really_funny)
            # finished workers only show their status text
            stat_str = ''.join([f'`{mutable[thread_index][2]}`: {obs}\n\n'
                                if not done else f'`{mutable[thread_index][2]}`\n\n'
                                for thread_index, done, obs in zip(range(len(worker_done)), worker_done, observe_win)])
            # the trailing dots (1 to 10) animate the message
            chatbot[-1] = [chatbot[-1][0], f'多线程操作已经开始,完成情况: \n\n{stat_str}' + '.' * (cnt % 10 + 1)]
            yield from update_ui(chatbot=chatbot, history=[])  # refresh the UI
            if all(worker_done):
                executor.shutdown()
                break

        # all tasks are finished; `result()` re-raises a worker's exception
        results = [f.result() for f in futures]
        gpt_response_collection = []
        for inputs_show_user, gpt_res in zip(inputs_show_user_array, results):
            gpt_response_collection.extend([inputs_show_user, gpt_res])

        # optionally show the results in the chat window at the end
        if show_user_at_complete:
            for inputs_show_user, gpt_res in zip(inputs_show_user_array, results):
                chatbot.append([inputs_show_user, gpt_res])
                yield from update_ui(chatbot=chatbot, history=[])  # refresh the UI
                time.sleep(0.5)
        return gpt_response_collection
    finally:
        # Release the pool even when the generator is closed early; do not
        # wait, the workers stop by themselves once the watchdog expires.
        executor.shutdown(wait=False)
315
-
316
-
317
-
318
def read_and_clean_pdf_text(fp):
    """
    Read a PDF file and return its text, split into cleaned paragraphs.

    Args:
        fp: path of the PDF file to read.

    Returns:
        tuple: `(meta_txt, page_one_meta)` where
            - `meta_txt` (str) is the cleaned text, paragraphs separated by
              two newlines;
            - `page_one_meta` (list[str]) holds the text of every block of
              the first page.

    Raises:
        RuntimeError: when the main font size of the document cannot be
            determined (e.g. no text span was found).

    Processing steps:
        1. collect every text line with its dominant font size and bbox
        2. find the main (most used) font size of the document
        3. drop lines clearly smaller than the main font (footnotes,
           references, captions ...) and regroup the lines into sections
        4. post-process: blank out short blocks (< 100 characters), merge
           blocks starting with a lowercase word into the previous block,
           normalize the newlines
    """
    import fitz
    import copy
    import re
    fc = 0  # index 0 of a meta_line entry: text
    fs = 1  # index 1 of a meta_line entry: font size
    fb = 2  # index 2 of a meta_line entry: bounding box
    REMOVE_FOOT_NOTE = True  # drop what is not body text (smaller font than the body)
    # Lines below this fraction of the main font size are not body text. It is
    # not 1.0 because the body font size of some papers varies slightly.
    REMOVE_FOOT_FFSIZE_PERCENT = 0.95

    def primary_ffsize(l):
        """Return the font size covering the most characters of a line."""
        fsize_statiscs = {}
        for wtf in l['spans']:
            fsize_statiscs[wtf['size']] = fsize_statiscs.get(wtf['size'], 0) + len(wtf['text'])
        return max(fsize_statiscs, key=fsize_statiscs.get)

    def ffsize_same(a, b):
        """Tell whether two font sizes differ by less than 2%."""
        largest = max(a, b)
        if largest == 0:
            return a == b  # avoid dividing by zero
        return abs((a-b)/largest) < 0.02

    def blank_out_short_blocks(blocks):
        """Replace every block shorter than 100 characters by a newline."""
        for index, block_txt in enumerate(blocks):
            if len(block_txt) < 100:
                blocks[index] = '\n'
        return blocks

    def drop_repeated_blank_blocks(blocks):
        """Keep only one of several consecutive newline-only blocks."""
        for index in reversed(range(1, len(blocks))):
            if blocks[index] == '\n' and blocks[index-1] == '\n':
                blocks.pop(index)
        return blocks

    lowercase_start = re.compile(r"^[a-z]+")

    def merge_lowercase_started_blocks(blocks):
        """Append blocks starting with a lowercase word to the previous block."""
        for _ in range(100):
            merged = False
            for index, block_txt in enumerate(blocks):
                # The first block has no predecessor: index-1 would wrap
                # around to the last block.
                if index == 0 or not lowercase_start.match(block_txt):
                    continue
                if blocks[index-1] != '\n':
                    blocks[index-1] += ' '
                else:
                    blocks[index-1] = ''
                blocks[index-1] += blocks[index]
                blocks[index] = '\n'
                merged = True
            if not merged:
                break  # further passes would not change anything
        return blocks

    with fitz.open(fp) as doc:
        meta_line = []
        meta_span = []
        page_one_meta = []
        ############################## <step 1: collect the raw information> ##################################
        for index, page in enumerate(doc):
            text_areas = page.get_text("dict")  # text information of the page
            for t in text_areas['blocks']:
                if 'lines' in t:
                    for l in t['lines']:
                        txt_line = "".join([wtf['text'] for wtf in l['spans']])
                        if len(txt_line) == 0: continue
                        pf = primary_ffsize(l)
                        meta_line.append([txt_line, pf, l['bbox'], l])
                        for wtf in l['spans']:
                            meta_span.append([wtf['text'], wtf['size'], len(wtf['text'])])
            if index == 0:
                # one string per block: spans joined per line, lines joined by
                # a space, "- " sequences removed
                page_one_meta = [" ".join(["".join([wtf['text'] for wtf in l['spans']]) for l in t['lines']]).replace(
                    '- ', '') for t in text_areas['blocks'] if 'lines' in t]

        ############################## <step 2: find the main font size> ##################################
        try:
            fsize_statiscs = {}
            for span in meta_span:
                if span[1] not in fsize_statiscs: fsize_statiscs[span[1]] = 0
                fsize_statiscs[span[1]] += span[2]
            main_fsize = max(fsize_statiscs, key=fsize_statiscs.get)
            if REMOVE_FOOT_NOTE:
                give_up_fize_threshold = main_fsize * REMOVE_FOOT_FFSIZE_PERCENT
        except Exception as err:
            raise RuntimeError(f'抱歉, 我们暂时无法解析此PDF文档: {fp}。') from err

        ############################## <step 3: split and regroup> ##################################
        mega_sec = []
        sec = []
        for index, line in enumerate(meta_line):
            if index == 0:
                sec.append(line[fc])
                continue
            if REMOVE_FOOT_NOTE:
                if meta_line[index][fs] <= give_up_fize_threshold:
                    continue
            if ffsize_same(meta_line[index][fs], meta_line[index-1][fs]):
                # try to detect the end of a paragraph: the line ends with a
                # period and is clearly narrower than the previous line
                if meta_line[index][fc].endswith('.') and\
                    (meta_line[index-1][fc] != 'NEW_BLOCK') and \
                    (meta_line[index][fb][2] - meta_line[index][fb][0]) < (meta_line[index-1][fb][2] - meta_line[index-1][fb][0]) * 0.7:
                    sec[-1] += line[fc]
                    sec[-1] += "\n\n"
                else:
                    sec[-1] += " "
                    sec[-1] += line[fc]
            else:
                if (index+1 < len(meta_line)) and \
                    meta_line[index][fs] > main_fsize:
                    # font larger than the body: start a new section
                    mega_sec.append(copy.deepcopy(sec))
                    sec = []
                    sec.append("# " + line[fc])
                else:
                    # try to detect a section
                    if meta_line[index-1][fs] > meta_line[index][fs]:
                        sec.append("\n" + line[fc])
                    else:
                        sec.append(line[fc])
        mega_sec.append(copy.deepcopy(sec))

        finals = []
        for ms in mega_sec:
            final = " ".join(ms)
            final = final.replace('- ', ' ')
            finals.append(final)
        meta_txt = finals

        ############################## <step 4: post-processing> ##################################
        meta_txt = blank_out_short_blocks(meta_txt)
        meta_txt = drop_repeated_blank_blocks(meta_txt)
        meta_txt = merge_lowercase_started_blocks(meta_txt)
        meta_txt = drop_repeated_blank_blocks(meta_txt)

        meta_txt = '\n'.join(meta_txt)
        # collapse repeated newlines
        for _ in range(5):
            meta_txt = meta_txt.replace('\n\n', '\n')

        # newline -> double newline
        meta_txt = meta_txt.replace('\n', '\n\n')

    return meta_txt, page_one_meta
496
-
497
-
498
def get_files_from_everything(txt, type): # type='.md'
    """
    Collect the files of a given type from a URL, a file path or a directory.

    Args:
        txt (str): a URL (anything starting with 'http'), a file path ending
            with `type`, or a local directory that is searched recursively.
        type (str): the file suffix to look for, e.g. '.md'.

    Returns:
        tuple: `(success, file_manifest, project_folder)` where
            - `success` (bool) is False when nothing was found;
            - `file_manifest` (list[str]) holds the paths of the files found;
            - `project_folder` is the folder containing the files (the
              download folder for a URL), or None when `txt` is neither a
              URL, a matching file nor an existing path.

    Raises:
        ConnectionRefusedError: when the remote resource cannot be downloaded.
    """
    import glob, os

    success = True
    if txt.startswith('http'):
        # remote file: download it into the log folder
        import requests
        from toolbox import get_conf
        from toolbox import get_log_folder, gen_time_str
        proxies = get_conf('proxies')
        try:
            r = requests.get(txt, proxies=proxies)
        except Exception as err:
            # keep the original error as the cause for debugging
            raise ConnectionRefusedError(f"无法下载资源{txt},请检查。") from err
        path = os.path.join(get_log_folder(plugin_name='web_download'), gen_time_str()+type)
        with open(path, 'wb+') as f: f.write(r.content)
        project_folder = get_log_folder(plugin_name='web_download')
        file_manifest = [path]
    elif txt.endswith(type):
        # the file itself is given
        file_manifest = [txt]
        project_folder = os.path.dirname(txt)
    elif os.path.exists(txt):
        # local path: recursive search
        project_folder = txt
        file_manifest = glob.glob(f'{project_folder}/**/*'+type, recursive=True)
        if len(file_manifest) == 0:
            success = False
    else:
        project_folder = None
        file_manifest = []
        success = False

    return success, file_manifest, project_folder
544
-
545
-
546
-
547
@Singleton
class nougat_interface():
    """Run the `nougat` command line tool on PDF files, one file at a time."""

    def __init__(self):
        # serializes the nougat runs
        self.threadLock = threading.Lock()

    def nougat_with_timeout(self, command, cwd, timeout=3600):
        """
        Run `command` in `cwd` and wait at most `timeout` seconds.

        Args:
            command: the command to run; a string is run through the shell,
                a list of arguments is executed directly.
            cwd: working directory of the process.
            timeout (optional): maximum running time in seconds.

        Returns:
            bool: False when the process was killed after the timeout,
            True otherwise.
        """
        import subprocess
        from toolbox import ProxyNetworkActivate
        logging.info(f'正在执行命令 {command}')
        with ProxyNetworkActivate("Nougat_Download"):
            process = subprocess.Popen(command, shell=isinstance(command, str), cwd=cwd, env=os.environ)
        try:
            stdout, stderr = process.communicate(timeout=timeout)
        except subprocess.TimeoutExpired:
            process.kill()
            stdout, stderr = process.communicate()
            print("Process timed out!")
            return False
        return True

    def NOUGAT_parse_pdf(self, fp, chatbot, history):
        """
        Parse the PDF `fp` with nougat (generator, yields UI refreshes).

        Args:
            fp: path of the PDF file.
            chatbot: chat window handle used for the progress messages.
            history: conversation history, forwarded to the UI refresh.

        Returns:
            str: path of the first `.mmd` file produced by nougat.

        Raises:
            RuntimeError: when nougat did not produce any `.mmd` file.
        """
        from toolbox import update_ui_lastest_msg

        yield from update_ui_lastest_msg("正在解析论文, 请稍候。进度:正在排队, 等待线程锁...",
                                         chatbot=chatbot, history=history, delay=0)
        # `with` guarantees the lock is released on every exit path,
        # including errors and an early close of this generator
        with self.threadLock:
            import glob, os
            from toolbox import get_log_folder, gen_time_str
            dst = os.path.join(get_log_folder(plugin_name='nougat'), gen_time_str())
            os.makedirs(dst)

            yield from update_ui_lastest_msg("正在解析论文, 请稍候。进度:正在加载NOUGAT... (提示:首次运行需要花费较长时间下载NOUGAT参数)",
                                             chatbot=chatbot, history=history, delay=0)
            # Pass the paths as separate arguments (no shell involved), so a
            # file name containing quotes or shell metacharacters is harmless.
            self.nougat_with_timeout(['nougat', '--out', os.path.abspath(dst), os.path.abspath(fp)], os.getcwd(), timeout=3600)
            res = glob.glob(os.path.join(dst,'*.mmd'))
            if len(res) == 0:
                raise RuntimeError("Nougat解析论文失败。")
        return res[0]
588
-
589
-
590
-
591
-
592
def try_install_deps(deps, reload_m=None):
    """
    Install packages with pip (`--user`) and reload already imported modules.

    Args:
        deps: iterable of pip requirement strings to install.
        reload_m (optional): iterable of module names to reload afterwards;
            dotted names (e.g. 'pkg.sub') reload the named submodule itself.

    Raises:
        subprocess.CalledProcessError: when a pip installation fails.
    """
    import subprocess, sys, importlib
    for dep in deps:
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', '--user', dep])
    # reload `site` after the installations
    import site
    importlib.reload(site)
    for m in reload_m or ():
        # `import_module` returns the named (sub)module itself, whereas
        # `__import__('a.b')` returns the top-level package `a`
        importlib.reload(importlib.import_module(m))
600
-
601
-
602
def get_plugin_arg(plugin_kwargs, key, default):
    """
    Read a plugin argument, treating an empty string as "not provided".

    An entry whose value is "" is removed from `plugin_kwargs` before the
    lookup, so `default` is returned for it.
    """
    try:
        value = plugin_kwargs[key]
    except KeyError:
        # nothing stored under this key
        return default
    if value == "":
        # empty argument: forget it and fall back to the default
        del plugin_kwargs[key]
        return default
    return value
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/game_fns/game_ascii_art.py DELETED
@@ -1,42 +0,0 @@
1
- from toolbox import CatchException, update_ui, update_ui_lastest_msg
2
- from crazy_functions.multi_stage.multi_stage_utils import GptAcademicGameBaseState
3
- from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
4
- from request_llms.bridge_all import predict_no_ui_long_connection
5
- from crazy_functions.game_fns.game_utils import get_code_block, is_same_thing
6
- import random
7
-
8
-
9
class MiniGame_ASCII_Art(GptAcademicGameBaseState):
    """Mini game: the model draws an animal as ASCII art and the user guesses it."""

    def step(self, prompt, chatbot, history):
        """
        Run one round of the game (generator, yields UI refreshes).

        The first round draws the ASCII art; every later round checks the
        guess given in `prompt`, or reveals the answer when it is 'exit'.
        """
        if self.step_cnt != 0:
            if prompt.strip() == 'exit':
                # the user gives up: reveal the answer and end the game
                self.delete_game = True
                yield from update_ui_lastest_msg(lastmsg=f"谜底是{self.obj},游戏结束。", chatbot=chatbot, history=history, delay=0.)
                return
            chatbot.append([prompt, ""])
        else:
            chatbot.append(["我画你猜(动物)", "请稍等..."])
        yield from update_ui(chatbot=chatbot, history=history)

        if self.step_cnt == 0:
            self.lock_plugin(chatbot)
            self.cur_task = 'draw'

        if self.cur_task == 'draw':
            # pick an animal and let the model draw it
            candidates = ["狗","猫","鸟","鱼","老鼠","蛇"]
            self.obj = random.choice(candidates)
            inputs = "I want to play a game called Guess the ASCII art. You can draw the ASCII art and I will try to guess it. " + \
                f"This time you draw a {self.obj}. Note that you must not indicate what you have draw in the text, and you should only produce the ASCII art wrapped by ```. "
            model_reply = predict_no_ui_long_connection(inputs=inputs, llm_kwargs=self.llm_kwargs, history=[], sys_prompt="")
            self.cur_task = 'identify user guess'
            drawing = get_code_block(model_reply)
            history += ['', f'the answer is {self.obj}', inputs, drawing]
            yield from update_ui_lastest_msg(lastmsg=drawing, chatbot=chatbot, history=history, delay=0.)

        elif self.cur_task == 'identify user guess':
            if not is_same_thing(self.obj, prompt, self.llm_kwargs):
                # wrong guess: stay in the guessing state
                self.cur_task = 'identify user guess'
                yield from update_ui_lastest_msg(lastmsg="猜错了,再试试,输入“exit”获取答案。", chatbot=chatbot, history=history, delay=0.)
            else:
                self.delete_game = True
                yield from update_ui_lastest_msg(lastmsg="你猜对了!", chatbot=chatbot, history=history, delay=0.)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/game_fns/game_interactive_story.py DELETED
@@ -1,212 +0,0 @@
1
# Prompt asking for the first scene of the story.
# Placeholder: {headstart} is the opening sentence of the story.
prompts_hs = """ 请以“{headstart}”为开头,编写一个小说的第一幕。

- 尽量短,不要包含太多情节,因为你接下来将会与用户互动续写下面的情节,要留出足够的互动空间。
- 出现人物时,给出人物的名字。
- 积极地运用环境描写、人物描写等手法,让读者能够感受到你的故事世界。
- 积极地运用修辞手法,比如比喻、拟人、排比、对偶、夸张等等。
- 字数要求:第一幕的字数少于300字,且少于2个段落。
"""

# Prompt asking for four possible continuations of the story.
# Placeholder: {previously_on_story} is the story written so far.
prompts_interact = """ 小说的前文回顾:

{previously_on_story}


你是一个作家,根据以上的情节,给出4种不同的后续剧情发展方向,每个发展方向都精明扼要地用一句话说明。稍后,我将在这4个选择中,挑选一种剧情发展。

输出格式例如:
1. 后续剧情发展1
2. 后续剧情发展2
3. 后续剧情发展3
4. 后续剧情发展4
"""


# Prompt asking for the next scene, following the continuation the user chose.
# Placeholders: {previously_on_story} is the story written so far, {choice}
# the continuations that were offered, {user_choice} the user's answer.
prompts_resume = """小说的前文回顾:

{previously_on_story}


你是一个作家,我们正在互相讨论,确定后续剧情的发展。
在以下的剧情发展中,

{choice}

我认为更合理的是:{user_choice}。
请在前文的基础上(不要重复前文),围绕我选定的剧情情节,编写小说的下一幕。

- 禁止杜撰不符合我选择的剧情。
- 尽量短,不要包含太多情节,因为你接下来将会与用户互动续写下面的情节,要留出足够的互动空间。
- 不要重复前文。
- 出现人物时,给出人物的名字。
- 积极地运用环境描写、人物描写等手法,让读者能够感受到你的故事世界。
- 积极地运用修辞手法,比如比喻、拟人、排比、对偶、夸张等等。
- 小说的下一幕字数少于300字,且少于2个段落。
"""


# Prompt asking for the final scene of the story.
# Placeholders: {previously_on_story} is the story written so far,
# {user_choice} the ending the user asked for.
prompts_terminate = """小说的前文回顾:

{previously_on_story}


你是一个作家,我们正在互相讨论,确定后续剧情的发展。
现在,故事该结束了,我认为最合理的故事结局是:{user_choice}。

请在前文的基础上(不要重复前文),编写小说的最后一幕。

- 不要重复前文。
- 出现人物时,给出人物的名字。
- 积极地运用环境描写、人物描写等手法,让读者能够感受到你的故事世界。
- 积极地运用修辞手法,比如比喻、拟人、排比、对偶、夸张等等。
- 字数要求:最后一幕的字数少于1000字。
"""
64
-
65
-
66
- from toolbox import CatchException, update_ui, update_ui_lastest_msg
67
- from crazy_functions.multi_stage.multi_stage_utils import GptAcademicGameBaseState
68
- from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
69
- from request_llms.bridge_all import predict_no_ui_long_connection
70
- from crazy_functions.game_fns.game_utils import get_code_block, is_same_thing
71
- import random
72
-
73
-
74
class MiniGame_ResumeStory(GptAcademicGameBaseState):
    """Mini game: the model and the user write a story together, scene by scene."""

    # possible opening sentences, one is picked at random for every game
    story_headstart = [
        '先行者知道,他现在是全宇宙中唯一的一个人了。',
        '深夜,一个年轻人穿过天安门广场向纪念堂走去。在二十二世纪编年史中,计算机把他的代号定为M102。',
        '他知道,这最后一课要提前讲了。又一阵剧痛从肝部袭来,几乎使他晕厥过去。',
        '在距地球五万光年的远方,在银河系的中心,一场延续了两万年的星际战争已接近尾声。那里的太空中渐渐隐现出一个方形区域,仿佛灿烂的群星的背景被剪出一个方口。',
        '伊依一行三人乘坐一艘游艇在南太平洋上做吟诗航行,他们的目的地是南极,如果几天后能顺利到达那里,他们将钻出地壳去看诗云。',
        '很多人生来就会莫名其妙地迷上一样东西,仿佛他的出生就是要和这东西约会似的,正是这样,圆圆迷上了肥皂泡。'
    ]


    def begin_game_step_0(self, prompt, chatbot, history):
        """Initialize the game: pick the opening sentence and announce it."""
        self.headstart = random.choice(self.story_headstart)
        self.story = []
        chatbot.append(["互动写故事", f"这次的故事开头是:{self.headstart}"])
        self.sys_prompt_ = '你是一个想象力丰富的杰出作家。正在与你的朋友互动,一起写故事,因此你每次写的故事段落应少于300字(结局除外)。'


    def generate_story_image(self, story_paragraph):
        """
        Return an HTML snippet with an illustration for `story_paragraph`.

        Best effort: any failure (missing plugin, failed request ...) yields
        an empty string, the story goes on without the illustration.
        """
        try:
            from crazy_functions.图片生成 import gen_image
            prompt_ = predict_no_ui_long_connection(inputs=story_paragraph, llm_kwargs=self.llm_kwargs, history=[], sys_prompt='你需要根据用户给出的小说段落,进行简短的环境描写。要求:80字以内。')
            image_url, image_path = gen_image(self.llm_kwargs, prompt_, '512x512', model="dall-e-2", quality='standard', style='natural')
            return f'<br/><div align="center"><img src="file={image_path}"></div>'
        except Exception:
            return ''

    def _previously_on_story(self):
        """Return the story written so far, one scene per line."""
        return "".join(s + '\n' for s in self.story)

    def _show_with_illustration(self, story_paragraph, chatbot, history):
        """Show the scene with a waiting note, then with its illustration."""
        yield from update_ui_lastest_msg(lastmsg=story_paragraph + '<br/>正在生成插图中 ...', chatbot=chatbot, history=history, delay=0.)
        yield from update_ui_lastest_msg(lastmsg=story_paragraph + '<br/>'+ self.generate_story_image(story_paragraph), chatbot=chatbot, history=history, delay=0.)

    def _ask_for_next_choices(self, show_user, chatbot):
        """Ask the model for the possible continuations and store them."""
        inputs_ = prompts_interact.format(previously_on_story=self._previously_on_story())
        self.next_choices = yield from request_gpt_model_in_new_thread_with_ui_alive(
            inputs_, show_user, self.llm_kwargs,
            chatbot,
            [],
            self.sys_prompt_
        )

    def step(self, prompt, chatbot, history):
        """
        Run one round of the game (generator, yields UI refreshes).

        The first round writes the opening scene. Later rounds continue the
        story along the user's choice given in `prompt`; 'exit' or '结束剧情'
        stops the game, and a prompt containing '剧情收尾' asks for the final
        scene.
        """
        # first, handle the special cases such as the game initialization
        if self.step_cnt == 0:
            self.begin_game_step_0(prompt, chatbot, history)
            self.lock_plugin(chatbot)
            self.cur_task = 'head_start'
        else:
            if prompt.strip() == 'exit' or prompt.strip() == '结束剧情':
                self.delete_game = True
                yield from update_ui_lastest_msg(lastmsg=f"游戏结束。", chatbot=chatbot, history=history, delay=0.)
                return
            if '剧情收尾' in prompt:
                self.cur_task = 'story_terminate'
        # update the UI, don't keep the user waiting
        yield from update_ui(chatbot=chatbot, history=history)

        # then, the main logic of the game
        if self.cur_task == 'head_start':
            # first step of the game: write the opening scene
            inputs_ = prompts_hs.format(headstart=self.headstart)
            story_paragraph = yield from request_gpt_model_in_new_thread_with_ui_alive(
                inputs_, '故事开头', self.llm_kwargs,
                chatbot, [], self.sys_prompt_
            )
            self.story.append(story_paragraph)
            # illustration
            yield from self._show_with_illustration(story_paragraph, chatbot, history)

            # offer the possible continuations
            yield from self._ask_for_next_choices(
                '请在以下几种故事走向中,选择一种(当然,您也可以选择给出其他故事走向):', chatbot)
            self.cur_task = 'user_choice'

        elif self.cur_task == 'user_choice':
            # continue the story along the user's choice; the entry that
            # offered the choices is removed from the chat window first
            if '请在以下几种故事走向中,选择一种' in chatbot[-1][0]: chatbot.pop(-1)
            inputs_ = prompts_resume.format(previously_on_story=self._previously_on_story(), choice=self.next_choices, user_choice=prompt)
            story_paragraph = yield from request_gpt_model_in_new_thread_with_ui_alive(
                inputs_, f'下一段故事(您的选择是:{prompt})。', self.llm_kwargs,
                chatbot, [], self.sys_prompt_
            )
            self.story.append(story_paragraph)
            # illustration
            yield from self._show_with_illustration(story_paragraph, chatbot, history)

            # offer the possible continuations
            yield from self._ask_for_next_choices(
                '请在以下几种故事走向中,选择一种。当然,您也可以给出您心中的其他故事走向。另外,如果您希望剧情立即收尾,请输入剧情走向,并以“剧情收尾”四个字提示程序。', chatbot)
            self.cur_task = 'user_choice'

        elif self.cur_task == 'story_terminate':
            # write the ending the user asked for
            inputs_ = prompts_terminate.format(previously_on_story=self._previously_on_story(), user_choice=prompt)
            story_paragraph = yield from request_gpt_model_in_new_thread_with_ui_alive(
                inputs_, f'故事收尾(您的选择是:{prompt})。', self.llm_kwargs,
                chatbot, [], self.sys_prompt_
            )
            # illustration
            yield from self._show_with_illustration(story_paragraph, chatbot, history)

            # terminate the game
            self.delete_game = True
            return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/game_fns/game_utils.py DELETED
@@ -1,35 +0,0 @@
1
-
2
- from crazy_functions.json_fns.pydantic_io import GptJsonIO, JsonStringError
3
- from request_llms.bridge_all import predict_no_ui_long_connection
4
def get_code_block(reply):
    """Return the single fenced code block contained in *reply*, fences included.

    Raises:
        RuntimeError: if *reply* contains no fenced block, or more than one.
    """
    import re
    fenced_bodies = re.compile(r"```([\s\S]*?)```").findall(reply)
    if len(fenced_bodies) != 1:
        raise RuntimeError("GPT is not generating proper code.")
    (body,) = fenced_bodies
    return f"```{body}```"
11
-
12
def is_same_thing(a, b, llm_kwargs):
    """Ask the language model whether `a` and `b` denote the same thing.

    Two requests are made: a free-form one, then one that appends the JSON
    format instructions and replays the first exchange as history. The second
    reply is parsed (with one automatic repair attempt) into `IsSameThing`.

    Args:
        a: the target object description.
        b: the user-supplied object description.
        llm_kwargs: model settings forwarded to `predict_no_ui_long_connection`.

    Returns:
        The parsed `is_same_thing` flag, or False when the reply cannot be
        parsed or repaired into valid JSON.
    """
    from pydantic import BaseModel, Field

    class IsSameThing(BaseModel):
        is_same_thing: bool = Field(description="determine whether two objects are same thing.", default=False)

    def run_gpt_fn(inputs, sys_prompt, history=None):
        # Build a fresh history list per call instead of sharing one mutable
        # default object between every invocation.
        return predict_no_ui_long_connection(
            inputs=inputs, llm_kwargs=llm_kwargs,
            history=[] if history is None else history,
            sys_prompt=sys_prompt, observe_window=[]
        )

    gpt_json_io = GptJsonIO(IsSameThing)
    inputs_01 = "Identity whether the user input and the target is the same thing: \n target object: {a} \n user input object: {b} \n\n\n".format(a=a, b=b)
    inputs_01 += "\n\n\n Note that the user may describe the target object with a different language, e.g. cat and 猫 are the same thing."
    # First pass: unconstrained answer, reused below as conversation history.
    analyze_res_cot_01 = run_gpt_fn(inputs_01, "", [])

    # Second pass: same question plus the JSON output instructions.
    inputs_02 = inputs_01 + gpt_json_io.format_instructions
    analyze_res = run_gpt_fn(inputs_02, "", [inputs_01, analyze_res_cot_01])

    try:
        res = gpt_json_io.generate_output_auto_repair(analyze_res, run_gpt_fn)
        return res.is_same_thing
    except JsonStringError:
        return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/gen_fns/gen_fns_shared.py DELETED
@@ -1,70 +0,0 @@
1
- import time
2
- import importlib
3
- from toolbox import trimmed_format_exc, gen_time_str, get_log_folder
4
- from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc, is_the_upload_folder
5
- from toolbox import promote_file_to_downloadzone, get_log_folder, update_ui_lastest_msg
6
- import multiprocessing
7
-
8
def get_class_name(class_string):
    """Extract the name of a class defined in the source text `class_string`.

    The first `class Name(` occurrence wins, as before. If there is none, a
    class declared without a base list (`class Name:`) is accepted instead.

    Raises:
        ValueError: if no class definition can be found.
    """
    import re
    match = re.search(r'class (\w+)\(', class_string)
    if match is None:
        # Fallback for classes written without parentheses.
        match = re.search(r'class\s+(\w+)\s*:', class_string)
    if match is None:
        raise ValueError("No class definition found in the given code.")
    return match.group(1)
13
-
14
def try_make_module(code, chatbot):
    """Write generated `code` to a log file and check, in a subprocess, that its class can be instantiated.

    The file is also promoted to the download zone. The check runs
    `is_function_successfully_generated` in a separate process that is given
    10 seconds before being terminated.

    Returns:
        A `(success, traceback)` tuple as reported by the child process;
        `(False, "")` if the child never got to report anything.
    """
    module_file = 'gpt_fn_' + gen_time_str().replace('-','_')
    fn_path = f'{get_log_folder(plugin_name="gen_plugin_verify")}/{module_file}.py'
    with open(fn_path, 'w', encoding='utf8') as f:
        f.write(code)
    promote_file_to_downloadzone(fn_path, chatbot=chatbot)
    class_name = get_class_name(code)
    # The context manager shuts the manager's server process down afterwards.
    with multiprocessing.Manager() as manager:
        return_dict = manager.dict()
        # Pre-fill the result so the lookups below cannot fail with KeyError
        # when the child is killed before it writes anything.
        return_dict['success'] = False
        return_dict['traceback'] = ""
        p = multiprocessing.Process(target=is_function_successfully_generated, args=(fn_path, class_name, return_dict))
        # only has 10 seconds to run
        p.start()
        p.join(timeout=10)
        if p.is_alive():
            p.terminate()
            p.join()
        p.close()
        return return_dict["success"], return_dict['traceback']
28
-
29
- # check is_function_successfully_generated
30
def is_function_successfully_generated(fn_path, class_name, return_dict):
    """Import the module at `fn_path` and try to instantiate `class_name` from it.

    Args:
        fn_path: path of the Python file to load.
        class_name: attribute name of the class to instantiate (no arguments).
        return_dict: mapping that receives `'success'` (bool) and
            `'traceback'` (str, empty on success).
    """
    # `import importlib` alone does not guarantee that the `util` submodule is loaded.
    import importlib.util
    return_dict['success'] = False
    return_dict['traceback'] = ""
    try:
        # Create a spec for the module, then load and execute it
        module_spec = importlib.util.spec_from_file_location('example_module', fn_path)
        example_module = importlib.util.module_from_spec(module_spec)
        module_spec.loader.exec_module(example_module)
        # Look the class up and make sure it can be constructed
        some_class = getattr(example_module, class_name)
        some_class()
        return_dict['success'] = True
    except BaseException:
        # Deliberately catch everything (same reach as the former bare except):
        # the loaded code is machine-generated and may raise anything.
        return_dict['traceback'] = trimmed_format_exc()
48
-
49
def subprocess_worker(code, file_path, return_dict):
    """Save generated `code`, load it, instantiate its class and call `run(file_path)`.

    Args:
        code: Python source text defining the class to run.
        file_path: argument handed to the instance's `run` method.
        return_dict: mapping that receives `'result'` (the value returned by
            `run`, or None), `'success'` (bool) and `'traceback'` (str).
    """
    # `import importlib` alone does not guarantee that the `util` submodule is loaded.
    import importlib.util
    return_dict['result'] = None
    return_dict['success'] = False
    return_dict['traceback'] = ""
    try:
        module_file = 'gpt_fn_' + gen_time_str().replace('-','_')
        fn_path = f'{get_log_folder(plugin_name="gen_plugin_run")}/{module_file}.py'
        with open(fn_path, 'w', encoding='utf8') as f:
            f.write(code)
        class_name = get_class_name(code)
        # Create a spec for the module, then load and execute it
        module_spec = importlib.util.spec_from_file_location('example_module', fn_path)
        example_module = importlib.util.module_from_spec(module_spec)
        module_spec.loader.exec_module(example_module)
        # Instantiate the generated class and run it
        some_class = getattr(example_module, class_name)
        instance = some_class()
        return_dict['result'] = instance.run(file_path)
        return_dict['success'] = True
    except BaseException:
        # Deliberately catch everything (same reach as the former bare except):
        # the executed code is machine-generated and may raise anything.
        return_dict['traceback'] = trimmed_format_exc()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/ipc_fns/mp.py DELETED
@@ -1,37 +0,0 @@
1
- import platform
2
- import pickle
3
- import multiprocessing
4
-
5
def run_in_subprocess_wrapper_func(v_args):
    """Subprocess entry point: unpickle a call description, run it, report the outcome.

    Args:
        v_args: pickled tuple `(func, args, kwargs, return_dict, exception_dict)`.
            On success `return_dict['result']` is set; on failure
            `exception_dict['exception']` is set to a 3-tuple
            `(exception type, exception instance, None)`.
    """
    import traceback
    func, args, kwargs, return_dict, exception_dict = pickle.loads(v_args)
    try:
        result = func(*args, **kwargs)
        return_dict['result'] = result
    except Exception as e:
        formatted = traceback.format_exc()
        # Values stored in a manager dict are pickled on their way to the
        # manager process, and traceback objects cannot be pickled. Storing
        # sys.exc_info() therefore raised inside this handler and the parent
        # never learned about the failure. Ship the exception without its
        # traceback, keeping the 3-tuple shape the parent indexes into.
        try:
            pickle.loads(pickle.dumps(e))
            reported = e
        except Exception:
            # The exception itself does not survive pickling: send a plain
            # RuntimeError carrying the formatted traceback text instead.
            reported = RuntimeError(f'{type(e).__name__}: {e}\n{formatted}')
        exception_dict['exception'] = (type(reported), reported, None)
14
-
15
def run_in_subprocess_with_timeout(func, timeout=60):
    """Return a callable that runs `func` in a subprocess with a time limit.

    On platforms other than Linux `func` is returned unchanged. On Linux the
    returned wrapper runs the call in a separate process, and:

    - raises TimeoutError if the call does not finish within `timeout` seconds
      (the subprocess is terminated);
    - re-raises the exception reported by the subprocess, if any;
    - otherwise returns the reported result (None if nothing was reported).

    `func`, its arguments and its result are pickled to cross the process
    boundary.
    """
    if platform.system() != 'Linux':
        return func

    import functools

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # One manager serves both dicts, and the context manager stops its
        # server process on every exit path instead of leaking it.
        with multiprocessing.Manager() as manager:
            return_dict = manager.dict()
            exception_dict = manager.dict()
            v_args = pickle.dumps((func, args, kwargs, return_dict, exception_dict))
            process = multiprocessing.Process(target=run_in_subprocess_wrapper_func, args=(v_args,))
            process.start()
            process.join(timeout)
            if process.is_alive():
                process.terminate()
                # Reap the terminated child so it does not linger as a zombie.
                process.join()
                process.close()
                raise TimeoutError(f'功能单元{str(func)}未能在规定时间内完成任务')
            process.close()
            if 'exception' in exception_dict:
                # ooops, the subprocess ran into an exception
                exc_info = exception_dict['exception']
                raise exc_info[1].with_traceback(exc_info[2])
            if 'result' in return_dict.keys():
                # If the subprocess ran successfully, return the result
                return return_dict['result']
    return wrapper
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/json_fns/pydantic_io.py DELETED
@@ -1,111 +0,0 @@
1
- """
2
- https://github.com/langchain-ai/langchain/blob/master/docs/extras/modules/model_io/output_parsers/pydantic.ipynb
3
-
4
- Example 1.
5
-
6
- # Define your desired data structure.
7
- class Joke(BaseModel):
8
- setup: str = Field(description="question to set up a joke")
9
- punchline: str = Field(description="answer to resolve the joke")
10
-
11
- # You can add custom validation logic easily with Pydantic.
12
- @validator("setup")
13
- def question_ends_with_question_mark(cls, field):
14
- if field[-1] != "?":
15
- raise ValueError("Badly formed question!")
16
- return field
17
-
18
-
19
- Example 2.
20
-
21
- # Here's another example, but with a compound typed field.
22
- class Actor(BaseModel):
23
- name: str = Field(description="name of an actor")
24
- film_names: List[str] = Field(description="list of names of films they starred in")
25
- """
26
-
27
- import json, re, logging
28
-
29
-
30
# Prompt template with a worked example of a well-formed / ill-formed instance.
PYDANTIC_FORMAT_INSTRUCTIONS = """The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {{"properties": {{"foo": {{"title": "Foo", "description": "a list of strings", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}}
the object {{"foo": ["bar", "baz"]}} is a well-formatted instance of the schema. The object {{"properties": {{"foo": ["bar", "baz"]}}}} is not well-formatted.

Here is the output schema:
```
{schema}
```"""


# Shorter prompt template without the worked example.
PYDANTIC_FORMAT_INSTRUCTIONS_SIMPLE = """The output should be formatted as a JSON instance that conforms to the JSON schema below.
```
{schema}
```"""


class JsonStringError(Exception):
    """Raised when a reply cannot be turned into the schema, even after a repair attempt."""


class GptJsonIO():
    """Build JSON format instructions from a schema class and parse replies against it.

    `schema` must provide `schema()` (returning a JSON-schema dict) and
    `parse_obj(obj)`, as pydantic models do.
    """

    def __init__(self, schema, example_instruction=True):
        self.pydantic_object = schema
        self.example_instruction = example_instruction
        self.format_instructions = self.generate_format_instructions()

    def generate_format_instructions(self):
        """Return the prompt text describing the expected JSON output.

        Uses the template with a worked example when `example_instruction`
        is true, the short template otherwise.
        """
        schema = self.pydantic_object.schema()

        # Remove extraneous fields. Work on a copy so that the dict handed
        # out by the schema class is not modified behind its back.
        reduced_schema = {k: v for k, v in schema.items() if k not in ("title", "type")}
        # Ensure json in context is well-formed with double quotes.
        # Computed before branching: it used to be assigned only in the
        # example branch, leaving the simple branch with an unbound name.
        schema_str = json.dumps(reduced_schema)
        if self.example_instruction:
            return PYDANTIC_FORMAT_INSTRUCTIONS.format(schema=schema_str)
        else:
            return PYDANTIC_FORMAT_INSTRUCTIONS_SIMPLE.format(schema=schema_str)

    def generate_output(self, text):
        """Parse the first `{...}` span of `text` into the schema object.

        Raises whatever `json.loads` or `parse_obj` raise when the text holds
        no valid JSON object or the object does not fit the schema.
        """
        # Greedy search for 1st json candidate.
        match = re.search(
            r"\{.*\}", text.strip(), re.MULTILINE | re.IGNORECASE | re.DOTALL
        )
        json_str = ""
        if match: json_str = match.group()
        json_object = json.loads(json_str, strict=False)
        final_object = self.pydantic_object.parse_obj(json_object)
        return final_object

    def generate_repair_prompt(self, broken_json, error):
        """Return a prompt asking the model to fix `broken_json`, given the `error` text."""
        prompt = "Fix a broken json string.\n\n" + \
                 "(1) The broken json string need to fix is: \n\n" + \
                 "```" + "\n" + \
                 broken_json + "\n" + \
                 "```" + "\n\n" + \
                 "(2) The error message is: \n\n" + \
                 error + "\n\n" + \
                 "Now, fix this json string. \n\n"
        return prompt

    def generate_output_auto_repair(self, response, gpt_gen_fn):
        """Parse `response`; on failure ask the model once to repair it.

        response: string containing candidate json
        gpt_gen_fn: gpt_gen_fn(inputs, sys_prompt)

        Raises:
            JsonStringError: if the repaired reply cannot be parsed either.
        """
        try:
            result = self.generate_output(response)
        except Exception as e:
            try:
                logging.info(f'Repairing json:{response}')
                repair_prompt = self.generate_repair_prompt(broken_json = response, error=repr(e))
                result = self.generate_output(gpt_gen_fn(repair_prompt, self.format_instructions))
                logging.info('Repaire json success.')
            except Exception as repair_error:
                # Out of options: give up.
                logging.info('Repaire json fail.')
                raise JsonStringError('Cannot repair json.', str(repair_error)) from repair_error
        return result
111
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/latex_fns/latex_actions.py DELETED
@@ -1,467 +0,0 @@
1
- from toolbox import update_ui, update_ui_lastest_msg, get_log_folder
2
- from toolbox import get_conf, objdump, objload, promote_file_to_downloadzone
3
- from .latex_toolbox import PRESERVE, TRANSFORM
4
- from .latex_toolbox import set_forbidden_text, set_forbidden_text_begin_end, set_forbidden_text_careful_brace
5
- from .latex_toolbox import reverse_forbidden_text_careful_brace, reverse_forbidden_text, convert_to_linklist, post_process
6
- from .latex_toolbox import fix_content, find_main_tex_file, merge_tex_files, compile_latex_with_timeout
7
- from .latex_toolbox import find_title_and_abs
8
-
9
- import os, shutil
10
- import re
11
- import numpy as np
12
-
13
- pj = os.path.join
14
-
15
-
16
def split_subprocess(txt, project_folder, return_dict, opts):
    """
    Break a LaTeX document down into a linked list of nodes; each node
    carries a preserve flag indicating whether it should be processed by GPT.

    Args:
        txt: full LaTeX source text.
        project_folder: folder that receives the `debug_log.html` dump.
        return_dict: mapping that receives `'nodes'` (list of nodes, unlinked)
            and `'segment_parts_for_gpt'` (strings of the non-preserved nodes).
        opts: not used in this function body.

    Returns:
        `return_dict`, after the two keys above have been written.
    """
    text = txt
    # One mask entry per character, initialised to TRANSFORM.
    mask = np.zeros(len(txt), dtype=np.uint8) + TRANSFORM

    # NOTE(review): the set_forbidden_* / reverse_forbidden_* helpers live in
    # latex_toolbox and are not visible here; presumably they mark the matched
    # spans as preserved (or, for "reverse", as transformable again) — confirm.

    # Absorb everything above the title and authors
    text, mask = set_forbidden_text(text, mask, r"^(.*?)\\maketitle", re.DOTALL)
    text, mask = set_forbidden_text(text, mask, r"^(.*?)\\begin{document}", re.DOTALL)
    # Absorb \iffalse ... \fi comment blocks
    text, mask = set_forbidden_text(text, mask, r"\\iffalse(.*?)\\fi", re.DOTALL)
    # Absorb begin-end pairs that span at most 42 lines
    text, mask = set_forbidden_text_begin_end(text, mask, r"\\begin\{([a-z\*]*)\}(.*?)\\end\{\1\}", re.DOTALL, limit_n_lines=42)
    # Absorb anonymous display equations
    text, mask = set_forbidden_text(text, mask, [ r"\$\$([^$]+)\$\$", r"\\\[.*?\\\]" ], re.DOTALL)
    # Absorb miscellaneous commands and environments
    text, mask = set_forbidden_text(text, mask, [ r"\\section\{(.*?)\}", r"\\section\*\{(.*?)\}", r"\\subsection\{(.*?)\}", r"\\subsubsection\{(.*?)\}" ])
    text, mask = set_forbidden_text(text, mask, [ r"\\bibliography\{(.*?)\}", r"\\bibliographystyle\{(.*?)\}" ])
    text, mask = set_forbidden_text(text, mask, r"\\begin\{thebibliography\}.*?\\end\{thebibliography\}", re.DOTALL)
    text, mask = set_forbidden_text(text, mask, r"\\begin\{lstlisting\}(.*?)\\end\{lstlisting\}", re.DOTALL)
    text, mask = set_forbidden_text(text, mask, r"\\begin\{wraptable\}(.*?)\\end\{wraptable\}", re.DOTALL)
    text, mask = set_forbidden_text(text, mask, r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}", re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\begin\{wrapfigure\}(.*?)\\end\{wrapfigure\}", r"\\begin\{wrapfigure\*\}(.*?)\\end\{wrapfigure\*\}"], re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\begin\{figure\}(.*?)\\end\{figure\}", r"\\begin\{figure\*\}(.*?)\\end\{figure\*\}"], re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\begin\{multline\}(.*?)\\end\{multline\}", r"\\begin\{multline\*\}(.*?)\\end\{multline\*\}"], re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\begin\{table\}(.*?)\\end\{table\}", r"\\begin\{table\*\}(.*?)\\end\{table\*\}"], re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\begin\{minipage\}(.*?)\\end\{minipage\}", r"\\begin\{minipage\*\}(.*?)\\end\{minipage\*\}"], re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\begin\{align\*\}(.*?)\\end\{align\*\}", r"\\begin\{align\}(.*?)\\end\{align\}"], re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\begin\{equation\}(.*?)\\end\{equation\}", r"\\begin\{equation\*\}(.*?)\\end\{equation\*\}"], re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\includepdf\[(.*?)\]\{(.*?)\}", r"\\clearpage", r"\\newpage", r"\\appendix", r"\\tableofcontents", r"\\include\{(.*?)\}"])
    text, mask = set_forbidden_text(text, mask, [r"\\vspace\{(.*?)\}", r"\\hspace\{(.*?)\}", r"\\label\{(.*?)\}", r"\\begin\{(.*?)\}", r"\\end\{(.*?)\}", r"\\item "])
    text, mask = set_forbidden_text_careful_brace(text, mask, r"\\hl\{(.*?)\}", re.DOTALL)
    # The reverse operations must come last
    text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\caption\{(.*?)\}", re.DOTALL, forbid_wrapper=True)
    text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\abstract\{(.*?)\}", re.DOTALL, forbid_wrapper=True)
    text, mask = reverse_forbidden_text(text, mask, r"\\begin\{abstract\}(.*?)\\end\{abstract\}", re.DOTALL, forbid_wrapper=True)
    root = convert_to_linklist(text, mask)

    # Final processing step, to improve robustness
    root = post_process(root)

    # Write an HTML debug file: preserved regions (PRESERVE) are shown in red,
    # regions to be transformed (TRANSFORM) in black
    with open(pj(project_folder, 'debug_log.html'), 'w', encoding='utf8') as f:
        segment_parts_for_gpt = []
        nodes = []
        node = root
        # Walk the linked list from the root until the end is reached.
        while True:
            nodes.append(node)
            show_html = node.string.replace('\n','<br/>')
            if not node.preserve:
                segment_parts_for_gpt.append(node.string)
                f.write(f'<p style="color:black;">#{node.range}{show_html}#</p>')
            else:
                f.write(f'<p style="color:red;">{show_html}</p>')
            node = node.next
            if node is None: break

    for n in nodes: n.next = None   # break the links between the nodes
    return_dict['nodes'] = nodes
    return_dict['segment_parts_for_gpt'] = segment_parts_for_gpt
    return return_dict
80
-
81
class LatexPaperSplit():
    """
    Break a LaTeX document down into a list of nodes; each node carries a
    preserve flag indicating whether it should be processed by GPT.
    `merge_result` later stitches processed and preserved nodes back together.
    """
    def __init__(self) -> None:
        self.nodes = None
        self.msg = "*{\\scriptsize\\textbf{警告:该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成," + \
            "版权归原文作者所有。翻译内容可靠性无保障,请仔细鉴别并以原文为准。" + \
            "项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。"
        # Please do not delete or modify this warning unless you are the paper's
        # original author (if you are, you are welcome to contact the developers
        # through the QQ group listed in the README).
        self.msg_declare = "为了防止大语言模型的意外谬误产生扩散影响,禁止移除或修改此警告。}}\\\\"
        self.title = "unknown"
        self.abstract = "unknown"

    def read_title_and_abstract(self, txt):
        """Best-effort extraction of title and abstract from `txt`.

        `self.title` / `self.abstract` keep their previous values when
        nothing is found or the extraction fails.
        """
        try:
            title, abstract = find_title_and_abs(txt)
            if title is not None:
                self.title = title.replace('\n', ' ').replace('\\\\', ' ').replace(' ', '').replace(' ', '')
            if abstract is not None:
                self.abstract = abstract.replace('\n', ' ').replace('\\\\', ' ').replace(' ', '').replace(' ', '')
        except Exception:
            # Title/abstract are optional metadata; never fail the run for them.
            pass

    def merge_result(self, arr, mode, msg, buggy_lines=None, buggy_line_surgery_n_lines=10):
        """
        Merge the result after the GPT process completed.

        Args:
            arr: processed texts, one per non-preserved node, in node order.
            mode: processing mode; for 'translate_zh' the warning text
                (`self.msg + msg + self.msg_declare`) is inserted at the abstract.
            msg: extra text placed in the middle of the warning.
            buggy_lines: line numbers of the merged text that failed to compile;
                any processed node within `buggy_line_surgery_n_lines` lines of
                one of them is reverted to its original text.
            buggy_line_surgery_n_lines: size of that safety margin, in lines.

        Returns:
            The merged LaTeX source.

        Raises:
            RuntimeError: in 'translate_zh' mode, when no abstract can be located.
        """
        if buggy_lines is None:
            buggy_lines = []
        result_string = ""
        node_cnt = 0
        line_cnt = 0

        for node in self.nodes:
            if node.preserve:
                line_cnt += node.string.count('\n')
                result_string += node.string
            else:
                translated_txt = fix_content(arr[node_cnt], node.string)
                begin_line = line_cnt
                end_line = line_cnt + translated_txt.count('\n')

                # reverse translation if any error
                if any(begin_line-buggy_line_surgery_n_lines <= b_line <= end_line+buggy_line_surgery_n_lines for b_line in buggy_lines):
                    translated_txt = node.string

                result_string += translated_txt
                node_cnt += 1
                line_cnt += translated_txt.count('\n')

        if mode == 'translate_zh':
            match = re.search(r'\\begin\{abstract\}.*\n', result_string)
            if match:
                # match \begin{abstract}xxxx\end{abstract}
                position = match.end()
            else:
                # match \abstract{xxxx}
                match = re.search(r"\\abstract\{(.*?)\}", result_string, flags=re.DOTALL)
                if match is None:
                    # Previously this surfaced as an AttributeError on None.
                    raise RuntimeError("Cannot locate the abstract section; unable to insert the warning message.")
                position = match.start(1)
            result_string = result_string[:position] + self.msg + msg + self.msg_declare + result_string[position:]
        return result_string


    def split(self, txt, project_folder, opts):
        """
        Break the LaTeX document `txt` down into nodes by running
        `split_subprocess` in a separate process.
        P.S. use multiprocessing to avoid timeout error

        Returns:
            The list of text segments that should be processed by GPT
            (also stored as `self.sp`; the nodes are stored as `self.nodes`).

        Raises:
            RuntimeError: if the subprocess ended without delivering a result.
        """
        import multiprocessing
        # The context manager stops the manager's server process afterwards.
        with multiprocessing.Manager() as manager:
            return_dict = manager.dict()
            p = multiprocessing.Process(
                target=split_subprocess,
                args=(txt, project_folder, return_dict, opts))
            p.start()
            p.join()
            p.close()
            if 'nodes' not in return_dict or 'segment_parts_for_gpt' not in return_dict:
                raise RuntimeError("The latex splitting subprocess exited without producing a result.")
            self.nodes = return_dict['nodes']
            self.sp = return_dict['segment_parts_for_gpt']
        return self.sp
166
-
167
-
168
class LatexPaperFileGroup():
    """
    Split file contents into pieces below a token limit, then merge and
    write back the processed pieces.
    """
    def __init__(self):
        self.file_paths = []
        self.file_contents = []
        self.sp_file_contents = []
        self.sp_file_index = []
        self.sp_file_tag = []
        # token counter built on the gpt-3.5-turbo tokenizer
        from request_llms.bridge_all import model_info
        tokenizer = model_info["gpt-3.5-turbo"]['tokenizer']

        def get_token_num(txt):
            token_ids = tokenizer.encode(txt, disallowed_special=())
            return len(token_ids)

        self.get_token_num = get_token_num

    def run_file_split(self, max_token_limit=1900):
        """
        Append every file content to the `sp_*` lists, whole when it is below
        `max_token_limit` tokens, otherwise broken into tagged parts.
        """
        for file_no, text in enumerate(self.file_contents):
            if self.get_token_num(text) >= max_token_limit:
                # too long: break it down and tag each part with its number
                from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
                pieces = breakdown_text_to_satisfy_token_limit(text, max_token_limit)
                for part_no, piece in enumerate(pieces):
                    self.sp_file_contents.append(piece)
                    self.sp_file_index.append(file_no)
                    self.sp_file_tag.append(self.file_paths[file_no] + f".part-{part_no}.tex")
                continue
            # short enough: keep as a single piece
            self.sp_file_contents.append(text)
            self.sp_file_index.append(file_no)
            self.sp_file_tag.append(self.file_paths[file_no])

    def merge_result(self):
        """Concatenate `sp_file_result` pieces back into one result per file."""
        self.file_result = [""] * len(self.file_paths)
        for piece, owner in zip(self.sp_file_result, self.sp_file_index):
            self.file_result[owner] = self.file_result[owner] + piece

    def write_result(self):
        """Write each result to `<path>.polish.tex` and return the written paths."""
        written = []
        for path, text in zip(self.file_paths, self.file_result):
            target = path + '.polish.tex'
            with open(target, 'w', encoding='utf8') as out:
                written.append(target)
                out.write(text)
        return written
213
-
214
-
215
def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='proofread', switch_prompt=None, opts=[]):
    """Generator: merge a LaTeX project into one file, split it, send the pieces to GPT and write the merged result.

    Steps, as performed below: locate the main tex file, merge the project
    into `merge.tex`, split it into segments, cut over-long segments by token
    count, request GPT for every piece (skipped when a `temp.pkl` debug cache
    exists in `project_folder`), write an HTML report, then write
    `merge_{mode}.tex`.

    `switch_prompt` is called as `switch_prompt(pfg, mode)` and must return
    `(inputs_array, sys_prompt_array)`; the default of None is not usable.
    `plugin_kwargs` and `system_prompt` are not used in this function body.

    Yields UI refreshes; the generator's return value is the path of
    `merge_{mode}.tex` inside `project_folder`.
    """
    import time, os, re
    from ..crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
    from .latex_actions import LatexPaperFileGroup, LatexPaperSplit

    # <-------- Locate the main tex file ---------->
    maintex = find_main_tex_file(file_manifest, mode)
    chatbot.append((f"定位主Latex文件", f'[Local Message] 分析结果:该项目的Latex主文件是{maintex}, 如果分析错误, 请立即终止程序, 删除或修改歧义文件, 然后重试。主程序即将开始, 请稍候。'))
    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
    time.sleep(3)

    # <-------- Read the Latex files and merge the multi-file project into one big tex ---------->
    main_tex_basename = os.path.basename(maintex)
    assert main_tex_basename.endswith('.tex')
    main_tex_basename_bare = main_tex_basename[:-4]
    may_exist_bbl = pj(project_folder, f'{main_tex_basename_bare}.bbl')
    if os.path.exists(may_exist_bbl):
        # Make an existing bibliography available under each merged file name.
        shutil.copyfile(may_exist_bbl, pj(project_folder, f'merge.bbl'))
        shutil.copyfile(may_exist_bbl, pj(project_folder, f'merge_{mode}.bbl'))
        shutil.copyfile(may_exist_bbl, pj(project_folder, f'merge_diff.bbl'))

    with open(maintex, 'r', encoding='utf-8', errors='replace') as f:
        content = f.read()
        merged_content = merge_tex_files(project_folder, content, mode)

    with open(project_folder + '/merge.tex', 'w', encoding='utf-8', errors='replace') as f:
        f.write(merged_content)

    # <-------- Fine-grained splitting of the latex file ---------->
    chatbot.append((f"Latex文件融合完成", f'[Local Message] 正在精细切分latex文件,这需要一段时间计算,文档越长耗时越长,请耐心等待。'))
    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
    lps = LatexPaperSplit()
    lps.read_title_and_abstract(merged_content)
    res = lps.split(merged_content, project_folder, opts) # the time-consuming call
    # <-------- Break down latex segments that are too long ---------->
    pfg = LatexPaperFileGroup()
    for index, r in enumerate(res):
        pfg.file_paths.append('segment-' + str(index))
        pfg.file_contents.append(r)

    pfg.run_file_split(max_token_limit=1024)
    n_split = len(pfg.sp_file_contents)

    # <-------- Switch the prompt as required ---------->
    inputs_array, sys_prompt_array = switch_prompt(pfg, mode)
    inputs_show_user_array = [f"{mode} {f}" for f in pfg.sp_file_tag]

    if os.path.exists(pj(project_folder,'temp.pkl')):

        # <-------- [debug only] if a debug cache file exists, skip the GPT request stage ---------->
        pfg = objload(file=pj(project_folder,'temp.pkl'))

    else:
        # <-------- multi-threaded gpt requests ---------->
        history_array = [[""] for _ in range(n_split)]
        # LATEX_EXPERIMENTAL, = get_conf('LATEX_EXPERIMENTAL')
        # if LATEX_EXPERIMENTAL:
        #     paper_meta = f"The paper you processing is `{lps.title}`, a part of the abstraction is `{lps.abstract}`"
        #     paper_meta_max_len = 888
        #     history_array = [[ paper_meta[:paper_meta_max_len] + '...', "Understand, what should I do?"] for _ in range(n_split)]

        gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
            inputs_array=inputs_array,
            inputs_show_user_array=inputs_show_user_array,
            llm_kwargs=llm_kwargs,
            chatbot=chatbot,
            history_array=history_array,
            sys_prompt_array=sys_prompt_array,
            # max_workers=5, # limit on parallel tasks: at most 5 run at once, the others wait in a queue
            scroller_max_len = 40
        )

        # <-------- Reassemble the text fragments into complete tex segments ---------->
        pfg.sp_file_result = []
        # The collection is read as alternating (shown input, reply) entries.
        for i_say, gpt_say, orig_content in zip(gpt_response_collection[0::2], gpt_response_collection[1::2], pfg.sp_file_contents):
            pfg.sp_file_result.append(gpt_say)
        pfg.merge_result()

        # <-------- Temporary storage, for debugging ---------->
        # NOTE(review): presumably cleared because the token-counting callable
        # cannot be serialised by objdump — confirm.
        pfg.get_token_num = None
        objdump(pfg, file=pj(project_folder,'temp.pkl'))

    write_html(pfg.sp_file_contents, pfg.sp_file_result, chatbot=chatbot, project_folder=project_folder)

    # <-------- Write the output file ---------->
    msg = f"当前大语言模型: {llm_kwargs['llm_model']},当前语言模型温度设定: {llm_kwargs['temperature']}。"
    final_tex = lps.merge_result(pfg.file_result, mode, msg)
    # Saved so that a later compile-fix pass can redo the merge.
    objdump((lps, pfg.file_result, mode, msg), file=pj(project_folder,'merge_result.pkl'))

    with open(project_folder + f'/merge_{mode}.tex', 'w', encoding='utf-8', errors='replace') as f:
        # In translate_zh mode the text is written only if it contains "binary";
        # the file is created (empty) either way.
        if mode != 'translate_zh' or "binary" in final_tex: f.write(final_tex)


    # <-------- Wrap up and exit ---------->
    chatbot.append((f"完成了吗?", 'GPT结果已输出, 即将编译PDF'))
    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI

    # <-------- Return ---------->
    return project_folder + f'/merge_{mode}.tex'
314
-
315
-
316
def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work_folder_modified, fixed_line=None):
    """Revert the processed text around the first LaTeX error and write a new tex file.

    The compile log at `log_path` is scanned for `<tex_name>:<line>:` entries.
    The smallest reported line (minus one) is added to `fixed_line`, the merge
    stored in `merge_result.pkl` is redone with those lines reverted within a
    margin of `5*n_fix` lines, and the result is written to
    `<tex_name_pure>_fix_<n_fix>.tex` inside `work_folder_modified`.

    Args:
        file_path: not used in this function body.
        fixed_line: list of already-reverted lines; it is extended in place so
            the caller can carry it across retries. A new list is used when omitted.

    Returns:
        `(True, new tex base name, sorted error lines)` on success, or
        `(False, -1, [-1])` if anything fails (e.g. no error line in the log).
    """
    if fixed_line is None:
        # A fresh list per call; the former `[]` default was shared between
        # calls and grew with every append below.
        fixed_line = []
    try:
        with open(log_path, 'r', encoding='utf-8', errors='replace') as f:
            log = f.read()
        import re
        # Escape the file name so that '.' and other regex metacharacters in
        # it are matched literally.
        buggy_lines = re.findall(re.escape(tex_name)+':([0-9]{1,5}):', log)
        buggy_lines = sorted(int(l) for l in buggy_lines)
        buggy_line = buggy_lines[0]-1
        print("reversing tex line that has errors", buggy_line)

        # Redo the merge, reverting the paragraphs that caused the error
        if buggy_line not in fixed_line:
            fixed_line.append(buggy_line)

        lps, file_result, mode, msg = objload(file=pj(work_folder_modified,'merge_result.pkl'))
        final_tex = lps.merge_result(file_result, mode, msg, buggy_lines=fixed_line, buggy_line_surgery_n_lines=5*n_fix)

        with open(pj(work_folder_modified, f"{tex_name_pure}_fix_{n_fix}.tex"), 'w', encoding='utf-8', errors='replace') as f:
            f.write(final_tex)

        return True, f"{tex_name_pure}_fix_{n_fix}", buggy_lines
    except Exception:
        # Best effort: any failure means "cannot retry", reported to the caller.
        print("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.")
        return False, -1, [-1]
341
-
342
-
343
def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_folder_original, work_folder_modified, work_folder, mode='default'):
    """Generator: compile the original and the converted LaTeX projects to PDF, retrying after errors.

    Each attempt compiles both projects with pdflatex; if the converted one
    produced a PDF, bibtex and further pdflatex passes follow, plus (unless
    `mode` is 'translate_zh') a latexdiff comparison document. When the
    converted PDF exists the files are promoted to the download zone and the
    generator returns True. Otherwise `remove_buggy_lines` produces a patched
    tex file and the compilation is retried, up to 32 attempts.

    Args:
        main_file_original / main_file_modified: tex base names (no extension).
        work_folder_original / work_folder_modified: folders they are compiled in.
        work_folder: folder in which the diff document is written and compiled.

    Yields UI refreshes; the generator's return value is True on success and
    False when all retries are exhausted or no retry is possible.
    """
    import os, time
    n_fix = 1
    fixed_line = []
    max_try = 32
    chatbot.append([f"正在编译PDF文档", f'编译已经开始。当前工作路径为{work_folder},如果程序停顿5分钟以上,请直接去该路径下取回翻译结果,或者重启之后再度尝试 ...'])
    yield from update_ui(chatbot=chatbot, history=history)
    chatbot.append([f"正在编译PDF文档", '...'])
    yield from update_ui(chatbot=chatbot, history=history)
    time.sleep(1)
    chatbot[-1] = list(chatbot[-1]) # refresh the UI
    yield from update_ui_lastest_msg('编译已经开始...', chatbot, history) # refresh the Gradio front end

    while True:
        # Reuse a merged bibliography under the current main file name, if any.
        may_exist_bbl = pj(work_folder_modified, f'merge.bbl')
        target_bbl = pj(work_folder_modified, f'{main_file_modified}.bbl')
        if os.path.exists(may_exist_bbl) and not os.path.exists(target_bbl):
            shutil.copyfile(may_exist_bbl, target_bbl)

        # https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error
        yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history) # refresh the Gradio front end
        ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)

        yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译转化后的PDF ...', chatbot, history) # refresh the Gradio front end
        ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)

        if ok and os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf')):
            # Only continue with the steps below if the second compilation succeeded
            yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译BibTex ...', chatbot, history) # refresh the Gradio front end
            if not os.path.exists(pj(work_folder_original, f'{main_file_original}.bbl')):
                ok = compile_latex_with_timeout(f'bibtex {main_file_original}.aux', work_folder_original)
            if not os.path.exists(pj(work_folder_modified, f'{main_file_modified}.bbl')):
                ok = compile_latex_with_timeout(f'bibtex {main_file_modified}.aux', work_folder_modified)

            # Two more passes per document to resolve citations and cross references.
            yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译文献交叉引用 ...', chatbot, history) # refresh the Gradio front end
            ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
            ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
            ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
            ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)

            if mode!='translate_zh':
                yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # refresh the Gradio front end
                print( f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
                ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex', os.getcwd())

                yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history) # refresh the Gradio front end
                ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
                ok = compile_latex_with_timeout(f'bibtex merge_diff.aux', work_folder)
                ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
                ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)

        # <---------- Check the results ----------->
        results_ = ""
        original_pdf_success = os.path.exists(pj(work_folder_original, f'{main_file_original}.pdf'))
        modified_pdf_success = os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf'))
        diff_pdf_success = os.path.exists(pj(work_folder, f'merge_diff.pdf'))
        results_ += f"原始PDF编译是否成功: {original_pdf_success};"
        results_ += f"转化PDF编译是否成功: {modified_pdf_success};"
        results_ += f"对比PDF编译是否成功: {diff_pdf_success};"
        yield from update_ui_lastest_msg(f'第{n_fix}编译结束:<br/>{results_}...', chatbot, history) # refresh the Gradio front end

        if diff_pdf_success:
            # The diff document is written and compiled in `work_folder`, and
            # its existence is checked there, so promote it from that folder
            # (it was previously looked up in `work_folder_modified`).
            result_pdf = pj(work_folder, f'merge_diff.pdf') # get pdf path
            promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
        if modified_pdf_success:
            yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 正在尝试生成对比PDF, 请稍候 ...', chatbot, history) # refresh the Gradio front end
            result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf') # get pdf path
            origin_pdf = pj(work_folder_original, f'{main_file_original}.pdf') # get pdf path
            if os.path.exists(pj(work_folder, '..', 'translation')):
                shutil.copyfile(result_pdf, pj(work_folder, '..', 'translation', 'translate_zh.pdf'))
            promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
            # Concatenate the two PDFs
            if original_pdf_success:
                try:
                    from .latex_toolbox import merge_pdfs
                    concat_pdf = pj(work_folder_modified, f'comparison.pdf')
                    merge_pdfs(origin_pdf, result_pdf, concat_pdf)
                    if os.path.exists(pj(work_folder, '..', 'translation')):
                        shutil.copyfile(concat_pdf, pj(work_folder, '..', 'translation', 'comparison.pdf'))
                    promote_file_to_downloadzone(concat_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
                except Exception as e:
                    # The side-by-side PDF is optional; report and carry on.
                    print(e)
            return True # success
        else:
            if n_fix>=max_try: break
            n_fix += 1
            # Patch the tex source around the reported error; the patched file
            # becomes the main file of the next attempt.
            can_retry, main_file_modified, buggy_lines = remove_buggy_lines(
                file_path=pj(work_folder_modified, f'{main_file_modified}.tex'),
                log_path=pj(work_folder_modified, f'{main_file_modified}.log'),
                tex_name=f'{main_file_modified}.tex',
                tex_name_pure=f'{main_file_modified}',
                n_fix=n_fix,
                work_folder_modified=work_folder_modified,
                fixed_line=fixed_line
            )
            yield from update_ui_lastest_msg(f'由于最为关键的转化PDF编译失败, 将根据报错信息修正tex源文件并重试, 当前报错的latex代码处于第{buggy_lines}行 ...', chatbot, history) # refresh the Gradio front end
            if not can_retry: break

    return False # failure
440
-
441
-
442
def write_html(sp_file_contents, sp_file_result, chatbot, project_folder):
    """Write a two-column HTML report pairing each fragment with its result.

    Rows are built by pairing ``sp_file_contents`` with ``sp_file_result``
    position by position (extra items of the longer sequence are ignored).
    The saved report is copied into ``project_folder`` and handed to
    ``promote_file_to_downloadzone``.

    This is best-effort: any ordinary failure is printed and swallowed so
    that a broken report never aborts the caller.
    """
    try:
        import shutil
        from crazy_functions.pdf_fns.report_gen_html import construct_html
        from toolbox import gen_time_str
        ch = construct_html()
        for orig, trans in zip(sp_file_contents, sp_file_result):
            ch.add_row(a=orig, b=trans)
        create_report_file_name = f"{gen_time_str()}.trans.html"
        res = ch.save_file(create_report_file_name)
        shutil.copyfile(res, pj(project_folder, create_report_file_name))
        promote_file_to_downloadzone(file=res, chatbot=chatbot)
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
        from toolbox import trimmed_format_exc
        print('writing html result failed:', trimmed_format_exc())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/latex_fns/latex_toolbox.py DELETED
@@ -1,562 +0,0 @@
1
- import os, shutil
2
- import re
3
- import numpy as np
4
- PRESERVE = 0
5
- TRANSFORM = 1
6
-
7
- pj = os.path.join
8
-
9
class LinkedListNode:
    """One segment of text in a singly linked list.

    Attributes are plain and public: ``string`` holds the segment text,
    ``preserve`` flags whether the segment is kept as-is, ``next`` links to
    the following node and ``range`` is filled in later by other code.
    """

    def __init__(self, string, preserve=True) -> None:
        # Links and annotations start empty; callers fill them in.
        self.next = None
        self.range = None
        self.string = string
        self.preserve = preserve
20
-
21
def convert_to_linklist(text, mask):
    """Group consecutive characters with the same mask value into nodes.

    Walks ``text`` and ``mask`` in lockstep. A character is appended to the
    current node when its mask value agrees with that node's ``preserve``
    flag; otherwise a new node is started. The returned root is an empty,
    preserved sentinel node.
    """
    root = LinkedListNode("", preserve=True)
    tail = root
    for char, flag in zip(text, mask):
        same_kind = (flag == PRESERVE and tail.preserve) \
            or (flag == TRANSFORM and not tail.preserve)
        if not same_kind:
            # Mask value flipped: open a new segment.
            tail.next = LinkedListNode(char, preserve=(flag == PRESERVE))
            tail = tail.next
            continue
        tail.string += char
    return root
33
-
34
def post_process(root):
    """Tidy a segment list in place and annotate it with line ranges.

    Five passes are applied, in order:

    1. Split every non-preserved node at its first unmatched ``}``.
    2. Mark nodes shorter than 42 characters (after stripping newlines at
       both ends) as preserved.
    3. Merge a preserved node with a directly following preserved node.
    4. Move leading/trailing whitespace of non-preserved nodes into the
       neighbouring preserved node.
    5. Store in ``node.range`` the line span of each node, widened by two
       lines on each side.

    Returns ``root``.
    """

    def break_check(string):
        # Index of the first '}' that closes nothing opened inside `string`,
        # or -1 when the braces never go negative.
        depth = 0
        for i, c in enumerate(string):
            if c == '{':
                depth += 1
            elif c == '}':
                if depth == 0:
                    print('stack fix')
                    return i
                depth -= 1
        return -1

    # Pass 1: repair braces by splitting at the first unmatched '}'.
    node = root
    while node is not None:
        if not node.preserve:
            string = node.string
            bp = break_check(string)
            if bp != -1:
                # When the stray '}' is the first character it stays alone in
                # this node; otherwise it starts the new node.
                cut = 1 if bp == 0 else bp
                node.string = string[:cut]
                q = LinkedListNode(string[cut:], False)
                q.next = node.next
                node.next = q
        node = node.next

    # Pass 2: freeze empty and very short segments.
    node = root
    while node is not None:
        # The second strip('') is a no-op kept from the original expression.
        if len(node.string.strip('\n').strip('')) < 42:
            node.preserve = True
        node = node.next

    # Pass 3: merge adjacent preserved nodes.
    # NOTE(review): after a merge the cursor still advances, so in a run of
    # three preserved nodes the third is not folded into the first. Kept
    # as-is because later passes were written against this behaviour.
    node = root
    while node is not None:
        if node.next and node.preserve and node.next.preserve:
            node.string += node.next.string
            node.next = node.next.next
        node = node.next

    # Pass 4: detach surrounding whitespace from editable nodes.
    node = root
    prev_node = None
    while node is not None:
        if not node.preserve:
            lstriped_ = node.string.lstrip().lstrip('\n')
            if (prev_node is not None) and (prev_node.preserve) and (len(lstriped_) != len(node.string)):
                # Slice by explicit length: `[:-len(lstriped_)]` collapses to
                # `[:0]` for an all-whitespace node and would drop the text.
                lead = len(node.string) - len(lstriped_)
                prev_node.string += node.string[:lead]
                node.string = lstriped_
            rstriped_ = node.string.rstrip().rstrip('\n')
            if (node.next is not None) and (node.next.preserve) and (len(rstriped_) != len(node.string)):
                node.next.string = node.string[len(rstriped_):] + node.next.string
                node.string = rstriped_
        prev_node = node
        node = node.next

    # Pass 5: annotate each node with its (widened) line range.
    node = root
    n_line = 0
    expansion = 2
    while node is not None:
        n_l = node.string.count('\n')
        node.range = [n_line - expansion, n_line + n_l + expansion]
        n_line = n_line + n_l
        node = node.next
    return root
118
-
119
-
120
- """
121
- =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
122
- Latex segmentation with a binary mask (PRESERVE=0, TRANSFORM=1)
123
- =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
124
- """
125
-
126
-
127
def set_forbidden_text(text, mask, pattern, flags=0):
    r"""Mark every match of ``pattern`` in ``text`` as PRESERVE in ``mask``.

    ``pattern`` may be a single regex or a list of regexes; a list is joined
    with ``|``. Example: with
    ``pattern = r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}"`` the whole
    algorithm environment, including its begin/end commands, is masked.

    ``mask`` is modified in place through slice assignment and returned
    together with the unchanged ``text``.
    """
    if isinstance(pattern, list):
        pattern = '|'.join(pattern)
    for found in re.finditer(pattern, text, flags):
        start, stop = found.span()
        mask[start:stop] = PRESERVE
    return text, mask
139
-
140
def reverse_forbidden_text(text, mask, pattern, flags=0, forbid_wrapper=True):
    r"""Mark matches of ``pattern`` as TRANSFORM in ``mask``.

    ``pattern`` may be a single regex or a list of regexes (joined with
    ``|``). With ``forbid_wrapper`` false the whole match becomes TRANSFORM.
    Otherwise only capture group 1 becomes TRANSFORM, while the matched text
    before and after it (for example the ``\begin{abstract}`` and
    ``\end{abstract}`` commands) is set to PRESERVE.

    ``mask`` is modified in place and returned with the unchanged ``text``.
    """
    if isinstance(pattern, list):
        pattern = '|'.join(pattern)
    for found in re.finditer(pattern, text, flags):
        outer_start, outer_stop = found.span(0)
        if not forbid_wrapper:
            mask[outer_start:outer_stop] = TRANSFORM
            continue
        inner_start, inner_stop = found.span(1)
        mask[outer_start:inner_start] = PRESERVE   # opening wrapper
        mask[inner_start:inner_stop] = TRANSFORM   # wrapped content
        mask[inner_stop:outer_stop] = PRESERVE     # closing wrapper
    return text, mask
157
-
158
def set_forbidden_text_careful_brace(text, mask, pattern, flags=0):
    r"""Mark a brace-delimited command as PRESERVE, honouring nested braces.

    For every match of ``pattern`` the text is scanned from the start of the
    match, counting ``{`` and ``}``, until the brace that closes the first
    opened one is found; everything from the match start through that brace
    is set to PRESERVE. Example target:
    ``\caption{text \textbf{nested} text.}``.

    The scan stops after 16384 characters or at the end of ``text``,
    whichever comes first, so unbalanced braces cannot raise ``IndexError``.

    ``mask`` is modified in place and returned with the unchanged ``text``.
    """
    max_scan = 1024 * 16
    text_len = len(text)
    pattern_compile = re.compile(pattern, flags)
    for res in pattern_compile.finditer(text):
        # Start at -1 so the command's own opening brace brings the level to 0.
        brace_level = -1
        p = begin = res.start()
        for _ in range(max_scan):
            if p >= text_len:
                break  # ran off the end: braces are unbalanced
            ch = text[p]
            if ch == '}' and brace_level == 0:
                break
            elif ch == '}':
                brace_level -= 1
            elif ch == '{':
                brace_level += 1
            p += 1
        end = p + 1
        mask[begin:end] = PRESERVE
    return text, mask
177
-
178
def reverse_forbidden_text_careful_brace(text, mask, pattern, flags=0, forbid_wrapper=True):
    r"""Mark the brace-delimited argument of a command as TRANSFORM.

    For every match of ``pattern`` the scan starts at the beginning of
    capture group 1 and counts ``{``/``}`` until the ``}`` that closes the
    argument is reached; that span becomes TRANSFORM. With
    ``forbid_wrapper`` true, the rest of the match before and after the span
    is set to PRESERVE. Example target:
    ``\caption{text \textbf{nested} text.}``.

    The scan stops after 16384 characters or at the end of ``text``,
    whichever comes first, so unbalanced braces cannot raise ``IndexError``.

    ``mask`` is modified in place and returned with the unchanged ``text``.
    """
    max_scan = 1024 * 16
    text_len = len(text)
    pattern_compile = re.compile(pattern, flags)
    for res in pattern_compile.finditer(text):
        brace_level = 0
        p = begin = res.regs[1][0]
        for _ in range(max_scan):
            if p >= text_len:
                break  # ran off the end: braces are unbalanced
            ch = text[p]
            if ch == '}' and brace_level == 0:
                break
            elif ch == '}':
                brace_level -= 1
            elif ch == '{':
                brace_level += 1
            p += 1
        end = p
        mask[begin:end] = TRANSFORM
        if forbid_wrapper:
            mask[res.regs[0][0]:begin] = PRESERVE
            mask[end:res.regs[0][1]] = PRESERVE
    return text, mask
200
-
201
def set_forbidden_text_begin_end(text, mask, pattern, flags=0, limit_n_lines=42):
    r"""Preserve short ``\begin{...} ... \end{...}`` blocks.

    ``pattern`` must expose the environment name as group 1 and the body as
    group 2. A matched block is set to PRESERVE as a whole unless its name is
    in the allow-list below or its body has at least ``limit_n_lines``
    newlines; in those cases the body is searched recursively instead.

    ``mask`` is modified in place and returned with the unchanged ``text``.
    """
    compiled = re.compile(pattern, flags)
    # Environments that are always descended into, never frozen wholesale.
    never_frozen = ('document', 'abstract', 'lemma', 'definition', 'sproof',
                    'em', 'emph', 'textit', 'textbf', 'itemize', 'enumerate')

    def search_with_line_limit(text, mask):
        for hit in compiled.finditer(text):
            env_name = hit.group(1)
            body = hit.group(2)
            body_start, body_stop = hit.span(2)
            descend = (env_name in never_frozen) or body.count('\n') >= limit_n_lines
            if not descend:
                whole_start, whole_stop = hit.span(0)
                mask[whole_start:whole_stop] = PRESERVE
                continue
            _, body_mask = search_with_line_limit(body, mask[body_start:body_stop])
            mask[body_start:body_stop] = body_mask
        return text, mask

    return search_with_line_limit(text, mask)
221
-
222
-
223
-
224
- """
225
- =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
226
- Latex Merge File
227
- =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
228
- """
229
-
230
def find_main_tex_file(file_manifest, mode):
    r"""Pick the main file of a multi-file LaTeX project.

    A candidate is any file in ``file_manifest`` that contains
    ``\documentclass`` and whose base name does not start with ``merge``.
    With one candidate it is returned directly. With several, each is scored
    after comment removal: -1 for every word typical of journal templates,
    +1 for every command typical of a real paper; the first file with the
    highest score wins.

    ``mode`` is accepted for interface compatibility and is not used here.

    Raises:
        RuntimeError: no file contains ``\documentclass``.
    """
    canidates = []
    for texf in file_manifest:
        if os.path.basename(texf).startswith('merge'):
            continue
        with open(texf, 'r', encoding='utf8', errors='ignore') as f:
            file_content = f.read()
        if r'\documentclass' in file_content:
            canidates.append(texf)

    if len(canidates) == 0:
        raise RuntimeError('无法找到一个主Tex文件(包含documentclass关键字)')
    if len(canidates) == 1:
        return canidates[0]

    # Raw strings matter here: in a normal literal '\ref' is a carriage
    # return followed by "ef" and would never match the LaTeX command.
    unexpected_words = [r'\LaTeX', 'manuscript', 'Guidelines', 'font', 'citations', 'rejected', 'blind review', 'reviewers']
    expected_words = [r'\input', r'\ref', r'\cite']
    canidates_score = []
    for texf in canidates:
        with open(texf, 'r', encoding='utf8', errors='ignore') as f:
            file_content = f.read()
        file_content = rm_comments(file_content)
        score = 0
        score -= sum(1 for uw in unexpected_words if uw in file_content)
        score += sum(1 for ew in expected_words if ew in file_content)
        canidates_score.append(score)
    select = np.argmax(canidates_score)  # first index holding the best score
    return canidates[select]
268
-
269
def rm_comments(main_file):
    """Strip LaTeX comments from ``main_file`` and return the result.

    Lines whose first non-blank character is ``%`` are dropped entirely.
    On the remaining lines everything from an unescaped ``%`` to the end of
    the line is removed; ``\\%`` is left alone.
    """
    kept = [line for line in main_file.splitlines()
            if not line.lstrip().startswith("%")]
    # Trailing comments: a '%' not preceded by a backslash, up to end of line.
    return re.sub(r'(?<!\\)%.*', '', '\n'.join(kept))
281
-
282
def find_tex_file_ignore_case(fp):
    """Resolve ``fp`` to an existing file, tolerating a missing ``.tex``
    suffix and differences in letter case.

    Tried in order: the path as given; the path with ``.tex`` appended (when
    it does not already end in ``.tex``); any ``*.tex`` file in the same
    directory whose name equals either of those names ignoring case.

    Returns the path that was found, or ``None``.
    """
    import glob
    dir_name, base_name = os.path.split(fp)

    exact = os.path.join(dir_name, base_name)
    if os.path.isfile(exact):
        return exact

    original_name = base_name
    if not base_name.endswith('.tex'):
        base_name += '.tex'
    with_suffix = os.path.join(dir_name, base_name)
    if os.path.isfile(with_suffix):
        return with_suffix

    # Case-insensitive fallback. os.path.join keeps the search in the current
    # directory when fp has no directory part (string concatenation with
    # '/*.tex' would scan the filesystem root instead), and glob.escape
    # stops brackets in directory names from acting as wildcards.
    wanted = {original_name.lower(), base_name.lower()}
    for f in glob.glob(os.path.join(glob.escape(dir_name), '*.tex')):
        if os.path.basename(f).lower() in wanted:
            return f
    return None
300
-
301
def merge_tex_files_(project_foler, main_file, mode):
    r"""Recursively inline every ``\input{...}`` of a LaTeX source string.

    Comments are removed first. Each ``\input{name}`` is then replaced by
    the (recursively merged) content of the referenced file, resolved
    relative to ``project_foler`` via ``find_tex_file_ignore_case``.
    ``mode`` is only passed through to the recursive calls.

    Raises:
        RuntimeError: a referenced file cannot be located.
    """
    main_file = rm_comments(main_file)
    # Replace from the last match backwards so earlier spans stay valid.
    for s in reversed(list(re.finditer(r"\\input\{(.*?)\}", main_file, re.M))):
        f = s.group(1)
        fp = os.path.join(project_foler, f)
        fp_ = find_tex_file_ignore_case(fp)
        if not fp_:
            raise RuntimeError(f'找不到{fp},Tex源文件缺失!')
        try:
            with open(fp_, 'r', encoding='utf-8', errors='replace') as fx:
                c = fx.read()
        except OSError:
            # The file was found but cannot be read: leave a visible marker.
            c = "\n\nWarning from GPT-Academic: LaTex source file is missing!\n\n"
        c = merge_tex_files_(project_foler, c, mode)
        main_file = main_file[:s.span()[0]] + c + main_file[s.span()[1]:]
    return main_file
320
-
321
-
322
def find_title_and_abs(main_file):
    r"""Extract the title and abstract from LaTeX source text.

    The abstract is taken from ``\abstract{...}`` if present, otherwise from
    a ``\begin{abstract} ... \end{abstract}`` environment. The title is
    taken from ``\title{...}``. Matching is non-greedy and spans newlines.

    Returns:
        ``(title, abstract)``; either element is ``None`` when not found.
    """
    def first_group(pattern):
        # Group 1 of the first match, or None when nothing matches.
        found = re.search(pattern, main_file, re.DOTALL)
        return found.group(1) if found else None

    abstract = first_group(r"\\abstract\{(.*?)\}")
    if abstract is None:
        abstract = first_group(r"\\begin\{abstract\}(.*?)\\end\{abstract\}")
    title = first_group(r"\\title\{(.*?)\}")
    return title, abstract
354
-
355
-
356
def merge_tex_files(project_foler, main_file, mode):
    r"""Merge a LaTeX project into one source string.

    All ``\input`` files are inlined and comments removed. When ``mode`` is
    ``'translate_zh'`` the result is additionally prepared for Chinese
    output: ``ctex`` (and ``url`` if absent) are loaded right after the
    ``\documentclass`` line, ``fontset=windows,UTF8`` is added to the class
    options, and an abstract is inserted if none exists.

    Raises:
        AssertionError: in ``'translate_zh'`` mode, when no
            ``\documentclass`` line or (even after insertion) no abstract
            can be found.
    """
    main_file = merge_tex_files_(project_foler, main_file, mode)
    main_file = rm_comments(main_file)

    if mode != 'translate_zh':
        return main_file

    # Locate the documentclass line; packages are added right after it.
    match = re.search(r'\\documentclass.*\n', main_file)
    assert match is not None, "Cannot find documentclass statement!"
    position = match.end()
    extra_packages = '\\usepackage{ctex}\n'
    if '{url}' not in main_file:
        extra_packages += '\\usepackage{url}\n'
    main_file = main_file[:position] + extra_packages + main_file[position:]

    # fontset=windows
    main_file = re.sub(r"\\documentclass\[(.*?)\]{(.*?)}", r"\\documentclass[\1,fontset=windows,UTF8]{\2}", main_file)
    main_file = re.sub(r"\\documentclass{(.*?)}", r"\\documentclass[fontset=windows,UTF8]{\1}", main_file)

    # The paper must have an abstract in one of the two supported forms.
    pattern_opt1 = re.compile(r'\\begin\{abstract\}.*\n')
    pattern_opt2 = re.compile(r"\\abstract\{(.*?)\}", flags=re.DOTALL)

    def has_abstract(source):
        return pattern_opt1.search(source) is not None or pattern_opt2.search(source) is not None

    if not has_abstract(main_file):
        main_file = insert_abstract(main_file)
    assert has_abstract(main_file), "Cannot find paper abstract section!"
    return main_file
390
-
391
-
392
insert_missing_abs_str = r"""
\begin{abstract}
The GPT-Academic program cannot find abstract section in this paper.
\end{abstract}
"""

def insert_abstract(tex_content):
    r"""Insert a placeholder abstract into LaTeX source that lacks one.

    The placeholder is put on the line after the first ``\maketitle`` or,
    if there is none, after the first ``\begin{document}``. When neither
    anchor exists the text is returned unchanged.
    """
    for anchor in ("\\maketitle", r"\begin{document}"):
        find_index = tex_content.find(anchor)
        if find_index == -1:
            continue
        # find the nearest ending line
        end_line_index = tex_content.find("\n", find_index)
        if end_line_index == -1:
            # The anchor sits on the last line with no trailing newline.
            # Append after it instead of letting -1 + 1 == 0 push the
            # abstract to the very start of the document.
            head, tail = tex_content + "\n", ""
        else:
            head = tex_content[:end_line_index + 1]
            tail = tex_content[end_line_index + 1:]
        return head + '\n\n' + insert_missing_abs_str + '\n\n' + tail
    return tex_content
417
-
418
- """
419
- =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
420
- Post process
421
- =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
422
- """
423
def mod_inbraket(match):
    r"""``re.sub`` callback that normalises punctuation inside ``\cmd{...}``.

    Group 1 of ``match`` is the command name and group 2 its argument.
    Full-width colons and commas in the argument (which a translation step
    may put into citation keys) are turned back into ASCII ``:`` and ``,``.

    Returns the rebuilt ``\cmd{argument}`` string.
    """
    cmd = match.group(1)
    str_to_modify = match.group(2)
    # Written as escapes so the full-width characters cannot be silently
    # normalised to ASCII, which would turn both calls into no-ops.
    str_to_modify = str_to_modify.replace('\uff1a', ':')  # full-width colon -> ASCII colon
    str_to_modify = str_to_modify.replace('\uff0c', ',')  # full-width comma -> ASCII comma
    return "\\" + cmd + "{" + str_to_modify + "}"
435
-
436
def fix_content(final_tex, node_string):
    r"""Repair common damage in a rewritten LaTeX fragment.

    ``final_tex`` is the rewritten fragment, ``node_string`` the original.
    Applied in order:

    * escape every unescaped ``%``;
    * remove a stray space between a command and its ``{`` or between the
      backslash and the command name;
    * normalise punctuation inside ``\cmd{...}`` via ``mod_inbraket``;
    * fall back to the original when the text contains an error report
      ("Traceback" together with "[Local Message]") or when the number of
      ``\begin`` differs;
    * re-escape underscores when the original had more ``\_`` than the
      rewritten text;
    * if the net brace balance differs from the original, splice the tail
      of the original back in so the braces agree.

    Returns the repaired fragment.
    """
    final_tex = re.sub(r"(?<!\\)%", "\\%", final_tex)
    final_tex = re.sub(r"\\([a-z]{2,10})\ \{", r"\\\1{", string=final_tex)
    final_tex = re.sub(r"\\\ ([a-z]{2,10})\{", r"\\\1{", string=final_tex)
    final_tex = re.sub(r"\\([a-z]{2,10})\{([^\}]*?)\}", mod_inbraket, string=final_tex)

    if "Traceback" in final_tex and "[Local Message]" in final_tex:
        final_tex = node_string  # something went wrong: restore the original
    if node_string.count('\\begin') != final_tex.count('\\begin'):
        final_tex = node_string  # something went wrong: restore the original
    # '\\_' is backslash + underscore, written with a valid escape sequence.
    escaped_underscore = '\\_'
    if node_string.count(escaped_underscore) > final_tex.count(escaped_underscore):
        # Some escaped underscores were lost: escape every bare underscore.
        final_tex = re.sub(r"(?<!\\)_", "\\_", final_tex)

    def compute_brace_level(string):
        # Net brace balance: number of '{' minus number of '}'.
        return string.count("{") - string.count("}")

    def join_most(tex_t, tex_o):
        # Walk the braces of the original in order and look for the same
        # brace sequence in the rewritten text. Keep the rewritten text up to
        # the last brace matched this way, then continue with the original.
        p_t = 0
        p_o = 0

        def find_next(string, chars, begin):
            p = begin
            while p < len(string):
                if string[p] in chars:
                    return p, string[p]
                p += 1
            return None, None

        while True:
            res1, char = find_next(tex_o, ['{', '}'], p_o)
            if res1 is None:
                break
            res2, char = find_next(tex_t, [char], p_t)
            if res2 is None:
                break
            p_o = res1 + 1
            p_t = res2 + 1
        return tex_t[:p_t] + tex_o[p_o:]

    if compute_brace_level(final_tex) != compute_brace_level(node_string):
        # Brace balance changed: restore part of the original to fix it.
        final_tex = join_most(final_tex, node_string)
    return final_tex
483
-
484
def compile_latex_with_timeout(command, cwd, timeout=60):
    """Run a shell command in ``cwd`` and report whether it finished in time.

    The command's output is captured and discarded. The exit status is not
    inspected: the function returns ``True`` whenever the command ends
    within ``timeout`` seconds, and ``False`` (after killing the process and
    printing a notice) when it does not.
    """
    import subprocess
    proc = subprocess.Popen(
        command,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        cwd=cwd,
    )
    try:
        proc.communicate(timeout=timeout)
    except subprocess.TimeoutExpired:
        proc.kill()
        proc.communicate()  # drain the pipes and reap the killed process
        print("Process timed out!")
        return False
    else:
        return True
495
-
496
def run_in_subprocess_wrapper_func(func, args, kwargs, return_dict, exception_dict):
    """Call ``func(*args, **kwargs)`` and record the outcome in the dicts.

    On success the return value is stored under ``return_dict['result']``.
    On failure ``exception_dict['exception']`` receives a 3-tuple
    ``(exception type, exception instance, None)``.

    The third slot keeps the ``sys.exc_info()`` layout, but it is ``None``
    because traceback objects cannot be pickled and therefore cannot be sent
    through a ``multiprocessing`` manager dict; storing the real traceback
    made the store itself fail and the exception was lost.
    """
    try:
        return_dict['result'] = func(*args, **kwargs)
    except Exception as e:
        exception_dict['exception'] = (type(e), e, None)
504
-
505
def run_in_subprocess(func):
    """Wrap ``func`` so that each call runs in a fresh child process.

    The returned wrapper starts a ``multiprocessing.Process`` that runs
    ``run_in_subprocess_wrapper_func``, waits for it to finish and then
    either re-raises the exception recorded by the child or returns the
    recorded result. If the child recorded neither, ``None`` is returned.
    """
    import multiprocessing

    def wrapper(*args, **kwargs):
        # One manager serves both dicts, and the with-block shuts its server
        # process down once the outcome has been read.
        with multiprocessing.Manager() as manager:
            return_dict = manager.dict()
            exception_dict = manager.dict()
            process = multiprocessing.Process(
                target=run_in_subprocess_wrapper_func,
                args=(func, args, kwargs, return_dict, exception_dict))
            process.start()
            process.join()
            process.close()
            if 'exception' in exception_dict:
                # The subprocess ran into an exception: re-raise it here.
                exc_info = exception_dict['exception']
                raise exc_info[1].with_traceback(exc_info[2])
            if 'result' in return_dict.keys():
                # The subprocess ran successfully: return its result.
                return return_dict['result']
        return None
    return wrapper
523
-
524
def _merge_pdfs(pdf1_path, pdf2_path, output_path):
    """Write a PDF whose pages show ``pdf1`` and ``pdf2`` side by side.

    Page *i* of the output places page *i* of the first file on the left and
    page *i* of the second file on the right, with the two overlapping
    slightly (``Percent`` controls how much of the second page's width is
    counted). When one file is shorter, a blank page stands in for its
    missing pages.
    """
    import PyPDF2  # imported here so the library is only loaded where this function runs
    Percent = 0.95
    with open(pdf1_path, 'rb') as pdf1_file, open(pdf2_path, 'rb') as pdf2_file:
        pdf1_reader = PyPDF2.PdfFileReader(pdf1_file)
        pdf2_reader = PyPDF2.PdfFileReader(pdf2_file)
        output_writer = PyPDF2.PdfFileWriter()
        total_pages = max(pdf1_reader.numPages, pdf2_reader.numPages)
        for page_num in range(total_pages):
            # Left page: from the first file, or blank past its end.
            if page_num < pdf1_reader.numPages:
                page1 = pdf1_reader.getPage(page_num)
            else:
                page1 = PyPDF2.PageObject.createBlankPage(pdf1_reader)
            # Right page: from the second file, or blank past its end.
            # The blank page is created from pdf1_reader, as in the original.
            if page_num < pdf2_reader.numPages:
                page2 = pdf2_reader.getPage(page_num)
            else:
                page2 = PyPDF2.PageObject.createBlankPage(pdf1_reader)
            left_width = int(page1.mediaBox.getWidth())
            right_width = int(page2.mediaBox.getWidth())
            # Canvas almost as wide as both pages together.
            new_page = PyPDF2.PageObject.createBlankPage(
                width=int(left_width + right_width * Percent),
                height=max(page1.mediaBox.getHeight(), page2.mediaBox.getHeight())
            )
            new_page.mergeTranslatedPage(page1, 0, 0)
            new_page.mergeTranslatedPage(page2, int(left_width - right_width * (1 - Percent)), 0)
            output_writer.addPage(new_page)
        # Written while both inputs are still open.
        with open(output_path, 'wb') as output_file:
            output_writer.write(output_file)
561
-
562
- merge_pdfs = run_in_subprocess(_merge_pdfs) # PyPDF2这个库有严重的内存泄露问题,把它放到子进程中运行,从而方便内存的释放
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/latex_utils.py DELETED
@@ -1,788 +0,0 @@
1
- from toolbox import update_ui, update_ui_lastest_msg # 刷新Gradio前端界面
2
- from toolbox import zip_folder, objdump, objload, promote_file_to_downloadzone
3
- import os, shutil
4
- import re
5
- import numpy as np
6
- pj = os.path.join
7
-
8
- """
9
- ========================================================================
10
- Part One
11
- Latex segmentation with a binary mask (PRESERVE=0, TRANSFORM=1)
12
- ========================================================================
13
- """
14
- PRESERVE = 0
15
- TRANSFORM = 1
16
-
17
def set_forbidden_text(text, mask, pattern, flags=0):
    """
    Mark every match of `pattern` inside `text` as a preserved area.

    `pattern` is either one regex string or a list of regex strings; a list
    is merged into a single alternation joined with "|". For each match the
    mask entries covering the whole match are set to PRESERVE, so that span
    is kept away from GPT. `mask` is written in place.

    Returns the unchanged `text` together with `mask`.
    """
    regex_source = '|'.join(pattern) if isinstance(pattern, list) else pattern
    for found in re.finditer(regex_source, text, flags):
        start, stop = found.span()
        mask[start:stop] = PRESERVE
    return text, mask
29
-
30
def reverse_forbidden_text(text, mask, pattern, flags=0, forbid_wrapper=True):
    r"""
    Move matched areas out of the preserve area (make them editable for GPT).

    `pattern` is one regex string or a list of them (joined with "|").
    With forbid_wrapper=False the whole match becomes TRANSFORM.
    With forbid_wrapper=True only capture group 1 becomes TRANSFORM, and the
    text of the match before and after that group is set to PRESERVE, e.g.
        \begin{abstract} blablablablablabla. \end{abstract}
    keeps the begin/end wrapper preserved and opens up the body.

    Returns the unchanged `text` together with the in-place updated `mask`.
    """
    regex_source = pattern
    if isinstance(regex_source, list):
        regex_source = '|'.join(regex_source)
    for found in re.finditer(regex_source, text, flags):
        whole_start, whole_stop = found.span(0)
        if forbid_wrapper:
            inner_start, inner_stop = found.span(1)
            mask[whole_start:inner_start] = PRESERVE   # leading wrapper
            mask[inner_start:inner_stop] = TRANSFORM   # group 1 content
            mask[inner_stop:whole_stop] = PRESERVE     # trailing wrapper
        else:
            mask[whole_start:whole_stop] = TRANSFORM
    return text, mask
47
-
48
def set_forbidden_text_careful_brace(text, mask, pattern, flags=0):
    r"""
    Add a preserve area to the paper (text becomes untouchable for GPT),
    counting braces so that the complete command argument is captured, e.g.
        \caption{blablablablabla\textbf{blablabla}blablabla.}

    The regex only locates where a command starts; the end of the area is
    found by walking forward from the start of the match until the brace
    that closes the first opened one. The walk is capped at 16384
    characters and never runs past the end of `text`, so unbalanced braces
    no longer raise IndexError.

    Returns the unchanged `text` together with the in-place updated `mask`.
    """
    scan_limit = 1024 * 16
    text_len = len(text)
    for res in re.finditer(pattern, text, flags):
        # The walk starts on the command itself, before its first "{",
        # hence the level starts at -1 and reaches 0 inside the argument.
        brace_level = -1
        begin = p = res.start()
        for _ in range(scan_limit):
            if p >= text_len:
                break  # unbalanced braces: stop at the end of the text
            ch = text[p]
            if ch == '}':
                if brace_level == 0:
                    break
                brace_level -= 1
            elif ch == '{':
                brace_level += 1
            p += 1
        end = p + 1  # include the closing brace
        mask[begin:end] = PRESERVE
    return text, mask
67
-
68
def reverse_forbidden_text_careful_brace(text, mask, pattern, flags=0, forbid_wrapper=True):
    r"""
    Move an area out of the preserve area (make text editable for GPT),
    counting braces so that the complete command argument is captured, e.g.
        \caption{blablablablabla\textbf{blablabla}blablabla.}

    The editable span starts where capture group 1 starts and ends just
    before the brace that closes the argument. The walk is capped at 16384
    characters and never runs past the end of `text`, so unbalanced braces
    no longer raise IndexError. With forbid_wrapper=True the parts of the
    regex match before and after the editable span are set to PRESERVE.

    Returns the unchanged `text` together with the in-place updated `mask`.
    """
    scan_limit = 1024 * 16
    text_len = len(text)
    for res in re.finditer(pattern, text, flags):
        brace_level = 0
        begin = p = res.start(1)
        for _ in range(scan_limit):
            if p >= text_len:
                break  # unbalanced braces: stop at the end of the text
            ch = text[p]
            if ch == '}':
                if brace_level == 0:
                    break
                brace_level -= 1
            elif ch == '{':
                brace_level += 1
            p += 1
        end = p  # the closing brace itself is not editable
        mask[begin:end] = TRANSFORM
        if forbid_wrapper:
            mask[res.start():begin] = PRESERVE
            mask[end:res.end()] = PRESERVE
    return text, mask
90
-
91
def set_forbidden_text_begin_end(text, mask, pattern, flags=0, limit_n_lines=42):
    """
    Preserve every begin/end block that has fewer than `limit_n_lines` lines.

    `pattern` must expose the environment name as group 1 and the enclosed
    content as group 2. Blocks whose name is whitelisted, or whose content
    has at least `limit_n_lines` newlines, are not preserved as a whole;
    their content is searched recursively instead.

    Returns the unchanged `text` together with `mask`.
    """
    compiled = re.compile(pattern, flags)
    # Environments that are always descended into rather than preserved.
    descend_into = ['document', 'abstract', 'lemma', 'definition', 'sproof',
                    'em', 'emph', 'textit', 'textbf', 'itemize', 'enumerate']

    def _scan(segment, segment_mask):
        for found in compiled.finditer(segment):
            env_name = found.group(1)
            body = found.group(2)
            if env_name in descend_into or body.count('\n') >= limit_n_lines:
                body_start, body_stop = found.span(2)
                body_mask = segment_mask[body_start:body_stop]
                _, body_mask = _scan(body, body_mask)
                segment_mask[body_start:body_stop] = body_mask
            else:
                whole_start, whole_stop = found.span(0)
                segment_mask[whole_start:whole_stop] = PRESERVE
        return segment, segment_mask

    return _scan(text, mask)
111
-
112
class LinkedListNode():
    """
    One node of a singly linked list of text segments.

    string:   the text held by this node
    preserve: True when the segment must be kept as-is, False when it may
              be handed to GPT
    next:     the following node, or None at the end of the list
    """
    def __init__(self, string, preserve=True) -> None:
        self.next = None
        self.preserve = preserve
        self.string = string
122
-
123
def convert_to_linklist(text, mask):
    """
    Turn `text` plus its per-character `mask` into a linked list of runs.

    The list starts with an empty preserved root node. Each character is
    appended to the current node while its mask value agrees with that
    node's preserve flag; otherwise a new node is started with the
    character. Returns the root node.
    """
    head = LinkedListNode("", preserve=True)
    tail = head
    for char, flag in zip(text, mask):
        same_run = (flag == PRESERVE and tail.preserve) \
            or (flag == TRANSFORM and not tail.preserve)
        if not same_run:
            tail.next = LinkedListNode(char, preserve=(flag == PRESERVE))
            tail = tail.next
            continue
        tail.string += char
    return head
135
- """
136
- ========================================================================
137
- Latex Merge File
138
- ========================================================================
139
- """
140
-
141
def 寻找Latex主文件(file_manifest, mode):
    r"""
    Find the main file of a multi-file TeX project.

    A candidate is any file in `file_manifest` whose basename does not start
    with "merge" and whose content contains \documentclass. With several
    candidates each one is scored: +1 for every "expected" word it contains
    (commands common in a paper body) and -1 for every "unexpected" word
    (words common in LaTeX templates). The highest score wins; ties go to
    the earliest candidate.

    `mode` is accepted for interface compatibility and is not used here.

    Returns the path of the chosen file.
    Raises RuntimeError when no file contains \documentclass.
    """
    # Raw strings are required: in a normal literal '\ref' starts with a
    # carriage return and could never match the \ref command.
    unexpected_words = [r'\LaTeX', 'manuscript', 'Guidelines', 'font', 'citations',
                        'rejected', 'blind review', 'reviewers']
    expected_words = [r'\input', r'\ref', r'\cite']

    candidates = []  # (path, content) pairs, each file is read only once
    for texf in file_manifest:
        if os.path.basename(texf).startswith('merge'):
            continue
        # errors='replace' matches how the rest of this module reads tex files
        with open(texf, 'r', encoding='utf8', errors='replace') as f:
            file_content = f.read()
        if r'\documentclass' in file_content:
            candidates.append((texf, file_content))

    if not candidates:
        raise RuntimeError('无法找到一个主Tex文件(包含documentclass关键字)')
    if len(candidates) == 1:
        return candidates[0][0]

    def score(content):
        bonus = sum(1 for word in expected_words if word in content)
        penalty = sum(1 for word in unexpected_words if word in content)
        return bonus - penalty

    scores = [score(content) for _, content in candidates]
    # index of the first maximum
    best = max(range(len(scores)), key=scores.__getitem__)
    return candidates[best][0]
178
-
179
def rm_comments(main_file):
    """
    Strip LaTeX comments from the text `main_file`.

    Lines whose first non-blank character is "%" are dropped entirely.
    After that, everything from an unescaped "%" to the end of its line is
    removed. Returns the cleaned text.
    """
    kept_lines = [line for line in main_file.splitlines()
                  if not line.lstrip().startswith("%")]
    without_comment_lines = '\n'.join(kept_lines)
    # trailing (half-line) comments: a % that is not preceded by a backslash
    return re.sub(r'(?<!\\)%.*', '', without_comment_lines)
191
-
192
def merge_tex_files_(project_foler, main_file, mode):
    r"""
    Merge a TeX project recursively.

    Comments are removed from `main_file`, then every \input{...} command is
    replaced by the (recursively merged) content of the referenced file.
    The path is taken relative to `project_foler`; when it does not exist
    as written, ".tex" is appended. Commands are processed from the last
    one to the first one so that earlier match offsets stay valid.

    Returns the merged text.
    """
    main_file = rm_comments(main_file)
    input_cmds = list(re.finditer(r"\\input\{(.*?)\}", main_file, re.M))
    for cmd in input_cmds[::-1]:
        target = os.path.join(project_foler, cmd.group(1))
        # e.g. \input{srcs/07_appendix.tex} or \input{srcs/07_appendix}
        source_path = target if os.path.exists(target) else target + '.tex'
        with open(source_path, 'r', encoding='utf-8', errors='replace') as fx:
            included = fx.read()
        included = merge_tex_files_(project_foler, included, mode)
        start, stop = cmd.span()
        main_file = main_file[:start] + included + main_file[stop:]
    return main_file
211
-
212
def merge_tex_files(project_foler, main_file, mode):
    r"""
    Merge a TeX project recursively and strip its comments.

    For mode 'translate_zh' the result is additionally prepared for Chinese
    output: \usepackage{ctex} (and \usepackage{url} when "{url}" does not
    occur in the text) is inserted right after the \documentclass line, and
    "fontset=windows,UTF8" is added to the \documentclass options. In that
    mode the function asserts that a \documentclass line and an abstract
    (environment or \abstract{...} command) are present.

    Returns the merged text.
    """
    main_file = rm_comments(merge_tex_files_(project_foler, main_file, mode))
    if mode != 'translate_zh':
        return main_file

    # locate the documentclass line and insert the extra packages after it
    class_line = re.search(r'\\documentclass.*\n', main_file)
    assert class_line is not None, "Cannot find documentclass statement!"
    position = class_line.end()
    extra_packages = '\\usepackage{ctex}\n'
    if '{url}' not in main_file:
        extra_packages += '\\usepackage{url}\n'
    main_file = main_file[:position] + extra_packages + main_file[position:]

    # fontset=windows
    main_file = re.sub(r"\\documentclass\[(.*?)\]{(.*?)}", r"\\documentclass[\1,fontset=windows,UTF8]{\2}", main_file)
    main_file = re.sub(r"\\documentclass{(.*?)}", r"\\documentclass[fontset=windows,UTF8]{\1}", main_file)

    # the paper must contain an abstract in one of the two supported forms
    abstract_env = re.search(r'\\begin\{abstract\}.*\n', main_file)
    abstract_cmd = re.search(r"\\abstract\{(.*?)\}", main_file, flags=re.DOTALL)
    assert (abstract_env is not None) or (abstract_cmd is not None), "Cannot find paper abstract section!"
    return main_file
241
-
242
-
243
-
244
- """
245
- ========================================================================
246
- Post process
247
- ========================================================================
248
- """
249
def mod_inbraket(match):
    """
    Regex-substitution callback that repairs the argument of a command.

    ChatGPT tends to replace the commas inside e.g. a cite command with
    Chinese (full-width) commas. `match` must expose the command name as
    group 1 and the argument as group 2; full-width colons and commas in
    the argument are converted back to their ASCII forms.

    Returns the rebuilt command text: backslash + name + "{" + argument + "}".
    """
    cmd = match.group(1)
    argument = match.group(2)
    # Escapes are used so the full-width characters cannot be silently
    # normalised to ASCII, which would turn these replacements into no-ops.
    argument = argument.replace('\uff1a', ':')  # full-width colon -> ASCII colon
    argument = argument.replace('\uff0c', ',')  # full-width comma -> ASCII comma
    return "\\" + cmd + "{" + argument + "}"
261
-
262
def fix_content(final_tex, node_string):
    r"""
    Fix common GPT errors to increase the compile success rate.

    final_tex:   the text returned by GPT for one segment
    node_string: the original text of that segment

    Steps, in order:
      1. escape every "%" that is not already escaped;
      2. remove a stray space between a command name and its "{", or
         between the backslash and the command name;
      3. convert full-width punctuation inside command arguments back to
         ASCII (see mod_inbraket);
      4. fall back to the original text when the reply contains an error
         trace or when the number of \begin occurrences changed;
      5. re-escape underscores when the reply has fewer "\_" than the
         original;
      6. when the brace balance differs from the original, keep the
         translated text up to the last brace that could be paired and
         append the rest of the original.

    Returns the repaired text.
    """
    final_tex = re.sub(r"(?<!\\)%", "\\%", final_tex)
    final_tex = re.sub(r"\\([a-z]{2,10})\ \{", r"\\\1{", string=final_tex)
    final_tex = re.sub(r"\\\ ([a-z]{2,10})\{", r"\\\1{", string=final_tex)
    final_tex = re.sub(r"\\([a-z]{2,10})\{([^\}]*?)\}", mod_inbraket, string=final_tex)

    if "Traceback" in final_tex and "[Local Message]" in final_tex:
        final_tex = node_string  # something went wrong, restore the original
    if node_string.count('\\begin') != final_tex.count('\\begin'):
        final_tex = node_string  # something went wrong, restore the original
    escaped_underscores = node_string.count('\\_')
    if escaped_underscores > 0 and escaped_underscores > final_tex.count('\\_'):
        # walk and replace any _ without \
        final_tex = re.sub(r"(?<!\\)_", "\\_", final_tex)

    def compute_brace_level(string):
        # net number of unclosed "{" in the string
        return string.count("{") - string.count("}")

    def find_next(string, chars, begin):
        # first position >= begin holding one of `chars`, with that character
        for p in range(begin, len(string)):
            if string[p] in chars:
                return p, string[p]
        return None, None

    def join_most(tex_t, tex_o):
        # Pair the braces of the original with the same braces in the
        # translation for as long as possible, then join the translated
        # head with the original tail.
        p_t = 0
        p_o = 0
        while True:
            res1, char = find_next(tex_o, ['{', '}'], p_o)
            if res1 is None:
                break
            res2, char = find_next(tex_t, [char], p_t)
            if res2 is None:
                break
            p_o = res1 + 1
            p_t = res2 + 1
        return tex_t[:p_t] + tex_o[p_o:]

    if compute_brace_level(final_tex) != compute_brace_level(node_string):
        # something went wrong: restore part of the original so braces match
        final_tex = join_most(final_tex, node_string)
    return final_tex
309
-
310
def split_subprocess(txt, project_folder, return_dict, opts):
    """
    Break down a latex file into a linked list of segments; each node uses
    a preserve flag to indicate whether it should be processed by GPT.

    txt:            the full latex text
    project_folder: folder where 'debug_log.html' is written
    return_dict:    mapping that receives the results under the keys
                    'nodes' (all nodes, unlinked) and
                    'segment_parts_for_gpt' (strings of non-preserved nodes)
    opts:           not used in this function

    Returns `return_dict`.
    """
    text = txt
    # One mask entry per character; everything starts out as TRANSFORM.
    mask = np.zeros(len(txt), dtype=np.uint8) + TRANSFORM

    # Preserve everything up to and including \maketitle (title and authors)
    text, mask = set_forbidden_text(text, mask, r"(.*?)\\maketitle", re.DOTALL)
    # Preserve \iffalse ... \fi blocks
    text, mask = set_forbidden_text(text, mask, r"\\iffalse(.*?)\\fi", re.DOTALL)
    # Preserve begin-end pairs that are shorter than 42 lines
    text, mask = set_forbidden_text_begin_end(text, mask, r"\\begin\{([a-z\*]*)\}(.*?)\\end\{\1\}", re.DOTALL, limit_n_lines=42)
    # Preserve unnamed display formulas
    text, mask = set_forbidden_text(text, mask, [ r"\$\$(.*?)\$\$",  r"\\\[.*?\\\]" ], re.DOTALL)
    # Preserve miscellaneous commands and environments
    text, mask = set_forbidden_text(text, mask, [ r"\\section\{(.*?)\}", r"\\section\*\{(.*?)\}", r"\\subsection\{(.*?)\}", r"\\subsubsection\{(.*?)\}" ])
    text, mask = set_forbidden_text(text, mask, [ r"\\bibliography\{(.*?)\}", r"\\bibliographystyle\{(.*?)\}" ])
    text, mask = set_forbidden_text(text, mask, r"\\begin\{thebibliography\}.*?\\end\{thebibliography\}", re.DOTALL)
    text, mask = set_forbidden_text(text, mask, r"\\begin\{lstlisting\}(.*?)\\end\{lstlisting\}", re.DOTALL)
    text, mask = set_forbidden_text(text, mask, r"\\begin\{wraptable\}(.*?)\\end\{wraptable\}", re.DOTALL)
    text, mask = set_forbidden_text(text, mask, r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}", re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\begin\{wrapfigure\}(.*?)\\end\{wrapfigure\}", r"\\begin\{wrapfigure\*\}(.*?)\\end\{wrapfigure\*\}"], re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\begin\{figure\}(.*?)\\end\{figure\}", r"\\begin\{figure\*\}(.*?)\\end\{figure\*\}"], re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\begin\{multline\}(.*?)\\end\{multline\}", r"\\begin\{multline\*\}(.*?)\\end\{multline\*\}"], re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\begin\{table\}(.*?)\\end\{table\}", r"\\begin\{table\*\}(.*?)\\end\{table\*\}"], re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\begin\{minipage\}(.*?)\\end\{minipage\}", r"\\begin\{minipage\*\}(.*?)\\end\{minipage\*\}"], re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\begin\{align\*\}(.*?)\\end\{align\*\}", r"\\begin\{align\}(.*?)\\end\{align\}"], re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\begin\{equation\}(.*?)\\end\{equation\}", r"\\begin\{equation\*\}(.*?)\\end\{equation\*\}"], re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\includepdf\[(.*?)\]\{(.*?)\}", r"\\clearpage", r"\\newpage", r"\\appendix", r"\\tableofcontents", r"\\include\{(.*?)\}"])
    text, mask = set_forbidden_text(text, mask, [r"\\vspace\{(.*?)\}", r"\\hspace\{(.*?)\}", r"\\label\{(.*?)\}", r"\\begin\{(.*?)\}", r"\\end\{(.*?)\}", r"\\item "])
    text, mask = set_forbidden_text_careful_brace(text, mask, r"\\hl\{(.*?)\}", re.DOTALL)
    # The reverse operations must come last, so they can re-open text inside
    # areas preserved by the steps above
    text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\caption\{(.*?)\}", re.DOTALL, forbid_wrapper=True)
    text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\abstract\{(.*?)\}", re.DOTALL, forbid_wrapper=True)
    text, mask = reverse_forbidden_text(text, mask, r"\\begin\{abstract\}(.*?)\\end\{abstract\}", re.DOTALL, forbid_wrapper=True)
    root = convert_to_linklist(text, mask)

    # Fix braces: a non-preserved node must not contain a "}" whose "{" lies
    # outside of it; such a node is split at the offending "}"
    node = root
    while True:
        string = node.string
        if node.preserve:
            node = node.next
            if node is None: break
            continue
        def break_check(string):
            # Returns the index of the first "}" that has no matching "{"
            # earlier in `string`, or -1 when there is none.
            str_stack = [""]
            for i, c in enumerate(string):
                if c == '{':
                    str_stack.append('{')
                elif c == '}':
                    if len(str_stack) == 1:
                        print('stack fix')
                        return i
                    str_stack.pop(-1)
                else:
                    str_stack[-1] += c
            return -1
        bp = break_check(string)

        if bp == -1:
            pass
        elif bp == 0:
            # The node starts with the unmatched "}": keep that one
            # character here and move the rest into a new node.
            node.string = string[:1]
            q = LinkedListNode(string[1:], False)
            q.next = node.next
            node.next = q
        else:
            # Split right before the unmatched "}"; the new node then starts
            # with it and is handled by the bp == 0 case on the next pass.
            node.string = string[:bp]
            q = LinkedListNode(string[bp:], False)
            q.next = node.next
            node.next = q

        node = node.next
        if node is None: break

    # Mask blank lines and sentences that are too short
    # NOTE(review): strip('') removes nothing, so only newlines are stripped
    # before the length is measured; confirm whether strip(' ') was intended.
    node = root
    while True:
        if len(node.string.strip('\n').strip(''))==0: node.preserve = True
        if len(node.string.strip('\n').strip(''))<42: node.preserve = True
        node = node.next
        if node is None: break
    # Merge a preserved node with a directly following preserved node
    node = root
    while True:
        if node.next and node.preserve and node.next.preserve:
            node.string += node.next.string
            node.next = node.next.next
        node = node.next
        if node is None: break

    # Detach leading and trailing whitespace / line breaks from editable
    # nodes and hand it over to the neighbouring preserved nodes
    # NOTE(review): when lstriped_ is empty, node.string[:-0] is the empty
    # string, so the whitespace is dropped instead of moved; confirm.
    node = root
    prev_node = None
    while True:
        if not node.preserve:
            lstriped_ = node.string.lstrip().lstrip('\n')
            if (prev_node is not None) and (prev_node.preserve) and (len(lstriped_)!=len(node.string)):
                prev_node.string += node.string[:-len(lstriped_)]
                node.string = lstriped_
            rstriped_ = node.string.rstrip().rstrip('\n')
            if (node.next is not None) and (node.next.preserve) and (len(rstriped_)!=len(node.string)):
                node.next.string = node.string[len(rstriped_):] + node.next.string
                node.string = rstriped_
        # =====
        prev_node = node
        node = node.next
        if node is None: break
    # Write an html debug file: preserved areas (PRESERVE) in red,
    # areas to be converted (TRANSFORM) in black
    with open(pj(project_folder, 'debug_log.html'), 'w', encoding='utf8') as f:
        segment_parts_for_gpt = []
        nodes = []
        node = root
        while True:
            nodes.append(node)
            show_html = node.string.replace('\n','<br/>')
            if not node.preserve:
                segment_parts_for_gpt.append(node.string)
                f.write(f'<p style="color:black;">#{show_html}#</p>')
            else:
                f.write(f'<p style="color:red;">{show_html}</p>')
            node = node.next
            if node is None: break

    # Unlink the nodes; from here on they are used as a flat list
    for n in nodes: n.next = None
    return_dict['nodes'] = nodes
    return_dict['segment_parts_for_gpt'] = segment_parts_for_gpt
    return return_dict
441
-
442
-
443
-
444
class LatexPaperSplit():
    """
    Break down a latex file into a list of nodes; each node uses a preserve
    flag to indicate whether it should be processed by GPT. Also puts the
    processed pieces back together.
    """
    def __init__(self) -> None:
        self.nodes = None
        self.msg = "".join([
            "*{\\scriptsize\\textbf{警告:该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成,",
            "版权归原文作者所有。翻译内容可靠性无保障,请仔细鉴别并以原文为准。",
            "项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。",
        ])
        # Please do not delete or modify this warning unless you are the original
        # author of the paper (if you are, you are welcome to contact the developers
        # through the QQ group listed in the README).
        self.msg_declare = "为了防止大语言模型的意外谬误产生扩散影响,禁止移除或修改此警告。}}\\\\"

    def merge_result(self, arr, mode, msg):
        """
        Merge the result after the GPT process completed.

        arr:  processed texts, one per non-preserved node, in node order
        mode: for 'translate_zh' the warning text (self.msg + msg +
              self.msg_declare) is inserted at the start of the abstract
        msg:  extra text placed inside the warning

        Returns the complete latex text.
        """
        pieces = []
        next_processed = 0
        for node in self.nodes:
            if node.preserve:
                pieces.append(node.string)
                continue
            pieces.append(fix_content(arr[next_processed], node.string))
            next_processed += 1
        result_string = "".join(pieces)
        if mode != 'translate_zh':
            return result_string

        env_match = re.search(r'\\begin\{abstract\}.*\n', result_string)
        if env_match:
            # \begin{abstract}xxxx\end{abstract}: insert after the begin line
            position = env_match.end()
        else:
            # \abstract{xxxx}: insert right after the opening brace
            cmd_match = re.search(r"\\abstract\{(.*?)\}", result_string, flags=re.DOTALL)
            position = cmd_match.start(1)
        warning = self.msg + msg + self.msg_declare
        return result_string[:position] + warning + result_string[position:]

    def split(self, txt, project_folder, opts):
        """
        Break down the latex text `txt` (see split_subprocess).

        The work is done in a separate process (the original note says this
        is to avoid a timeout error); results come back through a manager
        dict. Stores the nodes in self.nodes and returns the list of text
        segments that should be sent to GPT (also kept as self.sp).
        """
        import multiprocessing
        # keep a reference to the manager for as long as the proxy is used
        manager = multiprocessing.Manager()
        shared = manager.dict()
        worker = multiprocessing.Process(
            target=split_subprocess,
            args=(txt, project_folder, shared, opts))
        worker.start()
        worker.join()
        worker.close()
        self.nodes = shared['nodes']
        self.sp = shared['segment_parts_for_gpt']
        return self.sp
503
-
504
-
505
-
506
class LatexPaperFileGroup():
    """
    Use a tokenizer to break down text according to max_token_limit, and
    put the processed pieces back together per file.
    """
    def __init__(self):
        self.file_paths = []
        self.file_contents = []
        # the "sp_" lists describe the pieces after splitting
        self.sp_file_contents = []
        self.sp_file_index = []
        self.sp_file_tag = []

        # token counter based on the gpt-3.5-turbo tokenizer
        from request_llm.bridge_all import model_info
        tokenizer = model_info["gpt-3.5-turbo"]['tokenizer']

        def count_tokens(txt):
            return len(tokenizer.encode(txt, disallowed_special=()))
        self.get_token_num = count_tokens

    def run_file_split(self, max_token_limit=1900):
        """
        Split every entry of self.file_contents that has max_token_limit
        tokens or more; shorter entries are taken over unchanged. For each
        resulting piece the content, the index of its source entry and a tag
        (the source path, plus ".part-<j>.tex" for split pieces) are recorded.
        """
        for index, file_content in enumerate(self.file_contents):
            source_path = self.file_paths[index]
            if self.get_token_num(file_content) >= max_token_limit:
                from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
                segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit)
                for j, segment in enumerate(segments):
                    self.sp_file_contents.append(segment)
                    self.sp_file_index.append(index)
                    self.sp_file_tag.append(source_path + f".part-{j}.tex")
                continue
            self.sp_file_contents.append(file_content)
            self.sp_file_index.append(index)
            self.sp_file_tag.append(source_path)
        print('Segmentation: done')

    def merge_result(self):
        """
        Concatenate self.sp_file_result per source entry into self.file_result.
        """
        self.file_result = [""] * len(self.file_paths)
        for piece, source_index in zip(self.sp_file_result, self.sp_file_index):
            self.file_result[source_index] += piece

    def write_result(self):
        """
        Write every merged result to "<path>.polish.tex".
        Returns the list of written file names.
        """
        manifest = []
        for path, res in zip(self.file_paths, self.file_result):
            target = path + '.polish.tex'
            with open(target, 'w', encoding='utf8') as f:
                manifest.append(target)
                f.write(res)
        return manifest
553
-
554
def write_html(sp_file_contents, sp_file_result, chatbot, project_folder):
    """
    Write an html report that shows each original segment next to its result.

    sp_file_contents: original segments
    sp_file_result:   processed segments, in the same order
    chatbot:          passed on to promote_file_to_downloadzone
    project_folder:   the report is also copied into this folder

    This is best effort: any ordinary error is printed and swallowed so that
    a failed report never stops the main workflow. KeyboardInterrupt and
    SystemExit are no longer swallowed.
    """
    try:
        import shutil
        from .crazy_utils import construct_html
        from toolbox import gen_time_str
        ch = construct_html()
        # one table row per (original, result) pair
        for orig, trans in zip(sp_file_contents, sp_file_result):
            ch.add_row(a=orig, b=trans)
        create_report_file_name = f"{gen_time_str()}.trans.html"
        ch.save_file(create_report_file_name)
        # presumably save_file writes into ./gpt_log/ - the copy below reads it from there
        shutil.copyfile(pj('./gpt_log/', create_report_file_name), pj(project_folder, create_report_file_name))
        promote_file_to_downloadzone(file=f'./gpt_log/{create_report_file_name}', chatbot=chatbot)
    except Exception:
        from toolbox import trimmed_format_exc
        print('writing html result failed:', trimmed_format_exc())
581
-
582
def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='proofread', switch_prompt=None, opts=None):
    """
    Split a latex project into segments, send them to GPT and write the
    merged result (generator; yields UI refreshes).

    file_manifest:  tex files of the project
    project_folder: working folder; 'merge.tex', 'merge_<mode>.tex',
                    'debug_log.html' and the debug cache 'temp.pkl' live here
    llm_kwargs:     model settings; 'llm_model' and 'temperature' are read
    chatbot/history: UI state passed to update_ui
    mode:           e.g. 'proofread' or 'translate_zh'
    switch_prompt:  callable (pfg, mode) -> (inputs_array, sys_prompt_array);
                    it is called unconditionally, so it must be provided
    opts:           passed on to LatexPaperSplit.split; None means an empty list
    plugin_kwargs and system_prompt are not used in this function.

    Returns (as the generator's return value) the path of 'merge_<mode>.tex'.
    """
    import time, os, re
    from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
    from .latex_utils import LatexPaperFileGroup, merge_tex_files, LatexPaperSplit, 寻找Latex主文件

    # a fresh list per call instead of a shared mutable default
    opts = [] if opts is None else opts

    #  <-------- find the main tex file ---------->
    maintex = 寻找Latex主文件(file_manifest, mode)
    chatbot.append((f"定位主Latex文件", f'[Local Message] 分析结果:该项目的Latex主文件是{maintex}, 如果分析错误, 请立即终止程序, 删除或修改歧义文件, 然后重试。主程序即将开始, 请稍候。'))
    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
    time.sleep(3)

    #  <-------- read the latex files, merge the multi-file project into one big tex ---------->
    main_tex_basename = os.path.basename(maintex)
    assert main_tex_basename.endswith('.tex')
    main_tex_basename_bare = main_tex_basename[:-4]
    may_exist_bbl = pj(project_folder, f'{main_tex_basename_bare}.bbl')
    if os.path.exists(may_exist_bbl):
        # make the bibliography available under every name compiled later
        shutil.copyfile(may_exist_bbl, pj(project_folder, f'merge.bbl'))
        shutil.copyfile(may_exist_bbl, pj(project_folder, f'merge_{mode}.bbl'))
        shutil.copyfile(may_exist_bbl, pj(project_folder, f'merge_diff.bbl'))

    with open(maintex, 'r', encoding='utf-8', errors='replace') as f:
        content = f.read()
        merged_content = merge_tex_files(project_folder, content, mode)

    with open(project_folder + '/merge.tex', 'w', encoding='utf-8', errors='replace') as f:
        f.write(merged_content)

    #  <-------- split the latex file finely ---------->
    chatbot.append((f"Latex文件融合完成", f'[Local Message] 正在精细切分latex文件,这需要一段时间计算,文档越长耗时越长,请耐心等待。'))
    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
    lps = LatexPaperSplit()
    res = lps.split(merged_content, project_folder, opts) # time-consuming call

    #  <-------- split latex segments that are too long ---------->
    pfg = LatexPaperFileGroup()
    for index, r in enumerate(res):
        pfg.file_paths.append('segment-' + str(index))
        pfg.file_contents.append(r)

    pfg.run_file_split(max_token_limit=1024)
    n_split = len(pfg.sp_file_contents)

    #  <-------- switch the prompt as needed ---------->
    inputs_array, sys_prompt_array = switch_prompt(pfg, mode)
    inputs_show_user_array = [f"{mode} {f}" for f in pfg.sp_file_tag]

    if os.path.exists(pj(project_folder,'temp.pkl')):

        #  <-------- [debug only] if a debug cache file exists, skip the GPT requests ---------->
        # NOTE(review): objload presumably unpickles this file; if project_folder
        # can contain user-supplied files, loading 'temp.pkl' is unsafe - confirm.
        pfg = objload(file=pj(project_folder,'temp.pkl'))

    else:
        #  <-------- multi-threaded gpt requests ---------->
        gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
            inputs_array=inputs_array,
            inputs_show_user_array=inputs_show_user_array,
            llm_kwargs=llm_kwargs,
            chatbot=chatbot,
            history_array=[[""] for _ in range(n_split)],
            sys_prompt_array=sys_prompt_array,
            # max_workers=5,  # limit of parallel tasks: at most 5 at once, the rest wait in a queue
            scroller_max_len = 40
        )

        #  <-------- reassemble the text fragments into complete tex segments ---------->
        # the collection alternates (input, reply); zip keeps the original
        # truncation to the shortest of the three sequences
        pfg.sp_file_result = [
            gpt_say
            for _, gpt_say, _ in zip(gpt_response_collection[0::2], gpt_response_collection[1::2], pfg.sp_file_contents)
        ]
        pfg.merge_result()

        # <-------- temporary storage for debugging ---------->
        pfg.get_token_num = None  # dropped before the object is dumped
        objdump(pfg, file=pj(project_folder,'temp.pkl'))

    write_html(pfg.sp_file_contents, pfg.sp_file_result, chatbot=chatbot, project_folder=project_folder)

    #  <-------- write the output file ---------->
    msg = f"当前大语言模型: {llm_kwargs['llm_model']},当前语言模型温度设定: {llm_kwargs['temperature']}。"
    final_tex = lps.merge_result(pfg.file_result, mode, msg)
    with open(project_folder + f'/merge_{mode}.tex', 'w', encoding='utf-8', errors='replace') as f:
        # in translate_zh mode the text is only written when it contains "binary"
        # (presumably the project URL inside the inserted warning - confirm)
        if mode != 'translate_zh' or "binary" in final_tex: f.write(final_tex)


    #  <-------- wrap up and exit ---------->
    chatbot.append((f"完成了吗?", 'GPT结果已输出, 正在编译PDF'))
    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI

    #  <-------- return ---------->
    return project_folder + f'/merge_{mode}.tex'
672
-
673
-
674
-
675
def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work_folder_modified):
    """
    Remove the first line reported as faulty in a latex log and write the
    result to a new file.

    file_path:            tex file to repair
    log_path:             latex log; error lines are found by searching for
                          "<tex_name>:<line number>:"
    tex_name:             file name as it appears in the log (used as a regex)
    tex_name_pure:        file name without extension, used for the output name
    n_fix:                number of this repair attempt, used for the output name
    work_folder_modified: folder that receives "<tex_name_pure>_fix_<n_fix>.tex"

    Returns (True, "<tex_name_pure>_fix_<n_fix>", sorted error line numbers)
    on success, or (False, -1, [-1]) when anything fails, including the case
    where the log contains no error line.
    """
    try:
        with open(log_path, 'r', encoding='utf-8', errors='replace') as f:
            log = f.read()
        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
            file_lines = f.readlines()
        import re
        buggy_lines = sorted(int(l) for l in re.findall(tex_name+':([0-9]{1,5}):', log))
        print("removing lines that has errors", buggy_lines)
        # only the first reported line is removed (log lines are 1-based);
        # an empty list raises IndexError and ends in the failure branch
        file_lines.pop(buggy_lines[0]-1)
        fixed_name = f"{tex_name_pure}_fix_{n_fix}"
        with open(os.path.join(work_folder_modified, f"{fixed_name}.tex"), 'w', encoding='utf-8', errors='replace') as f:
            f.writelines(file_lines)
        return True, fixed_name, buggy_lines
    except Exception:
        # Exception instead of a bare except: Ctrl-C / SystemExit pass through
        print("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.")
        return False, -1, [-1]
693
-
694
def compile_latex_with_timeout(command, cwd, timeout=60):
    """
    Run the shell command `command` in directory `cwd`, waiting at most
    `timeout` seconds.

    Output of the command is captured and discarded. Returns False when the
    command had to be killed because of the timeout, True otherwise; the
    exit code of the command is not inspected.
    """
    import subprocess
    pipe = subprocess.PIPE
    proc = subprocess.Popen(command, shell=True, stdout=pipe, stderr=pipe, cwd=cwd)
    try:
        proc.communicate(timeout=timeout)
    except subprocess.TimeoutExpired:
        proc.kill()
        proc.communicate()  # collect remaining output after the kill
        print("Process timed out!")
        finished_in_time = False
    else:
        finished_in_time = True
    return finished_in_time
705
-
706
- def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_folder_original, work_folder_modified, work_folder, mode='default'):
707
- import os, time
708
- current_dir = os.getcwd()
709
- n_fix = 1
710
- max_try = 32
711
- chatbot.append([f"正在编译PDF文档", f'编译已经开始。当前工作路径为{work_folder},如果程序停顿5分钟以上,请直接去该路径下取回翻译结果,或者重启之后再度尝试 ...']); yield from update_ui(chatbot=chatbot, history=history)
712
- chatbot.append([f"正在编译PDF文档", '...']); yield from update_ui(chatbot=chatbot, history=history); time.sleep(1); chatbot[-1] = list(chatbot[-1]) # 刷新界面
713
- yield from update_ui_lastest_msg('编译已经开始...', chatbot, history) # 刷新Gradio前端界面
714
-
715
- while True:
716
- import os
717
-
718
- # https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error
719
- yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history) # 刷新Gradio前端界面
720
- ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
721
-
722
- yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译转化后的PDF ...', chatbot, history) # 刷新Gradio前端界面
723
- ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
724
-
725
- if ok and os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf')):
726
- # 只有第二步成功,才能继续下面的步骤
727
- yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译BibTex ...', chatbot, history) # 刷新Gradio前端界面
728
- if not os.path.exists(pj(work_folder_original, f'{main_file_original}.bbl')):
729
- ok = compile_latex_with_timeout(f'bibtex {main_file_original}.aux', work_folder_original)
730
- if not os.path.exists(pj(work_folder_modified, f'{main_file_modified}.bbl')):
731
- ok = compile_latex_with_timeout(f'bibtex {main_file_modified}.aux', work_folder_modified)
732
-
733
- yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译文献交叉引用 ...', chatbot, history) # 刷新Gradio前端界面
734
- ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
735
- ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
736
- ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
737
- ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
738
-
739
- if mode!='translate_zh':
740
- yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面
741
- print( f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
742
- ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
743
-
744
- yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history) # 刷新Gradio前端界面
745
- ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
746
- ok = compile_latex_with_timeout(f'bibtex merge_diff.aux', work_folder)
747
- ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
748
- ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
749
-
750
-
751
- # <---------- 检查结果 ----------->
752
- results_ = ""
753
- original_pdf_success = os.path.exists(pj(work_folder_original, f'{main_file_original}.pdf'))
754
- modified_pdf_success = os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf'))
755
- diff_pdf_success = os.path.exists(pj(work_folder, f'merge_diff.pdf'))
756
- results_ += f"原始PDF编译是否成功: {original_pdf_success};"
757
- results_ += f"转化PDF编译是否成功: {modified_pdf_success};"
758
- results_ += f"对比PDF编译是否成功: {diff_pdf_success};"
759
- yield from update_ui_lastest_msg(f'第{n_fix}编译结束:<br/>{results_}...', chatbot, history) # 刷新Gradio前端界面
760
-
761
- if diff_pdf_success:
762
- result_pdf = pj(work_folder_modified, f'merge_diff.pdf') # get pdf path
763
- promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
764
- if modified_pdf_success:
765
- yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 即将退出 ...', chatbot, history) # 刷新Gradio前端界面
766
- result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf') # get pdf path
767
- if os.path.exists(pj(work_folder, '..', 'translation')):
768
- shutil.copyfile(result_pdf, pj(work_folder, '..', 'translation', 'translate_zh.pdf'))
769
- promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
770
- return True # 成功啦
771
- else:
772
- if n_fix>=max_try: break
773
- n_fix += 1
774
- can_retry, main_file_modified, buggy_lines = remove_buggy_lines(
775
- file_path=pj(work_folder_modified, f'{main_file_modified}.tex'),
776
- log_path=pj(work_folder_modified, f'{main_file_modified}.log'),
777
- tex_name=f'{main_file_modified}.tex',
778
- tex_name_pure=f'{main_file_modified}',
779
- n_fix=n_fix,
780
- work_folder_modified=work_folder_modified,
781
- )
782
- yield from update_ui_lastest_msg(f'由于最为关键的转化PDF编译失败, 将根据报错信息修正tex源文件并重试, 当前报错的latex代码处于第{buggy_lines}行 ...', chatbot, history) # 刷新Gradio前端界面
783
- if not can_retry: break
784
-
785
- return False # 失败啦
786
-
787
-
788
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/live_audio/aliyunASR.py DELETED
@@ -1,261 +0,0 @@
1
- import time, logging, json, sys, struct
2
- import numpy as np
3
- from scipy.io.wavfile import WAVE_FORMAT
4
-
5
def write_numpy_to_wave(filename, rate, data, add_header=False):
    """
    Serialize a NumPy array as WAV-style audio.

    Args:
        filename: Path to open in binary write mode, or an object that
            already exposes ``write`` (it is then used as-is and rewound
            to offset 0 at the end instead of being closed).
        rate: Sample rate stored in the ``fmt`` chunk.
        data: NumPy array of samples. Integer, float and 1-byte unsigned
            dtypes are accepted. A 1-D array is written as mono, otherwise
            ``data.shape[1]`` is used as the channel count.
        add_header: When true, the RIFF/WAVE header is written before the
            data chunk and the RIFF size is patched in afterwards. When
            false, only the ``data`` chunk (tag, size, samples) is written.

    Raises:
        ValueError: The dtype is unsupported, or the resulting file would
            exceed the 32-bit RIFF size limit.
    """
    def _array_tofile(fid, data):
        # ravel() yields a C-contiguous buffer that can be viewed as raw bytes
        fid.write(data.ravel().view('b').data)

    fid = filename if hasattr(filename, 'write') else open(filename, 'wb')

    try:
        kind = data.dtype.kind
        supported = kind in ('i', 'f') or (kind == 'u' and data.dtype.itemsize == 1)
        if not supported:
            raise ValueError("Unsupported data type '%s'" % data.dtype)
        is_pcm = kind in ('i', 'u')

        # ---- fmt chunk ----
        format_tag = WAVE_FORMAT.IEEE_FLOAT if kind == 'f' else WAVE_FORMAT.PCM
        channels = 1 if data.ndim == 1 else data.shape[1]
        bit_depth = data.dtype.itemsize * 8
        sample_bytes = bit_depth // 8
        fmt_chunk_data = struct.pack(
            '<HHIIHH',
            format_tag,
            channels,
            rate,
            rate * sample_bytes * channels,   # bytes per second
            channels * sample_bytes,          # block align
            bit_depth,
        )
        if not is_pcm:
            # non-PCM files carry an (empty) cbSize field
            fmt_chunk_data += b'\x00\x00'

        # The RIFF size is a placeholder here; it is patched after writing.
        header_data = b''.join([
            b'RIFF',
            b'\x00\x00\x00\x00',
            b'WAVE',
            b'fmt ',
            struct.pack('<I', len(fmt_chunk_data)),
            fmt_chunk_data,
        ])

        # ---- fact chunk (non-PCM files only) ----
        if not is_pcm:
            header_data += b'fact' + struct.pack('<II', 4, data.shape[0])

        # Size check must happen right before the data chunk is emitted.
        if ((len(header_data) - 4 - 4) + (4 + 4 + data.nbytes)) > 0xFFFFFFFF:
            raise ValueError("Data exceeds wave file size limit")

        if add_header:
            fid.write(header_data)

        # ---- data chunk ----
        fid.write(b'data')
        fid.write(struct.pack('<I', data.nbytes))
        big_endian = data.dtype.byteorder == '>' or (
            data.dtype.byteorder == '=' and sys.byteorder == 'big')
        if big_endian:
            data = data.byteswap()
        _array_tofile(fid, data)

        if add_header:
            # Now that the total size is known, store it in the RIFF header.
            size = fid.tell()
            fid.seek(4)
            fid.write(struct.pack('<I', size - 8))

    finally:
        if hasattr(filename, 'write'):
            fid.seek(0)
        else:
            fid.close()
85
-
86
def is_speaker_speaking(vad, data, sample_rate):
    """
    Run voice-activity detection over ``data`` in 30 ms frames.

    The WebRTC VAD only accepts 16-bit mono PCM audio sampled at 8000,
    16000, 32000 or 48000 Hz, and a frame must be 10, 20 or 30 ms long.

    Args:
        vad: Object exposing ``is_speech(frame_bytes, sample_rate)``.
        data: Raw audio bytes (2 bytes per sample).
        sample_rate: Sample rate of ``data`` in Hz.

    Returns:
        ``(speaking, info)`` where ``speaking`` is True when any complete
        frame was classified as speech, and ``info`` is a string of at most
        10 characters with ``'^'`` for speech frames and ``'.'`` otherwise.
    """
    frame_duration = 30  # milliseconds
    # x2 because the audio is 16 bit (2 bytes per sample)
    frame_bytes = int(sample_rate * frame_duration / 1000) * 2

    # Step through the buffer one frame at a time. The upper bound is
    # len(data) + 1 so that the final frame is also inspected when the buffer
    # length is an exact multiple of the frame size (the previous per-byte
    # loop stopped at len(data) - 1 and skipped that frame). A trailing
    # partial frame is still ignored, as the VAD rejects odd frame sizes.
    res_list = [
        vad.is_speech(data[end - frame_bytes:end], sample_rate)
        for end in range(frame_bytes, len(data) + 1, frame_bytes)
    ]

    info = ''.join('^' if r else '.' for r in res_list)[:10]
    return any(res_list), info
104
-
105
-
106
class AliyunASR():
    """
    Callbacks and worker loop for streaming audio to Aliyun's real-time
    speech transcriber.

    The methods read and write attributes that this class never creates
    itself (``stop``, ``event_on_result_chg``, ``event_on_entence_end``).
    NOTE(review): presumably a subclass or the owning object provides them;
    verify against the callers.
    """

    def test_on_sentence_begin(self, message, *args):
        # Callback: a sentence started. Nothing to do.
        pass

    def test_on_sentence_end(self, message, *args):
        # Callback: a sentence is final. Store its text and wake any waiter.
        message = json.loads(message)
        self.parsed_sentence = message['payload']['result']
        self.event_on_entence_end.set()

    def test_on_start(self, message, *args):
        # Callback: transcription session started. Nothing to do.
        pass

    def test_on_error(self, message, *args):
        logging.error("on_error args=>{}".format(args))

    def test_on_close(self, *args):
        # The worker loop checks this flag and stops when it turns False.
        self.aliyun_service_ok = False

    def test_on_result_chg(self, message, *args):
        # Callback: intermediate result changed. Store it and wake any waiter.
        message = json.loads(message)
        self.parsed_text = message['payload']['result']
        self.event_on_result_chg.set()

    def test_on_completed(self, message, *args):
        # Callback: transcription completed. Nothing to do.
        pass

    def audio_convertion_thread(self, uuid):
        """
        Worker loop: pull captured audio for ``uuid``, resample it to 16 kHz,
        run voice-activity detection and forward speech to the transcriber.

        Runs until ``self.stop`` becomes true; it also sets ``self.stop``
        (and ``self.stop_msg``) itself once the service reports it closed.

        Args:
            uuid: Object whose ``hex`` attribute identifies the audio stream.
        """
        import nls  # pip install git+https://github.com/aliyun/alibabacloud-nls-python-sdk.git
        import tempfile
        from toolbox import get_conf
        from .audio_io import change_sample_rate
        from .audio_io import RealtimeAudioDistribution
        NEW_SAMPLERATE = 16000
        rad = RealtimeAudioDistribution()
        rad.clean_up()
        temp_folder = tempfile.gettempdir()
        TOKEN, APPKEY = get_conf('ALIYUN_TOKEN', 'ALIYUN_APPKEY')
        if len(TOKEN) == 0:
            # No static token configured: request one with the access key.
            TOKEN = self.get_token()
        self.aliyun_service_ok = True
        URL="wss://nls-gateway.aliyuncs.com/ws/v1"
        sr = nls.NlsSpeechTranscriber(
            url=URL,
            token=TOKEN,
            appkey=APPKEY,
            on_sentence_begin=self.test_on_sentence_begin,
            on_sentence_end=self.test_on_sentence_end,
            on_start=self.test_on_start,
            on_result_changed=self.test_on_result_chg,
            on_completed=self.test_on_completed,
            on_error=self.test_on_error,
            on_close=self.test_on_close,
            callback_args=[uuid.hex]
        )
        timeout_limit_second = 20
        sr.start(aformat="pcm",
                 timeout=timeout_limit_second,
                 enable_intermediate_result=True,
                 enable_punctuation_prediction=True,
                 enable_inverse_text_normalization=True)

        import webrtcvad
        vad = webrtcvad.Vad()
        vad.set_mode(1)

        def send_in_chunks(payload):
            # Zipping one iterator with itself 640 times groups the bytes
            # into 640-byte tuples; a trailing partial group is dropped.
            for chunk in zip(*(iter(payload),) * 640):
                sr.send_audio(bytes(chunk))

        is_previous_frame_transmitted = False   # was the previous frame sent?
        # NOTE(review): previous_frame_data is never assigned after this
        # point, so the "prepend the previous frame" step below is currently
        # a no-op. Confirm whether it should be updated every iteration.
        previous_frame_data = None
        echo_cnt = 0        # remaining frames to keep sending after speech stops
        echo_cnt_max = 4    # how many frames to keep sending after speech stops
        keep_alive_last_send_time = time.time()
        while not self.stop:
            audio = rad.read(uuid.hex)
            if audio is not None:
                # Convert to a pcm file, then read the bytes back.
                temp_file = f'{temp_folder}/{uuid.hex}.pcm'
                dsdata = change_sample_rate(audio, rad.rate, NEW_SAMPLERATE)  # e.g. 48000 --> 16000
                write_numpy_to_wave(temp_file, NEW_SAMPLERATE, dsdata)
                with open(temp_file, "rb") as f:
                    data = f.read()
                is_speaking, info = is_speaker_speaking(vad, data, NEW_SAMPLERATE)

                if is_speaking or echo_cnt > 0:
                    # Microphone is active, or we are in the echo tail phase.
                    echo_cnt -= 1
                    if not is_previous_frame_transmitted:
                        # The previous frame was silent, but send it too.
                        if previous_frame_data is not None:
                            data = previous_frame_data + data
                    if is_speaking:
                        echo_cnt = echo_cnt_max
                    send_in_chunks(data)
                    keep_alive_last_send_time = time.time()
                    is_previous_frame_transmitted = True
                else:
                    is_previous_frame_transmitted = False
                    echo_cnt = 0
                    # Keep the connection alive: even without speech, send
                    # some audio once half the timeout has elapsed.
                    if time.time() - keep_alive_last_send_time > timeout_limit_second/2:
                        send_in_chunks(data)
                        keep_alive_last_send_time = time.time()
                        is_previous_frame_transmitted = True
                self.audio_shape = info
            else:
                time.sleep(0.1)

            if not self.aliyun_service_ok:
                self.stop = True
                self.stop_msg = 'Aliyun音频服务异常,请检查ALIYUN_TOKEN和ALIYUN_APPKEY是否过期。'
        sr.stop()

    def get_token(self):
        """
        Request a temporary NLS token using the configured access key.

        Returns:
            The token id from the ``CreateToken`` response.

        Raises:
            RuntimeError: The request failed or the response held no token.
                (Previously this path crashed with UnboundLocalError at the
                ``return`` statement.)
        """
        from toolbox import get_conf
        import json
        from aliyunsdkcore.request import CommonRequest
        from aliyunsdkcore.client import AcsClient
        AccessKey_ID, AccessKey_secret = get_conf('ALIYUN_ACCESSKEY', 'ALIYUN_SECRET')

        # Create the AcsClient instance.
        client = AcsClient(
            AccessKey_ID,
            AccessKey_secret,
            "cn-shanghai"
        )

        # Build the request and set its parameters.
        request = CommonRequest()
        request.set_method('POST')
        request.set_domain('nls-meta.cn-shanghai.aliyuncs.com')
        request.set_version('2019-02-28')
        request.set_action_name('CreateToken')

        token = None
        try:
            response = client.do_action_with_exception(request)
            jss = json.loads(response)
            if 'Token' in jss and 'Id' in jss['Token']:
                token = jss['Token']['Id']
                expireTime = jss['Token']['ExpireTime']
                # The token and raw response are credentials and are
                # deliberately no longer echoed to stdout.
                print("expireTime = " + str(expireTime))
        except Exception as e:
            print(e)

        if token is None:
            raise RuntimeError(
                "Failed to obtain an Aliyun NLS token; "
                "check ALIYUN_ACCESSKEY and ALIYUN_SECRET.")
        return token
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/live_audio/audio_io.py DELETED
@@ -1,51 +0,0 @@
1
- import numpy as np
2
- from scipy import interpolate
3
-
4
def Singleton(cls):
    """
    Class decorator: every call to the decorated name returns one shared
    instance, created from the arguments of the first call.
    """
    created = {}

    def _singleton(*args, **kargs):
        try:
            return created[cls]
        except KeyError:
            # First call: build the instance and remember it.
            instance = created[cls] = cls(*args, **kargs)
            return instance

    return _singleton
13
-
14
-
15
@Singleton
class RealtimeAudioDistribution():
    """
    Process-wide buffer that collects audio samples per stream id until a
    consumer reads them.
    """

    def __init__(self) -> None:
        self.data = {}                 # stream id -> pending samples
        self.max_len = 1024*1024       # cap applied when appending to a stream
        self.rate = 48000              # samples per second; overwritten by feed()

    def clean_up(self):
        """Drop all pending audio."""
        self.data = {}

    def feed(self, uuid, audio):
        """
        Append a ``(rate, samples)`` pair to the stream ``uuid``.

        The rate is stored on the instance. When the stream already has
        pending samples, the new ones are concatenated and only the newest
        ``max_len`` samples are kept.
        """
        self.rate, samples = audio
        if uuid in self.data:
            merged = np.concatenate((self.data[uuid], samples))
            if len(merged) > self.max_len:
                merged = merged[-self.max_len:]
            self.data[uuid] = merged
        else:
            self.data[uuid] = samples

    def read(self, uuid):
        """Remove and return the pending samples of ``uuid`` (None if absent)."""
        return self.data.pop(uuid, None)
42
-
43
def change_sample_rate(audio, old_sr, new_sr):
    """
    Resample ``audio`` from ``old_sr`` to ``new_sr`` by linear interpolation.

    Args:
        audio: NumPy array whose first axis is time.
        old_sr: Sample rate of ``audio``.
        new_sr: Desired sample rate.

    Returns:
        The resampled audio as ``np.int16``, with
        ``int(audio.shape[0] * new_sr / old_sr)`` samples on the first axis.
    """
    n_old = audio.shape[0]
    duration = n_old / old_sr
    n_new = int(n_old * new_sr / old_sr)

    # Both grids span the same duration; only the number of points differs.
    old_grid = np.linspace(0, duration, n_old)
    new_grid = np.linspace(0, duration, n_new)

    # interp1d interpolates along the last axis, hence the transposes.
    resampled = interpolate.interp1d(old_grid, audio.T)(new_grid)
    return resampled.T.astype(np.int16)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/multi_stage/multi_stage_utils.py DELETED
@@ -1,93 +0,0 @@
1
- from pydantic import BaseModel, Field
2
- from typing import List
3
- from toolbox import update_ui_lastest_msg, disable_auto_promotion
4
- from toolbox import CatchException, update_ui, get_conf, select_api_key, get_log_folder
5
- from request_llms.bridge_all import predict_no_ui_long_connection
6
- from crazy_functions.json_fns.pydantic_io import GptJsonIO, JsonStringError
7
- import time
8
- import pickle
9
-
10
def have_any_recent_upload_files(chatbot):
    """
    Tell whether the chatbot's cookies record an upload from the last
    five minutes.

    Returns False when ``chatbot`` is falsy, when there is no
    ``most_recent_uploaded`` cookie, or when that record is older than
    five minutes.
    """
    max_age_seconds = 5 * 60
    if not chatbot:
        return False  # chatbot is None
    latest_upload = chatbot._cookies.get("most_recent_uploaded", None)
    if not latest_upload:
        return False  # nothing was uploaded
    age = time.time() - latest_upload["time"]
    return age < max_age_seconds
17
-
18
class GptAcademicState():
    """
    Plugin state object persisted as a pickle in the chatbot cookie
    ``plugin_state``.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Hook for subclasses to initialise their fields; no-op here."""
        pass

    def dump_state(self, chatbot):
        """Store a pickle of this object in ``chatbot._cookies['plugin_state']``."""
        # NOTE(review): get_state() attaches the chatbot to the state, so the
        # pickle also contains the chatbot (and its cookies). Confirm that the
        # chatbot type pickles to something small.
        chatbot._cookies['plugin_state'] = pickle.dumps(self)

    def set_state(self, chatbot, key, value):
        """Set attribute ``key`` to ``value`` and persist the state immediately."""
        setattr(self, key, value)
        chatbot._cookies['plugin_state'] = pickle.dumps(self)

    @staticmethod
    def get_state(chatbot, cls=None):
        """
        Load the state stored in the chatbot cookies, or create a new one.

        Declared as a static method so that it behaves the same whether it
        is called on the class or on an instance (without the decorator, an
        instance call shifted the instance into the ``chatbot`` parameter).

        Args:
            chatbot: Object with a ``_cookies`` mapping.
            cls: Class to instantiate when nothing is stored; defaults to
                ``GptAcademicState``.

        Returns:
            The state object, with its ``chatbot`` attribute set to ``chatbot``.
        """
        state = chatbot._cookies.get('plugin_state', None)
        if state is not None:
            # SECURITY NOTE(review): pickle.loads executes arbitrary code when
            # fed crafted bytes. This is only safe while the cookie store can
            # never be written by a client; verify that assumption.
            state = pickle.loads(state)
        elif cls is not None:
            state = cls()
        else:
            state = GptAcademicState()
        state.chatbot = chatbot
        return state
39
-
40
-
41
class GptAcademicGameBaseState():
    """
    Base class for multi-step "game" plugins whose state is pickled into the
    chatbot cookie ``plugin_state/<plugin_name>``.

    Instances are obtained through ``sync_state``. ``continue_game`` calls
    ``self.step``, which this class does not define.
    NOTE(review): presumably subclasses implement ``step``; confirm.
    """

    def init_game(self, chatbot, lock_plugin):
        """Reset the bookkeeping fields of a freshly created state."""
        self.plugin_name = None
        self.callback_fn = None
        self.delete_game = False
        self.step_cnt = 0

    def lock_plugin(self, chatbot):
        """Record ``callback_fn`` in the ``lock_plugin`` cookie and persist the state."""
        if self.callback_fn is None:
            raise ValueError("callback_fn is None")
        chatbot._cookies['lock_plugin'] = self.callback_fn
        self.dump_state(chatbot)

    def get_plugin_name(self):
        """Return the plugin name, raising ValueError when it was never set."""
        name = self.plugin_name
        if name is None:
            raise ValueError("plugin_name is None")
        return name

    def dump_state(self, chatbot):
        """Pickle this object into the plugin's state cookie."""
        cookie_key = f'plugin_state/{self.get_plugin_name()}'
        chatbot._cookies[cookie_key] = pickle.dumps(self)

    def set_state(self, chatbot, key, value):
        """Set attribute ``key`` to ``value`` and persist the state immediately."""
        setattr(self, key, value)
        cookie_key = f'plugin_state/{self.get_plugin_name()}'
        chatbot._cookies[cookie_key] = pickle.dumps(self)

    @staticmethod
    def sync_state(chatbot, llm_kwargs, cls, plugin_name, callback_fn, lock_plugin=True):
        """
        Load the stored state of ``plugin_name`` or create a new ``cls()``.

        In both cases the returned state has ``plugin_name``, ``llm_kwargs``,
        ``chatbot`` and ``callback_fn`` refreshed from the arguments.
        """
        stored = chatbot._cookies.get(f'plugin_state/{plugin_name}', None)
        if stored is None:
            state = cls()
            state.init_game(chatbot, lock_plugin)
        else:
            # NOTE(review): unpickling is only safe if clients cannot write
            # this cookie.
            state = pickle.loads(stored)
        state.plugin_name = plugin_name
        state.llm_kwargs = llm_kwargs
        state.chatbot = chatbot
        state.callback_fn = callback_fn
        return state

    def continue_game(self, prompt, chatbot, history):
        """Run one game step, persist the state and refresh the UI."""
        # main body of the game
        yield from self.step(prompt, chatbot, history)
        self.step_cnt += 1
        # save the state
        self.dump_state(chatbot)
        # clean up once the game is over
        if self.delete_game:
            chatbot._cookies['lock_plugin'] = None
            chatbot._cookies[f'plugin_state/{self.get_plugin_name()}'] = None
        yield from update_ui(chatbot=chatbot, history=history)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/pdf_fns/breakdown_txt.py DELETED
@@ -1,125 +0,0 @@
1
- from crazy_functions.ipc_fns.mp import run_in_subprocess_with_timeout
2
-
3
def force_breakdown(txt, limit, get_token_fn):
    """
    Brute-force split, used when no punctuation or blank line can be used.

    Walks the split position backwards from the end of ``txt`` and returns
    ``(head, tail)`` for the first (i.e. longest) head whose token count is
    strictly below ``limit``. When no head qualifies, a pair of error
    marker strings is returned.
    """
    split_at = len(txt) - 1
    while split_at >= 0:
        head = txt[:split_at]
        if get_token_fn(head) < limit:
            return head, txt[split_at:]
        split_at -= 1
    return "Tiktoken未知错误", "Tiktoken未知错误"
10
-
11
-
12
def maintain_storage(remain_txt_to_cut, remain_txt_to_cut_storage):
    """
    Keep the working text small to speed up token counting.

    When the working text is shorter than ``_min`` characters, everything
    parked in storage is appended to it. When the working text is then
    longer than ``_max`` characters, the part beyond ``_max`` is moved to
    the front of the storage.

    Returns:
        ``(working_text, storage)`` after rebalancing.
    """
    _min = int(5e4)
    _max = int(1e5)

    # Refill the working text from storage when it runs low.
    if remain_txt_to_cut_storage and len(remain_txt_to_cut) < _min:
        remain_txt_to_cut += remain_txt_to_cut_storage
        remain_txt_to_cut_storage = ""

    # Park everything beyond _max characters.
    if len(remain_txt_to_cut) > _max:
        overflow = remain_txt_to_cut[_max:]
        remain_txt_to_cut_storage = overflow + remain_txt_to_cut_storage
        remain_txt_to_cut = remain_txt_to_cut[:_max]

    return remain_txt_to_cut, remain_txt_to_cut_storage
26
-
27
-
28
def cut(limit, get_token_fn, txt_tocut, must_break_at_empty_line, break_anyway=False):
    """
    Split ``txt_tocut`` into fragments that respect a token limit.

    Args:
        limit: Token limit per fragment.
        get_token_fn: Callable returning the token count of a string.
        txt_tocut: The text to split.
        must_break_at_empty_line: When true, only empty lines are accepted
            as split points; otherwise any line boundary is.
        break_anyway: When true, fall back to ``force_breakdown`` if no line
            boundary works; when false, raise instead.

    Returns:
        List of fragments in document order.

    Raises:
        RuntimeError: No split point was found and ``break_anyway`` is false.
    """
    res = []
    total_len = len(txt_tocut)
    fin_len = 0
    remain_txt_to_cut = txt_tocut
    remain_txt_to_cut_storage = ""
    # To speed up token counting, text beyond a character cap is parked in
    # remain_txt_to_cut_storage and pulled back in as the working text shrinks.
    remain_txt_to_cut, remain_txt_to_cut_storage = maintain_storage(remain_txt_to_cut, remain_txt_to_cut_storage)

    while True:
        if get_token_fn(remain_txt_to_cut) <= limit:
            # The working text fits: emit it as one fragment.
            res.append(remain_txt_to_cut); fin_len+=len(remain_txt_to_cut)
            if not remain_txt_to_cut_storage:
                break
            # Text is still parked in storage (possible when the limit is
            # larger than a full working window). Keep going instead of
            # silently dropping it.
            remain_txt_to_cut, remain_txt_to_cut_storage = maintain_storage("", remain_txt_to_cut_storage)
            continue

        # The working text is over the limit: find a place to cut.
        lines = remain_txt_to_cut.split('\n')

        # Estimate the line at which the limit is reached.
        estimated_line_cut = limit / get_token_fn(remain_txt_to_cut) * len(lines)
        estimated_line_cut = int(estimated_line_cut)

        # Search backwards from the estimate for a usable split line (cnt).
        cnt = 0
        for cnt in reversed(range(estimated_line_cut)):
            if must_break_at_empty_line:
                # Only an empty line (i.e. "\n\n") is acceptable here.
                if lines[cnt] != "":
                    continue
            prev = "\n".join(lines[:cnt])
            post = "\n".join(lines[cnt:])
            if get_token_fn(prev) < limit:
                break

        if cnt == 0:
            # No suitable split point was found.
            if break_anyway:
                # Brute-force split of the text that is still left. Splitting
                # the whole original text here (as before) re-emitted content
                # that was already cut and never made progress on a second
                # forced cut.
                prev, post = force_breakdown(remain_txt_to_cut, limit, get_token_fn)
            else:
                # Not allowed to force a split: report the failure.
                raise RuntimeError(f"存在一行极长的文本!{txt_tocut}")

        # Emit the fragment.
        res.append(prev); fin_len+=len(prev)
        # Prepare the next iteration.
        remain_txt_to_cut = post
        remain_txt_to_cut, remain_txt_to_cut_storage = maintain_storage(remain_txt_to_cut, remain_txt_to_cut_storage)
        process = fin_len/total_len
        print(f'正在文本切分 {int(process*100)}%')
        if len(remain_txt_to_cut.strip()) == 0:
            break
    return res
83
-
84
-
85
def breakdown_text_to_satisfy_token_limit_(txt, limit, llm_model="gpt-3.5-turbo"):
    """
    Split ``txt`` into fragments within ``limit`` tokens, trying gentler
    split strategies before falling back to a forced split.

    Token counts come from the tokenizer registered for ``llm_model``.
    """
    from request_llms.bridge_all import model_info
    enc = model_info[llm_model]['tokenizer']
    def get_token_fn(txt): return len(enc.encode(txt, disallowed_special=()))

    def split_on_marker(symbol, marker):
        # Temporarily turn `symbol` into a line break carrying `marker`,
        # cut at line boundaries, then restore the original symbol.
        pieces = cut(limit, get_token_fn, txt.replace(symbol, marker), must_break_at_empty_line=False)
        return [piece.replace(marker, symbol) for piece in pieces]

    strategies = (
        # 1st attempt: split at empty lines (\n\n)
        lambda: cut(limit, get_token_fn, txt, must_break_at_empty_line=True),
        # 2nd attempt: split at any line break (\n)
        lambda: cut(limit, get_token_fn, txt, must_break_at_empty_line=False),
        # 3rd attempt: split at English full stops; the Chinese full stop is
        # used on purpose as a marker
        lambda: split_on_marker('.', '。\n'),
        # 4th attempt: split at Chinese full stops
        lambda: split_on_marker('。', '。。\n'),
    )
    for strategy in strategies:
        try:
            return strategy()
        except RuntimeError:
            continue

    # 5th attempt: nothing else worked, cut wherever the limit falls
    return cut(limit, get_token_fn, txt, must_break_at_empty_line=False, break_anyway=True)
111
-
112
# Public entry point: the splitter wrapped by run_in_subprocess_with_timeout
# with a 60 second timeout.
breakdown_text_to_satisfy_token_limit = run_in_subprocess_with_timeout(breakdown_text_to_satisfy_token_limit_, timeout=60)

if __name__ == '__main__':
    # Manual smoke test on a sample PDF.
    from crazy_functions.crazy_utils import read_and_clean_pdf_text
    file_content, page_one = read_and_clean_pdf_text("build/assets/at.pdf")

    from request_llms.bridge_all import model_info
    # Double the text five times (x32) to get a large input.
    for _ in range(5):
        file_content += file_content

    print(len(file_content))
    TOKEN_LIMIT_PER_FRAGMENT = 2500
    res = breakdown_text_to_satisfy_token_limit(file_content, TOKEN_LIMIT_PER_FRAGMENT)
125
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/pdf_fns/parse_pdf.py DELETED
@@ -1,171 +0,0 @@
1
- from functools import lru_cache
2
- from toolbox import gen_time_str
3
- from toolbox import promote_file_to_downloadzone
4
- from toolbox import write_history_to_file, promote_file_to_downloadzone
5
- from toolbox import get_conf
6
- from toolbox import ProxyNetworkActivate
7
- from colorful import *
8
- import requests
9
- import random
10
- import copy
11
- import os
12
- import math
13
-
14
class GROBID_OFFLINE_EXCEPTION(Exception):
    """Signals that the GROBID service is unavailable."""
15
-
16
def get_avail_grobid_url():
    """
    Pick one configured GROBID endpoint at random and check that it is alive.

    Returns:
        The chosen URL without trailing slashes when its ``/api/isalive``
        endpoint answers ``true``; otherwise None (nothing configured,
        unexpected answer, or any error while probing).
    """
    GROBID_URLS = get_conf('GROBID_URLS')
    if len(GROBID_URLS) == 0: return None
    try:
        # random choice acts as a simple load balancer
        _grobid_url = random.choice(GROBID_URLS).rstrip('/')
        with ProxyNetworkActivate('Connect_Grobid'):
            # NOTE(review): no timeout is passed, so an unresponsive server
            # can block this call indefinitely; consider adding one.
            res = requests.get(_grobid_url+'/api/isalive')
        if res.text=='true': return _grobid_url
        else: return None
    except Exception:
        # Best-effort probe: any ordinary failure means "not available".
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        return None
28
-
29
@lru_cache(maxsize=32)
def parse_pdf(pdf_path, grobid_url):
    """
    Parse a PDF into an article dictionary through a GROBID server.

    Results are cached per ``(pdf_path, grobid_url)``; callers receive the
    same dictionary object on a cache hit, so it should not be mutated.

    Raises:
        GROBID_OFFLINE_EXCEPTION: Re-raised with a user-facing message when
            the parser raises it.
        RuntimeError: Any other parsing failure; the original error is kept
            as ``__cause__``.
    """
    import scipdf   # pip install scipdf_parser
    grobid_url = grobid_url.rstrip('/')
    try:
        with ProxyNetworkActivate('Connect_Grobid'):
            article_dict = scipdf.parse_pdf_to_dict(pdf_path, grobid_url=grobid_url)
    except GROBID_OFFLINE_EXCEPTION:
        raise GROBID_OFFLINE_EXCEPTION("GROBID服务不可用,请修改config中的GROBID_URL,可修改成本地GROBID服务。")
    except Exception as e:
        # `except Exception` (not a bare except) so that KeyboardInterrupt and
        # SystemExit propagate unchanged; chain the cause for debugging.
        raise RuntimeError("解析PDF失败,请检查PDF是否损坏。") from e
    return article_dict
41
-
42
-
43
def produce_report_markdown(gpt_response_collection, meta, paper_meta_info, chatbot, fp, generated_conclusion_files):
    """
    Write two markdown reports and offer both for download.

    ``gpt_response_collection`` is read as alternating entries: even
    indices hold the text shown for a fragment, odd indices hold the
    corresponding response.

    The path of each written file is appended to
    ``generated_conclusion_files``.

    Returns:
        Path of the second (translation-only) report.
    """
    report_head = meta + ["# Meta Translation" , paper_meta_info]

    # ---- file 1: original and translation interleaved ----
    res_path = write_history_to_file(report_head + gpt_response_collection, file_basename=f"{gen_time_str()}translated_and_original.md", file_fullname=None)
    promote_file_to_downloadzone(res_path, rename_file=os.path.basename(res_path)+'.md', chatbot=chatbot)
    generated_conclusion_files.append(res_path)

    # ---- file 2: translated text only ----
    translated_res_array = []
    last_section_name = ""   # heading of the section currently being written
    shown_inputs = gpt_response_collection[0::2]
    responses = gpt_response_collection[1::2]
    for shown, translated in zip(shown_inputs, responses):
        # The section heading is the first line of the shown input, without
        # any " Part..." suffix.
        cur_section_name = shown.split('\n')[0].split(" Part")[0]
        if cur_section_name == last_section_name:
            # Same section as the previous fragment: do not repeat the heading.
            cur_value = ""
        else:
            cur_value = cur_section_name + '\n'
            last_section_name = cur_section_name
        translated_res_array.append(cur_value + translated)

    res_path = write_history_to_file(report_head + translated_res_array,
                                     file_basename = f"{gen_time_str()}-translated_only.md",
                                     file_fullname = None,
                                     auto_caption = False)
    promote_file_to_downloadzone(res_path, rename_file=os.path.basename(res_path)+'.md', chatbot=chatbot)
    generated_conclusion_files.append(res_path)
    return res_path
74
-
75
def translate_pdf(article_dict, llm_kwargs, chatbot, fp, generated_conclusion_files, TOKEN_LIMIT_PER_FRAGMENT, DST_LANG):
    """
    Translate a parsed article section by section and write the reports.

    This is a generator: it delegates to the request helpers with
    ``yield from``. It first asks the model to translate the title and
    abstract, then splits every non-empty section into fragments within the
    token limit, requests all translations, and finally writes the markdown
    and HTML reports. Report paths are appended to
    ``generated_conclusion_files``.
    """
    from crazy_functions.pdf_fns.report_gen_html import construct_html
    from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
    from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
    from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency

    # ---- prompt describing the paper ----
    title = article_dict.get('title', '无法获取 title')
    authors = article_dict.get('authors', '无法获取 authors')[:100]
    abstract = article_dict.get('abstract', '无法获取 abstract')
    prompt = "以下是一篇学术论文的基本信息:\n"
    prompt += f'title:{title}\n\n'
    prompt += f'authors:{authors}\n\n'
    prompt += f'abstract:{abstract}\n\n'
    prompt += f"请将题目和摘要翻译为{DST_LANG}。"
    meta = ['# Title:\n\n', title, '# Abstract:\n\n', abstract ]

    # ---- single request: paper meta information ----
    paper_meta_info = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=prompt,
        inputs_show_user=prompt,
        llm_kwargs=llm_kwargs,
        chatbot=chatbot, history=[],
        sys_prompt="You are an academic paper reader。",
    )

    # ---- batched requests: translation ----
    inputs_array = []
    inputs_show_user_array = []

    from request_llms.bridge_all import model_info
    enc = model_info[llm_kwargs['llm_model']]['tokenizer']
    def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))

    def break_down(txt):
        raw_token_num = get_token_num(txt)
        if raw_token_num <= TOKEN_LIMIT_PER_FRAGMENT:
            return [txt]
        # Over the limit: pick a smoothed limit so the fragments come out
        # roughly even in size.
        count = int(math.ceil(raw_token_num / TOKEN_LIMIT_PER_FRAGMENT))
        token_limit_smooth = raw_token_num // count + count
        return breakdown_text_to_satisfy_token_limit(txt, limit=token_limit_smooth, llm_model=llm_kwargs['llm_model'])

    for section in article_dict.get('sections'):
        if len(section['text']) == 0:
            continue
        section_frags = break_down(section['text'])
        multi_part = len(section_frags) > 1
        for i, fragment in enumerate(section_frags):
            heading = section['heading']
            if multi_part:
                heading += f' Part-{i+1}'
            inputs_array.append(
                f"你需要翻译{heading}章节,内容如下: \n\n{fragment}"
            )
            inputs_show_user_array.append(
                f"# {heading}\n\n{fragment}"
            )

    gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
        inputs_array=inputs_array,
        inputs_show_user_array=inputs_show_user_array,
        llm_kwargs=llm_kwargs,
        chatbot=chatbot,
        history_array=[meta for _ in inputs_array],
        sys_prompt_array=[
            "请你作为一个学术翻译,负责把学术论文准确翻译成中文。注意文章中的每一句话都要翻译。" for _ in inputs_array],
    )

    # ---- markdown reports ----
    produce_report_markdown(gpt_response_collection, meta, paper_meta_info, chatbot, fp, generated_conclusion_files)

    # ---- HTML report ----
    ch = construct_html()
    gpt_response_collection_html = copy.deepcopy(gpt_response_collection)
    for i in range(len(gpt_response_collection_html)):
        if i % 2 == 0:
            # even slot: the text that was shown to the user
            gpt_response_collection_html[i] = inputs_show_user_array[i//2]
        else:
            # odd slot: response, prefixed with the section heading taken from
            # the preceding entry
            cur_section_name = gpt_response_collection[i-1].split('\n')[0].split(" Part")[0]
            gpt_response_collection_html[i] = cur_section_name + "\n" + gpt_response_collection_html[i]

    final = ["", "", "一、论文概况", "", "Abstract", paper_meta_info, "二、论文翻译", ""]
    final.extend(gpt_response_collection_html)
    # Consecutive (even, odd) entries form one row: left and right column.
    for orig, trans in zip(final[0::2], final[1::2]):
        ch.add_row(a=orig, b=trans)
    create_report_file_name = f"{os.path.basename(fp)}.trans.html"
    html_file = ch.save_file(create_report_file_name)
    generated_conclusion_files.append(html_file)
    promote_file_to_downloadzone(html_file, rename_file=os.path.basename(html_file), chatbot=chatbot)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/pdf_fns/report_gen_html.py DELETED
@@ -1,58 +0,0 @@
1
- from toolbox import update_ui, get_conf, trimmed_format_exc, get_log_folder
2
- import os
3
-
4
-
5
-
6
-
7
- class construct_html():
8
- def __init__(self) -> None:
9
- self.html_string = ""
10
-
11
- def add_row(self, a, b):
12
- from toolbox import markdown_convertion
13
- template = """
14
- {
15
- primary_col: {
16
- header: String.raw`__PRIMARY_HEADER__`,
17
- msg: String.raw`__PRIMARY_MSG__`,
18
- },
19
- secondary_rol: {
20
- header: String.raw`__SECONDARY_HEADER__`,
21
- msg: String.raw`__SECONDARY_MSG__`,
22
- }
23
- },
24
- """
25
- def std(str):
26
- str = str.replace(r'`',r'&#96;')
27
- if str.endswith("\\"): str += ' '
28
- if str.endswith("}"): str += ' '
29
- if str.endswith("$"): str += ' '
30
- return str
31
-
32
- template_ = template
33
- a_lines = a.split('\n')
34
- b_lines = b.split('\n')
35
-
36
- if len(a_lines) == 1 or len(a_lines[0]) > 50:
37
- template_ = template_.replace("__PRIMARY_HEADER__", std(a[:20]))
38
- template_ = template_.replace("__PRIMARY_MSG__", std(markdown_convertion(a)))
39
- else:
40
- template_ = template_.replace("__PRIMARY_HEADER__", std(a_lines[0]))
41
- template_ = template_.replace("__PRIMARY_MSG__", std(markdown_convertion('\n'.join(a_lines[1:]))))
42
-
43
- if len(b_lines) == 1 or len(b_lines[0]) > 50:
44
- template_ = template_.replace("__SECONDARY_HEADER__", std(b[:20]))
45
- template_ = template_.replace("__SECONDARY_MSG__", std(markdown_convertion(b)))
46
- else:
47
- template_ = template_.replace("__SECONDARY_HEADER__", std(b_lines[0]))
48
- template_ = template_.replace("__SECONDARY_MSG__", std(markdown_convertion('\n'.join(b_lines[1:]))))
49
- self.html_string += template_
50
-
51
- def save_file(self, file_name):
52
- from toolbox import get_log_folder
53
- with open('crazy_functions/pdf_fns/report_template.html', 'r', encoding='utf8') as f:
54
- html_template = f.read()
55
- html_template = html_template.replace("__TF_ARR__", self.html_string)
56
- with open(os.path.join(get_log_folder(), file_name), 'w', encoding='utf8') as f:
57
- f.write(html_template.encode('utf-8', 'ignore').decode())
58
- return os.path.join(get_log_folder(), file_name)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/pdf_fns/report_template.html DELETED
The diff for this file is too large to render. See raw diff
 
crazy_functions/test_project/cpp/cppipc/buffer.cpp DELETED
@@ -1,87 +0,0 @@
1
- #include "libipc/buffer.h"
2
- #include "libipc/utility/pimpl.h"
3
-
4
- #include <cstring>
5
-
6
- namespace ipc {
7
-
8
- bool operator==(buffer const & b1, buffer const & b2) {
9
- return (b1.size() == b2.size()) && (std::memcmp(b1.data(), b2.data(), b1.size()) == 0);
10
- }
11
-
12
- bool operator!=(buffer const & b1, buffer const & b2) {
13
- return !(b1 == b2);
14
- }
15
-
16
- class buffer::buffer_ : public pimpl<buffer_> {
17
- public:
18
- void* p_;
19
- std::size_t s_;
20
- void* a_;
21
- buffer::destructor_t d_;
22
-
23
- buffer_(void* p, std::size_t s, buffer::destructor_t d, void* a)
24
- : p_(p), s_(s), a_(a), d_(d) {
25
- }
26
-
27
- ~buffer_() {
28
- if (d_ == nullptr) return;
29
- d_((a_ == nullptr) ? p_ : a_, s_);
30
- }
31
- };
32
-
33
- buffer::buffer()
34
- : buffer(nullptr, 0, nullptr, nullptr) {
35
- }
36
-
37
- buffer::buffer(void* p, std::size_t s, destructor_t d)
38
- : p_(p_->make(p, s, d, nullptr)) {
39
- }
40
-
41
- buffer::buffer(void* p, std::size_t s, destructor_t d, void* additional)
42
- : p_(p_->make(p, s, d, additional)) {
43
- }
44
-
45
- buffer::buffer(void* p, std::size_t s)
46
- : buffer(p, s, nullptr) {
47
- }
48
-
49
- buffer::buffer(char const & c)
50
- : buffer(const_cast<char*>(&c), 1) {
51
- }
52
-
53
- buffer::buffer(buffer&& rhs)
54
- : buffer() {
55
- swap(rhs);
56
- }
57
-
58
- buffer::~buffer() {
59
- p_->clear();
60
- }
61
-
62
- void buffer::swap(buffer& rhs) {
63
- std::swap(p_, rhs.p_);
64
- }
65
-
66
- buffer& buffer::operator=(buffer rhs) {
67
- swap(rhs);
68
- return *this;
69
- }
70
-
71
- bool buffer::empty() const noexcept {
72
- return (impl(p_)->p_ == nullptr) || (impl(p_)->s_ == 0);
73
- }
74
-
75
- void* buffer::data() noexcept {
76
- return impl(p_)->p_;
77
- }
78
-
79
- void const * buffer::data() const noexcept {
80
- return impl(p_)->p_;
81
- }
82
-
83
- std::size_t buffer::size() const noexcept {
84
- return impl(p_)->s_;
85
- }
86
-
87
- } // namespace ipc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/test_project/cpp/cppipc/ipc.cpp DELETED
@@ -1,701 +0,0 @@
1
-
2
- #include <type_traits>
3
- #include <cstring>
4
- #include <algorithm>
5
- #include <utility> // std::pair, std::move, std::forward
6
- #include <atomic>
7
- #include <type_traits> // aligned_storage_t
8
- #include <string>
9
- #include <vector>
10
- #include <array>
11
- #include <cassert>
12
-
13
- #include "libipc/ipc.h"
14
- #include "libipc/def.h"
15
- #include "libipc/shm.h"
16
- #include "libipc/pool_alloc.h"
17
- #include "libipc/queue.h"
18
- #include "libipc/policy.h"
19
- #include "libipc/rw_lock.h"
20
- #include "libipc/waiter.h"
21
-
22
- #include "libipc/utility/log.h"
23
- #include "libipc/utility/id_pool.h"
24
- #include "libipc/utility/scope_guard.h"
25
- #include "libipc/utility/utility.h"
26
-
27
- #include "libipc/memory/resource.h"
28
- #include "libipc/platform/detail.h"
29
- #include "libipc/circ/elem_array.h"
30
-
31
- namespace {
32
-
33
- using msg_id_t = std::uint32_t;
34
- using acc_t = std::atomic<msg_id_t>;
35
-
36
- template <std::size_t DataSize, std::size_t AlignSize>
37
- struct msg_t;
38
-
39
- template <std::size_t AlignSize>
40
- struct msg_t<0, AlignSize> {
41
- msg_id_t cc_id_;
42
- msg_id_t id_;
43
- std::int32_t remain_;
44
- bool storage_;
45
- };
46
-
47
- template <std::size_t DataSize, std::size_t AlignSize>
48
- struct msg_t : msg_t<0, AlignSize> {
49
- std::aligned_storage_t<DataSize, AlignSize> data_ {};
50
-
51
- msg_t() = default;
52
- msg_t(msg_id_t cc_id, msg_id_t id, std::int32_t remain, void const * data, std::size_t size)
53
- : msg_t<0, AlignSize> {cc_id, id, remain, (data == nullptr) || (size == 0)} {
54
- if (this->storage_) {
55
- if (data != nullptr) {
56
- // copy storage-id
57
- *reinterpret_cast<ipc::storage_id_t*>(&data_) =
58
- *static_cast<ipc::storage_id_t const *>(data);
59
- }
60
- }
61
- else std::memcpy(&data_, data, size);
62
- }
63
- };
64
-
65
- template <typename T>
66
- ipc::buff_t make_cache(T& data, std::size_t size) {
67
- auto ptr = ipc::mem::alloc(size);
68
- std::memcpy(ptr, &data, (ipc::detail::min)(sizeof(data), size));
69
- return { ptr, size, ipc::mem::free };
70
- }
71
-
72
- struct cache_t {
73
- std::size_t fill_;
74
- ipc::buff_t buff_;
75
-
76
- cache_t(std::size_t f, ipc::buff_t && b)
77
- : fill_(f), buff_(std::move(b))
78
- {}
79
-
80
- void append(void const * data, std::size_t size) {
81
- if (fill_ >= buff_.size() || data == nullptr || size == 0) return;
82
- auto new_fill = (ipc::detail::min)(fill_ + size, buff_.size());
83
- std::memcpy(static_cast<ipc::byte_t*>(buff_.data()) + fill_, data, new_fill - fill_);
84
- fill_ = new_fill;
85
- }
86
- };
87
-
88
- auto cc_acc() {
89
- static ipc::shm::handle acc_h("__CA_CONN__", sizeof(acc_t));
90
- return static_cast<acc_t*>(acc_h.get());
91
- }
92
-
93
- IPC_CONSTEXPR_ std::size_t align_chunk_size(std::size_t size) noexcept {
94
- return (((size - 1) / ipc::large_msg_align) + 1) * ipc::large_msg_align;
95
- }
96
-
97
- IPC_CONSTEXPR_ std::size_t calc_chunk_size(std::size_t size) noexcept {
98
- return ipc::make_align(alignof(std::max_align_t), align_chunk_size(
99
- ipc::make_align(alignof(std::max_align_t), sizeof(std::atomic<ipc::circ::cc_t>)) + size));
100
- }
101
-
102
- struct chunk_t {
103
- std::atomic<ipc::circ::cc_t> &conns() noexcept {
104
- return *reinterpret_cast<std::atomic<ipc::circ::cc_t> *>(this);
105
- }
106
-
107
- void *data() noexcept {
108
- return reinterpret_cast<ipc::byte_t *>(this)
109
- + ipc::make_align(alignof(std::max_align_t), sizeof(std::atomic<ipc::circ::cc_t>));
110
- }
111
- };
112
-
113
- struct chunk_info_t {
114
- ipc::id_pool<> pool_;
115
- ipc::spin_lock lock_;
116
-
117
- IPC_CONSTEXPR_ static std::size_t chunks_mem_size(std::size_t chunk_size) noexcept {
118
- return ipc::id_pool<>::max_count * chunk_size;
119
- }
120
-
121
- ipc::byte_t *chunks_mem() noexcept {
122
- return reinterpret_cast<ipc::byte_t *>(this + 1);
123
- }
124
-
125
- chunk_t *at(std::size_t chunk_size, ipc::storage_id_t id) noexcept {
126
- if (id < 0) return nullptr;
127
- return reinterpret_cast<chunk_t *>(chunks_mem() + (chunk_size * id));
128
- }
129
- };
130
-
131
- auto& chunk_storages() {
132
- class chunk_handle_t {
133
- ipc::shm::handle handle_;
134
-
135
- public:
136
- chunk_info_t *get_info(std::size_t chunk_size) {
137
- if (!handle_.valid() &&
138
- !handle_.acquire( ("__CHUNK_INFO__" + ipc::to_string(chunk_size)).c_str(),
139
- sizeof(chunk_info_t) + chunk_info_t::chunks_mem_size(chunk_size) )) {
140
- ipc::error("[chunk_storages] chunk_shm.id_info_.acquire failed: chunk_size = %zd\n", chunk_size);
141
- return nullptr;
142
- }
143
- auto info = static_cast<chunk_info_t*>(handle_.get());
144
- if (info == nullptr) {
145
- ipc::error("[chunk_storages] chunk_shm.id_info_.get failed: chunk_size = %zd\n", chunk_size);
146
- return nullptr;
147
- }
148
- return info;
149
- }
150
- };
151
- static ipc::map<std::size_t, chunk_handle_t> chunk_hs;
152
- return chunk_hs;
153
- }
154
-
155
- chunk_info_t *chunk_storage_info(std::size_t chunk_size) {
156
- auto &storages = chunk_storages();
157
- std::decay_t<decltype(storages)>::iterator it;
158
- {
159
- static ipc::rw_lock lock;
160
- IPC_UNUSED_ std::shared_lock<ipc::rw_lock> guard {lock};
161
- if ((it = storages.find(chunk_size)) == storages.end()) {
162
- using chunk_handle_t = std::decay_t<decltype(storages)>::value_type::second_type;
163
- guard.unlock();
164
- IPC_UNUSED_ std::lock_guard<ipc::rw_lock> guard {lock};
165
- it = storages.emplace(chunk_size, chunk_handle_t{}).first;
166
- }
167
- }
168
- return it->second.get_info(chunk_size);
169
- }
170
-
171
- std::pair<ipc::storage_id_t, void*> acquire_storage(std::size_t size, ipc::circ::cc_t conns) {
172
- std::size_t chunk_size = calc_chunk_size(size);
173
- auto info = chunk_storage_info(chunk_size);
174
- if (info == nullptr) return {};
175
-
176
- info->lock_.lock();
177
- info->pool_.prepare();
178
- // got an unique id
179
- auto id = info->pool_.acquire();
180
- info->lock_.unlock();
181
-
182
- auto chunk = info->at(chunk_size, id);
183
- if (chunk == nullptr) return {};
184
- chunk->conns().store(conns, std::memory_order_relaxed);
185
- return { id, chunk->data() };
186
- }
187
-
188
- void *find_storage(ipc::storage_id_t id, std::size_t size) {
189
- if (id < 0) {
190
- ipc::error("[find_storage] id is invalid: id = %ld, size = %zd\n", (long)id, size);
191
- return nullptr;
192
- }
193
- std::size_t chunk_size = calc_chunk_size(size);
194
- auto info = chunk_storage_info(chunk_size);
195
- if (info == nullptr) return nullptr;
196
- return info->at(chunk_size, id)->data();
197
- }
198
-
199
- void release_storage(ipc::storage_id_t id, std::size_t size) {
200
- if (id < 0) {
201
- ipc::error("[release_storage] id is invalid: id = %ld, size = %zd\n", (long)id, size);
202
- return;
203
- }
204
- std::size_t chunk_size = calc_chunk_size(size);
205
- auto info = chunk_storage_info(chunk_size);
206
- if (info == nullptr) return;
207
- info->lock_.lock();
208
- info->pool_.release(id);
209
- info->lock_.unlock();
210
- }
211
-
212
- template <ipc::relat Rp, ipc::relat Rc>
213
- bool sub_rc(ipc::wr<Rp, Rc, ipc::trans::unicast>,
214
- std::atomic<ipc::circ::cc_t> &/*conns*/, ipc::circ::cc_t /*curr_conns*/, ipc::circ::cc_t /*conn_id*/) noexcept {
215
- return true;
216
- }
217
-
218
- template <ipc::relat Rp, ipc::relat Rc>
219
- bool sub_rc(ipc::wr<Rp, Rc, ipc::trans::broadcast>,
220
- std::atomic<ipc::circ::cc_t> &conns, ipc::circ::cc_t curr_conns, ipc::circ::cc_t conn_id) noexcept {
221
- auto last_conns = curr_conns & ~conn_id;
222
- for (unsigned k = 0;;) {
223
- auto chunk_conns = conns.load(std::memory_order_acquire);
224
- if (conns.compare_exchange_weak(chunk_conns, chunk_conns & last_conns, std::memory_order_release)) {
225
- return (chunk_conns & last_conns) == 0;
226
- }
227
- ipc::yield(k);
228
- }
229
- }
230
-
231
- template <typename Flag>
232
- void recycle_storage(ipc::storage_id_t id, std::size_t size, ipc::circ::cc_t curr_conns, ipc::circ::cc_t conn_id) {
233
- if (id < 0) {
234
- ipc::error("[recycle_storage] id is invalid: id = %ld, size = %zd\n", (long)id, size);
235
- return;
236
- }
237
- std::size_t chunk_size = calc_chunk_size(size);
238
- auto info = chunk_storage_info(chunk_size);
239
- if (info == nullptr) return;
240
-
241
- auto chunk = info->at(chunk_size, id);
242
- if (chunk == nullptr) return;
243
-
244
- if (!sub_rc(Flag{}, chunk->conns(), curr_conns, conn_id)) {
245
- return;
246
- }
247
- info->lock_.lock();
248
- info->pool_.release(id);
249
- info->lock_.unlock();
250
- }
251
-
252
- template <typename MsgT>
253
- bool clear_message(void* p) {
254
- auto msg = static_cast<MsgT*>(p);
255
- if (msg->storage_) {
256
- std::int32_t r_size = static_cast<std::int32_t>(ipc::data_length) + msg->remain_;
257
- if (r_size <= 0) {
258
- ipc::error("[clear_message] invalid msg size: %d\n", (int)r_size);
259
- return true;
260
- }
261
- release_storage(
262
- *reinterpret_cast<ipc::storage_id_t*>(&msg->data_),
263
- static_cast<std::size_t>(r_size));
264
- }
265
- return true;
266
- }
267
-
268
- struct conn_info_head {
269
-
270
- ipc::string name_;
271
- msg_id_t cc_id_; // connection-info id
272
- ipc::detail::waiter cc_waiter_, wt_waiter_, rd_waiter_;
273
- ipc::shm::handle acc_h_;
274
-
275
- conn_info_head(char const * name)
276
- : name_ {name}
277
- , cc_id_ {(cc_acc() == nullptr) ? 0 : cc_acc()->fetch_add(1, std::memory_order_relaxed)}
278
- , cc_waiter_{("__CC_CONN__" + name_).c_str()}
279
- , wt_waiter_{("__WT_CONN__" + name_).c_str()}
280
- , rd_waiter_{("__RD_CONN__" + name_).c_str()}
281
- , acc_h_ {("__AC_CONN__" + name_).c_str(), sizeof(acc_t)} {
282
- }
283
-
284
- void quit_waiting() {
285
- cc_waiter_.quit_waiting();
286
- wt_waiter_.quit_waiting();
287
- rd_waiter_.quit_waiting();
288
- }
289
-
290
- auto acc() {
291
- return static_cast<acc_t*>(acc_h_.get());
292
- }
293
-
294
- auto& recv_cache() {
295
- thread_local ipc::unordered_map<msg_id_t, cache_t> tls;
296
- return tls;
297
- }
298
- };
299
-
300
- template <typename W, typename F>
301
- bool wait_for(W& waiter, F&& pred, std::uint64_t tm) {
302
- if (tm == 0) return !pred();
303
- for (unsigned k = 0; pred();) {
304
- bool ret = true;
305
- ipc::sleep(k, [&k, &ret, &waiter, &pred, tm] {
306
- ret = waiter.wait_if(std::forward<F>(pred), tm);
307
- k = 0;
308
- });
309
- if (!ret) return false; // timeout or fail
310
- if (k == 0) break; // k has been reset
311
- }
312
- return true;
313
- }
314
-
315
- template <typename Policy,
316
- std::size_t DataSize = ipc::data_length,
317
- std::size_t AlignSize = (ipc::detail::min)(DataSize, alignof(std::max_align_t))>
318
- struct queue_generator {
319
-
320
- using queue_t = ipc::queue<msg_t<DataSize, AlignSize>, Policy>;
321
-
322
- struct conn_info_t : conn_info_head {
323
- queue_t que_;
324
-
325
- conn_info_t(char const * name)
326
- : conn_info_head{name}
327
- , que_{("__QU_CONN__" +
328
- ipc::to_string(DataSize) + "__" +
329
- ipc::to_string(AlignSize) + "__" + name).c_str()} {
330
- }
331
-
332
- void disconnect_receiver() {
333
- bool dis = que_.disconnect();
334
- this->quit_waiting();
335
- if (dis) {
336
- this->recv_cache().clear();
337
- }
338
- }
339
- };
340
- };
341
-
342
- template <typename Policy>
343
- struct detail_impl {
344
-
345
- using policy_t = Policy;
346
- using flag_t = typename policy_t::flag_t;
347
- using queue_t = typename queue_generator<policy_t>::queue_t;
348
- using conn_info_t = typename queue_generator<policy_t>::conn_info_t;
349
-
350
- constexpr static conn_info_t* info_of(ipc::handle_t h) noexcept {
351
- return static_cast<conn_info_t*>(h);
352
- }
353
-
354
- constexpr static queue_t* queue_of(ipc::handle_t h) noexcept {
355
- return (info_of(h) == nullptr) ? nullptr : &(info_of(h)->que_);
356
- }
357
-
358
- /* API implementations */
359
-
360
- static void disconnect(ipc::handle_t h) {
361
- auto que = queue_of(h);
362
- if (que == nullptr) {
363
- return;
364
- }
365
- que->shut_sending();
366
- assert(info_of(h) != nullptr);
367
- info_of(h)->disconnect_receiver();
368
- }
369
-
370
- static bool reconnect(ipc::handle_t * ph, bool start_to_recv) {
371
- assert(ph != nullptr);
372
- assert(*ph != nullptr);
373
- auto que = queue_of(*ph);
374
- if (que == nullptr) {
375
- return false;
376
- }
377
- if (start_to_recv) {
378
- que->shut_sending();
379
- if (que->connect()) { // wouldn't connect twice
380
- info_of(*ph)->cc_waiter_.broadcast();
381
- return true;
382
- }
383
- return false;
384
- }
385
- // start_to_recv == false
386
- if (que->connected()) {
387
- info_of(*ph)->disconnect_receiver();
388
- }
389
- return que->ready_sending();
390
- }
391
-
392
- static bool connect(ipc::handle_t * ph, char const * name, bool start_to_recv) {
393
- assert(ph != nullptr);
394
- if (*ph == nullptr) {
395
- *ph = ipc::mem::alloc<conn_info_t>(name);
396
- }
397
- return reconnect(ph, start_to_recv);
398
- }
399
-
400
- static void destroy(ipc::handle_t h) {
401
- disconnect(h);
402
- ipc::mem::free(info_of(h));
403
- }
404
-
405
- static std::size_t recv_count(ipc::handle_t h) noexcept {
406
- auto que = queue_of(h);
407
- if (que == nullptr) {
408
- return ipc::invalid_value;
409
- }
410
- return que->conn_count();
411
- }
412
-
413
- static bool wait_for_recv(ipc::handle_t h, std::size_t r_count, std::uint64_t tm) {
414
- auto que = queue_of(h);
415
- if (que == nullptr) {
416
- return false;
417
- }
418
- return wait_for(info_of(h)->cc_waiter_, [que, r_count] {
419
- return que->conn_count() < r_count;
420
- }, tm);
421
- }
422
-
423
- template <typename F>
424
- static bool send(F&& gen_push, ipc::handle_t h, void const * data, std::size_t size) {
425
- if (data == nullptr || size == 0) {
426
- ipc::error("fail: send(%p, %zd)\n", data, size);
427
- return false;
428
- }
429
- auto que = queue_of(h);
430
- if (que == nullptr) {
431
- ipc::error("fail: send, queue_of(h) == nullptr\n");
432
- return false;
433
- }
434
- if (que->elems() == nullptr) {
435
- ipc::error("fail: send, queue_of(h)->elems() == nullptr\n");
436
- return false;
437
- }
438
- if (!que->ready_sending()) {
439
- ipc::error("fail: send, que->ready_sending() == false\n");
440
- return false;
441
- }
442
- ipc::circ::cc_t conns = que->elems()->connections(std::memory_order_relaxed);
443
- if (conns == 0) {
444
- ipc::error("fail: send, there is no receiver on this connection.\n");
445
- return false;
446
- }
447
- // calc a new message id
448
- auto acc = info_of(h)->acc();
449
- if (acc == nullptr) {
450
- ipc::error("fail: send, info_of(h)->acc() == nullptr\n");
451
- return false;
452
- }
453
- auto msg_id = acc->fetch_add(1, std::memory_order_relaxed);
454
- auto try_push = std::forward<F>(gen_push)(info_of(h), que, msg_id);
455
- if (size > ipc::large_msg_limit) {
456
- auto dat = acquire_storage(size, conns);
457
- void * buf = dat.second;
458
- if (buf != nullptr) {
459
- std::memcpy(buf, data, size);
460
- return try_push(static_cast<std::int32_t>(size) -
461
- static_cast<std::int32_t>(ipc::data_length), &(dat.first), 0);
462
- }
463
- // try using message fragment
464
- //ipc::log("fail: shm::handle for big message. msg_id: %zd, size: %zd\n", msg_id, size);
465
- }
466
- // push message fragment
467
- std::int32_t offset = 0;
468
- for (std::int32_t i = 0; i < static_cast<std::int32_t>(size / ipc::data_length); ++i, offset += ipc::data_length) {
469
- if (!try_push(static_cast<std::int32_t>(size) - offset - static_cast<std::int32_t>(ipc::data_length),
470
- static_cast<ipc::byte_t const *>(data) + offset, ipc::data_length)) {
471
- return false;
472
- }
473
- }
474
- // if remain > 0, this is the last message fragment
475
- std::int32_t remain = static_cast<std::int32_t>(size) - offset;
476
- if (remain > 0) {
477
- if (!try_push(remain - static_cast<std::int32_t>(ipc::data_length),
478
- static_cast<ipc::byte_t const *>(data) + offset,
479
- static_cast<std::size_t>(remain))) {
480
- return false;
481
- }
482
- }
483
- return true;
484
- }
485
-
486
- static bool send(ipc::handle_t h, void const * data, std::size_t size, std::uint64_t tm) {
487
- return send([tm](auto info, auto que, auto msg_id) {
488
- return [tm, info, que, msg_id](std::int32_t remain, void const * data, std::size_t size) {
489
- if (!wait_for(info->wt_waiter_, [&] {
490
- return !que->push(
491
- [](void*) { return true; },
492
- info->cc_id_, msg_id, remain, data, size);
493
- }, tm)) {
494
- ipc::log("force_push: msg_id = %zd, remain = %d, size = %zd\n", msg_id, remain, size);
495
- if (!que->force_push(
496
- clear_message<typename queue_t::value_t>,
497
- info->cc_id_, msg_id, remain, data, size)) {
498
- return false;
499
- }
500
- }
501
- info->rd_waiter_.broadcast();
502
- return true;
503
- };
504
- }, h, data, size);
505
- }
506
-
507
- static bool try_send(ipc::handle_t h, void const * data, std::size_t size, std::uint64_t tm) {
508
- return send([tm](auto info, auto que, auto msg_id) {
509
- return [tm, info, que, msg_id](std::int32_t remain, void const * data, std::size_t size) {
510
- if (!wait_for(info->wt_waiter_, [&] {
511
- return !que->push(
512
- [](void*) { return true; },
513
- info->cc_id_, msg_id, remain, data, size);
514
- }, tm)) {
515
- return false;
516
- }
517
- info->rd_waiter_.broadcast();
518
- return true;
519
- };
520
- }, h, data, size);
521
- }
522
-
523
- static ipc::buff_t recv(ipc::handle_t h, std::uint64_t tm) {
524
- auto que = queue_of(h);
525
- if (que == nullptr) {
526
- ipc::error("fail: recv, queue_of(h) == nullptr\n");
527
- return {};
528
- }
529
- if (!que->connected()) {
530
- // hasn't connected yet, just return.
531
- return {};
532
- }
533
- auto& rc = info_of(h)->recv_cache();
534
- for (;;) {
535
- // pop a new message
536
- typename queue_t::value_t msg;
537
- if (!wait_for(info_of(h)->rd_waiter_, [que, &msg] {
538
- return !que->pop(msg);
539
- }, tm)) {
540
- // pop failed, just return.
541
- return {};
542
- }
543
- info_of(h)->wt_waiter_.broadcast();
544
- if ((info_of(h)->acc() != nullptr) && (msg.cc_id_ == info_of(h)->cc_id_)) {
545
- continue; // ignore message to self
546
- }
547
- // msg.remain_ may minus & abs(msg.remain_) < data_length
548
- std::int32_t r_size = static_cast<std::int32_t>(ipc::data_length) + msg.remain_;
549
- if (r_size <= 0) {
550
- ipc::error("fail: recv, r_size = %d\n", (int)r_size);
551
- return {};
552
- }
553
- std::size_t msg_size = static_cast<std::size_t>(r_size);
554
- // large message
555
- if (msg.storage_) {
556
- ipc::storage_id_t buf_id = *reinterpret_cast<ipc::storage_id_t*>(&msg.data_);
557
- void* buf = find_storage(buf_id, msg_size);
558
- if (buf != nullptr) {
559
- struct recycle_t {
560
- ipc::storage_id_t storage_id;
561
- ipc::circ::cc_t curr_conns;
562
- ipc::circ::cc_t conn_id;
563
- } *r_info = ipc::mem::alloc<recycle_t>(recycle_t{
564
- buf_id, que->elems()->connections(std::memory_order_relaxed), que->connected_id()
565
- });
566
- if (r_info == nullptr) {
567
- ipc::log("fail: ipc::mem::alloc<recycle_t>.\n");
568
- return ipc::buff_t{buf, msg_size}; // no recycle
569
- } else {
570
- return ipc::buff_t{buf, msg_size, [](void* p_info, std::size_t size) {
571
- auto r_info = static_cast<recycle_t *>(p_info);
572
- IPC_UNUSED_ auto finally = ipc::guard([r_info] {
573
- ipc::mem::free(r_info);
574
- });
575
- recycle_storage<flag_t>(r_info->storage_id, size, r_info->curr_conns, r_info->conn_id);
576
- }, r_info};
577
- }
578
- } else {
579
- ipc::log("fail: shm::handle for large message. msg_id: %zd, buf_id: %zd, size: %zd\n", msg.id_, buf_id, msg_size);
580
- continue;
581
- }
582
- }
583
- // find cache with msg.id_
584
- auto cac_it = rc.find(msg.id_);
585
- if (cac_it == rc.end()) {
586
- if (msg_size <= ipc::data_length) {
587
- return make_cache(msg.data_, msg_size);
588
- }
589
- // gc
590
- if (rc.size() > 1024) {
591
- std::vector<msg_id_t> need_del;
592
- for (auto const & pair : rc) {
593
- auto cmp = std::minmax(msg.id_, pair.first);
594
- if (cmp.second - cmp.first > 8192) {
595
- need_del.push_back(pair.first);
596
- }
597
- }
598
- for (auto id : need_del) rc.erase(id);
599
- }
600
- // cache the first message fragment
601
- rc.emplace(msg.id_, cache_t { ipc::data_length, make_cache(msg.data_, msg_size) });
602
- }
603
- // has cached before this message
604
- else {
605
- auto& cac = cac_it->second;
606
- // this is the last message fragment
607
- if (msg.remain_ <= 0) {
608
- cac.append(&(msg.data_), msg_size);
609
- // finish this message, erase it from cache
610
- auto buff = std::move(cac.buff_);
611
- rc.erase(cac_it);
612
- return buff;
613
- }
614
- // there are remain datas after this message
615
- cac.append(&(msg.data_), ipc::data_length);
616
- }
617
- }
618
- }
619
-
620
- static ipc::buff_t try_recv(ipc::handle_t h) {
621
- return recv(h, 0);
622
- }
623
-
624
- }; // detail_impl<Policy>
625
-
626
- template <typename Flag>
627
- using policy_t = ipc::policy::choose<ipc::circ::elem_array, Flag>;
628
-
629
- } // internal-linkage
630
-
631
- namespace ipc {
632
-
633
- template <typename Flag>
634
- ipc::handle_t chan_impl<Flag>::inited() {
635
- ipc::detail::waiter::init();
636
- return nullptr;
637
- }
638
-
639
- template <typename Flag>
640
- bool chan_impl<Flag>::connect(ipc::handle_t * ph, char const * name, unsigned mode) {
641
- return detail_impl<policy_t<Flag>>::connect(ph, name, mode & receiver);
642
- }
643
-
644
- template <typename Flag>
645
- bool chan_impl<Flag>::reconnect(ipc::handle_t * ph, unsigned mode) {
646
- return detail_impl<policy_t<Flag>>::reconnect(ph, mode & receiver);
647
- }
648
-
649
- template <typename Flag>
650
- void chan_impl<Flag>::disconnect(ipc::handle_t h) {
651
- detail_impl<policy_t<Flag>>::disconnect(h);
652
- }
653
-
654
- template <typename Flag>
655
- void chan_impl<Flag>::destroy(ipc::handle_t h) {
656
- detail_impl<policy_t<Flag>>::destroy(h);
657
- }
658
-
659
- template <typename Flag>
660
- char const * chan_impl<Flag>::name(ipc::handle_t h) {
661
- auto info = detail_impl<policy_t<Flag>>::info_of(h);
662
- return (info == nullptr) ? nullptr : info->name_.c_str();
663
- }
664
-
665
- template <typename Flag>
666
- std::size_t chan_impl<Flag>::recv_count(ipc::handle_t h) {
667
- return detail_impl<policy_t<Flag>>::recv_count(h);
668
- }
669
-
670
- template <typename Flag>
671
- bool chan_impl<Flag>::wait_for_recv(ipc::handle_t h, std::size_t r_count, std::uint64_t tm) {
672
- return detail_impl<policy_t<Flag>>::wait_for_recv(h, r_count, tm);
673
- }
674
-
675
- template <typename Flag>
676
- bool chan_impl<Flag>::send(ipc::handle_t h, void const * data, std::size_t size, std::uint64_t tm) {
677
- return detail_impl<policy_t<Flag>>::send(h, data, size, tm);
678
- }
679
-
680
- template <typename Flag>
681
- buff_t chan_impl<Flag>::recv(ipc::handle_t h, std::uint64_t tm) {
682
- return detail_impl<policy_t<Flag>>::recv(h, tm);
683
- }
684
-
685
- template <typename Flag>
686
- bool chan_impl<Flag>::try_send(ipc::handle_t h, void const * data, std::size_t size, std::uint64_t tm) {
687
- return detail_impl<policy_t<Flag>>::try_send(h, data, size, tm);
688
- }
689
-
690
- template <typename Flag>
691
- buff_t chan_impl<Flag>::try_recv(ipc::handle_t h) {
692
- return detail_impl<policy_t<Flag>>::try_recv(h);
693
- }
694
-
695
- template struct chan_impl<ipc::wr<relat::single, relat::single, trans::unicast >>;
696
- // template struct chan_impl<ipc::wr<relat::single, relat::multi , trans::unicast >>; // TBD
697
- // template struct chan_impl<ipc::wr<relat::multi , relat::multi , trans::unicast >>; // TBD
698
- template struct chan_impl<ipc::wr<relat::single, relat::multi , trans::broadcast>>;
699
- template struct chan_impl<ipc::wr<relat::multi , relat::multi , trans::broadcast>>;
700
-
701
- } // namespace ipc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/test_project/cpp/cppipc/policy.h DELETED
@@ -1,25 +0,0 @@
1
- #pragma once
2
-
3
- #include <type_traits>
4
-
5
- #include "libipc/def.h"
6
- #include "libipc/prod_cons.h"
7
-
8
- #include "libipc/circ/elem_array.h"
9
-
10
- namespace ipc {
11
- namespace policy {
12
-
13
- template <template <typename, std::size_t...> class Elems, typename Flag>
14
- struct choose;
15
-
16
- template <typename Flag>
17
- struct choose<circ::elem_array, Flag> {
18
- using flag_t = Flag;
19
-
20
- template <std::size_t DataSize, std::size_t AlignSize>
21
- using elems_t = circ::elem_array<ipc::prod_cons_impl<flag_t>, DataSize, AlignSize>;
22
- };
23
-
24
- } // namespace policy
25
- } // namespace ipc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/test_project/cpp/cppipc/pool_alloc.cpp DELETED
@@ -1,17 +0,0 @@
1
- #include "libipc/pool_alloc.h"
2
-
3
- #include "libipc/memory/resource.h"
4
-
5
- namespace ipc {
6
- namespace mem {
7
-
8
- void* pool_alloc::alloc(std::size_t size) {
9
- return async_pool_alloc::alloc(size);
10
- }
11
-
12
- void pool_alloc::free(void* p, std::size_t size) {
13
- async_pool_alloc::free(p, size);
14
- }
15
-
16
- } // namespace mem
17
- } // namespace ipc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/test_project/cpp/cppipc/prod_cons.h DELETED
@@ -1,433 +0,0 @@
1
- #pragma once
2
-
3
- #include <atomic>
4
- #include <utility>
5
- #include <cstring>
6
- #include <type_traits>
7
- #include <cstdint>
8
-
9
- #include "libipc/def.h"
10
-
11
- #include "libipc/platform/detail.h"
12
- #include "libipc/circ/elem_def.h"
13
- #include "libipc/utility/log.h"
14
- #include "libipc/utility/utility.h"
15
-
16
- namespace ipc {
17
-
18
- ////////////////////////////////////////////////////////////////
19
- /// producer-consumer implementation
20
- ////////////////////////////////////////////////////////////////
21
-
22
- template <typename Flag>
23
- struct prod_cons_impl;
24
-
25
- template <>
26
- struct prod_cons_impl<wr<relat::single, relat::single, trans::unicast>> {
27
-
28
- template <std::size_t DataSize, std::size_t AlignSize>
29
- struct elem_t {
30
- std::aligned_storage_t<DataSize, AlignSize> data_ {};
31
- };
32
-
33
- alignas(cache_line_size) std::atomic<circ::u2_t> rd_; // read index
34
- alignas(cache_line_size) std::atomic<circ::u2_t> wt_; // write index
35
-
36
- constexpr circ::u2_t cursor() const noexcept {
37
- return 0;
38
- }
39
-
40
- template <typename W, typename F, typename E>
41
- bool push(W* /*wrapper*/, F&& f, E* elems) {
42
- auto cur_wt = circ::index_of(wt_.load(std::memory_order_relaxed));
43
- if (cur_wt == circ::index_of(rd_.load(std::memory_order_acquire) - 1)) {
44
- return false; // full
45
- }
46
- std::forward<F>(f)(&(elems[cur_wt].data_));
47
- wt_.fetch_add(1, std::memory_order_release);
48
- return true;
49
- }
50
-
51
- /**
52
- * In single-single-unicast, 'force_push' means 'no reader' or 'the only one reader is dead'.
53
- * So we could just disconnect all connections of receiver, and return false.
54
- */
55
- template <typename W, typename F, typename E>
56
- bool force_push(W* wrapper, F&&, E*) {
57
- wrapper->elems()->disconnect_receiver(~static_cast<circ::cc_t>(0u));
58
- return false;
59
- }
60
-
61
- template <typename W, typename F, typename R, typename E>
62
- bool pop(W* /*wrapper*/, circ::u2_t& /*cur*/, F&& f, R&& out, E* elems) {
63
- auto cur_rd = circ::index_of(rd_.load(std::memory_order_relaxed));
64
- if (cur_rd == circ::index_of(wt_.load(std::memory_order_acquire))) {
65
- return false; // empty
66
- }
67
- std::forward<F>(f)(&(elems[cur_rd].data_));
68
- std::forward<R>(out)(true);
69
- rd_.fetch_add(1, std::memory_order_release);
70
- return true;
71
- }
72
- };
73
-
74
- template <>
75
- struct prod_cons_impl<wr<relat::single, relat::multi , trans::unicast>>
76
- : prod_cons_impl<wr<relat::single, relat::single, trans::unicast>> {
77
-
78
- template <typename W, typename F, typename E>
79
- bool force_push(W* wrapper, F&&, E*) {
80
- wrapper->elems()->disconnect_receiver(1);
81
- return false;
82
- }
83
-
84
- template <typename W, typename F, typename R,
85
- template <std::size_t, std::size_t> class E, std::size_t DS, std::size_t AS>
86
- bool pop(W* /*wrapper*/, circ::u2_t& /*cur*/, F&& f, R&& out, E<DS, AS>* elems) {
87
- byte_t buff[DS];
88
- for (unsigned k = 0;;) {
89
- auto cur_rd = rd_.load(std::memory_order_relaxed);
90
- if (circ::index_of(cur_rd) ==
91
- circ::index_of(wt_.load(std::memory_order_acquire))) {
92
- return false; // empty
93
- }
94
- std::memcpy(buff, &(elems[circ::index_of(cur_rd)].data_), sizeof(buff));
95
- if (rd_.compare_exchange_weak(cur_rd, cur_rd + 1, std::memory_order_release)) {
96
- std::forward<F>(f)(buff);
97
- std::forward<R>(out)(true);
98
- return true;
99
- }
100
- ipc::yield(k);
101
- }
102
- }
103
- };
104
-
105
- template <>
106
- struct prod_cons_impl<wr<relat::multi , relat::multi, trans::unicast>>
107
- : prod_cons_impl<wr<relat::single, relat::multi, trans::unicast>> {
108
-
109
- using flag_t = std::uint64_t;
110
-
111
- template <std::size_t DataSize, std::size_t AlignSize>
112
- struct elem_t {
113
- std::aligned_storage_t<DataSize, AlignSize> data_ {};
114
- std::atomic<flag_t> f_ct_ { 0 }; // commit flag
115
- };
116
-
117
- alignas(cache_line_size) std::atomic<circ::u2_t> ct_; // commit index
118
-
119
- template <typename W, typename F, typename E>
120
- bool push(W* /*wrapper*/, F&& f, E* elems) {
121
- circ::u2_t cur_ct, nxt_ct;
122
- for (unsigned k = 0;;) {
123
- cur_ct = ct_.load(std::memory_order_relaxed);
124
- if (circ::index_of(nxt_ct = cur_ct + 1) ==
125
- circ::index_of(rd_.load(std::memory_order_acquire))) {
126
- return false; // full
127
- }
128
- if (ct_.compare_exchange_weak(cur_ct, nxt_ct, std::memory_order_acq_rel)) {
129
- break;
130
- }
131
- ipc::yield(k);
132
- }
133
- auto* el = elems + circ::index_of(cur_ct);
134
- std::forward<F>(f)(&(el->data_));
135
- // set flag & try update wt
136
- el->f_ct_.store(~static_cast<flag_t>(cur_ct), std::memory_order_release);
137
- while (1) {
138
- auto cac_ct = el->f_ct_.load(std::memory_order_acquire);
139
- if (cur_ct != wt_.load(std::memory_order_relaxed)) {
140
- return true;
141
- }
142
- if ((~cac_ct) != cur_ct) {
143
- return true;
144
- }
145
- if (!el->f_ct_.compare_exchange_strong(cac_ct, 0, std::memory_order_relaxed)) {
146
- return true;
147
- }
148
- wt_.store(nxt_ct, std::memory_order_release);
149
- cur_ct = nxt_ct;
150
- nxt_ct = cur_ct + 1;
151
- el = elems + circ::index_of(cur_ct);
152
- }
153
- return true;
154
- }
155
-
156
- template <typename W, typename F, typename E>
157
- bool force_push(W* wrapper, F&&, E*) {
158
- wrapper->elems()->disconnect_receiver(1);
159
- return false;
160
- }
161
-
162
- template <typename W, typename F, typename R,
163
- template <std::size_t, std::size_t> class E, std::size_t DS, std::size_t AS>
164
- bool pop(W* /*wrapper*/, circ::u2_t& /*cur*/, F&& f, R&& out, E<DS, AS>* elems) {
165
- byte_t buff[DS];
166
- for (unsigned k = 0;;) {
167
- auto cur_rd = rd_.load(std::memory_order_relaxed);
168
- auto cur_wt = wt_.load(std::memory_order_acquire);
169
- auto id_rd = circ::index_of(cur_rd);
170
- auto id_wt = circ::index_of(cur_wt);
171
- if (id_rd == id_wt) {
172
- auto* el = elems + id_wt;
173
- auto cac_ct = el->f_ct_.load(std::memory_order_acquire);
174
- if ((~cac_ct) != cur_wt) {
175
- return false; // empty
176
- }
177
- if (el->f_ct_.compare_exchange_weak(cac_ct, 0, std::memory_order_relaxed)) {
178
- wt_.store(cur_wt + 1, std::memory_order_release);
179
- }
180
- k = 0;
181
- }
182
- else {
183
- std::memcpy(buff, &(elems[circ::index_of(cur_rd)].data_), sizeof(buff));
184
- if (rd_.compare_exchange_weak(cur_rd, cur_rd + 1, std::memory_order_release)) {
185
- std::forward<F>(f)(buff);
186
- std::forward<R>(out)(true);
187
- return true;
188
- }
189
- ipc::yield(k);
190
- }
191
- }
192
- }
193
- };
194
-
195
- template <>
196
- struct prod_cons_impl<wr<relat::single, relat::multi, trans::broadcast>> {
197
-
198
- using rc_t = std::uint64_t;
199
-
200
- enum : rc_t {
201
- ep_mask = 0x00000000ffffffffull,
202
- ep_incr = 0x0000000100000000ull
203
- };
204
-
205
- template <std::size_t DataSize, std::size_t AlignSize>
206
- struct elem_t {
207
- std::aligned_storage_t<DataSize, AlignSize> data_ {};
208
- std::atomic<rc_t> rc_ { 0 }; // read-counter
209
- };
210
-
211
- alignas(cache_line_size) std::atomic<circ::u2_t> wt_; // write index
212
- alignas(cache_line_size) rc_t epoch_ { 0 }; // only one writer
213
-
214
- circ::u2_t cursor() const noexcept {
215
- return wt_.load(std::memory_order_acquire);
216
- }
217
-
218
- template <typename W, typename F, typename E>
219
- bool push(W* wrapper, F&& f, E* elems) {
220
- E* el;
221
- for (unsigned k = 0;;) {
222
- circ::cc_t cc = wrapper->elems()->connections(std::memory_order_relaxed);
223
- if (cc == 0) return false; // no reader
224
- el = elems + circ::index_of(wt_.load(std::memory_order_relaxed));
225
- // check all consumers have finished reading this element
226
- auto cur_rc = el->rc_.load(std::memory_order_acquire);
227
- circ::cc_t rem_cc = cur_rc & ep_mask;
228
- if ((cc & rem_cc) && ((cur_rc & ~ep_mask) == epoch_)) {
229
- return false; // has not finished yet
230
- }
231
- // consider rem_cc to be 0 here
232
- if (el->rc_.compare_exchange_weak(
233
- cur_rc, epoch_ | static_cast<rc_t>(cc), std::memory_order_release)) {
234
- break;
235
- }
236
- ipc::yield(k);
237
- }
238
- std::forward<F>(f)(&(el->data_));
239
- wt_.fetch_add(1, std::memory_order_release);
240
- return true;
241
- }
242
-
243
- template <typename W, typename F, typename E>
244
- bool force_push(W* wrapper, F&& f, E* elems) {
245
- E* el;
246
- epoch_ += ep_incr;
247
- for (unsigned k = 0;;) {
248
- circ::cc_t cc = wrapper->elems()->connections(std::memory_order_relaxed);
249
- if (cc == 0) return false; // no reader
250
- el = elems + circ::index_of(wt_.load(std::memory_order_relaxed));
251
- // check all consumers have finished reading this element
252
- auto cur_rc = el->rc_.load(std::memory_order_acquire);
253
- circ::cc_t rem_cc = cur_rc & ep_mask;
254
- if (cc & rem_cc) {
255
- ipc::log("force_push: k = %u, cc = %u, rem_cc = %u\n", k, cc, rem_cc);
256
- cc = wrapper->elems()->disconnect_receiver(rem_cc); // disconnect all invalid readers
257
- if (cc == 0) return false; // no reader
258
- }
259
- // just compare & exchange
260
- if (el->rc_.compare_exchange_weak(
261
- cur_rc, epoch_ | static_cast<rc_t>(cc), std::memory_order_release)) {
262
- break;
263
- }
264
- ipc::yield(k);
265
- }
266
- std::forward<F>(f)(&(el->data_));
267
- wt_.fetch_add(1, std::memory_order_release);
268
- return true;
269
- }
270
-
271
- template <typename W, typename F, typename R, typename E>
272
- bool pop(W* wrapper, circ::u2_t& cur, F&& f, R&& out, E* elems) {
273
- if (cur == cursor()) return false; // acquire
274
- auto* el = elems + circ::index_of(cur++);
275
- std::forward<F>(f)(&(el->data_));
276
- for (unsigned k = 0;;) {
277
- auto cur_rc = el->rc_.load(std::memory_order_acquire);
278
- if ((cur_rc & ep_mask) == 0) {
279
- std::forward<R>(out)(true);
280
- return true;
281
- }
282
- auto nxt_rc = cur_rc & ~static_cast<rc_t>(wrapper->connected_id());
283
- if (el->rc_.compare_exchange_weak(cur_rc, nxt_rc, std::memory_order_release)) {
284
- std::forward<R>(out)((nxt_rc & ep_mask) == 0);
285
- return true;
286
- }
287
- ipc::yield(k);
288
- }
289
- }
290
- };
291
-
292
- template <>
293
- struct prod_cons_impl<wr<relat::multi, relat::multi, trans::broadcast>> {
294
-
295
- using rc_t = std::uint64_t;
296
- using flag_t = std::uint64_t;
297
-
298
- enum : rc_t {
299
- rc_mask = 0x00000000ffffffffull,
300
- ep_mask = 0x00ffffffffffffffull,
301
- ep_incr = 0x0100000000000000ull,
302
- ic_mask = 0xff000000ffffffffull,
303
- ic_incr = 0x0000000100000000ull
304
- };
305
-
306
- template <std::size_t DataSize, std::size_t AlignSize>
307
- struct elem_t {
308
- std::aligned_storage_t<DataSize, AlignSize> data_ {};
309
- std::atomic<rc_t > rc_ { 0 }; // read-counter
310
- std::atomic<flag_t> f_ct_ { 0 }; // commit flag
311
- };
312
-
313
- alignas(cache_line_size) std::atomic<circ::u2_t> ct_; // commit index
314
- alignas(cache_line_size) std::atomic<rc_t> epoch_ { 0 };
315
-
316
- circ::u2_t cursor() const noexcept {
317
- return ct_.load(std::memory_order_acquire);
318
- }
319
-
320
- constexpr static rc_t inc_rc(rc_t rc) noexcept {
321
- return (rc & ic_mask) | ((rc + ic_incr) & ~ic_mask);
322
- }
323
-
324
- constexpr static rc_t inc_mask(rc_t rc) noexcept {
325
- return inc_rc(rc) & ~rc_mask;
326
- }
327
-
328
- template <typename W, typename F, typename E>
329
- bool push(W* wrapper, F&& f, E* elems) {
330
- E* el;
331
- circ::u2_t cur_ct;
332
- rc_t epoch = epoch_.load(std::memory_order_acquire);
333
- for (unsigned k = 0;;) {
334
- circ::cc_t cc = wrapper->elems()->connections(std::memory_order_relaxed);
335
- if (cc == 0) return false; // no reader
336
- el = elems + circ::index_of(cur_ct = ct_.load(std::memory_order_relaxed));
337
- // check all consumers have finished reading this element
338
- auto cur_rc = el->rc_.load(std::memory_order_relaxed);
339
- circ::cc_t rem_cc = cur_rc & rc_mask;
340
- if ((cc & rem_cc) && ((cur_rc & ~ep_mask) == epoch)) {
341
- return false; // has not finished yet
342
- }
343
- else if (!rem_cc) {
344
- auto cur_fl = el->f_ct_.load(std::memory_order_acquire);
345
- if ((cur_fl != cur_ct) && cur_fl) {
346
- return false; // full
347
- }
348
- }
349
- // consider rem_cc to be 0 here
350
- if (el->rc_.compare_exchange_weak(
351
- cur_rc, inc_mask(epoch | (cur_rc & ep_mask)) | static_cast<rc_t>(cc), std::memory_order_relaxed) &&
352
- epoch_.compare_exchange_weak(epoch, epoch, std::memory_order_acq_rel)) {
353
- break;
354
- }
355
- ipc::yield(k);
356
- }
357
- // only one thread/process would touch here at one time
358
- ct_.store(cur_ct + 1, std::memory_order_release);
359
- std::forward<F>(f)(&(el->data_));
360
- // set flag & try update wt
361
- el->f_ct_.store(~static_cast<flag_t>(cur_ct), std::memory_order_release);
362
- return true;
363
- }
364
-
365
- template <typename W, typename F, typename E>
366
- bool force_push(W* wrapper, F&& f, E* elems) {
367
- E* el;
368
- circ::u2_t cur_ct;
369
- rc_t epoch = epoch_.fetch_add(ep_incr, std::memory_order_release) + ep_incr;
370
- for (unsigned k = 0;;) {
371
- circ::cc_t cc = wrapper->elems()->connections(std::memory_order_relaxed);
372
- if (cc == 0) return false; // no reader
373
- el = elems + circ::index_of(cur_ct = ct_.load(std::memory_order_relaxed));
374
- // check all consumers have finished reading this element
375
- auto cur_rc = el->rc_.load(std::memory_order_acquire);
376
- circ::cc_t rem_cc = cur_rc & rc_mask;
377
- if (cc & rem_cc) {
378
- ipc::log("force_push: k = %u, cc = %u, rem_cc = %u\n", k, cc, rem_cc);
379
- cc = wrapper->elems()->disconnect_receiver(rem_cc); // disconnect all invalid readers
380
- if (cc == 0) return false; // no reader
381
- }
382
- // just compare & exchange
383
- if (el->rc_.compare_exchange_weak(
384
- cur_rc, inc_mask(epoch | (cur_rc & ep_mask)) | static_cast<rc_t>(cc), std::memory_order_relaxed)) {
385
- if (epoch == epoch_.load(std::memory_order_acquire)) {
386
- break;
387
- }
388
- else if (push(wrapper, std::forward<F>(f), elems)) {
389
- return true;
390
- }
391
- epoch = epoch_.fetch_add(ep_incr, std::memory_order_release) + ep_incr;
392
- }
393
- ipc::yield(k);
394
- }
395
- // only one thread/process would touch here at one time
396
- ct_.store(cur_ct + 1, std::memory_order_release);
397
- std::forward<F>(f)(&(el->data_));
398
- // set flag & try update wt
399
- el->f_ct_.store(~static_cast<flag_t>(cur_ct), std::memory_order_release);
400
- return true;
401
- }
402
-
403
- template <typename W, typename F, typename R, typename E, std::size_t N>
404
- bool pop(W* wrapper, circ::u2_t& cur, F&& f, R&& out, E(& elems)[N]) {
405
- auto* el = elems + circ::index_of(cur);
406
- auto cur_fl = el->f_ct_.load(std::memory_order_acquire);
407
- if (cur_fl != ~static_cast<flag_t>(cur)) {
408
- return false; // empty
409
- }
410
- ++cur;
411
- std::forward<F>(f)(&(el->data_));
412
- for (unsigned k = 0;;) {
413
- auto cur_rc = el->rc_.load(std::memory_order_acquire);
414
- if ((cur_rc & rc_mask) == 0) {
415
- std::forward<R>(out)(true);
416
- el->f_ct_.store(cur + N - 1, std::memory_order_release);
417
- return true;
418
- }
419
- auto nxt_rc = inc_rc(cur_rc) & ~static_cast<rc_t>(wrapper->connected_id());
420
- bool last_one = false;
421
- if ((last_one = (nxt_rc & rc_mask) == 0)) {
422
- el->f_ct_.store(cur + N - 1, std::memory_order_release);
423
- }
424
- if (el->rc_.compare_exchange_weak(cur_rc, nxt_rc, std::memory_order_release)) {
425
- std::forward<R>(out)(last_one);
426
- return true;
427
- }
428
- ipc::yield(k);
429
- }
430
- }
431
- };
432
-
433
- } // namespace ipc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/test_project/cpp/cppipc/queue.h DELETED
@@ -1,216 +0,0 @@
1
- #pragma once
2
-
3
- #include <type_traits>
4
- #include <new>
5
- #include <utility> // [[since C++14]]: std::exchange
6
- #include <algorithm>
7
- #include <atomic>
8
- #include <tuple>
9
- #include <thread>
10
- #include <chrono>
11
- #include <string>
12
- #include <cassert> // assert
13
-
14
- #include "libipc/def.h"
15
- #include "libipc/shm.h"
16
- #include "libipc/rw_lock.h"
17
-
18
- #include "libipc/utility/log.h"
19
- #include "libipc/platform/detail.h"
20
- #include "libipc/circ/elem_def.h"
21
-
22
- namespace ipc {
23
- namespace detail {
24
-
25
- class queue_conn {
26
- protected:
27
- circ::cc_t connected_ = 0;
28
- shm::handle elems_h_;
29
-
30
- template <typename Elems>
31
- Elems* open(char const * name) {
32
- if (name == nullptr || name[0] == '\0') {
33
- ipc::error("fail open waiter: name is empty!\n");
34
- return nullptr;
35
- }
36
- if (!elems_h_.acquire(name, sizeof(Elems))) {
37
- return nullptr;
38
- }
39
- auto elems = static_cast<Elems*>(elems_h_.get());
40
- if (elems == nullptr) {
41
- ipc::error("fail acquire elems: %s\n", name);
42
- return nullptr;
43
- }
44
- elems->init();
45
- return elems;
46
- }
47
-
48
- void close() {
49
- elems_h_.release();
50
- }
51
-
52
- public:
53
- queue_conn() = default;
54
- queue_conn(const queue_conn&) = delete;
55
- queue_conn& operator=(const queue_conn&) = delete;
56
-
57
- bool connected() const noexcept {
58
- return connected_ != 0;
59
- }
60
-
61
- circ::cc_t connected_id() const noexcept {
62
- return connected_;
63
- }
64
-
65
- template <typename Elems>
66
- auto connect(Elems* elems) noexcept
67
- /*needs 'optional' here*/
68
- -> std::tuple<bool, bool, decltype(std::declval<Elems>().cursor())> {
69
- if (elems == nullptr) return {};
70
- // if it's already connected, just return
71
- if (connected()) return {connected(), false, 0};
72
- connected_ = elems->connect_receiver();
73
- return {connected(), true, elems->cursor()};
74
- }
75
-
76
- template <typename Elems>
77
- bool disconnect(Elems* elems) noexcept {
78
- if (elems == nullptr) return false;
79
- // if it's already disconnected, just return false
80
- if (!connected()) return false;
81
- elems->disconnect_receiver(std::exchange(connected_, 0));
82
- return true;
83
- }
84
- };
85
-
86
- template <typename Elems>
87
- class queue_base : public queue_conn {
88
- using base_t = queue_conn;
89
-
90
- public:
91
- using elems_t = Elems;
92
- using policy_t = typename elems_t::policy_t;
93
-
94
- protected:
95
- elems_t * elems_ = nullptr;
96
- decltype(std::declval<elems_t>().cursor()) cursor_ = 0;
97
- bool sender_flag_ = false;
98
-
99
- public:
100
- using base_t::base_t;
101
-
102
- queue_base() = default;
103
-
104
- explicit queue_base(char const * name)
105
- : queue_base{} {
106
- elems_ = open<elems_t>(name);
107
- }
108
-
109
- explicit queue_base(elems_t * elems) noexcept
110
- : queue_base{} {
111
- assert(elems != nullptr);
112
- elems_ = elems;
113
- }
114
-
115
- /* not virtual */ ~queue_base() {
116
- base_t::close();
117
- }
118
-
119
- elems_t * elems() noexcept { return elems_; }
120
- elems_t const * elems() const noexcept { return elems_; }
121
-
122
- bool ready_sending() noexcept {
123
- if (elems_ == nullptr) return false;
124
- return sender_flag_ || (sender_flag_ = elems_->connect_sender());
125
- }
126
-
127
- void shut_sending() noexcept {
128
- if (elems_ == nullptr) return;
129
- if (!sender_flag_) return;
130
- elems_->disconnect_sender();
131
- }
132
-
133
- bool connect() noexcept {
134
- auto tp = base_t::connect(elems_);
135
- if (std::get<0>(tp) && std::get<1>(tp)) {
136
- cursor_ = std::get<2>(tp);
137
- return true;
138
- }
139
- return std::get<0>(tp);
140
- }
141
-
142
- bool disconnect() noexcept {
143
- return base_t::disconnect(elems_);
144
- }
145
-
146
- std::size_t conn_count() const noexcept {
147
- return (elems_ == nullptr) ? static_cast<std::size_t>(invalid_value) : elems_->conn_count();
148
- }
149
-
150
- bool valid() const noexcept {
151
- return elems_ != nullptr;
152
- }
153
-
154
- bool empty() const noexcept {
155
- return !valid() || (cursor_ == elems_->cursor());
156
- }
157
-
158
- template <typename T, typename F, typename... P>
159
- bool push(F&& prep, P&&... params) {
160
- if (elems_ == nullptr) return false;
161
- return elems_->push(this, [&](void* p) {
162
- if (prep(p)) ::new (p) T(std::forward<P>(params)...);
163
- });
164
- }
165
-
166
- template <typename T, typename F, typename... P>
167
- bool force_push(F&& prep, P&&... params) {
168
- if (elems_ == nullptr) return false;
169
- return elems_->force_push(this, [&](void* p) {
170
- if (prep(p)) ::new (p) T(std::forward<P>(params)...);
171
- });
172
- }
173
-
174
- template <typename T, typename F>
175
- bool pop(T& item, F&& out) {
176
- if (elems_ == nullptr) {
177
- return false;
178
- }
179
- return elems_->pop(this, &(this->cursor_), [&item](void* p) {
180
- ::new (&item) T(std::move(*static_cast<T*>(p)));
181
- }, std::forward<F>(out));
182
- }
183
- };
184
-
185
- } // namespace detail
186
-
187
- template <typename T, typename Policy>
188
- class queue final : public detail::queue_base<typename Policy::template elems_t<sizeof(T), alignof(T)>> {
189
- using base_t = detail::queue_base<typename Policy::template elems_t<sizeof(T), alignof(T)>>;
190
-
191
- public:
192
- using value_t = T;
193
-
194
- using base_t::base_t;
195
-
196
- template <typename... P>
197
- bool push(P&&... params) {
198
- return base_t::template push<T>(std::forward<P>(params)...);
199
- }
200
-
201
- template <typename... P>
202
- bool force_push(P&&... params) {
203
- return base_t::template force_push<T>(std::forward<P>(params)...);
204
- }
205
-
206
- bool pop(T& item) {
207
- return base_t::pop(item, [](bool) {});
208
- }
209
-
210
- template <typename F>
211
- bool pop(T& item, F&& out) {
212
- return base_t::pop(item, std::forward<F>(out));
213
- }
214
- };
215
-
216
- } // namespace ipc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/test_project/cpp/cppipc/shm.cpp DELETED
@@ -1,103 +0,0 @@
1
-
2
- #include <string>
3
- #include <utility>
4
-
5
- #include "libipc/shm.h"
6
-
7
- #include "libipc/utility/pimpl.h"
8
- #include "libipc/memory/resource.h"
9
-
10
- namespace ipc {
11
- namespace shm {
12
-
13
- class handle::handle_ : public pimpl<handle_> {
14
- public:
15
- shm::id_t id_ = nullptr;
16
- void* m_ = nullptr;
17
-
18
- ipc::string n_;
19
- std::size_t s_ = 0;
20
- };
21
-
22
- handle::handle()
23
- : p_(p_->make()) {
24
- }
25
-
26
- handle::handle(char const * name, std::size_t size, unsigned mode)
27
- : handle() {
28
- acquire(name, size, mode);
29
- }
30
-
31
- handle::handle(handle&& rhs)
32
- : handle() {
33
- swap(rhs);
34
- }
35
-
36
- handle::~handle() {
37
- release();
38
- p_->clear();
39
- }
40
-
41
- void handle::swap(handle& rhs) {
42
- std::swap(p_, rhs.p_);
43
- }
44
-
45
- handle& handle::operator=(handle rhs) {
46
- swap(rhs);
47
- return *this;
48
- }
49
-
50
- bool handle::valid() const noexcept {
51
- return impl(p_)->m_ != nullptr;
52
- }
53
-
54
- std::size_t handle::size() const noexcept {
55
- return impl(p_)->s_;
56
- }
57
-
58
- char const * handle::name() const noexcept {
59
- return impl(p_)->n_.c_str();
60
- }
61
-
62
- std::int32_t handle::ref() const noexcept {
63
- return shm::get_ref(impl(p_)->id_);
64
- }
65
-
66
- void handle::sub_ref() noexcept {
67
- shm::sub_ref(impl(p_)->id_);
68
- }
69
-
70
- bool handle::acquire(char const * name, std::size_t size, unsigned mode) {
71
- release();
72
- impl(p_)->id_ = shm::acquire((impl(p_)->n_ = name).c_str(), size, mode);
73
- impl(p_)->m_ = shm::get_mem(impl(p_)->id_, &(impl(p_)->s_));
74
- return valid();
75
- }
76
-
77
- std::int32_t handle::release() {
78
- if (impl(p_)->id_ == nullptr) return -1;
79
- return shm::release(detach());
80
- }
81
-
82
- void* handle::get() const {
83
- return impl(p_)->m_;
84
- }
85
-
86
- void handle::attach(id_t id) {
87
- if (id == nullptr) return;
88
- release();
89
- impl(p_)->id_ = id;
90
- impl(p_)->m_ = shm::get_mem(impl(p_)->id_, &(impl(p_)->s_));
91
- }
92
-
93
- id_t handle::detach() {
94
- auto old = impl(p_)->id_;
95
- impl(p_)->id_ = nullptr;
96
- impl(p_)->m_ = nullptr;
97
- impl(p_)->s_ = 0;
98
- impl(p_)->n_.clear();
99
- return old;
100
- }
101
-
102
- } // namespace shm
103
- } // namespace ipc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/test_project/cpp/cppipc/waiter.h DELETED
@@ -1,83 +0,0 @@
1
- #pragma once
2
-
3
- #include <utility>
4
- #include <string>
5
- #include <mutex>
6
- #include <atomic>
7
-
8
- #include "libipc/def.h"
9
- #include "libipc/mutex.h"
10
- #include "libipc/condition.h"
11
- #include "libipc/platform/detail.h"
12
-
13
- namespace ipc {
14
- namespace detail {
15
-
16
- class waiter {
17
- ipc::sync::condition cond_;
18
- ipc::sync::mutex lock_;
19
- std::atomic<bool> quit_ {false};
20
-
21
- public:
22
- static void init();
23
-
24
- waiter() = default;
25
- waiter(char const *name) {
26
- open(name);
27
- }
28
-
29
- ~waiter() {
30
- close();
31
- }
32
-
33
- bool valid() const noexcept {
34
- return cond_.valid() && lock_.valid();
35
- }
36
-
37
- bool open(char const *name) noexcept {
38
- quit_.store(false, std::memory_order_relaxed);
39
- if (!cond_.open((std::string{"_waiter_cond_"} + name).c_str())) {
40
- return false;
41
- }
42
- if (!lock_.open((std::string{"_waiter_lock_"} + name).c_str())) {
43
- cond_.close();
44
- return false;
45
- }
46
- return valid();
47
- }
48
-
49
- void close() noexcept {
50
- cond_.close();
51
- lock_.close();
52
- }
53
-
54
- template <typename F>
55
- bool wait_if(F &&pred, std::uint64_t tm = ipc::invalid_value) noexcept {
56
- IPC_UNUSED_ std::lock_guard<ipc::sync::mutex> guard {lock_};
57
- while ([this, &pred] {
58
- return !quit_.load(std::memory_order_relaxed)
59
- && std::forward<F>(pred)();
60
- }()) {
61
- if (!cond_.wait(lock_, tm)) return false;
62
- }
63
- return true;
64
- }
65
-
66
- bool notify() noexcept {
67
- std::lock_guard<ipc::sync::mutex>{lock_}; // barrier
68
- return cond_.notify(lock_);
69
- }
70
-
71
- bool broadcast() noexcept {
72
- std::lock_guard<ipc::sync::mutex>{lock_}; // barrier
73
- return cond_.broadcast(lock_);
74
- }
75
-
76
- bool quit_waiting() {
77
- quit_.store(true, std::memory_order_release);
78
- return broadcast();
79
- }
80
- };
81
-
82
- } // namespace detail
83
- } // namespace ipc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/test_project/cpp/cppipc/来源 DELETED
@@ -1,3 +0,0 @@
1
- https://github.com/mutouyun/cpp-ipc
2
-
3
- A high-performance inter-process communication library using shared memory on Linux/Windows.
 
 
 
 
crazy_functions/test_project/cpp/libJPG/jpgd.cpp DELETED
@@ -1,3276 +0,0 @@
1
- // jpgd.cpp - C++ class for JPEG decompression.
2
- // Public domain, Rich Geldreich <richgel99@gmail.com>
3
- // Last updated Apr. 16, 2011
4
- // Alex Evans: Linear memory allocator (taken from jpge.h).
5
- //
6
- // Supports progressive and baseline sequential JPEG image files, and the most common chroma subsampling factors: Y, H1V1, H2V1, H1V2, and H2V2.
7
- //
8
- // Chroma upsampling quality: H2V2 is upsampled in the frequency domain, H2V1 and H1V2 are upsampled using point sampling.
9
- // Chroma upsampling reference: "Fast Scheme for Image Size Change in the Compressed Domain"
10
- // http://vision.ai.uiuc.edu/~dugad/research/dct/index.html
11
-
12
- #include "jpgd.h"
13
- #include <string.h>
14
-
15
- #include <assert.h>
16
- // BEGIN EPIC MOD
17
- #define JPGD_ASSERT(x) { assert(x); CA_ASSUME(x); } (void)0
18
- // END EPIC MOD
19
-
20
- #ifdef _MSC_VER
21
- #pragma warning (disable : 4611) // warning C4611: interaction between '_setjmp' and C++ object destruction is non-portable
22
- #endif
23
-
24
- // Set to 1 to enable freq. domain chroma upsampling on images using H2V2 subsampling (0=faster nearest neighbor sampling).
25
- // This is slower, but results in higher quality on images with highly saturated colors.
26
- #define JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING 1
27
-
28
- #define JPGD_TRUE (1)
29
- #define JPGD_FALSE (0)
30
-
31
- #define JPGD_MAX(a,b) (((a)>(b)) ? (a) : (b))
32
- #define JPGD_MIN(a,b) (((a)<(b)) ? (a) : (b))
33
-
34
- namespace jpgd {
35
-
36
- static inline void *jpgd_malloc(size_t nSize) { return FMemory::Malloc(nSize); }
37
- static inline void jpgd_free(void *p) { FMemory::Free(p); }
38
-
39
- // BEGIN EPIC MOD
40
- //@UE3 - use UE3 BGRA encoding instead of assuming RGBA
41
- // stolen from IImageWrapper.h
42
- enum ERGBFormatJPG
43
- {
44
- Invalid = -1,
45
- RGBA = 0,
46
- BGRA = 1,
47
- Gray = 2,
48
- };
49
- static ERGBFormatJPG jpg_format;
50
- // END EPIC MOD
51
-
52
- // DCT coefficients are stored in this sequence.
53
- static int g_ZAG[64] = { 0,1,8,16,9,2,3,10,17,24,32,25,18,11,4,5,12,19,26,33,40,48,41,34,27,20,13,6,7,14,21,28,35,42,49,56,57,50,43,36,29,22,15,23,30,37,44,51,58,59,52,45,38,31,39,46,53,60,61,54,47,55,62,63 };
54
-
55
- enum JPEG_MARKER
56
- {
57
- M_SOF0 = 0xC0, M_SOF1 = 0xC1, M_SOF2 = 0xC2, M_SOF3 = 0xC3, M_SOF5 = 0xC5, M_SOF6 = 0xC6, M_SOF7 = 0xC7, M_JPG = 0xC8,
58
- M_SOF9 = 0xC9, M_SOF10 = 0xCA, M_SOF11 = 0xCB, M_SOF13 = 0xCD, M_SOF14 = 0xCE, M_SOF15 = 0xCF, M_DHT = 0xC4, M_DAC = 0xCC,
59
- M_RST0 = 0xD0, M_RST1 = 0xD1, M_RST2 = 0xD2, M_RST3 = 0xD3, M_RST4 = 0xD4, M_RST5 = 0xD5, M_RST6 = 0xD6, M_RST7 = 0xD7,
60
- M_SOI = 0xD8, M_EOI = 0xD9, M_SOS = 0xDA, M_DQT = 0xDB, M_DNL = 0xDC, M_DRI = 0xDD, M_DHP = 0xDE, M_EXP = 0xDF,
61
- M_APP0 = 0xE0, M_APP15 = 0xEF, M_JPG0 = 0xF0, M_JPG13 = 0xFD, M_COM = 0xFE, M_TEM = 0x01, M_ERROR = 0x100, RST0 = 0xD0
62
- };
63
-
64
- enum JPEG_SUBSAMPLING { JPGD_GRAYSCALE = 0, JPGD_YH1V1, JPGD_YH2V1, JPGD_YH1V2, JPGD_YH2V2 };
65
-
66
- #define CONST_BITS 13
67
- #define PASS1_BITS 2
68
- #define SCALEDONE ((int32)1)
69
-
70
- #define FIX_0_298631336 ((int32)2446) /* FIX(0.298631336) */
71
- #define FIX_0_390180644 ((int32)3196) /* FIX(0.390180644) */
72
- #define FIX_0_541196100 ((int32)4433) /* FIX(0.541196100) */
73
- #define FIX_0_765366865 ((int32)6270) /* FIX(0.765366865) */
74
- #define FIX_0_899976223 ((int32)7373) /* FIX(0.899976223) */
75
- #define FIX_1_175875602 ((int32)9633) /* FIX(1.175875602) */
76
- #define FIX_1_501321110 ((int32)12299) /* FIX(1.501321110) */
77
- #define FIX_1_847759065 ((int32)15137) /* FIX(1.847759065) */
78
- #define FIX_1_961570560 ((int32)16069) /* FIX(1.961570560) */
79
- #define FIX_2_053119869 ((int32)16819) /* FIX(2.053119869) */
80
- #define FIX_2_562915447 ((int32)20995) /* FIX(2.562915447) */
81
- #define FIX_3_072711026 ((int32)25172) /* FIX(3.072711026) */
82
-
83
- #define DESCALE(x,n) (((x) + (SCALEDONE << ((n)-1))) >> (n))
84
- #define DESCALE_ZEROSHIFT(x,n) (((x) + (128 << (n)) + (SCALEDONE << ((n)-1))) >> (n))
85
-
86
- #define MULTIPLY(var, cnst) ((var) * (cnst))
87
-
88
- #define CLAMP(i) ((static_cast<uint>(i) > 255) ? (((~i) >> 31) & 0xFF) : (i))
89
-
90
- // Compiler creates a fast path 1D IDCT for X non-zero columns
91
- template <int NONZERO_COLS>
92
- struct Row
93
- {
94
- static void idct(int* pTemp, const jpgd_block_t* pSrc)
95
- {
96
- // ACCESS_COL() will be optimized at compile time to either an array access, or 0.
97
- #define ACCESS_COL(x) (((x) < NONZERO_COLS) ? (int)pSrc[x] : 0)
98
-
99
- const int z2 = ACCESS_COL(2), z3 = ACCESS_COL(6);
100
-
101
- const int z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
102
- const int tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
103
- const int tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
104
-
105
- const int tmp0 = (ACCESS_COL(0) + ACCESS_COL(4)) << CONST_BITS;
106
- const int tmp1 = (ACCESS_COL(0) - ACCESS_COL(4)) << CONST_BITS;
107
-
108
- const int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2;
109
-
110
- const int atmp0 = ACCESS_COL(7), atmp1 = ACCESS_COL(5), atmp2 = ACCESS_COL(3), atmp3 = ACCESS_COL(1);
111
-
112
- const int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3;
113
- const int bz5 = MULTIPLY(bz3 + bz4, FIX_1_175875602);
114
-
115
- const int az1 = MULTIPLY(bz1, - FIX_0_899976223);
116
- const int az2 = MULTIPLY(bz2, - FIX_2_562915447);
117
- const int az3 = MULTIPLY(bz3, - FIX_1_961570560) + bz5;
118
- const int az4 = MULTIPLY(bz4, - FIX_0_390180644) + bz5;
119
-
120
- const int btmp0 = MULTIPLY(atmp0, FIX_0_298631336) + az1 + az3;
121
- const int btmp1 = MULTIPLY(atmp1, FIX_2_053119869) + az2 + az4;
122
- const int btmp2 = MULTIPLY(atmp2, FIX_3_072711026) + az2 + az3;
123
- const int btmp3 = MULTIPLY(atmp3, FIX_1_501321110) + az1 + az4;
124
-
125
- pTemp[0] = DESCALE(tmp10 + btmp3, CONST_BITS-PASS1_BITS);
126
- pTemp[7] = DESCALE(tmp10 - btmp3, CONST_BITS-PASS1_BITS);
127
- pTemp[1] = DESCALE(tmp11 + btmp2, CONST_BITS-PASS1_BITS);
128
- pTemp[6] = DESCALE(tmp11 - btmp2, CONST_BITS-PASS1_BITS);
129
- pTemp[2] = DESCALE(tmp12 + btmp1, CONST_BITS-PASS1_BITS);
130
- pTemp[5] = DESCALE(tmp12 - btmp1, CONST_BITS-PASS1_BITS);
131
- pTemp[3] = DESCALE(tmp13 + btmp0, CONST_BITS-PASS1_BITS);
132
- pTemp[4] = DESCALE(tmp13 - btmp0, CONST_BITS-PASS1_BITS);
133
- }
134
- };
135
-
136
- template <>
137
- struct Row<0>
138
- {
139
- static void idct(int* pTemp, const jpgd_block_t* pSrc)
140
- {
141
- #ifdef _MSC_VER
142
- pTemp; pSrc;
143
- #endif
144
- }
145
- };
146
-
147
- template <>
148
- struct Row<1>
149
- {
150
- static void idct(int* pTemp, const jpgd_block_t* pSrc)
151
- {
152
- const int dcval = (pSrc[0] << PASS1_BITS);
153
-
154
- pTemp[0] = dcval;
155
- pTemp[1] = dcval;
156
- pTemp[2] = dcval;
157
- pTemp[3] = dcval;
158
- pTemp[4] = dcval;
159
- pTemp[5] = dcval;
160
- pTemp[6] = dcval;
161
- pTemp[7] = dcval;
162
- }
163
- };
164
-
165
- // Compiler creates a fast path 1D IDCT for X non-zero rows
166
- template <int NONZERO_ROWS>
167
- struct Col
168
- {
169
- static void idct(uint8* pDst_ptr, const int* pTemp)
170
- {
171
- // ACCESS_ROW() will be optimized at compile time to either an array access, or 0.
172
- #define ACCESS_ROW(x) (((x) < NONZERO_ROWS) ? pTemp[x * 8] : 0)
173
-
174
- const int z2 = ACCESS_ROW(2);
175
- const int z3 = ACCESS_ROW(6);
176
-
177
- const int z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
178
- const int tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
179
- const int tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
180
-
181
- const int tmp0 = (ACCESS_ROW(0) + ACCESS_ROW(4)) << CONST_BITS;
182
- const int tmp1 = (ACCESS_ROW(0) - ACCESS_ROW(4)) << CONST_BITS;
183
-
184
- const int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2;
185
-
186
- const int atmp0 = ACCESS_ROW(7), atmp1 = ACCESS_ROW(5), atmp2 = ACCESS_ROW(3), atmp3 = ACCESS_ROW(1);
187
-
188
- const int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3;
189
- const int bz5 = MULTIPLY(bz3 + bz4, FIX_1_175875602);
190
-
191
- const int az1 = MULTIPLY(bz1, - FIX_0_899976223);
192
- const int az2 = MULTIPLY(bz2, - FIX_2_562915447);
193
- const int az3 = MULTIPLY(bz3, - FIX_1_961570560) + bz5;
194
- const int az4 = MULTIPLY(bz4, - FIX_0_390180644) + bz5;
195
-
196
- const int btmp0 = MULTIPLY(atmp0, FIX_0_298631336) + az1 + az3;
197
- const int btmp1 = MULTIPLY(atmp1, FIX_2_053119869) + az2 + az4;
198
- const int btmp2 = MULTIPLY(atmp2, FIX_3_072711026) + az2 + az3;
199
- const int btmp3 = MULTIPLY(atmp3, FIX_1_501321110) + az1 + az4;
200
-
201
- int i = DESCALE_ZEROSHIFT(tmp10 + btmp3, CONST_BITS+PASS1_BITS+3);
202
- pDst_ptr[8*0] = (uint8)CLAMP(i);
203
-
204
- i = DESCALE_ZEROSHIFT(tmp10 - btmp3, CONST_BITS+PASS1_BITS+3);
205
- pDst_ptr[8*7] = (uint8)CLAMP(i);
206
-
207
- i = DESCALE_ZEROSHIFT(tmp11 + btmp2, CONST_BITS+PASS1_BITS+3);
208
- pDst_ptr[8*1] = (uint8)CLAMP(i);
209
-
210
- i = DESCALE_ZEROSHIFT(tmp11 - btmp2, CONST_BITS+PASS1_BITS+3);
211
- pDst_ptr[8*6] = (uint8)CLAMP(i);
212
-
213
- i = DESCALE_ZEROSHIFT(tmp12 + btmp1, CONST_BITS+PASS1_BITS+3);
214
- pDst_ptr[8*2] = (uint8)CLAMP(i);
215
-
216
- i = DESCALE_ZEROSHIFT(tmp12 - btmp1, CONST_BITS+PASS1_BITS+3);
217
- pDst_ptr[8*5] = (uint8)CLAMP(i);
218
-
219
- i = DESCALE_ZEROSHIFT(tmp13 + btmp0, CONST_BITS+PASS1_BITS+3);
220
- pDst_ptr[8*3] = (uint8)CLAMP(i);
221
-
222
- i = DESCALE_ZEROSHIFT(tmp13 - btmp0, CONST_BITS+PASS1_BITS+3);
223
- pDst_ptr[8*4] = (uint8)CLAMP(i);
224
- }
225
- };
226
-
227
- template <>
228
- struct Col<1>
229
- {
230
- static void idct(uint8* pDst_ptr, const int* pTemp)
231
- {
232
- int dcval = DESCALE_ZEROSHIFT(pTemp[0], PASS1_BITS+3);
233
- const uint8 dcval_clamped = (uint8)CLAMP(dcval);
234
- pDst_ptr[0*8] = dcval_clamped;
235
- pDst_ptr[1*8] = dcval_clamped;
236
- pDst_ptr[2*8] = dcval_clamped;
237
- pDst_ptr[3*8] = dcval_clamped;
238
- pDst_ptr[4*8] = dcval_clamped;
239
- pDst_ptr[5*8] = dcval_clamped;
240
- pDst_ptr[6*8] = dcval_clamped;
241
- pDst_ptr[7*8] = dcval_clamped;
242
- }
243
- };
244
-
245
- static const uint8 s_idct_row_table[] =
246
- {
247
- 1,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0, 2,1,0,0,0,0,0,0, 2,1,1,0,0,0,0,0, 2,2,1,0,0,0,0,0, 3,2,1,0,0,0,0,0, 4,2,1,0,0,0,0,0, 4,3,1,0,0,0,0,0,
248
- 4,3,2,0,0,0,0,0, 4,3,2,1,0,0,0,0, 4,3,2,1,1,0,0,0, 4,3,2,2,1,0,0,0, 4,3,3,2,1,0,0,0, 4,4,3,2,1,0,0,0, 5,4,3,2,1,0,0,0, 6,4,3,2,1,0,0,0,
249
- 6,5,3,2,1,0,0,0, 6,5,4,2,1,0,0,0, 6,5,4,3,1,0,0,0, 6,5,4,3,2,0,0,0, 6,5,4,3,2,1,0,0, 6,5,4,3,2,1,1,0, 6,5,4,3,2,2,1,0, 6,5,4,3,3,2,1,0,
250
- 6,5,4,4,3,2,1,0, 6,5,5,4,3,2,1,0, 6,6,5,4,3,2,1,0, 7,6,5,4,3,2,1,0, 8,6,5,4,3,2,1,0, 8,7,5,4,3,2,1,0, 8,7,6,4,3,2,1,0, 8,7,6,5,3,2,1,0,
251
- 8,7,6,5,4,2,1,0, 8,7,6,5,4,3,1,0, 8,7,6,5,4,3,2,0, 8,7,6,5,4,3,2,1, 8,7,6,5,4,3,2,2, 8,7,6,5,4,3,3,2, 8,7,6,5,4,4,3,2, 8,7,6,5,5,4,3,2,
252
- 8,7,6,6,5,4,3,2, 8,7,7,6,5,4,3,2, 8,8,7,6,5,4,3,2, 8,8,8,6,5,4,3,2, 8,8,8,7,5,4,3,2, 8,8,8,7,6,4,3,2, 8,8,8,7,6,5,3,2, 8,8,8,7,6,5,4,2,
253
- 8,8,8,7,6,5,4,3, 8,8,8,7,6,5,4,4, 8,8,8,7,6,5,5,4, 8,8,8,7,6,6,5,4, 8,8,8,7,7,6,5,4, 8,8,8,8,7,6,5,4, 8,8,8,8,8,6,5,4, 8,8,8,8,8,7,5,4,
254
- 8,8,8,8,8,7,6,4, 8,8,8,8,8,7,6,5, 8,8,8,8,8,7,6,6, 8,8,8,8,8,7,7,6, 8,8,8,8,8,8,7,6, 8,8,8,8,8,8,8,6, 8,8,8,8,8,8,8,7, 8,8,8,8,8,8,8,8,
255
- };
256
-
257
- static const uint8 s_idct_col_table[] = { 1, 1, 2, 3, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 };
258
-
259
- void idct(const jpgd_block_t* pSrc_ptr, uint8* pDst_ptr, int block_max_zag)
260
- {
261
- JPGD_ASSERT(block_max_zag >= 1);
262
- JPGD_ASSERT(block_max_zag <= 64);
263
-
264
- if (block_max_zag == 1)
265
- {
266
- int k = ((pSrc_ptr[0] + 4) >> 3) + 128;
267
- k = CLAMP(k);
268
- k = k | (k<<8);
269
- k = k | (k<<16);
270
-
271
- for (int i = 8; i > 0; i--)
272
- {
273
- *(int*)&pDst_ptr[0] = k;
274
- *(int*)&pDst_ptr[4] = k;
275
- pDst_ptr += 8;
276
- }
277
- return;
278
- }
279
-
280
- int temp[64];
281
-
282
- const jpgd_block_t* pSrc = pSrc_ptr;
283
- int* pTemp = temp;
284
-
285
- const uint8* pRow_tab = &s_idct_row_table[(block_max_zag - 1) * 8];
286
- int i;
287
- for (i = 8; i > 0; i--, pRow_tab++)
288
- {
289
- switch (*pRow_tab)
290
- {
291
- case 0: Row<0>::idct(pTemp, pSrc); break;
292
- case 1: Row<1>::idct(pTemp, pSrc); break;
293
- case 2: Row<2>::idct(pTemp, pSrc); break;
294
- case 3: Row<3>::idct(pTemp, pSrc); break;
295
- case 4: Row<4>::idct(pTemp, pSrc); break;
296
- case 5: Row<5>::idct(pTemp, pSrc); break;
297
- case 6: Row<6>::idct(pTemp, pSrc); break;
298
- case 7: Row<7>::idct(pTemp, pSrc); break;
299
- case 8: Row<8>::idct(pTemp, pSrc); break;
300
- }
301
-
302
- pSrc += 8;
303
- pTemp += 8;
304
- }
305
-
306
- pTemp = temp;
307
-
308
- const int nonzero_rows = s_idct_col_table[block_max_zag - 1];
309
- for (i = 8; i > 0; i--)
310
- {
311
- switch (nonzero_rows)
312
- {
313
- case 1: Col<1>::idct(pDst_ptr, pTemp); break;
314
- case 2: Col<2>::idct(pDst_ptr, pTemp); break;
315
- case 3: Col<3>::idct(pDst_ptr, pTemp); break;
316
- case 4: Col<4>::idct(pDst_ptr, pTemp); break;
317
- case 5: Col<5>::idct(pDst_ptr, pTemp); break;
318
- case 6: Col<6>::idct(pDst_ptr, pTemp); break;
319
- case 7: Col<7>::idct(pDst_ptr, pTemp); break;
320
- case 8: Col<8>::idct(pDst_ptr, pTemp); break;
321
- }
322
-
323
- pTemp++;
324
- pDst_ptr++;
325
- }
326
- }
327
-
328
- void idct_4x4(const jpgd_block_t* pSrc_ptr, uint8* pDst_ptr)
329
- {
330
- int temp[64];
331
- int* pTemp = temp;
332
- const jpgd_block_t* pSrc = pSrc_ptr;
333
-
334
- for (int i = 4; i > 0; i--)
335
- {
336
- Row<4>::idct(pTemp, pSrc);
337
- pSrc += 8;
338
- pTemp += 8;
339
- }
340
-
341
- pTemp = temp;
342
- for (int i = 8; i > 0; i--)
343
- {
344
- Col<4>::idct(pDst_ptr, pTemp);
345
- pTemp++;
346
- pDst_ptr++;
347
- }
348
- }
349
-
350
- // Retrieve one character from the input stream.
351
- inline uint jpeg_decoder::get_char()
352
- {
353
- // Any bytes remaining in buffer?
354
- if (!m_in_buf_left)
355
- {
356
- // Try to get more bytes.
357
- prep_in_buffer();
358
- // Still nothing to get?
359
- if (!m_in_buf_left)
360
- {
361
- // Pad the end of the stream with 0xFF 0xD9 (EOI marker)
362
- int t = m_tem_flag;
363
- m_tem_flag ^= 1;
364
- if (t)
365
- return 0xD9;
366
- else
367
- return 0xFF;
368
- }
369
- }
370
-
371
- uint c = *m_pIn_buf_ofs++;
372
- m_in_buf_left--;
373
-
374
- return c;
375
- }
376
-
377
- // Same as previous method, except can indicate if the character is a pad character or not.
378
- inline uint jpeg_decoder::get_char(bool *pPadding_flag)
379
- {
380
- if (!m_in_buf_left)
381
- {
382
- prep_in_buffer();
383
- if (!m_in_buf_left)
384
- {
385
- *pPadding_flag = true;
386
- int t = m_tem_flag;
387
- m_tem_flag ^= 1;
388
- if (t)
389
- return 0xD9;
390
- else
391
- return 0xFF;
392
- }
393
- }
394
-
395
- *pPadding_flag = false;
396
-
397
- uint c = *m_pIn_buf_ofs++;
398
- m_in_buf_left--;
399
-
400
- return c;
401
- }
402
-
403
- // Inserts a previously retrieved character back into the input buffer.
404
- inline void jpeg_decoder::stuff_char(uint8 q)
405
- {
406
- *(--m_pIn_buf_ofs) = q;
407
- m_in_buf_left++;
408
- }
409
-
410
- // Retrieves one character from the input stream, but does not read past markers. Will continue to return 0xFF when a marker is encountered.
411
- inline uint8 jpeg_decoder::get_octet()
412
- {
413
- bool padding_flag;
414
- int c = get_char(&padding_flag);
415
-
416
- if (c == 0xFF)
417
- {
418
- if (padding_flag)
419
- return 0xFF;
420
-
421
- c = get_char(&padding_flag);
422
- if (padding_flag)
423
- {
424
- stuff_char(0xFF);
425
- return 0xFF;
426
- }
427
-
428
- if (c == 0x00)
429
- return 0xFF;
430
- else
431
- {
432
- stuff_char(static_cast<uint8>(c));
433
- stuff_char(0xFF);
434
- return 0xFF;
435
- }
436
- }
437
-
438
- return static_cast<uint8>(c);
439
- }
440
-
441
- // Retrieves a variable number of bits from the input stream. Does not recognize markers.
442
- inline uint jpeg_decoder::get_bits(int num_bits)
443
- {
444
- if (!num_bits)
445
- return 0;
446
-
447
- uint i = m_bit_buf >> (32 - num_bits);
448
-
449
- if ((m_bits_left -= num_bits) <= 0)
450
- {
451
- m_bit_buf <<= (num_bits += m_bits_left);
452
-
453
- uint c1 = get_char();
454
- uint c2 = get_char();
455
- m_bit_buf = (m_bit_buf & 0xFFFF0000) | (c1 << 8) | c2;
456
-
457
- m_bit_buf <<= -m_bits_left;
458
-
459
- m_bits_left += 16;
460
-
461
- JPGD_ASSERT(m_bits_left >= 0);
462
- }
463
- else
464
- m_bit_buf <<= num_bits;
465
-
466
- return i;
467
- }
468
-
469
- // Retrieves a variable number of bits from the input stream. Markers will not be read into the input bit buffer. Instead, an infinite number of all 1's will be returned when a marker is encountered.
470
- inline uint jpeg_decoder::get_bits_no_markers(int num_bits)
471
- {
472
- if (!num_bits)
473
- return 0;
474
-
475
- uint i = m_bit_buf >> (32 - num_bits);
476
-
477
- if ((m_bits_left -= num_bits) <= 0)
478
- {
479
- m_bit_buf <<= (num_bits += m_bits_left);
480
-
481
- if ((m_in_buf_left < 2) || (m_pIn_buf_ofs[0] == 0xFF) || (m_pIn_buf_ofs[1] == 0xFF))
482
- {
483
- uint c1 = get_octet();
484
- uint c2 = get_octet();
485
- m_bit_buf |= (c1 << 8) | c2;
486
- }
487
- else
488
- {
489
- m_bit_buf |= ((uint)m_pIn_buf_ofs[0] << 8) | m_pIn_buf_ofs[1];
490
- m_in_buf_left -= 2;
491
- m_pIn_buf_ofs += 2;
492
- }
493
-
494
- m_bit_buf <<= -m_bits_left;
495
-
496
- m_bits_left += 16;
497
-
498
- JPGD_ASSERT(m_bits_left >= 0);
499
- }
500
- else
501
- m_bit_buf <<= num_bits;
502
-
503
- return i;
504
- }
505
-
506
- // Decodes a Huffman encoded symbol.
507
- inline int jpeg_decoder::huff_decode(huff_tables *pH)
508
- {
509
- int symbol;
510
-
511
- // Check first 8-bits: do we have a complete symbol?
512
- if ((symbol = pH->look_up[m_bit_buf >> 24]) < 0)
513
- {
514
- // Decode more bits, use a tree traversal to find symbol.
515
- int ofs = 23;
516
- do
517
- {
518
- symbol = pH->tree[-(int)(symbol + ((m_bit_buf >> ofs) & 1))];
519
- ofs--;
520
- } while (symbol < 0);
521
-
522
- get_bits_no_markers(8 + (23 - ofs));
523
- }
524
- else
525
- get_bits_no_markers(pH->code_size[symbol]);
526
-
527
- return symbol;
528
- }
529
-
530
- // Decodes a Huffman encoded symbol.
531
- inline int jpeg_decoder::huff_decode(huff_tables *pH, int& extra_bits)
532
- {
533
- int symbol;
534
-
535
- // Check first 8-bits: do we have a complete symbol?
536
- if ((symbol = pH->look_up2[m_bit_buf >> 24]) < 0)
537
- {
538
- // Use a tree traversal to find symbol.
539
- int ofs = 23;
540
- do
541
- {
542
- symbol = pH->tree[-(int)(symbol + ((m_bit_buf >> ofs) & 1))];
543
- ofs--;
544
- } while (symbol < 0);
545
-
546
- get_bits_no_markers(8 + (23 - ofs));
547
-
548
- extra_bits = get_bits_no_markers(symbol & 0xF);
549
- }
550
- else
551
- {
552
- JPGD_ASSERT(((symbol >> 8) & 31) == pH->code_size[symbol & 255] + ((symbol & 0x8000) ? (symbol & 15) : 0));
553
-
554
- if (symbol & 0x8000)
555
- {
556
- get_bits_no_markers((symbol >> 8) & 31);
557
- extra_bits = symbol >> 16;
558
- }
559
- else
560
- {
561
- int code_size = (symbol >> 8) & 31;
562
- int num_extra_bits = symbol & 0xF;
563
- int bits = code_size + num_extra_bits;
564
- if (bits <= (m_bits_left + 16))
565
- extra_bits = get_bits_no_markers(bits) & ((1 << num_extra_bits) - 1);
566
- else
567
- {
568
- get_bits_no_markers(code_size);
569
- extra_bits = get_bits_no_markers(num_extra_bits);
570
- }
571
- }
572
-
573
- symbol &= 0xFF;
574
- }
575
-
576
- return symbol;
577
- }
578
-
579
- // Tables and macro used to fully decode the DPCM differences.
580
- static const int s_extend_test[16] = { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 };
581
- static const int s_extend_offset[16] = { 0, -1, -3, -7, -15, -31, -63, -127, -255, -511, -1023, -2047, -4095, -8191, -16383, -32767 };
582
- static const int s_extend_mask[] = { 0, (1<<0), (1<<1), (1<<2), (1<<3), (1<<4), (1<<5), (1<<6), (1<<7), (1<<8), (1<<9), (1<<10), (1<<11), (1<<12), (1<<13), (1<<14), (1<<15), (1<<16) };
583
- #define HUFF_EXTEND(x,s) ((x) < s_extend_test[s] ? (x) + s_extend_offset[s] : (x))
584
-
585
- // Clamps a value between 0-255.
586
- inline uint8 jpeg_decoder::clamp(int i)
587
- {
588
- if (static_cast<uint>(i) > 255)
589
- i = (((~i) >> 31) & 0xFF);
590
-
591
- return static_cast<uint8>(i);
592
- }
593
-
594
- namespace DCT_Upsample
595
- {
596
- struct Matrix44
597
- {
598
- typedef int Element_Type;
599
- enum { NUM_ROWS = 4, NUM_COLS = 4 };
600
-
601
- Element_Type v[NUM_ROWS][NUM_COLS];
602
-
603
- inline int rows() const { return NUM_ROWS; }
604
- inline int cols() const { return NUM_COLS; }
605
-
606
- inline const Element_Type & at(int r, int c) const { return v[r][c]; }
607
- inline Element_Type & at(int r, int c) { return v[r][c]; }
608
-
609
- inline Matrix44() { }
610
-
611
- inline Matrix44& operator += (const Matrix44& a)
612
- {
613
- for (int r = 0; r < NUM_ROWS; r++)
614
- {
615
- at(r, 0) += a.at(r, 0);
616
- at(r, 1) += a.at(r, 1);
617
- at(r, 2) += a.at(r, 2);
618
- at(r, 3) += a.at(r, 3);
619
- }
620
- return *this;
621
- }
622
-
623
- inline Matrix44& operator -= (const Matrix44& a)
624
- {
625
- for (int r = 0; r < NUM_ROWS; r++)
626
- {
627
- at(r, 0) -= a.at(r, 0);
628
- at(r, 1) -= a.at(r, 1);
629
- at(r, 2) -= a.at(r, 2);
630
- at(r, 3) -= a.at(r, 3);
631
- }
632
- return *this;
633
- }
634
-
635
- friend inline Matrix44 operator + (const Matrix44& a, const Matrix44& b)
636
- {
637
- Matrix44 ret;
638
- for (int r = 0; r < NUM_ROWS; r++)
639
- {
640
- ret.at(r, 0) = a.at(r, 0) + b.at(r, 0);
641
- ret.at(r, 1) = a.at(r, 1) + b.at(r, 1);
642
- ret.at(r, 2) = a.at(r, 2) + b.at(r, 2);
643
- ret.at(r, 3) = a.at(r, 3) + b.at(r, 3);
644
- }
645
- return ret;
646
- }
647
-
648
- friend inline Matrix44 operator - (const Matrix44& a, const Matrix44& b)
649
- {
650
- Matrix44 ret;
651
- for (int r = 0; r < NUM_ROWS; r++)
652
- {
653
- ret.at(r, 0) = a.at(r, 0) - b.at(r, 0);
654
- ret.at(r, 1) = a.at(r, 1) - b.at(r, 1);
655
- ret.at(r, 2) = a.at(r, 2) - b.at(r, 2);
656
- ret.at(r, 3) = a.at(r, 3) - b.at(r, 3);
657
- }
658
- return ret;
659
- }
660
-
661
- static inline void add_and_store(jpgd_block_t* pDst, const Matrix44& a, const Matrix44& b)
662
- {
663
- for (int r = 0; r < 4; r++)
664
- {
665
- pDst[0*8 + r] = static_cast<jpgd_block_t>(a.at(r, 0) + b.at(r, 0));
666
- pDst[1*8 + r] = static_cast<jpgd_block_t>(a.at(r, 1) + b.at(r, 1));
667
- pDst[2*8 + r] = static_cast<jpgd_block_t>(a.at(r, 2) + b.at(r, 2));
668
- pDst[3*8 + r] = static_cast<jpgd_block_t>(a.at(r, 3) + b.at(r, 3));
669
- }
670
- }
671
-
672
- static inline void sub_and_store(jpgd_block_t* pDst, const Matrix44& a, const Matrix44& b)
673
- {
674
- for (int r = 0; r < 4; r++)
675
- {
676
- pDst[0*8 + r] = static_cast<jpgd_block_t>(a.at(r, 0) - b.at(r, 0));
677
- pDst[1*8 + r] = static_cast<jpgd_block_t>(a.at(r, 1) - b.at(r, 1));
678
- pDst[2*8 + r] = static_cast<jpgd_block_t>(a.at(r, 2) - b.at(r, 2));
679
- pDst[3*8 + r] = static_cast<jpgd_block_t>(a.at(r, 3) - b.at(r, 3));
680
- }
681
- }
682
- };
683
-
684
- const int FRACT_BITS = 10;
685
- const int SCALE = 1 << FRACT_BITS;
686
-
687
- typedef int Temp_Type;
688
- #define D(i) (((i) + (SCALE >> 1)) >> FRACT_BITS)
689
- #define F(i) ((int)((i) * SCALE + .5f))
690
-
691
- // Any decent C++ compiler will optimize this at compile time to a 0, or an array access.
692
- #define AT(c, r) ((((c)>=NUM_COLS)||((r)>=NUM_ROWS)) ? 0 : pSrc[(c)+(r)*8])
693
-
694
- // NUM_ROWS/NUM_COLS = # of non-zero rows/cols in input matrix
695
- template<int NUM_ROWS, int NUM_COLS>
696
- struct P_Q
697
- {
698
- static void calc(Matrix44& P, Matrix44& Q, const jpgd_block_t* pSrc)
699
- {
700
- // 4x8 = 4x8 times 8x8, matrix 0 is constant
701
- const Temp_Type X000 = AT(0, 0);
702
- const Temp_Type X001 = AT(0, 1);
703
- const Temp_Type X002 = AT(0, 2);
704
- const Temp_Type X003 = AT(0, 3);
705
- const Temp_Type X004 = AT(0, 4);
706
- const Temp_Type X005 = AT(0, 5);
707
- const Temp_Type X006 = AT(0, 6);
708
- const Temp_Type X007 = AT(0, 7);
709
- const Temp_Type X010 = D(F(0.415735f) * AT(1, 0) + F(0.791065f) * AT(3, 0) + F(-0.352443f) * AT(5, 0) + F(0.277785f) * AT(7, 0));
710
- const Temp_Type X011 = D(F(0.415735f) * AT(1, 1) + F(0.791065f) * AT(3, 1) + F(-0.352443f) * AT(5, 1) + F(0.277785f) * AT(7, 1));
711
- const Temp_Type X012 = D(F(0.415735f) * AT(1, 2) + F(0.791065f) * AT(3, 2) + F(-0.352443f) * AT(5, 2) + F(0.277785f) * AT(7, 2));
712
- const Temp_Type X013 = D(F(0.415735f) * AT(1, 3) + F(0.791065f) * AT(3, 3) + F(-0.352443f) * AT(5, 3) + F(0.277785f) * AT(7, 3));
713
- const Temp_Type X014 = D(F(0.415735f) * AT(1, 4) + F(0.791065f) * AT(3, 4) + F(-0.352443f) * AT(5, 4) + F(0.277785f) * AT(7, 4));
714
- const Temp_Type X015 = D(F(0.415735f) * AT(1, 5) + F(0.791065f) * AT(3, 5) + F(-0.352443f) * AT(5, 5) + F(0.277785f) * AT(7, 5));
715
- const Temp_Type X016 = D(F(0.415735f) * AT(1, 6) + F(0.791065f) * AT(3, 6) + F(-0.352443f) * AT(5, 6) + F(0.277785f) * AT(7, 6));
716
- const Temp_Type X017 = D(F(0.415735f) * AT(1, 7) + F(0.791065f) * AT(3, 7) + F(-0.352443f) * AT(5, 7) + F(0.277785f) * AT(7, 7));
717
- const Temp_Type X020 = AT(4, 0);
718
- const Temp_Type X021 = AT(4, 1);
719
- const Temp_Type X022 = AT(4, 2);
720
- const Temp_Type X023 = AT(4, 3);
721
- const Temp_Type X024 = AT(4, 4);
722
- const Temp_Type X025 = AT(4, 5);
723
- const Temp_Type X026 = AT(4, 6);
724
- const Temp_Type X027 = AT(4, 7);
725
- const Temp_Type X030 = D(F(0.022887f) * AT(1, 0) + F(-0.097545f) * AT(3, 0) + F(0.490393f) * AT(5, 0) + F(0.865723f) * AT(7, 0));
726
- const Temp_Type X031 = D(F(0.022887f) * AT(1, 1) + F(-0.097545f) * AT(3, 1) + F(0.490393f) * AT(5, 1) + F(0.865723f) * AT(7, 1));
727
- const Temp_Type X032 = D(F(0.022887f) * AT(1, 2) + F(-0.097545f) * AT(3, 2) + F(0.490393f) * AT(5, 2) + F(0.865723f) * AT(7, 2));
728
- const Temp_Type X033 = D(F(0.022887f) * AT(1, 3) + F(-0.097545f) * AT(3, 3) + F(0.490393f) * AT(5, 3) + F(0.865723f) * AT(7, 3));
729
- const Temp_Type X034 = D(F(0.022887f) * AT(1, 4) + F(-0.097545f) * AT(3, 4) + F(0.490393f) * AT(5, 4) + F(0.865723f) * AT(7, 4));
730
- const Temp_Type X035 = D(F(0.022887f) * AT(1, 5) + F(-0.097545f) * AT(3, 5) + F(0.490393f) * AT(5, 5) + F(0.865723f) * AT(7, 5));
731
- const Temp_Type X036 = D(F(0.022887f) * AT(1, 6) + F(-0.097545f) * AT(3, 6) + F(0.490393f) * AT(5, 6) + F(0.865723f) * AT(7, 6));
732
- const Temp_Type X037 = D(F(0.022887f) * AT(1, 7) + F(-0.097545f) * AT(3, 7) + F(0.490393f) * AT(5, 7) + F(0.865723f) * AT(7, 7));
733
-
734
- // 4x4 = 4x8 times 8x4, matrix 1 is constant
735
- P.at(0, 0) = X000;
736
- P.at(0, 1) = D(X001 * F(0.415735f) + X003 * F(0.791065f) + X005 * F(-0.352443f) + X007 * F(0.277785f));
737
- P.at(0, 2) = X004;
738
- P.at(0, 3) = D(X001 * F(0.022887f) + X003 * F(-0.097545f) + X005 * F(0.490393f) + X007 * F(0.865723f));
739
- P.at(1, 0) = X010;
740
- P.at(1, 1) = D(X011 * F(0.415735f) + X013 * F(0.791065f) + X015 * F(-0.352443f) + X017 * F(0.277785f));
741
- P.at(1, 2) = X014;
742
- P.at(1, 3) = D(X011 * F(0.022887f) + X013 * F(-0.097545f) + X015 * F(0.490393f) + X017 * F(0.865723f));
743
- P.at(2, 0) = X020;
744
- P.at(2, 1) = D(X021 * F(0.415735f) + X023 * F(0.791065f) + X025 * F(-0.352443f) + X027 * F(0.277785f));
745
- P.at(2, 2) = X024;
746
- P.at(2, 3) = D(X021 * F(0.022887f) + X023 * F(-0.097545f) + X025 * F(0.490393f) + X027 * F(0.865723f));
747
- P.at(3, 0) = X030;
748
- P.at(3, 1) = D(X031 * F(0.415735f) + X033 * F(0.791065f) + X035 * F(-0.352443f) + X037 * F(0.277785f));
749
- P.at(3, 2) = X034;
750
- P.at(3, 3) = D(X031 * F(0.022887f) + X033 * F(-0.097545f) + X035 * F(0.490393f) + X037 * F(0.865723f));
751
- // 40 muls 24 adds
752
-
753
- // 4x4 = 4x8 times 8x4, matrix 1 is constant
754
- Q.at(0, 0) = D(X001 * F(0.906127f) + X003 * F(-0.318190f) + X005 * F(0.212608f) + X007 * F(-0.180240f));
755
- Q.at(0, 1) = X002;
756
- Q.at(0, 2) = D(X001 * F(-0.074658f) + X003 * F(0.513280f) + X005 * F(0.768178f) + X007 * F(-0.375330f));
757
- Q.at(0, 3) = X006;
758
- Q.at(1, 0) = D(X011 * F(0.906127f) + X013 * F(-0.318190f) + X015 * F(0.212608f) + X017 * F(-0.180240f));
759
- Q.at(1, 1) = X012;
760
- Q.at(1, 2) = D(X011 * F(-0.074658f) + X013 * F(0.513280f) + X015 * F(0.768178f) + X017 * F(-0.375330f));
761
- Q.at(1, 3) = X016;
762
- Q.at(2, 0) = D(X021 * F(0.906127f) + X023 * F(-0.318190f) + X025 * F(0.212608f) + X027 * F(-0.180240f));
763
- Q.at(2, 1) = X022;
764
- Q.at(2, 2) = D(X021 * F(-0.074658f) + X023 * F(0.513280f) + X025 * F(0.768178f) + X027 * F(-0.375330f));
765
- Q.at(2, 3) = X026;
766
- Q.at(3, 0) = D(X031 * F(0.906127f) + X033 * F(-0.318190f) + X035 * F(0.212608f) + X037 * F(-0.180240f));
767
- Q.at(3, 1) = X032;
768
- Q.at(3, 2) = D(X031 * F(-0.074658f) + X033 * F(0.513280f) + X035 * F(0.768178f) + X037 * F(-0.375330f));
769
- Q.at(3, 3) = X036;
770
- // 40 muls 24 adds
771
- }
772
- };
773
-
774
- template<int NUM_ROWS, int NUM_COLS>
775
- struct R_S
776
- {
777
- static void calc(Matrix44& R, Matrix44& S, const jpgd_block_t* pSrc)
778
- {
779
- // 4x8 = 4x8 times 8x8, matrix 0 is constant
780
- const Temp_Type X100 = D(F(0.906127f) * AT(1, 0) + F(-0.318190f) * AT(3, 0) + F(0.212608f) * AT(5, 0) + F(-0.180240f) * AT(7, 0));
781
- const Temp_Type X101 = D(F(0.906127f) * AT(1, 1) + F(-0.318190f) * AT(3, 1) + F(0.212608f) * AT(5, 1) + F(-0.180240f) * AT(7, 1));
782
- const Temp_Type X102 = D(F(0.906127f) * AT(1, 2) + F(-0.318190f) * AT(3, 2) + F(0.212608f) * AT(5, 2) + F(-0.180240f) * AT(7, 2));
783
- const Temp_Type X103 = D(F(0.906127f) * AT(1, 3) + F(-0.318190f) * AT(3, 3) + F(0.212608f) * AT(5, 3) + F(-0.180240f) * AT(7, 3));
784
- const Temp_Type X104 = D(F(0.906127f) * AT(1, 4) + F(-0.318190f) * AT(3, 4) + F(0.212608f) * AT(5, 4) + F(-0.180240f) * AT(7, 4));
785
- const Temp_Type X105 = D(F(0.906127f) * AT(1, 5) + F(-0.318190f) * AT(3, 5) + F(0.212608f) * AT(5, 5) + F(-0.180240f) * AT(7, 5));
786
- const Temp_Type X106 = D(F(0.906127f) * AT(1, 6) + F(-0.318190f) * AT(3, 6) + F(0.212608f) * AT(5, 6) + F(-0.180240f) * AT(7, 6));
787
- const Temp_Type X107 = D(F(0.906127f) * AT(1, 7) + F(-0.318190f) * AT(3, 7) + F(0.212608f) * AT(5, 7) + F(-0.180240f) * AT(7, 7));
788
- const Temp_Type X110 = AT(2, 0);
789
- const Temp_Type X111 = AT(2, 1);
790
- const Temp_Type X112 = AT(2, 2);
791
- const Temp_Type X113 = AT(2, 3);
792
- const Temp_Type X114 = AT(2, 4);
793
- const Temp_Type X115 = AT(2, 5);
794
- const Temp_Type X116 = AT(2, 6);
795
- const Temp_Type X117 = AT(2, 7);
796
- const Temp_Type X120 = D(F(-0.074658f) * AT(1, 0) + F(0.513280f) * AT(3, 0) + F(0.768178f) * AT(5, 0) + F(-0.375330f) * AT(7, 0));
797
- const Temp_Type X121 = D(F(-0.074658f) * AT(1, 1) + F(0.513280f) * AT(3, 1) + F(0.768178f) * AT(5, 1) + F(-0.375330f) * AT(7, 1));
798
- const Temp_Type X122 = D(F(-0.074658f) * AT(1, 2) + F(0.513280f) * AT(3, 2) + F(0.768178f) * AT(5, 2) + F(-0.375330f) * AT(7, 2));
799
- const Temp_Type X123 = D(F(-0.074658f) * AT(1, 3) + F(0.513280f) * AT(3, 3) + F(0.768178f) * AT(5, 3) + F(-0.375330f) * AT(7, 3));
800
- const Temp_Type X124 = D(F(-0.074658f) * AT(1, 4) + F(0.513280f) * AT(3, 4) + F(0.768178f) * AT(5, 4) + F(-0.375330f) * AT(7, 4));
801
- const Temp_Type X125 = D(F(-0.074658f) * AT(1, 5) + F(0.513280f) * AT(3, 5) + F(0.768178f) * AT(5, 5) + F(-0.375330f) * AT(7, 5));
802
- const Temp_Type X126 = D(F(-0.074658f) * AT(1, 6) + F(0.513280f) * AT(3, 6) + F(0.768178f) * AT(5, 6) + F(-0.375330f) * AT(7, 6));
803
- const Temp_Type X127 = D(F(-0.074658f) * AT(1, 7) + F(0.513280f) * AT(3, 7) + F(0.768178f) * AT(5, 7) + F(-0.375330f) * AT(7, 7));
804
- const Temp_Type X130 = AT(6, 0);
805
- const Temp_Type X131 = AT(6, 1);
806
- const Temp_Type X132 = AT(6, 2);
807
- const Temp_Type X133 = AT(6, 3);
808
- const Temp_Type X134 = AT(6, 4);
809
- const Temp_Type X135 = AT(6, 5);
810
- const Temp_Type X136 = AT(6, 6);
811
- const Temp_Type X137 = AT(6, 7);
812
- // 80 muls 48 adds
813
-
814
- // 4x4 = 4x8 times 8x4, matrix 1 is constant
815
- R.at(0, 0) = X100;
816
- R.at(0, 1) = D(X101 * F(0.415735f) + X103 * F(0.791065f) + X105 * F(-0.352443f) + X107 * F(0.277785f));
817
- R.at(0, 2) = X104;
818
- R.at(0, 3) = D(X101 * F(0.022887f) + X103 * F(-0.097545f) + X105 * F(0.490393f) + X107 * F(0.865723f));
819
- R.at(1, 0) = X110;
820
- R.at(1, 1) = D(X111 * F(0.415735f) + X113 * F(0.791065f) + X115 * F(-0.352443f) + X117 * F(0.277785f));
821
- R.at(1, 2) = X114;
822
- R.at(1, 3) = D(X111 * F(0.022887f) + X113 * F(-0.097545f) + X115 * F(0.490393f) + X117 * F(0.865723f));
823
- R.at(2, 0) = X120;
824
- R.at(2, 1) = D(X121 * F(0.415735f) + X123 * F(0.791065f) + X125 * F(-0.352443f) + X127 * F(0.277785f));
825
- R.at(2, 2) = X124;
826
- R.at(2, 3) = D(X121 * F(0.022887f) + X123 * F(-0.097545f) + X125 * F(0.490393f) + X127 * F(0.865723f));
827
- R.at(3, 0) = X130;
828
- R.at(3, 1) = D(X131 * F(0.415735f) + X133 * F(0.791065f) + X135 * F(-0.352443f) + X137 * F(0.277785f));
829
- R.at(3, 2) = X134;
830
- R.at(3, 3) = D(X131 * F(0.022887f) + X133 * F(-0.097545f) + X135 * F(0.490393f) + X137 * F(0.865723f));
831
- // 40 muls 24 adds
832
- // 4x4 = 4x8 times 8x4, matrix 1 is constant
833
- S.at(0, 0) = D(X101 * F(0.906127f) + X103 * F(-0.318190f) + X105 * F(0.212608f) + X107 * F(-0.180240f));
834
- S.at(0, 1) = X102;
835
- S.at(0, 2) = D(X101 * F(-0.074658f) + X103 * F(0.513280f) + X105 * F(0.768178f) + X107 * F(-0.375330f));
836
- S.at(0, 3) = X106;
837
- S.at(1, 0) = D(X111 * F(0.906127f) + X113 * F(-0.318190f) + X115 * F(0.212608f) + X117 * F(-0.180240f));
838
- S.at(1, 1) = X112;
839
- S.at(1, 2) = D(X111 * F(-0.074658f) + X113 * F(0.513280f) + X115 * F(0.768178f) + X117 * F(-0.375330f));
840
- S.at(1, 3) = X116;
841
- S.at(2, 0) = D(X121 * F(0.906127f) + X123 * F(-0.318190f) + X125 * F(0.212608f) + X127 * F(-0.180240f));
842
- S.at(2, 1) = X122;
843
- S.at(2, 2) = D(X121 * F(-0.074658f) + X123 * F(0.513280f) + X125 * F(0.768178f) + X127 * F(-0.375330f));
844
- S.at(2, 3) = X126;
845
- S.at(3, 0) = D(X131 * F(0.906127f) + X133 * F(-0.318190f) + X135 * F(0.212608f) + X137 * F(-0.180240f));
846
- S.at(3, 1) = X132;
847
- S.at(3, 2) = D(X131 * F(-0.074658f) + X133 * F(0.513280f) + X135 * F(0.768178f) + X137 * F(-0.375330f));
848
- S.at(3, 3) = X136;
849
- // 40 muls 24 adds
850
- }
851
- };
852
- } // end namespace DCT_Upsample
853
-
854
- // Unconditionally frees all allocated m_blocks.
855
- void jpeg_decoder::free_all_blocks()
856
- {
857
- m_pStream = NULL;
858
- for (mem_block *b = m_pMem_blocks; b; )
859
- {
860
- mem_block *n = b->m_pNext;
861
- jpgd_free(b);
862
- b = n;
863
- }
864
- m_pMem_blocks = NULL;
865
- }
866
-
867
- // This method handles all errors.
868
- // It could easily be changed to use C++ exceptions.
869
- void jpeg_decoder::stop_decoding(jpgd_status status)
870
- {
871
- m_error_code = status;
872
- free_all_blocks();
873
- longjmp(m_jmp_state, status);
874
-
875
- // we shouldn't get here as longjmp shouldn't return, but we put it here to make it explicit
876
- // that this function doesn't return, otherwise we get this error:
877
- //
878
- // error : function declared 'noreturn' should not return
879
- exit(1);
880
- }
881
-
882
- void *jpeg_decoder::alloc(size_t nSize, bool zero)
883
- {
884
- nSize = (JPGD_MAX(nSize, 1) + 3) & ~3;
885
- char *rv = NULL;
886
- for (mem_block *b = m_pMem_blocks; b; b = b->m_pNext)
887
- {
888
- if ((b->m_used_count + nSize) <= b->m_size)
889
- {
890
- rv = b->m_data + b->m_used_count;
891
- b->m_used_count += nSize;
892
- break;
893
- }
894
- }
895
- if (!rv)
896
- {
897
- int capacity = JPGD_MAX(32768 - 256, (nSize + 2047) & ~2047);
898
- mem_block *b = (mem_block*)jpgd_malloc(sizeof(mem_block) + capacity);
899
- if (!b) stop_decoding(JPGD_NOTENOUGHMEM);
900
- b->m_pNext = m_pMem_blocks; m_pMem_blocks = b;
901
- b->m_used_count = nSize;
902
- b->m_size = capacity;
903
- rv = b->m_data;
904
- }
905
- if (zero) memset(rv, 0, nSize);
906
- return rv;
907
- }
908
-
909
- void jpeg_decoder::word_clear(void *p, uint16 c, uint n)
910
- {
911
- uint8 *pD = (uint8*)p;
912
- const uint8 l = c & 0xFF, h = (c >> 8) & 0xFF;
913
- while (n)
914
- {
915
- pD[0] = l; pD[1] = h; pD += 2;
916
- n--;
917
- }
918
- }
919
-
920
- // Refill the input buffer.
921
- // This method will sit in a loop until (A) the buffer is full or (B)
922
- // the stream's read() method reports and end of file condition.
923
- void jpeg_decoder::prep_in_buffer()
924
- {
925
- m_in_buf_left = 0;
926
- m_pIn_buf_ofs = m_in_buf;
927
-
928
- if (m_eof_flag)
929
- return;
930
-
931
- do
932
- {
933
- int bytes_read = m_pStream->read(m_in_buf + m_in_buf_left, JPGD_IN_BUF_SIZE - m_in_buf_left, &m_eof_flag);
934
- if (bytes_read == -1)
935
- stop_decoding(JPGD_STREAM_READ);
936
-
937
- m_in_buf_left += bytes_read;
938
- } while ((m_in_buf_left < JPGD_IN_BUF_SIZE) && (!m_eof_flag));
939
-
940
- m_total_bytes_read += m_in_buf_left;
941
-
942
- // Pad the end of the block with M_EOI (prevents the decompressor from going off the rails if the stream is invalid).
943
- // (This dates way back to when this decompressor was written in C/asm, and the all-asm Huffman decoder did some fancy things to increase perf.)
944
- word_clear(m_pIn_buf_ofs + m_in_buf_left, 0xD9FF, 64);
945
- }
946
-
947
- // Read a Huffman code table.
948
- void jpeg_decoder::read_dht_marker()
949
- {
950
- int i, index, count;
951
- uint8 huff_num[17];
952
- uint8 huff_val[256];
953
-
954
- uint num_left = get_bits(16);
955
-
956
- if (num_left < 2)
957
- stop_decoding(JPGD_BAD_DHT_MARKER);
958
-
959
- num_left -= 2;
960
-
961
- while (num_left)
962
- {
963
- index = get_bits(8);
964
-
965
- huff_num[0] = 0;
966
-
967
- count = 0;
968
-
969
- for (i = 1; i <= 16; i++)
970
- {
971
- huff_num[i] = static_cast<uint8>(get_bits(8));
972
- count += huff_num[i];
973
- }
974
-
975
- if (count > 255)
976
- stop_decoding(JPGD_BAD_DHT_COUNTS);
977
-
978
- for (i = 0; i < count; i++)
979
- huff_val[i] = static_cast<uint8>(get_bits(8));
980
-
981
- i = 1 + 16 + count;
982
-
983
- if (num_left < (uint)i)
984
- stop_decoding(JPGD_BAD_DHT_MARKER);
985
-
986
- num_left -= i;
987
-
988
- if ((index & 0x10) > 0x10)
989
- stop_decoding(JPGD_BAD_DHT_INDEX);
990
-
991
- index = (index & 0x0F) + ((index & 0x10) >> 4) * (JPGD_MAX_HUFF_TABLES >> 1);
992
-
993
- if (index >= JPGD_MAX_HUFF_TABLES)
994
- stop_decoding(JPGD_BAD_DHT_INDEX);
995
-
996
- if (!m_huff_num[index])
997
- m_huff_num[index] = (uint8 *)alloc(17);
998
-
999
- if (!m_huff_val[index])
1000
- m_huff_val[index] = (uint8 *)alloc(256);
1001
-
1002
- m_huff_ac[index] = (index & 0x10) != 0;
1003
- memcpy(m_huff_num[index], huff_num, 17);
1004
- memcpy(m_huff_val[index], huff_val, 256);
1005
- }
1006
- }
1007
-
1008
- // Read a quantization table.
1009
- void jpeg_decoder::read_dqt_marker()
1010
- {
1011
- int n, i, prec;
1012
- uint num_left;
1013
- uint temp;
1014
-
1015
- num_left = get_bits(16);
1016
-
1017
- if (num_left < 2)
1018
- stop_decoding(JPGD_BAD_DQT_MARKER);
1019
-
1020
- num_left -= 2;
1021
-
1022
- while (num_left)
1023
- {
1024
- n = get_bits(8);
1025
- prec = n >> 4;
1026
- n &= 0x0F;
1027
-
1028
- if (n >= JPGD_MAX_QUANT_TABLES)
1029
- stop_decoding(JPGD_BAD_DQT_TABLE);
1030
-
1031
- if (!m_quant[n])
1032
- m_quant[n] = (jpgd_quant_t *)alloc(64 * sizeof(jpgd_quant_t));
1033
-
1034
- // read quantization entries, in zag order
1035
- for (i = 0; i < 64; i++)
1036
- {
1037
- temp = get_bits(8);
1038
-
1039
- if (prec)
1040
- temp = (temp << 8) + get_bits(8);
1041
-
1042
- m_quant[n][i] = static_cast<jpgd_quant_t>(temp);
1043
- }
1044
-
1045
- i = 64 + 1;
1046
-
1047
- if (prec)
1048
- i += 64;
1049
-
1050
- if (num_left < (uint)i)
1051
- stop_decoding(JPGD_BAD_DQT_LENGTH);
1052
-
1053
- num_left -= i;
1054
- }
1055
- }
1056
-
1057
- // Read the start of frame (SOF) marker.
1058
- void jpeg_decoder::read_sof_marker()
1059
- {
1060
- int i;
1061
- uint num_left;
1062
-
1063
- num_left = get_bits(16);
1064
-
1065
- if (get_bits(8) != 8) /* precision: sorry, only 8-bit precision is supported right now */
1066
- stop_decoding(JPGD_BAD_PRECISION);
1067
-
1068
- m_image_y_size = get_bits(16);
1069
-
1070
- if ((m_image_y_size < 1) || (m_image_y_size > JPGD_MAX_HEIGHT))
1071
- stop_decoding(JPGD_BAD_HEIGHT);
1072
-
1073
- m_image_x_size = get_bits(16);
1074
-
1075
- if ((m_image_x_size < 1) || (m_image_x_size > JPGD_MAX_WIDTH))
1076
- stop_decoding(JPGD_BAD_WIDTH);
1077
-
1078
- m_comps_in_frame = get_bits(8);
1079
-
1080
- if (m_comps_in_frame > JPGD_MAX_COMPONENTS)
1081
- stop_decoding(JPGD_TOO_MANY_COMPONENTS);
1082
-
1083
- if (num_left != (uint)(m_comps_in_frame * 3 + 8))
1084
- stop_decoding(JPGD_BAD_SOF_LENGTH);
1085
-
1086
- for (i = 0; i < m_comps_in_frame; i++)
1087
- {
1088
- m_comp_ident[i] = get_bits(8);
1089
- m_comp_h_samp[i] = get_bits(4);
1090
- m_comp_v_samp[i] = get_bits(4);
1091
- m_comp_quant[i] = get_bits(8);
1092
- }
1093
- }
1094
-
1095
- // Used to skip unrecognized markers.
1096
- void jpeg_decoder::skip_variable_marker()
1097
- {
1098
- uint num_left;
1099
-
1100
- num_left = get_bits(16);
1101
-
1102
- if (num_left < 2)
1103
- stop_decoding(JPGD_BAD_VARIABLE_MARKER);
1104
-
1105
- num_left -= 2;
1106
-
1107
- while (num_left)
1108
- {
1109
- get_bits(8);
1110
- num_left--;
1111
- }
1112
- }
1113
-
1114
- // Read a define restart interval (DRI) marker.
1115
- void jpeg_decoder::read_dri_marker()
1116
- {
1117
- if (get_bits(16) != 4)
1118
- stop_decoding(JPGD_BAD_DRI_LENGTH);
1119
-
1120
- m_restart_interval = get_bits(16);
1121
- }
1122
-
1123
- // Read a start of scan (SOS) marker.
1124
- void jpeg_decoder::read_sos_marker()
1125
- {
1126
- uint num_left;
1127
- int i, ci, n, c, cc;
1128
-
1129
- num_left = get_bits(16);
1130
-
1131
- n = get_bits(8);
1132
-
1133
- m_comps_in_scan = n;
1134
-
1135
- num_left -= 3;
1136
-
1137
- if ( (num_left != (uint)(n * 2 + 3)) || (n < 1) || (n > JPGD_MAX_COMPS_IN_SCAN) )
1138
- stop_decoding(JPGD_BAD_SOS_LENGTH);
1139
-
1140
- for (i = 0; i < n; i++)
1141
- {
1142
- cc = get_bits(8);
1143
- c = get_bits(8);
1144
- num_left -= 2;
1145
-
1146
- for (ci = 0; ci < m_comps_in_frame; ci++)
1147
- if (cc == m_comp_ident[ci])
1148
- break;
1149
-
1150
- if (ci >= m_comps_in_frame)
1151
- stop_decoding(JPGD_BAD_SOS_COMP_ID);
1152
-
1153
- m_comp_list[i] = ci;
1154
- m_comp_dc_tab[ci] = (c >> 4) & 15;
1155
- m_comp_ac_tab[ci] = (c & 15) + (JPGD_MAX_HUFF_TABLES >> 1);
1156
- }
1157
-
1158
- m_spectral_start = get_bits(8);
1159
- m_spectral_end = get_bits(8);
1160
- m_successive_high = get_bits(4);
1161
- m_successive_low = get_bits(4);
1162
-
1163
- if (!m_progressive_flag)
1164
- {
1165
- m_spectral_start = 0;
1166
- m_spectral_end = 63;
1167
- }
1168
-
1169
- num_left -= 3;
1170
-
1171
- while (num_left) /* read past whatever is num_left */
1172
- {
1173
- get_bits(8);
1174
- num_left--;
1175
- }
1176
- }
1177
-
1178
- // Finds the next marker.
1179
- int jpeg_decoder::next_marker()
1180
- {
1181
- uint c, bytes;
1182
-
1183
- bytes = 0;
1184
-
1185
- do
1186
- {
1187
- do
1188
- {
1189
- bytes++;
1190
- c = get_bits(8);
1191
- } while (c != 0xFF);
1192
-
1193
- do
1194
- {
1195
- c = get_bits(8);
1196
- } while (c == 0xFF);
1197
-
1198
- } while (c == 0);
1199
-
1200
- // If bytes > 0 here, there where extra bytes before the marker (not good).
1201
-
1202
- return c;
1203
- }
1204
-
1205
- // Process markers. Returns when an SOFx, SOI, EOI, or SOS marker is
1206
- // encountered.
1207
- int jpeg_decoder::process_markers()
1208
- {
1209
- int c;
1210
-
1211
- for ( ; ; )
1212
- {
1213
- c = next_marker();
1214
-
1215
- switch (c)
1216
- {
1217
- case M_SOF0:
1218
- case M_SOF1:
1219
- case M_SOF2:
1220
- case M_SOF3:
1221
- case M_SOF5:
1222
- case M_SOF6:
1223
- case M_SOF7:
1224
- // case M_JPG:
1225
- case M_SOF9:
1226
- case M_SOF10:
1227
- case M_SOF11:
1228
- case M_SOF13:
1229
- case M_SOF14:
1230
- case M_SOF15:
1231
- case M_SOI:
1232
- case M_EOI:
1233
- case M_SOS:
1234
- {
1235
- return c;
1236
- }
1237
- case M_DHT:
1238
- {
1239
- read_dht_marker();
1240
- break;
1241
- }
1242
- // No arithmitic support - dumb patents!
1243
- case M_DAC:
1244
- {
1245
- stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
1246
- break;
1247
- }
1248
- case M_DQT:
1249
- {
1250
- read_dqt_marker();
1251
- break;
1252
- }
1253
- case M_DRI:
1254
- {
1255
- read_dri_marker();
1256
- break;
1257
- }
1258
- //case M_APP0: /* no need to read the JFIF marker */
1259
-
1260
- case M_JPG:
1261
- case M_RST0: /* no parameters */
1262
- case M_RST1:
1263
- case M_RST2:
1264
- case M_RST3:
1265
- case M_RST4:
1266
- case M_RST5:
1267
- case M_RST6:
1268
- case M_RST7:
1269
- case M_TEM:
1270
- {
1271
- stop_decoding(JPGD_UNEXPECTED_MARKER);
1272
- break;
1273
- }
1274
- default: /* must be DNL, DHP, EXP, APPn, JPGn, COM, or RESn or APP0 */
1275
- {
1276
- skip_variable_marker();
1277
- break;
1278
- }
1279
- }
1280
- }
1281
- }
1282
-
1283
- // Finds the start of image (SOI) marker.
1284
- // This code is rather defensive: it only checks the first 512 bytes to avoid
1285
- // false positives.
1286
- void jpeg_decoder::locate_soi_marker()
1287
- {
1288
- uint lastchar, thischar;
1289
- uint bytesleft;
1290
-
1291
- lastchar = get_bits(8);
1292
-
1293
- thischar = get_bits(8);
1294
-
1295
- /* ok if it's a normal JPEG file without a special header */
1296
-
1297
- if ((lastchar == 0xFF) && (thischar == M_SOI))
1298
- return;
1299
-
1300
- bytesleft = 4096; //512;
1301
-
1302
- for ( ; ; )
1303
- {
1304
- if (--bytesleft == 0)
1305
- stop_decoding(JPGD_NOT_JPEG);
1306
-
1307
- lastchar = thischar;
1308
-
1309
- thischar = get_bits(8);
1310
-
1311
- if (lastchar == 0xFF)
1312
- {
1313
- if (thischar == M_SOI)
1314
- break;
1315
- else if (thischar == M_EOI) // get_bits will keep returning M_EOI if we read past the end
1316
- stop_decoding(JPGD_NOT_JPEG);
1317
- }
1318
- }
1319
-
1320
- // Check the next character after marker: if it's not 0xFF, it can't be the start of the next marker, so the file is bad.
1321
- thischar = (m_bit_buf >> 24) & 0xFF;
1322
-
1323
- if (thischar != 0xFF)
1324
- stop_decoding(JPGD_NOT_JPEG);
1325
- }
1326
-
1327
- // Find a start of frame (SOF) marker.
1328
- void jpeg_decoder::locate_sof_marker()
1329
- {
1330
- locate_soi_marker();
1331
-
1332
- int c = process_markers();
1333
-
1334
- switch (c)
1335
- {
1336
- case M_SOF2:
1337
- m_progressive_flag = JPGD_TRUE;
1338
- case M_SOF0: /* baseline DCT */
1339
- case M_SOF1: /* extended sequential DCT */
1340
- {
1341
- read_sof_marker();
1342
- break;
1343
- }
1344
- case M_SOF9: /* Arithmitic coding */
1345
- {
1346
- stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
1347
- break;
1348
- }
1349
- default:
1350
- {
1351
- stop_decoding(JPGD_UNSUPPORTED_MARKER);
1352
- break;
1353
- }
1354
- }
1355
- }
1356
-
1357
- // Find a start of scan (SOS) marker.
1358
- int jpeg_decoder::locate_sos_marker()
1359
- {
1360
- int c;
1361
-
1362
- c = process_markers();
1363
-
1364
- if (c == M_EOI)
1365
- return JPGD_FALSE;
1366
- else if (c != M_SOS)
1367
- stop_decoding(JPGD_UNEXPECTED_MARKER);
1368
-
1369
- read_sos_marker();
1370
-
1371
- return JPGD_TRUE;
1372
- }
1373
-
1374
- // Reset everything to default/uninitialized state.
1375
- void jpeg_decoder::init(jpeg_decoder_stream *pStream)
1376
- {
1377
- m_pMem_blocks = NULL;
1378
- m_error_code = JPGD_SUCCESS;
1379
- m_ready_flag = false;
1380
- m_image_x_size = m_image_y_size = 0;
1381
- m_pStream = pStream;
1382
- m_progressive_flag = JPGD_FALSE;
1383
-
1384
- memset(m_huff_ac, 0, sizeof(m_huff_ac));
1385
- memset(m_huff_num, 0, sizeof(m_huff_num));
1386
- memset(m_huff_val, 0, sizeof(m_huff_val));
1387
- memset(m_quant, 0, sizeof(m_quant));
1388
-
1389
- m_scan_type = 0;
1390
- m_comps_in_frame = 0;
1391
-
1392
- memset(m_comp_h_samp, 0, sizeof(m_comp_h_samp));
1393
- memset(m_comp_v_samp, 0, sizeof(m_comp_v_samp));
1394
- memset(m_comp_quant, 0, sizeof(m_comp_quant));
1395
- memset(m_comp_ident, 0, sizeof(m_comp_ident));
1396
- memset(m_comp_h_blocks, 0, sizeof(m_comp_h_blocks));
1397
- memset(m_comp_v_blocks, 0, sizeof(m_comp_v_blocks));
1398
-
1399
- m_comps_in_scan = 0;
1400
- memset(m_comp_list, 0, sizeof(m_comp_list));
1401
- memset(m_comp_dc_tab, 0, sizeof(m_comp_dc_tab));
1402
- memset(m_comp_ac_tab, 0, sizeof(m_comp_ac_tab));
1403
-
1404
- m_spectral_start = 0;
1405
- m_spectral_end = 0;
1406
- m_successive_low = 0;
1407
- m_successive_high = 0;
1408
- m_max_mcu_x_size = 0;
1409
- m_max_mcu_y_size = 0;
1410
- m_blocks_per_mcu = 0;
1411
- m_max_blocks_per_row = 0;
1412
- m_mcus_per_row = 0;
1413
- m_mcus_per_col = 0;
1414
- m_expanded_blocks_per_component = 0;
1415
- m_expanded_blocks_per_mcu = 0;
1416
- m_expanded_blocks_per_row = 0;
1417
- m_freq_domain_chroma_upsample = false;
1418
-
1419
- memset(m_mcu_org, 0, sizeof(m_mcu_org));
1420
-
1421
- m_total_lines_left = 0;
1422
- m_mcu_lines_left = 0;
1423
- m_real_dest_bytes_per_scan_line = 0;
1424
- m_dest_bytes_per_scan_line = 0;
1425
- m_dest_bytes_per_pixel = 0;
1426
-
1427
- memset(m_pHuff_tabs, 0, sizeof(m_pHuff_tabs));
1428
-
1429
- memset(m_dc_coeffs, 0, sizeof(m_dc_coeffs));
1430
- memset(m_ac_coeffs, 0, sizeof(m_ac_coeffs));
1431
- memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu));
1432
-
1433
- m_eob_run = 0;
1434
-
1435
- memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu));
1436
-
1437
- m_pIn_buf_ofs = m_in_buf;
1438
- m_in_buf_left = 0;
1439
- m_eof_flag = false;
1440
- m_tem_flag = 0;
1441
-
1442
- memset(m_in_buf_pad_start, 0, sizeof(m_in_buf_pad_start));
1443
- memset(m_in_buf, 0, sizeof(m_in_buf));
1444
- memset(m_in_buf_pad_end, 0, sizeof(m_in_buf_pad_end));
1445
-
1446
- m_restart_interval = 0;
1447
- m_restarts_left = 0;
1448
- m_next_restart_num = 0;
1449
-
1450
- m_max_mcus_per_row = 0;
1451
- m_max_blocks_per_mcu = 0;
1452
- m_max_mcus_per_col = 0;
1453
-
1454
- memset(m_last_dc_val, 0, sizeof(m_last_dc_val));
1455
- m_pMCU_coefficients = NULL;
1456
- m_pSample_buf = NULL;
1457
-
1458
- m_total_bytes_read = 0;
1459
-
1460
- m_pScan_line_0 = NULL;
1461
- m_pScan_line_1 = NULL;
1462
-
1463
- // Ready the input buffer.
1464
- prep_in_buffer();
1465
-
1466
- // Prime the bit buffer.
1467
- m_bits_left = 16;
1468
- m_bit_buf = 0;
1469
-
1470
- get_bits(16);
1471
- get_bits(16);
1472
-
1473
- for (int i = 0; i < JPGD_MAX_BLOCKS_PER_MCU; i++)
1474
- m_mcu_block_max_zag[i] = 64;
1475
- }
1476
-
1477
- #define SCALEBITS 16
1478
- #define ONE_HALF ((int) 1 << (SCALEBITS-1))
1479
- #define FIX(x) ((int) ((x) * (1L<<SCALEBITS) + 0.5f))
1480
-
1481
- // Create a few tables that allow us to quickly convert YCbCr to RGB.
1482
- void jpeg_decoder::create_look_ups()
1483
- {
1484
- for (int i = 0; i <= 255; i++)
1485
- {
1486
- int k = i - 128;
1487
- m_crr[i] = ( FIX(1.40200f) * k + ONE_HALF) >> SCALEBITS;
1488
- m_cbb[i] = ( FIX(1.77200f) * k + ONE_HALF) >> SCALEBITS;
1489
- m_crg[i] = (-FIX(0.71414f)) * k;
1490
- m_cbg[i] = (-FIX(0.34414f)) * k + ONE_HALF;
1491
- }
1492
- }
1493
-
1494
- // This method throws back into the stream any bytes that where read
1495
- // into the bit buffer during initial marker scanning.
1496
- void jpeg_decoder::fix_in_buffer()
1497
- {
1498
- // In case any 0xFF's where pulled into the buffer during marker scanning.
1499
- JPGD_ASSERT((m_bits_left & 7) == 0);
1500
-
1501
- if (m_bits_left == 16)
1502
- stuff_char( (uint8)(m_bit_buf & 0xFF));
1503
-
1504
- if (m_bits_left >= 8)
1505
- stuff_char( (uint8)((m_bit_buf >> 8) & 0xFF));
1506
-
1507
- stuff_char((uint8)((m_bit_buf >> 16) & 0xFF));
1508
- stuff_char((uint8)((m_bit_buf >> 24) & 0xFF));
1509
-
1510
- m_bits_left = 16;
1511
- get_bits_no_markers(16);
1512
- get_bits_no_markers(16);
1513
- }
1514
-
1515
- void jpeg_decoder::transform_mcu(int mcu_row)
1516
- {
1517
- jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
1518
- uint8* pDst_ptr = m_pSample_buf + mcu_row * m_blocks_per_mcu * 64;
1519
-
1520
- for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
1521
- {
1522
- idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag[mcu_block]);
1523
- pSrc_ptr += 64;
1524
- pDst_ptr += 64;
1525
- }
1526
- }
1527
-
1528
- static const uint8 s_max_rc[64] =
1529
- {
1530
- 17, 18, 34, 50, 50, 51, 52, 52, 52, 68, 84, 84, 84, 84, 85, 86, 86, 86, 86, 86,
1531
- 102, 118, 118, 118, 118, 118, 118, 119, 120, 120, 120, 120, 120, 120, 120, 136,
1532
- 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136,
1533
- 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136
1534
- };
1535
-
1536
- void jpeg_decoder::transform_mcu_expand(int mcu_row)
1537
- {
1538
- jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
1539
- uint8* pDst_ptr = m_pSample_buf + mcu_row * m_expanded_blocks_per_mcu * 64;
1540
-
1541
- // Y IDCT
1542
- int mcu_block;
1543
- for (mcu_block = 0; mcu_block < m_expanded_blocks_per_component; mcu_block++)
1544
- {
1545
- idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag[mcu_block]);
1546
- pSrc_ptr += 64;
1547
- pDst_ptr += 64;
1548
- }
1549
-
1550
- // Chroma IDCT, with upsampling
1551
- jpgd_block_t temp_block[64];
1552
-
1553
- for (int i = 0; i < 2; i++)
1554
- {
1555
- DCT_Upsample::Matrix44 P, Q, R, S;
1556
-
1557
- JPGD_ASSERT(m_mcu_block_max_zag[mcu_block] >= 1);
1558
- JPGD_ASSERT(m_mcu_block_max_zag[mcu_block] <= 64);
1559
-
1560
- switch (s_max_rc[m_mcu_block_max_zag[mcu_block++] - 1])
1561
- {
1562
- case 1*16+1:
1563
- DCT_Upsample::P_Q<1, 1>::calc(P, Q, pSrc_ptr);
1564
- DCT_Upsample::R_S<1, 1>::calc(R, S, pSrc_ptr);
1565
- break;
1566
- case 1*16+2:
1567
- DCT_Upsample::P_Q<1, 2>::calc(P, Q, pSrc_ptr);
1568
- DCT_Upsample::R_S<1, 2>::calc(R, S, pSrc_ptr);
1569
- break;
1570
- case 2*16+2:
1571
- DCT_Upsample::P_Q<2, 2>::calc(P, Q, pSrc_ptr);
1572
- DCT_Upsample::R_S<2, 2>::calc(R, S, pSrc_ptr);
1573
- break;
1574
- case 3*16+2:
1575
- DCT_Upsample::P_Q<3, 2>::calc(P, Q, pSrc_ptr);
1576
- DCT_Upsample::R_S<3, 2>::calc(R, S, pSrc_ptr);
1577
- break;
1578
- case 3*16+3:
1579
- DCT_Upsample::P_Q<3, 3>::calc(P, Q, pSrc_ptr);
1580
- DCT_Upsample::R_S<3, 3>::calc(R, S, pSrc_ptr);
1581
- break;
1582
- case 3*16+4:
1583
- DCT_Upsample::P_Q<3, 4>::calc(P, Q, pSrc_ptr);
1584
- DCT_Upsample::R_S<3, 4>::calc(R, S, pSrc_ptr);
1585
- break;
1586
- case 4*16+4:
1587
- DCT_Upsample::P_Q<4, 4>::calc(P, Q, pSrc_ptr);
1588
- DCT_Upsample::R_S<4, 4>::calc(R, S, pSrc_ptr);
1589
- break;
1590
- case 5*16+4:
1591
- DCT_Upsample::P_Q<5, 4>::calc(P, Q, pSrc_ptr);
1592
- DCT_Upsample::R_S<5, 4>::calc(R, S, pSrc_ptr);
1593
- break;
1594
- case 5*16+5:
1595
- DCT_Upsample::P_Q<5, 5>::calc(P, Q, pSrc_ptr);
1596
- DCT_Upsample::R_S<5, 5>::calc(R, S, pSrc_ptr);
1597
- break;
1598
- case 5*16+6:
1599
- DCT_Upsample::P_Q<5, 6>::calc(P, Q, pSrc_ptr);
1600
- DCT_Upsample::R_S<5, 6>::calc(R, S, pSrc_ptr);
1601
- break;
1602
- case 6*16+6:
1603
- DCT_Upsample::P_Q<6, 6>::calc(P, Q, pSrc_ptr);
1604
- DCT_Upsample::R_S<6, 6>::calc(R, S, pSrc_ptr);
1605
- break;
1606
- case 7*16+6:
1607
- DCT_Upsample::P_Q<7, 6>::calc(P, Q, pSrc_ptr);
1608
- DCT_Upsample::R_S<7, 6>::calc(R, S, pSrc_ptr);
1609
- break;
1610
- case 7*16+7:
1611
- DCT_Upsample::P_Q<7, 7>::calc(P, Q, pSrc_ptr);
1612
- DCT_Upsample::R_S<7, 7>::calc(R, S, pSrc_ptr);
1613
- break;
1614
- case 7*16+8:
1615
- DCT_Upsample::P_Q<7, 8>::calc(P, Q, pSrc_ptr);
1616
- DCT_Upsample::R_S<7, 8>::calc(R, S, pSrc_ptr);
1617
- break;
1618
- case 8*16+8:
1619
- DCT_Upsample::P_Q<8, 8>::calc(P, Q, pSrc_ptr);
1620
- DCT_Upsample::R_S<8, 8>::calc(R, S, pSrc_ptr);
1621
- break;
1622
- default:
1623
- JPGD_ASSERT(false);
1624
- }
1625
-
1626
- DCT_Upsample::Matrix44 a(P + Q); P -= Q;
1627
- DCT_Upsample::Matrix44& b = P;
1628
- DCT_Upsample::Matrix44 c(R + S); R -= S;
1629
- DCT_Upsample::Matrix44& d = R;
1630
-
1631
- DCT_Upsample::Matrix44::add_and_store(temp_block, a, c);
1632
- idct_4x4(temp_block, pDst_ptr);
1633
- pDst_ptr += 64;
1634
-
1635
- DCT_Upsample::Matrix44::sub_and_store(temp_block, a, c);
1636
- idct_4x4(temp_block, pDst_ptr);
1637
- pDst_ptr += 64;
1638
-
1639
- DCT_Upsample::Matrix44::add_and_store(temp_block, b, d);
1640
- idct_4x4(temp_block, pDst_ptr);
1641
- pDst_ptr += 64;
1642
-
1643
- DCT_Upsample::Matrix44::sub_and_store(temp_block, b, d);
1644
- idct_4x4(temp_block, pDst_ptr);
1645
- pDst_ptr += 64;
1646
-
1647
- pSrc_ptr += 64;
1648
- }
1649
- }
1650
-
1651
- // Loads and dequantizes the next row of (already decoded) coefficients.
1652
- // Progressive images only.
1653
- void jpeg_decoder::load_next_row()
1654
- {
1655
- int i;
1656
- jpgd_block_t *p;
1657
- jpgd_quant_t *q;
1658
- int mcu_row, mcu_block, row_block = 0;
1659
- int component_num, component_id;
1660
- int block_x_mcu[JPGD_MAX_COMPONENTS];
1661
-
1662
- memset(block_x_mcu, 0, JPGD_MAX_COMPONENTS * sizeof(int));
1663
-
1664
- for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
1665
- {
1666
- int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
1667
-
1668
- for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
1669
- {
1670
- component_id = m_mcu_org[mcu_block];
1671
- q = m_quant[m_comp_quant[component_id]];
1672
-
1673
- p = m_pMCU_coefficients + 64 * mcu_block;
1674
-
1675
- jpgd_block_t* pAC = coeff_buf_getp(m_ac_coeffs[component_id], block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs);
1676
- jpgd_block_t* pDC = coeff_buf_getp(m_dc_coeffs[component_id], block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs);
1677
- p[0] = pDC[0];
1678
- memcpy(&p[1], &pAC[1], 63 * sizeof(jpgd_block_t));
1679
-
1680
- for (i = 63; i > 0; i--)
1681
- if (p[g_ZAG[i]])
1682
- break;
1683
-
1684
- m_mcu_block_max_zag[mcu_block] = i + 1;
1685
-
1686
- for ( ; i >= 0; i--)
1687
- if (p[g_ZAG[i]])
1688
- p[g_ZAG[i]] = static_cast<jpgd_block_t>(p[g_ZAG[i]] * q[i]);
1689
-
1690
- row_block++;
1691
-
1692
- if (m_comps_in_scan == 1)
1693
- block_x_mcu[component_id]++;
1694
- else
1695
- {
1696
- if (++block_x_mcu_ofs == m_comp_h_samp[component_id])
1697
- {
1698
- block_x_mcu_ofs = 0;
1699
-
1700
- if (++block_y_mcu_ofs == m_comp_v_samp[component_id])
1701
- {
1702
- block_y_mcu_ofs = 0;
1703
-
1704
- block_x_mcu[component_id] += m_comp_h_samp[component_id];
1705
- }
1706
- }
1707
- }
1708
- }
1709
-
1710
- if (m_freq_domain_chroma_upsample)
1711
- transform_mcu_expand(mcu_row);
1712
- else
1713
- transform_mcu(mcu_row);
1714
- }
1715
-
1716
- if (m_comps_in_scan == 1)
1717
- m_block_y_mcu[m_comp_list[0]]++;
1718
- else
1719
- {
1720
- for (component_num = 0; component_num < m_comps_in_scan; component_num++)
1721
- {
1722
- component_id = m_comp_list[component_num];
1723
-
1724
- m_block_y_mcu[component_id] += m_comp_v_samp[component_id];
1725
- }
1726
- }
1727
- }
1728
-
1729
- // Restart interval processing.
1730
- void jpeg_decoder::process_restart()
1731
- {
1732
- int i;
1733
- int c = 0;
1734
-
1735
- // Align to a byte boundry
1736
- // FIXME: Is this really necessary? get_bits_no_markers() never reads in markers!
1737
- //get_bits_no_markers(m_bits_left & 7);
1738
-
1739
- // Let's scan a little bit to find the marker, but not _too_ far.
1740
- // 1536 is a "fudge factor" that determines how much to scan.
1741
- for (i = 1536; i > 0; i--)
1742
- if (get_char() == 0xFF)
1743
- break;
1744
-
1745
- if (i == 0)
1746
- stop_decoding(JPGD_BAD_RESTART_MARKER);
1747
-
1748
- for ( ; i > 0; i--)
1749
- if ((c = get_char()) != 0xFF)
1750
- break;
1751
-
1752
- if (i == 0)
1753
- stop_decoding(JPGD_BAD_RESTART_MARKER);
1754
-
1755
- // Is it the expected marker? If not, something bad happened.
1756
- if (c != (m_next_restart_num + M_RST0))
1757
- stop_decoding(JPGD_BAD_RESTART_MARKER);
1758
-
1759
- // Reset each component's DC prediction values.
1760
- memset(&m_last_dc_val, 0, m_comps_in_frame * sizeof(uint));
1761
-
1762
- m_eob_run = 0;
1763
-
1764
- m_restarts_left = m_restart_interval;
1765
-
1766
- m_next_restart_num = (m_next_restart_num + 1) & 7;
1767
-
1768
- // Get the bit buffer going again...
1769
-
1770
- m_bits_left = 16;
1771
- get_bits_no_markers(16);
1772
- get_bits_no_markers(16);
1773
- }
1774
-
1775
// Returns coefficient c scaled by quantizer value q.
static inline int dequantize_ac(int c, int q)
{
  return c * q;
}
1776
-
1777
- // Decodes and dequantizes the next row of coefficients.
1778
- void jpeg_decoder::decode_next_row()
1779
- {
1780
- int row_block = 0;
1781
-
1782
- for (int mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
1783
- {
1784
- if ((m_restart_interval) && (m_restarts_left == 0))
1785
- process_restart();
1786
-
1787
- jpgd_block_t* p = m_pMCU_coefficients;
1788
- for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++, p += 64)
1789
- {
1790
- int component_id = m_mcu_org[mcu_block];
1791
- jpgd_quant_t* q = m_quant[m_comp_quant[component_id]];
1792
-
1793
- int r, s;
1794
- s = huff_decode(m_pHuff_tabs[m_comp_dc_tab[component_id]], r);
1795
- s = HUFF_EXTEND(r, s);
1796
-
1797
- m_last_dc_val[component_id] = (s += m_last_dc_val[component_id]);
1798
-
1799
- p[0] = static_cast<jpgd_block_t>(s * q[0]);
1800
-
1801
- int prev_num_set = m_mcu_block_max_zag[mcu_block];
1802
-
1803
- huff_tables *pH = m_pHuff_tabs[m_comp_ac_tab[component_id]];
1804
-
1805
- int k;
1806
- for (k = 1; k < 64; k++)
1807
- {
1808
- int extra_bits;
1809
- s = huff_decode(pH, extra_bits);
1810
-
1811
- r = s >> 4;
1812
- s &= 15;
1813
-
1814
- if (s)
1815
- {
1816
- if (r)
1817
- {
1818
- if ((k + r) > 63)
1819
- stop_decoding(JPGD_DECODE_ERROR);
1820
-
1821
- if (k < prev_num_set)
1822
- {
1823
- int n = JPGD_MIN(r, prev_num_set - k);
1824
- int kt = k;
1825
- while (n--)
1826
- p[g_ZAG[kt++]] = 0;
1827
- }
1828
-
1829
- k += r;
1830
- }
1831
-
1832
- s = HUFF_EXTEND(extra_bits, s);
1833
-
1834
- JPGD_ASSERT(k < 64);
1835
-
1836
- p[g_ZAG[k]] = static_cast<jpgd_block_t>(dequantize_ac(s, q[k])); //s * q[k];
1837
- }
1838
- else
1839
- {
1840
- if (r == 15)
1841
- {
1842
- if ((k + 16) > 64)
1843
- stop_decoding(JPGD_DECODE_ERROR);
1844
-
1845
- if (k < prev_num_set)
1846
- {
1847
- int n = JPGD_MIN(16, prev_num_set - k);
1848
- int kt = k;
1849
- while (n--)
1850
- {
1851
- JPGD_ASSERT(kt <= 63);
1852
- p[g_ZAG[kt++]] = 0;
1853
- }
1854
- }
1855
-
1856
- k += 16 - 1; // - 1 because the loop counter is k
1857
- // BEGIN EPIC MOD
1858
- JPGD_ASSERT(k < 64 && p[g_ZAG[k]] == 0);
1859
- // END EPIC MOD
1860
- }
1861
- else
1862
- break;
1863
- }
1864
- }
1865
-
1866
- if (k < prev_num_set)
1867
- {
1868
- int kt = k;
1869
- while (kt < prev_num_set)
1870
- p[g_ZAG[kt++]] = 0;
1871
- }
1872
-
1873
- m_mcu_block_max_zag[mcu_block] = k;
1874
-
1875
- row_block++;
1876
- }
1877
-
1878
- if (m_freq_domain_chroma_upsample)
1879
- transform_mcu_expand(mcu_row);
1880
- else
1881
- transform_mcu(mcu_row);
1882
-
1883
- m_restarts_left--;
1884
- }
1885
- }
1886
-
1887
- // YCbCr H1V1 (1x1:1:1, 3 m_blocks per MCU) to RGB
1888
- void jpeg_decoder::H1V1Convert()
1889
- {
1890
- int row = m_max_mcu_y_size - m_mcu_lines_left;
1891
- uint8 *d = m_pScan_line_0;
1892
- uint8 *s = m_pSample_buf + row * 8;
1893
-
1894
- for (int i = m_max_mcus_per_row; i > 0; i--)
1895
- {
1896
- for (int j = 0; j < 8; j++)
1897
- {
1898
- int y = s[j];
1899
- int cb = s[64+j];
1900
- int cr = s[128+j];
1901
-
1902
- if (jpg_format == ERGBFormatJPG::BGRA)
1903
- {
1904
- d[0] = clamp(y + m_cbb[cb]);
1905
- d[1] = clamp(y + ((m_crg[cr] + m_cbg[cb]) >> 16));
1906
- d[2] = clamp(y + m_crr[cr]);
1907
- d[3] = 255;
1908
- }
1909
- else
1910
- {
1911
- d[0] = clamp(y + m_crr[cr]);
1912
- d[1] = clamp(y + ((m_crg[cr] + m_cbg[cb]) >> 16));
1913
- d[2] = clamp(y + m_cbb[cb]);
1914
- d[3] = 255;
1915
- }
1916
- d += 4;
1917
- }
1918
-
1919
- s += 64*3;
1920
- }
1921
- }
1922
-
1923
- // YCbCr H2V1 (2x1:1:1, 4 m_blocks per MCU) to RGB
1924
- void jpeg_decoder::H2V1Convert()
1925
- {
1926
- int row = m_max_mcu_y_size - m_mcu_lines_left;
1927
- uint8 *d0 = m_pScan_line_0;
1928
- uint8 *y = m_pSample_buf + row * 8;
1929
- uint8 *c = m_pSample_buf + 2*64 + row * 8;
1930
-
1931
- for (int i = m_max_mcus_per_row; i > 0; i--)
1932
- {
1933
- for (int l = 0; l < 2; l++)
1934
- {
1935
- for (int j = 0; j < 4; j++)
1936
- {
1937
- int cb = c[0];
1938
- int cr = c[64];
1939
-
1940
- int rc = m_crr[cr];
1941
- int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
1942
- int bc = m_cbb[cb];
1943
-
1944
- int yy = y[j<<1];
1945
- if (jpg_format == ERGBFormatJPG::BGRA)
1946
- {
1947
- d0[0] = clamp(yy+bc);
1948
- d0[1] = clamp(yy+gc);
1949
- d0[2] = clamp(yy+rc);
1950
- d0[3] = 255;
1951
- yy = y[(j<<1)+1];
1952
- d0[4] = clamp(yy+bc);
1953
- d0[5] = clamp(yy+gc);
1954
- d0[6] = clamp(yy+rc);
1955
- d0[7] = 255;
1956
- }
1957
- else
1958
- {
1959
- d0[0] = clamp(yy+rc);
1960
- d0[1] = clamp(yy+gc);
1961
- d0[2] = clamp(yy+bc);
1962
- d0[3] = 255;
1963
- yy = y[(j<<1)+1];
1964
- d0[4] = clamp(yy+rc);
1965
- d0[5] = clamp(yy+gc);
1966
- d0[6] = clamp(yy+bc);
1967
- d0[7] = 255;
1968
- }
1969
-
1970
- d0 += 8;
1971
-
1972
- c++;
1973
- }
1974
- y += 64;
1975
- }
1976
-
1977
- y += 64*4 - 64*2;
1978
- c += 64*4 - 8;
1979
- }
1980
- }
1981
-
1982
- // YCbCr H2V1 (1x2:1:1, 4 m_blocks per MCU) to RGB
1983
- void jpeg_decoder::H1V2Convert()
1984
- {
1985
- int row = m_max_mcu_y_size - m_mcu_lines_left;
1986
- uint8 *d0 = m_pScan_line_0;
1987
- uint8 *d1 = m_pScan_line_1;
1988
- uint8 *y;
1989
- uint8 *c;
1990
-
1991
- if (row < 8)
1992
- y = m_pSample_buf + row * 8;
1993
- else
1994
- y = m_pSample_buf + 64*1 + (row & 7) * 8;
1995
-
1996
- c = m_pSample_buf + 64*2 + (row >> 1) * 8;
1997
-
1998
- for (int i = m_max_mcus_per_row; i > 0; i--)
1999
- {
2000
- for (int j = 0; j < 8; j++)
2001
- {
2002
- int cb = c[0+j];
2003
- int cr = c[64+j];
2004
-
2005
- int rc = m_crr[cr];
2006
- int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
2007
- int bc = m_cbb[cb];
2008
-
2009
- int yy = y[j];
2010
- if (jpg_format == ERGBFormatJPG::BGRA)
2011
- {
2012
- d0[0] = clamp(yy+bc);
2013
- d0[1] = clamp(yy+gc);
2014
- d0[2] = clamp(yy+rc);
2015
- d0[3] = 255;
2016
- yy = y[8+j];
2017
- d1[0] = clamp(yy+bc);
2018
- d1[1] = clamp(yy+gc);
2019
- d1[2] = clamp(yy+rc);
2020
- d1[3] = 255;
2021
- }
2022
- else
2023
- {
2024
- d0[0] = clamp(yy+rc);
2025
- d0[1] = clamp(yy+gc);
2026
- d0[2] = clamp(yy+bc);
2027
- d0[3] = 255;
2028
- yy = y[8+j];
2029
- d1[0] = clamp(yy+rc);
2030
- d1[1] = clamp(yy+gc);
2031
- d1[2] = clamp(yy+bc);
2032
- d1[3] = 255;
2033
- }
2034
-
2035
- d0 += 4;
2036
- d1 += 4;
2037
- }
2038
-
2039
- y += 64*4;
2040
- c += 64*4;
2041
- }
2042
- }
2043
-
2044
- // YCbCr H2V2 (2x2:1:1, 6 m_blocks per MCU) to RGB
2045
- void jpeg_decoder::H2V2Convert()
2046
- {
2047
- int row = m_max_mcu_y_size - m_mcu_lines_left;
2048
- uint8 *d0 = m_pScan_line_0;
2049
- uint8 *d1 = m_pScan_line_1;
2050
- uint8 *y;
2051
- uint8 *c;
2052
-
2053
- if (row < 8)
2054
- y = m_pSample_buf + row * 8;
2055
- else
2056
- y = m_pSample_buf + 64*2 + (row & 7) * 8;
2057
-
2058
- c = m_pSample_buf + 64*4 + (row >> 1) * 8;
2059
-
2060
- for (int i = m_max_mcus_per_row; i > 0; i--)
2061
- {
2062
- for (int l = 0; l < 2; l++)
2063
- {
2064
- for (int j = 0; j < 8; j += 2)
2065
- {
2066
- int cb = c[0];
2067
- int cr = c[64];
2068
-
2069
- int rc = m_crr[cr];
2070
- int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
2071
- int bc = m_cbb[cb];
2072
-
2073
- int yy = y[j];
2074
- if (jpg_format == ERGBFormatJPG::BGRA)
2075
- {
2076
- d0[0] = clamp(yy+bc);
2077
- d0[1] = clamp(yy+gc);
2078
- d0[2] = clamp(yy+rc);
2079
- d0[3] = 255;
2080
- yy = y[j+1];
2081
- d0[4] = clamp(yy+bc);
2082
- d0[5] = clamp(yy+gc);
2083
- d0[6] = clamp(yy+rc);
2084
- d0[7] = 255;
2085
- yy = y[j+8];
2086
- d1[0] = clamp(yy+bc);
2087
- d1[1] = clamp(yy+gc);
2088
- d1[2] = clamp(yy+rc);
2089
- d1[3] = 255;
2090
- yy = y[j+8+1];
2091
- d1[4] = clamp(yy+bc);
2092
- d1[5] = clamp(yy+gc);
2093
- d1[6] = clamp(yy+rc);
2094
- d1[7] = 255;
2095
- }
2096
- else
2097
- {
2098
- d0[0] = clamp(yy+rc);
2099
- d0[1] = clamp(yy+gc);
2100
- d0[2] = clamp(yy+bc);
2101
- d0[3] = 255;
2102
- yy = y[j+1];
2103
- d0[4] = clamp(yy+rc);
2104
- d0[5] = clamp(yy+gc);
2105
- d0[6] = clamp(yy+bc);
2106
- d0[7] = 255;
2107
- yy = y[j+8];
2108
- d1[0] = clamp(yy+rc);
2109
- d1[1] = clamp(yy+gc);
2110
- d1[2] = clamp(yy+bc);
2111
- d1[3] = 255;
2112
- yy = y[j+8+1];
2113
- d1[4] = clamp(yy+rc);
2114
- d1[5] = clamp(yy+gc);
2115
- d1[6] = clamp(yy+bc);
2116
- d1[7] = 255;
2117
- }
2118
-
2119
- d0 += 8;
2120
- d1 += 8;
2121
-
2122
- c++;
2123
- }
2124
- y += 64;
2125
- }
2126
-
2127
- y += 64*6 - 64*2;
2128
- c += 64*6 - 8;
2129
- }
2130
- }
2131
-
2132
- // Y (1 block per MCU) to 8-bit grayscale
2133
- void jpeg_decoder::gray_convert()
2134
- {
2135
- int row = m_max_mcu_y_size - m_mcu_lines_left;
2136
- uint8 *d = m_pScan_line_0;
2137
- uint8 *s = m_pSample_buf + row * 8;
2138
-
2139
- for (int i = m_max_mcus_per_row; i > 0; i--)
2140
- {
2141
- *(uint *)d = *(uint *)s;
2142
- *(uint *)(&d[4]) = *(uint *)(&s[4]);
2143
-
2144
- s += 64;
2145
- d += 8;
2146
- }
2147
- }
2148
-
2149
- void jpeg_decoder::expanded_convert()
2150
- {
2151
- int row = m_max_mcu_y_size - m_mcu_lines_left;
2152
-
2153
- uint8* Py = m_pSample_buf + (row / 8) * 64 * m_comp_h_samp[0] + (row & 7) * 8;
2154
-
2155
- uint8* d = m_pScan_line_0;
2156
-
2157
- for (int i = m_max_mcus_per_row; i > 0; i--)
2158
- {
2159
- for (int k = 0; k < m_max_mcu_x_size; k += 8)
2160
- {
2161
- const int Y_ofs = k * 8;
2162
- const int Cb_ofs = Y_ofs + 64 * m_expanded_blocks_per_component;
2163
- const int Cr_ofs = Y_ofs + 64 * m_expanded_blocks_per_component * 2;
2164
- for (int j = 0; j < 8; j++)
2165
- {
2166
- int y = Py[Y_ofs + j];
2167
- int cb = Py[Cb_ofs + j];
2168
- int cr = Py[Cr_ofs + j];
2169
-
2170
- if (jpg_format == ERGBFormatJPG::BGRA)
2171
- {
2172
- d[0] = clamp(y + m_cbb[cb]);
2173
- d[1] = clamp(y + ((m_crg[cr] + m_cbg[cb]) >> 16));
2174
- d[2] = clamp(y + m_crr[cr]);
2175
- d[3] = 255;
2176
- }
2177
- else
2178
- {
2179
- d[0] = clamp(y + m_crr[cr]);
2180
- d[1] = clamp(y + ((m_crg[cr] + m_cbg[cb]) >> 16));
2181
- d[2] = clamp(y + m_cbb[cb]);
2182
- d[3] = 255;
2183
- }
2184
-
2185
- d += 4;
2186
- }
2187
- }
2188
-
2189
- Py += 64 * m_expanded_blocks_per_mcu;
2190
- }
2191
- }
2192
-
2193
- // Find end of image (EOI) marker, so we can return to the user the exact size of the input stream.
2194
- void jpeg_decoder::find_eoi()
2195
- {
2196
- if (!m_progressive_flag)
2197
- {
2198
- // Attempt to read the EOI marker.
2199
- //get_bits_no_markers(m_bits_left & 7);
2200
-
2201
- // Prime the bit buffer
2202
- m_bits_left = 16;
2203
- get_bits(16);
2204
- get_bits(16);
2205
-
2206
- // The next marker _should_ be EOI
2207
- process_markers();
2208
- }
2209
-
2210
- m_total_bytes_read -= m_in_buf_left;
2211
- }
2212
-
2213
- int jpeg_decoder::decode(const void** pScan_line, uint* pScan_line_len)
2214
- {
2215
- if ((m_error_code) || (!m_ready_flag))
2216
- return JPGD_FAILED;
2217
-
2218
- if (m_total_lines_left == 0)
2219
- return JPGD_DONE;
2220
-
2221
- if (m_mcu_lines_left == 0)
2222
- {
2223
- if (setjmp(m_jmp_state))
2224
- return JPGD_FAILED;
2225
-
2226
- if (m_progressive_flag)
2227
- load_next_row();
2228
- else
2229
- decode_next_row();
2230
-
2231
- // Find the EOI marker if that was the last row.
2232
- if (m_total_lines_left <= m_max_mcu_y_size)
2233
- find_eoi();
2234
-
2235
- m_mcu_lines_left = m_max_mcu_y_size;
2236
- }
2237
-
2238
- if (m_freq_domain_chroma_upsample)
2239
- {
2240
- expanded_convert();
2241
- *pScan_line = m_pScan_line_0;
2242
- }
2243
- else
2244
- {
2245
- switch (m_scan_type)
2246
- {
2247
- case JPGD_YH2V2:
2248
- {
2249
- if ((m_mcu_lines_left & 1) == 0)
2250
- {
2251
- H2V2Convert();
2252
- *pScan_line = m_pScan_line_0;
2253
- }
2254
- else
2255
- *pScan_line = m_pScan_line_1;
2256
-
2257
- break;
2258
- }
2259
- case JPGD_YH2V1:
2260
- {
2261
- H2V1Convert();
2262
- *pScan_line = m_pScan_line_0;
2263
- break;
2264
- }
2265
- case JPGD_YH1V2:
2266
- {
2267
- if ((m_mcu_lines_left & 1) == 0)
2268
- {
2269
- H1V2Convert();
2270
- *pScan_line = m_pScan_line_0;
2271
- }
2272
- else
2273
- *pScan_line = m_pScan_line_1;
2274
-
2275
- break;
2276
- }
2277
- case JPGD_YH1V1:
2278
- {
2279
- H1V1Convert();
2280
- *pScan_line = m_pScan_line_0;
2281
- break;
2282
- }
2283
- case JPGD_GRAYSCALE:
2284
- {
2285
- gray_convert();
2286
- *pScan_line = m_pScan_line_0;
2287
-
2288
- break;
2289
- }
2290
- }
2291
- }
2292
-
2293
- *pScan_line_len = m_real_dest_bytes_per_scan_line;
2294
-
2295
- m_mcu_lines_left--;
2296
- m_total_lines_left--;
2297
-
2298
- return JPGD_SUCCESS;
2299
- }
2300
-
2301
- // Creates the tables needed for efficient Huffman decoding.
2302
- void jpeg_decoder::make_huff_table(int index, huff_tables *pH)
2303
- {
2304
- int p, i, l, si;
2305
- uint8 huffsize[257];
2306
- uint huffcode[257];
2307
- uint code;
2308
- uint subtree;
2309
- int code_size;
2310
- int lastp;
2311
- int nextfreeentry;
2312
- int currententry;
2313
-
2314
- pH->ac_table = m_huff_ac[index] != 0;
2315
-
2316
- p = 0;
2317
-
2318
- for (l = 1; l <= 16; l++)
2319
- {
2320
- for (i = 1; i <= m_huff_num[index][l]; i++)
2321
- huffsize[p++] = static_cast<uint8>(l);
2322
- }
2323
-
2324
- huffsize[p] = 0;
2325
-
2326
- lastp = p;
2327
-
2328
- code = 0;
2329
- si = huffsize[0];
2330
- p = 0;
2331
-
2332
- while (huffsize[p])
2333
- {
2334
- while (huffsize[p] == si)
2335
- {
2336
- huffcode[p++] = code;
2337
- code++;
2338
- }
2339
-
2340
- code <<= 1;
2341
- si++;
2342
- }
2343
-
2344
- memset(pH->look_up, 0, sizeof(pH->look_up));
2345
- memset(pH->look_up2, 0, sizeof(pH->look_up2));
2346
- memset(pH->tree, 0, sizeof(pH->tree));
2347
- memset(pH->code_size, 0, sizeof(pH->code_size));
2348
-
2349
- nextfreeentry = -1;
2350
-
2351
- p = 0;
2352
-
2353
- while (p < lastp)
2354
- {
2355
- i = m_huff_val[index][p];
2356
- code = huffcode[p];
2357
- code_size = huffsize[p];
2358
-
2359
- pH->code_size[i] = static_cast<uint8>(code_size);
2360
-
2361
- if (code_size <= 8)
2362
- {
2363
- code <<= (8 - code_size);
2364
-
2365
- for (l = 1 << (8 - code_size); l > 0; l--)
2366
- {
2367
- JPGD_ASSERT(i < 256);
2368
-
2369
- pH->look_up[code] = i;
2370
-
2371
- bool has_extrabits = false;
2372
- int extra_bits = 0;
2373
- int num_extra_bits = i & 15;
2374
-
2375
- int bits_to_fetch = code_size;
2376
- if (num_extra_bits)
2377
- {
2378
- int total_codesize = code_size + num_extra_bits;
2379
- if (total_codesize <= 8)
2380
- {
2381
- has_extrabits = true;
2382
- extra_bits = ((1 << num_extra_bits) - 1) & (code >> (8 - total_codesize));
2383
- JPGD_ASSERT(extra_bits <= 0x7FFF);
2384
- bits_to_fetch += num_extra_bits;
2385
- }
2386
- }
2387
-
2388
- if (!has_extrabits)
2389
- pH->look_up2[code] = i | (bits_to_fetch << 8);
2390
- else
2391
- pH->look_up2[code] = i | 0x8000 | (extra_bits << 16) | (bits_to_fetch << 8);
2392
-
2393
- code++;
2394
- }
2395
- }
2396
- else
2397
- {
2398
- subtree = (code >> (code_size - 8)) & 0xFF;
2399
-
2400
- currententry = pH->look_up[subtree];
2401
-
2402
- if (currententry == 0)
2403
- {
2404
- pH->look_up[subtree] = currententry = nextfreeentry;
2405
- pH->look_up2[subtree] = currententry = nextfreeentry;
2406
-
2407
- nextfreeentry -= 2;
2408
- }
2409
-
2410
- code <<= (16 - (code_size - 8));
2411
-
2412
- for (l = code_size; l > 9; l--)
2413
- {
2414
- if ((code & 0x8000) == 0)
2415
- currententry--;
2416
-
2417
- if (pH->tree[-currententry - 1] == 0)
2418
- {
2419
- pH->tree[-currententry - 1] = nextfreeentry;
2420
-
2421
- currententry = nextfreeentry;
2422
-
2423
- nextfreeentry -= 2;
2424
- }
2425
- else
2426
- currententry = pH->tree[-currententry - 1];
2427
-
2428
- code <<= 1;
2429
- }
2430
-
2431
- if ((code & 0x8000) == 0)
2432
- currententry--;
2433
-
2434
- pH->tree[-currententry - 1] = i;
2435
- }
2436
-
2437
- p++;
2438
- }
2439
- }
2440
-
2441
- // Verifies the quantization tables needed for this scan are available.
2442
- void jpeg_decoder::check_quant_tables()
2443
- {
2444
- for (int i = 0; i < m_comps_in_scan; i++)
2445
- if (m_quant[m_comp_quant[m_comp_list[i]]] == NULL)
2446
- stop_decoding(JPGD_UNDEFINED_QUANT_TABLE);
2447
- }
2448
-
2449
- // Verifies that all the Huffman tables needed for this scan are available.
2450
- void jpeg_decoder::check_huff_tables()
2451
- {
2452
- for (int i = 0; i < m_comps_in_scan; i++)
2453
- {
2454
- if ((m_spectral_start == 0) && (m_huff_num[m_comp_dc_tab[m_comp_list[i]]] == NULL))
2455
- stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
2456
-
2457
- if ((m_spectral_end > 0) && (m_huff_num[m_comp_ac_tab[m_comp_list[i]]] == NULL))
2458
- stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
2459
- }
2460
-
2461
- for (int i = 0; i < JPGD_MAX_HUFF_TABLES; i++)
2462
- if (m_huff_num[i])
2463
- {
2464
- if (!m_pHuff_tabs[i])
2465
- m_pHuff_tabs[i] = (huff_tables *)alloc(sizeof(huff_tables));
2466
-
2467
- make_huff_table(i, m_pHuff_tabs[i]);
2468
- }
2469
- }
2470
-
2471
- // Determines the component order inside each MCU.
2472
- // Also calcs how many MCU's are on each row, etc.
2473
- void jpeg_decoder::calc_mcu_block_order()
2474
- {
2475
- int component_num, component_id;
2476
- int max_h_samp = 0, max_v_samp = 0;
2477
-
2478
- for (component_id = 0; component_id < m_comps_in_frame; component_id++)
2479
- {
2480
- if (m_comp_h_samp[component_id] > max_h_samp)
2481
- max_h_samp = m_comp_h_samp[component_id];
2482
-
2483
- if (m_comp_v_samp[component_id] > max_v_samp)
2484
- max_v_samp = m_comp_v_samp[component_id];
2485
- }
2486
-
2487
- for (component_id = 0; component_id < m_comps_in_frame; component_id++)
2488
- {
2489
- m_comp_h_blocks[component_id] = ((((m_image_x_size * m_comp_h_samp[component_id]) + (max_h_samp - 1)) / max_h_samp) + 7) / 8;
2490
- m_comp_v_blocks[component_id] = ((((m_image_y_size * m_comp_v_samp[component_id]) + (max_v_samp - 1)) / max_v_samp) + 7) / 8;
2491
- }
2492
-
2493
- if (m_comps_in_scan == 1)
2494
- {
2495
- m_mcus_per_row = m_comp_h_blocks[m_comp_list[0]];
2496
- m_mcus_per_col = m_comp_v_blocks[m_comp_list[0]];
2497
- }
2498
- else
2499
- {
2500
- m_mcus_per_row = (((m_image_x_size + 7) / 8) + (max_h_samp - 1)) / max_h_samp;
2501
- m_mcus_per_col = (((m_image_y_size + 7) / 8) + (max_v_samp - 1)) / max_v_samp;
2502
- }
2503
-
2504
- if (m_comps_in_scan == 1)
2505
- {
2506
- m_mcu_org[0] = m_comp_list[0];
2507
-
2508
- m_blocks_per_mcu = 1;
2509
- }
2510
- else
2511
- {
2512
- m_blocks_per_mcu = 0;
2513
-
2514
- for (component_num = 0; component_num < m_comps_in_scan; component_num++)
2515
- {
2516
- int num_blocks;
2517
-
2518
- component_id = m_comp_list[component_num];
2519
-
2520
- num_blocks = m_comp_h_samp[component_id] * m_comp_v_samp[component_id];
2521
-
2522
- while (num_blocks--)
2523
- m_mcu_org[m_blocks_per_mcu++] = component_id;
2524
- }
2525
- }
2526
- }
2527
-
2528
- // Starts a new scan.
2529
- int jpeg_decoder::init_scan()
2530
- {
2531
- if (!locate_sos_marker())
2532
- return JPGD_FALSE;
2533
-
2534
- calc_mcu_block_order();
2535
-
2536
- check_huff_tables();
2537
-
2538
- check_quant_tables();
2539
-
2540
- memset(m_last_dc_val, 0, m_comps_in_frame * sizeof(uint));
2541
-
2542
- m_eob_run = 0;
2543
-
2544
- if (m_restart_interval)
2545
- {
2546
- m_restarts_left = m_restart_interval;
2547
- m_next_restart_num = 0;
2548
- }
2549
-
2550
- fix_in_buffer();
2551
-
2552
- return JPGD_TRUE;
2553
- }
2554
-
2555
- // Starts a frame. Determines if the number of components or sampling factors
2556
- // are supported.
2557
- void jpeg_decoder::init_frame()
2558
- {
2559
- int i;
2560
-
2561
- if (m_comps_in_frame == 1)
2562
- {
2563
- if ((m_comp_h_samp[0] != 1) || (m_comp_v_samp[0] != 1))
2564
- stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2565
-
2566
- m_scan_type = JPGD_GRAYSCALE;
2567
- m_max_blocks_per_mcu = 1;
2568
- m_max_mcu_x_size = 8;
2569
- m_max_mcu_y_size = 8;
2570
- }
2571
- else if (m_comps_in_frame == 3)
2572
- {
2573
- if ( ((m_comp_h_samp[1] != 1) || (m_comp_v_samp[1] != 1)) ||
2574
- ((m_comp_h_samp[2] != 1) || (m_comp_v_samp[2] != 1)) )
2575
- stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2576
-
2577
- if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 1))
2578
- {
2579
- m_scan_type = JPGD_YH1V1;
2580
-
2581
- m_max_blocks_per_mcu = 3;
2582
- m_max_mcu_x_size = 8;
2583
- m_max_mcu_y_size = 8;
2584
- }
2585
- else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 1))
2586
- {
2587
- m_scan_type = JPGD_YH2V1;
2588
- m_max_blocks_per_mcu = 4;
2589
- m_max_mcu_x_size = 16;
2590
- m_max_mcu_y_size = 8;
2591
- }
2592
- else if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 2))
2593
- {
2594
- m_scan_type = JPGD_YH1V2;
2595
- m_max_blocks_per_mcu = 4;
2596
- m_max_mcu_x_size = 8;
2597
- m_max_mcu_y_size = 16;
2598
- }
2599
- else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 2))
2600
- {
2601
- m_scan_type = JPGD_YH2V2;
2602
- m_max_blocks_per_mcu = 6;
2603
- m_max_mcu_x_size = 16;
2604
- m_max_mcu_y_size = 16;
2605
- }
2606
- else
2607
- stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2608
- }
2609
- else
2610
- stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
2611
-
2612
- m_max_mcus_per_row = (m_image_x_size + (m_max_mcu_x_size - 1)) / m_max_mcu_x_size;
2613
- m_max_mcus_per_col = (m_image_y_size + (m_max_mcu_y_size - 1)) / m_max_mcu_y_size;
2614
-
2615
- // These values are for the *destination* pixels: after conversion.
2616
- if (m_scan_type == JPGD_GRAYSCALE)
2617
- m_dest_bytes_per_pixel = 1;
2618
- else
2619
- m_dest_bytes_per_pixel = 4;
2620
-
2621
- m_dest_bytes_per_scan_line = ((m_image_x_size + 15) & 0xFFF0) * m_dest_bytes_per_pixel;
2622
-
2623
- m_real_dest_bytes_per_scan_line = (m_image_x_size * m_dest_bytes_per_pixel);
2624
-
2625
- // Initialize two scan line buffers.
2626
- m_pScan_line_0 = (uint8 *)alloc(m_dest_bytes_per_scan_line, true);
2627
- if ((m_scan_type == JPGD_YH1V2) || (m_scan_type == JPGD_YH2V2))
2628
- m_pScan_line_1 = (uint8 *)alloc(m_dest_bytes_per_scan_line, true);
2629
-
2630
- m_max_blocks_per_row = m_max_mcus_per_row * m_max_blocks_per_mcu;
2631
-
2632
- // Should never happen
2633
- if (m_max_blocks_per_row > JPGD_MAX_BLOCKS_PER_ROW)
2634
- stop_decoding(JPGD_ASSERTION_ERROR);
2635
-
2636
- // Allocate the coefficient buffer, enough for one MCU
2637
- m_pMCU_coefficients = (jpgd_block_t*)alloc(m_max_blocks_per_mcu * 64 * sizeof(jpgd_block_t));
2638
-
2639
- for (i = 0; i < m_max_blocks_per_mcu; i++)
2640
- m_mcu_block_max_zag[i] = 64;
2641
-
2642
- m_expanded_blocks_per_component = m_comp_h_samp[0] * m_comp_v_samp[0];
2643
- m_expanded_blocks_per_mcu = m_expanded_blocks_per_component * m_comps_in_frame;
2644
- m_expanded_blocks_per_row = m_max_mcus_per_row * m_expanded_blocks_per_mcu;
2645
- // Freq. domain chroma upsampling is only supported for H2V2 subsampling factor.
2646
- // BEGIN EPIC MOD
2647
- #if JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING
2648
- m_freq_domain_chroma_upsample = (m_expanded_blocks_per_mcu == 4*3);
2649
- #else
2650
- m_freq_domain_chroma_upsample = 0;
2651
- #endif
2652
- // END EPIC MOD
2653
-
2654
- if (m_freq_domain_chroma_upsample)
2655
- m_pSample_buf = (uint8 *)alloc(m_expanded_blocks_per_row * 64);
2656
- else
2657
- m_pSample_buf = (uint8 *)alloc(m_max_blocks_per_row * 64);
2658
-
2659
- m_total_lines_left = m_image_y_size;
2660
-
2661
- m_mcu_lines_left = 0;
2662
-
2663
- create_look_ups();
2664
- }
2665
-
2666
- // The coeff_buf series of methods originally stored the coefficients
2667
- // into a "virtual" file which was located in EMS, XMS, or a disk file. A cache
2668
- // was used to make this process more efficient. Now, we can store the entire
2669
- // thing in RAM.
2670
- jpeg_decoder::coeff_buf* jpeg_decoder::coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y)
2671
- {
2672
- coeff_buf* cb = (coeff_buf*)alloc(sizeof(coeff_buf));
2673
-
2674
- cb->block_num_x = block_num_x;
2675
- cb->block_num_y = block_num_y;
2676
- cb->block_len_x = block_len_x;
2677
- cb->block_len_y = block_len_y;
2678
- cb->block_size = (block_len_x * block_len_y) * sizeof(jpgd_block_t);
2679
- cb->pData = (uint8 *)alloc(cb->block_size * block_num_x * block_num_y, true);
2680
- return cb;
2681
- }
2682
-
2683
- inline jpgd_block_t *jpeg_decoder::coeff_buf_getp(coeff_buf *cb, int block_x, int block_y)
2684
- {
2685
- JPGD_ASSERT((block_x < cb->block_num_x) && (block_y < cb->block_num_y));
2686
- return (jpgd_block_t *)(cb->pData + block_x * cb->block_size + block_y * (cb->block_size * cb->block_num_x));
2687
- }
2688
-
2689
- // The following methods decode the various types of m_blocks encountered
2690
- // in progressively encoded images.
2691
- void jpeg_decoder::decode_block_dc_first(jpeg_decoder *pD, int component_id, int block_x, int block_y)
2692
- {
2693
- int s, r;
2694
- jpgd_block_t *p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y);
2695
-
2696
- if ((s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_dc_tab[component_id]])) != 0)
2697
- {
2698
- r = pD->get_bits_no_markers(s);
2699
- s = HUFF_EXTEND(r, s);
2700
- }
2701
-
2702
- pD->m_last_dc_val[component_id] = (s += pD->m_last_dc_val[component_id]);
2703
-
2704
- p[0] = static_cast<jpgd_block_t>(s << pD->m_successive_low);
2705
- }
2706
-
2707
- void jpeg_decoder::decode_block_dc_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y)
2708
- {
2709
- if (pD->get_bits_no_markers(1))
2710
- {
2711
- jpgd_block_t *p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y);
2712
-
2713
- p[0] |= (1 << pD->m_successive_low);
2714
- }
2715
- }
2716
-
2717
- void jpeg_decoder::decode_block_ac_first(jpeg_decoder *pD, int component_id, int block_x, int block_y)
2718
- {
2719
- int k, s, r;
2720
-
2721
- if (pD->m_eob_run)
2722
- {
2723
- pD->m_eob_run--;
2724
- return;
2725
- }
2726
-
2727
- jpgd_block_t *p = pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y);
2728
-
2729
- for (k = pD->m_spectral_start; k <= pD->m_spectral_end; k++)
2730
- {
2731
- s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_ac_tab[component_id]]);
2732
-
2733
- r = s >> 4;
2734
- s &= 15;
2735
-
2736
- if (s)
2737
- {
2738
- if ((k += r) > 63)
2739
- pD->stop_decoding(JPGD_DECODE_ERROR);
2740
-
2741
- r = pD->get_bits_no_markers(s);
2742
- s = HUFF_EXTEND(r, s);
2743
-
2744
- p[g_ZAG[k]] = static_cast<jpgd_block_t>(s << pD->m_successive_low);
2745
- }
2746
- else
2747
- {
2748
- if (r == 15)
2749
- {
2750
- if ((k += 15) > 63)
2751
- pD->stop_decoding(JPGD_DECODE_ERROR);
2752
- }
2753
- else
2754
- {
2755
- pD->m_eob_run = 1 << r;
2756
-
2757
- if (r)
2758
- pD->m_eob_run += pD->get_bits_no_markers(r);
2759
-
2760
- pD->m_eob_run--;
2761
-
2762
- break;
2763
- }
2764
- }
2765
- }
2766
- }
2767
-
2768
- void jpeg_decoder::decode_block_ac_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y)
2769
- {
2770
- int s, k, r;
2771
- int p1 = 1 << pD->m_successive_low;
2772
- int m1 = (-1) << pD->m_successive_low;
2773
- jpgd_block_t *p = pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y);
2774
-
2775
- k = pD->m_spectral_start;
2776
-
2777
- if (pD->m_eob_run == 0)
2778
- {
2779
- for ( ; k <= pD->m_spectral_end; k++)
2780
- {
2781
- s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_ac_tab[component_id]]);
2782
-
2783
- r = s >> 4;
2784
- s &= 15;
2785
-
2786
- if (s)
2787
- {
2788
- if (s != 1)
2789
- pD->stop_decoding(JPGD_DECODE_ERROR);
2790
-
2791
- if (pD->get_bits_no_markers(1))
2792
- s = p1;
2793
- else
2794
- s = m1;
2795
- }
2796
- else
2797
- {
2798
- if (r != 15)
2799
- {
2800
- pD->m_eob_run = 1 << r;
2801
-
2802
- if (r)
2803
- pD->m_eob_run += pD->get_bits_no_markers(r);
2804
-
2805
- break;
2806
- }
2807
- }
2808
-
2809
- do
2810
- {
2811
- // BEGIN EPIC MOD
2812
- JPGD_ASSERT(k < 64);
2813
- // END EPIC MOD
2814
-
2815
- jpgd_block_t *this_coef = p + g_ZAG[k];
2816
-
2817
- if (*this_coef != 0)
2818
- {
2819
- if (pD->get_bits_no_markers(1))
2820
- {
2821
- if ((*this_coef & p1) == 0)
2822
- {
2823
- if (*this_coef >= 0)
2824
- *this_coef = static_cast<jpgd_block_t>(*this_coef + p1);
2825
- else
2826
- *this_coef = static_cast<jpgd_block_t>(*this_coef + m1);
2827
- }
2828
- }
2829
- }
2830
- else
2831
- {
2832
- if (--r < 0)
2833
- break;
2834
- }
2835
-
2836
- k++;
2837
-
2838
- } while (k <= pD->m_spectral_end);
2839
-
2840
- if ((s) && (k < 64))
2841
- {
2842
- p[g_ZAG[k]] = static_cast<jpgd_block_t>(s);
2843
- }
2844
- }
2845
- }
2846
-
2847
- if (pD->m_eob_run > 0)
2848
- {
2849
- for ( ; k <= pD->m_spectral_end; k++)
2850
- {
2851
- // BEGIN EPIC MOD
2852
- JPGD_ASSERT(k < 64);
2853
- // END EPIC MOD
2854
-
2855
- jpgd_block_t *this_coef = p + g_ZAG[k];
2856
-
2857
- if (*this_coef != 0)
2858
- {
2859
- if (pD->get_bits_no_markers(1))
2860
- {
2861
- if ((*this_coef & p1) == 0)
2862
- {
2863
- if (*this_coef >= 0)
2864
- *this_coef = static_cast<jpgd_block_t>(*this_coef + p1);
2865
- else
2866
- *this_coef = static_cast<jpgd_block_t>(*this_coef + m1);
2867
- }
2868
- }
2869
- }
2870
- }
2871
-
2872
- pD->m_eob_run--;
2873
- }
2874
- }
2875
-
2876
- // Decode a scan in a progressively encoded image.
2877
- void jpeg_decoder::decode_scan(pDecode_block_func decode_block_func)
2878
- {
2879
- int mcu_row, mcu_col, mcu_block;
2880
- int block_x_mcu[JPGD_MAX_COMPONENTS], m_block_y_mcu[JPGD_MAX_COMPONENTS];
2881
-
2882
- memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu));
2883
-
2884
- for (mcu_col = 0; mcu_col < m_mcus_per_col; mcu_col++)
2885
- {
2886
- int component_num, component_id;
2887
-
2888
- memset(block_x_mcu, 0, sizeof(block_x_mcu));
2889
-
2890
- for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
2891
- {
2892
- int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
2893
-
2894
- if ((m_restart_interval) && (m_restarts_left == 0))
2895
- process_restart();
2896
-
2897
- for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
2898
- {
2899
- component_id = m_mcu_org[mcu_block];
2900
-
2901
- decode_block_func(this, component_id, block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs);
2902
-
2903
- if (m_comps_in_scan == 1)
2904
- block_x_mcu[component_id]++;
2905
- else
2906
- {
2907
- if (++block_x_mcu_ofs == m_comp_h_samp[component_id])
2908
- {
2909
- block_x_mcu_ofs = 0;
2910
-
2911
- if (++block_y_mcu_ofs == m_comp_v_samp[component_id])
2912
- {
2913
- block_y_mcu_ofs = 0;
2914
- block_x_mcu[component_id] += m_comp_h_samp[component_id];
2915
- }
2916
- }
2917
- }
2918
- }
2919
-
2920
- m_restarts_left--;
2921
- }
2922
-
2923
- if (m_comps_in_scan == 1)
2924
- m_block_y_mcu[m_comp_list[0]]++;
2925
- else
2926
- {
2927
- for (component_num = 0; component_num < m_comps_in_scan; component_num++)
2928
- {
2929
- component_id = m_comp_list[component_num];
2930
- m_block_y_mcu[component_id] += m_comp_v_samp[component_id];
2931
- }
2932
- }
2933
- }
2934
- }
2935
-
2936
- // Decode a progressively encoded image.
2937
- void jpeg_decoder::init_progressive()
2938
- {
2939
- int i;
2940
-
2941
- if (m_comps_in_frame == 4)
2942
- stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
2943
-
2944
- // Allocate the coefficient buffers.
2945
- for (i = 0; i < m_comps_in_frame; i++)
2946
- {
2947
- m_dc_coeffs[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp[i], m_max_mcus_per_col * m_comp_v_samp[i], 1, 1);
2948
- m_ac_coeffs[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp[i], m_max_mcus_per_col * m_comp_v_samp[i], 8, 8);
2949
- }
2950
-
2951
- for ( ; ; )
2952
- {
2953
- int dc_only_scan, refinement_scan;
2954
- pDecode_block_func decode_block_func;
2955
-
2956
- if (!init_scan())
2957
- break;
2958
-
2959
- dc_only_scan = (m_spectral_start == 0);
2960
- refinement_scan = (m_successive_high != 0);
2961
-
2962
- if ((m_spectral_start > m_spectral_end) || (m_spectral_end > 63))
2963
- stop_decoding(JPGD_BAD_SOS_SPECTRAL);
2964
-
2965
- if (dc_only_scan)
2966
- {
2967
- if (m_spectral_end)
2968
- stop_decoding(JPGD_BAD_SOS_SPECTRAL);
2969
- }
2970
- else if (m_comps_in_scan != 1) /* AC scans can only contain one component */
2971
- stop_decoding(JPGD_BAD_SOS_SPECTRAL);
2972
-
2973
- if ((refinement_scan) && (m_successive_low != m_successive_high - 1))
2974
- stop_decoding(JPGD_BAD_SOS_SUCCESSIVE);
2975
-
2976
- if (dc_only_scan)
2977
- {
2978
- if (refinement_scan)
2979
- decode_block_func = decode_block_dc_refine;
2980
- else
2981
- decode_block_func = decode_block_dc_first;
2982
- }
2983
- else
2984
- {
2985
- if (refinement_scan)
2986
- decode_block_func = decode_block_ac_refine;
2987
- else
2988
- decode_block_func = decode_block_ac_first;
2989
- }
2990
-
2991
- decode_scan(decode_block_func);
2992
-
2993
- m_bits_left = 16;
2994
- get_bits(16);
2995
- get_bits(16);
2996
- }
2997
-
2998
- m_comps_in_scan = m_comps_in_frame;
2999
-
3000
- for (i = 0; i < m_comps_in_frame; i++)
3001
- m_comp_list[i] = i;
3002
-
3003
- calc_mcu_block_order();
3004
- }
3005
-
3006
- void jpeg_decoder::init_sequential()
3007
- {
3008
- if (!init_scan())
3009
- stop_decoding(JPGD_UNEXPECTED_MARKER);
3010
- }
3011
-
3012
- void jpeg_decoder::decode_start()
3013
- {
3014
- init_frame();
3015
-
3016
- if (m_progressive_flag)
3017
- init_progressive();
3018
- else
3019
- init_sequential();
3020
- }
3021
-
3022
- void jpeg_decoder::decode_init(jpeg_decoder_stream *pStream)
3023
- {
3024
- init(pStream);
3025
- locate_sof_marker();
3026
- }
3027
-
3028
- jpeg_decoder::jpeg_decoder(jpeg_decoder_stream *pStream)
3029
- {
3030
- if (setjmp(m_jmp_state))
3031
- return;
3032
- decode_init(pStream);
3033
- }
3034
-
3035
- int jpeg_decoder::begin_decoding()
3036
- {
3037
- if (m_ready_flag)
3038
- return JPGD_SUCCESS;
3039
-
3040
- if (m_error_code)
3041
- return JPGD_FAILED;
3042
-
3043
- if (setjmp(m_jmp_state))
3044
- return JPGD_FAILED;
3045
-
3046
- decode_start();
3047
-
3048
- m_ready_flag = true;
3049
-
3050
- return JPGD_SUCCESS;
3051
- }
3052
-
3053
- jpeg_decoder::~jpeg_decoder()
3054
- {
3055
- free_all_blocks();
3056
- }
3057
-
3058
- jpeg_decoder_file_stream::jpeg_decoder_file_stream()
3059
- {
3060
- m_pFile = NULL;
3061
- m_eof_flag = false;
3062
- m_error_flag = false;
3063
- }
3064
-
3065
- void jpeg_decoder_file_stream::close()
3066
- {
3067
- if (m_pFile)
3068
- {
3069
- fclose(m_pFile);
3070
- m_pFile = NULL;
3071
- }
3072
-
3073
- m_eof_flag = false;
3074
- m_error_flag = false;
3075
- }
3076
-
3077
- jpeg_decoder_file_stream::~jpeg_decoder_file_stream()
3078
- {
3079
- close();
3080
- }
3081
-
3082
- bool jpeg_decoder_file_stream::open(const char *Pfilename)
3083
- {
3084
- close();
3085
-
3086
- m_eof_flag = false;
3087
- m_error_flag = false;
3088
-
3089
- #if defined(_MSC_VER)
3090
- m_pFile = NULL;
3091
- fopen_s(&m_pFile, Pfilename, "rb");
3092
- #else
3093
- m_pFile = fopen(Pfilename, "rb");
3094
- #endif
3095
- return m_pFile != NULL;
3096
- }
3097
-
3098
- int jpeg_decoder_file_stream::read(uint8 *pBuf, int max_bytes_to_read, bool *pEOF_flag)
3099
- {
3100
- if (!m_pFile)
3101
- return -1;
3102
-
3103
- if (m_eof_flag)
3104
- {
3105
- *pEOF_flag = true;
3106
- return 0;
3107
- }
3108
-
3109
- if (m_error_flag)
3110
- return -1;
3111
-
3112
- int bytes_read = static_cast<int>(fread(pBuf, 1, max_bytes_to_read, m_pFile));
3113
- if (bytes_read < max_bytes_to_read)
3114
- {
3115
- if (ferror(m_pFile))
3116
- {
3117
- m_error_flag = true;
3118
- return -1;
3119
- }
3120
-
3121
- m_eof_flag = true;
3122
- *pEOF_flag = true;
3123
- }
3124
-
3125
- return bytes_read;
3126
- }
3127
-
3128
- bool jpeg_decoder_mem_stream::open(const uint8 *pSrc_data, uint size)
3129
- {
3130
- close();
3131
- m_pSrc_data = pSrc_data;
3132
- m_ofs = 0;
3133
- m_size = size;
3134
- return true;
3135
- }
3136
-
3137
- int jpeg_decoder_mem_stream::read(uint8 *pBuf, int max_bytes_to_read, bool *pEOF_flag)
3138
- {
3139
- *pEOF_flag = false;
3140
-
3141
- if (!m_pSrc_data)
3142
- return -1;
3143
-
3144
- uint bytes_remaining = m_size - m_ofs;
3145
- if ((uint)max_bytes_to_read > bytes_remaining)
3146
- {
3147
- max_bytes_to_read = bytes_remaining;
3148
- *pEOF_flag = true;
3149
- }
3150
-
3151
- memcpy(pBuf, m_pSrc_data + m_ofs, max_bytes_to_read);
3152
- m_ofs += max_bytes_to_read;
3153
-
3154
- return max_bytes_to_read;
3155
- }
3156
-
3157
- unsigned char *decompress_jpeg_image_from_stream(jpeg_decoder_stream *pStream, int *width, int *height, int *actual_comps, int req_comps)
3158
- {
3159
- if (!actual_comps)
3160
- return NULL;
3161
- *actual_comps = 0;
3162
-
3163
- if ((!pStream) || (!width) || (!height) || (!req_comps))
3164
- return NULL;
3165
-
3166
- if ((req_comps != 1) && (req_comps != 3) && (req_comps != 4))
3167
- return NULL;
3168
-
3169
- jpeg_decoder decoder(pStream);
3170
- if (decoder.get_error_code() != JPGD_SUCCESS)
3171
- return NULL;
3172
-
3173
- const int image_width = decoder.get_width(), image_height = decoder.get_height();
3174
- *width = image_width;
3175
- *height = image_height;
3176
- *actual_comps = decoder.get_num_components();
3177
-
3178
- if (decoder.begin_decoding() != JPGD_SUCCESS)
3179
- return NULL;
3180
-
3181
- const int dst_bpl = image_width * req_comps;
3182
-
3183
- uint8 *pImage_data = (uint8*)jpgd_malloc(dst_bpl * image_height);
3184
- if (!pImage_data)
3185
- return NULL;
3186
-
3187
- for (int y = 0; y < image_height; y++)
3188
- {
3189
- const uint8* pScan_line = 0;
3190
- uint scan_line_len;
3191
- if (decoder.decode((const void**)&pScan_line, &scan_line_len) != JPGD_SUCCESS)
3192
- {
3193
- jpgd_free(pImage_data);
3194
- return NULL;
3195
- }
3196
-
3197
- uint8 *pDst = pImage_data + y * dst_bpl;
3198
-
3199
- if (((req_comps == 4) && (decoder.get_num_components() == 3)) ||
3200
- ((req_comps == 1) && (decoder.get_num_components() == 1)))
3201
- {
3202
- memcpy(pDst, pScan_line, dst_bpl);
3203
- }
3204
- else if (decoder.get_num_components() == 1)
3205
- {
3206
- if (req_comps == 3)
3207
- {
3208
- for (int x = 0; x < image_width; x++)
3209
- {
3210
- uint8 luma = pScan_line[x];
3211
- pDst[0] = luma;
3212
- pDst[1] = luma;
3213
- pDst[2] = luma;
3214
- pDst += 3;
3215
- }
3216
- }
3217
- else
3218
- {
3219
- for (int x = 0; x < image_width; x++)
3220
- {
3221
- uint8 luma = pScan_line[x];
3222
- pDst[0] = luma;
3223
- pDst[1] = luma;
3224
- pDst[2] = luma;
3225
- pDst[3] = 255;
3226
- pDst += 4;
3227
- }
3228
- }
3229
- }
3230
- else if (decoder.get_num_components() == 3)
3231
- {
3232
- if (req_comps == 1)
3233
- {
3234
- const int YR = 19595, YG = 38470, YB = 7471;
3235
- for (int x = 0; x < image_width; x++)
3236
- {
3237
- int r = pScan_line[x*4+0];
3238
- int g = pScan_line[x*4+1];
3239
- int b = pScan_line[x*4+2];
3240
- *pDst++ = static_cast<uint8>((r * YR + g * YG + b * YB + 32768) >> 16);
3241
- }
3242
- }
3243
- else
3244
- {
3245
- for (int x = 0; x < image_width; x++)
3246
- {
3247
- pDst[0] = pScan_line[x*4+0];
3248
- pDst[1] = pScan_line[x*4+1];
3249
- pDst[2] = pScan_line[x*4+2];
3250
- pDst += 3;
3251
- }
3252
- }
3253
- }
3254
- }
3255
-
3256
- return pImage_data;
3257
- }
3258
-
3259
- // BEGIN EPIC MOD
3260
- unsigned char *decompress_jpeg_image_from_memory(const unsigned char *pSrc_data, int src_data_size, int *width, int *height, int *actual_comps, int req_comps, int format)
3261
- {
3262
- jpg_format = (ERGBFormatJPG)format;
3263
- // END EPIC MOD
3264
- jpgd::jpeg_decoder_mem_stream mem_stream(pSrc_data, src_data_size);
3265
- return decompress_jpeg_image_from_stream(&mem_stream, width, height, actual_comps, req_comps);
3266
- }
3267
-
3268
- unsigned char *decompress_jpeg_image_from_file(const char *pSrc_filename, int *width, int *height, int *actual_comps, int req_comps)
3269
- {
3270
- jpgd::jpeg_decoder_file_stream file_stream;
3271
- if (!file_stream.open(pSrc_filename))
3272
- return NULL;
3273
- return decompress_jpeg_image_from_stream(&file_stream, width, height, actual_comps, req_comps);
3274
- }
3275
-
3276
- } // namespace jpgd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/test_project/cpp/libJPG/jpgd.h DELETED
@@ -1,316 +0,0 @@
1
- // jpgd.h - C++ class for JPEG decompression.
2
- // Public domain, Rich Geldreich <richgel99@gmail.com>
3
- #ifndef JPEG_DECODER_H
4
- #define JPEG_DECODER_H
5
-
6
- #include <stdlib.h>
7
- #include <stdio.h>
8
- #include <setjmp.h>
9
-
10
- namespace jpgd
11
- {
12
- typedef unsigned char uint8;
13
- typedef signed short int16;
14
- typedef unsigned short uint16;
15
- typedef unsigned int uint;
16
- typedef signed int int32;
17
-
18
- // Loads a JPEG image from a memory buffer or a file.
19
- // req_comps can be 1 (grayscale), 3 (RGB), or 4 (RGBA).
20
- // On return, width/height will be set to the image's dimensions, and actual_comps will be set to either 1 (grayscale) or 3 (RGB).
21
- // Notes: For more control over where and how the source data is read, see the decompress_jpeg_image_from_stream() function below, or call the jpeg_decoder class directly.
22
- // Requesting an 8 or 32bpp image is currently a little faster than 24bpp because the jpeg_decoder class itself currently always unpacks to either 8 or 32bpp.
23
- // BEGIN EPIC MOD
24
- //unsigned char *decompress_jpeg_image_from_memory(const unsigned char *pSrc_data, int src_data_size, int *width, int *height, int *actual_comps, int req_comps);
25
- unsigned char *decompress_jpeg_image_from_memory(const unsigned char *pSrc_data, int src_data_size, int *width, int *height, int *actual_comps, int req_comps, int format);
26
- // END EPIC MOD
27
- unsigned char *decompress_jpeg_image_from_file(const char *pSrc_filename, int *width, int *height, int *actual_comps, int req_comps);
28
-
29
- // Success/failure error codes.
30
- enum jpgd_status
31
- {
32
- JPGD_SUCCESS = 0, JPGD_FAILED = -1, JPGD_DONE = 1,
33
- JPGD_BAD_DHT_COUNTS = -256, JPGD_BAD_DHT_INDEX, JPGD_BAD_DHT_MARKER, JPGD_BAD_DQT_MARKER, JPGD_BAD_DQT_TABLE,
34
- JPGD_BAD_PRECISION, JPGD_BAD_HEIGHT, JPGD_BAD_WIDTH, JPGD_TOO_MANY_COMPONENTS,
35
- JPGD_BAD_SOF_LENGTH, JPGD_BAD_VARIABLE_MARKER, JPGD_BAD_DRI_LENGTH, JPGD_BAD_SOS_LENGTH,
36
- JPGD_BAD_SOS_COMP_ID, JPGD_W_EXTRA_BYTES_BEFORE_MARKER, JPGD_NO_ARITHMITIC_SUPPORT, JPGD_UNEXPECTED_MARKER,
37
- JPGD_NOT_JPEG, JPGD_UNSUPPORTED_MARKER, JPGD_BAD_DQT_LENGTH, JPGD_TOO_MANY_BLOCKS,
38
- JPGD_UNDEFINED_QUANT_TABLE, JPGD_UNDEFINED_HUFF_TABLE, JPGD_NOT_SINGLE_SCAN, JPGD_UNSUPPORTED_COLORSPACE,
39
- JPGD_UNSUPPORTED_SAMP_FACTORS, JPGD_DECODE_ERROR, JPGD_BAD_RESTART_MARKER, JPGD_ASSERTION_ERROR,
40
- JPGD_BAD_SOS_SPECTRAL, JPGD_BAD_SOS_SUCCESSIVE, JPGD_STREAM_READ, JPGD_NOTENOUGHMEM
41
- };
42
-
43
- // Input stream interface.
44
- // Derive from this class to read input data from sources other than files or memory. Set m_eof_flag to true when no more data is available.
45
- // The decoder is rather greedy: it will keep on calling this method until its internal input buffer is full, or until the EOF flag is set.
46
- // If the input stream contains data after the JPEG stream's EOI (end of image) marker, it will probably be pulled into the internal buffer.
47
- // Call the get_total_bytes_read() method to determine the actual size of the JPEG stream after successful decoding.
48
- class jpeg_decoder_stream
49
- {
50
- public:
51
- jpeg_decoder_stream() { }
52
- virtual ~jpeg_decoder_stream() { }
53
-
54
- // The read() method is called when the internal input buffer is empty.
55
- // Parameters:
56
- // pBuf - input buffer
57
- // max_bytes_to_read - maximum bytes that can be written to pBuf
58
- // pEOF_flag - set this to true if at end of stream (no more bytes remaining)
59
- // Returns -1 on error, otherwise return the number of bytes actually written to the buffer (which may be 0).
60
- // Notes: This method will be called in a loop until you set *pEOF_flag to true or the internal buffer is full.
61
- virtual int read(uint8 *pBuf, int max_bytes_to_read, bool *pEOF_flag) = 0;
62
- };
63
-
64
- // stdio FILE stream class.
65
- class jpeg_decoder_file_stream : public jpeg_decoder_stream
66
- {
67
- jpeg_decoder_file_stream(const jpeg_decoder_file_stream &);
68
- jpeg_decoder_file_stream &operator =(const jpeg_decoder_file_stream &);
69
-
70
- FILE *m_pFile;
71
- bool m_eof_flag, m_error_flag;
72
-
73
- public:
74
- jpeg_decoder_file_stream();
75
- virtual ~jpeg_decoder_file_stream();
76
-
77
- bool open(const char *Pfilename);
78
- void close();
79
-
80
- virtual int read(uint8 *pBuf, int max_bytes_to_read, bool *pEOF_flag);
81
- };
82
-
83
- // Memory stream class.
84
- class jpeg_decoder_mem_stream : public jpeg_decoder_stream
85
- {
86
- const uint8 *m_pSrc_data;
87
- uint m_ofs, m_size;
88
-
89
- public:
90
- jpeg_decoder_mem_stream() : m_pSrc_data(NULL), m_ofs(0), m_size(0) { }
91
- jpeg_decoder_mem_stream(const uint8 *pSrc_data, uint size) : m_pSrc_data(pSrc_data), m_ofs(0), m_size(size) { }
92
-
93
- virtual ~jpeg_decoder_mem_stream() { }
94
-
95
- bool open(const uint8 *pSrc_data, uint size);
96
- void close() { m_pSrc_data = NULL; m_ofs = 0; m_size = 0; }
97
-
98
- virtual int read(uint8 *pBuf, int max_bytes_to_read, bool *pEOF_flag);
99
- };
100
-
101
- // Loads JPEG file from a jpeg_decoder_stream.
102
- unsigned char *decompress_jpeg_image_from_stream(jpeg_decoder_stream *pStream, int *width, int *height, int *actual_comps, int req_comps);
103
-
104
- enum
105
- {
106
- JPGD_IN_BUF_SIZE = 8192, JPGD_MAX_BLOCKS_PER_MCU = 10, JPGD_MAX_HUFF_TABLES = 8, JPGD_MAX_QUANT_TABLES = 4,
107
- JPGD_MAX_COMPONENTS = 4, JPGD_MAX_COMPS_IN_SCAN = 4, JPGD_MAX_BLOCKS_PER_ROW = 8192, JPGD_MAX_HEIGHT = 16384, JPGD_MAX_WIDTH = 16384
108
- };
109
-
110
- typedef int16 jpgd_quant_t;
111
- typedef int16 jpgd_block_t;
112
-
113
- class jpeg_decoder
114
- {
115
- public:
116
- // Call get_error_code() after constructing to determine if the stream is valid or not. You may call the get_width(), get_height(), etc.
117
- // methods after the constructor is called. You may then either destruct the object, or begin decoding the image by calling begin_decoding(), then decode() on each scanline.
118
- jpeg_decoder(jpeg_decoder_stream *pStream);
119
-
120
- ~jpeg_decoder();
121
-
122
- // Call this method after constructing the object to begin decompression.
123
- // If JPGD_SUCCESS is returned you may then call decode() on each scanline.
124
- int begin_decoding();
125
-
126
- // Returns the next scan line.
127
- // For grayscale images, pScan_line will point to a buffer containing 8-bit pixels (get_bytes_per_pixel() will return 1).
128
- // Otherwise, it will always point to a buffer containing 32-bit RGBA pixels (A will always be 255, and get_bytes_per_pixel() will return 4).
129
- // Returns JPGD_SUCCESS if a scan line has been returned.
130
- // Returns JPGD_DONE if all scan lines have been returned.
131
- // Returns JPGD_FAILED if an error occurred. Call get_error_code() for more info.
132
- int decode(const void** pScan_line, uint* pScan_line_len);
133
-
134
- inline jpgd_status get_error_code() const { return m_error_code; }
135
-
136
- inline int get_width() const { return m_image_x_size; }
137
- inline int get_height() const { return m_image_y_size; }
138
-
139
- inline int get_num_components() const { return m_comps_in_frame; }
140
-
141
- inline int get_bytes_per_pixel() const { return m_dest_bytes_per_pixel; }
142
- inline int get_bytes_per_scan_line() const { return m_image_x_size * get_bytes_per_pixel(); }
143
-
144
- // Returns the total number of bytes actually consumed by the decoder (which should equal the actual size of the JPEG file).
145
- inline int get_total_bytes_read() const { return m_total_bytes_read; }
146
-
147
- private:
148
- jpeg_decoder(const jpeg_decoder &);
149
- jpeg_decoder &operator =(const jpeg_decoder &);
150
-
151
- typedef void (*pDecode_block_func)(jpeg_decoder *, int, int, int);
152
-
153
- struct huff_tables
154
- {
155
- bool ac_table;
156
- uint look_up[256];
157
- uint look_up2[256];
158
- uint8 code_size[256];
159
- uint tree[512];
160
- };
161
-
162
- struct coeff_buf
163
- {
164
- uint8 *pData;
165
- int block_num_x, block_num_y;
166
- int block_len_x, block_len_y;
167
- int block_size;
168
- };
169
-
170
- struct mem_block
171
- {
172
- mem_block *m_pNext;
173
- size_t m_used_count;
174
- size_t m_size;
175
- char m_data[1];
176
- };
177
-
178
- jmp_buf m_jmp_state;
179
- mem_block *m_pMem_blocks;
180
- int m_image_x_size;
181
- int m_image_y_size;
182
- jpeg_decoder_stream *m_pStream;
183
- int m_progressive_flag;
184
- uint8 m_huff_ac[JPGD_MAX_HUFF_TABLES];
185
- uint8* m_huff_num[JPGD_MAX_HUFF_TABLES]; // pointer to number of Huffman codes per bit size
186
- uint8* m_huff_val[JPGD_MAX_HUFF_TABLES]; // pointer to Huffman codes per bit size
187
- jpgd_quant_t* m_quant[JPGD_MAX_QUANT_TABLES]; // pointer to quantization tables
188
- int m_scan_type; // Gray, Yh1v1, Yh1v2, Yh2v1, Yh2v2 (CMYK111, CMYK4114 no longer supported)
189
- int m_comps_in_frame; // # of components in frame
190
- int m_comp_h_samp[JPGD_MAX_COMPONENTS]; // component's horizontal sampling factor
191
- int m_comp_v_samp[JPGD_MAX_COMPONENTS]; // component's vertical sampling factor
192
- int m_comp_quant[JPGD_MAX_COMPONENTS]; // component's quantization table selector
193
- int m_comp_ident[JPGD_MAX_COMPONENTS]; // component's ID
194
- int m_comp_h_blocks[JPGD_MAX_COMPONENTS];
195
- int m_comp_v_blocks[JPGD_MAX_COMPONENTS];
196
- int m_comps_in_scan; // # of components in scan
197
- int m_comp_list[JPGD_MAX_COMPS_IN_SCAN]; // components in this scan
198
- int m_comp_dc_tab[JPGD_MAX_COMPONENTS]; // component's DC Huffman coding table selector
199
- int m_comp_ac_tab[JPGD_MAX_COMPONENTS]; // component's AC Huffman coding table selector
200
- int m_spectral_start; // spectral selection start
201
- int m_spectral_end; // spectral selection end
202
- int m_successive_low; // successive approximation low
203
- int m_successive_high; // successive approximation high
204
- int m_max_mcu_x_size; // MCU's max. X size in pixels
205
- int m_max_mcu_y_size; // MCU's max. Y size in pixels
206
- int m_blocks_per_mcu;
207
- int m_max_blocks_per_row;
208
- int m_mcus_per_row, m_mcus_per_col;
209
- int m_mcu_org[JPGD_MAX_BLOCKS_PER_MCU];
210
- int m_total_lines_left; // total # lines left in image
211
- int m_mcu_lines_left; // total # lines left in this MCU
212
- int m_real_dest_bytes_per_scan_line;
213
- int m_dest_bytes_per_scan_line; // rounded up
214
- int m_dest_bytes_per_pixel; // 4 (RGB) or 1 (Y)
215
- huff_tables* m_pHuff_tabs[JPGD_MAX_HUFF_TABLES];
216
- coeff_buf* m_dc_coeffs[JPGD_MAX_COMPONENTS];
217
- coeff_buf* m_ac_coeffs[JPGD_MAX_COMPONENTS];
218
- int m_eob_run;
219
- int m_block_y_mcu[JPGD_MAX_COMPONENTS];
220
- uint8* m_pIn_buf_ofs;
221
- int m_in_buf_left;
222
- int m_tem_flag;
223
- bool m_eof_flag;
224
- uint8 m_in_buf_pad_start[128];
225
- uint8 m_in_buf[JPGD_IN_BUF_SIZE + 128];
226
- uint8 m_in_buf_pad_end[128];
227
- int m_bits_left;
228
- uint m_bit_buf;
229
- int m_restart_interval;
230
- int m_restarts_left;
231
- int m_next_restart_num;
232
- int m_max_mcus_per_row;
233
- int m_max_blocks_per_mcu;
234
- int m_expanded_blocks_per_mcu;
235
- int m_expanded_blocks_per_row;
236
- int m_expanded_blocks_per_component;
237
- bool m_freq_domain_chroma_upsample;
238
- int m_max_mcus_per_col;
239
- uint m_last_dc_val[JPGD_MAX_COMPONENTS];
240
- jpgd_block_t* m_pMCU_coefficients;
241
- int m_mcu_block_max_zag[JPGD_MAX_BLOCKS_PER_MCU];
242
- uint8* m_pSample_buf;
243
- int m_crr[256];
244
- int m_cbb[256];
245
- int m_crg[256];
246
- int m_cbg[256];
247
- uint8* m_pScan_line_0;
248
- uint8* m_pScan_line_1;
249
- jpgd_status m_error_code;
250
- bool m_ready_flag;
251
- int m_total_bytes_read;
252
-
253
- void free_all_blocks();
254
- // BEGIN EPIC MOD
255
- UE_NORETURN void stop_decoding(jpgd_status status);
256
- // END EPIC MOD
257
- void *alloc(size_t n, bool zero = false);
258
- void word_clear(void *p, uint16 c, uint n);
259
- void prep_in_buffer();
260
- void read_dht_marker();
261
- void read_dqt_marker();
262
- void read_sof_marker();
263
- void skip_variable_marker();
264
- void read_dri_marker();
265
- void read_sos_marker();
266
- int next_marker();
267
- int process_markers();
268
- void locate_soi_marker();
269
- void locate_sof_marker();
270
- int locate_sos_marker();
271
- void init(jpeg_decoder_stream * pStream);
272
- void create_look_ups();
273
- void fix_in_buffer();
274
- void transform_mcu(int mcu_row);
275
- void transform_mcu_expand(int mcu_row);
276
- coeff_buf* coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y);
277
- inline jpgd_block_t *coeff_buf_getp(coeff_buf *cb, int block_x, int block_y);
278
- void load_next_row();
279
- void decode_next_row();
280
- void make_huff_table(int index, huff_tables *pH);
281
- void check_quant_tables();
282
- void check_huff_tables();
283
- void calc_mcu_block_order();
284
- int init_scan();
285
- void init_frame();
286
- void process_restart();
287
- void decode_scan(pDecode_block_func decode_block_func);
288
- void init_progressive();
289
- void init_sequential();
290
- void decode_start();
291
- void decode_init(jpeg_decoder_stream * pStream);
292
- void H2V2Convert();
293
- void H2V1Convert();
294
- void H1V2Convert();
295
- void H1V1Convert();
296
- void gray_convert();
297
- void expanded_convert();
298
- void find_eoi();
299
- inline uint get_char();
300
- inline uint get_char(bool *pPadding_flag);
301
- inline void stuff_char(uint8 q);
302
- inline uint8 get_octet();
303
- inline uint get_bits(int num_bits);
304
- inline uint get_bits_no_markers(int numbits);
305
- inline int huff_decode(huff_tables *pH);
306
- inline int huff_decode(huff_tables *pH, int& extrabits);
307
- static inline uint8 clamp(int i);
308
- static void decode_block_dc_first(jpeg_decoder *pD, int component_id, int block_x, int block_y);
309
- static void decode_block_dc_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y);
310
- static void decode_block_ac_first(jpeg_decoder *pD, int component_id, int block_x, int block_y);
311
- static void decode_block_ac_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y);
312
- };
313
-
314
- } // namespace jpgd
315
-
316
- #endif // JPEG_DECODER_H
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/test_project/cpp/libJPG/jpge.cpp DELETED
@@ -1,1049 +0,0 @@
1
- // jpge.cpp - C++ class for JPEG compression.
2
- // Public domain, Rich Geldreich <richgel99@gmail.com>
3
- // v1.01, Dec. 18, 2010 - Initial release
4
- // v1.02, Apr. 6, 2011 - Removed 2x2 ordered dither in H2V1 chroma subsampling method load_block_16_8_8(). (The rounding factor was 2, when it should have been 1. Either way, it wasn't helping.)
5
- // v1.03, Apr. 16, 2011 - Added support for optimized Huffman code tables, optimized dynamic memory allocation down to only 1 alloc.
6
- // Also from Alex Evans: Added RGBA support, linear memory allocator (no longer needed in v1.03).
7
- // v1.04, May. 19, 2012: Forgot to set m_pFile ptr to NULL in cfile_stream::close(). Thanks to Owen Kaluza for reporting this bug.
8
- // Code tweaks to fix VS2008 static code analysis warnings (all looked harmless).
9
- // Code review revealed method load_block_16_8_8() (used for the non-default H2V1 sampling mode to downsample chroma) somehow didn't get the rounding factor fix from v1.02.
10
-
11
- #include "jpge.h"
12
-
13
- #include <stdlib.h>
14
- #include <string.h>
15
- #if PLATFORM_WINDOWS
16
- #include <malloc.h>
17
- #endif
18
-
19
- #define JPGE_MAX(a,b) (((a)>(b))?(a):(b))
20
- #define JPGE_MIN(a,b) (((a)<(b))?(a):(b))
21
-
22
- namespace jpge {
23
-
24
- static inline void *jpge_malloc(size_t nSize) { return FMemory::Malloc(nSize); }
25
- static inline void jpge_free(void *p) { FMemory::Free(p);; }
26
-
27
- // Various JPEG enums and tables.
28
- enum { M_SOF0 = 0xC0, M_DHT = 0xC4, M_SOI = 0xD8, M_EOI = 0xD9, M_SOS = 0xDA, M_DQT = 0xDB, M_APP0 = 0xE0 };
29
- enum { DC_LUM_CODES = 12, AC_LUM_CODES = 256, DC_CHROMA_CODES = 12, AC_CHROMA_CODES = 256, MAX_HUFF_SYMBOLS = 257, MAX_HUFF_CODESIZE = 32 };
30
-
31
- static uint8 s_zag[64] = { 0,1,8,16,9,2,3,10,17,24,32,25,18,11,4,5,12,19,26,33,40,48,41,34,27,20,13,6,7,14,21,28,35,42,49,56,57,50,43,36,29,22,15,23,30,37,44,51,58,59,52,45,38,31,39,46,53,60,61,54,47,55,62,63 };
32
- static int16 s_std_lum_quant[64] = { 16,11,12,14,12,10,16,14,13,14,18,17,16,19,24,40,26,24,22,22,24,49,35,37,29,40,58,51,61,60,57,51,56,55,64,72,92,78,64,68,87,69,55,56,80,109,81,87,95,98,103,104,103,62,77,113,121,112,100,120,92,101,103,99 };
33
- static int16 s_std_croma_quant[64] = { 17,18,18,24,21,24,47,26,26,47,99,66,56,66,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99 };
34
- static uint8 s_dc_lum_bits[17] = { 0,0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0 };
35
- static uint8 s_dc_lum_val[DC_LUM_CODES] = { 0,1,2,3,4,5,6,7,8,9,10,11 };
36
- static uint8 s_ac_lum_bits[17] = { 0,0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,0x7d };
37
- static uint8 s_ac_lum_val[AC_LUM_CODES] =
38
- {
39
- 0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xa1,0x08,0x23,0x42,0xb1,0xc1,0x15,0x52,0xd1,0xf0,
40
- 0x24,0x33,0x62,0x72,0x82,0x09,0x0a,0x16,0x17,0x18,0x19,0x1a,0x25,0x26,0x27,0x28,0x29,0x2a,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,
41
- 0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x83,0x84,0x85,0x86,0x87,0x88,0x89,
42
- 0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,
43
- 0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,
44
- 0xf9,0xfa
45
- };
46
- static uint8 s_dc_chroma_bits[17] = { 0,0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0 };
47
- static uint8 s_dc_chroma_val[DC_CHROMA_CODES] = { 0,1,2,3,4,5,6,7,8,9,10,11 };
48
- static uint8 s_ac_chroma_bits[17] = { 0,0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,0x77 };
49
- static uint8 s_ac_chroma_val[AC_CHROMA_CODES] =
50
- {
51
- 0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91,0xa1,0xb1,0xc1,0x09,0x23,0x33,0x52,0xf0,
52
- 0x15,0x62,0x72,0xd1,0x0a,0x16,0x24,0x34,0xe1,0x25,0xf1,0x17,0x18,0x19,0x1a,0x26,0x27,0x28,0x29,0x2a,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,
53
- 0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x82,0x83,0x84,0x85,0x86,0x87,
54
- 0x88,0x89,0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,
55
- 0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,
56
- 0xf9,0xfa
57
- };
58
-
59
- // Low-level helper functions.
60
- template <class T> inline void clear_obj(T &obj) { memset(&obj, 0, sizeof(obj)); }
61
-
62
- const int YR = 19595, YG = 38470, YB = 7471, CB_R = -11059, CB_G = -21709, CB_B = 32768, CR_R = 32768, CR_G = -27439, CR_B = -5329;
63
- static inline uint8 clamp(int i) { if (static_cast<uint>(i) > 255U) { if (i < 0) i = 0; else if (i > 255) i = 255; } return static_cast<uint8>(i); }
64
-
65
- static void RGB_to_YCC(uint8* pDst, const uint8 *pSrc, int num_pixels)
66
- {
67
- for ( ; num_pixels; pDst += 3, pSrc += 3, num_pixels--)
68
- {
69
- const int r = pSrc[0], g = pSrc[1], b = pSrc[2];
70
- pDst[0] = static_cast<uint8>((r * YR + g * YG + b * YB + 32768) >> 16);
71
- pDst[1] = clamp(128 + ((r * CB_R + g * CB_G + b * CB_B + 32768) >> 16));
72
- pDst[2] = clamp(128 + ((r * CR_R + g * CR_G + b * CR_B + 32768) >> 16));
73
- }
74
- }
75
-
76
- static void RGB_to_Y(uint8* pDst, const uint8 *pSrc, int num_pixels)
77
- {
78
- for ( ; num_pixels; pDst++, pSrc += 3, num_pixels--)
79
- pDst[0] = static_cast<uint8>((pSrc[0] * YR + pSrc[1] * YG + pSrc[2] * YB + 32768) >> 16);
80
- }
81
-
82
- static void RGBA_to_YCC(uint8* pDst, const uint8 *pSrc, int num_pixels)
83
- {
84
- for ( ; num_pixels; pDst += 3, pSrc += 4, num_pixels--)
85
- {
86
- const int r = pSrc[0], g = pSrc[1], b = pSrc[2];
87
- pDst[0] = static_cast<uint8>((r * YR + g * YG + b * YB + 32768) >> 16);
88
- pDst[1] = clamp(128 + ((r * CB_R + g * CB_G + b * CB_B + 32768) >> 16));
89
- pDst[2] = clamp(128 + ((r * CR_R + g * CR_G + b * CR_B + 32768) >> 16));
90
- }
91
- }
92
-
93
- static void RGBA_to_Y(uint8* pDst, const uint8 *pSrc, int num_pixels)
94
- {
95
- for ( ; num_pixels; pDst++, pSrc += 4, num_pixels--)
96
- pDst[0] = static_cast<uint8>((pSrc[0] * YR + pSrc[1] * YG + pSrc[2] * YB + 32768) >> 16);
97
- }
98
-
99
- static void Y_to_YCC(uint8* pDst, const uint8* pSrc, int num_pixels)
100
- {
101
- for( ; num_pixels; pDst += 3, pSrc++, num_pixels--) { pDst[0] = pSrc[0]; pDst[1] = 128; pDst[2] = 128; }
102
- }
103
-
104
- // Forward DCT - DCT derived from jfdctint.
105
- #define CONST_BITS 13
106
- #define ROW_BITS 2
107
- #define DCT_DESCALE(x, n) (((x) + (((int32)1) << ((n) - 1))) >> (n))
108
- #define DCT_MUL(var, c) (static_cast<int16>(var) * static_cast<int32>(c))
109
- #define DCT1D(s0, s1, s2, s3, s4, s5, s6, s7) \
110
- int32 t0 = s0 + s7, t7 = s0 - s7, t1 = s1 + s6, t6 = s1 - s6, t2 = s2 + s5, t5 = s2 - s5, t3 = s3 + s4, t4 = s3 - s4; \
111
- int32 t10 = t0 + t3, t13 = t0 - t3, t11 = t1 + t2, t12 = t1 - t2; \
112
- int32 u1 = DCT_MUL(t12 + t13, 4433); \
113
- s2 = u1 + DCT_MUL(t13, 6270); \
114
- s6 = u1 + DCT_MUL(t12, -15137); \
115
- u1 = t4 + t7; \
116
- int32 u2 = t5 + t6, u3 = t4 + t6, u4 = t5 + t7; \
117
- int32 z5 = DCT_MUL(u3 + u4, 9633); \
118
- t4 = DCT_MUL(t4, 2446); t5 = DCT_MUL(t5, 16819); \
119
- t6 = DCT_MUL(t6, 25172); t7 = DCT_MUL(t7, 12299); \
120
- u1 = DCT_MUL(u1, -7373); u2 = DCT_MUL(u2, -20995); \
121
- u3 = DCT_MUL(u3, -16069); u4 = DCT_MUL(u4, -3196); \
122
- u3 += z5; u4 += z5; \
123
- s0 = t10 + t11; s1 = t7 + u1 + u4; s3 = t6 + u2 + u3; s4 = t10 - t11; s5 = t5 + u2 + u4; s7 = t4 + u1 + u3;
124
-
125
- static void DCT2D(int32 *p)
126
- {
127
- int32 c, *q = p;
128
- for (c = 7; c >= 0; c--, q += 8)
129
- {
130
- int32 s0 = q[0], s1 = q[1], s2 = q[2], s3 = q[3], s4 = q[4], s5 = q[5], s6 = q[6], s7 = q[7];
131
- DCT1D(s0, s1, s2, s3, s4, s5, s6, s7);
132
- q[0] = s0 << ROW_BITS; q[1] = DCT_DESCALE(s1, CONST_BITS-ROW_BITS); q[2] = DCT_DESCALE(s2, CONST_BITS-ROW_BITS); q[3] = DCT_DESCALE(s3, CONST_BITS-ROW_BITS);
133
- q[4] = s4 << ROW_BITS; q[5] = DCT_DESCALE(s5, CONST_BITS-ROW_BITS); q[6] = DCT_DESCALE(s6, CONST_BITS-ROW_BITS); q[7] = DCT_DESCALE(s7, CONST_BITS-ROW_BITS);
134
- }
135
- for (q = p, c = 7; c >= 0; c--, q++)
136
- {
137
- int32 s0 = q[0*8], s1 = q[1*8], s2 = q[2*8], s3 = q[3*8], s4 = q[4*8], s5 = q[5*8], s6 = q[6*8], s7 = q[7*8];
138
- DCT1D(s0, s1, s2, s3, s4, s5, s6, s7);
139
- q[0*8] = DCT_DESCALE(s0, ROW_BITS+3); q[1*8] = DCT_DESCALE(s1, CONST_BITS+ROW_BITS+3); q[2*8] = DCT_DESCALE(s2, CONST_BITS+ROW_BITS+3); q[3*8] = DCT_DESCALE(s3, CONST_BITS+ROW_BITS+3);
140
- q[4*8] = DCT_DESCALE(s4, ROW_BITS+3); q[5*8] = DCT_DESCALE(s5, CONST_BITS+ROW_BITS+3); q[6*8] = DCT_DESCALE(s6, CONST_BITS+ROW_BITS+3); q[7*8] = DCT_DESCALE(s7, CONST_BITS+ROW_BITS+3);
141
- }
142
- }
143
-
144
- struct sym_freq { uint m_key, m_sym_index; };
145
-
146
- // Radix sorts sym_freq[] array by 32-bit key m_key. Returns ptr to sorted values.
147
- static inline sym_freq* radix_sort_syms(uint num_syms, sym_freq* pSyms0, sym_freq* pSyms1)
148
- {
149
- const uint cMaxPasses = 4;
150
- uint32 hist[256 * cMaxPasses]; clear_obj(hist);
151
- for (uint i = 0; i < num_syms; i++) { uint freq = pSyms0[i].m_key; hist[freq & 0xFF]++; hist[256 + ((freq >> 8) & 0xFF)]++; hist[256*2 + ((freq >> 16) & 0xFF)]++; hist[256*3 + ((freq >> 24) & 0xFF)]++; }
152
- sym_freq* pCur_syms = pSyms0, *pNew_syms = pSyms1;
153
- uint total_passes = cMaxPasses; while ((total_passes > 1) && (num_syms == hist[(total_passes - 1) * 256])) total_passes--;
154
- for (uint pass_shift = 0, pass = 0; pass < total_passes; pass++, pass_shift += 8)
155
- {
156
- const uint32* pHist = &hist[pass << 8];
157
- uint offsets[256], cur_ofs = 0;
158
- for (uint i = 0; i < 256; i++) { offsets[i] = cur_ofs; cur_ofs += pHist[i]; }
159
- for (uint i = 0; i < num_syms; i++)
160
- pNew_syms[offsets[(pCur_syms[i].m_key >> pass_shift) & 0xFF]++] = pCur_syms[i];
161
- sym_freq* t = pCur_syms; pCur_syms = pNew_syms; pNew_syms = t;
162
- }
163
- return pCur_syms;
164
- }
165
-
166
- // calculate_minimum_redundancy() originally written by: Alistair Moffat, alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996.
167
- static void calculate_minimum_redundancy(sym_freq *A, int n)
168
- {
169
- int root, leaf, next, avbl, used, dpth;
170
- if (n==0) return; else if (n==1) { A[0].m_key = 1; return; }
171
- A[0].m_key += A[1].m_key; root = 0; leaf = 2;
172
- for (next=1; next < n-1; next++)
173
- {
174
- if (leaf>=n || A[root].m_key<A[leaf].m_key) { A[next].m_key = A[root].m_key; A[root++].m_key = next; } else A[next].m_key = A[leaf++].m_key;
175
- if (leaf>=n || (root<next && A[root].m_key<A[leaf].m_key)) { A[next].m_key += A[root].m_key; A[root++].m_key = next; } else A[next].m_key += A[leaf++].m_key;
176
- }
177
- A[n-2].m_key = 0;
178
- for (next=n-3; next>=0; next--) A[next].m_key = A[A[next].m_key].m_key+1;
179
- avbl = 1; used = dpth = 0; root = n-2; next = n-1;
180
- while (avbl>0)
181
- {
182
- while (root>=0 && (int)A[root].m_key==dpth) { used++; root--; }
183
- while (avbl>used) { A[next--].m_key = dpth; avbl--; }
184
- avbl = 2*used; dpth++; used = 0;
185
- }
186
- }
187
-
188
- // Limits canonical Huffman code table's max code size to max_code_size.
189
- static void huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size)
190
- {
191
- if (code_list_len <= 1) return;
192
-
193
- for (int i = max_code_size + 1; i <= MAX_HUFF_CODESIZE; i++) pNum_codes[max_code_size] += pNum_codes[i];
194
-
195
- uint32 total = 0;
196
- for (int i = max_code_size; i > 0; i--)
197
- total += (((uint32)pNum_codes[i]) << (max_code_size - i));
198
-
199
- while (total != (1UL << max_code_size))
200
- {
201
- pNum_codes[max_code_size]--;
202
- for (int i = max_code_size - 1; i > 0; i--)
203
- {
204
- if (pNum_codes[i]) { pNum_codes[i]--; pNum_codes[i + 1] += 2; break; }
205
- }
206
- total--;
207
- }
208
- }
209
-
210
- // Generates an optimized offman table.
211
- void jpeg_encoder::optimize_huffman_table(int table_num, int table_len)
212
- {
213
- sym_freq syms0[MAX_HUFF_SYMBOLS], syms1[MAX_HUFF_SYMBOLS];
214
- syms0[0].m_key = 1; syms0[0].m_sym_index = 0; // dummy symbol, assures that no valid code contains all 1's
215
- int num_used_syms = 1;
216
- const uint32 *pSym_count = &m_huff_count[table_num][0];
217
- for (int i = 0; i < table_len; i++)
218
- if (pSym_count[i]) { syms0[num_used_syms].m_key = pSym_count[i]; syms0[num_used_syms++].m_sym_index = i + 1; }
219
- sym_freq* pSyms = radix_sort_syms(num_used_syms, syms0, syms1);
220
- calculate_minimum_redundancy(pSyms, num_used_syms);
221
-
222
- // Count the # of symbols of each code size.
223
- int num_codes[1 + MAX_HUFF_CODESIZE]; clear_obj(num_codes);
224
- for (int i = 0; i < num_used_syms; i++)
225
- num_codes[pSyms[i].m_key]++;
226
-
227
- const uint JPGE_CODE_SIZE_LIMIT = 16; // the maximum possible size of a JPEG Huffman code (valid range is [9,16] - 9 vs. 8 because of the dummy symbol)
228
- huffman_enforce_max_code_size(num_codes, num_used_syms, JPGE_CODE_SIZE_LIMIT);
229
-
230
- // Compute m_huff_bits array, which contains the # of symbols per code size.
231
- clear_obj(m_huff_bits[table_num]);
232
- for (int i = 1; i <= (int)JPGE_CODE_SIZE_LIMIT; i++)
233
- m_huff_bits[table_num][i] = static_cast<uint8>(num_codes[i]);
234
-
235
- // Remove the dummy symbol added above, which must be in largest bucket.
236
- for (int i = JPGE_CODE_SIZE_LIMIT; i >= 1; i--)
237
- {
238
- if (m_huff_bits[table_num][i]) { m_huff_bits[table_num][i]--; break; }
239
- }
240
-
241
- // Compute the m_huff_val array, which contains the symbol indices sorted by code size (smallest to largest).
242
- for (int i = num_used_syms - 1; i >= 1; i--)
243
- m_huff_val[table_num][num_used_syms - 1 - i] = static_cast<uint8>(pSyms[i].m_sym_index - 1);
244
- }
245
-
246
- // JPEG marker generation.
247
- void jpeg_encoder::emit_byte(uint8 i)
248
- {
249
- m_all_stream_writes_succeeded = m_all_stream_writes_succeeded && m_pStream->put_obj(i);
250
- }
251
-
252
- void jpeg_encoder::emit_word(uint i)
253
- {
254
- emit_byte(uint8(i >> 8)); emit_byte(uint8(i & 0xFF));
255
- }
256
-
257
- void jpeg_encoder::emit_marker(int marker)
258
- {
259
- emit_byte(uint8(0xFF)); emit_byte(uint8(marker));
260
- }
261
-
262
- // Emit JFIF marker
263
- void jpeg_encoder::emit_jfif_app0()
264
- {
265
- emit_marker(M_APP0);
266
- emit_word(2 + 4 + 1 + 2 + 1 + 2 + 2 + 1 + 1);
267
- emit_byte(0x4A); emit_byte(0x46); emit_byte(0x49); emit_byte(0x46); /* Identifier: ASCII "JFIF" */
268
- emit_byte(0);
269
- emit_byte(1); /* Major version */
270
- emit_byte(1); /* Minor version */
271
- emit_byte(0); /* Density unit */
272
- emit_word(1);
273
- emit_word(1);
274
- emit_byte(0); /* No thumbnail image */
275
- emit_byte(0);
276
- }
277
-
278
- // Emit quantization tables
279
- void jpeg_encoder::emit_dqt()
280
- {
281
- for (int i = 0; i < ((m_num_components == 3) ? 2 : 1); i++)
282
- {
283
- emit_marker(M_DQT);
284
- emit_word(64 + 1 + 2);
285
- emit_byte(static_cast<uint8>(i));
286
- for (int j = 0; j < 64; j++)
287
- emit_byte(static_cast<uint8>(m_quantization_tables[i][j]));
288
- }
289
- }
290
-
291
- // Emit start of frame marker
292
- void jpeg_encoder::emit_sof()
293
- {
294
- emit_marker(M_SOF0); /* baseline */
295
- emit_word(3 * m_num_components + 2 + 5 + 1);
296
- emit_byte(8); /* precision */
297
- emit_word(m_image_y);
298
- emit_word(m_image_x);
299
- emit_byte(m_num_components);
300
- for (int i = 0; i < m_num_components; i++)
301
- {
302
- emit_byte(static_cast<uint8>(i + 1)); /* component ID */
303
- emit_byte((m_comp_h_samp[i] << 4) + m_comp_v_samp[i]); /* h and v sampling */
304
- emit_byte(i > 0); /* quant. table num */
305
- }
306
- }
307
-
308
- // Emit Huffman table.
309
- void jpeg_encoder::emit_dht(uint8 *bits, uint8 *val, int index, bool ac_flag)
310
- {
311
- emit_marker(M_DHT);
312
-
313
- int length = 0;
314
- for (int i = 1; i <= 16; i++)
315
- length += bits[i];
316
-
317
- emit_word(length + 2 + 1 + 16);
318
- emit_byte(static_cast<uint8>(index + (ac_flag << 4)));
319
-
320
- for (int i = 1; i <= 16; i++)
321
- emit_byte(bits[i]);
322
-
323
- for (int i = 0; i < length; i++)
324
- emit_byte(val[i]);
325
- }
326
-
327
- // Emit all Huffman tables.
328
- void jpeg_encoder::emit_dhts()
329
- {
330
- emit_dht(m_huff_bits[0+0], m_huff_val[0+0], 0, false);
331
- emit_dht(m_huff_bits[2+0], m_huff_val[2+0], 0, true);
332
- if (m_num_components == 3)
333
- {
334
- emit_dht(m_huff_bits[0+1], m_huff_val[0+1], 1, false);
335
- emit_dht(m_huff_bits[2+1], m_huff_val[2+1], 1, true);
336
- }
337
- }
338
-
339
- // emit start of scan
340
- void jpeg_encoder::emit_sos()
341
- {
342
- emit_marker(M_SOS);
343
- emit_word(2 * m_num_components + 2 + 1 + 3);
344
- emit_byte(m_num_components);
345
- for (int i = 0; i < m_num_components; i++)
346
- {
347
- emit_byte(static_cast<uint8>(i + 1));
348
- if (i == 0)
349
- emit_byte((0 << 4) + 0);
350
- else
351
- emit_byte((1 << 4) + 1);
352
- }
353
- emit_byte(0); /* spectral selection */
354
- emit_byte(63);
355
- emit_byte(0);
356
- }
357
-
358
- // Emit all markers at beginning of image file.
359
- void jpeg_encoder::emit_markers()
360
- {
361
- emit_marker(M_SOI);
362
- emit_jfif_app0();
363
- emit_dqt();
364
- emit_sof();
365
- emit_dhts();
366
- emit_sos();
367
- }
368
-
369
- // Compute the actual canonical Huffman codes/code sizes given the JPEG huff bits and val arrays.
370
- void jpeg_encoder::compute_huffman_table(uint *codes, uint8 *code_sizes, uint8 *bits, uint8 *val)
371
- {
372
- int i, l, last_p, si;
373
- uint8 huff_size[257];
374
- uint huff_code[257];
375
- uint code;
376
-
377
- int p = 0;
378
- for (l = 1; l <= 16; l++)
379
- for (i = 1; i <= bits[l]; i++)
380
- huff_size[p++] = (char)l;
381
-
382
- huff_size[p] = 0; last_p = p; // write sentinel
383
-
384
- code = 0; si = huff_size[0]; p = 0;
385
-
386
- while (huff_size[p])
387
- {
388
- while (huff_size[p] == si)
389
- huff_code[p++] = code++;
390
- code <<= 1;
391
- si++;
392
- }
393
-
394
- memset(codes, 0, sizeof(codes[0])*256);
395
- memset(code_sizes, 0, sizeof(code_sizes[0])*256);
396
- for (p = 0; p < last_p; p++)
397
- {
398
- codes[val[p]] = huff_code[p];
399
- code_sizes[val[p]] = huff_size[p];
400
- }
401
- }
402
-
403
- // Quantization table generation.
404
- void jpeg_encoder::compute_quant_table(int32 *pDst, int16 *pSrc)
405
- {
406
- int32 q;
407
- if (m_params.m_quality < 50)
408
- q = 5000 / m_params.m_quality;
409
- else
410
- q = 200 - m_params.m_quality * 2;
411
- for (int i = 0; i < 64; i++)
412
- {
413
- int32 j = *pSrc++; j = (j * q + 50L) / 100L;
414
- *pDst++ = JPGE_MIN(JPGE_MAX(j, 1), 255);
415
- }
416
- }
417
-
418
- // Higher-level methods.
419
- void jpeg_encoder::first_pass_init()
420
- {
421
- m_bit_buffer = 0; m_bits_in = 0;
422
- memset(m_last_dc_val, 0, 3 * sizeof(m_last_dc_val[0]));
423
- m_mcu_y_ofs = 0;
424
- m_pass_num = 1;
425
- }
426
-
427
- bool jpeg_encoder::second_pass_init()
428
- {
429
- compute_huffman_table(&m_huff_codes[0+0][0], &m_huff_code_sizes[0+0][0], m_huff_bits[0+0], m_huff_val[0+0]);
430
- compute_huffman_table(&m_huff_codes[2+0][0], &m_huff_code_sizes[2+0][0], m_huff_bits[2+0], m_huff_val[2+0]);
431
- if (m_num_components > 1)
432
- {
433
- compute_huffman_table(&m_huff_codes[0+1][0], &m_huff_code_sizes[0+1][0], m_huff_bits[0+1], m_huff_val[0+1]);
434
- compute_huffman_table(&m_huff_codes[2+1][0], &m_huff_code_sizes[2+1][0], m_huff_bits[2+1], m_huff_val[2+1]);
435
- }
436
- first_pass_init();
437
- emit_markers();
438
- m_pass_num = 2;
439
- return true;
440
- }
441
-
442
- bool jpeg_encoder::jpg_open(int p_x_res, int p_y_res, int src_channels)
443
- {
444
- m_num_components = 3;
445
- switch (m_params.m_subsampling)
446
- {
447
- case Y_ONLY:
448
- {
449
- m_num_components = 1;
450
- m_comp_h_samp[0] = 1; m_comp_v_samp[0] = 1;
451
- m_mcu_x = 8; m_mcu_y = 8;
452
- break;
453
- }
454
- case H1V1:
455
- {
456
- m_comp_h_samp[0] = 1; m_comp_v_samp[0] = 1;
457
- m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1;
458
- m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1;
459
- m_mcu_x = 8; m_mcu_y = 8;
460
- break;
461
- }
462
- case H2V1:
463
- {
464
- m_comp_h_samp[0] = 2; m_comp_v_samp[0] = 1;
465
- m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1;
466
- m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1;
467
- m_mcu_x = 16; m_mcu_y = 8;
468
- break;
469
- }
470
- case H2V2:
471
- {
472
- m_comp_h_samp[0] = 2; m_comp_v_samp[0] = 2;
473
- m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1;
474
- m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1;
475
- m_mcu_x = 16; m_mcu_y = 16;
476
- }
477
- }
478
-
479
- m_image_x = p_x_res; m_image_y = p_y_res;
480
- m_image_bpp = src_channels;
481
- m_image_bpl = m_image_x * src_channels;
482
- m_image_x_mcu = (m_image_x + m_mcu_x - 1) & (~(m_mcu_x - 1));
483
- m_image_y_mcu = (m_image_y + m_mcu_y - 1) & (~(m_mcu_y - 1));
484
- m_image_bpl_xlt = m_image_x * m_num_components;
485
- m_image_bpl_mcu = m_image_x_mcu * m_num_components;
486
- m_mcus_per_row = m_image_x_mcu / m_mcu_x;
487
-
488
- if ((m_mcu_lines[0] = static_cast<uint8*>(jpge_malloc(m_image_bpl_mcu * m_mcu_y))) == NULL) return false;
489
- for (int i = 1; i < m_mcu_y; i++)
490
- m_mcu_lines[i] = m_mcu_lines[i-1] + m_image_bpl_mcu;
491
-
492
- compute_quant_table(m_quantization_tables[0], s_std_lum_quant);
493
- compute_quant_table(m_quantization_tables[1], m_params.m_no_chroma_discrim_flag ? s_std_lum_quant : s_std_croma_quant);
494
-
495
- m_out_buf_left = JPGE_OUT_BUF_SIZE;
496
- m_pOut_buf = m_out_buf;
497
-
498
- if (m_params.m_two_pass_flag)
499
- {
500
- clear_obj(m_huff_count);
501
- first_pass_init();
502
- }
503
- else
504
- {
505
- memcpy(m_huff_bits[0+0], s_dc_lum_bits, 17); memcpy(m_huff_val [0+0], s_dc_lum_val, DC_LUM_CODES);
506
- memcpy(m_huff_bits[2+0], s_ac_lum_bits, 17); memcpy(m_huff_val [2+0], s_ac_lum_val, AC_LUM_CODES);
507
- memcpy(m_huff_bits[0+1], s_dc_chroma_bits, 17); memcpy(m_huff_val [0+1], s_dc_chroma_val, DC_CHROMA_CODES);
508
- memcpy(m_huff_bits[2+1], s_ac_chroma_bits, 17); memcpy(m_huff_val [2+1], s_ac_chroma_val, AC_CHROMA_CODES);
509
- if (!second_pass_init()) return false; // in effect, skip over the first pass
510
- }
511
- return m_all_stream_writes_succeeded;
512
- }
513
-
514
- void jpeg_encoder::load_block_8_8_grey(int x)
515
- {
516
- uint8 *pSrc;
517
- sample_array_t *pDst = m_sample_array;
518
- x <<= 3;
519
- for (int i = 0; i < 8; i++, pDst += 8)
520
- {
521
- pSrc = m_mcu_lines[i] + x;
522
- pDst[0] = pSrc[0] - 128; pDst[1] = pSrc[1] - 128; pDst[2] = pSrc[2] - 128; pDst[3] = pSrc[3] - 128;
523
- pDst[4] = pSrc[4] - 128; pDst[5] = pSrc[5] - 128; pDst[6] = pSrc[6] - 128; pDst[7] = pSrc[7] - 128;
524
- }
525
- }
526
-
527
- void jpeg_encoder::load_block_8_8(int x, int y, int c)
528
- {
529
- uint8 *pSrc;
530
- sample_array_t *pDst = m_sample_array;
531
- x = (x * (8 * 3)) + c;
532
- y <<= 3;
533
- for (int i = 0; i < 8; i++, pDst += 8)
534
- {
535
- pSrc = m_mcu_lines[y + i] + x;
536
- pDst[0] = pSrc[0 * 3] - 128; pDst[1] = pSrc[1 * 3] - 128; pDst[2] = pSrc[2 * 3] - 128; pDst[3] = pSrc[3 * 3] - 128;
537
- pDst[4] = pSrc[4 * 3] - 128; pDst[5] = pSrc[5 * 3] - 128; pDst[6] = pSrc[6 * 3] - 128; pDst[7] = pSrc[7 * 3] - 128;
538
- }
539
- }
540
-
541
- void jpeg_encoder::load_block_16_8(int x, int c)
542
- {
543
- uint8 *pSrc1, *pSrc2;
544
- sample_array_t *pDst = m_sample_array;
545
- x = (x * (16 * 3)) + c;
546
- int a = 0, b = 2;
547
- for (int i = 0; i < 16; i += 2, pDst += 8)
548
- {
549
- pSrc1 = m_mcu_lines[i + 0] + x;
550
- pSrc2 = m_mcu_lines[i + 1] + x;
551
- pDst[0] = ((pSrc1[ 0 * 3] + pSrc1[ 1 * 3] + pSrc2[ 0 * 3] + pSrc2[ 1 * 3] + a) >> 2) - 128; pDst[1] = ((pSrc1[ 2 * 3] + pSrc1[ 3 * 3] + pSrc2[ 2 * 3] + pSrc2[ 3 * 3] + b) >> 2) - 128;
552
- pDst[2] = ((pSrc1[ 4 * 3] + pSrc1[ 5 * 3] + pSrc2[ 4 * 3] + pSrc2[ 5 * 3] + a) >> 2) - 128; pDst[3] = ((pSrc1[ 6 * 3] + pSrc1[ 7 * 3] + pSrc2[ 6 * 3] + pSrc2[ 7 * 3] + b) >> 2) - 128;
553
- pDst[4] = ((pSrc1[ 8 * 3] + pSrc1[ 9 * 3] + pSrc2[ 8 * 3] + pSrc2[ 9 * 3] + a) >> 2) - 128; pDst[5] = ((pSrc1[10 * 3] + pSrc1[11 * 3] + pSrc2[10 * 3] + pSrc2[11 * 3] + b) >> 2) - 128;
554
- pDst[6] = ((pSrc1[12 * 3] + pSrc1[13 * 3] + pSrc2[12 * 3] + pSrc2[13 * 3] + a) >> 2) - 128; pDst[7] = ((pSrc1[14 * 3] + pSrc1[15 * 3] + pSrc2[14 * 3] + pSrc2[15 * 3] + b) >> 2) - 128;
555
- int temp = a; a = b; b = temp;
556
- }
557
- }
558
-
559
- void jpeg_encoder::load_block_16_8_8(int x, int c)
560
- {
561
- uint8 *pSrc1;
562
- sample_array_t *pDst = m_sample_array;
563
- x = (x * (16 * 3)) + c;
564
- for (int i = 0; i < 8; i++, pDst += 8)
565
- {
566
- pSrc1 = m_mcu_lines[i + 0] + x;
567
- pDst[0] = ((pSrc1[ 0 * 3] + pSrc1[ 1 * 3]) >> 1) - 128; pDst[1] = ((pSrc1[ 2 * 3] + pSrc1[ 3 * 3]) >> 1) - 128;
568
- pDst[2] = ((pSrc1[ 4 * 3] + pSrc1[ 5 * 3]) >> 1) - 128; pDst[3] = ((pSrc1[ 6 * 3] + pSrc1[ 7 * 3]) >> 1) - 128;
569
- pDst[4] = ((pSrc1[ 8 * 3] + pSrc1[ 9 * 3]) >> 1) - 128; pDst[5] = ((pSrc1[10 * 3] + pSrc1[11 * 3]) >> 1) - 128;
570
- pDst[6] = ((pSrc1[12 * 3] + pSrc1[13 * 3]) >> 1) - 128; pDst[7] = ((pSrc1[14 * 3] + pSrc1[15 * 3]) >> 1) - 128;
571
- }
572
- }
573
-
574
- void jpeg_encoder::load_quantized_coefficients(int component_num)
575
- {
576
- int32 *q = m_quantization_tables[component_num > 0];
577
- int16 *pDst = m_coefficient_array;
578
- for (int i = 0; i < 64; i++)
579
- {
580
- sample_array_t j = m_sample_array[s_zag[i]];
581
- if (j < 0)
582
- {
583
- if ((j = -j + (*q >> 1)) < *q)
584
- *pDst++ = 0;
585
- else
586
- *pDst++ = static_cast<int16>(-(j / *q));
587
- }
588
- else
589
- {
590
- if ((j = j + (*q >> 1)) < *q)
591
- *pDst++ = 0;
592
- else
593
- *pDst++ = static_cast<int16>((j / *q));
594
- }
595
- q++;
596
- }
597
- }
598
-
599
- void jpeg_encoder::flush_output_buffer()
600
- {
601
- if (m_out_buf_left != JPGE_OUT_BUF_SIZE)
602
- m_all_stream_writes_succeeded = m_all_stream_writes_succeeded && m_pStream->put_buf(m_out_buf, JPGE_OUT_BUF_SIZE - m_out_buf_left);
603
- m_pOut_buf = m_out_buf;
604
- m_out_buf_left = JPGE_OUT_BUF_SIZE;
605
- }
606
-
607
- void jpeg_encoder::put_bits(uint bits, uint len)
608
- {
609
- m_bit_buffer |= ((uint32)bits << (24 - (m_bits_in += len)));
610
- while (m_bits_in >= 8)
611
- {
612
- uint8 c;
613
- #define JPGE_PUT_BYTE(c) { *m_pOut_buf++ = (c); if (--m_out_buf_left == 0) flush_output_buffer(); }
614
- JPGE_PUT_BYTE(c = (uint8)((m_bit_buffer >> 16) & 0xFF));
615
- if (c == 0xFF) JPGE_PUT_BYTE(0);
616
- m_bit_buffer <<= 8;
617
- m_bits_in -= 8;
618
- }
619
- }
620
-
621
- void jpeg_encoder::code_coefficients_pass_one(int component_num)
622
- {
623
- if (component_num >= 3) return; // just to shut up static analysis
624
- int i, run_len, nbits, temp1;
625
- int16 *src = m_coefficient_array;
626
- uint32 *dc_count = component_num ? m_huff_count[0 + 1] : m_huff_count[0 + 0], *ac_count = component_num ? m_huff_count[2 + 1] : m_huff_count[2 + 0];
627
-
628
- temp1 = src[0] - m_last_dc_val[component_num];
629
- m_last_dc_val[component_num] = src[0];
630
- if (temp1 < 0) temp1 = -temp1;
631
-
632
- nbits = 0;
633
- while (temp1)
634
- {
635
- nbits++; temp1 >>= 1;
636
- }
637
-
638
- dc_count[nbits]++;
639
- for (run_len = 0, i = 1; i < 64; i++)
640
- {
641
- if ((temp1 = m_coefficient_array[i]) == 0)
642
- run_len++;
643
- else
644
- {
645
- while (run_len >= 16)
646
- {
647
- ac_count[0xF0]++;
648
- run_len -= 16;
649
- }
650
- if (temp1 < 0) temp1 = -temp1;
651
- nbits = 1;
652
- while (temp1 >>= 1) nbits++;
653
- ac_count[(run_len << 4) + nbits]++;
654
- run_len = 0;
655
- }
656
- }
657
- if (run_len) ac_count[0]++;
658
- }
659
-
660
- void jpeg_encoder::code_coefficients_pass_two(int component_num)
661
- {
662
- int i, j, run_len, nbits, temp1, temp2;
663
- int16 *pSrc = m_coefficient_array;
664
- uint *codes[2];
665
- uint8 *code_sizes[2];
666
-
667
- if (component_num == 0)
668
- {
669
- codes[0] = m_huff_codes[0 + 0]; codes[1] = m_huff_codes[2 + 0];
670
- code_sizes[0] = m_huff_code_sizes[0 + 0]; code_sizes[1] = m_huff_code_sizes[2 + 0];
671
- }
672
- else
673
- {
674
- codes[0] = m_huff_codes[0 + 1]; codes[1] = m_huff_codes[2 + 1];
675
- code_sizes[0] = m_huff_code_sizes[0 + 1]; code_sizes[1] = m_huff_code_sizes[2 + 1];
676
- }
677
-
678
- temp1 = temp2 = pSrc[0] - m_last_dc_val[component_num];
679
- m_last_dc_val[component_num] = pSrc[0];
680
-
681
- if (temp1 < 0)
682
- {
683
- temp1 = -temp1; temp2--;
684
- }
685
-
686
- nbits = 0;
687
- while (temp1)
688
- {
689
- nbits++; temp1 >>= 1;
690
- }
691
-
692
- put_bits(codes[0][nbits], code_sizes[0][nbits]);
693
- if (nbits) put_bits(temp2 & ((1 << nbits) - 1), nbits);
694
-
695
- for (run_len = 0, i = 1; i < 64; i++)
696
- {
697
- if ((temp1 = m_coefficient_array[i]) == 0)
698
- run_len++;
699
- else
700
- {
701
- while (run_len >= 16)
702
- {
703
- put_bits(codes[1][0xF0], code_sizes[1][0xF0]);
704
- run_len -= 16;
705
- }
706
- if ((temp2 = temp1) < 0)
707
- {
708
- temp1 = -temp1;
709
- temp2--;
710
- }
711
- nbits = 1;
712
- while (temp1 >>= 1)
713
- nbits++;
714
- j = (run_len << 4) + nbits;
715
- put_bits(codes[1][j], code_sizes[1][j]);
716
- put_bits(temp2 & ((1 << nbits) - 1), nbits);
717
- run_len = 0;
718
- }
719
- }
720
- if (run_len)
721
- put_bits(codes[1][0], code_sizes[1][0]);
722
- }
723
-
724
- void jpeg_encoder::code_block(int component_num)
725
- {
726
- DCT2D(m_sample_array);
727
- load_quantized_coefficients(component_num);
728
- if (m_pass_num == 1)
729
- code_coefficients_pass_one(component_num);
730
- else
731
- code_coefficients_pass_two(component_num);
732
- }
733
-
734
- void jpeg_encoder::process_mcu_row()
735
- {
736
- if (m_num_components == 1)
737
- {
738
- for (int i = 0; i < m_mcus_per_row; i++)
739
- {
740
- load_block_8_8_grey(i); code_block(0);
741
- }
742
- }
743
- else if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 1))
744
- {
745
- for (int i = 0; i < m_mcus_per_row; i++)
746
- {
747
- load_block_8_8(i, 0, 0); code_block(0); load_block_8_8(i, 0, 1); code_block(1); load_block_8_8(i, 0, 2); code_block(2);
748
- }
749
- }
750
- else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 1))
751
- {
752
- for (int i = 0; i < m_mcus_per_row; i++)
753
- {
754
- load_block_8_8(i * 2 + 0, 0, 0); code_block(0); load_block_8_8(i * 2 + 1, 0, 0); code_block(0);
755
- load_block_16_8_8(i, 1); code_block(1); load_block_16_8_8(i, 2); code_block(2);
756
- }
757
- }
758
- else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 2))
759
- {
760
- for (int i = 0; i < m_mcus_per_row; i++)
761
- {
762
- load_block_8_8(i * 2 + 0, 0, 0); code_block(0); load_block_8_8(i * 2 + 1, 0, 0); code_block(0);
763
- load_block_8_8(i * 2 + 0, 1, 0); code_block(0); load_block_8_8(i * 2 + 1, 1, 0); code_block(0);
764
- load_block_16_8(i, 1); code_block(1); load_block_16_8(i, 2); code_block(2);
765
- }
766
- }
767
- }
768
-
769
- bool jpeg_encoder::terminate_pass_one()
770
- {
771
- optimize_huffman_table(0+0, DC_LUM_CODES); optimize_huffman_table(2+0, AC_LUM_CODES);
772
- if (m_num_components > 1)
773
- {
774
- optimize_huffman_table(0+1, DC_CHROMA_CODES); optimize_huffman_table(2+1, AC_CHROMA_CODES);
775
- }
776
- return second_pass_init();
777
- }
778
-
779
- bool jpeg_encoder::terminate_pass_two()
780
- {
781
- put_bits(0x7F, 7);
782
- flush_output_buffer();
783
- emit_marker(M_EOI);
784
- m_pass_num++; // purposely bump up m_pass_num, for debugging
785
- return true;
786
- }
787
-
788
- bool jpeg_encoder::process_end_of_image()
789
- {
790
- if (m_mcu_y_ofs)
791
- {
792
- if (m_mcu_y_ofs < 16) // check here just to shut up static analysis
793
- {
794
- for (int i = m_mcu_y_ofs; i < m_mcu_y; i++)
795
- memcpy(m_mcu_lines[i], m_mcu_lines[m_mcu_y_ofs - 1], m_image_bpl_mcu);
796
- }
797
-
798
- process_mcu_row();
799
- }
800
-
801
- if (m_pass_num == 1)
802
- return terminate_pass_one();
803
- else
804
- return terminate_pass_two();
805
- }
806
-
807
- void jpeg_encoder::load_mcu(const void *pSrc)
808
- {
809
- const uint8* Psrc = reinterpret_cast<const uint8*>(pSrc);
810
-
811
- uint8* pDst = m_mcu_lines[m_mcu_y_ofs]; // OK to write up to m_image_bpl_xlt bytes to pDst
812
-
813
- if (m_num_components == 1)
814
- {
815
- if (m_image_bpp == 4)
816
- RGBA_to_Y(pDst, Psrc, m_image_x);
817
- else if (m_image_bpp == 3)
818
- RGB_to_Y(pDst, Psrc, m_image_x);
819
- else
820
- memcpy(pDst, Psrc, m_image_x);
821
- }
822
- else
823
- {
824
- if (m_image_bpp == 4)
825
- RGBA_to_YCC(pDst, Psrc, m_image_x);
826
- else if (m_image_bpp == 3)
827
- RGB_to_YCC(pDst, Psrc, m_image_x);
828
- else
829
- Y_to_YCC(pDst, Psrc, m_image_x);
830
- }
831
-
832
- // Possibly duplicate pixels at end of scanline if not a multiple of 8 or 16
833
- if (m_num_components == 1)
834
- memset(m_mcu_lines[m_mcu_y_ofs] + m_image_bpl_xlt, pDst[m_image_bpl_xlt - 1], m_image_x_mcu - m_image_x);
835
- else
836
- {
837
- const uint8 y = pDst[m_image_bpl_xlt - 3 + 0], cb = pDst[m_image_bpl_xlt - 3 + 1], cr = pDst[m_image_bpl_xlt - 3 + 2];
838
- uint8 *q = m_mcu_lines[m_mcu_y_ofs] + m_image_bpl_xlt;
839
- for (int i = m_image_x; i < m_image_x_mcu; i++)
840
- {
841
- *q++ = y; *q++ = cb; *q++ = cr;
842
- }
843
- }
844
-
845
- if (++m_mcu_y_ofs == m_mcu_y)
846
- {
847
- process_mcu_row();
848
- m_mcu_y_ofs = 0;
849
- }
850
- }
851
-
852
- void jpeg_encoder::clear()
853
- {
854
- m_mcu_lines[0] = NULL;
855
- m_pass_num = 0;
856
- m_all_stream_writes_succeeded = true;
857
- }
858
-
859
- jpeg_encoder::jpeg_encoder()
860
- {
861
- clear();
862
- }
863
-
864
- jpeg_encoder::~jpeg_encoder()
865
- {
866
- deinit();
867
- }
868
-
869
- bool jpeg_encoder::init(output_stream *pStream, int64_t width, int64_t height, int64_t src_channels, const params &comp_params)
870
- {
871
- deinit();
872
- if (((!pStream) || (width < 1) || (height < 1)) || ((src_channels != 1) && (src_channels != 3) && (src_channels != 4)) || (!comp_params.check_valid())) return false;
873
- m_pStream = pStream;
874
- m_params = comp_params;
875
- return jpg_open(width, height, src_channels);
876
- }
877
-
878
- void jpeg_encoder::deinit()
879
- {
880
- jpge_free(m_mcu_lines[0]);
881
- clear();
882
- }
883
-
884
- bool jpeg_encoder::process_scanline(const void* pScanline)
885
- {
886
- if ((m_pass_num < 1) || (m_pass_num > 2)) return false;
887
- if (m_all_stream_writes_succeeded)
888
- {
889
- if (!pScanline)
890
- {
891
- if (!process_end_of_image()) return false;
892
- }
893
- else
894
- {
895
- load_mcu(pScanline);
896
- }
897
- }
898
- return m_all_stream_writes_succeeded;
899
- }
900
-
901
- // Higher level wrappers/examples (optional).
902
- #include <stdio.h>
903
-
904
- class cfile_stream : public output_stream
905
- {
906
- cfile_stream(const cfile_stream &);
907
- cfile_stream &operator= (const cfile_stream &);
908
-
909
- FILE* m_pFile;
910
- bool m_bStatus;
911
-
912
- public:
913
- cfile_stream() : m_pFile(NULL), m_bStatus(false) { }
914
-
915
- virtual ~cfile_stream()
916
- {
917
- close();
918
- }
919
-
920
- bool open(const char *pFilename)
921
- {
922
- close();
923
- #if defined(_MSC_VER)
924
- if (fopen_s(&m_pFile, pFilename, "wb") != 0)
925
- {
926
- return false;
927
- }
928
- #else
929
- m_pFile = fopen(pFilename, "wb");
930
- #endif
931
- m_bStatus = (m_pFile != NULL);
932
- return m_bStatus;
933
- }
934
-
935
- bool close()
936
- {
937
- if (m_pFile)
938
- {
939
- if (fclose(m_pFile) == EOF)
940
- {
941
- m_bStatus = false;
942
- }
943
- m_pFile = NULL;
944
- }
945
- return m_bStatus;
946
- }
947
-
948
- virtual bool put_buf(const void* pBuf, int64_t len)
949
- {
950
- m_bStatus = m_bStatus && (fwrite(pBuf, len, 1, m_pFile) == 1);
951
- return m_bStatus;
952
- }
953
-
954
- uint get_size() const
955
- {
956
- return m_pFile ? ftell(m_pFile) : 0;
957
- }
958
- };
959
-
960
- // Writes JPEG image to file.
961
- bool compress_image_to_jpeg_file(const char *pFilename, int64_t width, int64_t height, int64_t num_channels, const uint8 *pImage_data, const params &comp_params)
962
- {
963
- cfile_stream dst_stream;
964
- if (!dst_stream.open(pFilename))
965
- return false;
966
-
967
- jpge::jpeg_encoder dst_image;
968
- if (!dst_image.init(&dst_stream, width, height, num_channels, comp_params))
969
- return false;
970
-
971
- for (uint pass_index = 0; pass_index < dst_image.get_total_passes(); pass_index++)
972
- {
973
- for (int64_t i = 0; i < height; i++)
974
- {
975
- // i, width, and num_channels are all 64bit
976
- const uint8* pBuf = pImage_data + i * width * num_channels;
977
- if (!dst_image.process_scanline(pBuf))
978
- return false;
979
- }
980
- if (!dst_image.process_scanline(NULL))
981
- return false;
982
- }
983
-
984
- dst_image.deinit();
985
-
986
- return dst_stream.close();
987
- }
988
-
989
- class memory_stream : public output_stream
990
- {
991
- memory_stream(const memory_stream &);
992
- memory_stream &operator= (const memory_stream &);
993
-
994
- uint8 *m_pBuf;
995
- uint64_t m_buf_size, m_buf_ofs;
996
-
997
- public:
998
- memory_stream(void *pBuf, uint64_t buf_size) : m_pBuf(static_cast<uint8*>(pBuf)), m_buf_size(buf_size), m_buf_ofs(0) { }
999
-
1000
- virtual ~memory_stream() { }
1001
-
1002
- virtual bool put_buf(const void* pBuf, int64_t len)
1003
- {
1004
- uint64_t buf_remaining = m_buf_size - m_buf_ofs;
1005
- if ((uint64_t)len > buf_remaining)
1006
- return false;
1007
- memcpy(m_pBuf + m_buf_ofs, pBuf, len);
1008
- m_buf_ofs += len;
1009
- return true;
1010
- }
1011
-
1012
- uint64_t get_size() const
1013
- {
1014
- return m_buf_ofs;
1015
- }
1016
- };
1017
-
1018
- bool compress_image_to_jpeg_file_in_memory(void *pDstBuf, int64_t &buf_size, int64_t width, int64_t height, int64_t num_channels, const uint8 *pImage_data, const params &comp_params)
1019
- {
1020
- if ((!pDstBuf) || (!buf_size))
1021
- return false;
1022
-
1023
- memory_stream dst_stream(pDstBuf, buf_size);
1024
-
1025
- buf_size = 0;
1026
-
1027
- jpge::jpeg_encoder dst_image;
1028
- if (!dst_image.init(&dst_stream, width, height, num_channels, comp_params))
1029
- return false;
1030
-
1031
- for (uint pass_index = 0; pass_index < dst_image.get_total_passes(); pass_index++)
1032
- {
1033
- for (int64_t i = 0; i < height; i++)
1034
- {
1035
- const uint8* pScanline = pImage_data + i * width * num_channels;
1036
- if (!dst_image.process_scanline(pScanline))
1037
- return false;
1038
- }
1039
- if (!dst_image.process_scanline(NULL))
1040
- return false;
1041
- }
1042
-
1043
- dst_image.deinit();
1044
-
1045
- buf_size = dst_stream.get_size();
1046
- return true;
1047
- }
1048
-
1049
- } // namespace jpge
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/test_project/cpp/libJPG/jpge.h DELETED
@@ -1,172 +0,0 @@
1
-
2
- // jpge.h - C++ class for JPEG compression.
3
- // Public domain, Rich Geldreich <richgel99@gmail.com>
4
- // Alex Evans: Added RGBA support, linear memory allocator.
5
- #ifndef JPEG_ENCODER_H
6
- #define JPEG_ENCODER_H
7
-
8
- #include <stdint.h>
9
-
10
- namespace jpge
11
- {
12
- typedef unsigned char uint8;
13
- typedef signed short int16;
14
- typedef signed int int32;
15
- typedef unsigned short uint16;
16
- typedef unsigned int uint32;
17
- typedef unsigned int uint;
18
-
19
- // JPEG chroma subsampling factors. Y_ONLY (grayscale images) and H2V2 (color images) are the most common.
20
- enum subsampling_t { Y_ONLY = 0, H1V1 = 1, H2V1 = 2, H2V2 = 3 };
21
-
22
- // JPEG compression parameters structure.
23
- struct params
24
- {
25
- inline params() : m_quality(85), m_subsampling(H2V2), m_no_chroma_discrim_flag(false), m_two_pass_flag(false) { }
26
-
27
- inline bool check_valid() const
28
- {
29
- if ((m_quality < 1) || (m_quality > 100)) return false;
30
- if ((uint)m_subsampling > (uint)H2V2) return false;
31
- return true;
32
- }
33
-
34
- // Quality: 1-100, higher is better. Typical values are around 50-95.
35
- int m_quality;
36
-
37
- // m_subsampling:
38
- // 0 = Y (grayscale) only
39
- // 1 = YCbCr, no subsampling (H1V1, YCbCr 1x1x1, 3 blocks per MCU)
40
- // 2 = YCbCr, H2V1 subsampling (YCbCr 2x1x1, 4 blocks per MCU)
41
- // 3 = YCbCr, H2V2 subsampling (YCbCr 4x1x1, 6 blocks per MCU-- very common)
42
- subsampling_t m_subsampling;
43
-
44
- // Disables CbCr discrimination - only intended for testing.
45
- // If true, the Y quantization table is also used for the CbCr channels.
46
- bool m_no_chroma_discrim_flag;
47
-
48
- bool m_two_pass_flag;
49
- };
50
-
51
- // Writes JPEG image to a file.
52
- // num_channels must be 1 (Y) or 3 (RGB), image pitch must be width*num_channels.
53
- bool compress_image_to_jpeg_file(const char *pFilename, int64_t width, int64_t height, int64_t num_channels, const uint8 *pImage_data, const params &comp_params = params());
54
-
55
- // Writes JPEG image to memory buffer.
56
- // On entry, buf_size is the size of the output buffer pointed at by pBuf, which should be at least ~1024 bytes.
57
- // If return value is true, buf_size will be set to the size of the compressed data.
58
- bool compress_image_to_jpeg_file_in_memory(void *pBuf, int64_t &buf_size, int64_t width, int64_t height, int64_t num_channels, const uint8 *pImage_data, const params &comp_params = params());
59
-
60
- // Output stream abstract class - used by the jpeg_encoder class to write to the output stream.
61
- // put_buf() is generally called with len==JPGE_OUT_BUF_SIZE bytes, but for headers it'll be called with smaller amounts.
62
- class output_stream
63
- {
64
- public:
65
- virtual ~output_stream() { };
66
- virtual bool put_buf(const void* Pbuf, int64_t len) = 0;
67
- template<class T> inline bool put_obj(const T& obj) { return put_buf(&obj, sizeof(T)); }
68
- };
69
-
70
- // Lower level jpeg_encoder class - useful if more control is needed than the above helper functions.
71
- class jpeg_encoder
72
- {
73
- public:
74
- jpeg_encoder();
75
- ~jpeg_encoder();
76
-
77
- // Initializes the compressor.
78
- // pStream: The stream object to use for writing compressed data.
79
- // params - Compression parameters structure, defined above.
80
- // width, height - Image dimensions.
81
- // channels - May be 1, or 3. 1 indicates grayscale, 3 indicates RGB source data.
82
- // Returns false on out of memory or if a stream write fails.
83
- bool init(output_stream *pStream, int64_t width, int64_t height, int64_t src_channels, const params &comp_params = params());
84
-
85
- const params &get_params() const { return m_params; }
86
-
87
- // Deinitializes the compressor, freeing any allocated memory. May be called at any time.
88
- void deinit();
89
-
90
- uint get_total_passes() const { return m_params.m_two_pass_flag ? 2 : 1; }
91
- inline uint get_cur_pass() { return m_pass_num; }
92
-
93
- // Call this method with each source scanline.
94
- // width * src_channels bytes per scanline is expected (RGB or Y format).
95
- // You must call with NULL after all scanlines are processed to finish compression.
96
- // Returns false on out of memory or if a stream write fails.
97
- bool process_scanline(const void* pScanline);
98
-
99
- private:
100
- jpeg_encoder(const jpeg_encoder &);
101
- jpeg_encoder &operator =(const jpeg_encoder &);
102
-
103
- typedef int32 sample_array_t;
104
-
105
- output_stream *m_pStream;
106
- params m_params;
107
- uint8 m_num_components;
108
- uint8 m_comp_h_samp[3], m_comp_v_samp[3];
109
- int m_image_x, m_image_y, m_image_bpp, m_image_bpl;
110
- int m_image_x_mcu, m_image_y_mcu;
111
- int m_image_bpl_xlt, m_image_bpl_mcu;
112
- int m_mcus_per_row;
113
- int m_mcu_x, m_mcu_y;
114
- uint8 *m_mcu_lines[16];
115
- uint8 m_mcu_y_ofs;
116
- sample_array_t m_sample_array[64];
117
- int16 m_coefficient_array[64];
118
- int32 m_quantization_tables[2][64];
119
- uint m_huff_codes[4][256];
120
- uint8 m_huff_code_sizes[4][256];
121
- uint8 m_huff_bits[4][17];
122
- uint8 m_huff_val[4][256];
123
- uint32 m_huff_count[4][256];
124
- int m_last_dc_val[3];
125
- enum { JPGE_OUT_BUF_SIZE = 2048 };
126
- uint8 m_out_buf[JPGE_OUT_BUF_SIZE];
127
- uint8 *m_pOut_buf;
128
- uint m_out_buf_left;
129
- uint32 m_bit_buffer;
130
- uint m_bits_in;
131
- uint8 m_pass_num;
132
- bool m_all_stream_writes_succeeded;
133
-
134
- void optimize_huffman_table(int table_num, int table_len);
135
- void emit_byte(uint8 i);
136
- void emit_word(uint i);
137
- void emit_marker(int marker);
138
- void emit_jfif_app0();
139
- void emit_dqt();
140
- void emit_sof();
141
- void emit_dht(uint8 *bits, uint8 *val, int index, bool ac_flag);
142
- void emit_dhts();
143
- void emit_sos();
144
- void emit_markers();
145
- void compute_huffman_table(uint *codes, uint8 *code_sizes, uint8 *bits, uint8 *val);
146
- void compute_quant_table(int32 *dst, int16 *src);
147
- void adjust_quant_table(int32 *dst, int32 *src);
148
- void first_pass_init();
149
- bool second_pass_init();
150
- bool jpg_open(int p_x_res, int p_y_res, int src_channels);
151
- void load_block_8_8_grey(int x);
152
- void load_block_8_8(int x, int y, int c);
153
- void load_block_16_8(int x, int c);
154
- void load_block_16_8_8(int x, int c);
155
- void load_quantized_coefficients(int component_num);
156
- void flush_output_buffer();
157
- void put_bits(uint bits, uint len);
158
- void code_coefficients_pass_one(int component_num);
159
- void code_coefficients_pass_two(int component_num);
160
- void code_block(int component_num);
161
- void process_mcu_row();
162
- bool terminate_pass_one();
163
- bool terminate_pass_two();
164
- bool process_end_of_image();
165
- void load_mcu(const void* src);
166
- void clear();
167
- void init();
168
- };
169
-
170
- } // namespace jpge
171
-
172
- #endif // JPEG_ENCODER
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/test_project/cpp/libJPG/来源 DELETED
@@ -1,3 +0,0 @@
1
- jpge.h - C++ class for JPEG compression.
2
- Public domain, Rich Geldreich <richgel99@gmail.com>
3
- Alex Evans: Added RGBA support, linear memory allocator.
 
 
 
 
crazy_functions/test_project/cpp/longcode/jpgd.cpp DELETED
@@ -1,3276 +0,0 @@
1
- // jpgd.cpp - C++ class for JPEG decompression.
2
- // Public domain, Rich Geldreich <richgel99@gmail.com>
3
- // Last updated Apr. 16, 2011
4
- // Alex Evans: Linear memory allocator (taken from jpge.h).
5
- //
6
- // Supports progressive and baseline sequential JPEG image files, and the most common chroma subsampling factors: Y, H1V1, H2V1, H1V2, and H2V2.
7
- //
8
- // Chroma upsampling quality: H2V2 is upsampled in the frequency domain, H2V1 and H1V2 are upsampled using point sampling.
9
- // Chroma upsampling reference: "Fast Scheme for Image Size Change in the Compressed Domain"
10
- // http://vision.ai.uiuc.edu/~dugad/research/dct/index.html
11
-
12
- #include "jpgd.h"
13
- #include <string.h>
14
-
15
- #include <assert.h>
16
- // BEGIN EPIC MOD
17
- #define JPGD_ASSERT(x) { assert(x); CA_ASSUME(x); } (void)0
18
- // END EPIC MOD
19
-
20
- #ifdef _MSC_VER
21
- #pragma warning (disable : 4611) // warning C4611: interaction between '_setjmp' and C++ object destruction is non-portable
22
- #endif
23
-
24
- // Set to 1 to enable freq. domain chroma upsampling on images using H2V2 subsampling (0=faster nearest neighbor sampling).
25
- // This is slower, but results in higher quality on images with highly saturated colors.
26
- #define JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING 1
27
-
28
- #define JPGD_TRUE (1)
29
- #define JPGD_FALSE (0)
30
-
31
- #define JPGD_MAX(a,b) (((a)>(b)) ? (a) : (b))
32
- #define JPGD_MIN(a,b) (((a)<(b)) ? (a) : (b))
33
-
34
- namespace jpgd {
35
-
36
- static inline void *jpgd_malloc(size_t nSize) { return FMemory::Malloc(nSize); }
37
- static inline void jpgd_free(void *p) { FMemory::Free(p); }
38
-
39
- // BEGIN EPIC MOD
40
- //@UE3 - use UE3 BGRA encoding instead of assuming RGBA
41
- // stolen from IImageWrapper.h
42
- enum ERGBFormatJPG
43
- {
44
- Invalid = -1,
45
- RGBA = 0,
46
- BGRA = 1,
47
- Gray = 2,
48
- };
49
- static ERGBFormatJPG jpg_format;
50
- // END EPIC MOD
51
-
52
- // DCT coefficients are stored in this sequence.
53
- static int g_ZAG[64] = { 0,1,8,16,9,2,3,10,17,24,32,25,18,11,4,5,12,19,26,33,40,48,41,34,27,20,13,6,7,14,21,28,35,42,49,56,57,50,43,36,29,22,15,23,30,37,44,51,58,59,52,45,38,31,39,46,53,60,61,54,47,55,62,63 };
54
-
55
- enum JPEG_MARKER
56
- {
57
- M_SOF0 = 0xC0, M_SOF1 = 0xC1, M_SOF2 = 0xC2, M_SOF3 = 0xC3, M_SOF5 = 0xC5, M_SOF6 = 0xC6, M_SOF7 = 0xC7, M_JPG = 0xC8,
58
- M_SOF9 = 0xC9, M_SOF10 = 0xCA, M_SOF11 = 0xCB, M_SOF13 = 0xCD, M_SOF14 = 0xCE, M_SOF15 = 0xCF, M_DHT = 0xC4, M_DAC = 0xCC,
59
- M_RST0 = 0xD0, M_RST1 = 0xD1, M_RST2 = 0xD2, M_RST3 = 0xD3, M_RST4 = 0xD4, M_RST5 = 0xD5, M_RST6 = 0xD6, M_RST7 = 0xD7,
60
- M_SOI = 0xD8, M_EOI = 0xD9, M_SOS = 0xDA, M_DQT = 0xDB, M_DNL = 0xDC, M_DRI = 0xDD, M_DHP = 0xDE, M_EXP = 0xDF,
61
- M_APP0 = 0xE0, M_APP15 = 0xEF, M_JPG0 = 0xF0, M_JPG13 = 0xFD, M_COM = 0xFE, M_TEM = 0x01, M_ERROR = 0x100, RST0 = 0xD0
62
- };
63
-
64
- enum JPEG_SUBSAMPLING { JPGD_GRAYSCALE = 0, JPGD_YH1V1, JPGD_YH2V1, JPGD_YH1V2, JPGD_YH2V2 };
65
-
66
- #define CONST_BITS 13
67
- #define PASS1_BITS 2
68
- #define SCALEDONE ((int32)1)
69
-
70
- #define FIX_0_298631336 ((int32)2446) /* FIX(0.298631336) */
71
- #define FIX_0_390180644 ((int32)3196) /* FIX(0.390180644) */
72
- #define FIX_0_541196100 ((int32)4433) /* FIX(0.541196100) */
73
- #define FIX_0_765366865 ((int32)6270) /* FIX(0.765366865) */
74
- #define FIX_0_899976223 ((int32)7373) /* FIX(0.899976223) */
75
- #define FIX_1_175875602 ((int32)9633) /* FIX(1.175875602) */
76
- #define FIX_1_501321110 ((int32)12299) /* FIX(1.501321110) */
77
- #define FIX_1_847759065 ((int32)15137) /* FIX(1.847759065) */
78
- #define FIX_1_961570560 ((int32)16069) /* FIX(1.961570560) */
79
- #define FIX_2_053119869 ((int32)16819) /* FIX(2.053119869) */
80
- #define FIX_2_562915447 ((int32)20995) /* FIX(2.562915447) */
81
- #define FIX_3_072711026 ((int32)25172) /* FIX(3.072711026) */
82
-
83
- #define DESCALE(x,n) (((x) + (SCALEDONE << ((n)-1))) >> (n))
84
- #define DESCALE_ZEROSHIFT(x,n) (((x) + (128 << (n)) + (SCALEDONE << ((n)-1))) >> (n))
85
-
86
- #define MULTIPLY(var, cnst) ((var) * (cnst))
87
-
88
- #define CLAMP(i) ((static_cast<uint>(i) > 255) ? (((~i) >> 31) & 0xFF) : (i))
89
-
90
- // Compiler creates a fast path 1D IDCT for X non-zero columns
91
- template <int NONZERO_COLS>
92
- struct Row
93
- {
94
- static void idct(int* pTemp, const jpgd_block_t* pSrc)
95
- {
96
- // ACCESS_COL() will be optimized at compile time to either an array access, or 0.
97
- #define ACCESS_COL(x) (((x) < NONZERO_COLS) ? (int)pSrc[x] : 0)
98
-
99
- const int z2 = ACCESS_COL(2), z3 = ACCESS_COL(6);
100
-
101
- const int z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
102
- const int tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
103
- const int tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
104
-
105
- const int tmp0 = (ACCESS_COL(0) + ACCESS_COL(4)) << CONST_BITS;
106
- const int tmp1 = (ACCESS_COL(0) - ACCESS_COL(4)) << CONST_BITS;
107
-
108
- const int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2;
109
-
110
- const int atmp0 = ACCESS_COL(7), atmp1 = ACCESS_COL(5), atmp2 = ACCESS_COL(3), atmp3 = ACCESS_COL(1);
111
-
112
- const int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3;
113
- const int bz5 = MULTIPLY(bz3 + bz4, FIX_1_175875602);
114
-
115
- const int az1 = MULTIPLY(bz1, - FIX_0_899976223);
116
- const int az2 = MULTIPLY(bz2, - FIX_2_562915447);
117
- const int az3 = MULTIPLY(bz3, - FIX_1_961570560) + bz5;
118
- const int az4 = MULTIPLY(bz4, - FIX_0_390180644) + bz5;
119
-
120
- const int btmp0 = MULTIPLY(atmp0, FIX_0_298631336) + az1 + az3;
121
- const int btmp1 = MULTIPLY(atmp1, FIX_2_053119869) + az2 + az4;
122
- const int btmp2 = MULTIPLY(atmp2, FIX_3_072711026) + az2 + az3;
123
- const int btmp3 = MULTIPLY(atmp3, FIX_1_501321110) + az1 + az4;
124
-
125
- pTemp[0] = DESCALE(tmp10 + btmp3, CONST_BITS-PASS1_BITS);
126
- pTemp[7] = DESCALE(tmp10 - btmp3, CONST_BITS-PASS1_BITS);
127
- pTemp[1] = DESCALE(tmp11 + btmp2, CONST_BITS-PASS1_BITS);
128
- pTemp[6] = DESCALE(tmp11 - btmp2, CONST_BITS-PASS1_BITS);
129
- pTemp[2] = DESCALE(tmp12 + btmp1, CONST_BITS-PASS1_BITS);
130
- pTemp[5] = DESCALE(tmp12 - btmp1, CONST_BITS-PASS1_BITS);
131
- pTemp[3] = DESCALE(tmp13 + btmp0, CONST_BITS-PASS1_BITS);
132
- pTemp[4] = DESCALE(tmp13 - btmp0, CONST_BITS-PASS1_BITS);
133
- }
134
- };
135
-
136
- template <>
137
- struct Row<0>
138
- {
139
- static void idct(int* pTemp, const jpgd_block_t* pSrc)
140
- {
141
- #ifdef _MSC_VER
142
- pTemp; pSrc;
143
- #endif
144
- }
145
- };
146
-
147
- template <>
148
- struct Row<1>
149
- {
150
- static void idct(int* pTemp, const jpgd_block_t* pSrc)
151
- {
152
- const int dcval = (pSrc[0] << PASS1_BITS);
153
-
154
- pTemp[0] = dcval;
155
- pTemp[1] = dcval;
156
- pTemp[2] = dcval;
157
- pTemp[3] = dcval;
158
- pTemp[4] = dcval;
159
- pTemp[5] = dcval;
160
- pTemp[6] = dcval;
161
- pTemp[7] = dcval;
162
- }
163
- };
164
-
165
- // Compiler creates a fast path 1D IDCT for X non-zero rows
166
- template <int NONZERO_ROWS>
167
- struct Col
168
- {
169
- static void idct(uint8* pDst_ptr, const int* pTemp)
170
- {
171
- // ACCESS_ROW() will be optimized at compile time to either an array access, or 0.
172
- #define ACCESS_ROW(x) (((x) < NONZERO_ROWS) ? pTemp[x * 8] : 0)
173
-
174
- const int z2 = ACCESS_ROW(2);
175
- const int z3 = ACCESS_ROW(6);
176
-
177
- const int z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
178
- const int tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
179
- const int tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
180
-
181
- const int tmp0 = (ACCESS_ROW(0) + ACCESS_ROW(4)) << CONST_BITS;
182
- const int tmp1 = (ACCESS_ROW(0) - ACCESS_ROW(4)) << CONST_BITS;
183
-
184
- const int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2;
185
-
186
- const int atmp0 = ACCESS_ROW(7), atmp1 = ACCESS_ROW(5), atmp2 = ACCESS_ROW(3), atmp3 = ACCESS_ROW(1);
187
-
188
- const int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3;
189
- const int bz5 = MULTIPLY(bz3 + bz4, FIX_1_175875602);
190
-
191
- const int az1 = MULTIPLY(bz1, - FIX_0_899976223);
192
- const int az2 = MULTIPLY(bz2, - FIX_2_562915447);
193
- const int az3 = MULTIPLY(bz3, - FIX_1_961570560) + bz5;
194
- const int az4 = MULTIPLY(bz4, - FIX_0_390180644) + bz5;
195
-
196
- const int btmp0 = MULTIPLY(atmp0, FIX_0_298631336) + az1 + az3;
197
- const int btmp1 = MULTIPLY(atmp1, FIX_2_053119869) + az2 + az4;
198
- const int btmp2 = MULTIPLY(atmp2, FIX_3_072711026) + az2 + az3;
199
- const int btmp3 = MULTIPLY(atmp3, FIX_1_501321110) + az1 + az4;
200
-
201
- int i = DESCALE_ZEROSHIFT(tmp10 + btmp3, CONST_BITS+PASS1_BITS+3);
202
- pDst_ptr[8*0] = (uint8)CLAMP(i);
203
-
204
- i = DESCALE_ZEROSHIFT(tmp10 - btmp3, CONST_BITS+PASS1_BITS+3);
205
- pDst_ptr[8*7] = (uint8)CLAMP(i);
206
-
207
- i = DESCALE_ZEROSHIFT(tmp11 + btmp2, CONST_BITS+PASS1_BITS+3);
208
- pDst_ptr[8*1] = (uint8)CLAMP(i);
209
-
210
- i = DESCALE_ZEROSHIFT(tmp11 - btmp2, CONST_BITS+PASS1_BITS+3);
211
- pDst_ptr[8*6] = (uint8)CLAMP(i);
212
-
213
- i = DESCALE_ZEROSHIFT(tmp12 + btmp1, CONST_BITS+PASS1_BITS+3);
214
- pDst_ptr[8*2] = (uint8)CLAMP(i);
215
-
216
- i = DESCALE_ZEROSHIFT(tmp12 - btmp1, CONST_BITS+PASS1_BITS+3);
217
- pDst_ptr[8*5] = (uint8)CLAMP(i);
218
-
219
- i = DESCALE_ZEROSHIFT(tmp13 + btmp0, CONST_BITS+PASS1_BITS+3);
220
- pDst_ptr[8*3] = (uint8)CLAMP(i);
221
-
222
- i = DESCALE_ZEROSHIFT(tmp13 - btmp0, CONST_BITS+PASS1_BITS+3);
223
- pDst_ptr[8*4] = (uint8)CLAMP(i);
224
- }
225
- };
226
-
227
- template <>
228
- struct Col<1>
229
- {
230
- static void idct(uint8* pDst_ptr, const int* pTemp)
231
- {
232
- int dcval = DESCALE_ZEROSHIFT(pTemp[0], PASS1_BITS+3);
233
- const uint8 dcval_clamped = (uint8)CLAMP(dcval);
234
- pDst_ptr[0*8] = dcval_clamped;
235
- pDst_ptr[1*8] = dcval_clamped;
236
- pDst_ptr[2*8] = dcval_clamped;
237
- pDst_ptr[3*8] = dcval_clamped;
238
- pDst_ptr[4*8] = dcval_clamped;
239
- pDst_ptr[5*8] = dcval_clamped;
240
- pDst_ptr[6*8] = dcval_clamped;
241
- pDst_ptr[7*8] = dcval_clamped;
242
- }
243
- };
244
-
245
- static const uint8 s_idct_row_table[] =
246
- {
247
- 1,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0, 2,1,0,0,0,0,0,0, 2,1,1,0,0,0,0,0, 2,2,1,0,0,0,0,0, 3,2,1,0,0,0,0,0, 4,2,1,0,0,0,0,0, 4,3,1,0,0,0,0,0,
248
- 4,3,2,0,0,0,0,0, 4,3,2,1,0,0,0,0, 4,3,2,1,1,0,0,0, 4,3,2,2,1,0,0,0, 4,3,3,2,1,0,0,0, 4,4,3,2,1,0,0,0, 5,4,3,2,1,0,0,0, 6,4,3,2,1,0,0,0,
249
- 6,5,3,2,1,0,0,0, 6,5,4,2,1,0,0,0, 6,5,4,3,1,0,0,0, 6,5,4,3,2,0,0,0, 6,5,4,3,2,1,0,0, 6,5,4,3,2,1,1,0, 6,5,4,3,2,2,1,0, 6,5,4,3,3,2,1,0,
250
- 6,5,4,4,3,2,1,0, 6,5,5,4,3,2,1,0, 6,6,5,4,3,2,1,0, 7,6,5,4,3,2,1,0, 8,6,5,4,3,2,1,0, 8,7,5,4,3,2,1,0, 8,7,6,4,3,2,1,0, 8,7,6,5,3,2,1,0,
251
- 8,7,6,5,4,2,1,0, 8,7,6,5,4,3,1,0, 8,7,6,5,4,3,2,0, 8,7,6,5,4,3,2,1, 8,7,6,5,4,3,2,2, 8,7,6,5,4,3,3,2, 8,7,6,5,4,4,3,2, 8,7,6,5,5,4,3,2,
252
- 8,7,6,6,5,4,3,2, 8,7,7,6,5,4,3,2, 8,8,7,6,5,4,3,2, 8,8,8,6,5,4,3,2, 8,8,8,7,5,4,3,2, 8,8,8,7,6,4,3,2, 8,8,8,7,6,5,3,2, 8,8,8,7,6,5,4,2,
253
- 8,8,8,7,6,5,4,3, 8,8,8,7,6,5,4,4, 8,8,8,7,6,5,5,4, 8,8,8,7,6,6,5,4, 8,8,8,7,7,6,5,4, 8,8,8,8,7,6,5,4, 8,8,8,8,8,6,5,4, 8,8,8,8,8,7,5,4,
254
- 8,8,8,8,8,7,6,4, 8,8,8,8,8,7,6,5, 8,8,8,8,8,7,6,6, 8,8,8,8,8,7,7,6, 8,8,8,8,8,8,7,6, 8,8,8,8,8,8,8,6, 8,8,8,8,8,8,8,7, 8,8,8,8,8,8,8,8,
255
- };
256
-
257
- static const uint8 s_idct_col_table[] = { 1, 1, 2, 3, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 };
258
-
259
- void idct(const jpgd_block_t* pSrc_ptr, uint8* pDst_ptr, int block_max_zag)
260
- {
261
- JPGD_ASSERT(block_max_zag >= 1);
262
- JPGD_ASSERT(block_max_zag <= 64);
263
-
264
- if (block_max_zag == 1)
265
- {
266
- int k = ((pSrc_ptr[0] + 4) >> 3) + 128;
267
- k = CLAMP(k);
268
- k = k | (k<<8);
269
- k = k | (k<<16);
270
-
271
- for (int i = 8; i > 0; i--)
272
- {
273
- *(int*)&pDst_ptr[0] = k;
274
- *(int*)&pDst_ptr[4] = k;
275
- pDst_ptr += 8;
276
- }
277
- return;
278
- }
279
-
280
- int temp[64];
281
-
282
- const jpgd_block_t* pSrc = pSrc_ptr;
283
- int* pTemp = temp;
284
-
285
- const uint8* pRow_tab = &s_idct_row_table[(block_max_zag - 1) * 8];
286
- int i;
287
- for (i = 8; i > 0; i--, pRow_tab++)
288
- {
289
- switch (*pRow_tab)
290
- {
291
- case 0: Row<0>::idct(pTemp, pSrc); break;
292
- case 1: Row<1>::idct(pTemp, pSrc); break;
293
- case 2: Row<2>::idct(pTemp, pSrc); break;
294
- case 3: Row<3>::idct(pTemp, pSrc); break;
295
- case 4: Row<4>::idct(pTemp, pSrc); break;
296
- case 5: Row<5>::idct(pTemp, pSrc); break;
297
- case 6: Row<6>::idct(pTemp, pSrc); break;
298
- case 7: Row<7>::idct(pTemp, pSrc); break;
299
- case 8: Row<8>::idct(pTemp, pSrc); break;
300
- }
301
-
302
- pSrc += 8;
303
- pTemp += 8;
304
- }
305
-
306
- pTemp = temp;
307
-
308
- const int nonzero_rows = s_idct_col_table[block_max_zag - 1];
309
- for (i = 8; i > 0; i--)
310
- {
311
- switch (nonzero_rows)
312
- {
313
- case 1: Col<1>::idct(pDst_ptr, pTemp); break;
314
- case 2: Col<2>::idct(pDst_ptr, pTemp); break;
315
- case 3: Col<3>::idct(pDst_ptr, pTemp); break;
316
- case 4: Col<4>::idct(pDst_ptr, pTemp); break;
317
- case 5: Col<5>::idct(pDst_ptr, pTemp); break;
318
- case 6: Col<6>::idct(pDst_ptr, pTemp); break;
319
- case 7: Col<7>::idct(pDst_ptr, pTemp); break;
320
- case 8: Col<8>::idct(pDst_ptr, pTemp); break;
321
- }
322
-
323
- pTemp++;
324
- pDst_ptr++;
325
- }
326
- }
327
-
328
- void idct_4x4(const jpgd_block_t* pSrc_ptr, uint8* pDst_ptr)
329
- {
330
- int temp[64];
331
- int* pTemp = temp;
332
- const jpgd_block_t* pSrc = pSrc_ptr;
333
-
334
- for (int i = 4; i > 0; i--)
335
- {
336
- Row<4>::idct(pTemp, pSrc);
337
- pSrc += 8;
338
- pTemp += 8;
339
- }
340
-
341
- pTemp = temp;
342
- for (int i = 8; i > 0; i--)
343
- {
344
- Col<4>::idct(pDst_ptr, pTemp);
345
- pTemp++;
346
- pDst_ptr++;
347
- }
348
- }
349
-
350
- // Retrieve one character from the input stream.
351
- inline uint jpeg_decoder::get_char()
352
- {
353
- // Any bytes remaining in buffer?
354
- if (!m_in_buf_left)
355
- {
356
- // Try to get more bytes.
357
- prep_in_buffer();
358
- // Still nothing to get?
359
- if (!m_in_buf_left)
360
- {
361
- // Pad the end of the stream with 0xFF 0xD9 (EOI marker)
362
- int t = m_tem_flag;
363
- m_tem_flag ^= 1;
364
- if (t)
365
- return 0xD9;
366
- else
367
- return 0xFF;
368
- }
369
- }
370
-
371
- uint c = *m_pIn_buf_ofs++;
372
- m_in_buf_left--;
373
-
374
- return c;
375
- }
376
-
377
- // Same as previous method, except can indicate if the character is a pad character or not.
378
- inline uint jpeg_decoder::get_char(bool *pPadding_flag)
379
- {
380
- if (!m_in_buf_left)
381
- {
382
- prep_in_buffer();
383
- if (!m_in_buf_left)
384
- {
385
- *pPadding_flag = true;
386
- int t = m_tem_flag;
387
- m_tem_flag ^= 1;
388
- if (t)
389
- return 0xD9;
390
- else
391
- return 0xFF;
392
- }
393
- }
394
-
395
- *pPadding_flag = false;
396
-
397
- uint c = *m_pIn_buf_ofs++;
398
- m_in_buf_left--;
399
-
400
- return c;
401
- }
402
-
403
- // Inserts a previously retrieved character back into the input buffer.
404
- inline void jpeg_decoder::stuff_char(uint8 q)
405
- {
406
- *(--m_pIn_buf_ofs) = q;
407
- m_in_buf_left++;
408
- }
409
-
410
- // Retrieves one character from the input stream, but does not read past markers. Will continue to return 0xFF when a marker is encountered.
411
- inline uint8 jpeg_decoder::get_octet()
412
- {
413
- bool padding_flag;
414
- int c = get_char(&padding_flag);
415
-
416
- if (c == 0xFF)
417
- {
418
- if (padding_flag)
419
- return 0xFF;
420
-
421
- c = get_char(&padding_flag);
422
- if (padding_flag)
423
- {
424
- stuff_char(0xFF);
425
- return 0xFF;
426
- }
427
-
428
- if (c == 0x00)
429
- return 0xFF;
430
- else
431
- {
432
- stuff_char(static_cast<uint8>(c));
433
- stuff_char(0xFF);
434
- return 0xFF;
435
- }
436
- }
437
-
438
- return static_cast<uint8>(c);
439
- }
440
-
441
- // Retrieves a variable number of bits from the input stream. Does not recognize markers.
442
- inline uint jpeg_decoder::get_bits(int num_bits)
443
- {
444
- if (!num_bits)
445
- return 0;
446
-
447
- uint i = m_bit_buf >> (32 - num_bits);
448
-
449
- if ((m_bits_left -= num_bits) <= 0)
450
- {
451
- m_bit_buf <<= (num_bits += m_bits_left);
452
-
453
- uint c1 = get_char();
454
- uint c2 = get_char();
455
- m_bit_buf = (m_bit_buf & 0xFFFF0000) | (c1 << 8) | c2;
456
-
457
- m_bit_buf <<= -m_bits_left;
458
-
459
- m_bits_left += 16;
460
-
461
- JPGD_ASSERT(m_bits_left >= 0);
462
- }
463
- else
464
- m_bit_buf <<= num_bits;
465
-
466
- return i;
467
- }
468
-
469
- // Retrieves a variable number of bits from the input stream. Markers will not be read into the input bit buffer. Instead, an infinite number of all 1's will be returned when a marker is encountered.
470
- inline uint jpeg_decoder::get_bits_no_markers(int num_bits)
471
- {
472
- if (!num_bits)
473
- return 0;
474
-
475
- uint i = m_bit_buf >> (32 - num_bits);
476
-
477
- if ((m_bits_left -= num_bits) <= 0)
478
- {
479
- m_bit_buf <<= (num_bits += m_bits_left);
480
-
481
- if ((m_in_buf_left < 2) || (m_pIn_buf_ofs[0] == 0xFF) || (m_pIn_buf_ofs[1] == 0xFF))
482
- {
483
- uint c1 = get_octet();
484
- uint c2 = get_octet();
485
- m_bit_buf |= (c1 << 8) | c2;
486
- }
487
- else
488
- {
489
- m_bit_buf |= ((uint)m_pIn_buf_ofs[0] << 8) | m_pIn_buf_ofs[1];
490
- m_in_buf_left -= 2;
491
- m_pIn_buf_ofs += 2;
492
- }
493
-
494
- m_bit_buf <<= -m_bits_left;
495
-
496
- m_bits_left += 16;
497
-
498
- JPGD_ASSERT(m_bits_left >= 0);
499
- }
500
- else
501
- m_bit_buf <<= num_bits;
502
-
503
- return i;
504
- }
505
-
506
- // Decodes a Huffman encoded symbol.
507
- inline int jpeg_decoder::huff_decode(huff_tables *pH)
508
- {
509
- int symbol;
510
-
511
- // Check first 8-bits: do we have a complete symbol?
512
- if ((symbol = pH->look_up[m_bit_buf >> 24]) < 0)
513
- {
514
- // Decode more bits, use a tree traversal to find symbol.
515
- int ofs = 23;
516
- do
517
- {
518
- symbol = pH->tree[-(int)(symbol + ((m_bit_buf >> ofs) & 1))];
519
- ofs--;
520
- } while (symbol < 0);
521
-
522
- get_bits_no_markers(8 + (23 - ofs));
523
- }
524
- else
525
- get_bits_no_markers(pH->code_size[symbol]);
526
-
527
- return symbol;
528
- }
529
-
530
- // Decodes a Huffman encoded symbol.
531
- inline int jpeg_decoder::huff_decode(huff_tables *pH, int& extra_bits)
532
- {
533
- int symbol;
534
-
535
- // Check first 8-bits: do we have a complete symbol?
536
- if ((symbol = pH->look_up2[m_bit_buf >> 24]) < 0)
537
- {
538
- // Use a tree traversal to find symbol.
539
- int ofs = 23;
540
- do
541
- {
542
- symbol = pH->tree[-(int)(symbol + ((m_bit_buf >> ofs) & 1))];
543
- ofs--;
544
- } while (symbol < 0);
545
-
546
- get_bits_no_markers(8 + (23 - ofs));
547
-
548
- extra_bits = get_bits_no_markers(symbol & 0xF);
549
- }
550
- else
551
- {
552
- JPGD_ASSERT(((symbol >> 8) & 31) == pH->code_size[symbol & 255] + ((symbol & 0x8000) ? (symbol & 15) : 0));
553
-
554
- if (symbol & 0x8000)
555
- {
556
- get_bits_no_markers((symbol >> 8) & 31);
557
- extra_bits = symbol >> 16;
558
- }
559
- else
560
- {
561
- int code_size = (symbol >> 8) & 31;
562
- int num_extra_bits = symbol & 0xF;
563
- int bits = code_size + num_extra_bits;
564
- if (bits <= (m_bits_left + 16))
565
- extra_bits = get_bits_no_markers(bits) & ((1 << num_extra_bits) - 1);
566
- else
567
- {
568
- get_bits_no_markers(code_size);
569
- extra_bits = get_bits_no_markers(num_extra_bits);
570
- }
571
- }
572
-
573
- symbol &= 0xFF;
574
- }
575
-
576
- return symbol;
577
- }
578
-
579
- // Tables and macro used to fully decode the DPCM differences.
580
- static const int s_extend_test[16] = { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 };
581
- static const int s_extend_offset[16] = { 0, -1, -3, -7, -15, -31, -63, -127, -255, -511, -1023, -2047, -4095, -8191, -16383, -32767 };
582
- static const int s_extend_mask[] = { 0, (1<<0), (1<<1), (1<<2), (1<<3), (1<<4), (1<<5), (1<<6), (1<<7), (1<<8), (1<<9), (1<<10), (1<<11), (1<<12), (1<<13), (1<<14), (1<<15), (1<<16) };
583
- #define HUFF_EXTEND(x,s) ((x) < s_extend_test[s] ? (x) + s_extend_offset[s] : (x))
584
-
585
- // Clamps a value between 0-255.
586
- inline uint8 jpeg_decoder::clamp(int i)
587
- {
588
- if (static_cast<uint>(i) > 255)
589
- i = (((~i) >> 31) & 0xFF);
590
-
591
- return static_cast<uint8>(i);
592
- }
593
-
594
- namespace DCT_Upsample
595
- {
596
- struct Matrix44
597
- {
598
- typedef int Element_Type;
599
- enum { NUM_ROWS = 4, NUM_COLS = 4 };
600
-
601
- Element_Type v[NUM_ROWS][NUM_COLS];
602
-
603
- inline int rows() const { return NUM_ROWS; }
604
- inline int cols() const { return NUM_COLS; }
605
-
606
- inline const Element_Type & at(int r, int c) const { return v[r][c]; }
607
- inline Element_Type & at(int r, int c) { return v[r][c]; }
608
-
609
- inline Matrix44() { }
610
-
611
- inline Matrix44& operator += (const Matrix44& a)
612
- {
613
- for (int r = 0; r < NUM_ROWS; r++)
614
- {
615
- at(r, 0) += a.at(r, 0);
616
- at(r, 1) += a.at(r, 1);
617
- at(r, 2) += a.at(r, 2);
618
- at(r, 3) += a.at(r, 3);
619
- }
620
- return *this;
621
- }
622
-
623
- inline Matrix44& operator -= (const Matrix44& a)
624
- {
625
- for (int r = 0; r < NUM_ROWS; r++)
626
- {
627
- at(r, 0) -= a.at(r, 0);
628
- at(r, 1) -= a.at(r, 1);
629
- at(r, 2) -= a.at(r, 2);
630
- at(r, 3) -= a.at(r, 3);
631
- }
632
- return *this;
633
- }
634
-
635
- friend inline Matrix44 operator + (const Matrix44& a, const Matrix44& b)
636
- {
637
- Matrix44 ret;
638
- for (int r = 0; r < NUM_ROWS; r++)
639
- {
640
- ret.at(r, 0) = a.at(r, 0) + b.at(r, 0);
641
- ret.at(r, 1) = a.at(r, 1) + b.at(r, 1);
642
- ret.at(r, 2) = a.at(r, 2) + b.at(r, 2);
643
- ret.at(r, 3) = a.at(r, 3) + b.at(r, 3);
644
- }
645
- return ret;
646
- }
647
-
648
- friend inline Matrix44 operator - (const Matrix44& a, const Matrix44& b)
649
- {
650
- Matrix44 ret;
651
- for (int r = 0; r < NUM_ROWS; r++)
652
- {
653
- ret.at(r, 0) = a.at(r, 0) - b.at(r, 0);
654
- ret.at(r, 1) = a.at(r, 1) - b.at(r, 1);
655
- ret.at(r, 2) = a.at(r, 2) - b.at(r, 2);
656
- ret.at(r, 3) = a.at(r, 3) - b.at(r, 3);
657
- }
658
- return ret;
659
- }
660
-
661
- static inline void add_and_store(jpgd_block_t* pDst, const Matrix44& a, const Matrix44& b)
662
- {
663
- for (int r = 0; r < 4; r++)
664
- {
665
- pDst[0*8 + r] = static_cast<jpgd_block_t>(a.at(r, 0) + b.at(r, 0));
666
- pDst[1*8 + r] = static_cast<jpgd_block_t>(a.at(r, 1) + b.at(r, 1));
667
- pDst[2*8 + r] = static_cast<jpgd_block_t>(a.at(r, 2) + b.at(r, 2));
668
- pDst[3*8 + r] = static_cast<jpgd_block_t>(a.at(r, 3) + b.at(r, 3));
669
- }
670
- }
671
-
672
- static inline void sub_and_store(jpgd_block_t* pDst, const Matrix44& a, const Matrix44& b)
673
- {
674
- for (int r = 0; r < 4; r++)
675
- {
676
- pDst[0*8 + r] = static_cast<jpgd_block_t>(a.at(r, 0) - b.at(r, 0));
677
- pDst[1*8 + r] = static_cast<jpgd_block_t>(a.at(r, 1) - b.at(r, 1));
678
- pDst[2*8 + r] = static_cast<jpgd_block_t>(a.at(r, 2) - b.at(r, 2));
679
- pDst[3*8 + r] = static_cast<jpgd_block_t>(a.at(r, 3) - b.at(r, 3));
680
- }
681
- }
682
- };
683
-
684
- const int FRACT_BITS = 10;
685
- const int SCALE = 1 << FRACT_BITS;
686
-
687
- typedef int Temp_Type;
688
- #define D(i) (((i) + (SCALE >> 1)) >> FRACT_BITS)
689
- #define F(i) ((int)((i) * SCALE + .5f))
690
-
691
- // Any decent C++ compiler will optimize this at compile time to a 0, or an array access.
692
- #define AT(c, r) ((((c)>=NUM_COLS)||((r)>=NUM_ROWS)) ? 0 : pSrc[(c)+(r)*8])
693
-
694
- // NUM_ROWS/NUM_COLS = # of non-zero rows/cols in input matrix
695
- template<int NUM_ROWS, int NUM_COLS>
696
- struct P_Q
697
- {
698
- static void calc(Matrix44& P, Matrix44& Q, const jpgd_block_t* pSrc)
699
- {
700
- // 4x8 = 4x8 times 8x8, matrix 0 is constant
701
- const Temp_Type X000 = AT(0, 0);
702
- const Temp_Type X001 = AT(0, 1);
703
- const Temp_Type X002 = AT(0, 2);
704
- const Temp_Type X003 = AT(0, 3);
705
- const Temp_Type X004 = AT(0, 4);
706
- const Temp_Type X005 = AT(0, 5);
707
- const Temp_Type X006 = AT(0, 6);
708
- const Temp_Type X007 = AT(0, 7);
709
- const Temp_Type X010 = D(F(0.415735f) * AT(1, 0) + F(0.791065f) * AT(3, 0) + F(-0.352443f) * AT(5, 0) + F(0.277785f) * AT(7, 0));
710
- const Temp_Type X011 = D(F(0.415735f) * AT(1, 1) + F(0.791065f) * AT(3, 1) + F(-0.352443f) * AT(5, 1) + F(0.277785f) * AT(7, 1));
711
- const Temp_Type X012 = D(F(0.415735f) * AT(1, 2) + F(0.791065f) * AT(3, 2) + F(-0.352443f) * AT(5, 2) + F(0.277785f) * AT(7, 2));
712
- const Temp_Type X013 = D(F(0.415735f) * AT(1, 3) + F(0.791065f) * AT(3, 3) + F(-0.352443f) * AT(5, 3) + F(0.277785f) * AT(7, 3));
713
- const Temp_Type X014 = D(F(0.415735f) * AT(1, 4) + F(0.791065f) * AT(3, 4) + F(-0.352443f) * AT(5, 4) + F(0.277785f) * AT(7, 4));
714
- const Temp_Type X015 = D(F(0.415735f) * AT(1, 5) + F(0.791065f) * AT(3, 5) + F(-0.352443f) * AT(5, 5) + F(0.277785f) * AT(7, 5));
715
- const Temp_Type X016 = D(F(0.415735f) * AT(1, 6) + F(0.791065f) * AT(3, 6) + F(-0.352443f) * AT(5, 6) + F(0.277785f) * AT(7, 6));
716
- const Temp_Type X017 = D(F(0.415735f) * AT(1, 7) + F(0.791065f) * AT(3, 7) + F(-0.352443f) * AT(5, 7) + F(0.277785f) * AT(7, 7));
717
- const Temp_Type X020 = AT(4, 0);
718
- const Temp_Type X021 = AT(4, 1);
719
- const Temp_Type X022 = AT(4, 2);
720
- const Temp_Type X023 = AT(4, 3);
721
- const Temp_Type X024 = AT(4, 4);
722
- const Temp_Type X025 = AT(4, 5);
723
- const Temp_Type X026 = AT(4, 6);
724
- const Temp_Type X027 = AT(4, 7);
725
- const Temp_Type X030 = D(F(0.022887f) * AT(1, 0) + F(-0.097545f) * AT(3, 0) + F(0.490393f) * AT(5, 0) + F(0.865723f) * AT(7, 0));
726
- const Temp_Type X031 = D(F(0.022887f) * AT(1, 1) + F(-0.097545f) * AT(3, 1) + F(0.490393f) * AT(5, 1) + F(0.865723f) * AT(7, 1));
727
- const Temp_Type X032 = D(F(0.022887f) * AT(1, 2) + F(-0.097545f) * AT(3, 2) + F(0.490393f) * AT(5, 2) + F(0.865723f) * AT(7, 2));
728
- const Temp_Type X033 = D(F(0.022887f) * AT(1, 3) + F(-0.097545f) * AT(3, 3) + F(0.490393f) * AT(5, 3) + F(0.865723f) * AT(7, 3));
729
- const Temp_Type X034 = D(F(0.022887f) * AT(1, 4) + F(-0.097545f) * AT(3, 4) + F(0.490393f) * AT(5, 4) + F(0.865723f) * AT(7, 4));
730
- const Temp_Type X035 = D(F(0.022887f) * AT(1, 5) + F(-0.097545f) * AT(3, 5) + F(0.490393f) * AT(5, 5) + F(0.865723f) * AT(7, 5));
731
- const Temp_Type X036 = D(F(0.022887f) * AT(1, 6) + F(-0.097545f) * AT(3, 6) + F(0.490393f) * AT(5, 6) + F(0.865723f) * AT(7, 6));
732
- const Temp_Type X037 = D(F(0.022887f) * AT(1, 7) + F(-0.097545f) * AT(3, 7) + F(0.490393f) * AT(5, 7) + F(0.865723f) * AT(7, 7));
733
-
734
- // 4x4 = 4x8 times 8x4, matrix 1 is constant
735
- P.at(0, 0) = X000;
736
- P.at(0, 1) = D(X001 * F(0.415735f) + X003 * F(0.791065f) + X005 * F(-0.352443f) + X007 * F(0.277785f));
737
- P.at(0, 2) = X004;
738
- P.at(0, 3) = D(X001 * F(0.022887f) + X003 * F(-0.097545f) + X005 * F(0.490393f) + X007 * F(0.865723f));
739
- P.at(1, 0) = X010;
740
- P.at(1, 1) = D(X011 * F(0.415735f) + X013 * F(0.791065f) + X015 * F(-0.352443f) + X017 * F(0.277785f));
741
- P.at(1, 2) = X014;
742
- P.at(1, 3) = D(X011 * F(0.022887f) + X013 * F(-0.097545f) + X015 * F(0.490393f) + X017 * F(0.865723f));
743
- P.at(2, 0) = X020;
744
- P.at(2, 1) = D(X021 * F(0.415735f) + X023 * F(0.791065f) + X025 * F(-0.352443f) + X027 * F(0.277785f));
745
- P.at(2, 2) = X024;
746
- P.at(2, 3) = D(X021 * F(0.022887f) + X023 * F(-0.097545f) + X025 * F(0.490393f) + X027 * F(0.865723f));
747
- P.at(3, 0) = X030;
748
- P.at(3, 1) = D(X031 * F(0.415735f) + X033 * F(0.791065f) + X035 * F(-0.352443f) + X037 * F(0.277785f));
749
- P.at(3, 2) = X034;
750
- P.at(3, 3) = D(X031 * F(0.022887f) + X033 * F(-0.097545f) + X035 * F(0.490393f) + X037 * F(0.865723f));
751
- // 40 muls 24 adds
752
-
753
- // 4x4 = 4x8 times 8x4, matrix 1 is constant
754
- Q.at(0, 0) = D(X001 * F(0.906127f) + X003 * F(-0.318190f) + X005 * F(0.212608f) + X007 * F(-0.180240f));
755
- Q.at(0, 1) = X002;
756
- Q.at(0, 2) = D(X001 * F(-0.074658f) + X003 * F(0.513280f) + X005 * F(0.768178f) + X007 * F(-0.375330f));
757
- Q.at(0, 3) = X006;
758
- Q.at(1, 0) = D(X011 * F(0.906127f) + X013 * F(-0.318190f) + X015 * F(0.212608f) + X017 * F(-0.180240f));
759
- Q.at(1, 1) = X012;
760
- Q.at(1, 2) = D(X011 * F(-0.074658f) + X013 * F(0.513280f) + X015 * F(0.768178f) + X017 * F(-0.375330f));
761
- Q.at(1, 3) = X016;
762
- Q.at(2, 0) = D(X021 * F(0.906127f) + X023 * F(-0.318190f) + X025 * F(0.212608f) + X027 * F(-0.180240f));
763
- Q.at(2, 1) = X022;
764
- Q.at(2, 2) = D(X021 * F(-0.074658f) + X023 * F(0.513280f) + X025 * F(0.768178f) + X027 * F(-0.375330f));
765
- Q.at(2, 3) = X026;
766
- Q.at(3, 0) = D(X031 * F(0.906127f) + X033 * F(-0.318190f) + X035 * F(0.212608f) + X037 * F(-0.180240f));
767
- Q.at(3, 1) = X032;
768
- Q.at(3, 2) = D(X031 * F(-0.074658f) + X033 * F(0.513280f) + X035 * F(0.768178f) + X037 * F(-0.375330f));
769
- Q.at(3, 3) = X036;
770
- // 40 muls 24 adds
771
- }
772
- };
773
-
774
- template<int NUM_ROWS, int NUM_COLS>
775
- struct R_S
776
- {
777
- static void calc(Matrix44& R, Matrix44& S, const jpgd_block_t* pSrc)
778
- {
779
- // 4x8 = 4x8 times 8x8, matrix 0 is constant
780
- const Temp_Type X100 = D(F(0.906127f) * AT(1, 0) + F(-0.318190f) * AT(3, 0) + F(0.212608f) * AT(5, 0) + F(-0.180240f) * AT(7, 0));
781
- const Temp_Type X101 = D(F(0.906127f) * AT(1, 1) + F(-0.318190f) * AT(3, 1) + F(0.212608f) * AT(5, 1) + F(-0.180240f) * AT(7, 1));
782
- const Temp_Type X102 = D(F(0.906127f) * AT(1, 2) + F(-0.318190f) * AT(3, 2) + F(0.212608f) * AT(5, 2) + F(-0.180240f) * AT(7, 2));
783
- const Temp_Type X103 = D(F(0.906127f) * AT(1, 3) + F(-0.318190f) * AT(3, 3) + F(0.212608f) * AT(5, 3) + F(-0.180240f) * AT(7, 3));
784
- const Temp_Type X104 = D(F(0.906127f) * AT(1, 4) + F(-0.318190f) * AT(3, 4) + F(0.212608f) * AT(5, 4) + F(-0.180240f) * AT(7, 4));
785
- const Temp_Type X105 = D(F(0.906127f) * AT(1, 5) + F(-0.318190f) * AT(3, 5) + F(0.212608f) * AT(5, 5) + F(-0.180240f) * AT(7, 5));
786
- const Temp_Type X106 = D(F(0.906127f) * AT(1, 6) + F(-0.318190f) * AT(3, 6) + F(0.212608f) * AT(5, 6) + F(-0.180240f) * AT(7, 6));
787
- const Temp_Type X107 = D(F(0.906127f) * AT(1, 7) + F(-0.318190f) * AT(3, 7) + F(0.212608f) * AT(5, 7) + F(-0.180240f) * AT(7, 7));
788
- const Temp_Type X110 = AT(2, 0);
789
- const Temp_Type X111 = AT(2, 1);
790
- const Temp_Type X112 = AT(2, 2);
791
- const Temp_Type X113 = AT(2, 3);
792
- const Temp_Type X114 = AT(2, 4);
793
- const Temp_Type X115 = AT(2, 5);
794
- const Temp_Type X116 = AT(2, 6);
795
- const Temp_Type X117 = AT(2, 7);
796
- const Temp_Type X120 = D(F(-0.074658f) * AT(1, 0) + F(0.513280f) * AT(3, 0) + F(0.768178f) * AT(5, 0) + F(-0.375330f) * AT(7, 0));
797
- const Temp_Type X121 = D(F(-0.074658f) * AT(1, 1) + F(0.513280f) * AT(3, 1) + F(0.768178f) * AT(5, 1) + F(-0.375330f) * AT(7, 1));
798
- const Temp_Type X122 = D(F(-0.074658f) * AT(1, 2) + F(0.513280f) * AT(3, 2) + F(0.768178f) * AT(5, 2) + F(-0.375330f) * AT(7, 2));
799
- const Temp_Type X123 = D(F(-0.074658f) * AT(1, 3) + F(0.513280f) * AT(3, 3) + F(0.768178f) * AT(5, 3) + F(-0.375330f) * AT(7, 3));
800
- const Temp_Type X124 = D(F(-0.074658f) * AT(1, 4) + F(0.513280f) * AT(3, 4) + F(0.768178f) * AT(5, 4) + F(-0.375330f) * AT(7, 4));
801
- const Temp_Type X125 = D(F(-0.074658f) * AT(1, 5) + F(0.513280f) * AT(3, 5) + F(0.768178f) * AT(5, 5) + F(-0.375330f) * AT(7, 5));
802
- const Temp_Type X126 = D(F(-0.074658f) * AT(1, 6) + F(0.513280f) * AT(3, 6) + F(0.768178f) * AT(5, 6) + F(-0.375330f) * AT(7, 6));
803
- const Temp_Type X127 = D(F(-0.074658f) * AT(1, 7) + F(0.513280f) * AT(3, 7) + F(0.768178f) * AT(5, 7) + F(-0.375330f) * AT(7, 7));
804
- const Temp_Type X130 = AT(6, 0);
805
- const Temp_Type X131 = AT(6, 1);
806
- const Temp_Type X132 = AT(6, 2);
807
- const Temp_Type X133 = AT(6, 3);
808
- const Temp_Type X134 = AT(6, 4);
809
- const Temp_Type X135 = AT(6, 5);
810
- const Temp_Type X136 = AT(6, 6);
811
- const Temp_Type X137 = AT(6, 7);
812
- // 80 muls 48 adds
813
-
814
- // 4x4 = 4x8 times 8x4, matrix 1 is constant
815
- R.at(0, 0) = X100;
816
- R.at(0, 1) = D(X101 * F(0.415735f) + X103 * F(0.791065f) + X105 * F(-0.352443f) + X107 * F(0.277785f));
817
- R.at(0, 2) = X104;
818
- R.at(0, 3) = D(X101 * F(0.022887f) + X103 * F(-0.097545f) + X105 * F(0.490393f) + X107 * F(0.865723f));
819
- R.at(1, 0) = X110;
820
- R.at(1, 1) = D(X111 * F(0.415735f) + X113 * F(0.791065f) + X115 * F(-0.352443f) + X117 * F(0.277785f));
821
- R.at(1, 2) = X114;
822
- R.at(1, 3) = D(X111 * F(0.022887f) + X113 * F(-0.097545f) + X115 * F(0.490393f) + X117 * F(0.865723f));
823
- R.at(2, 0) = X120;
824
- R.at(2, 1) = D(X121 * F(0.415735f) + X123 * F(0.791065f) + X125 * F(-0.352443f) + X127 * F(0.277785f));
825
- R.at(2, 2) = X124;
826
- R.at(2, 3) = D(X121 * F(0.022887f) + X123 * F(-0.097545f) + X125 * F(0.490393f) + X127 * F(0.865723f));
827
- R.at(3, 0) = X130;
828
- R.at(3, 1) = D(X131 * F(0.415735f) + X133 * F(0.791065f) + X135 * F(-0.352443f) + X137 * F(0.277785f));
829
- R.at(3, 2) = X134;
830
- R.at(3, 3) = D(X131 * F(0.022887f) + X133 * F(-0.097545f) + X135 * F(0.490393f) + X137 * F(0.865723f));
831
- // 40 muls 24 adds
832
- // 4x4 = 4x8 times 8x4, matrix 1 is constant
833
- S.at(0, 0) = D(X101 * F(0.906127f) + X103 * F(-0.318190f) + X105 * F(0.212608f) + X107 * F(-0.180240f));
834
- S.at(0, 1) = X102;
835
- S.at(0, 2) = D(X101 * F(-0.074658f) + X103 * F(0.513280f) + X105 * F(0.768178f) + X107 * F(-0.375330f));
836
- S.at(0, 3) = X106;
837
- S.at(1, 0) = D(X111 * F(0.906127f) + X113 * F(-0.318190f) + X115 * F(0.212608f) + X117 * F(-0.180240f));
838
- S.at(1, 1) = X112;
839
- S.at(1, 2) = D(X111 * F(-0.074658f) + X113 * F(0.513280f) + X115 * F(0.768178f) + X117 * F(-0.375330f));
840
- S.at(1, 3) = X116;
841
- S.at(2, 0) = D(X121 * F(0.906127f) + X123 * F(-0.318190f) + X125 * F(0.212608f) + X127 * F(-0.180240f));
842
- S.at(2, 1) = X122;
843
- S.at(2, 2) = D(X121 * F(-0.074658f) + X123 * F(0.513280f) + X125 * F(0.768178f) + X127 * F(-0.375330f));
844
- S.at(2, 3) = X126;
845
- S.at(3, 0) = D(X131 * F(0.906127f) + X133 * F(-0.318190f) + X135 * F(0.212608f) + X137 * F(-0.180240f));
846
- S.at(3, 1) = X132;
847
- S.at(3, 2) = D(X131 * F(-0.074658f) + X133 * F(0.513280f) + X135 * F(0.768178f) + X137 * F(-0.375330f));
848
- S.at(3, 3) = X136;
849
- // 40 muls 24 adds
850
- }
851
- };
852
- } // end namespace DCT_Upsample
853
-
854
- // Unconditionally frees all allocated m_blocks.
855
- void jpeg_decoder::free_all_blocks()
856
- {
857
- m_pStream = NULL;
858
- for (mem_block *b = m_pMem_blocks; b; )
859
- {
860
- mem_block *n = b->m_pNext;
861
- jpgd_free(b);
862
- b = n;
863
- }
864
- m_pMem_blocks = NULL;
865
- }
866
-
867
- // This method handles all errors.
868
- // It could easily be changed to use C++ exceptions.
869
- void jpeg_decoder::stop_decoding(jpgd_status status)
870
- {
871
- m_error_code = status;
872
- free_all_blocks();
873
- longjmp(m_jmp_state, status);
874
-
875
- // we shouldn't get here as longjmp shouldn't return, but we put it here to make it explicit
876
- // that this function doesn't return, otherwise we get this error:
877
- //
878
- // error : function declared 'noreturn' should not return
879
- exit(1);
880
- }
881
-
882
- void *jpeg_decoder::alloc(size_t nSize, bool zero)
883
- {
884
- nSize = (JPGD_MAX(nSize, 1) + 3) & ~3;
885
- char *rv = NULL;
886
- for (mem_block *b = m_pMem_blocks; b; b = b->m_pNext)
887
- {
888
- if ((b->m_used_count + nSize) <= b->m_size)
889
- {
890
- rv = b->m_data + b->m_used_count;
891
- b->m_used_count += nSize;
892
- break;
893
- }
894
- }
895
- if (!rv)
896
- {
897
- int capacity = JPGD_MAX(32768 - 256, (nSize + 2047) & ~2047);
898
- mem_block *b = (mem_block*)jpgd_malloc(sizeof(mem_block) + capacity);
899
- if (!b) stop_decoding(JPGD_NOTENOUGHMEM);
900
- b->m_pNext = m_pMem_blocks; m_pMem_blocks = b;
901
- b->m_used_count = nSize;
902
- b->m_size = capacity;
903
- rv = b->m_data;
904
- }
905
- if (zero) memset(rv, 0, nSize);
906
- return rv;
907
- }
908
-
909
- void jpeg_decoder::word_clear(void *p, uint16 c, uint n)
910
- {
911
- uint8 *pD = (uint8*)p;
912
- const uint8 l = c & 0xFF, h = (c >> 8) & 0xFF;
913
- while (n)
914
- {
915
- pD[0] = l; pD[1] = h; pD += 2;
916
- n--;
917
- }
918
- }
919
-
920
- // Refill the input buffer.
921
- // This method will sit in a loop until (A) the buffer is full or (B)
922
- // the stream's read() method reports and end of file condition.
923
- void jpeg_decoder::prep_in_buffer()
924
- {
925
- m_in_buf_left = 0;
926
- m_pIn_buf_ofs = m_in_buf;
927
-
928
- if (m_eof_flag)
929
- return;
930
-
931
- do
932
- {
933
- int bytes_read = m_pStream->read(m_in_buf + m_in_buf_left, JPGD_IN_BUF_SIZE - m_in_buf_left, &m_eof_flag);
934
- if (bytes_read == -1)
935
- stop_decoding(JPGD_STREAM_READ);
936
-
937
- m_in_buf_left += bytes_read;
938
- } while ((m_in_buf_left < JPGD_IN_BUF_SIZE) && (!m_eof_flag));
939
-
940
- m_total_bytes_read += m_in_buf_left;
941
-
942
- // Pad the end of the block with M_EOI (prevents the decompressor from going off the rails if the stream is invalid).
943
- // (This dates way back to when this decompressor was written in C/asm, and the all-asm Huffman decoder did some fancy things to increase perf.)
944
- word_clear(m_pIn_buf_ofs + m_in_buf_left, 0xD9FF, 64);
945
- }
946
-
947
- // Read a Huffman code table.
948
- void jpeg_decoder::read_dht_marker()
949
- {
950
- int i, index, count;
951
- uint8 huff_num[17];
952
- uint8 huff_val[256];
953
-
954
- uint num_left = get_bits(16);
955
-
956
- if (num_left < 2)
957
- stop_decoding(JPGD_BAD_DHT_MARKER);
958
-
959
- num_left -= 2;
960
-
961
- while (num_left)
962
- {
963
- index = get_bits(8);
964
-
965
- huff_num[0] = 0;
966
-
967
- count = 0;
968
-
969
- for (i = 1; i <= 16; i++)
970
- {
971
- huff_num[i] = static_cast<uint8>(get_bits(8));
972
- count += huff_num[i];
973
- }
974
-
975
- if (count > 255)
976
- stop_decoding(JPGD_BAD_DHT_COUNTS);
977
-
978
- for (i = 0; i < count; i++)
979
- huff_val[i] = static_cast<uint8>(get_bits(8));
980
-
981
- i = 1 + 16 + count;
982
-
983
- if (num_left < (uint)i)
984
- stop_decoding(JPGD_BAD_DHT_MARKER);
985
-
986
- num_left -= i;
987
-
988
- if ((index & 0x10) > 0x10)
989
- stop_decoding(JPGD_BAD_DHT_INDEX);
990
-
991
- index = (index & 0x0F) + ((index & 0x10) >> 4) * (JPGD_MAX_HUFF_TABLES >> 1);
992
-
993
- if (index >= JPGD_MAX_HUFF_TABLES)
994
- stop_decoding(JPGD_BAD_DHT_INDEX);
995
-
996
- if (!m_huff_num[index])
997
- m_huff_num[index] = (uint8 *)alloc(17);
998
-
999
- if (!m_huff_val[index])
1000
- m_huff_val[index] = (uint8 *)alloc(256);
1001
-
1002
- m_huff_ac[index] = (index & 0x10) != 0;
1003
- memcpy(m_huff_num[index], huff_num, 17);
1004
- memcpy(m_huff_val[index], huff_val, 256);
1005
- }
1006
- }
1007
-
1008
- // Read a quantization table.
1009
- void jpeg_decoder::read_dqt_marker()
1010
- {
1011
- int n, i, prec;
1012
- uint num_left;
1013
- uint temp;
1014
-
1015
- num_left = get_bits(16);
1016
-
1017
- if (num_left < 2)
1018
- stop_decoding(JPGD_BAD_DQT_MARKER);
1019
-
1020
- num_left -= 2;
1021
-
1022
- while (num_left)
1023
- {
1024
- n = get_bits(8);
1025
- prec = n >> 4;
1026
- n &= 0x0F;
1027
-
1028
- if (n >= JPGD_MAX_QUANT_TABLES)
1029
- stop_decoding(JPGD_BAD_DQT_TABLE);
1030
-
1031
- if (!m_quant[n])
1032
- m_quant[n] = (jpgd_quant_t *)alloc(64 * sizeof(jpgd_quant_t));
1033
-
1034
- // read quantization entries, in zag order
1035
- for (i = 0; i < 64; i++)
1036
- {
1037
- temp = get_bits(8);
1038
-
1039
- if (prec)
1040
- temp = (temp << 8) + get_bits(8);
1041
-
1042
- m_quant[n][i] = static_cast<jpgd_quant_t>(temp);
1043
- }
1044
-
1045
- i = 64 + 1;
1046
-
1047
- if (prec)
1048
- i += 64;
1049
-
1050
- if (num_left < (uint)i)
1051
- stop_decoding(JPGD_BAD_DQT_LENGTH);
1052
-
1053
- num_left -= i;
1054
- }
1055
- }
1056
-
1057
- // Read the start of frame (SOF) marker.
1058
- void jpeg_decoder::read_sof_marker()
1059
- {
1060
- int i;
1061
- uint num_left;
1062
-
1063
- num_left = get_bits(16);
1064
-
1065
- if (get_bits(8) != 8) /* precision: sorry, only 8-bit precision is supported right now */
1066
- stop_decoding(JPGD_BAD_PRECISION);
1067
-
1068
- m_image_y_size = get_bits(16);
1069
-
1070
- if ((m_image_y_size < 1) || (m_image_y_size > JPGD_MAX_HEIGHT))
1071
- stop_decoding(JPGD_BAD_HEIGHT);
1072
-
1073
- m_image_x_size = get_bits(16);
1074
-
1075
- if ((m_image_x_size < 1) || (m_image_x_size > JPGD_MAX_WIDTH))
1076
- stop_decoding(JPGD_BAD_WIDTH);
1077
-
1078
- m_comps_in_frame = get_bits(8);
1079
-
1080
- if (m_comps_in_frame > JPGD_MAX_COMPONENTS)
1081
- stop_decoding(JPGD_TOO_MANY_COMPONENTS);
1082
-
1083
- if (num_left != (uint)(m_comps_in_frame * 3 + 8))
1084
- stop_decoding(JPGD_BAD_SOF_LENGTH);
1085
-
1086
- for (i = 0; i < m_comps_in_frame; i++)
1087
- {
1088
- m_comp_ident[i] = get_bits(8);
1089
- m_comp_h_samp[i] = get_bits(4);
1090
- m_comp_v_samp[i] = get_bits(4);
1091
- m_comp_quant[i] = get_bits(8);
1092
- }
1093
- }
1094
-
1095
- // Used to skip unrecognized markers.
1096
- void jpeg_decoder::skip_variable_marker()
1097
- {
1098
- uint num_left;
1099
-
1100
- num_left = get_bits(16);
1101
-
1102
- if (num_left < 2)
1103
- stop_decoding(JPGD_BAD_VARIABLE_MARKER);
1104
-
1105
- num_left -= 2;
1106
-
1107
- while (num_left)
1108
- {
1109
- get_bits(8);
1110
- num_left--;
1111
- }
1112
- }
1113
-
1114
- // Read a define restart interval (DRI) marker.
1115
- void jpeg_decoder::read_dri_marker()
1116
- {
1117
- if (get_bits(16) != 4)
1118
- stop_decoding(JPGD_BAD_DRI_LENGTH);
1119
-
1120
- m_restart_interval = get_bits(16);
1121
- }
1122
-
1123
- // Read a start of scan (SOS) marker.
1124
- void jpeg_decoder::read_sos_marker()
1125
- {
1126
- uint num_left;
1127
- int i, ci, n, c, cc;
1128
-
1129
- num_left = get_bits(16);
1130
-
1131
- n = get_bits(8);
1132
-
1133
- m_comps_in_scan = n;
1134
-
1135
- num_left -= 3;
1136
-
1137
- if ( (num_left != (uint)(n * 2 + 3)) || (n < 1) || (n > JPGD_MAX_COMPS_IN_SCAN) )
1138
- stop_decoding(JPGD_BAD_SOS_LENGTH);
1139
-
1140
- for (i = 0; i < n; i++)
1141
- {
1142
- cc = get_bits(8);
1143
- c = get_bits(8);
1144
- num_left -= 2;
1145
-
1146
- for (ci = 0; ci < m_comps_in_frame; ci++)
1147
- if (cc == m_comp_ident[ci])
1148
- break;
1149
-
1150
- if (ci >= m_comps_in_frame)
1151
- stop_decoding(JPGD_BAD_SOS_COMP_ID);
1152
-
1153
- m_comp_list[i] = ci;
1154
- m_comp_dc_tab[ci] = (c >> 4) & 15;
1155
- m_comp_ac_tab[ci] = (c & 15) + (JPGD_MAX_HUFF_TABLES >> 1);
1156
- }
1157
-
1158
- m_spectral_start = get_bits(8);
1159
- m_spectral_end = get_bits(8);
1160
- m_successive_high = get_bits(4);
1161
- m_successive_low = get_bits(4);
1162
-
1163
- if (!m_progressive_flag)
1164
- {
1165
- m_spectral_start = 0;
1166
- m_spectral_end = 63;
1167
- }
1168
-
1169
- num_left -= 3;
1170
-
1171
- while (num_left) /* read past whatever is num_left */
1172
- {
1173
- get_bits(8);
1174
- num_left--;
1175
- }
1176
- }
1177
-
1178
- // Finds the next marker.
1179
- int jpeg_decoder::next_marker()
1180
- {
1181
- uint c, bytes;
1182
-
1183
- bytes = 0;
1184
-
1185
- do
1186
- {
1187
- do
1188
- {
1189
- bytes++;
1190
- c = get_bits(8);
1191
- } while (c != 0xFF);
1192
-
1193
- do
1194
- {
1195
- c = get_bits(8);
1196
- } while (c == 0xFF);
1197
-
1198
- } while (c == 0);
1199
-
1200
- // If bytes > 0 here, there where extra bytes before the marker (not good).
1201
-
1202
- return c;
1203
- }
1204
-
1205
- // Process markers. Returns when an SOFx, SOI, EOI, or SOS marker is
1206
- // encountered.
1207
- int jpeg_decoder::process_markers()
1208
- {
1209
- int c;
1210
-
1211
- for ( ; ; )
1212
- {
1213
- c = next_marker();
1214
-
1215
- switch (c)
1216
- {
1217
- case M_SOF0:
1218
- case M_SOF1:
1219
- case M_SOF2:
1220
- case M_SOF3:
1221
- case M_SOF5:
1222
- case M_SOF6:
1223
- case M_SOF7:
1224
- // case M_JPG:
1225
- case M_SOF9:
1226
- case M_SOF10:
1227
- case M_SOF11:
1228
- case M_SOF13:
1229
- case M_SOF14:
1230
- case M_SOF15:
1231
- case M_SOI:
1232
- case M_EOI:
1233
- case M_SOS:
1234
- {
1235
- return c;
1236
- }
1237
- case M_DHT:
1238
- {
1239
- read_dht_marker();
1240
- break;
1241
- }
1242
- // No arithmitic support - dumb patents!
1243
- case M_DAC:
1244
- {
1245
- stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
1246
- break;
1247
- }
1248
- case M_DQT:
1249
- {
1250
- read_dqt_marker();
1251
- break;
1252
- }
1253
- case M_DRI:
1254
- {
1255
- read_dri_marker();
1256
- break;
1257
- }
1258
- //case M_APP0: /* no need to read the JFIF marker */
1259
-
1260
- case M_JPG:
1261
- case M_RST0: /* no parameters */
1262
- case M_RST1:
1263
- case M_RST2:
1264
- case M_RST3:
1265
- case M_RST4:
1266
- case M_RST5:
1267
- case M_RST6:
1268
- case M_RST7:
1269
- case M_TEM:
1270
- {
1271
- stop_decoding(JPGD_UNEXPECTED_MARKER);
1272
- break;
1273
- }
1274
- default: /* must be DNL, DHP, EXP, APPn, JPGn, COM, or RESn or APP0 */
1275
- {
1276
- skip_variable_marker();
1277
- break;
1278
- }
1279
- }
1280
- }
1281
- }
1282
-
1283
- // Finds the start of image (SOI) marker.
1284
- // This code is rather defensive: it only checks the first 512 bytes to avoid
1285
- // false positives.
1286
- void jpeg_decoder::locate_soi_marker()
1287
- {
1288
- uint lastchar, thischar;
1289
- uint bytesleft;
1290
-
1291
- lastchar = get_bits(8);
1292
-
1293
- thischar = get_bits(8);
1294
-
1295
- /* ok if it's a normal JPEG file without a special header */
1296
-
1297
- if ((lastchar == 0xFF) && (thischar == M_SOI))
1298
- return;
1299
-
1300
- bytesleft = 4096; //512;
1301
-
1302
- for ( ; ; )
1303
- {
1304
- if (--bytesleft == 0)
1305
- stop_decoding(JPGD_NOT_JPEG);
1306
-
1307
- lastchar = thischar;
1308
-
1309
- thischar = get_bits(8);
1310
-
1311
- if (lastchar == 0xFF)
1312
- {
1313
- if (thischar == M_SOI)
1314
- break;
1315
- else if (thischar == M_EOI) // get_bits will keep returning M_EOI if we read past the end
1316
- stop_decoding(JPGD_NOT_JPEG);
1317
- }
1318
- }
1319
-
1320
- // Check the next character after marker: if it's not 0xFF, it can't be the start of the next marker, so the file is bad.
1321
- thischar = (m_bit_buf >> 24) & 0xFF;
1322
-
1323
- if (thischar != 0xFF)
1324
- stop_decoding(JPGD_NOT_JPEG);
1325
- }
1326
-
1327
- // Find a start of frame (SOF) marker.
1328
- void jpeg_decoder::locate_sof_marker()
1329
- {
1330
- locate_soi_marker();
1331
-
1332
- int c = process_markers();
1333
-
1334
- switch (c)
1335
- {
1336
- case M_SOF2:
1337
- m_progressive_flag = JPGD_TRUE;
1338
- case M_SOF0: /* baseline DCT */
1339
- case M_SOF1: /* extended sequential DCT */
1340
- {
1341
- read_sof_marker();
1342
- break;
1343
- }
1344
- case M_SOF9: /* Arithmitic coding */
1345
- {
1346
- stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
1347
- break;
1348
- }
1349
- default:
1350
- {
1351
- stop_decoding(JPGD_UNSUPPORTED_MARKER);
1352
- break;
1353
- }
1354
- }
1355
- }
1356
-
1357
- // Find a start of scan (SOS) marker.
1358
- int jpeg_decoder::locate_sos_marker()
1359
- {
1360
- int c;
1361
-
1362
- c = process_markers();
1363
-
1364
- if (c == M_EOI)
1365
- return JPGD_FALSE;
1366
- else if (c != M_SOS)
1367
- stop_decoding(JPGD_UNEXPECTED_MARKER);
1368
-
1369
- read_sos_marker();
1370
-
1371
- return JPGD_TRUE;
1372
- }
1373
-
1374
- // Reset everything to default/uninitialized state.
1375
- void jpeg_decoder::init(jpeg_decoder_stream *pStream)
1376
- {
1377
- m_pMem_blocks = NULL;
1378
- m_error_code = JPGD_SUCCESS;
1379
- m_ready_flag = false;
1380
- m_image_x_size = m_image_y_size = 0;
1381
- m_pStream = pStream;
1382
- m_progressive_flag = JPGD_FALSE;
1383
-
1384
- memset(m_huff_ac, 0, sizeof(m_huff_ac));
1385
- memset(m_huff_num, 0, sizeof(m_huff_num));
1386
- memset(m_huff_val, 0, sizeof(m_huff_val));
1387
- memset(m_quant, 0, sizeof(m_quant));
1388
-
1389
- m_scan_type = 0;
1390
- m_comps_in_frame = 0;
1391
-
1392
- memset(m_comp_h_samp, 0, sizeof(m_comp_h_samp));
1393
- memset(m_comp_v_samp, 0, sizeof(m_comp_v_samp));
1394
- memset(m_comp_quant, 0, sizeof(m_comp_quant));
1395
- memset(m_comp_ident, 0, sizeof(m_comp_ident));
1396
- memset(m_comp_h_blocks, 0, sizeof(m_comp_h_blocks));
1397
- memset(m_comp_v_blocks, 0, sizeof(m_comp_v_blocks));
1398
-
1399
- m_comps_in_scan = 0;
1400
- memset(m_comp_list, 0, sizeof(m_comp_list));
1401
- memset(m_comp_dc_tab, 0, sizeof(m_comp_dc_tab));
1402
- memset(m_comp_ac_tab, 0, sizeof(m_comp_ac_tab));
1403
-
1404
- m_spectral_start = 0;
1405
- m_spectral_end = 0;
1406
- m_successive_low = 0;
1407
- m_successive_high = 0;
1408
- m_max_mcu_x_size = 0;
1409
- m_max_mcu_y_size = 0;
1410
- m_blocks_per_mcu = 0;
1411
- m_max_blocks_per_row = 0;
1412
- m_mcus_per_row = 0;
1413
- m_mcus_per_col = 0;
1414
- m_expanded_blocks_per_component = 0;
1415
- m_expanded_blocks_per_mcu = 0;
1416
- m_expanded_blocks_per_row = 0;
1417
- m_freq_domain_chroma_upsample = false;
1418
-
1419
- memset(m_mcu_org, 0, sizeof(m_mcu_org));
1420
-
1421
- m_total_lines_left = 0;
1422
- m_mcu_lines_left = 0;
1423
- m_real_dest_bytes_per_scan_line = 0;
1424
- m_dest_bytes_per_scan_line = 0;
1425
- m_dest_bytes_per_pixel = 0;
1426
-
1427
- memset(m_pHuff_tabs, 0, sizeof(m_pHuff_tabs));
1428
-
1429
- memset(m_dc_coeffs, 0, sizeof(m_dc_coeffs));
1430
- memset(m_ac_coeffs, 0, sizeof(m_ac_coeffs));
1431
- memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu));
1432
-
1433
- m_eob_run = 0;
1434
-
1435
- memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu));
1436
-
1437
- m_pIn_buf_ofs = m_in_buf;
1438
- m_in_buf_left = 0;
1439
- m_eof_flag = false;
1440
- m_tem_flag = 0;
1441
-
1442
- memset(m_in_buf_pad_start, 0, sizeof(m_in_buf_pad_start));
1443
- memset(m_in_buf, 0, sizeof(m_in_buf));
1444
- memset(m_in_buf_pad_end, 0, sizeof(m_in_buf_pad_end));
1445
-
1446
- m_restart_interval = 0;
1447
- m_restarts_left = 0;
1448
- m_next_restart_num = 0;
1449
-
1450
- m_max_mcus_per_row = 0;
1451
- m_max_blocks_per_mcu = 0;
1452
- m_max_mcus_per_col = 0;
1453
-
1454
- memset(m_last_dc_val, 0, sizeof(m_last_dc_val));
1455
- m_pMCU_coefficients = NULL;
1456
- m_pSample_buf = NULL;
1457
-
1458
- m_total_bytes_read = 0;
1459
-
1460
- m_pScan_line_0 = NULL;
1461
- m_pScan_line_1 = NULL;
1462
-
1463
- // Ready the input buffer.
1464
- prep_in_buffer();
1465
-
1466
- // Prime the bit buffer.
1467
- m_bits_left = 16;
1468
- m_bit_buf = 0;
1469
-
1470
- get_bits(16);
1471
- get_bits(16);
1472
-
1473
- for (int i = 0; i < JPGD_MAX_BLOCKS_PER_MCU; i++)
1474
- m_mcu_block_max_zag[i] = 64;
1475
- }
1476
-
1477
- #define SCALEBITS 16
1478
- #define ONE_HALF ((int) 1 << (SCALEBITS-1))
1479
- #define FIX(x) ((int) ((x) * (1L<<SCALEBITS) + 0.5f))
1480
-
1481
- // Create a few tables that allow us to quickly convert YCbCr to RGB.
1482
- void jpeg_decoder::create_look_ups()
1483
- {
1484
- for (int i = 0; i <= 255; i++)
1485
- {
1486
- int k = i - 128;
1487
- m_crr[i] = ( FIX(1.40200f) * k + ONE_HALF) >> SCALEBITS;
1488
- m_cbb[i] = ( FIX(1.77200f) * k + ONE_HALF) >> SCALEBITS;
1489
- m_crg[i] = (-FIX(0.71414f)) * k;
1490
- m_cbg[i] = (-FIX(0.34414f)) * k + ONE_HALF;
1491
- }
1492
- }
1493
-
1494
- // This method throws back into the stream any bytes that where read
1495
- // into the bit buffer during initial marker scanning.
1496
- void jpeg_decoder::fix_in_buffer()
1497
- {
1498
- // In case any 0xFF's where pulled into the buffer during marker scanning.
1499
- JPGD_ASSERT((m_bits_left & 7) == 0);
1500
-
1501
- if (m_bits_left == 16)
1502
- stuff_char( (uint8)(m_bit_buf & 0xFF));
1503
-
1504
- if (m_bits_left >= 8)
1505
- stuff_char( (uint8)((m_bit_buf >> 8) & 0xFF));
1506
-
1507
- stuff_char((uint8)((m_bit_buf >> 16) & 0xFF));
1508
- stuff_char((uint8)((m_bit_buf >> 24) & 0xFF));
1509
-
1510
- m_bits_left = 16;
1511
- get_bits_no_markers(16);
1512
- get_bits_no_markers(16);
1513
- }
1514
-
1515
- void jpeg_decoder::transform_mcu(int mcu_row)
1516
- {
1517
- jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
1518
- uint8* pDst_ptr = m_pSample_buf + mcu_row * m_blocks_per_mcu * 64;
1519
-
1520
- for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
1521
- {
1522
- idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag[mcu_block]);
1523
- pSrc_ptr += 64;
1524
- pDst_ptr += 64;
1525
- }
1526
- }
1527
-
1528
- static const uint8 s_max_rc[64] =
1529
- {
1530
- 17, 18, 34, 50, 50, 51, 52, 52, 52, 68, 84, 84, 84, 84, 85, 86, 86, 86, 86, 86,
1531
- 102, 118, 118, 118, 118, 118, 118, 119, 120, 120, 120, 120, 120, 120, 120, 136,
1532
- 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136,
1533
- 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136
1534
- };
1535
-
1536
- void jpeg_decoder::transform_mcu_expand(int mcu_row)
1537
- {
1538
- jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
1539
- uint8* pDst_ptr = m_pSample_buf + mcu_row * m_expanded_blocks_per_mcu * 64;
1540
-
1541
- // Y IDCT
1542
- int mcu_block;
1543
- for (mcu_block = 0; mcu_block < m_expanded_blocks_per_component; mcu_block++)
1544
- {
1545
- idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag[mcu_block]);
1546
- pSrc_ptr += 64;
1547
- pDst_ptr += 64;
1548
- }
1549
-
1550
- // Chroma IDCT, with upsampling
1551
- jpgd_block_t temp_block[64];
1552
-
1553
- for (int i = 0; i < 2; i++)
1554
- {
1555
- DCT_Upsample::Matrix44 P, Q, R, S;
1556
-
1557
- JPGD_ASSERT(m_mcu_block_max_zag[mcu_block] >= 1);
1558
- JPGD_ASSERT(m_mcu_block_max_zag[mcu_block] <= 64);
1559
-
1560
- switch (s_max_rc[m_mcu_block_max_zag[mcu_block++] - 1])
1561
- {
1562
- case 1*16+1:
1563
- DCT_Upsample::P_Q<1, 1>::calc(P, Q, pSrc_ptr);
1564
- DCT_Upsample::R_S<1, 1>::calc(R, S, pSrc_ptr);
1565
- break;
1566
- case 1*16+2:
1567
- DCT_Upsample::P_Q<1, 2>::calc(P, Q, pSrc_ptr);
1568
- DCT_Upsample::R_S<1, 2>::calc(R, S, pSrc_ptr);
1569
- break;
1570
- case 2*16+2:
1571
- DCT_Upsample::P_Q<2, 2>::calc(P, Q, pSrc_ptr);
1572
- DCT_Upsample::R_S<2, 2>::calc(R, S, pSrc_ptr);
1573
- break;
1574
- case 3*16+2:
1575
- DCT_Upsample::P_Q<3, 2>::calc(P, Q, pSrc_ptr);
1576
- DCT_Upsample::R_S<3, 2>::calc(R, S, pSrc_ptr);
1577
- break;
1578
- case 3*16+3:
1579
- DCT_Upsample::P_Q<3, 3>::calc(P, Q, pSrc_ptr);
1580
- DCT_Upsample::R_S<3, 3>::calc(R, S, pSrc_ptr);
1581
- break;
1582
- case 3*16+4:
1583
- DCT_Upsample::P_Q<3, 4>::calc(P, Q, pSrc_ptr);
1584
- DCT_Upsample::R_S<3, 4>::calc(R, S, pSrc_ptr);
1585
- break;
1586
- case 4*16+4:
1587
- DCT_Upsample::P_Q<4, 4>::calc(P, Q, pSrc_ptr);
1588
- DCT_Upsample::R_S<4, 4>::calc(R, S, pSrc_ptr);
1589
- break;
1590
- case 5*16+4:
1591
- DCT_Upsample::P_Q<5, 4>::calc(P, Q, pSrc_ptr);
1592
- DCT_Upsample::R_S<5, 4>::calc(R, S, pSrc_ptr);
1593
- break;
1594
- case 5*16+5:
1595
- DCT_Upsample::P_Q<5, 5>::calc(P, Q, pSrc_ptr);
1596
- DCT_Upsample::R_S<5, 5>::calc(R, S, pSrc_ptr);
1597
- break;
1598
- case 5*16+6:
1599
- DCT_Upsample::P_Q<5, 6>::calc(P, Q, pSrc_ptr);
1600
- DCT_Upsample::R_S<5, 6>::calc(R, S, pSrc_ptr);
1601
- break;
1602
- case 6*16+6:
1603
- DCT_Upsample::P_Q<6, 6>::calc(P, Q, pSrc_ptr);
1604
- DCT_Upsample::R_S<6, 6>::calc(R, S, pSrc_ptr);
1605
- break;
1606
- case 7*16+6:
1607
- DCT_Upsample::P_Q<7, 6>::calc(P, Q, pSrc_ptr);
1608
- DCT_Upsample::R_S<7, 6>::calc(R, S, pSrc_ptr);
1609
- break;
1610
- case 7*16+7:
1611
- DCT_Upsample::P_Q<7, 7>::calc(P, Q, pSrc_ptr);
1612
- DCT_Upsample::R_S<7, 7>::calc(R, S, pSrc_ptr);
1613
- break;
1614
- case 7*16+8:
1615
- DCT_Upsample::P_Q<7, 8>::calc(P, Q, pSrc_ptr);
1616
- DCT_Upsample::R_S<7, 8>::calc(R, S, pSrc_ptr);
1617
- break;
1618
- case 8*16+8:
1619
- DCT_Upsample::P_Q<8, 8>::calc(P, Q, pSrc_ptr);
1620
- DCT_Upsample::R_S<8, 8>::calc(R, S, pSrc_ptr);
1621
- break;
1622
- default:
1623
- JPGD_ASSERT(false);
1624
- }
1625
-
1626
- DCT_Upsample::Matrix44 a(P + Q); P -= Q;
1627
- DCT_Upsample::Matrix44& b = P;
1628
- DCT_Upsample::Matrix44 c(R + S); R -= S;
1629
- DCT_Upsample::Matrix44& d = R;
1630
-
1631
- DCT_Upsample::Matrix44::add_and_store(temp_block, a, c);
1632
- idct_4x4(temp_block, pDst_ptr);
1633
- pDst_ptr += 64;
1634
-
1635
- DCT_Upsample::Matrix44::sub_and_store(temp_block, a, c);
1636
- idct_4x4(temp_block, pDst_ptr);
1637
- pDst_ptr += 64;
1638
-
1639
- DCT_Upsample::Matrix44::add_and_store(temp_block, b, d);
1640
- idct_4x4(temp_block, pDst_ptr);
1641
- pDst_ptr += 64;
1642
-
1643
- DCT_Upsample::Matrix44::sub_and_store(temp_block, b, d);
1644
- idct_4x4(temp_block, pDst_ptr);
1645
- pDst_ptr += 64;
1646
-
1647
- pSrc_ptr += 64;
1648
- }
1649
- }
1650
-
1651
- // Loads and dequantizes the next row of (already decoded) coefficients.
1652
- // Progressive images only.
1653
- void jpeg_decoder::load_next_row()
1654
- {
1655
- int i;
1656
- jpgd_block_t *p;
1657
- jpgd_quant_t *q;
1658
- int mcu_row, mcu_block, row_block = 0;
1659
- int component_num, component_id;
1660
- int block_x_mcu[JPGD_MAX_COMPONENTS];
1661
-
1662
- memset(block_x_mcu, 0, JPGD_MAX_COMPONENTS * sizeof(int));
1663
-
1664
- for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
1665
- {
1666
- int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
1667
-
1668
- for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
1669
- {
1670
- component_id = m_mcu_org[mcu_block];
1671
- q = m_quant[m_comp_quant[component_id]];
1672
-
1673
- p = m_pMCU_coefficients + 64 * mcu_block;
1674
-
1675
- jpgd_block_t* pAC = coeff_buf_getp(m_ac_coeffs[component_id], block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs);
1676
- jpgd_block_t* pDC = coeff_buf_getp(m_dc_coeffs[component_id], block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs);
1677
- p[0] = pDC[0];
1678
- memcpy(&p[1], &pAC[1], 63 * sizeof(jpgd_block_t));
1679
-
1680
- for (i = 63; i > 0; i--)
1681
- if (p[g_ZAG[i]])
1682
- break;
1683
-
1684
- m_mcu_block_max_zag[mcu_block] = i + 1;
1685
-
1686
- for ( ; i >= 0; i--)
1687
- if (p[g_ZAG[i]])
1688
- p[g_ZAG[i]] = static_cast<jpgd_block_t>(p[g_ZAG[i]] * q[i]);
1689
-
1690
- row_block++;
1691
-
1692
- if (m_comps_in_scan == 1)
1693
- block_x_mcu[component_id]++;
1694
- else
1695
- {
1696
- if (++block_x_mcu_ofs == m_comp_h_samp[component_id])
1697
- {
1698
- block_x_mcu_ofs = 0;
1699
-
1700
- if (++block_y_mcu_ofs == m_comp_v_samp[component_id])
1701
- {
1702
- block_y_mcu_ofs = 0;
1703
-
1704
- block_x_mcu[component_id] += m_comp_h_samp[component_id];
1705
- }
1706
- }
1707
- }
1708
- }
1709
-
1710
- if (m_freq_domain_chroma_upsample)
1711
- transform_mcu_expand(mcu_row);
1712
- else
1713
- transform_mcu(mcu_row);
1714
- }
1715
-
1716
- if (m_comps_in_scan == 1)
1717
- m_block_y_mcu[m_comp_list[0]]++;
1718
- else
1719
- {
1720
- for (component_num = 0; component_num < m_comps_in_scan; component_num++)
1721
- {
1722
- component_id = m_comp_list[component_num];
1723
-
1724
- m_block_y_mcu[component_id] += m_comp_v_samp[component_id];
1725
- }
1726
- }
1727
- }
1728
-
1729
- // Restart interval processing.
1730
- void jpeg_decoder::process_restart()
1731
- {
1732
- int i;
1733
- int c = 0;
1734
-
1735
- // Align to a byte boundry
1736
- // FIXME: Is this really necessary? get_bits_no_markers() never reads in markers!
1737
- //get_bits_no_markers(m_bits_left & 7);
1738
-
1739
- // Let's scan a little bit to find the marker, but not _too_ far.
1740
- // 1536 is a "fudge factor" that determines how much to scan.
1741
- for (i = 1536; i > 0; i--)
1742
- if (get_char() == 0xFF)
1743
- break;
1744
-
1745
- if (i == 0)
1746
- stop_decoding(JPGD_BAD_RESTART_MARKER);
1747
-
1748
- for ( ; i > 0; i--)
1749
- if ((c = get_char()) != 0xFF)
1750
- break;
1751
-
1752
- if (i == 0)
1753
- stop_decoding(JPGD_BAD_RESTART_MARKER);
1754
-
1755
- // Is it the expected marker? If not, something bad happened.
1756
- if (c != (m_next_restart_num + M_RST0))
1757
- stop_decoding(JPGD_BAD_RESTART_MARKER);
1758
-
1759
- // Reset each component's DC prediction values.
1760
- memset(&m_last_dc_val, 0, m_comps_in_frame * sizeof(uint));
1761
-
1762
- m_eob_run = 0;
1763
-
1764
- m_restarts_left = m_restart_interval;
1765
-
1766
- m_next_restart_num = (m_next_restart_num + 1) & 7;
1767
-
1768
- // Get the bit buffer going again...
1769
-
1770
- m_bits_left = 16;
1771
- get_bits_no_markers(16);
1772
- get_bits_no_markers(16);
1773
- }
1774
-
1775
// Dequantizes one AC coefficient: returns coefficient c scaled by quantizer q.
static inline int dequantize_ac(int c, int q)
{
  return c * q;
}
1776
-
1777
- // Decodes and dequantizes the next row of coefficients.
1778
- void jpeg_decoder::decode_next_row()
1779
- {
1780
- int row_block = 0;
1781
-
1782
- for (int mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
1783
- {
1784
- if ((m_restart_interval) && (m_restarts_left == 0))
1785
- process_restart();
1786
-
1787
- jpgd_block_t* p = m_pMCU_coefficients;
1788
- for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++, p += 64)
1789
- {
1790
- int component_id = m_mcu_org[mcu_block];
1791
- jpgd_quant_t* q = m_quant[m_comp_quant[component_id]];
1792
-
1793
- int r, s;
1794
- s = huff_decode(m_pHuff_tabs[m_comp_dc_tab[component_id]], r);
1795
- s = HUFF_EXTEND(r, s);
1796
-
1797
- m_last_dc_val[component_id] = (s += m_last_dc_val[component_id]);
1798
-
1799
- p[0] = static_cast<jpgd_block_t>(s * q[0]);
1800
-
1801
- int prev_num_set = m_mcu_block_max_zag[mcu_block];
1802
-
1803
- huff_tables *pH = m_pHuff_tabs[m_comp_ac_tab[component_id]];
1804
-
1805
- int k;
1806
- for (k = 1; k < 64; k++)
1807
- {
1808
- int extra_bits;
1809
- s = huff_decode(pH, extra_bits);
1810
-
1811
- r = s >> 4;
1812
- s &= 15;
1813
-
1814
- if (s)
1815
- {
1816
- if (r)
1817
- {
1818
- if ((k + r) > 63)
1819
- stop_decoding(JPGD_DECODE_ERROR);
1820
-
1821
- if (k < prev_num_set)
1822
- {
1823
- int n = JPGD_MIN(r, prev_num_set - k);
1824
- int kt = k;
1825
- while (n--)
1826
- p[g_ZAG[kt++]] = 0;
1827
- }
1828
-
1829
- k += r;
1830
- }
1831
-
1832
- s = HUFF_EXTEND(extra_bits, s);
1833
-
1834
- JPGD_ASSERT(k < 64);
1835
-
1836
- p[g_ZAG[k]] = static_cast<jpgd_block_t>(dequantize_ac(s, q[k])); //s * q[k];
1837
- }
1838
- else
1839
- {
1840
- if (r == 15)
1841
- {
1842
- if ((k + 16) > 64)
1843
- stop_decoding(JPGD_DECODE_ERROR);
1844
-
1845
- if (k < prev_num_set)
1846
- {
1847
- int n = JPGD_MIN(16, prev_num_set - k);
1848
- int kt = k;
1849
- while (n--)
1850
- {
1851
- JPGD_ASSERT(kt <= 63);
1852
- p[g_ZAG[kt++]] = 0;
1853
- }
1854
- }
1855
-
1856
- k += 16 - 1; // - 1 because the loop counter is k
1857
- // BEGIN EPIC MOD
1858
- JPGD_ASSERT(k < 64 && p[g_ZAG[k]] == 0);
1859
- // END EPIC MOD
1860
- }
1861
- else
1862
- break;
1863
- }
1864
- }
1865
-
1866
- if (k < prev_num_set)
1867
- {
1868
- int kt = k;
1869
- while (kt < prev_num_set)
1870
- p[g_ZAG[kt++]] = 0;
1871
- }
1872
-
1873
- m_mcu_block_max_zag[mcu_block] = k;
1874
-
1875
- row_block++;
1876
- }
1877
-
1878
- if (m_freq_domain_chroma_upsample)
1879
- transform_mcu_expand(mcu_row);
1880
- else
1881
- transform_mcu(mcu_row);
1882
-
1883
- m_restarts_left--;
1884
- }
1885
- }
1886
-
1887
- // YCbCr H1V1 (1x1:1:1, 3 m_blocks per MCU) to RGB
1888
- void jpeg_decoder::H1V1Convert()
1889
- {
1890
- int row = m_max_mcu_y_size - m_mcu_lines_left;
1891
- uint8 *d = m_pScan_line_0;
1892
- uint8 *s = m_pSample_buf + row * 8;
1893
-
1894
- for (int i = m_max_mcus_per_row; i > 0; i--)
1895
- {
1896
- for (int j = 0; j < 8; j++)
1897
- {
1898
- int y = s[j];
1899
- int cb = s[64+j];
1900
- int cr = s[128+j];
1901
-
1902
- if (jpg_format == ERGBFormatJPG::BGRA)
1903
- {
1904
- d[0] = clamp(y + m_cbb[cb]);
1905
- d[1] = clamp(y + ((m_crg[cr] + m_cbg[cb]) >> 16));
1906
- d[2] = clamp(y + m_crr[cr]);
1907
- d[3] = 255;
1908
- }
1909
- else
1910
- {
1911
- d[0] = clamp(y + m_crr[cr]);
1912
- d[1] = clamp(y + ((m_crg[cr] + m_cbg[cb]) >> 16));
1913
- d[2] = clamp(y + m_cbb[cb]);
1914
- d[3] = 255;
1915
- }
1916
- d += 4;
1917
- }
1918
-
1919
- s += 64*3;
1920
- }
1921
- }
1922
-
1923
- // YCbCr H2V1 (2x1:1:1, 4 m_blocks per MCU) to RGB
1924
- void jpeg_decoder::H2V1Convert()
1925
- {
1926
- int row = m_max_mcu_y_size - m_mcu_lines_left;
1927
- uint8 *d0 = m_pScan_line_0;
1928
- uint8 *y = m_pSample_buf + row * 8;
1929
- uint8 *c = m_pSample_buf + 2*64 + row * 8;
1930
-
1931
- for (int i = m_max_mcus_per_row; i > 0; i--)
1932
- {
1933
- for (int l = 0; l < 2; l++)
1934
- {
1935
- for (int j = 0; j < 4; j++)
1936
- {
1937
- int cb = c[0];
1938
- int cr = c[64];
1939
-
1940
- int rc = m_crr[cr];
1941
- int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
1942
- int bc = m_cbb[cb];
1943
-
1944
- int yy = y[j<<1];
1945
- if (jpg_format == ERGBFormatJPG::BGRA)
1946
- {
1947
- d0[0] = clamp(yy+bc);
1948
- d0[1] = clamp(yy+gc);
1949
- d0[2] = clamp(yy+rc);
1950
- d0[3] = 255;
1951
- yy = y[(j<<1)+1];
1952
- d0[4] = clamp(yy+bc);
1953
- d0[5] = clamp(yy+gc);
1954
- d0[6] = clamp(yy+rc);
1955
- d0[7] = 255;
1956
- }
1957
- else
1958
- {
1959
- d0[0] = clamp(yy+rc);
1960
- d0[1] = clamp(yy+gc);
1961
- d0[2] = clamp(yy+bc);
1962
- d0[3] = 255;
1963
- yy = y[(j<<1)+1];
1964
- d0[4] = clamp(yy+rc);
1965
- d0[5] = clamp(yy+gc);
1966
- d0[6] = clamp(yy+bc);
1967
- d0[7] = 255;
1968
- }
1969
-
1970
- d0 += 8;
1971
-
1972
- c++;
1973
- }
1974
- y += 64;
1975
- }
1976
-
1977
- y += 64*4 - 64*2;
1978
- c += 64*4 - 8;
1979
- }
1980
- }
1981
-
1982
- // YCbCr H2V1 (1x2:1:1, 4 m_blocks per MCU) to RGB
1983
- void jpeg_decoder::H1V2Convert()
1984
- {
1985
- int row = m_max_mcu_y_size - m_mcu_lines_left;
1986
- uint8 *d0 = m_pScan_line_0;
1987
- uint8 *d1 = m_pScan_line_1;
1988
- uint8 *y;
1989
- uint8 *c;
1990
-
1991
- if (row < 8)
1992
- y = m_pSample_buf + row * 8;
1993
- else
1994
- y = m_pSample_buf + 64*1 + (row & 7) * 8;
1995
-
1996
- c = m_pSample_buf + 64*2 + (row >> 1) * 8;
1997
-
1998
- for (int i = m_max_mcus_per_row; i > 0; i--)
1999
- {
2000
- for (int j = 0; j < 8; j++)
2001
- {
2002
- int cb = c[0+j];
2003
- int cr = c[64+j];
2004
-
2005
- int rc = m_crr[cr];
2006
- int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
2007
- int bc = m_cbb[cb];
2008
-
2009
- int yy = y[j];
2010
- if (jpg_format == ERGBFormatJPG::BGRA)
2011
- {
2012
- d0[0] = clamp(yy+bc);
2013
- d0[1] = clamp(yy+gc);
2014
- d0[2] = clamp(yy+rc);
2015
- d0[3] = 255;
2016
- yy = y[8+j];
2017
- d1[0] = clamp(yy+bc);
2018
- d1[1] = clamp(yy+gc);
2019
- d1[2] = clamp(yy+rc);
2020
- d1[3] = 255;
2021
- }
2022
- else
2023
- {
2024
- d0[0] = clamp(yy+rc);
2025
- d0[1] = clamp(yy+gc);
2026
- d0[2] = clamp(yy+bc);
2027
- d0[3] = 255;
2028
- yy = y[8+j];
2029
- d1[0] = clamp(yy+rc);
2030
- d1[1] = clamp(yy+gc);
2031
- d1[2] = clamp(yy+bc);
2032
- d1[3] = 255;
2033
- }
2034
-
2035
- d0 += 4;
2036
- d1 += 4;
2037
- }
2038
-
2039
- y += 64*4;
2040
- c += 64*4;
2041
- }
2042
- }
2043
-
2044
- // YCbCr H2V2 (2x2:1:1, 6 m_blocks per MCU) to RGB
2045
- void jpeg_decoder::H2V2Convert()
2046
- {
2047
- int row = m_max_mcu_y_size - m_mcu_lines_left;
2048
- uint8 *d0 = m_pScan_line_0;
2049
- uint8 *d1 = m_pScan_line_1;
2050
- uint8 *y;
2051
- uint8 *c;
2052
-
2053
- if (row < 8)
2054
- y = m_pSample_buf + row * 8;
2055
- else
2056
- y = m_pSample_buf + 64*2 + (row & 7) * 8;
2057
-
2058
- c = m_pSample_buf + 64*4 + (row >> 1) * 8;
2059
-
2060
- for (int i = m_max_mcus_per_row; i > 0; i--)
2061
- {
2062
- for (int l = 0; l < 2; l++)
2063
- {
2064
- for (int j = 0; j < 8; j += 2)
2065
- {
2066
- int cb = c[0];
2067
- int cr = c[64];
2068
-
2069
- int rc = m_crr[cr];
2070
- int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
2071
- int bc = m_cbb[cb];
2072
-
2073
- int yy = y[j];
2074
- if (jpg_format == ERGBFormatJPG::BGRA)
2075
- {
2076
- d0[0] = clamp(yy+bc);
2077
- d0[1] = clamp(yy+gc);
2078
- d0[2] = clamp(yy+rc);
2079
- d0[3] = 255;
2080
- yy = y[j+1];
2081
- d0[4] = clamp(yy+bc);
2082
- d0[5] = clamp(yy+gc);
2083
- d0[6] = clamp(yy+rc);
2084
- d0[7] = 255;
2085
- yy = y[j+8];
2086
- d1[0] = clamp(yy+bc);
2087
- d1[1] = clamp(yy+gc);
2088
- d1[2] = clamp(yy+rc);
2089
- d1[3] = 255;
2090
- yy = y[j+8+1];
2091
- d1[4] = clamp(yy+bc);
2092
- d1[5] = clamp(yy+gc);
2093
- d1[6] = clamp(yy+rc);
2094
- d1[7] = 255;
2095
- }
2096
- else
2097
- {
2098
- d0[0] = clamp(yy+rc);
2099
- d0[1] = clamp(yy+gc);
2100
- d0[2] = clamp(yy+bc);
2101
- d0[3] = 255;
2102
- yy = y[j+1];
2103
- d0[4] = clamp(yy+rc);
2104
- d0[5] = clamp(yy+gc);
2105
- d0[6] = clamp(yy+bc);
2106
- d0[7] = 255;
2107
- yy = y[j+8];
2108
- d1[0] = clamp(yy+rc);
2109
- d1[1] = clamp(yy+gc);
2110
- d1[2] = clamp(yy+bc);
2111
- d1[3] = 255;
2112
- yy = y[j+8+1];
2113
- d1[4] = clamp(yy+rc);
2114
- d1[5] = clamp(yy+gc);
2115
- d1[6] = clamp(yy+bc);
2116
- d1[7] = 255;
2117
- }
2118
-
2119
- d0 += 8;
2120
- d1 += 8;
2121
-
2122
- c++;
2123
- }
2124
- y += 64;
2125
- }
2126
-
2127
- y += 64*6 - 64*2;
2128
- c += 64*6 - 8;
2129
- }
2130
- }
2131
-
2132
- // Y (1 block per MCU) to 8-bit grayscale
2133
- void jpeg_decoder::gray_convert()
2134
- {
2135
- int row = m_max_mcu_y_size - m_mcu_lines_left;
2136
- uint8 *d = m_pScan_line_0;
2137
- uint8 *s = m_pSample_buf + row * 8;
2138
-
2139
- for (int i = m_max_mcus_per_row; i > 0; i--)
2140
- {
2141
- *(uint *)d = *(uint *)s;
2142
- *(uint *)(&d[4]) = *(uint *)(&s[4]);
2143
-
2144
- s += 64;
2145
- d += 8;
2146
- }
2147
- }
2148
-
2149
- void jpeg_decoder::expanded_convert()
2150
- {
2151
- int row = m_max_mcu_y_size - m_mcu_lines_left;
2152
-
2153
- uint8* Py = m_pSample_buf + (row / 8) * 64 * m_comp_h_samp[0] + (row & 7) * 8;
2154
-
2155
- uint8* d = m_pScan_line_0;
2156
-
2157
- for (int i = m_max_mcus_per_row; i > 0; i--)
2158
- {
2159
- for (int k = 0; k < m_max_mcu_x_size; k += 8)
2160
- {
2161
- const int Y_ofs = k * 8;
2162
- const int Cb_ofs = Y_ofs + 64 * m_expanded_blocks_per_component;
2163
- const int Cr_ofs = Y_ofs + 64 * m_expanded_blocks_per_component * 2;
2164
- for (int j = 0; j < 8; j++)
2165
- {
2166
- int y = Py[Y_ofs + j];
2167
- int cb = Py[Cb_ofs + j];
2168
- int cr = Py[Cr_ofs + j];
2169
-
2170
- if (jpg_format == ERGBFormatJPG::BGRA)
2171
- {
2172
- d[0] = clamp(y + m_cbb[cb]);
2173
- d[1] = clamp(y + ((m_crg[cr] + m_cbg[cb]) >> 16));
2174
- d[2] = clamp(y + m_crr[cr]);
2175
- d[3] = 255;
2176
- }
2177
- else
2178
- {
2179
- d[0] = clamp(y + m_crr[cr]);
2180
- d[1] = clamp(y + ((m_crg[cr] + m_cbg[cb]) >> 16));
2181
- d[2] = clamp(y + m_cbb[cb]);
2182
- d[3] = 255;
2183
- }
2184
-
2185
- d += 4;
2186
- }
2187
- }
2188
-
2189
- Py += 64 * m_expanded_blocks_per_mcu;
2190
- }
2191
- }
2192
-
2193
- // Find end of image (EOI) marker, so we can return to the user the exact size of the input stream.
2194
- void jpeg_decoder::find_eoi()
2195
- {
2196
- if (!m_progressive_flag)
2197
- {
2198
- // Attempt to read the EOI marker.
2199
- //get_bits_no_markers(m_bits_left & 7);
2200
-
2201
- // Prime the bit buffer
2202
- m_bits_left = 16;
2203
- get_bits(16);
2204
- get_bits(16);
2205
-
2206
- // The next marker _should_ be EOI
2207
- process_markers();
2208
- }
2209
-
2210
- m_total_bytes_read -= m_in_buf_left;
2211
- }
2212
-
2213
- int jpeg_decoder::decode(const void** pScan_line, uint* pScan_line_len)
2214
- {
2215
- if ((m_error_code) || (!m_ready_flag))
2216
- return JPGD_FAILED;
2217
-
2218
- if (m_total_lines_left == 0)
2219
- return JPGD_DONE;
2220
-
2221
- if (m_mcu_lines_left == 0)
2222
- {
2223
- if (setjmp(m_jmp_state))
2224
- return JPGD_FAILED;
2225
-
2226
- if (m_progressive_flag)
2227
- load_next_row();
2228
- else
2229
- decode_next_row();
2230
-
2231
- // Find the EOI marker if that was the last row.
2232
- if (m_total_lines_left <= m_max_mcu_y_size)
2233
- find_eoi();
2234
-
2235
- m_mcu_lines_left = m_max_mcu_y_size;
2236
- }
2237
-
2238
- if (m_freq_domain_chroma_upsample)
2239
- {
2240
- expanded_convert();
2241
- *pScan_line = m_pScan_line_0;
2242
- }
2243
- else
2244
- {
2245
- switch (m_scan_type)
2246
- {
2247
- case JPGD_YH2V2:
2248
- {
2249
- if ((m_mcu_lines_left & 1) == 0)
2250
- {
2251
- H2V2Convert();
2252
- *pScan_line = m_pScan_line_0;
2253
- }
2254
- else
2255
- *pScan_line = m_pScan_line_1;
2256
-
2257
- break;
2258
- }
2259
- case JPGD_YH2V1:
2260
- {
2261
- H2V1Convert();
2262
- *pScan_line = m_pScan_line_0;
2263
- break;
2264
- }
2265
- case JPGD_YH1V2:
2266
- {
2267
- if ((m_mcu_lines_left & 1) == 0)
2268
- {
2269
- H1V2Convert();
2270
- *pScan_line = m_pScan_line_0;
2271
- }
2272
- else
2273
- *pScan_line = m_pScan_line_1;
2274
-
2275
- break;
2276
- }
2277
- case JPGD_YH1V1:
2278
- {
2279
- H1V1Convert();
2280
- *pScan_line = m_pScan_line_0;
2281
- break;
2282
- }
2283
- case JPGD_GRAYSCALE:
2284
- {
2285
- gray_convert();
2286
- *pScan_line = m_pScan_line_0;
2287
-
2288
- break;
2289
- }
2290
- }
2291
- }
2292
-
2293
- *pScan_line_len = m_real_dest_bytes_per_scan_line;
2294
-
2295
- m_mcu_lines_left--;
2296
- m_total_lines_left--;
2297
-
2298
- return JPGD_SUCCESS;
2299
- }
2300
-
2301
- // Creates the tables needed for efficient Huffman decoding.
2302
- void jpeg_decoder::make_huff_table(int index, huff_tables *pH)
2303
- {
2304
- int p, i, l, si;
2305
- uint8 huffsize[257];
2306
- uint huffcode[257];
2307
- uint code;
2308
- uint subtree;
2309
- int code_size;
2310
- int lastp;
2311
- int nextfreeentry;
2312
- int currententry;
2313
-
2314
- pH->ac_table = m_huff_ac[index] != 0;
2315
-
2316
- p = 0;
2317
-
2318
- for (l = 1; l <= 16; l++)
2319
- {
2320
- for (i = 1; i <= m_huff_num[index][l]; i++)
2321
- huffsize[p++] = static_cast<uint8>(l);
2322
- }
2323
-
2324
- huffsize[p] = 0;
2325
-
2326
- lastp = p;
2327
-
2328
- code = 0;
2329
- si = huffsize[0];
2330
- p = 0;
2331
-
2332
- while (huffsize[p])
2333
- {
2334
- while (huffsize[p] == si)
2335
- {
2336
- huffcode[p++] = code;
2337
- code++;
2338
- }
2339
-
2340
- code <<= 1;
2341
- si++;
2342
- }
2343
-
2344
- memset(pH->look_up, 0, sizeof(pH->look_up));
2345
- memset(pH->look_up2, 0, sizeof(pH->look_up2));
2346
- memset(pH->tree, 0, sizeof(pH->tree));
2347
- memset(pH->code_size, 0, sizeof(pH->code_size));
2348
-
2349
- nextfreeentry = -1;
2350
-
2351
- p = 0;
2352
-
2353
- while (p < lastp)
2354
- {
2355
- i = m_huff_val[index][p];
2356
- code = huffcode[p];
2357
- code_size = huffsize[p];
2358
-
2359
- pH->code_size[i] = static_cast<uint8>(code_size);
2360
-
2361
- if (code_size <= 8)
2362
- {
2363
- code <<= (8 - code_size);
2364
-
2365
- for (l = 1 << (8 - code_size); l > 0; l--)
2366
- {
2367
- JPGD_ASSERT(i < 256);
2368
-
2369
- pH->look_up[code] = i;
2370
-
2371
- bool has_extrabits = false;
2372
- int extra_bits = 0;
2373
- int num_extra_bits = i & 15;
2374
-
2375
- int bits_to_fetch = code_size;
2376
- if (num_extra_bits)
2377
- {
2378
- int total_codesize = code_size + num_extra_bits;
2379
- if (total_codesize <= 8)
2380
- {
2381
- has_extrabits = true;
2382
- extra_bits = ((1 << num_extra_bits) - 1) & (code >> (8 - total_codesize));
2383
- JPGD_ASSERT(extra_bits <= 0x7FFF);
2384
- bits_to_fetch += num_extra_bits;
2385
- }
2386
- }
2387
-
2388
- if (!has_extrabits)
2389
- pH->look_up2[code] = i | (bits_to_fetch << 8);
2390
- else
2391
- pH->look_up2[code] = i | 0x8000 | (extra_bits << 16) | (bits_to_fetch << 8);
2392
-
2393
- code++;
2394
- }
2395
- }
2396
- else
2397
- {
2398
- subtree = (code >> (code_size - 8)) & 0xFF;
2399
-
2400
- currententry = pH->look_up[subtree];
2401
-
2402
- if (currententry == 0)
2403
- {
2404
- pH->look_up[subtree] = currententry = nextfreeentry;
2405
- pH->look_up2[subtree] = currententry = nextfreeentry;
2406
-
2407
- nextfreeentry -= 2;
2408
- }
2409
-
2410
- code <<= (16 - (code_size - 8));
2411
-
2412
- for (l = code_size; l > 9; l--)
2413
- {
2414
- if ((code & 0x8000) == 0)
2415
- currententry--;
2416
-
2417
- if (pH->tree[-currententry - 1] == 0)
2418
- {
2419
- pH->tree[-currententry - 1] = nextfreeentry;
2420
-
2421
- currententry = nextfreeentry;
2422
-
2423
- nextfreeentry -= 2;
2424
- }
2425
- else
2426
- currententry = pH->tree[-currententry - 1];
2427
-
2428
- code <<= 1;
2429
- }
2430
-
2431
- if ((code & 0x8000) == 0)
2432
- currententry--;
2433
-
2434
- pH->tree[-currententry - 1] = i;
2435
- }
2436
-
2437
- p++;
2438
- }
2439
- }
2440
-
2441
- // Verifies the quantization tables needed for this scan are available.
2442
- void jpeg_decoder::check_quant_tables()
2443
- {
2444
- for (int i = 0; i < m_comps_in_scan; i++)
2445
- if (m_quant[m_comp_quant[m_comp_list[i]]] == NULL)
2446
- stop_decoding(JPGD_UNDEFINED_QUANT_TABLE);
2447
- }
2448
-
2449
- // Verifies that all the Huffman tables needed for this scan are available.
2450
- void jpeg_decoder::check_huff_tables()
2451
- {
2452
- for (int i = 0; i < m_comps_in_scan; i++)
2453
- {
2454
- if ((m_spectral_start == 0) && (m_huff_num[m_comp_dc_tab[m_comp_list[i]]] == NULL))
2455
- stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
2456
-
2457
- if ((m_spectral_end > 0) && (m_huff_num[m_comp_ac_tab[m_comp_list[i]]] == NULL))
2458
- stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
2459
- }
2460
-
2461
- for (int i = 0; i < JPGD_MAX_HUFF_TABLES; i++)
2462
- if (m_huff_num[i])
2463
- {
2464
- if (!m_pHuff_tabs[i])
2465
- m_pHuff_tabs[i] = (huff_tables *)alloc(sizeof(huff_tables));
2466
-
2467
- make_huff_table(i, m_pHuff_tabs[i]);
2468
- }
2469
- }
2470
-
2471
- // Determines the component order inside each MCU.
2472
- // Also calcs how many MCU's are on each row, etc.
2473
- void jpeg_decoder::calc_mcu_block_order()
2474
- {
2475
- int component_num, component_id;
2476
- int max_h_samp = 0, max_v_samp = 0;
2477
-
2478
- for (component_id = 0; component_id < m_comps_in_frame; component_id++)
2479
- {
2480
- if (m_comp_h_samp[component_id] > max_h_samp)
2481
- max_h_samp = m_comp_h_samp[component_id];
2482
-
2483
- if (m_comp_v_samp[component_id] > max_v_samp)
2484
- max_v_samp = m_comp_v_samp[component_id];
2485
- }
2486
-
2487
- for (component_id = 0; component_id < m_comps_in_frame; component_id++)
2488
- {
2489
- m_comp_h_blocks[component_id] = ((((m_image_x_size * m_comp_h_samp[component_id]) + (max_h_samp - 1)) / max_h_samp) + 7) / 8;
2490
- m_comp_v_blocks[component_id] = ((((m_image_y_size * m_comp_v_samp[component_id]) + (max_v_samp - 1)) / max_v_samp) + 7) / 8;
2491
- }
2492
-
2493
- if (m_comps_in_scan == 1)
2494
- {
2495
- m_mcus_per_row = m_comp_h_blocks[m_comp_list[0]];
2496
- m_mcus_per_col = m_comp_v_blocks[m_comp_list[0]];
2497
- }
2498
- else
2499
- {
2500
- m_mcus_per_row = (((m_image_x_size + 7) / 8) + (max_h_samp - 1)) / max_h_samp;
2501
- m_mcus_per_col = (((m_image_y_size + 7) / 8) + (max_v_samp - 1)) / max_v_samp;
2502
- }
2503
-
2504
- if (m_comps_in_scan == 1)
2505
- {
2506
- m_mcu_org[0] = m_comp_list[0];
2507
-
2508
- m_blocks_per_mcu = 1;
2509
- }
2510
- else
2511
- {
2512
- m_blocks_per_mcu = 0;
2513
-
2514
- for (component_num = 0; component_num < m_comps_in_scan; component_num++)
2515
- {
2516
- int num_blocks;
2517
-
2518
- component_id = m_comp_list[component_num];
2519
-
2520
- num_blocks = m_comp_h_samp[component_id] * m_comp_v_samp[component_id];
2521
-
2522
- while (num_blocks--)
2523
- m_mcu_org[m_blocks_per_mcu++] = component_id;
2524
- }
2525
- }
2526
- }
2527
-
2528
- // Starts a new scan.
2529
- int jpeg_decoder::init_scan()
2530
- {
2531
- if (!locate_sos_marker())
2532
- return JPGD_FALSE;
2533
-
2534
- calc_mcu_block_order();
2535
-
2536
- check_huff_tables();
2537
-
2538
- check_quant_tables();
2539
-
2540
- memset(m_last_dc_val, 0, m_comps_in_frame * sizeof(uint));
2541
-
2542
- m_eob_run = 0;
2543
-
2544
- if (m_restart_interval)
2545
- {
2546
- m_restarts_left = m_restart_interval;
2547
- m_next_restart_num = 0;
2548
- }
2549
-
2550
- fix_in_buffer();
2551
-
2552
- return JPGD_TRUE;
2553
- }
2554
-
2555
- // Starts a frame. Determines if the number of components or sampling factors
2556
- // are supported.
2557
- void jpeg_decoder::init_frame()
2558
- {
2559
- int i;
2560
-
2561
- if (m_comps_in_frame == 1)
2562
- {
2563
- if ((m_comp_h_samp[0] != 1) || (m_comp_v_samp[0] != 1))
2564
- stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2565
-
2566
- m_scan_type = JPGD_GRAYSCALE;
2567
- m_max_blocks_per_mcu = 1;
2568
- m_max_mcu_x_size = 8;
2569
- m_max_mcu_y_size = 8;
2570
- }
2571
- else if (m_comps_in_frame == 3)
2572
- {
2573
- if ( ((m_comp_h_samp[1] != 1) || (m_comp_v_samp[1] != 1)) ||
2574
- ((m_comp_h_samp[2] != 1) || (m_comp_v_samp[2] != 1)) )
2575
- stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2576
-
2577
- if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 1))
2578
- {
2579
- m_scan_type = JPGD_YH1V1;
2580
-
2581
- m_max_blocks_per_mcu = 3;
2582
- m_max_mcu_x_size = 8;
2583
- m_max_mcu_y_size = 8;
2584
- }
2585
- else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 1))
2586
- {
2587
- m_scan_type = JPGD_YH2V1;
2588
- m_max_blocks_per_mcu = 4;
2589
- m_max_mcu_x_size = 16;
2590
- m_max_mcu_y_size = 8;
2591
- }
2592
- else if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 2))
2593
- {
2594
- m_scan_type = JPGD_YH1V2;
2595
- m_max_blocks_per_mcu = 4;
2596
- m_max_mcu_x_size = 8;
2597
- m_max_mcu_y_size = 16;
2598
- }
2599
- else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 2))
2600
- {
2601
- m_scan_type = JPGD_YH2V2;
2602
- m_max_blocks_per_mcu = 6;
2603
- m_max_mcu_x_size = 16;
2604
- m_max_mcu_y_size = 16;
2605
- }
2606
- else
2607
- stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2608
- }
2609
- else
2610
- stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
2611
-
2612
- m_max_mcus_per_row = (m_image_x_size + (m_max_mcu_x_size - 1)) / m_max_mcu_x_size;
2613
- m_max_mcus_per_col = (m_image_y_size + (m_max_mcu_y_size - 1)) / m_max_mcu_y_size;
2614
-
2615
- // These values are for the *destination* pixels: after conversion.
2616
- if (m_scan_type == JPGD_GRAYSCALE)
2617
- m_dest_bytes_per_pixel = 1;
2618
- else
2619
- m_dest_bytes_per_pixel = 4;
2620
-
2621
- m_dest_bytes_per_scan_line = ((m_image_x_size + 15) & 0xFFF0) * m_dest_bytes_per_pixel;
2622
-
2623
- m_real_dest_bytes_per_scan_line = (m_image_x_size * m_dest_bytes_per_pixel);
2624
-
2625
- // Initialize two scan line buffers.
2626
- m_pScan_line_0 = (uint8 *)alloc(m_dest_bytes_per_scan_line, true);
2627
- if ((m_scan_type == JPGD_YH1V2) || (m_scan_type == JPGD_YH2V2))
2628
- m_pScan_line_1 = (uint8 *)alloc(m_dest_bytes_per_scan_line, true);
2629
-
2630
- m_max_blocks_per_row = m_max_mcus_per_row * m_max_blocks_per_mcu;
2631
-
2632
- // Should never happen
2633
- if (m_max_blocks_per_row > JPGD_MAX_BLOCKS_PER_ROW)
2634
- stop_decoding(JPGD_ASSERTION_ERROR);
2635
-
2636
- // Allocate the coefficient buffer, enough for one MCU
2637
- m_pMCU_coefficients = (jpgd_block_t*)alloc(m_max_blocks_per_mcu * 64 * sizeof(jpgd_block_t));
2638
-
2639
- for (i = 0; i < m_max_blocks_per_mcu; i++)
2640
- m_mcu_block_max_zag[i] = 64;
2641
-
2642
- m_expanded_blocks_per_component = m_comp_h_samp[0] * m_comp_v_samp[0];
2643
- m_expanded_blocks_per_mcu = m_expanded_blocks_per_component * m_comps_in_frame;
2644
- m_expanded_blocks_per_row = m_max_mcus_per_row * m_expanded_blocks_per_mcu;
2645
- // Freq. domain chroma upsampling is only supported for H2V2 subsampling factor.
2646
- // BEGIN EPIC MOD
2647
- #if JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING
2648
- m_freq_domain_chroma_upsample = (m_expanded_blocks_per_mcu == 4*3);
2649
- #else
2650
- m_freq_domain_chroma_upsample = 0;
2651
- #endif
2652
- // END EPIC MOD
2653
-
2654
- if (m_freq_domain_chroma_upsample)
2655
- m_pSample_buf = (uint8 *)alloc(m_expanded_blocks_per_row * 64);
2656
- else
2657
- m_pSample_buf = (uint8 *)alloc(m_max_blocks_per_row * 64);
2658
-
2659
- m_total_lines_left = m_image_y_size;
2660
-
2661
- m_mcu_lines_left = 0;
2662
-
2663
- create_look_ups();
2664
- }
2665
-
2666
- // The coeff_buf series of methods originally stored the coefficients
2667
- // into a "virtual" file which was located in EMS, XMS, or a disk file. A cache
2668
- // was used to make this process more efficient. Now, we can store the entire
2669
- // thing in RAM.
2670
- jpeg_decoder::coeff_buf* jpeg_decoder::coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y)
2671
- {
2672
- coeff_buf* cb = (coeff_buf*)alloc(sizeof(coeff_buf));
2673
-
2674
- cb->block_num_x = block_num_x;
2675
- cb->block_num_y = block_num_y;
2676
- cb->block_len_x = block_len_x;
2677
- cb->block_len_y = block_len_y;
2678
- cb->block_size = (block_len_x * block_len_y) * sizeof(jpgd_block_t);
2679
- cb->pData = (uint8 *)alloc(cb->block_size * block_num_x * block_num_y, true);
2680
- return cb;
2681
- }
2682
-
2683
- inline jpgd_block_t *jpeg_decoder::coeff_buf_getp(coeff_buf *cb, int block_x, int block_y)
2684
- {
2685
- JPGD_ASSERT((block_x < cb->block_num_x) && (block_y < cb->block_num_y));
2686
- return (jpgd_block_t *)(cb->pData + block_x * cb->block_size + block_y * (cb->block_size * cb->block_num_x));
2687
- }
2688
-
2689
- // The following methods decode the various types of m_blocks encountered
2690
- // in progressively encoded images.
2691
- void jpeg_decoder::decode_block_dc_first(jpeg_decoder *pD, int component_id, int block_x, int block_y)
2692
- {
2693
- int s, r;
2694
- jpgd_block_t *p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y);
2695
-
2696
- if ((s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_dc_tab[component_id]])) != 0)
2697
- {
2698
- r = pD->get_bits_no_markers(s);
2699
- s = HUFF_EXTEND(r, s);
2700
- }
2701
-
2702
- pD->m_last_dc_val[component_id] = (s += pD->m_last_dc_val[component_id]);
2703
-
2704
- p[0] = static_cast<jpgd_block_t>(s << pD->m_successive_low);
2705
- }
2706
-
2707
- void jpeg_decoder::decode_block_dc_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y)
2708
- {
2709
- if (pD->get_bits_no_markers(1))
2710
- {
2711
- jpgd_block_t *p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y);
2712
-
2713
- p[0] |= (1 << pD->m_successive_low);
2714
- }
2715
- }
2716
-
2717
- void jpeg_decoder::decode_block_ac_first(jpeg_decoder *pD, int component_id, int block_x, int block_y)
2718
- {
2719
- int k, s, r;
2720
-
2721
- if (pD->m_eob_run)
2722
- {
2723
- pD->m_eob_run--;
2724
- return;
2725
- }
2726
-
2727
- jpgd_block_t *p = pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y);
2728
-
2729
- for (k = pD->m_spectral_start; k <= pD->m_spectral_end; k++)
2730
- {
2731
- s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_ac_tab[component_id]]);
2732
-
2733
- r = s >> 4;
2734
- s &= 15;
2735
-
2736
- if (s)
2737
- {
2738
- if ((k += r) > 63)
2739
- pD->stop_decoding(JPGD_DECODE_ERROR);
2740
-
2741
- r = pD->get_bits_no_markers(s);
2742
- s = HUFF_EXTEND(r, s);
2743
-
2744
- p[g_ZAG[k]] = static_cast<jpgd_block_t>(s << pD->m_successive_low);
2745
- }
2746
- else
2747
- {
2748
- if (r == 15)
2749
- {
2750
- if ((k += 15) > 63)
2751
- pD->stop_decoding(JPGD_DECODE_ERROR);
2752
- }
2753
- else
2754
- {
2755
- pD->m_eob_run = 1 << r;
2756
-
2757
- if (r)
2758
- pD->m_eob_run += pD->get_bits_no_markers(r);
2759
-
2760
- pD->m_eob_run--;
2761
-
2762
- break;
2763
- }
2764
- }
2765
- }
2766
- }
2767
-
2768
- void jpeg_decoder::decode_block_ac_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y)
2769
- {
2770
- int s, k, r;
2771
- int p1 = 1 << pD->m_successive_low;
2772
- int m1 = (-1) << pD->m_successive_low;
2773
- jpgd_block_t *p = pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y);
2774
-
2775
- k = pD->m_spectral_start;
2776
-
2777
- if (pD->m_eob_run == 0)
2778
- {
2779
- for ( ; k <= pD->m_spectral_end; k++)
2780
- {
2781
- s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_ac_tab[component_id]]);
2782
-
2783
- r = s >> 4;
2784
- s &= 15;
2785
-
2786
- if (s)
2787
- {
2788
- if (s != 1)
2789
- pD->stop_decoding(JPGD_DECODE_ERROR);
2790
-
2791
- if (pD->get_bits_no_markers(1))
2792
- s = p1;
2793
- else
2794
- s = m1;
2795
- }
2796
- else
2797
- {
2798
- if (r != 15)
2799
- {
2800
- pD->m_eob_run = 1 << r;
2801
-
2802
- if (r)
2803
- pD->m_eob_run += pD->get_bits_no_markers(r);
2804
-
2805
- break;
2806
- }
2807
- }
2808
-
2809
- do
2810
- {
2811
- // BEGIN EPIC MOD
2812
- JPGD_ASSERT(k < 64);
2813
- // END EPIC MOD
2814
-
2815
- jpgd_block_t *this_coef = p + g_ZAG[k];
2816
-
2817
- if (*this_coef != 0)
2818
- {
2819
- if (pD->get_bits_no_markers(1))
2820
- {
2821
- if ((*this_coef & p1) == 0)
2822
- {
2823
- if (*this_coef >= 0)
2824
- *this_coef = static_cast<jpgd_block_t>(*this_coef + p1);
2825
- else
2826
- *this_coef = static_cast<jpgd_block_t>(*this_coef + m1);
2827
- }
2828
- }
2829
- }
2830
- else
2831
- {
2832
- if (--r < 0)
2833
- break;
2834
- }
2835
-
2836
- k++;
2837
-
2838
- } while (k <= pD->m_spectral_end);
2839
-
2840
- if ((s) && (k < 64))
2841
- {
2842
- p[g_ZAG[k]] = static_cast<jpgd_block_t>(s);
2843
- }
2844
- }
2845
- }
2846
-
2847
- if (pD->m_eob_run > 0)
2848
- {
2849
- for ( ; k <= pD->m_spectral_end; k++)
2850
- {
2851
- // BEGIN EPIC MOD
2852
- JPGD_ASSERT(k < 64);
2853
- // END EPIC MOD
2854
-
2855
- jpgd_block_t *this_coef = p + g_ZAG[k];
2856
-
2857
- if (*this_coef != 0)
2858
- {
2859
- if (pD->get_bits_no_markers(1))
2860
- {
2861
- if ((*this_coef & p1) == 0)
2862
- {
2863
- if (*this_coef >= 0)
2864
- *this_coef = static_cast<jpgd_block_t>(*this_coef + p1);
2865
- else
2866
- *this_coef = static_cast<jpgd_block_t>(*this_coef + m1);
2867
- }
2868
- }
2869
- }
2870
- }
2871
-
2872
- pD->m_eob_run--;
2873
- }
2874
- }
2875
-
2876
- // Decode a scan in a progressively encoded image.
2877
- void jpeg_decoder::decode_scan(pDecode_block_func decode_block_func)
2878
- {
2879
- int mcu_row, mcu_col, mcu_block;
2880
- int block_x_mcu[JPGD_MAX_COMPONENTS], m_block_y_mcu[JPGD_MAX_COMPONENTS];
2881
-
2882
- memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu));
2883
-
2884
- for (mcu_col = 0; mcu_col < m_mcus_per_col; mcu_col++)
2885
- {
2886
- int component_num, component_id;
2887
-
2888
- memset(block_x_mcu, 0, sizeof(block_x_mcu));
2889
-
2890
- for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
2891
- {
2892
- int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
2893
-
2894
- if ((m_restart_interval) && (m_restarts_left == 0))
2895
- process_restart();
2896
-
2897
- for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
2898
- {
2899
- component_id = m_mcu_org[mcu_block];
2900
-
2901
- decode_block_func(this, component_id, block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs);
2902
-
2903
- if (m_comps_in_scan == 1)
2904
- block_x_mcu[component_id]++;
2905
- else
2906
- {
2907
- if (++block_x_mcu_ofs == m_comp_h_samp[component_id])
2908
- {
2909
- block_x_mcu_ofs = 0;
2910
-
2911
- if (++block_y_mcu_ofs == m_comp_v_samp[component_id])
2912
- {
2913
- block_y_mcu_ofs = 0;
2914
- block_x_mcu[component_id] += m_comp_h_samp[component_id];
2915
- }
2916
- }
2917
- }
2918
- }
2919
-
2920
- m_restarts_left--;
2921
- }
2922
-
2923
- if (m_comps_in_scan == 1)
2924
- m_block_y_mcu[m_comp_list[0]]++;
2925
- else
2926
- {
2927
- for (component_num = 0; component_num < m_comps_in_scan; component_num++)
2928
- {
2929
- component_id = m_comp_list[component_num];
2930
- m_block_y_mcu[component_id] += m_comp_v_samp[component_id];
2931
- }
2932
- }
2933
- }
2934
- }
2935
-
2936
- // Decode a progressively encoded image.
2937
- void jpeg_decoder::init_progressive()
2938
- {
2939
- int i;
2940
-
2941
- if (m_comps_in_frame == 4)
2942
- stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
2943
-
2944
- // Allocate the coefficient buffers.
2945
- for (i = 0; i < m_comps_in_frame; i++)
2946
- {
2947
- m_dc_coeffs[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp[i], m_max_mcus_per_col * m_comp_v_samp[i], 1, 1);
2948
- m_ac_coeffs[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp[i], m_max_mcus_per_col * m_comp_v_samp[i], 8, 8);
2949
- }
2950
-
2951
- for ( ; ; )
2952
- {
2953
- int dc_only_scan, refinement_scan;
2954
- pDecode_block_func decode_block_func;
2955
-
2956
- if (!init_scan())
2957
- break;
2958
-
2959
- dc_only_scan = (m_spectral_start == 0);
2960
- refinement_scan = (m_successive_high != 0);
2961
-
2962
- if ((m_spectral_start > m_spectral_end) || (m_spectral_end > 63))
2963
- stop_decoding(JPGD_BAD_SOS_SPECTRAL);
2964
-
2965
- if (dc_only_scan)
2966
- {
2967
- if (m_spectral_end)
2968
- stop_decoding(JPGD_BAD_SOS_SPECTRAL);
2969
- }
2970
- else if (m_comps_in_scan != 1) /* AC scans can only contain one component */
2971
- stop_decoding(JPGD_BAD_SOS_SPECTRAL);
2972
-
2973
- if ((refinement_scan) && (m_successive_low != m_successive_high - 1))
2974
- stop_decoding(JPGD_BAD_SOS_SUCCESSIVE);
2975
-
2976
- if (dc_only_scan)
2977
- {
2978
- if (refinement_scan)
2979
- decode_block_func = decode_block_dc_refine;
2980
- else
2981
- decode_block_func = decode_block_dc_first;
2982
- }
2983
- else
2984
- {
2985
- if (refinement_scan)
2986
- decode_block_func = decode_block_ac_refine;
2987
- else
2988
- decode_block_func = decode_block_ac_first;
2989
- }
2990
-
2991
- decode_scan(decode_block_func);
2992
-
2993
- m_bits_left = 16;
2994
- get_bits(16);
2995
- get_bits(16);
2996
- }
2997
-
2998
- m_comps_in_scan = m_comps_in_frame;
2999
-
3000
- for (i = 0; i < m_comps_in_frame; i++)
3001
- m_comp_list[i] = i;
3002
-
3003
- calc_mcu_block_order();
3004
- }
3005
-
3006
- void jpeg_decoder::init_sequential()
3007
- {
3008
- if (!init_scan())
3009
- stop_decoding(JPGD_UNEXPECTED_MARKER);
3010
- }
3011
-
3012
- void jpeg_decoder::decode_start()
3013
- {
3014
- init_frame();
3015
-
3016
- if (m_progressive_flag)
3017
- init_progressive();
3018
- else
3019
- init_sequential();
3020
- }
3021
-
3022
- void jpeg_decoder::decode_init(jpeg_decoder_stream *pStream)
3023
- {
3024
- init(pStream);
3025
- locate_sof_marker();
3026
- }
3027
-
3028
- jpeg_decoder::jpeg_decoder(jpeg_decoder_stream *pStream)
3029
- {
3030
- if (setjmp(m_jmp_state))
3031
- return;
3032
- decode_init(pStream);
3033
- }
3034
-
3035
- int jpeg_decoder::begin_decoding()
3036
- {
3037
- if (m_ready_flag)
3038
- return JPGD_SUCCESS;
3039
-
3040
- if (m_error_code)
3041
- return JPGD_FAILED;
3042
-
3043
- if (setjmp(m_jmp_state))
3044
- return JPGD_FAILED;
3045
-
3046
- decode_start();
3047
-
3048
- m_ready_flag = true;
3049
-
3050
- return JPGD_SUCCESS;
3051
- }
3052
-
3053
- jpeg_decoder::~jpeg_decoder()
3054
- {
3055
- free_all_blocks();
3056
- }
3057
-
3058
- jpeg_decoder_file_stream::jpeg_decoder_file_stream()
3059
- {
3060
- m_pFile = NULL;
3061
- m_eof_flag = false;
3062
- m_error_flag = false;
3063
- }
3064
-
3065
- void jpeg_decoder_file_stream::close()
3066
- {
3067
- if (m_pFile)
3068
- {
3069
- fclose(m_pFile);
3070
- m_pFile = NULL;
3071
- }
3072
-
3073
- m_eof_flag = false;
3074
- m_error_flag = false;
3075
- }
3076
-
3077
- jpeg_decoder_file_stream::~jpeg_decoder_file_stream()
3078
- {
3079
- close();
3080
- }
3081
-
3082
- bool jpeg_decoder_file_stream::open(const char *Pfilename)
3083
- {
3084
- close();
3085
-
3086
- m_eof_flag = false;
3087
- m_error_flag = false;
3088
-
3089
- #if defined(_MSC_VER)
3090
- m_pFile = NULL;
3091
- fopen_s(&m_pFile, Pfilename, "rb");
3092
- #else
3093
- m_pFile = fopen(Pfilename, "rb");
3094
- #endif
3095
- return m_pFile != NULL;
3096
- }
3097
-
3098
- int jpeg_decoder_file_stream::read(uint8 *pBuf, int max_bytes_to_read, bool *pEOF_flag)
3099
- {
3100
- if (!m_pFile)
3101
- return -1;
3102
-
3103
- if (m_eof_flag)
3104
- {
3105
- *pEOF_flag = true;
3106
- return 0;
3107
- }
3108
-
3109
- if (m_error_flag)
3110
- return -1;
3111
-
3112
- int bytes_read = static_cast<int>(fread(pBuf, 1, max_bytes_to_read, m_pFile));
3113
- if (bytes_read < max_bytes_to_read)
3114
- {
3115
- if (ferror(m_pFile))
3116
- {
3117
- m_error_flag = true;
3118
- return -1;
3119
- }
3120
-
3121
- m_eof_flag = true;
3122
- *pEOF_flag = true;
3123
- }
3124
-
3125
- return bytes_read;
3126
- }
3127
-
3128
- bool jpeg_decoder_mem_stream::open(const uint8 *pSrc_data, uint size)
3129
- {
3130
- close();
3131
- m_pSrc_data = pSrc_data;
3132
- m_ofs = 0;
3133
- m_size = size;
3134
- return true;
3135
- }
3136
-
3137
- int jpeg_decoder_mem_stream::read(uint8 *pBuf, int max_bytes_to_read, bool *pEOF_flag)
3138
- {
3139
- *pEOF_flag = false;
3140
-
3141
- if (!m_pSrc_data)
3142
- return -1;
3143
-
3144
- uint bytes_remaining = m_size - m_ofs;
3145
- if ((uint)max_bytes_to_read > bytes_remaining)
3146
- {
3147
- max_bytes_to_read = bytes_remaining;
3148
- *pEOF_flag = true;
3149
- }
3150
-
3151
- memcpy(pBuf, m_pSrc_data + m_ofs, max_bytes_to_read);
3152
- m_ofs += max_bytes_to_read;
3153
-
3154
- return max_bytes_to_read;
3155
- }
3156
-
3157
- unsigned char *decompress_jpeg_image_from_stream(jpeg_decoder_stream *pStream, int *width, int *height, int *actual_comps, int req_comps)
3158
- {
3159
- if (!actual_comps)
3160
- return NULL;
3161
- *actual_comps = 0;
3162
-
3163
- if ((!pStream) || (!width) || (!height) || (!req_comps))
3164
- return NULL;
3165
-
3166
- if ((req_comps != 1) && (req_comps != 3) && (req_comps != 4))
3167
- return NULL;
3168
-
3169
- jpeg_decoder decoder(pStream);
3170
- if (decoder.get_error_code() != JPGD_SUCCESS)
3171
- return NULL;
3172
-
3173
- const int image_width = decoder.get_width(), image_height = decoder.get_height();
3174
- *width = image_width;
3175
- *height = image_height;
3176
- *actual_comps = decoder.get_num_components();
3177
-
3178
- if (decoder.begin_decoding() != JPGD_SUCCESS)
3179
- return NULL;
3180
-
3181
- const int dst_bpl = image_width * req_comps;
3182
-
3183
- uint8 *pImage_data = (uint8*)jpgd_malloc(dst_bpl * image_height);
3184
- if (!pImage_data)
3185
- return NULL;
3186
-
3187
- for (int y = 0; y < image_height; y++)
3188
- {
3189
- const uint8* pScan_line = 0;
3190
- uint scan_line_len;
3191
- if (decoder.decode((const void**)&pScan_line, &scan_line_len) != JPGD_SUCCESS)
3192
- {
3193
- jpgd_free(pImage_data);
3194
- return NULL;
3195
- }
3196
-
3197
- uint8 *pDst = pImage_data + y * dst_bpl;
3198
-
3199
- if (((req_comps == 4) && (decoder.get_num_components() == 3)) ||
3200
- ((req_comps == 1) && (decoder.get_num_components() == 1)))
3201
- {
3202
- memcpy(pDst, pScan_line, dst_bpl);
3203
- }
3204
- else if (decoder.get_num_components() == 1)
3205
- {
3206
- if (req_comps == 3)
3207
- {
3208
- for (int x = 0; x < image_width; x++)
3209
- {
3210
- uint8 luma = pScan_line[x];
3211
- pDst[0] = luma;
3212
- pDst[1] = luma;
3213
- pDst[2] = luma;
3214
- pDst += 3;
3215
- }
3216
- }
3217
- else
3218
- {
3219
- for (int x = 0; x < image_width; x++)
3220
- {
3221
- uint8 luma = pScan_line[x];
3222
- pDst[0] = luma;
3223
- pDst[1] = luma;
3224
- pDst[2] = luma;
3225
- pDst[3] = 255;
3226
- pDst += 4;
3227
- }
3228
- }
3229
- }
3230
- else if (decoder.get_num_components() == 3)
3231
- {
3232
- if (req_comps == 1)
3233
- {
3234
- const int YR = 19595, YG = 38470, YB = 7471;
3235
- for (int x = 0; x < image_width; x++)
3236
- {
3237
- int r = pScan_line[x*4+0];
3238
- int g = pScan_line[x*4+1];
3239
- int b = pScan_line[x*4+2];
3240
- *pDst++ = static_cast<uint8>((r * YR + g * YG + b * YB + 32768) >> 16);
3241
- }
3242
- }
3243
- else
3244
- {
3245
- for (int x = 0; x < image_width; x++)
3246
- {
3247
- pDst[0] = pScan_line[x*4+0];
3248
- pDst[1] = pScan_line[x*4+1];
3249
- pDst[2] = pScan_line[x*4+2];
3250
- pDst += 3;
3251
- }
3252
- }
3253
- }
3254
- }
3255
-
3256
- return pImage_data;
3257
- }
3258
-
3259
- // BEGIN EPIC MOD
3260
- unsigned char *decompress_jpeg_image_from_memory(const unsigned char *pSrc_data, int src_data_size, int *width, int *height, int *actual_comps, int req_comps, int format)
3261
- {
3262
- jpg_format = (ERGBFormatJPG)format;
3263
- // EMD EPIC MOD
3264
- jpgd::jpeg_decoder_mem_stream mem_stream(pSrc_data, src_data_size);
3265
- return decompress_jpeg_image_from_stream(&mem_stream, width, height, actual_comps, req_comps);
3266
- }
3267
-
3268
- unsigned char *decompress_jpeg_image_from_file(const char *pSrc_filename, int *width, int *height, int *actual_comps, int req_comps)
3269
- {
3270
- jpgd::jpeg_decoder_file_stream file_stream;
3271
- if (!file_stream.open(pSrc_filename))
3272
- return NULL;
3273
- return decompress_jpeg_image_from_stream(&file_stream, width, height, actual_comps, req_comps);
3274
- }
3275
-
3276
- } // namespace jpgd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/test_project/cpp/longcode/jpge.cpp DELETED
@@ -1,1049 +0,0 @@
1
- // jpge.cpp - C++ class for JPEG compression.
2
- // Public domain, Rich Geldreich <richgel99@gmail.com>
3
- // v1.01, Dec. 18, 2010 - Initial release
4
- // v1.02, Apr. 6, 2011 - Removed 2x2 ordered dither in H2V1 chroma subsampling method load_block_16_8_8(). (The rounding factor was 2, when it should have been 1. Either way, it wasn't helping.)
5
- // v1.03, Apr. 16, 2011 - Added support for optimized Huffman code tables, optimized dynamic memory allocation down to only 1 alloc.
6
- // Also from Alex Evans: Added RGBA support, linear memory allocator (no longer needed in v1.03).
7
- // v1.04, May. 19, 2012: Forgot to set m_pFile ptr to NULL in cfile_stream::close(). Thanks to Owen Kaluza for reporting this bug.
8
- // Code tweaks to fix VS2008 static code analysis warnings (all looked harmless).
9
- // Code review revealed method load_block_16_8_8() (used for the non-default H2V1 sampling mode to downsample chroma) somehow didn't get the rounding factor fix from v1.02.
10
-
11
- #include "jpge.h"
12
-
13
- #include <stdlib.h>
14
- #include <string.h>
15
- #if PLATFORM_WINDOWS
16
- #include <malloc.h>
17
- #endif
18
-
19
- #define JPGE_MAX(a,b) (((a)>(b))?(a):(b))
20
- #define JPGE_MIN(a,b) (((a)<(b))?(a):(b))
21
-
22
- namespace jpge {
23
-
24
- static inline void *jpge_malloc(size_t nSize) { return FMemory::Malloc(nSize); }
25
- static inline void jpge_free(void *p) { FMemory::Free(p);; }
26
-
27
- // Various JPEG enums and tables.
28
- enum { M_SOF0 = 0xC0, M_DHT = 0xC4, M_SOI = 0xD8, M_EOI = 0xD9, M_SOS = 0xDA, M_DQT = 0xDB, M_APP0 = 0xE0 };
29
- enum { DC_LUM_CODES = 12, AC_LUM_CODES = 256, DC_CHROMA_CODES = 12, AC_CHROMA_CODES = 256, MAX_HUFF_SYMBOLS = 257, MAX_HUFF_CODESIZE = 32 };
30
-
31
- static uint8 s_zag[64] = { 0,1,8,16,9,2,3,10,17,24,32,25,18,11,4,5,12,19,26,33,40,48,41,34,27,20,13,6,7,14,21,28,35,42,49,56,57,50,43,36,29,22,15,23,30,37,44,51,58,59,52,45,38,31,39,46,53,60,61,54,47,55,62,63 };
32
- static int16 s_std_lum_quant[64] = { 16,11,12,14,12,10,16,14,13,14,18,17,16,19,24,40,26,24,22,22,24,49,35,37,29,40,58,51,61,60,57,51,56,55,64,72,92,78,64,68,87,69,55,56,80,109,81,87,95,98,103,104,103,62,77,113,121,112,100,120,92,101,103,99 };
33
- static int16 s_std_croma_quant[64] = { 17,18,18,24,21,24,47,26,26,47,99,66,56,66,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99 };
34
- static uint8 s_dc_lum_bits[17] = { 0,0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0 };
35
- static uint8 s_dc_lum_val[DC_LUM_CODES] = { 0,1,2,3,4,5,6,7,8,9,10,11 };
36
- static uint8 s_ac_lum_bits[17] = { 0,0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,0x7d };
37
- static uint8 s_ac_lum_val[AC_LUM_CODES] =
38
- {
39
- 0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xa1,0x08,0x23,0x42,0xb1,0xc1,0x15,0x52,0xd1,0xf0,
40
- 0x24,0x33,0x62,0x72,0x82,0x09,0x0a,0x16,0x17,0x18,0x19,0x1a,0x25,0x26,0x27,0x28,0x29,0x2a,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,
41
- 0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x83,0x84,0x85,0x86,0x87,0x88,0x89,
42
- 0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,
43
- 0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,
44
- 0xf9,0xfa
45
- };
46
- static uint8 s_dc_chroma_bits[17] = { 0,0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0 };
47
- static uint8 s_dc_chroma_val[DC_CHROMA_CODES] = { 0,1,2,3,4,5,6,7,8,9,10,11 };
48
- static uint8 s_ac_chroma_bits[17] = { 0,0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,0x77 };
49
- static uint8 s_ac_chroma_val[AC_CHROMA_CODES] =
50
- {
51
- 0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91,0xa1,0xb1,0xc1,0x09,0x23,0x33,0x52,0xf0,
52
- 0x15,0x62,0x72,0xd1,0x0a,0x16,0x24,0x34,0xe1,0x25,0xf1,0x17,0x18,0x19,0x1a,0x26,0x27,0x28,0x29,0x2a,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,
53
- 0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x82,0x83,0x84,0x85,0x86,0x87,
54
- 0x88,0x89,0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,
55
- 0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,
56
- 0xf9,0xfa
57
- };
58
-
59
- // Low-level helper functions.
60
- template <class T> inline void clear_obj(T &obj) { memset(&obj, 0, sizeof(obj)); }
61
-
62
- const int YR = 19595, YG = 38470, YB = 7471, CB_R = -11059, CB_G = -21709, CB_B = 32768, CR_R = 32768, CR_G = -27439, CR_B = -5329;
63
- static inline uint8 clamp(int i) { if (static_cast<uint>(i) > 255U) { if (i < 0) i = 0; else if (i > 255) i = 255; } return static_cast<uint8>(i); }
64
-
65
- static void RGB_to_YCC(uint8* pDst, const uint8 *pSrc, int num_pixels)
66
- {
67
- for ( ; num_pixels; pDst += 3, pSrc += 3, num_pixels--)
68
- {
69
- const int r = pSrc[0], g = pSrc[1], b = pSrc[2];
70
- pDst[0] = static_cast<uint8>((r * YR + g * YG + b * YB + 32768) >> 16);
71
- pDst[1] = clamp(128 + ((r * CB_R + g * CB_G + b * CB_B + 32768) >> 16));
72
- pDst[2] = clamp(128 + ((r * CR_R + g * CR_G + b * CR_B + 32768) >> 16));
73
- }
74
- }
75
-
76
- static void RGB_to_Y(uint8* pDst, const uint8 *pSrc, int num_pixels)
77
- {
78
- for ( ; num_pixels; pDst++, pSrc += 3, num_pixels--)
79
- pDst[0] = static_cast<uint8>((pSrc[0] * YR + pSrc[1] * YG + pSrc[2] * YB + 32768) >> 16);
80
- }
81
-
82
- static void RGBA_to_YCC(uint8* pDst, const uint8 *pSrc, int num_pixels)
83
- {
84
- for ( ; num_pixels; pDst += 3, pSrc += 4, num_pixels--)
85
- {
86
- const int r = pSrc[0], g = pSrc[1], b = pSrc[2];
87
- pDst[0] = static_cast<uint8>((r * YR + g * YG + b * YB + 32768) >> 16);
88
- pDst[1] = clamp(128 + ((r * CB_R + g * CB_G + b * CB_B + 32768) >> 16));
89
- pDst[2] = clamp(128 + ((r * CR_R + g * CR_G + b * CR_B + 32768) >> 16));
90
- }
91
- }
92
-
93
- static void RGBA_to_Y(uint8* pDst, const uint8 *pSrc, int num_pixels)
94
- {
95
- for ( ; num_pixels; pDst++, pSrc += 4, num_pixels--)
96
- pDst[0] = static_cast<uint8>((pSrc[0] * YR + pSrc[1] * YG + pSrc[2] * YB + 32768) >> 16);
97
- }
98
-
99
- static void Y_to_YCC(uint8* pDst, const uint8* pSrc, int num_pixels)
100
- {
101
- for( ; num_pixels; pDst += 3, pSrc++, num_pixels--) { pDst[0] = pSrc[0]; pDst[1] = 128; pDst[2] = 128; }
102
- }
103
-
104
- // Forward DCT - DCT derived from jfdctint.
105
- #define CONST_BITS 13
106
- #define ROW_BITS 2
107
- #define DCT_DESCALE(x, n) (((x) + (((int32)1) << ((n) - 1))) >> (n))
108
- #define DCT_MUL(var, c) (static_cast<int16>(var) * static_cast<int32>(c))
109
- #define DCT1D(s0, s1, s2, s3, s4, s5, s6, s7) \
110
- int32 t0 = s0 + s7, t7 = s0 - s7, t1 = s1 + s6, t6 = s1 - s6, t2 = s2 + s5, t5 = s2 - s5, t3 = s3 + s4, t4 = s3 - s4; \
111
- int32 t10 = t0 + t3, t13 = t0 - t3, t11 = t1 + t2, t12 = t1 - t2; \
112
- int32 u1 = DCT_MUL(t12 + t13, 4433); \
113
- s2 = u1 + DCT_MUL(t13, 6270); \
114
- s6 = u1 + DCT_MUL(t12, -15137); \
115
- u1 = t4 + t7; \
116
- int32 u2 = t5 + t6, u3 = t4 + t6, u4 = t5 + t7; \
117
- int32 z5 = DCT_MUL(u3 + u4, 9633); \
118
- t4 = DCT_MUL(t4, 2446); t5 = DCT_MUL(t5, 16819); \
119
- t6 = DCT_MUL(t6, 25172); t7 = DCT_MUL(t7, 12299); \
120
- u1 = DCT_MUL(u1, -7373); u2 = DCT_MUL(u2, -20995); \
121
- u3 = DCT_MUL(u3, -16069); u4 = DCT_MUL(u4, -3196); \
122
- u3 += z5; u4 += z5; \
123
- s0 = t10 + t11; s1 = t7 + u1 + u4; s3 = t6 + u2 + u3; s4 = t10 - t11; s5 = t5 + u2 + u4; s7 = t4 + u1 + u3;
124
-
125
- static void DCT2D(int32 *p)
126
- {
127
- int32 c, *q = p;
128
- for (c = 7; c >= 0; c--, q += 8)
129
- {
130
- int32 s0 = q[0], s1 = q[1], s2 = q[2], s3 = q[3], s4 = q[4], s5 = q[5], s6 = q[6], s7 = q[7];
131
- DCT1D(s0, s1, s2, s3, s4, s5, s6, s7);
132
- q[0] = s0 << ROW_BITS; q[1] = DCT_DESCALE(s1, CONST_BITS-ROW_BITS); q[2] = DCT_DESCALE(s2, CONST_BITS-ROW_BITS); q[3] = DCT_DESCALE(s3, CONST_BITS-ROW_BITS);
133
- q[4] = s4 << ROW_BITS; q[5] = DCT_DESCALE(s5, CONST_BITS-ROW_BITS); q[6] = DCT_DESCALE(s6, CONST_BITS-ROW_BITS); q[7] = DCT_DESCALE(s7, CONST_BITS-ROW_BITS);
134
- }
135
- for (q = p, c = 7; c >= 0; c--, q++)
136
- {
137
- int32 s0 = q[0*8], s1 = q[1*8], s2 = q[2*8], s3 = q[3*8], s4 = q[4*8], s5 = q[5*8], s6 = q[6*8], s7 = q[7*8];
138
- DCT1D(s0, s1, s2, s3, s4, s5, s6, s7);
139
- q[0*8] = DCT_DESCALE(s0, ROW_BITS+3); q[1*8] = DCT_DESCALE(s1, CONST_BITS+ROW_BITS+3); q[2*8] = DCT_DESCALE(s2, CONST_BITS+ROW_BITS+3); q[3*8] = DCT_DESCALE(s3, CONST_BITS+ROW_BITS+3);
140
- q[4*8] = DCT_DESCALE(s4, ROW_BITS+3); q[5*8] = DCT_DESCALE(s5, CONST_BITS+ROW_BITS+3); q[6*8] = DCT_DESCALE(s6, CONST_BITS+ROW_BITS+3); q[7*8] = DCT_DESCALE(s7, CONST_BITS+ROW_BITS+3);
141
- }
142
- }
143
-
144
- struct sym_freq { uint m_key, m_sym_index; };
145
-
146
- // Radix sorts sym_freq[] array by 32-bit key m_key. Returns ptr to sorted values.
147
- static inline sym_freq* radix_sort_syms(uint num_syms, sym_freq* pSyms0, sym_freq* pSyms1)
148
- {
149
- const uint cMaxPasses = 4;
150
- uint32 hist[256 * cMaxPasses]; clear_obj(hist);
151
- for (uint i = 0; i < num_syms; i++) { uint freq = pSyms0[i].m_key; hist[freq & 0xFF]++; hist[256 + ((freq >> 8) & 0xFF)]++; hist[256*2 + ((freq >> 16) & 0xFF)]++; hist[256*3 + ((freq >> 24) & 0xFF)]++; }
152
- sym_freq* pCur_syms = pSyms0, *pNew_syms = pSyms1;
153
- uint total_passes = cMaxPasses; while ((total_passes > 1) && (num_syms == hist[(total_passes - 1) * 256])) total_passes--;
154
- for (uint pass_shift = 0, pass = 0; pass < total_passes; pass++, pass_shift += 8)
155
- {
156
- const uint32* pHist = &hist[pass << 8];
157
- uint offsets[256], cur_ofs = 0;
158
- for (uint i = 0; i < 256; i++) { offsets[i] = cur_ofs; cur_ofs += pHist[i]; }
159
- for (uint i = 0; i < num_syms; i++)
160
- pNew_syms[offsets[(pCur_syms[i].m_key >> pass_shift) & 0xFF]++] = pCur_syms[i];
161
- sym_freq* t = pCur_syms; pCur_syms = pNew_syms; pNew_syms = t;
162
- }
163
- return pCur_syms;
164
- }
165
-
166
- // calculate_minimum_redundancy() originally written by: Alistair Moffat, alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996.
167
- static void calculate_minimum_redundancy(sym_freq *A, int n)
168
- {
169
- int root, leaf, next, avbl, used, dpth;
170
- if (n==0) return; else if (n==1) { A[0].m_key = 1; return; }
171
- A[0].m_key += A[1].m_key; root = 0; leaf = 2;
172
- for (next=1; next < n-1; next++)
173
- {
174
- if (leaf>=n || A[root].m_key<A[leaf].m_key) { A[next].m_key = A[root].m_key; A[root++].m_key = next; } else A[next].m_key = A[leaf++].m_key;
175
- if (leaf>=n || (root<next && A[root].m_key<A[leaf].m_key)) { A[next].m_key += A[root].m_key; A[root++].m_key = next; } else A[next].m_key += A[leaf++].m_key;
176
- }
177
- A[n-2].m_key = 0;
178
- for (next=n-3; next>=0; next--) A[next].m_key = A[A[next].m_key].m_key+1;
179
- avbl = 1; used = dpth = 0; root = n-2; next = n-1;
180
- while (avbl>0)
181
- {
182
- while (root>=0 && (int)A[root].m_key==dpth) { used++; root--; }
183
- while (avbl>used) { A[next--].m_key = dpth; avbl--; }
184
- avbl = 2*used; dpth++; used = 0;
185
- }
186
- }
187
-
188
- // Limits canonical Huffman code table's max code size to max_code_size.
189
- static void huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size)
190
- {
191
- if (code_list_len <= 1) return;
192
-
193
- for (int i = max_code_size + 1; i <= MAX_HUFF_CODESIZE; i++) pNum_codes[max_code_size] += pNum_codes[i];
194
-
195
- uint32 total = 0;
196
- for (int i = max_code_size; i > 0; i--)
197
- total += (((uint32)pNum_codes[i]) << (max_code_size - i));
198
-
199
- while (total != (1UL << max_code_size))
200
- {
201
- pNum_codes[max_code_size]--;
202
- for (int i = max_code_size - 1; i > 0; i--)
203
- {
204
- if (pNum_codes[i]) { pNum_codes[i]--; pNum_codes[i + 1] += 2; break; }
205
- }
206
- total--;
207
- }
208
- }
209
-
210
- // Generates an optimized offman table.
211
- void jpeg_encoder::optimize_huffman_table(int table_num, int table_len)
212
- {
213
- sym_freq syms0[MAX_HUFF_SYMBOLS], syms1[MAX_HUFF_SYMBOLS];
214
- syms0[0].m_key = 1; syms0[0].m_sym_index = 0; // dummy symbol, assures that no valid code contains all 1's
215
- int num_used_syms = 1;
216
- const uint32 *pSym_count = &m_huff_count[table_num][0];
217
- for (int i = 0; i < table_len; i++)
218
- if (pSym_count[i]) { syms0[num_used_syms].m_key = pSym_count[i]; syms0[num_used_syms++].m_sym_index = i + 1; }
219
- sym_freq* pSyms = radix_sort_syms(num_used_syms, syms0, syms1);
220
- calculate_minimum_redundancy(pSyms, num_used_syms);
221
-
222
- // Count the # of symbols of each code size.
223
- int num_codes[1 + MAX_HUFF_CODESIZE]; clear_obj(num_codes);
224
- for (int i = 0; i < num_used_syms; i++)
225
- num_codes[pSyms[i].m_key]++;
226
-
227
- const uint JPGE_CODE_SIZE_LIMIT = 16; // the maximum possible size of a JPEG Huffman code (valid range is [9,16] - 9 vs. 8 because of the dummy symbol)
228
- huffman_enforce_max_code_size(num_codes, num_used_syms, JPGE_CODE_SIZE_LIMIT);
229
-
230
- // Compute m_huff_bits array, which contains the # of symbols per code size.
231
- clear_obj(m_huff_bits[table_num]);
232
- for (int i = 1; i <= (int)JPGE_CODE_SIZE_LIMIT; i++)
233
- m_huff_bits[table_num][i] = static_cast<uint8>(num_codes[i]);
234
-
235
- // Remove the dummy symbol added above, which must be in largest bucket.
236
- for (int i = JPGE_CODE_SIZE_LIMIT; i >= 1; i--)
237
- {
238
- if (m_huff_bits[table_num][i]) { m_huff_bits[table_num][i]--; break; }
239
- }
240
-
241
- // Compute the m_huff_val array, which contains the symbol indices sorted by code size (smallest to largest).
242
- for (int i = num_used_syms - 1; i >= 1; i--)
243
- m_huff_val[table_num][num_used_syms - 1 - i] = static_cast<uint8>(pSyms[i].m_sym_index - 1);
244
- }
245
-
246
- // JPEG marker generation.
247
- void jpeg_encoder::emit_byte(uint8 i)
248
- {
249
- m_all_stream_writes_succeeded = m_all_stream_writes_succeeded && m_pStream->put_obj(i);
250
- }
251
-
252
- void jpeg_encoder::emit_word(uint i)
253
- {
254
- emit_byte(uint8(i >> 8)); emit_byte(uint8(i & 0xFF));
255
- }
256
-
257
- void jpeg_encoder::emit_marker(int marker)
258
- {
259
- emit_byte(uint8(0xFF)); emit_byte(uint8(marker));
260
- }
261
-
262
- // Emit JFIF marker
263
- void jpeg_encoder::emit_jfif_app0()
264
- {
265
- emit_marker(M_APP0);
266
- emit_word(2 + 4 + 1 + 2 + 1 + 2 + 2 + 1 + 1);
267
- emit_byte(0x4A); emit_byte(0x46); emit_byte(0x49); emit_byte(0x46); /* Identifier: ASCII "JFIF" */
268
- emit_byte(0);
269
- emit_byte(1); /* Major version */
270
- emit_byte(1); /* Minor version */
271
- emit_byte(0); /* Density unit */
272
- emit_word(1);
273
- emit_word(1);
274
- emit_byte(0); /* No thumbnail image */
275
- emit_byte(0);
276
- }
277
-
278
- // Emit quantization tables
279
- void jpeg_encoder::emit_dqt()
280
- {
281
- for (int i = 0; i < ((m_num_components == 3) ? 2 : 1); i++)
282
- {
283
- emit_marker(M_DQT);
284
- emit_word(64 + 1 + 2);
285
- emit_byte(static_cast<uint8>(i));
286
- for (int j = 0; j < 64; j++)
287
- emit_byte(static_cast<uint8>(m_quantization_tables[i][j]));
288
- }
289
- }
290
-
291
- // Emit start of frame marker
292
- void jpeg_encoder::emit_sof()
293
- {
294
- emit_marker(M_SOF0); /* baseline */
295
- emit_word(3 * m_num_components + 2 + 5 + 1);
296
- emit_byte(8); /* precision */
297
- emit_word(m_image_y);
298
- emit_word(m_image_x);
299
- emit_byte(m_num_components);
300
- for (int i = 0; i < m_num_components; i++)
301
- {
302
- emit_byte(static_cast<uint8>(i + 1)); /* component ID */
303
- emit_byte((m_comp_h_samp[i] << 4) + m_comp_v_samp[i]); /* h and v sampling */
304
- emit_byte(i > 0); /* quant. table num */
305
- }
306
- }
307
-
308
- // Emit Huffman table.
309
- void jpeg_encoder::emit_dht(uint8 *bits, uint8 *val, int index, bool ac_flag)
310
- {
311
- emit_marker(M_DHT);
312
-
313
- int length = 0;
314
- for (int i = 1; i <= 16; i++)
315
- length += bits[i];
316
-
317
- emit_word(length + 2 + 1 + 16);
318
- emit_byte(static_cast<uint8>(index + (ac_flag << 4)));
319
-
320
- for (int i = 1; i <= 16; i++)
321
- emit_byte(bits[i]);
322
-
323
- for (int i = 0; i < length; i++)
324
- emit_byte(val[i]);
325
- }
326
-
327
- // Emit all Huffman tables.
328
- void jpeg_encoder::emit_dhts()
329
- {
330
- emit_dht(m_huff_bits[0+0], m_huff_val[0+0], 0, false);
331
- emit_dht(m_huff_bits[2+0], m_huff_val[2+0], 0, true);
332
- if (m_num_components == 3)
333
- {
334
- emit_dht(m_huff_bits[0+1], m_huff_val[0+1], 1, false);
335
- emit_dht(m_huff_bits[2+1], m_huff_val[2+1], 1, true);
336
- }
337
- }
338
-
339
- // emit start of scan
340
- void jpeg_encoder::emit_sos()
341
- {
342
- emit_marker(M_SOS);
343
- emit_word(2 * m_num_components + 2 + 1 + 3);
344
- emit_byte(m_num_components);
345
- for (int i = 0; i < m_num_components; i++)
346
- {
347
- emit_byte(static_cast<uint8>(i + 1));
348
- if (i == 0)
349
- emit_byte((0 << 4) + 0);
350
- else
351
- emit_byte((1 << 4) + 1);
352
- }
353
- emit_byte(0); /* spectral selection */
354
- emit_byte(63);
355
- emit_byte(0);
356
- }
357
-
358
- // Emit all markers at beginning of image file.
359
- void jpeg_encoder::emit_markers()
360
- {
361
- emit_marker(M_SOI);
362
- emit_jfif_app0();
363
- emit_dqt();
364
- emit_sof();
365
- emit_dhts();
366
- emit_sos();
367
- }
368
-
369
- // Compute the actual canonical Huffman codes/code sizes given the JPEG huff bits and val arrays.
370
- void jpeg_encoder::compute_huffman_table(uint *codes, uint8 *code_sizes, uint8 *bits, uint8 *val)
371
- {
372
- int i, l, last_p, si;
373
- uint8 huff_size[257];
374
- uint huff_code[257];
375
- uint code;
376
-
377
- int p = 0;
378
- for (l = 1; l <= 16; l++)
379
- for (i = 1; i <= bits[l]; i++)
380
- huff_size[p++] = (char)l;
381
-
382
- huff_size[p] = 0; last_p = p; // write sentinel
383
-
384
- code = 0; si = huff_size[0]; p = 0;
385
-
386
- while (huff_size[p])
387
- {
388
- while (huff_size[p] == si)
389
- huff_code[p++] = code++;
390
- code <<= 1;
391
- si++;
392
- }
393
-
394
- memset(codes, 0, sizeof(codes[0])*256);
395
- memset(code_sizes, 0, sizeof(code_sizes[0])*256);
396
- for (p = 0; p < last_p; p++)
397
- {
398
- codes[val[p]] = huff_code[p];
399
- code_sizes[val[p]] = huff_size[p];
400
- }
401
- }
402
-
403
- // Quantization table generation.
404
- void jpeg_encoder::compute_quant_table(int32 *pDst, int16 *pSrc)
405
- {
406
- int32 q;
407
- if (m_params.m_quality < 50)
408
- q = 5000 / m_params.m_quality;
409
- else
410
- q = 200 - m_params.m_quality * 2;
411
- for (int i = 0; i < 64; i++)
412
- {
413
- int32 j = *pSrc++; j = (j * q + 50L) / 100L;
414
- *pDst++ = JPGE_MIN(JPGE_MAX(j, 1), 255);
415
- }
416
- }
417
-
418
- // Higher-level methods.
419
- void jpeg_encoder::first_pass_init()
420
- {
421
- m_bit_buffer = 0; m_bits_in = 0;
422
- memset(m_last_dc_val, 0, 3 * sizeof(m_last_dc_val[0]));
423
- m_mcu_y_ofs = 0;
424
- m_pass_num = 1;
425
- }
426
-
427
- bool jpeg_encoder::second_pass_init()
428
- {
429
- compute_huffman_table(&m_huff_codes[0+0][0], &m_huff_code_sizes[0+0][0], m_huff_bits[0+0], m_huff_val[0+0]);
430
- compute_huffman_table(&m_huff_codes[2+0][0], &m_huff_code_sizes[2+0][0], m_huff_bits[2+0], m_huff_val[2+0]);
431
- if (m_num_components > 1)
432
- {
433
- compute_huffman_table(&m_huff_codes[0+1][0], &m_huff_code_sizes[0+1][0], m_huff_bits[0+1], m_huff_val[0+1]);
434
- compute_huffman_table(&m_huff_codes[2+1][0], &m_huff_code_sizes[2+1][0], m_huff_bits[2+1], m_huff_val[2+1]);
435
- }
436
- first_pass_init();
437
- emit_markers();
438
- m_pass_num = 2;
439
- return true;
440
- }
441
-
442
- bool jpeg_encoder::jpg_open(int p_x_res, int p_y_res, int src_channels)
443
- {
444
- m_num_components = 3;
445
- switch (m_params.m_subsampling)
446
- {
447
- case Y_ONLY:
448
- {
449
- m_num_components = 1;
450
- m_comp_h_samp[0] = 1; m_comp_v_samp[0] = 1;
451
- m_mcu_x = 8; m_mcu_y = 8;
452
- break;
453
- }
454
- case H1V1:
455
- {
456
- m_comp_h_samp[0] = 1; m_comp_v_samp[0] = 1;
457
- m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1;
458
- m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1;
459
- m_mcu_x = 8; m_mcu_y = 8;
460
- break;
461
- }
462
- case H2V1:
463
- {
464
- m_comp_h_samp[0] = 2; m_comp_v_samp[0] = 1;
465
- m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1;
466
- m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1;
467
- m_mcu_x = 16; m_mcu_y = 8;
468
- break;
469
- }
470
- case H2V2:
471
- {
472
- m_comp_h_samp[0] = 2; m_comp_v_samp[0] = 2;
473
- m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1;
474
- m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1;
475
- m_mcu_x = 16; m_mcu_y = 16;
476
- }
477
- }
478
-
479
- m_image_x = p_x_res; m_image_y = p_y_res;
480
- m_image_bpp = src_channels;
481
- m_image_bpl = m_image_x * src_channels;
482
- m_image_x_mcu = (m_image_x + m_mcu_x - 1) & (~(m_mcu_x - 1));
483
- m_image_y_mcu = (m_image_y + m_mcu_y - 1) & (~(m_mcu_y - 1));
484
- m_image_bpl_xlt = m_image_x * m_num_components;
485
- m_image_bpl_mcu = m_image_x_mcu * m_num_components;
486
- m_mcus_per_row = m_image_x_mcu / m_mcu_x;
487
-
488
- if ((m_mcu_lines[0] = static_cast<uint8*>(jpge_malloc(m_image_bpl_mcu * m_mcu_y))) == NULL) return false;
489
- for (int i = 1; i < m_mcu_y; i++)
490
- m_mcu_lines[i] = m_mcu_lines[i-1] + m_image_bpl_mcu;
491
-
492
- compute_quant_table(m_quantization_tables[0], s_std_lum_quant);
493
- compute_quant_table(m_quantization_tables[1], m_params.m_no_chroma_discrim_flag ? s_std_lum_quant : s_std_croma_quant);
494
-
495
- m_out_buf_left = JPGE_OUT_BUF_SIZE;
496
- m_pOut_buf = m_out_buf;
497
-
498
- if (m_params.m_two_pass_flag)
499
- {
500
- clear_obj(m_huff_count);
501
- first_pass_init();
502
- }
503
- else
504
- {
505
- memcpy(m_huff_bits[0+0], s_dc_lum_bits, 17); memcpy(m_huff_val [0+0], s_dc_lum_val, DC_LUM_CODES);
506
- memcpy(m_huff_bits[2+0], s_ac_lum_bits, 17); memcpy(m_huff_val [2+0], s_ac_lum_val, AC_LUM_CODES);
507
- memcpy(m_huff_bits[0+1], s_dc_chroma_bits, 17); memcpy(m_huff_val [0+1], s_dc_chroma_val, DC_CHROMA_CODES);
508
- memcpy(m_huff_bits[2+1], s_ac_chroma_bits, 17); memcpy(m_huff_val [2+1], s_ac_chroma_val, AC_CHROMA_CODES);
509
- if (!second_pass_init()) return false; // in effect, skip over the first pass
510
- }
511
- return m_all_stream_writes_succeeded;
512
- }
513
-
514
- void jpeg_encoder::load_block_8_8_grey(int x)
515
- {
516
- uint8 *pSrc;
517
- sample_array_t *pDst = m_sample_array;
518
- x <<= 3;
519
- for (int i = 0; i < 8; i++, pDst += 8)
520
- {
521
- pSrc = m_mcu_lines[i] + x;
522
- pDst[0] = pSrc[0] - 128; pDst[1] = pSrc[1] - 128; pDst[2] = pSrc[2] - 128; pDst[3] = pSrc[3] - 128;
523
- pDst[4] = pSrc[4] - 128; pDst[5] = pSrc[5] - 128; pDst[6] = pSrc[6] - 128; pDst[7] = pSrc[7] - 128;
524
- }
525
- }
526
-
527
- void jpeg_encoder::load_block_8_8(int x, int y, int c)
528
- {
529
- uint8 *pSrc;
530
- sample_array_t *pDst = m_sample_array;
531
- x = (x * (8 * 3)) + c;
532
- y <<= 3;
533
- for (int i = 0; i < 8; i++, pDst += 8)
534
- {
535
- pSrc = m_mcu_lines[y + i] + x;
536
- pDst[0] = pSrc[0 * 3] - 128; pDst[1] = pSrc[1 * 3] - 128; pDst[2] = pSrc[2 * 3] - 128; pDst[3] = pSrc[3 * 3] - 128;
537
- pDst[4] = pSrc[4 * 3] - 128; pDst[5] = pSrc[5 * 3] - 128; pDst[6] = pSrc[6 * 3] - 128; pDst[7] = pSrc[7 * 3] - 128;
538
- }
539
- }
540
-
541
- void jpeg_encoder::load_block_16_8(int x, int c)
542
- {
543
- uint8 *pSrc1, *pSrc2;
544
- sample_array_t *pDst = m_sample_array;
545
- x = (x * (16 * 3)) + c;
546
- int a = 0, b = 2;
547
- for (int i = 0; i < 16; i += 2, pDst += 8)
548
- {
549
- pSrc1 = m_mcu_lines[i + 0] + x;
550
- pSrc2 = m_mcu_lines[i + 1] + x;
551
- pDst[0] = ((pSrc1[ 0 * 3] + pSrc1[ 1 * 3] + pSrc2[ 0 * 3] + pSrc2[ 1 * 3] + a) >> 2) - 128; pDst[1] = ((pSrc1[ 2 * 3] + pSrc1[ 3 * 3] + pSrc2[ 2 * 3] + pSrc2[ 3 * 3] + b) >> 2) - 128;
552
- pDst[2] = ((pSrc1[ 4 * 3] + pSrc1[ 5 * 3] + pSrc2[ 4 * 3] + pSrc2[ 5 * 3] + a) >> 2) - 128; pDst[3] = ((pSrc1[ 6 * 3] + pSrc1[ 7 * 3] + pSrc2[ 6 * 3] + pSrc2[ 7 * 3] + b) >> 2) - 128;
553
- pDst[4] = ((pSrc1[ 8 * 3] + pSrc1[ 9 * 3] + pSrc2[ 8 * 3] + pSrc2[ 9 * 3] + a) >> 2) - 128; pDst[5] = ((pSrc1[10 * 3] + pSrc1[11 * 3] + pSrc2[10 * 3] + pSrc2[11 * 3] + b) >> 2) - 128;
554
- pDst[6] = ((pSrc1[12 * 3] + pSrc1[13 * 3] + pSrc2[12 * 3] + pSrc2[13 * 3] + a) >> 2) - 128; pDst[7] = ((pSrc1[14 * 3] + pSrc1[15 * 3] + pSrc2[14 * 3] + pSrc2[15 * 3] + b) >> 2) - 128;
555
- int temp = a; a = b; b = temp;
556
- }
557
- }
558
-
559
- void jpeg_encoder::load_block_16_8_8(int x, int c)
560
- {
561
- uint8 *pSrc1;
562
- sample_array_t *pDst = m_sample_array;
563
- x = (x * (16 * 3)) + c;
564
- for (int i = 0; i < 8; i++, pDst += 8)
565
- {
566
- pSrc1 = m_mcu_lines[i + 0] + x;
567
- pDst[0] = ((pSrc1[ 0 * 3] + pSrc1[ 1 * 3]) >> 1) - 128; pDst[1] = ((pSrc1[ 2 * 3] + pSrc1[ 3 * 3]) >> 1) - 128;
568
- pDst[2] = ((pSrc1[ 4 * 3] + pSrc1[ 5 * 3]) >> 1) - 128; pDst[3] = ((pSrc1[ 6 * 3] + pSrc1[ 7 * 3]) >> 1) - 128;
569
- pDst[4] = ((pSrc1[ 8 * 3] + pSrc1[ 9 * 3]) >> 1) - 128; pDst[5] = ((pSrc1[10 * 3] + pSrc1[11 * 3]) >> 1) - 128;
570
- pDst[6] = ((pSrc1[12 * 3] + pSrc1[13 * 3]) >> 1) - 128; pDst[7] = ((pSrc1[14 * 3] + pSrc1[15 * 3]) >> 1) - 128;
571
- }
572
- }
573
-
574
- void jpeg_encoder::load_quantized_coefficients(int component_num)
575
- {
576
- int32 *q = m_quantization_tables[component_num > 0];
577
- int16 *pDst = m_coefficient_array;
578
- for (int i = 0; i < 64; i++)
579
- {
580
- sample_array_t j = m_sample_array[s_zag[i]];
581
- if (j < 0)
582
- {
583
- if ((j = -j + (*q >> 1)) < *q)
584
- *pDst++ = 0;
585
- else
586
- *pDst++ = static_cast<int16>(-(j / *q));
587
- }
588
- else
589
- {
590
- if ((j = j + (*q >> 1)) < *q)
591
- *pDst++ = 0;
592
- else
593
- *pDst++ = static_cast<int16>((j / *q));
594
- }
595
- q++;
596
- }
597
- }
598
-
599
- void jpeg_encoder::flush_output_buffer()
600
- {
601
- if (m_out_buf_left != JPGE_OUT_BUF_SIZE)
602
- m_all_stream_writes_succeeded = m_all_stream_writes_succeeded && m_pStream->put_buf(m_out_buf, JPGE_OUT_BUF_SIZE - m_out_buf_left);
603
- m_pOut_buf = m_out_buf;
604
- m_out_buf_left = JPGE_OUT_BUF_SIZE;
605
- }
606
-
607
- void jpeg_encoder::put_bits(uint bits, uint len)
608
- {
609
- m_bit_buffer |= ((uint32)bits << (24 - (m_bits_in += len)));
610
- while (m_bits_in >= 8)
611
- {
612
- uint8 c;
613
- #define JPGE_PUT_BYTE(c) { *m_pOut_buf++ = (c); if (--m_out_buf_left == 0) flush_output_buffer(); }
614
- JPGE_PUT_BYTE(c = (uint8)((m_bit_buffer >> 16) & 0xFF));
615
- if (c == 0xFF) JPGE_PUT_BYTE(0);
616
- m_bit_buffer <<= 8;
617
- m_bits_in -= 8;
618
- }
619
- }
620
-
621
- void jpeg_encoder::code_coefficients_pass_one(int component_num)
622
- {
623
- if (component_num >= 3) return; // just to shut up static analysis
624
- int i, run_len, nbits, temp1;
625
- int16 *src = m_coefficient_array;
626
- uint32 *dc_count = component_num ? m_huff_count[0 + 1] : m_huff_count[0 + 0], *ac_count = component_num ? m_huff_count[2 + 1] : m_huff_count[2 + 0];
627
-
628
- temp1 = src[0] - m_last_dc_val[component_num];
629
- m_last_dc_val[component_num] = src[0];
630
- if (temp1 < 0) temp1 = -temp1;
631
-
632
- nbits = 0;
633
- while (temp1)
634
- {
635
- nbits++; temp1 >>= 1;
636
- }
637
-
638
- dc_count[nbits]++;
639
- for (run_len = 0, i = 1; i < 64; i++)
640
- {
641
- if ((temp1 = m_coefficient_array[i]) == 0)
642
- run_len++;
643
- else
644
- {
645
- while (run_len >= 16)
646
- {
647
- ac_count[0xF0]++;
648
- run_len -= 16;
649
- }
650
- if (temp1 < 0) temp1 = -temp1;
651
- nbits = 1;
652
- while (temp1 >>= 1) nbits++;
653
- ac_count[(run_len << 4) + nbits]++;
654
- run_len = 0;
655
- }
656
- }
657
- if (run_len) ac_count[0]++;
658
- }
659
-
660
- void jpeg_encoder::code_coefficients_pass_two(int component_num)
661
- {
662
- int i, j, run_len, nbits, temp1, temp2;
663
- int16 *pSrc = m_coefficient_array;
664
- uint *codes[2];
665
- uint8 *code_sizes[2];
666
-
667
- if (component_num == 0)
668
- {
669
- codes[0] = m_huff_codes[0 + 0]; codes[1] = m_huff_codes[2 + 0];
670
- code_sizes[0] = m_huff_code_sizes[0 + 0]; code_sizes[1] = m_huff_code_sizes[2 + 0];
671
- }
672
- else
673
- {
674
- codes[0] = m_huff_codes[0 + 1]; codes[1] = m_huff_codes[2 + 1];
675
- code_sizes[0] = m_huff_code_sizes[0 + 1]; code_sizes[1] = m_huff_code_sizes[2 + 1];
676
- }
677
-
678
- temp1 = temp2 = pSrc[0] - m_last_dc_val[component_num];
679
- m_last_dc_val[component_num] = pSrc[0];
680
-
681
- if (temp1 < 0)
682
- {
683
- temp1 = -temp1; temp2--;
684
- }
685
-
686
- nbits = 0;
687
- while (temp1)
688
- {
689
- nbits++; temp1 >>= 1;
690
- }
691
-
692
- put_bits(codes[0][nbits], code_sizes[0][nbits]);
693
- if (nbits) put_bits(temp2 & ((1 << nbits) - 1), nbits);
694
-
695
- for (run_len = 0, i = 1; i < 64; i++)
696
- {
697
- if ((temp1 = m_coefficient_array[i]) == 0)
698
- run_len++;
699
- else
700
- {
701
- while (run_len >= 16)
702
- {
703
- put_bits(codes[1][0xF0], code_sizes[1][0xF0]);
704
- run_len -= 16;
705
- }
706
- if ((temp2 = temp1) < 0)
707
- {
708
- temp1 = -temp1;
709
- temp2--;
710
- }
711
- nbits = 1;
712
- while (temp1 >>= 1)
713
- nbits++;
714
- j = (run_len << 4) + nbits;
715
- put_bits(codes[1][j], code_sizes[1][j]);
716
- put_bits(temp2 & ((1 << nbits) - 1), nbits);
717
- run_len = 0;
718
- }
719
- }
720
- if (run_len)
721
- put_bits(codes[1][0], code_sizes[1][0]);
722
- }
723
-
724
- void jpeg_encoder::code_block(int component_num)
725
- {
726
- DCT2D(m_sample_array);
727
- load_quantized_coefficients(component_num);
728
- if (m_pass_num == 1)
729
- code_coefficients_pass_one(component_num);
730
- else
731
- code_coefficients_pass_two(component_num);
732
- }
733
-
734
- void jpeg_encoder::process_mcu_row()
735
- {
736
- if (m_num_components == 1)
737
- {
738
- for (int i = 0; i < m_mcus_per_row; i++)
739
- {
740
- load_block_8_8_grey(i); code_block(0);
741
- }
742
- }
743
- else if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 1))
744
- {
745
- for (int i = 0; i < m_mcus_per_row; i++)
746
- {
747
- load_block_8_8(i, 0, 0); code_block(0); load_block_8_8(i, 0, 1); code_block(1); load_block_8_8(i, 0, 2); code_block(2);
748
- }
749
- }
750
- else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 1))
751
- {
752
- for (int i = 0; i < m_mcus_per_row; i++)
753
- {
754
- load_block_8_8(i * 2 + 0, 0, 0); code_block(0); load_block_8_8(i * 2 + 1, 0, 0); code_block(0);
755
- load_block_16_8_8(i, 1); code_block(1); load_block_16_8_8(i, 2); code_block(2);
756
- }
757
- }
758
- else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 2))
759
- {
760
- for (int i = 0; i < m_mcus_per_row; i++)
761
- {
762
- load_block_8_8(i * 2 + 0, 0, 0); code_block(0); load_block_8_8(i * 2 + 1, 0, 0); code_block(0);
763
- load_block_8_8(i * 2 + 0, 1, 0); code_block(0); load_block_8_8(i * 2 + 1, 1, 0); code_block(0);
764
- load_block_16_8(i, 1); code_block(1); load_block_16_8(i, 2); code_block(2);
765
- }
766
- }
767
- }
768
-
769
- bool jpeg_encoder::terminate_pass_one()
770
- {
771
- optimize_huffman_table(0+0, DC_LUM_CODES); optimize_huffman_table(2+0, AC_LUM_CODES);
772
- if (m_num_components > 1)
773
- {
774
- optimize_huffman_table(0+1, DC_CHROMA_CODES); optimize_huffman_table(2+1, AC_CHROMA_CODES);
775
- }
776
- return second_pass_init();
777
- }
778
-
779
- bool jpeg_encoder::terminate_pass_two()
780
- {
781
- put_bits(0x7F, 7);
782
- flush_output_buffer();
783
- emit_marker(M_EOI);
784
- m_pass_num++; // purposely bump up m_pass_num, for debugging
785
- return true;
786
- }
787
-
788
- bool jpeg_encoder::process_end_of_image()
789
- {
790
- if (m_mcu_y_ofs)
791
- {
792
- if (m_mcu_y_ofs < 16) // check here just to shut up static analysis
793
- {
794
- for (int i = m_mcu_y_ofs; i < m_mcu_y; i++)
795
- memcpy(m_mcu_lines[i], m_mcu_lines[m_mcu_y_ofs - 1], m_image_bpl_mcu);
796
- }
797
-
798
- process_mcu_row();
799
- }
800
-
801
- if (m_pass_num == 1)
802
- return terminate_pass_one();
803
- else
804
- return terminate_pass_two();
805
- }
806
-
807
- void jpeg_encoder::load_mcu(const void *pSrc)
808
- {
809
- const uint8* Psrc = reinterpret_cast<const uint8*>(pSrc);
810
-
811
- uint8* pDst = m_mcu_lines[m_mcu_y_ofs]; // OK to write up to m_image_bpl_xlt bytes to pDst
812
-
813
- if (m_num_components == 1)
814
- {
815
- if (m_image_bpp == 4)
816
- RGBA_to_Y(pDst, Psrc, m_image_x);
817
- else if (m_image_bpp == 3)
818
- RGB_to_Y(pDst, Psrc, m_image_x);
819
- else
820
- memcpy(pDst, Psrc, m_image_x);
821
- }
822
- else
823
- {
824
- if (m_image_bpp == 4)
825
- RGBA_to_YCC(pDst, Psrc, m_image_x);
826
- else if (m_image_bpp == 3)
827
- RGB_to_YCC(pDst, Psrc, m_image_x);
828
- else
829
- Y_to_YCC(pDst, Psrc, m_image_x);
830
- }
831
-
832
- // Possibly duplicate pixels at end of scanline if not a multiple of 8 or 16
833
- if (m_num_components == 1)
834
- memset(m_mcu_lines[m_mcu_y_ofs] + m_image_bpl_xlt, pDst[m_image_bpl_xlt - 1], m_image_x_mcu - m_image_x);
835
- else
836
- {
837
- const uint8 y = pDst[m_image_bpl_xlt - 3 + 0], cb = pDst[m_image_bpl_xlt - 3 + 1], cr = pDst[m_image_bpl_xlt - 3 + 2];
838
- uint8 *q = m_mcu_lines[m_mcu_y_ofs] + m_image_bpl_xlt;
839
- for (int i = m_image_x; i < m_image_x_mcu; i++)
840
- {
841
- *q++ = y; *q++ = cb; *q++ = cr;
842
- }
843
- }
844
-
845
- if (++m_mcu_y_ofs == m_mcu_y)
846
- {
847
- process_mcu_row();
848
- m_mcu_y_ofs = 0;
849
- }
850
- }
851
-
852
- void jpeg_encoder::clear()
853
- {
854
- m_mcu_lines[0] = NULL;
855
- m_pass_num = 0;
856
- m_all_stream_writes_succeeded = true;
857
- }
858
-
859
- jpeg_encoder::jpeg_encoder()
860
- {
861
- clear();
862
- }
863
-
864
- jpeg_encoder::~jpeg_encoder()
865
- {
866
- deinit();
867
- }
868
-
869
- bool jpeg_encoder::init(output_stream *pStream, int64_t width, int64_t height, int64_t src_channels, const params &comp_params)
870
- {
871
- deinit();
872
- if (((!pStream) || (width < 1) || (height < 1)) || ((src_channels != 1) && (src_channels != 3) && (src_channels != 4)) || (!comp_params.check_valid())) return false;
873
- m_pStream = pStream;
874
- m_params = comp_params;
875
- return jpg_open(width, height, src_channels);
876
- }
877
-
878
- void jpeg_encoder::deinit()
879
- {
880
- jpge_free(m_mcu_lines[0]);
881
- clear();
882
- }
883
-
884
- bool jpeg_encoder::process_scanline(const void* pScanline)
885
- {
886
- if ((m_pass_num < 1) || (m_pass_num > 2)) return false;
887
- if (m_all_stream_writes_succeeded)
888
- {
889
- if (!pScanline)
890
- {
891
- if (!process_end_of_image()) return false;
892
- }
893
- else
894
- {
895
- load_mcu(pScanline);
896
- }
897
- }
898
- return m_all_stream_writes_succeeded;
899
- }
900
-
901
- // Higher level wrappers/examples (optional).
902
- #include <stdio.h>
903
-
904
- class cfile_stream : public output_stream
905
- {
906
- cfile_stream(const cfile_stream &);
907
- cfile_stream &operator= (const cfile_stream &);
908
-
909
- FILE* m_pFile;
910
- bool m_bStatus;
911
-
912
- public:
913
- cfile_stream() : m_pFile(NULL), m_bStatus(false) { }
914
-
915
- virtual ~cfile_stream()
916
- {
917
- close();
918
- }
919
-
920
- bool open(const char *pFilename)
921
- {
922
- close();
923
- #if defined(_MSC_VER)
924
- if (fopen_s(&m_pFile, pFilename, "wb") != 0)
925
- {
926
- return false;
927
- }
928
- #else
929
- m_pFile = fopen(pFilename, "wb");
930
- #endif
931
- m_bStatus = (m_pFile != NULL);
932
- return m_bStatus;
933
- }
934
-
935
- bool close()
936
- {
937
- if (m_pFile)
938
- {
939
- if (fclose(m_pFile) == EOF)
940
- {
941
- m_bStatus = false;
942
- }
943
- m_pFile = NULL;
944
- }
945
- return m_bStatus;
946
- }
947
-
948
- virtual bool put_buf(const void* pBuf, int64_t len)
949
- {
950
- m_bStatus = m_bStatus && (fwrite(pBuf, len, 1, m_pFile) == 1);
951
- return m_bStatus;
952
- }
953
-
954
- uint get_size() const
955
- {
956
- return m_pFile ? ftell(m_pFile) : 0;
957
- }
958
- };
959
-
960
- // Writes JPEG image to file.
961
- bool compress_image_to_jpeg_file(const char *pFilename, int64_t width, int64_t height, int64_t num_channels, const uint8 *pImage_data, const params &comp_params)
962
- {
963
- cfile_stream dst_stream;
964
- if (!dst_stream.open(pFilename))
965
- return false;
966
-
967
- jpge::jpeg_encoder dst_image;
968
- if (!dst_image.init(&dst_stream, width, height, num_channels, comp_params))
969
- return false;
970
-
971
- for (uint pass_index = 0; pass_index < dst_image.get_total_passes(); pass_index++)
972
- {
973
- for (int64_t i = 0; i < height; i++)
974
- {
975
- // i, width, and num_channels are all 64bit
976
- const uint8* pBuf = pImage_data + i * width * num_channels;
977
- if (!dst_image.process_scanline(pBuf))
978
- return false;
979
- }
980
- if (!dst_image.process_scanline(NULL))
981
- return false;
982
- }
983
-
984
- dst_image.deinit();
985
-
986
- return dst_stream.close();
987
- }
988
-
989
- class memory_stream : public output_stream
990
- {
991
- memory_stream(const memory_stream &);
992
- memory_stream &operator= (const memory_stream &);
993
-
994
- uint8 *m_pBuf;
995
- uint64_t m_buf_size, m_buf_ofs;
996
-
997
- public:
998
- memory_stream(void *pBuf, uint64_t buf_size) : m_pBuf(static_cast<uint8*>(pBuf)), m_buf_size(buf_size), m_buf_ofs(0) { }
999
-
1000
- virtual ~memory_stream() { }
1001
-
1002
- virtual bool put_buf(const void* pBuf, int64_t len)
1003
- {
1004
- uint64_t buf_remaining = m_buf_size - m_buf_ofs;
1005
- if ((uint64_t)len > buf_remaining)
1006
- return false;
1007
- memcpy(m_pBuf + m_buf_ofs, pBuf, len);
1008
- m_buf_ofs += len;
1009
- return true;
1010
- }
1011
-
1012
- uint64_t get_size() const
1013
- {
1014
- return m_buf_ofs;
1015
- }
1016
- };
1017
-
1018
- bool compress_image_to_jpeg_file_in_memory(void *pDstBuf, int64_t &buf_size, int64_t width, int64_t height, int64_t num_channels, const uint8 *pImage_data, const params &comp_params)
1019
- {
1020
- if ((!pDstBuf) || (!buf_size))
1021
- return false;
1022
-
1023
- memory_stream dst_stream(pDstBuf, buf_size);
1024
-
1025
- buf_size = 0;
1026
-
1027
- jpge::jpeg_encoder dst_image;
1028
- if (!dst_image.init(&dst_stream, width, height, num_channels, comp_params))
1029
- return false;
1030
-
1031
- for (uint pass_index = 0; pass_index < dst_image.get_total_passes(); pass_index++)
1032
- {
1033
- for (int64_t i = 0; i < height; i++)
1034
- {
1035
- const uint8* pScanline = pImage_data + i * width * num_channels;
1036
- if (!dst_image.process_scanline(pScanline))
1037
- return false;
1038
- }
1039
- if (!dst_image.process_scanline(NULL))
1040
- return false;
1041
- }
1042
-
1043
- dst_image.deinit();
1044
-
1045
- buf_size = dst_stream.get_size();
1046
- return true;
1047
- }
1048
-
1049
- } // namespace jpge
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/test_project/cpp/longcode/prod_cons.h DELETED
@@ -1,433 +0,0 @@
1
- #pragma once
2
-
3
- #include <atomic>
4
- #include <utility>
5
- #include <cstring>
6
- #include <type_traits>
7
- #include <cstdint>
8
-
9
- #include "libipc/def.h"
10
-
11
- #include "libipc/platform/detail.h"
12
- #include "libipc/circ/elem_def.h"
13
- #include "libipc/utility/log.h"
14
- #include "libipc/utility/utility.h"
15
-
16
- namespace ipc {
17
-
18
- ////////////////////////////////////////////////////////////////
19
- /// producer-consumer implementation
20
- ////////////////////////////////////////////////////////////////
21
-
22
- template <typename Flag>
23
- struct prod_cons_impl;
24
-
25
- template <>
26
- struct prod_cons_impl<wr<relat::single, relat::single, trans::unicast>> {
27
-
28
- template <std::size_t DataSize, std::size_t AlignSize>
29
- struct elem_t {
30
- std::aligned_storage_t<DataSize, AlignSize> data_ {};
31
- };
32
-
33
- alignas(cache_line_size) std::atomic<circ::u2_t> rd_; // read index
34
- alignas(cache_line_size) std::atomic<circ::u2_t> wt_; // write index
35
-
36
- constexpr circ::u2_t cursor() const noexcept {
37
- return 0;
38
- }
39
-
40
- template <typename W, typename F, typename E>
41
- bool push(W* /*wrapper*/, F&& f, E* elems) {
42
- auto cur_wt = circ::index_of(wt_.load(std::memory_order_relaxed));
43
- if (cur_wt == circ::index_of(rd_.load(std::memory_order_acquire) - 1)) {
44
- return false; // full
45
- }
46
- std::forward<F>(f)(&(elems[cur_wt].data_));
47
- wt_.fetch_add(1, std::memory_order_release);
48
- return true;
49
- }
50
-
51
- /**
52
- * In single-single-unicast, 'force_push' means 'no reader' or 'the only one reader is dead'.
53
- * So we could just disconnect all connections of receiver, and return false.
54
- */
55
- template <typename W, typename F, typename E>
56
- bool force_push(W* wrapper, F&&, E*) {
57
- wrapper->elems()->disconnect_receiver(~static_cast<circ::cc_t>(0u));
58
- return false;
59
- }
60
-
61
- template <typename W, typename F, typename R, typename E>
62
- bool pop(W* /*wrapper*/, circ::u2_t& /*cur*/, F&& f, R&& out, E* elems) {
63
- auto cur_rd = circ::index_of(rd_.load(std::memory_order_relaxed));
64
- if (cur_rd == circ::index_of(wt_.load(std::memory_order_acquire))) {
65
- return false; // empty
66
- }
67
- std::forward<F>(f)(&(elems[cur_rd].data_));
68
- std::forward<R>(out)(true);
69
- rd_.fetch_add(1, std::memory_order_release);
70
- return true;
71
- }
72
- };
73
-
74
- template <>
75
- struct prod_cons_impl<wr<relat::single, relat::multi , trans::unicast>>
76
- : prod_cons_impl<wr<relat::single, relat::single, trans::unicast>> {
77
-
78
- template <typename W, typename F, typename E>
79
- bool force_push(W* wrapper, F&&, E*) {
80
- wrapper->elems()->disconnect_receiver(1);
81
- return false;
82
- }
83
-
84
- template <typename W, typename F, typename R,
85
- template <std::size_t, std::size_t> class E, std::size_t DS, std::size_t AS>
86
- bool pop(W* /*wrapper*/, circ::u2_t& /*cur*/, F&& f, R&& out, E<DS, AS>* elems) {
87
- byte_t buff[DS];
88
- for (unsigned k = 0;;) {
89
- auto cur_rd = rd_.load(std::memory_order_relaxed);
90
- if (circ::index_of(cur_rd) ==
91
- circ::index_of(wt_.load(std::memory_order_acquire))) {
92
- return false; // empty
93
- }
94
- std::memcpy(buff, &(elems[circ::index_of(cur_rd)].data_), sizeof(buff));
95
- if (rd_.compare_exchange_weak(cur_rd, cur_rd + 1, std::memory_order_release)) {
96
- std::forward<F>(f)(buff);
97
- std::forward<R>(out)(true);
98
- return true;
99
- }
100
- ipc::yield(k);
101
- }
102
- }
103
- };
104
-
105
- template <>
106
- struct prod_cons_impl<wr<relat::multi , relat::multi, trans::unicast>>
107
- : prod_cons_impl<wr<relat::single, relat::multi, trans::unicast>> {
108
-
109
- using flag_t = std::uint64_t;
110
-
111
- template <std::size_t DataSize, std::size_t AlignSize>
112
- struct elem_t {
113
- std::aligned_storage_t<DataSize, AlignSize> data_ {};
114
- std::atomic<flag_t> f_ct_ { 0 }; // commit flag
115
- };
116
-
117
- alignas(cache_line_size) std::atomic<circ::u2_t> ct_; // commit index
118
-
119
- template <typename W, typename F, typename E>
120
- bool push(W* /*wrapper*/, F&& f, E* elems) {
121
- circ::u2_t cur_ct, nxt_ct;
122
- for (unsigned k = 0;;) {
123
- cur_ct = ct_.load(std::memory_order_relaxed);
124
- if (circ::index_of(nxt_ct = cur_ct + 1) ==
125
- circ::index_of(rd_.load(std::memory_order_acquire))) {
126
- return false; // full
127
- }
128
- if (ct_.compare_exchange_weak(cur_ct, nxt_ct, std::memory_order_acq_rel)) {
129
- break;
130
- }
131
- ipc::yield(k);
132
- }
133
- auto* el = elems + circ::index_of(cur_ct);
134
- std::forward<F>(f)(&(el->data_));
135
- // set flag & try update wt
136
- el->f_ct_.store(~static_cast<flag_t>(cur_ct), std::memory_order_release);
137
- while (1) {
138
- auto cac_ct = el->f_ct_.load(std::memory_order_acquire);
139
- if (cur_ct != wt_.load(std::memory_order_relaxed)) {
140
- return true;
141
- }
142
- if ((~cac_ct) != cur_ct) {
143
- return true;
144
- }
145
- if (!el->f_ct_.compare_exchange_strong(cac_ct, 0, std::memory_order_relaxed)) {
146
- return true;
147
- }
148
- wt_.store(nxt_ct, std::memory_order_release);
149
- cur_ct = nxt_ct;
150
- nxt_ct = cur_ct + 1;
151
- el = elems + circ::index_of(cur_ct);
152
- }
153
- return true;
154
- }
155
-
156
- template <typename W, typename F, typename E>
157
- bool force_push(W* wrapper, F&&, E*) {
158
- wrapper->elems()->disconnect_receiver(1);
159
- return false;
160
- }
161
-
162
- template <typename W, typename F, typename R,
163
- template <std::size_t, std::size_t> class E, std::size_t DS, std::size_t AS>
164
- bool pop(W* /*wrapper*/, circ::u2_t& /*cur*/, F&& f, R&& out, E<DS, AS>* elems) {
165
- byte_t buff[DS];
166
- for (unsigned k = 0;;) {
167
- auto cur_rd = rd_.load(std::memory_order_relaxed);
168
- auto cur_wt = wt_.load(std::memory_order_acquire);
169
- auto id_rd = circ::index_of(cur_rd);
170
- auto id_wt = circ::index_of(cur_wt);
171
- if (id_rd == id_wt) {
172
- auto* el = elems + id_wt;
173
- auto cac_ct = el->f_ct_.load(std::memory_order_acquire);
174
- if ((~cac_ct) != cur_wt) {
175
- return false; // empty
176
- }
177
- if (el->f_ct_.compare_exchange_weak(cac_ct, 0, std::memory_order_relaxed)) {
178
- wt_.store(cur_wt + 1, std::memory_order_release);
179
- }
180
- k = 0;
181
- }
182
- else {
183
- std::memcpy(buff, &(elems[circ::index_of(cur_rd)].data_), sizeof(buff));
184
- if (rd_.compare_exchange_weak(cur_rd, cur_rd + 1, std::memory_order_release)) {
185
- std::forward<F>(f)(buff);
186
- std::forward<R>(out)(true);
187
- return true;
188
- }
189
- ipc::yield(k);
190
- }
191
- }
192
- }
193
- };
194
-
195
- template <>
196
- struct prod_cons_impl<wr<relat::single, relat::multi, trans::broadcast>> {
197
-
198
- using rc_t = std::uint64_t;
199
-
200
- enum : rc_t {
201
- ep_mask = 0x00000000ffffffffull,
202
- ep_incr = 0x0000000100000000ull
203
- };
204
-
205
- template <std::size_t DataSize, std::size_t AlignSize>
206
- struct elem_t {
207
- std::aligned_storage_t<DataSize, AlignSize> data_ {};
208
- std::atomic<rc_t> rc_ { 0 }; // read-counter
209
- };
210
-
211
- alignas(cache_line_size) std::atomic<circ::u2_t> wt_; // write index
212
- alignas(cache_line_size) rc_t epoch_ { 0 }; // only one writer
213
-
214
- circ::u2_t cursor() const noexcept {
215
- return wt_.load(std::memory_order_acquire);
216
- }
217
-
218
- template <typename W, typename F, typename E>
219
- bool push(W* wrapper, F&& f, E* elems) {
220
- E* el;
221
- for (unsigned k = 0;;) {
222
- circ::cc_t cc = wrapper->elems()->connections(std::memory_order_relaxed);
223
- if (cc == 0) return false; // no reader
224
- el = elems + circ::index_of(wt_.load(std::memory_order_relaxed));
225
- // check all consumers have finished reading this element
226
- auto cur_rc = el->rc_.load(std::memory_order_acquire);
227
- circ::cc_t rem_cc = cur_rc & ep_mask;
228
- if ((cc & rem_cc) && ((cur_rc & ~ep_mask) == epoch_)) {
229
- return false; // has not finished yet
230
- }
231
- // consider rem_cc to be 0 here
232
- if (el->rc_.compare_exchange_weak(
233
- cur_rc, epoch_ | static_cast<rc_t>(cc), std::memory_order_release)) {
234
- break;
235
- }
236
- ipc::yield(k);
237
- }
238
- std::forward<F>(f)(&(el->data_));
239
- wt_.fetch_add(1, std::memory_order_release);
240
- return true;
241
- }
242
-
243
- template <typename W, typename F, typename E>
244
- bool force_push(W* wrapper, F&& f, E* elems) {
245
- E* el;
246
- epoch_ += ep_incr;
247
- for (unsigned k = 0;;) {
248
- circ::cc_t cc = wrapper->elems()->connections(std::memory_order_relaxed);
249
- if (cc == 0) return false; // no reader
250
- el = elems + circ::index_of(wt_.load(std::memory_order_relaxed));
251
- // check all consumers have finished reading this element
252
- auto cur_rc = el->rc_.load(std::memory_order_acquire);
253
- circ::cc_t rem_cc = cur_rc & ep_mask;
254
- if (cc & rem_cc) {
255
- ipc::log("force_push: k = %u, cc = %u, rem_cc = %u\n", k, cc, rem_cc);
256
- cc = wrapper->elems()->disconnect_receiver(rem_cc); // disconnect all invalid readers
257
- if (cc == 0) return false; // no reader
258
- }
259
- // just compare & exchange
260
- if (el->rc_.compare_exchange_weak(
261
- cur_rc, epoch_ | static_cast<rc_t>(cc), std::memory_order_release)) {
262
- break;
263
- }
264
- ipc::yield(k);
265
- }
266
- std::forward<F>(f)(&(el->data_));
267
- wt_.fetch_add(1, std::memory_order_release);
268
- return true;
269
- }
270
-
271
- template <typename W, typename F, typename R, typename E>
272
- bool pop(W* wrapper, circ::u2_t& cur, F&& f, R&& out, E* elems) {
273
- if (cur == cursor()) return false; // acquire
274
- auto* el = elems + circ::index_of(cur++);
275
- std::forward<F>(f)(&(el->data_));
276
- for (unsigned k = 0;;) {
277
- auto cur_rc = el->rc_.load(std::memory_order_acquire);
278
- if ((cur_rc & ep_mask) == 0) {
279
- std::forward<R>(out)(true);
280
- return true;
281
- }
282
- auto nxt_rc = cur_rc & ~static_cast<rc_t>(wrapper->connected_id());
283
- if (el->rc_.compare_exchange_weak(cur_rc, nxt_rc, std::memory_order_release)) {
284
- std::forward<R>(out)((nxt_rc & ep_mask) == 0);
285
- return true;
286
- }
287
- ipc::yield(k);
288
- }
289
- }
290
- };
291
-
292
- template <>
293
- struct prod_cons_impl<wr<relat::multi, relat::multi, trans::broadcast>> {
294
-
295
- using rc_t = std::uint64_t;
296
- using flag_t = std::uint64_t;
297
-
298
- enum : rc_t {
299
- rc_mask = 0x00000000ffffffffull,
300
- ep_mask = 0x00ffffffffffffffull,
301
- ep_incr = 0x0100000000000000ull,
302
- ic_mask = 0xff000000ffffffffull,
303
- ic_incr = 0x0000000100000000ull
304
- };
305
-
306
- template <std::size_t DataSize, std::size_t AlignSize>
307
- struct elem_t {
308
- std::aligned_storage_t<DataSize, AlignSize> data_ {};
309
- std::atomic<rc_t > rc_ { 0 }; // read-counter
310
- std::atomic<flag_t> f_ct_ { 0 }; // commit flag
311
- };
312
-
313
- alignas(cache_line_size) std::atomic<circ::u2_t> ct_; // commit index
314
- alignas(cache_line_size) std::atomic<rc_t> epoch_ { 0 };
315
-
316
- circ::u2_t cursor() const noexcept {
317
- return ct_.load(std::memory_order_acquire);
318
- }
319
-
320
- constexpr static rc_t inc_rc(rc_t rc) noexcept {
321
- return (rc & ic_mask) | ((rc + ic_incr) & ~ic_mask);
322
- }
323
-
324
- constexpr static rc_t inc_mask(rc_t rc) noexcept {
325
- return inc_rc(rc) & ~rc_mask;
326
- }
327
-
328
- template <typename W, typename F, typename E>
329
- bool push(W* wrapper, F&& f, E* elems) {
330
- E* el;
331
- circ::u2_t cur_ct;
332
- rc_t epoch = epoch_.load(std::memory_order_acquire);
333
- for (unsigned k = 0;;) {
334
- circ::cc_t cc = wrapper->elems()->connections(std::memory_order_relaxed);
335
- if (cc == 0) return false; // no reader
336
- el = elems + circ::index_of(cur_ct = ct_.load(std::memory_order_relaxed));
337
- // check all consumers have finished reading this element
338
- auto cur_rc = el->rc_.load(std::memory_order_relaxed);
339
- circ::cc_t rem_cc = cur_rc & rc_mask;
340
- if ((cc & rem_cc) && ((cur_rc & ~ep_mask) == epoch)) {
341
- return false; // has not finished yet
342
- }
343
- else if (!rem_cc) {
344
- auto cur_fl = el->f_ct_.load(std::memory_order_acquire);
345
- if ((cur_fl != cur_ct) && cur_fl) {
346
- return false; // full
347
- }
348
- }
349
- // consider rem_cc to be 0 here
350
- if (el->rc_.compare_exchange_weak(
351
- cur_rc, inc_mask(epoch | (cur_rc & ep_mask)) | static_cast<rc_t>(cc), std::memory_order_relaxed) &&
352
- epoch_.compare_exchange_weak(epoch, epoch, std::memory_order_acq_rel)) {
353
- break;
354
- }
355
- ipc::yield(k);
356
- }
357
- // only one thread/process would touch here at one time
358
- ct_.store(cur_ct + 1, std::memory_order_release);
359
- std::forward<F>(f)(&(el->data_));
360
- // set flag & try update wt
361
- el->f_ct_.store(~static_cast<flag_t>(cur_ct), std::memory_order_release);
362
- return true;
363
- }
364
-
365
- template <typename W, typename F, typename E>
366
- bool force_push(W* wrapper, F&& f, E* elems) {
367
- E* el;
368
- circ::u2_t cur_ct;
369
- rc_t epoch = epoch_.fetch_add(ep_incr, std::memory_order_release) + ep_incr;
370
- for (unsigned k = 0;;) {
371
- circ::cc_t cc = wrapper->elems()->connections(std::memory_order_relaxed);
372
- if (cc == 0) return false; // no reader
373
- el = elems + circ::index_of(cur_ct = ct_.load(std::memory_order_relaxed));
374
- // check all consumers have finished reading this element
375
- auto cur_rc = el->rc_.load(std::memory_order_acquire);
376
- circ::cc_t rem_cc = cur_rc & rc_mask;
377
- if (cc & rem_cc) {
378
- ipc::log("force_push: k = %u, cc = %u, rem_cc = %u\n", k, cc, rem_cc);
379
- cc = wrapper->elems()->disconnect_receiver(rem_cc); // disconnect all invalid readers
380
- if (cc == 0) return false; // no reader
381
- }
382
- // just compare & exchange
383
- if (el->rc_.compare_exchange_weak(
384
- cur_rc, inc_mask(epoch | (cur_rc & ep_mask)) | static_cast<rc_t>(cc), std::memory_order_relaxed)) {
385
- if (epoch == epoch_.load(std::memory_order_acquire)) {
386
- break;
387
- }
388
- else if (push(wrapper, std::forward<F>(f), elems)) {
389
- return true;
390
- }
391
- epoch = epoch_.fetch_add(ep_incr, std::memory_order_release) + ep_incr;
392
- }
393
- ipc::yield(k);
394
- }
395
- // only one thread/process would touch here at one time
396
- ct_.store(cur_ct + 1, std::memory_order_release);
397
- std::forward<F>(f)(&(el->data_));
398
- // set flag & try update wt
399
- el->f_ct_.store(~static_cast<flag_t>(cur_ct), std::memory_order_release);
400
- return true;
401
- }
402
-
403
- template <typename W, typename F, typename R, typename E, std::size_t N>
404
- bool pop(W* wrapper, circ::u2_t& cur, F&& f, R&& out, E(& elems)[N]) {
405
- auto* el = elems + circ::index_of(cur);
406
- auto cur_fl = el->f_ct_.load(std::memory_order_acquire);
407
- if (cur_fl != ~static_cast<flag_t>(cur)) {
408
- return false; // empty
409
- }
410
- ++cur;
411
- std::forward<F>(f)(&(el->data_));
412
- for (unsigned k = 0;;) {
413
- auto cur_rc = el->rc_.load(std::memory_order_acquire);
414
- if ((cur_rc & rc_mask) == 0) {
415
- std::forward<R>(out)(true);
416
- el->f_ct_.store(cur + N - 1, std::memory_order_release);
417
- return true;
418
- }
419
- auto nxt_rc = inc_rc(cur_rc) & ~static_cast<rc_t>(wrapper->connected_id());
420
- bool last_one = false;
421
- if ((last_one = (nxt_rc & rc_mask) == 0)) {
422
- el->f_ct_.store(cur + N - 1, std::memory_order_release);
423
- }
424
- if (el->rc_.compare_exchange_weak(cur_rc, nxt_rc, std::memory_order_release)) {
425
- std::forward<R>(out)(last_one);
426
- return true;
427
- }
428
- ipc::yield(k);
429
- }
430
- }
431
- };
432
-
433
- } // namespace ipc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/test_project/latex/attention/background.tex DELETED
@@ -1,58 +0,0 @@
1
- The goal of reducing sequential computation also forms the foundation of the Extended Neural GPU \citep{extendedngpu}, ByteNet \citep{NalBytenet2017} and ConvS2S \citep{JonasFaceNet2017}, all of which use convolutional neural networks as basic building block, computing hidden representations in parallel for all input and output positions. In these models, the number of operations required to relate signals from two arbitrary input or output positions grows in the distance between positions, linearly for ConvS2S and logarithmically for ByteNet. This makes it more difficult to learn dependencies between distant positions \citep{hochreiter2001gradient}. In the Transformer this is reduced to a constant number of operations, albeit at the cost of reduced effective resolution due to averaging attention-weighted positions, an effect we counteract with Multi-Head Attention as described in section~\ref{sec:attention}.
2
-
3
- Self-attention, sometimes called intra-attention, is an attention mechanism relating different positions of a single sequence in order to compute a representation of the sequence. Self-attention has been used successfully in a variety of tasks including reading comprehension, abstractive summarization, textual entailment and learning task-independent sentence representations \citep{cheng2016long, decomposableAttnModel, paulus2017deep, lin2017structured}.
4
-
5
- End-to-end memory networks are based on a recurrent attention mechanism instead of sequence-aligned recurrence and have been shown to perform well on simple-language question answering and language modeling tasks \citep{sukhbaatar2015}.
6
-
7
- To the best of our knowledge, however, the Transformer is the first transduction model relying entirely on self-attention to compute representations of its input and output without using sequence-aligned RNNs or convolution.
8
- In the following sections, we will describe the Transformer, motivate self-attention and discuss its advantages over models such as \citep{neural_gpu, NalBytenet2017} and \citep{JonasFaceNet2017}.
9
-
10
-
11
- %\citep{JonasFaceNet2017} report new SOTA on machine translation for English-to-German (EnDe), English-to-French (EnFr) and English-to-Romanian language pairs.
12
-
13
- %For example,! in MT, we must draw information from both input and previous output words to translate an output word accurately. An attention layer \citep{bahdanau2014neural} can connect a very large number of positions at low computation cost, making it an essential ingredient in competitive recurrent models for machine translation.
14
-
15
- %A natural question to ask then is, "Could we replace recurrence with attention?". \marginpar{Don't know if it's the most natural question to ask given the previous statements. Also, need to say that the complexity table summarizes these statements} Such a model would be blessed with the computational efficiency of attention and the power of cross-positional communication. In this work, we show that pure attention models work remarkably well for MT, achieving new SOTA results on EnDe and EnFr, and can be trained in under $2$ days on xyz architecture.
16
-
17
- %After the seminal models introduced in \citep{sutskever14, bahdanau2014neural, cho2014learning}, recurrent models have become the dominant solution for both sequence modeling and sequence-to-sequence transduction. Many efforts such as \citep{wu2016google,luong2015effective,jozefowicz2016exploring} have pushed the boundaries of machine translation (MT) and language modeling with recurrent encoder-decoder and recurrent language models. Recent effort \citep{shazeer2017outrageously} has successfully combined the power of conditional computation with sequence models to train very large models for MT, pushing SOTA at lower computational cost.
18
-
19
- %Recurrent models compute a vector of hidden states $h_t$, for each time step $t$ of computation. $h_t$ is a function of both the input at time $t$ and the previous hidden state $h_{t-1}$. This dependence on the previous hidden state precludes processing all timesteps at once, instead requiring long sequences of sequential operations. In practice, this results in greatly reduced computational efficiency, as on modern computing hardware, a single operation on a large batch is much faster than a large number of operations on small batches. The problem gets worse at longer sequence lengths. Although sequential computation is not a severe bottleneck at inference time, as autoregressively generating each output requires all previous outputs, the inability to compute scores at all output positions at once hinders us from rapidly training our models over large datasets. Although impressive work such as \citep{Kuchaiev2017Factorization} is able to significantly accelerate the training of LSTMs with factorization tricks, we are still bound by the linear dependence on sequence length.
20
-
21
- %If the model could compute hidden states at each time step using only the inputs and outputs, it would be liberated from the dependence on results from previous time steps during training. This line of thought is the foundation of recent efforts such as the Markovian neural GPU \citep{neural_gpu}, ByteNet \citep{NalBytenet2017} and ConvS2S \citep{JonasFaceNet2017}, all of which use convolutional neural networks as a building block to compute hidden representations simultaneously for all timesteps, resulting in $O(1)$ sequential time complexity. \citep{JonasFaceNet2017} report new SOTA on machine translation for English-to-German (EnDe), English-to-French (EnFr) and English-to-Romanian language pairs.
22
-
23
- %A crucial component for accurate sequence prediction is modeling cross-positional communication. For example, in MT, we must draw information from both input and previous output words to translate an output word accurately. An attention layer \citep{bahdanau2014neural} can connect a very large number of positions at a low computation cost, also $O(1)$ sequential time complexity, making it an essential ingredient in recurrent encoder-decoder architectures for MT. A natural question to ask then is, "Could we replace recurrence with attention?". \marginpar{Don't know if it's the most natural question to ask given the previous statements. Also, need to say that the complexity table summarizes these statements} Such a model would be blessed with the computational efficiency of attention and the power of cross-positional communication. In this work, we show that pure attention models work remarkably well for MT, achieving new SOTA results on EnDe and EnFr, and can be trained in under $2$ days on xyz architecture.
24
-
25
-
26
-
27
- %Note: Facebook model is no better than RNNs in this regard, since it requires a number of layers proportional to the distance you want to communicate. Bytenet is more promising, since it requires a logarithmic number of layers (does bytenet have SOTA results)?
28
-
29
- %Note: An attention layer can connect a very large number of positions at a low computation cost in O(1) sequential operations. This is why encoder-decoder attention has been so successful in seq-to-seq models so far. It is only natural, then, to also use attention to connect the timesteps of the same sequence.
30
-
31
- %Note: I wouldn't say that long sequences are not a problem during inference. It would be great if we could infer with no long sequences. We could just say later on that, while our training graph is constant-depth, our model still requires sequential operations in the decoder part during inference due to the autoregressive nature of the model.
32
-
33
- %\begin{table}[h!]
34
- %\caption{Attention models are quite efficient for cross-positional communications when sequence length is smaller than channel depth. $n$ represents the sequence length and $d$ represents the channel depth.}
35
- %\label{tab:op_complexities}
36
- %\begin{center}
37
- %\vspace{-5pt}
38
- %\scalebox{0.75}{
39
-
40
- %\begin{tabular}{l|c|c|c}
41
- %\hline \hline
42
- %Layer Type & Receptive & Complexity & Sequential \\
43
- % & Field & & Operations \\
44
- %\hline
45
- %Pointwise Feed-Forward & $1$ & $O(n \cdot d^2)$ & $O(1)$ \\
46
- %\hline
47
- %Recurrent & $n$ & $O(n \cdot d^2)$ & $O(n)$ \\
48
- %\hline
49
- %Convolutional & $r$ & $O(r \cdot n \cdot d^2)$ & $O(1)$ \\
50
- %\hline
51
- %Convolutional (separable) & $r$ & $O(r \cdot n \cdot d + n %\cdot d^2)$ & $O(1)$ \\
52
- %\hline
53
- %Attention & $r$ & $O(r \cdot n \cdot d)$ & $O(1)$ \\
54
- %\hline \hline
55
- %\end{tabular}
56
- %}
57
- %\end{center}
58
- %\end{table}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/test_project/latex/attention/introduction.tex DELETED
@@ -1,18 +0,0 @@
1
- Recurrent neural networks, long short-term memory \citep{hochreiter1997} and gated recurrent \citep{gruEval14} neural networks in particular, have been firmly established as state of the art approaches in sequence modeling and transduction problems such as language modeling and machine translation \citep{sutskever14, bahdanau2014neural, cho2014learning}. Numerous efforts have since continued to push the boundaries of recurrent language models and encoder-decoder architectures \citep{wu2016google,luong2015effective,jozefowicz2016exploring}.
2
-
3
- Recurrent models typically factor computation along the symbol positions of the input and output sequences. Aligning the positions to steps in computation time, they generate a sequence of hidden states $h_t$, as a function of the previous hidden state $h_{t-1}$ and the input for position $t$. This inherently sequential nature precludes parallelization within training examples, which becomes critical at longer sequence lengths, as memory constraints limit batching across examples.
4
- %\marginpar{not sure if the memory constraints are understandable here}
5
- Recent work has achieved significant improvements in computational efficiency through factorization tricks \citep{Kuchaiev2017Factorization} and conditional computation \citep{shazeer2017outrageously}, while also improving model performance in case of the latter. The fundamental constraint of sequential computation, however, remains.
6
-
7
- %\marginpar{@all: there is work on analyzing what attention really does in seq2seq models, couldn't find it right away}
8
-
9
- Attention mechanisms have become an integral part of compelling sequence modeling and transduction models in various tasks, allowing modeling of dependencies without regard to their distance in the input or output sequences \citep{bahdanau2014neural, structuredAttentionNetworks}. In all but a few cases \citep{decomposableAttnModel}, however, such attention mechanisms are used in conjunction with a recurrent network.
10
-
11
- %\marginpar{not sure if "cross-positional communication" is understandable without explanation}
12
- %\marginpar{insert exact training times and stats for the model that reaches sota earliest, maybe even a single GPU model?}
13
-
14
- In this work we propose the Transformer, a model architecture eschewing recurrence and instead relying entirely on an attention mechanism to draw global dependencies between input and output. The Transformer allows for significantly more parallelization and can reach a new state of the art in translation quality after being trained for as little as twelve hours on eight P100 GPUs.
15
- %\marginpar{you removed the constant number of repetitions part. I wrote it because I wanted to make it clear that the model does not only perform attention once, while it's also not recurrent. I thought that might be important to get across early.}
16
-
17
- % Just a standard paragraph with citations, rewrite.
18
- %After the seminal papers of \citep{sutskever14}, \citep{bahdanau2014neural}, and \citep{cho2014learning}, recurrent models have become the dominant solution for both sequence modeling and sequence-to-sequence transduction. Many efforts such as \citep{wu2016google,luong2015effective,jozefowicz2016exploring} have pushed the boundaries of machine translation and language modeling with recurrent sequence models. Recent effort \citep{shazeer2017outrageously} has combined the power of conditional computation with sequence models to train very large models for machine translation, pushing SOTA at lower computational cost. Recurrent models compute a vector of hidden states $h_t$, for each time step $t$ of computation. $h_t$ is a function of both the input at time $t$ and the previous hidden state $h_{t-1}$. This dependence on the previous hidden state prevents recurrent models from processing multiple inputs at once, and their time complexity is a linear function of the length of the input and output, both during training and inference. [What I want to say here is that although this is fine during decoding, at training time, we are given both input and output and this linear nature does not allow the RNN to process all inputs and outputs simultaneously and haven't been used on datasets that are of the scale of the web. What's the largest dataset we have? Talk about Nvidia and possibly others' efforts to speed up things, and possibly other efforts that alleviate this, but are still limited by its computational nature]. Rest of the intro: What if you could construct the state based on the actual inputs and outputs, then you could construct them all at once. This has been the foundation of many promising recent efforts, bytenet, facenet (Also talk about quasi rnn here). Now we talk about attention!! 
Along with cell architectures such as long short-term memory (LSTM) \citep{hochreiter1997}, and gated recurrent units (GRUs) \citep{cho2014learning}, attention has emerged as an essential ingredient in successful sequence models, in particular for machine translation. In recent years, many, if not all, state-of-the-art (SOTA) results in machine translation have been achieved with attention-based sequence models \citep{wu2016google,luong2015effective,jozefowicz2016exploring}. Talk about the neon work on how it played with attention to do self attention! Then talk about what we do.