diff --git a/src/README.rst b/src/README.rst new file mode 100644 index 0000000000000000000000000000000000000000..f314573b80a1c6f897793df22bb7e7125c308700 --- /dev/null +++ b/src/README.rst @@ -0,0 +1,6 @@ +``pyhwp/`` - the main source code +--------------------------------- + +``hwp5/`` + The main source package. For now, there is not much documentation about the + source code. diff --git a/src/hwp5/COPYING b/src/hwp5/COPYING new file mode 100644 index 0000000000000000000000000000000000000000..dba13ed2ddf783ee8118c6a581dbf75305f816a3 --- /dev/null +++ b/src/hwp5/COPYING @@ -0,0 +1,661 @@ + GNU AFFERO GENERAL PUBLIC LICENSE + Version 3, 19 November 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU Affero General Public License is a free, copyleft license for +software and other kinds of works, specifically designed to ensure +cooperation with the community in the case of network server software. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +our General Public Licenses are intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. 
+ + Developers that use our General Public Licenses protect your rights +with two steps: (1) assert copyright on the software, and (2) offer +you this License which gives you legal permission to copy, distribute +and/or modify the software. + + A secondary benefit of defending all users' freedom is that +improvements made in alternate versions of the program, if they +receive widespread use, become available for other developers to +incorporate. Many developers of free software are heartened and +encouraged by the resulting cooperation. However, in the case of +software used on network servers, this result may fail to come about. +The GNU General Public License permits making a modified version and +letting the public access it on a server without ever releasing its +source code to the public. + + The GNU Affero General Public License is designed specifically to +ensure that, in such cases, the modified source code becomes available +to the community. It requires the operator of a network server to +provide the source code of the modified version running there to the +users of that server. Therefore, public use of a modified version, on +a publicly accessible server, gives the public access to the source +code of the modified version. + + An older license, called the Affero General Public License and +published by Affero, was designed to accomplish similar goals. This is +a different license, not a version of the Affero GPL, but Affero has +released a new version of the Affero GPL which permits relicensing under +this license. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU Affero General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". 
"Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. 
+ + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. 
Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. 
+ + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. 
This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. 
+ + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. 
+ + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. 
+ + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. 
If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. 
If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. 
+ + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. 
For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Remote Network Interaction; Use with the GNU General Public License. 
+ + Notwithstanding any other provision of this License, if you modify the +Program, your modified version must prominently offer all users +interacting with it remotely through a computer network (if your version +supports such interaction) an opportunity to receive the Corresponding +Source of your version by providing access to the Corresponding Source +from a network server at no charge, through some standard or customary +means of facilitating copying of software. This Corresponding Source +shall include the Corresponding Source for any work covered by version 3 +of the GNU General Public License that is incorporated pursuant to the +following paragraph. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the work with which it is combined will remain governed by version +3 of the GNU General Public License. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU Affero General Public License from time to time. Such new versions +will be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU Affero General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU Affero General Public License, you may choose any version ever published +by the Free Software Foundation. 
+ + If the Program specifies that a proxy can decide which future +versions of the GNU Affero General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. 
+ + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If your software can interact with users remotely through a computer +network, you should also make sure that it provides a way for users to +get its source. For example, if your program is a web application, its +interface could display a "Source" link that leads users to an archive +of the code. 
There are many ways you could offer source, and different +solutions will be better for different programs; see section 13 for the +specific requirements. + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU AGPL, see +. diff --git a/src/hwp5/README b/src/hwp5/README new file mode 100644 index 0000000000000000000000000000000000000000..c7c492effbb000726fb13b9e6a745dcf534dc632 --- /dev/null +++ b/src/hwp5/README @@ -0,0 +1,73 @@ +pyhwp +===== + +HWP Document Format v5 parser & processor. + +Features +-------- + +- Analyze and extract internal streams out from a HWP Document Format v5 file +- (*Experimental*) Conversion to OpenDocument format (.odt) or plain text (.txt) + +Installation +------------ + +from `pypi `_:: + + virtualenv pyhwp + pyhwp/bin/pip install --pre pyhwp # Install pyhwp into a virtualenv directory + +Or:: + + pip install --user --pre pyhwp # Install pyhwp into user's home directory + +Requirements +------------ + +- Python 2.7, 3.5, 3.6, 3.7 or 3.8 +- `cryptography `_ +- `lxml `_ +- `olefile `_ + +Documentation & Development +--------------------------- + +- Documentation: `https://pyhwp.readthedocs.io `_ [`한국/조선어 `_] +- Distribution: `https://pypi.org/project/pyhwp/ `_ +- Development: `https://github.com/mete0r/pyhwp `_ +- Issue tracker: `https://github.com/mete0r/pyhwp/issues `_ +- Feedbacks & contributions are welcome! + +Contributors +------------ + +Maintainer: `mete0r `_ + +License +------- + +Copyright (C) 2010-2023 mete0r + +.. 
image:: http://www.gnu.org/graphics/agplv3-155x51.png + +`GNU Affero General Public License v3.0 `_ +`(text version) `_ + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . + +Disclosure +---------- + +This program has been developed in accordance with a public document named "HWP +Binary Specification 1.1" published by `Hancom Inc. `_ diff --git a/src/hwp5/VERSION.txt b/src/hwp5/VERSION.txt new file mode 100644 index 0000000000000000000000000000000000000000..6d7de6e6abef13b18021a3591debc53ac00616d4 --- /dev/null +++ b/src/hwp5/VERSION.txt @@ -0,0 +1 @@ +1.0.2 diff --git a/src/hwp5/__init__.py b/src/hwp5/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1db871867b199fc72fe539dd082cde38d9195922 --- /dev/null +++ b/src/hwp5/__init__.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. 
+# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +from __future__ import with_statement +import os.path + +try: + with open(os.path.join(os.path.dirname(__file__), 'VERSION.txt')) as f: + __version__ = f.read().strip() + del f +except Exception: + __version__ = '0.0-unknown' diff --git a/src/hwp5/binmodel/__init__.py b/src/hwp5/binmodel/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a48116d99c7c3314bd493c89e07726abb814c387 --- /dev/null +++ b/src/hwp5/binmodel/__init__.py @@ -0,0 +1,549 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals +from io import BytesIO +from itertools import takewhile +import json +import logging +import inspect + +from .. 
import recordstream +from ..bintype import ERROREVENT +from ..bintype import resolve_type_events +from ..bintype import resolve_values_from_stream +from ..dataio import ParseError +from ..dataio import dumpbytes +from ..recordstream import nth +from ..tagids import tagnames +from ..treeop import STARTEVENT +from ..treeop import ENDEVENT +from ..treeop import prefix_ancestors_from_level +from ..utils import JsonObjects + +from ._shared import tag_models +from ._shared import RecordModel +from ._shared import BinStorageId +from ._shared import COLORREF +from ._shared import Margin +from .controlchar import CHID +from .controlchar import ControlChar +from .tagid16_document_properties import DocumentProperties +from .tagid17_id_mappings import IdMappings +from .tagid18_bin_data import BinData +from .tagid20_border_fill import BorderFill +from .tagid19_face_name import FaceName +from .tagid21_char_shape import CharShape +from .tagid21_char_shape import LanguageStruct +from .tagid22_tab_def import TabDef +from .tagid23_numbering import Numbering +from .tagid24_bullet import Bullet +from .tagid25_para_shape import ParaShape +from .tagid26_style import Style +from .tagid27_doc_data import DocData +from .tagid28_distribute_doc_data import DistributeDocData +from .tagid30_compatible_document import CompatibleDocument +from .tagid31_layout_compatibility import LayoutCompatibility +from .tagid32_unknown import TagModel32 +from .tagid50_para_header import Paragraph +from .tagid51_para_text import ParaText +from .tagid51_para_text import ParaTextChunks +from .tagid52_para_char_shape import ParaCharShape +from .tagid53_para_line_seg import ParaLineSeg +from .tagid53_para_line_seg import ParaLineSegList +from .tagid53_para_line_seg import LineSeg +from .tagid54_para_range_tag import ParaRangeTag +from .tagid55_ctrl_header import Control +from .controls.bookmark_control import BookmarkControl +from .controls.columns_def import ColumnsDef +from .controls.common_controls import 
CommonControl +from .controls.dutmal import Dutmal +from .controls.field import Field +from .controls.field import FieldUnknown +from .controls.field import FieldDate +from .controls.field import FieldDocDate +from .controls.field import FieldPath +from .controls.field import FieldBookmark +from .controls.field import FieldMailMerge +from .controls.field import FieldCrossRef +from .controls.field import FieldFormula +from .controls.field import FieldClickHere +from .controls.field import FieldClickHereData +from .controls.field import FieldSummary +from .controls.field import FieldUserInfo +from .controls.field import FieldHyperLink +from .controls.field import FieldMemo +from .controls.field import FieldPrivateInfoSecurity +from .controls.gshape_object_control import GShapeObjectControl +from .controls.header_footer import HeaderFooter +from .controls.header_footer import Header +from .controls.header_footer import Footer +from .controls.hidden_comment import HiddenComment +from .controls.index_marker import IndexMarker +from .controls.note import Note +from .controls.note import FootNote +from .controls.note import EndNote +from .controls.numbering import AutoNumbering +from .controls.numbering import NewNumbering +from .controls.page_hide import PageHide +from .controls.page_number_position import PageNumberPosition +from .controls.page_odd_even import PageOddEven +from .controls.section_def import SectionDef +from .controls.table_control import TableControl +from .controls.tcps_control import TCPSControl +from .tagid56_list_header import ListHeader +from .tagid56_list_header import TableCaption +from .tagid56_list_header import TableCell +from .tagid56_list_header import TextboxParagraphList +from .tagid56_list_header import HeaderParagraphList +from .tagid56_list_header import FooterParagraphList +from .tagid57_page_def import PageDef +from .tagid58_footnote_shape import FootnoteShape +from .tagid59_page_border_fill import PageBorderFill +from 
.tagid60_shape_component import ShapeComponent +from .tagid61_table import TableBody +from .tagid62_shape_component_line import ShapeLine +from .tagid63_shape_component_rectangle import ShapeRectangle +from .tagid64_shape_component_ellipse import ShapeEllipse +from .tagid65_shape_component_arc import ShapeArc +from .tagid66_shape_component_polygon import ShapePolygon +from .tagid67_shape_component_curve import ShapeCurve +from .tagid68_shape_component_ole import ShapeOLE +from .tagid69_shape_component_picture import ShapePicture +from .tagid70_shape_component_container import ShapeContainer +from .tagid71_ctrl_data import ControlData +from .tagid72_ctrl_eqedit import EqEdit +from .tagid74_shape_component_textart import ShapeTextArt +from .tagid75_form_object import FormObject +from .tagid76_memo_shape import MemoShape +from .tagid77_memo_list import MemoList +from .tagid78_forbidden_char import ForbiddenChar +from .tagid79_chart_data import ChartData +from .tagid99_shape_component_unknown import ShapeUnknown + +# to suppress pyflake8 warning 'imported but not used' +RecordModel +BinStorageId +COLORREF +Margin +DocumentProperties +BinData +BorderFill +CharShape +LanguageStruct +TabDef +Numbering +Bullet +ParaShape +Style +DocData +DistributeDocData +CompatibleDocument +LayoutCompatibility +TagModel32 +Paragraph +ParaText +ParaTextChunks +ParaCharShape +ParaLineSeg +ParaLineSegList +LineSeg +ParaRangeTag +Control +ListHeader +TableCaption +TableCell +TextboxParagraphList +PageDef +FootnoteShape +PageBorderFill +ShapeComponent +TableBody +ShapeLine +ShapeRectangle +ShapeEllipse +ShapeArc +ShapePolygon +ShapeCurve +ShapeOLE +ShapePicture +ShapeContainer +ControlData +EqEdit +ShapeTextArt +FormObject +MemoShape +MemoList +ForbiddenChar +ChartData +ShapeUnknown +CHID +ControlChar +BookmarkControl +ColumnsDef +CommonControl +Dutmal +Field +FieldUnknown +FieldDate +FieldDocDate +FieldPath +FieldBookmark +FieldMailMerge +FieldCrossRef +FieldFormula +FieldClickHere 
def init_record_parsing_context(base, record):
    ''' Build the context used to parse a single record.

    The new context is a shallow copy of ``base`` extended with two entries:

    - ``context['record']``: the record itself
    - ``context['stream']``: a fresh ``BytesIO`` over ``record['payload']``

    ``base`` itself is left untouched.

    :param base: the base context to be shallow-copied into the new one
    :param record: the record to be parsed; must have a ``'payload'`` item
    :returns: the new context
    '''
    context = dict(base)
    context['record'] = record
    context['stream'] = BytesIO(record['payload'])
    return context
def raise_on_errorevent(context, events):
    ''' Relay ``(event, item)`` pairs, raising on the first error event.

    Every pair is yielded to the consumer first and recorded afterwards, so
    the error event itself is also delivered before the exception is raised.

    :param context: parsing context; ``path``, ``treegroup`` and ``record``
        are copied onto the raised error when present
    :param events: iterable of ``(event, item)`` pairs
    :raises ParseError: when an event is ``ERROREVENT``; the error carries
        ``cause``, ``path``, ``treegroup``, ``record``, ``offset`` and the
        ``binevents`` relayed so far
    '''
    relayed = []
    for kind, payload in events:
        yield kind, payload
        relayed.append((kind, payload))
        if kind is not ERROREVENT:
            continue

        original_error = payload['exception']
        parse_error = ParseError('can\'t parse %s' % payload['type'])
        parse_error.cause = original_error
        parse_error.path = context.get('path')
        parse_error.treegroup = context.get('treegroup')
        parse_error.record = context.get('record')
        parse_error.offset = payload.get('bin_offset')
        parse_error.binevents = relayed
        raise parse_error
def get_extension_mro(cls, up_to_cls=None):
    ''' Classes of ``cls``'s MRO that define their own ``attributes``.

    The MRO is walked from ``cls`` towards its bases and the walk stops
    (exclusively) at ``up_to_cls``. Only classes with ``attributes`` in their
    own ``__dict__`` are kept, and they are returned base-first.

    :param cls: the class whose MRO is inspected
    :param up_to_cls: stop before this class; ``None`` walks the whole MRO
    :returns: a reverse iterator over the selected classes
    '''
    selected = []
    for klass in inspect.getmro(cls):
        if klass is up_to_cls:
            break
        if 'attributes' in klass.__dict__:
            selected.append(klass)
    return reversed(selected)
class DocInfo(ModelStream):
    ''' Model stream with lookups for ID mappings, face names and character
    shapes.

    Every property iterates ``self.models()`` from scratch on each access,
    so callers that need a value repeatedly should keep it in a local.
    '''

    @property
    def idmappings(self):
        ''' The first ``IdMappings`` model of the stream, or ``None``. '''
        for model in self.models():
            if model['type'] is IdMappings:
                return model
        return None

    @property
    def facenames_by_lang(self):
        ''' Map each language key to its slice of the ``FaceName`` models.

        The ``FaceName`` models are split in stream order, taking for each
        language the number of entries given by ``<lang>_fonts`` in the
        ``IdMappings`` content.

        :raises TypeError: if the stream has no ``IdMappings`` model
        '''
        facenames = [m for m in self.models() if m['type'] is FaceName]
        languages = 'ko', 'en', 'cn', 'jp', 'other', 'symbol', 'user'

        # ``self.idmappings`` iterates the models again on every access;
        # look it up once instead of once per language.
        font_counts = self.idmappings['content']

        facenames_by_lang = dict()
        offset = 0
        for lang in languages:
            n_fonts = font_counts[lang + '_fonts']
            facenames_by_lang[lang] = facenames[offset:offset + n_fonts]
            offset += n_fonts
        return facenames_by_lang

    @property
    def charshapes(self):
        ''' Generator over the ``CharShape`` models, in stream order. '''
        return (m for m in self.models()
                if m['type'] is CharShape)

    def get_charshape(self, charshape_id):
        ''' Return the ``charshape_id``-th ``CharShape`` model (via ``nth``).
        '''
        return nth(self.charshapes, charshape_id)

    def charshape_lang_facename(self, charshape_id, lang):
        ''' Return the ``FaceName`` model a character shape uses for ``lang``.

        :param charshape_id: index of the ``CharShape`` model
        :param lang: language key, e.g. ``'ko'`` or ``'en'``
        '''
        charshape = self.get_charshape(charshape_id)
        lang_facename_offset = charshape['content']['font_face'][lang]
        return self.facenames_by_lang[lang][lang_facename_offset]
class COLORREF(int, metaclass=PrimitiveType):
    ''' Integer colour value with per-channel accessors.

    The channels are read from the integer as follows:

    - ``r``: bits 0-7
    - ``g``: bits 8-15
    - ``b``: bits 16-23
    - ``a``: bits 24-31
    - ``rgb``: the tuple ``(r, g, b)``

    Any other unknown attribute raises ``AttributeError``.
    '''
    binfmt = INT32.binfmt
    never_instantiate = False

    def __getattr__(self, name):
        # Only called when normal attribute lookup fails.
        if name == 'r':
            return self & 0xff
        elif name == 'g':
            return (self & 0xff00) >> 8
        elif name == 'b':
            return (self & 0xff0000) >> 16
        elif name == 'a':
            return int((self & 0xff000000) >> 24)
        elif name == 'rgb':
            return self.r, self.g, self.b
        # Falling through would return None for every unknown name, which
        # makes hasattr() always true and hides typos.
        raise AttributeError('%r object has no attribute %r'
                             % (type(self).__name__, name))

    def __str__(self):
        return '#%02x%02x%02x' % (self.r, self.g, self.b)

    def __repr__(self):
        class_name = self.__class__.__name__
        value = '(0x%02x, 0x%02x, 0x%02x)' % self.rgb
        return class_name + value
class BorderLine(Struct):
    ''' Table 81: border line information '''

    # Enumerations referenced by the flag fields below.
    LineEnd = Enum('round', 'flat')
    ArrowShape = Enum('none', 'arrow', 'arrow2', 'diamond', 'circle', 'rect',
                      'diamondfilled', 'disc', 'rectfilled')
    ArrowSize = Enum('smallest', 'smaller', 'small', 'abitsmall', 'normal',
                     'abitlarge', 'large', 'larger', 'largest')

    # The string below reads "Table 82: border line information attributes".
    ''' 표 82 테두리 선 정보 속성 '''
    # The right-hand ``Flags`` is the module-level ``Flags`` imported from
    # dataio: the class attribute of the same name is bound only after the
    # call returns.
    # NOTE(review): arguments look like (first bit, last bit, enum, name)
    # groups followed by (bit, name) pairs - confirm against dataio.Flags.
    Flags = Flags(UINT32,
                  0, 5, Border.StrokeEnum, 'stroke',
                  6, 9, LineEnd, 'line_end',
                  10, 15, ArrowShape, 'arrow_start',
                  16, 21, ArrowShape, 'arrow_end',
                  22, 25, ArrowSize, 'arrow_start_size',
                  26, 29, ArrowSize, 'arrow_end_size',
                  30, 'arrow_start_fill',
                  31, 'arrow_end_fill')

    # Yields the (type, name) pairs of the struct members, in order.
    def attributes(cls):
        yield COLORREF, 'color'
        yield INT32, 'width'
        yield cls.Flags, 'flags'
    attributes = classmethod(attributes)
# Gradation fill: yields the (type, name) pairs of its members, in order.
class FillGradation(Fill):
    def attributes():
        yield GradationTypeFlags, 'type',
        yield UINT32, 'shear',   # shear (slant) angle, in degrees
        yield Coord32, 'center',
        yield UINT32, 'blur',  # amount of blur: 0-100
        # TODO: according to spec 1.2, when there are more than 2 colors
        # an array of the positions where the color changes follows
        yield N_ARRAY(UINT32, COLORREF), 'colors',
    attributes = staticmethod(attributes)
+ member_name + return f diff --git a/src/hwp5/binmodel/controlchar.py b/src/hwp5/binmodel/controlchar.py new file mode 100644 index 0000000000000000000000000000000000000000..a258f018af3e6bba6288aacc5a5c9b8d8653b5b4 --- /dev/null +++ b/src/hwp5/binmodel/controlchar.py @@ -0,0 +1,215 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+# +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals +import re +import sys + + + +from hwp5.dataio import PrimitiveType +from hwp5.dataio import UINT32 +from hwp5.dataio import UINT16 +from hwp5.dataio import UINT8 + + +PY3 = sys.version_info.major == 3 +if PY3: + unichr = chr + + +class CHID(str, metaclass=PrimitiveType): + + fixed_size = 4 + + # Common controls + GSO = 'gso ' + TBL = 'tbl ' + LINE = '$lin' + RECT = '$rec' + ELLI = '$ell' + ARC = '$arc' + POLY = '$pol' + CURV = '$cur' + EQED = 'eqed' + PICT = '$pic' + OLE = '$ole' + CONTAINER = '$con' + + # Controls + SECD = 'secd' + COLD = 'cold' + HEADER = 'head' + FOOTER = 'foot' + FN = 'fn ' + EN = 'en ' + ATNO = 'atno' + NWNO = 'nwno' + PGHD = 'pghd' + PGCT = 'pgct' + PGNP = 'pgnp' + IDXM = 'idxm' + BOKM = 'bokm' + TCPS = 'tcps' + TDUT = 'tdut' + TCMT = 'tcmt' + + # Field starts + FIELD_UNK = '%unk' + FIELD_DTE = '%dte' + FIELD_DDT = '%ddt' + FIELD_PAT = '%pat' + FIELD_BMK = '%bmk' + FIELD_MMG = '%mmg' + FIELD_XRF = '%xrf' + FIELD_FMU = '%fmu' + FIELD_CLK = '%clk' + FIELD_SMR = '%smr' + FIELD_USR = '%usr' + FIELD_HLK = '%hlk' + FIELD_REVISION_SIGN = '%sig' + FIELD_REVISION_DELETE = '%%*d' + FIELD_REVISION_ATTACH = '%%*a' + FIELD_REVISION_CLIPPING = '%%*C' + FIELD_REVISION_SAWTOOTH = '%%*S' + FIELD_REVISION_THINKING = '%%*T' + FIELD_REVISION_PRAISE = '%%*P' + FIELD_REVISION_LINE = '%%*L' + FIELD_REVISION_SIMPLECHANGE = '%%*c' + FIELD_REVISION_HYPERLINK = '%%*h' + FIELD_REVISION_LINEATTACH = '%%*A' + FIELD_REVISION_LINELINK = '%%*i' + FIELD_REVISION_LINETRANSFER = '%%*t' + FIELD_REVISION_RIGHTMOVE = '%%*r' + FIELD_REVISION_LEFTMOVE = '%%*l' + FIELD_REVISION_TRANSFER = '%%*n' + FIELD_REVISION_SIMPLEINSERT = '%%*e' + FIELD_REVISION_SPLIT = '%spl' + FIELD_REVISION_CHANGE = '%%mr' + FIELD_MEMO = '%%me' + FIELD_PRIVATE_INFO_SECURITY = '%cpr' + + def decode(bytes, context=None): + if PY3: + return ( + chr(bytes[3]) + + chr(bytes[2]) + + 
class ControlChar(object):
    ''' Registry of the control characters (codes 0x00-0x1f) found in
    paragraph text, with helpers to locate and decode them in UTF-16LE data.

    ``chars`` maps each code to ``(name, kind)``. ``kind.size`` is the
    length of the control character in 16-bit units, as ``find()`` turns it
    into a byte length by doubling it.
    '''

    class CHAR(object):
        size = 1

    class INLINE(object):
        size = 8

    class EXTENDED(object):
        size = 8
    chars = {0x00: ('NULL', CHAR),
             0x01: ('CTLCHR01', EXTENDED),
             0x02: ('SECTION_COLUMN_DEF', EXTENDED),
             0x03: ('FIELD_START', EXTENDED),
             0x04: ('FIELD_END', INLINE),
             0x05: ('CTLCHR05', INLINE),
             0x06: ('CTLCHR06', INLINE),
             0x07: ('CTLCHR07', INLINE),
             0x08: ('TITLE_MARK', INLINE),
             0x09: ('TAB', INLINE),
             0x0a: ('LINE_BREAK', CHAR),
             0x0b: ('DRAWING_TABLE_OBJECT', EXTENDED),
             0x0c: ('CTLCHR0C', EXTENDED),
             0x0d: ('PARAGRAPH_BREAK', CHAR),
             0x0e: ('CTLCHR0E', EXTENDED),
             0x0f: ('HIDDEN_EXPLANATION', EXTENDED),
             0x10: ('HEADER_FOOTER', EXTENDED),
             0x11: ('FOOT_END_NOTE', EXTENDED),
             0x12: ('AUTO_NUMBER', EXTENDED),
             0x13: ('CTLCHR13', INLINE),
             0x14: ('CTLCHR14', INLINE),
             0x15: ('PAGE_CTLCHR', EXTENDED),
             0x16: ('BOOKMARK', EXTENDED),
             0x17: ('CTLCHR17', EXTENDED),
             0x18: ('HYPHEN', CHAR),
             # REGEX_CONTROL_CHAR matches every code in 0x00-0x1f, so codes
             # without an entry made find()/decode()/get_kind_by_code()
             # raise KeyError. The names equal what get_name_by_code()
             # already returned for these codes.
             # NOTE(review): CHAR kind assumed for these codes - confirm
             # against the HWP 5.0 control character table.
             0x19: ('CTLCHR19', CHAR),
             0x1a: ('CTLCHR1a', CHAR),
             0x1b: ('CTLCHR1b', CHAR),
             0x1c: ('CTLCHR1c', CHAR),
             0x1d: ('CTLCHR1d', CHAR),
             0x1e: ('NONBREAK_SPACE', CHAR),
             0x1f: ('FIXWIDTH_SPACE', CHAR)}
    # Lookup tables keyed by the character itself rather than its code.
    names = dict((unichr(code), name) for code, (name, kind) in chars.items())
    kinds = dict((unichr(code), kind) for code, (name, kind) in chars.items())

    def _populate(cls):
        ''' Expose every control character as a class attribute named after
        it, e.g. ``ControlChar.TAB``. '''
        for ch, name in cls.names.items():
            setattr(cls, name, ch)
    _populate = classmethod(_populate)
    REGEX_CONTROL_CHAR = re.compile(b'[\x00-\x1f]\x00')

    def find(cls, data, start_idx):
        ''' Locate the next control character in UTF-16LE ``data``.

        :param data: the encoded text, as bytes
        :param start_idx: byte offset to start searching from
        :returns: ``(start, end)`` byte offsets of the control character, or
            ``(len(data), len(data))`` when there is none
        '''
        while True:
            m = cls.REGEX_CONTROL_CHAR.search(data, start_idx)
            if m is not None:
                i = m.start()
                if i & 1 == 1:
                    # A match at an odd offset straddles two 16-bit units;
                    # resume from the next even offset.
                    start_idx = i + 1
                    continue
                if PY3:
                    char = unichr(data[i])
                else:
                    char = unichr(ord(data[i]))
                size = cls.kinds[char].size
                return i, i + (size * 2)
            data_len = len(data)
            return data_len, data_len
    find = classmethod(find)

    def decode(cls, bytes):
        ''' Decode the bytes of one control character into a dict.

        :param bytes: the encoded control character, starting at its code
        :returns: ``dict(code=...)`` for kinds of size 1; for kinds of size
            8 also ``param``, plus ``chid`` unless the character is TAB
        '''
        code = UINT16.decode(bytes[0:2])
        ch = unichr(code)
        if cls.kinds[ch].size == 8:
            # 12 bytes of parameters follow the 2-byte code
            bytes = bytes[2:2 + 12]
            if ch == ControlChar.TAB:
                param = dict(width=UINT32.decode(bytes[0:4]),
                             unknown0=UINT8.decode(bytes[4:5]),
                             unknown1=UINT8.decode(bytes[5:6]),
                             unknown2=bytes[6:])
                return dict(code=code, param=param)
            else:
                chid = CHID.decode(bytes[0:4])
                param = bytes[4:12]
                return dict(code=code, chid=chid, param=param)
        else:
            return dict(code=code)
    decode = classmethod(decode)

    def get_kind_by_code(cls, code):
        ''' Return the kind class (CHAR, INLINE or EXTENDED) for ``code``. '''
        ch = unichr(code)
        return cls.kinds[ch]
    get_kind_by_code = classmethod(get_kind_by_code)

    def get_name_by_code(cls, code):
        ''' Return the name for ``code``, or ``CTLCHR<hex>`` if unknown. '''
        ch = unichr(code)
        return cls.names.get(ch, 'CTLCHR%02x' % code)
    get_name_by_code = classmethod(get_name_by_code)


# Attach the named constants (ControlChar.TAB, ...) once the class exists.
ControlChar._populate()
+# +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals + +from hwp5.binmodel.controls.bookmark_control import BookmarkControl +from hwp5.binmodel.controls.columns_def import ColumnsDef +from hwp5.binmodel.controls.common_controls import CommonControl +from hwp5.binmodel.controls.dutmal import Dutmal +from hwp5.binmodel.controls.field import Field +from hwp5.binmodel.controls.field import FieldUnknown +from hwp5.binmodel.controls.field import FieldDate +from hwp5.binmodel.controls.field import FieldDocDate +from hwp5.binmodel.controls.field import FieldPath +from hwp5.binmodel.controls.field import FieldBookmark +from hwp5.binmodel.controls.field import FieldMailMerge +from hwp5.binmodel.controls.field import FieldCrossRef +from hwp5.binmodel.controls.field import FieldFormula +from hwp5.binmodel.controls.field import FieldClickHere +from hwp5.binmodel.controls.field import FieldClickHereData +from hwp5.binmodel.controls.field import FieldSummary +from hwp5.binmodel.controls.field import FieldUserInfo +from hwp5.binmodel.controls.field import FieldHyperLink +from hwp5.binmodel.controls.field import FieldMemo +from hwp5.binmodel.controls.field import FieldPrivateInfoSecurity +from hwp5.binmodel.controls.gshape_object_control import GShapeObjectControl +from hwp5.binmodel.controls.header_footer import HeaderFooter +from hwp5.binmodel.controls.hidden_comment import HiddenComment +from hwp5.binmodel.controls.index_marker import IndexMarker +from hwp5.binmodel.controls.note import Note +from hwp5.binmodel.controls.note import FootNote +from hwp5.binmodel.controls.note import EndNote +from hwp5.binmodel.controls.numbering import AutoNumbering +from hwp5.binmodel.controls.numbering import NewNumbering +from hwp5.binmodel.controls.page_hide import PageHide +from hwp5.binmodel.controls.page_number_position import PageNumberPosition +from hwp5.binmodel.controls.page_odd_even import PageOddEven +from 
hwp5.binmodel.controls.section_def import SectionDef +from hwp5.binmodel.controls.table_control import TableControl +from hwp5.binmodel.controls.tcps_control import TCPSControl + +# suppress pyflake8 warning 'imported but not used' +BookmarkControl +ColumnsDef +CommonControl +Dutmal +Field +FieldUnknown +FieldDate +FieldDocDate +FieldPath +FieldBookmark +FieldMailMerge +FieldCrossRef +FieldFormula +FieldClickHere +FieldClickHereData +FieldSummary +FieldUserInfo +FieldHyperLink +FieldMemo +FieldPrivateInfoSecurity +GShapeObjectControl +HeaderFooter +HiddenComment +IndexMarker +Note +FootNote +EndNote +AutoNumbering +NewNumbering +PageHide +PageNumberPosition +PageOddEven +SectionDef +TableControl +TCPSControl diff --git a/src/hwp5/binmodel/controls/_shared.py b/src/hwp5/binmodel/controls/_shared.py new file mode 100644 index 0000000000000000000000000000000000000000..ea509fe419b867bd9ba6f470e297c7a4f737f936 --- /dev/null +++ b/src/hwp5/binmodel/controls/_shared.py @@ -0,0 +1,59 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
# Registry filled by ControlType: control id (chid) -> class declaring it.
control_models = {}


class ControlType(RecordModelType):
    # Metaclass that records every class whose body declares `chid`.

    def __new__(mcs, name, bases, attrs):
        cls = RecordModelType.__new__(mcs, name, bases, attrs)
        if 'chid' not in attrs:
            return cls
        key = attrs['chid']
        # Each control id may be claimed by one class only.
        assert key not in control_models
        control_models[key] = cls
        return cls


class Control(RecordModel, metaclass=ControlType):
    ''' 4.2.6. Control header '''

    tagid = HWPTAG_CTRL_HEADER

    @staticmethod
    def attributes():
        yield CHID, 'chid'

    extension_types = control_models

    # NOTE(review): the docstring below looks like a machine-readable key
    # description and is kept verbatim -- confirm whether tooling reads it.
    @classmethod
    def get_extension_key(cls, context, model):
        ''' chid '''
        content = model['content']
        return content['chid']
class BookmarkControl(Control):
    ''' 4.2.10.11. Bookmark '''
    chid = CHID.BOKM

    @staticmethod
    def attributes():
        # Empty generator: no members are declared here.
        yield from ()


class BookmarkControlData(ControlData):
    # Control data record attached to BookmarkControl.

    parent_model_type = BookmarkControl

    @staticmethod
    def attributes():
        yield from (
            (UINT32, 'unknown1'),
            (UINT32, 'unknown2'),
            (UINT16, 'unknown3'),
            (BSTR, 'name'),
        )
class Column0(Struct):
    # Struct with a single WORD member, `width`.

    @staticmethod
    def attributes():
        yield WORD, 'width'


class Column(Struct):
    # Struct with two WORD members: `spacing`, then `width`.

    @staticmethod
    def attributes():
        yield from ((WORD, 'spacing'), (WORD, 'width'))


class ColumnsDef(Control):
    ''' 4.2.10.2. Column definition '''
    chid = CHID.COLD

    Kind = Enum('normal', 'distribute', 'parallel')
    Direction = Enum('l2r', 'r2l', 'both')
    Flags = Flags(UINT16,
                  0, 1, Kind, 'kind',
                  2, 9, 'count',
                  10, 11, Direction, 'direction',
                  12, 'same_widths')

    @classmethod
    def attributes(cls):
        # NOTE(review): the docstring and the inner function name below are
        # kept verbatim in case tooling reads them -- confirm.
        def not_same_widths(context, values):
            ''' flags.same_widths == 0 '''
            return not values['flags'].same_widths

        def n_entries(member_ref):
            # Wrap `member_ref` so that it evaluates to one less.
            def n_entries(context, values):
                return member_ref(context, values) - 1
            return n_entries

        yield cls.Flags, 'flags'
        yield HWPUNIT16, 'spacing'

        # `column0` and `columns` are both conditioned on not_same_widths.
        yield dict(name='column0', type=Column0,
                   condition=not_same_widths)
        remaining = n_entries(ref_member_flag('flags', 'count'))
        yield dict(name='columns',
                   type=X_ARRAY(Column, remaining),
                   condition=not_same_widths)
        yield UINT16, 'attr2'
        yield Border, 'splitter'
class CommonControl(Control):
    # Base control carrying the common object properties; it declares no
    # `chid` of its own.

    # Table 65: attribute flags of the common object properties
    Flow = Enum(FLOAT=0, BLOCK=1, BACK=2, FRONT=3)
    TextSide = Enum(BOTH=0, LEFT=1, RIGHT=2, LARGER=3)
    VRelTo = Enum(PAPER=0, PAGE=1, PARAGRAPH=2)
    HRelTo = Enum(PAPER=0, PAGE=1, COLUMN=2, PARAGRAPH=3)
    VAlign = Enum(TOP=0, MIDDLE=1, BOTTOM=2)
    HAlign = Enum(LEFT=0, CENTER=1, RIGHT=2, INSIDE=3, OUTSIDE=4)
    WidthRelTo = Enum(PAPER=0, PAGE=1, COLUMN=2, PARAGRAPH=3, ABSOLUTE=4)
    HeightRelTo = Enum(PAPER=0, PAGE=1, ABSOLUTE=2)
    NumberCategory = Enum(NONE=0, FIGURE=1, TABLE=2, EQUATION=3)

    CommonControlFlags = Flags(
        UINT32,
        0, 'inline',
        2, 'affect_line_spacing',
        3, 4, VRelTo, 'vrelto',
        5, 7, VAlign, 'valign',
        8, 9, HRelTo, 'hrelto',
        10, 12, HAlign, 'halign',
        13, 'restrict_in_page',
        14, 'overlap_others',
        15, 17, WidthRelTo, 'width_relto',
        18, 19, HeightRelTo, 'height_relto',
        20, 'protect_size_when_vrelto_paragraph',
        21, 23, Flow, 'flow',
        24, 25, TextSide, 'text_side',
        26, 27, NumberCategory, 'number_category',
    )

    MARGIN_LEFT = 0
    MARGIN_RIGHT = 1
    MARGIN_TOP = 2
    MARGIN_BOTTOM = 3

    @classmethod
    def attributes(cls):
        ''' Table 64: common object properties '''
        yield from (
            (cls.CommonControlFlags, 'flags'),
            (SHWPUNIT, 'y'),  # DIFFSPEC
            (SHWPUNIT, 'x'),  # DIFFSPEC
            (HWPUNIT, 'width'),
            (HWPUNIT, 'height'),
            (INT16, 'z_order'),
            (INT16, 'unknown1'),
            (Margin, 'margin'),
            (UINT32, 'instance_id'),
        )
        # The last two members carry a version tuple.
        yield dict(type=INT16, name='unknown2', version=(5, 0, 0, 5))
        yield dict(type=BSTR, name='description', version=(5, 0, 0, 5))
class Dutmal(Control):
    ''' 4.2.10.13. Dutmal (annotation text attached to main text) '''
    chid = CHID.TDUT
    Position = Enum(ABOVE=0, BELOW=1, CENTER=2)
    Align = Enum(BOTH=0, LEFT=1, RIGHT=2, CENTER=3, DISTRIBUTE=4,
                 DISTRIBUTE_SPACE=5)

    @classmethod
    def attributes(cls):
        yield BSTR, 'maintext'
        yield BSTR, 'subtext'
        # Whole 32-bit word interpreted as a Position value.
        position_flags = Flags(UINT32,
                               0, 31, cls.Position, 'position')
        yield position_flags, 'position-flags'
        yield UINT32, 'fsizeratio'
        yield UINT32, 'option'
        yield UINT32, 'stylenumber'
        # Whole 32-bit word interpreted as an Align value.
        align_flags = Flags(UINT32,
                            0, 31, cls.Align, 'align')
        yield align_flags, 'align-flags'
class Field(Control):
    ''' 4.2.10.15 Field start '''

    Flags = Flags(UINT32,
                  0, 'editableInReadOnly',
                  11, 14, 'visitedType',
                  15, 'modified')

    @classmethod
    def attributes(cls):
        yield from (
            (cls.Flags, 'flags'),
            (BYTE, 'extra_attr'),
            (BSTR, 'command'),
            (UINT32, 'id'),
        )


# One Field subclass per field control id; each only binds its `chid`.

class FieldUnknown(Field):
    chid = CHID.FIELD_UNK


class FieldDate(Field):
    chid = CHID.FIELD_DTE


class FieldDocDate(Field):
    chid = CHID.FIELD_DDT


class FieldPath(Field):
    chid = CHID.FIELD_PAT


class FieldBookmark(Field):
    chid = CHID.FIELD_BMK


class FieldMailMerge(Field):
    chid = CHID.FIELD_MMG


class FieldCrossRef(Field):
    chid = CHID.FIELD_XRF


class FieldFormula(Field):
    chid = CHID.FIELD_FMU


class FieldClickHere(Field):
    chid = CHID.FIELD_CLK


class FieldClickHereData(ControlData):
    # Control data record attached to FieldClickHere.
    parent_model_type = FieldClickHere


class FieldSummary(Field):
    chid = CHID.FIELD_SMR


class FieldUserInfo(Field):
    chid = CHID.FIELD_USR


class FieldHyperLink(Field):
    chid = CHID.FIELD_HLK

    def geturl(self):
        # First ';'-separated part of `command`, with '\:' unescaped to ':'.
        head = self.command.split(';')[0]
        return head.replace('\\:', ':')


class FieldRevisionSign(Field):
    chid = CHID.FIELD_REVISION_SIGN


class FieldRevisionDelete(Field):
    chid = CHID.FIELD_REVISION_DELETE


class FieldRevisionAttach(Field):
    chid = CHID.FIELD_REVISION_ATTACH


class FieldRevisionClipping(Field):
    chid = CHID.FIELD_REVISION_CLIPPING


class FieldRevisionSawtooth(Field):
    chid = CHID.FIELD_REVISION_SAWTOOTH


class FieldRevisionThinking(Field):
    chid = CHID.FIELD_REVISION_THINKING


class FieldRevisionPraise(Field):
    chid = CHID.FIELD_REVISION_PRAISE


class FieldRevisionLine(Field):
    chid = CHID.FIELD_REVISION_LINE


class FieldRevisionSimpleChange(Field):
    chid = CHID.FIELD_REVISION_SIMPLECHANGE


class FieldRevisionHyperlink(Field):
    chid = CHID.FIELD_REVISION_HYPERLINK


class FieldRevisionLineAttach(Field):
    chid = CHID.FIELD_REVISION_LINEATTACH


class FieldRevisionLineLink(Field):
    chid = CHID.FIELD_REVISION_LINELINK


class FieldRevisionLineTransfer(Field):
    chid = CHID.FIELD_REVISION_LINETRANSFER


class FieldRevisionRightMove(Field):
    chid = CHID.FIELD_REVISION_RIGHTMOVE


class FieldRevisionLeftMove(Field):
    chid = CHID.FIELD_REVISION_LEFTMOVE


class FieldRevisionTransfer(Field):
    chid = CHID.FIELD_REVISION_TRANSFER


class FieldRevisionSimpleInsert(Field):
    chid = CHID.FIELD_REVISION_SIMPLEINSERT


class FieldRevisionSplit(Field):
    chid = CHID.FIELD_REVISION_SPLIT


class FieldRevisionChange(Field):
    chid = CHID.FIELD_REVISION_CHANGE


class FieldMemo(Field):
    chid = CHID.FIELD_MEMO


class FieldPrivateInfoSecurity(Field):
    chid = CHID.FIELD_PRIVATE_INFO_SECURITY
# CommonControl subclass bound to the CHID.GSO control id; the class body
# adds nothing besides that id.
class GShapeObjectControl(CommonControl):
    chid = CHID.GSO
class HeaderFooter(Control):
    ''' 4.2.10.3. Header/Footer '''
    Places = Enum(BOTH_PAGES=0, EVEN_PAGE=1, ODD_PAGE=2)
    Flags = Flags(UINT32,
                  0, 1, Places, 'places')

    @classmethod
    def attributes(cls):
        # Single member: the flags word whose bits 0-1 hold a Places value.
        yield cls.Flags, 'flags'


class Header(HeaderFooter):
    ''' Header '''
    chid = CHID.HEADER


class Footer(HeaderFooter):
    ''' Footer '''
    chid = CHID.FOOTER
class HiddenComment(Control):
    ''' 4.2.10.14 Hidden comment '''
    chid = CHID.TCMT

    @staticmethod
    def attributes():
        # Empty generator: no members are declared here.
        yield from ()


class IndexMarker(Control):
    ''' 4.2.10.10. Index marker '''
    chid = CHID.IDXM

    @staticmethod
    def attributes():
        yield from (
            (BSTR, 'keyword1'),
            (BSTR, 'keyword2'),
            (UINT16, 'dummy'),
        )
class Note(Control):
    ''' 4.2.10.4 Endnote/Footnote '''

    @staticmethod
    def attributes():
        yield from (
            (UINT32, 'number'),
            (UINT32, 'unknown0'),
            (UINT32, 'unknown1'),
        )
        # The last two members carry a version tuple.
        since = (5, 0, 3, 0)
        yield dict(type=UINT16, name='unknown2', version=since)
        yield dict(type=UINT16, name='unknown3', version=since)


class FootNote(Note):
    ''' Footnote '''
    chid = CHID.FN


class EndNote(Note):
    ''' Endnote '''
    chid = CHID.EN
class NumberingControl(Control):
    # Shared base of the numbering controls: a flags word followed by a
    # UINT16 `number`. Declares no `chid` itself.
    Kind = Enum(
        PAGE=0, FOOTNOTE=1, ENDNOTE=2, PICTURE=3, TABLE=4, EQUATION=5,
        UNKNOWN6=6,
    )
    Flags = Flags(UINT32,
                  0, 3, Kind, 'kind',
                  4, 11, 'footnoteshape',
                  12, 'superscript')

    @classmethod
    def attributes(cls):
        yield cls.Flags, 'flags'
        yield UINT16, 'number'


class AutoNumbering(NumberingControl):
    ''' 4.2.10.5. Auto numbering '''
    chid = CHID.ATNO

    @classmethod
    def attributes(cls):
        yield WCHAR, 'usersymbol'
        yield WCHAR, 'prefix'
        yield WCHAR, 'suffix'

    def __unicode__(self):
        '''Return the number as text, followed by the suffix for footnotes.

        The suffix is appended only when flags.kind is Kind.FOOTNOTE and the
        stored suffix is not the NUL character; no prefix is ever emitted.
        '''
        suffix = u''
        if self.flags.kind == self.Kind.FOOTNOTE and self.suffix != u'\x00':
            suffix = self.suffix
        # `unicode` does not exist on Python 3, the only interpreter able to
        # import this module (Control is declared with `metaclass=`); str is
        # the text type there.
        return str(self.number) + suffix


class NewNumbering(NumberingControl):
    ''' 4.2.10.6. New number assignment '''
    chid = CHID.NWNO
class PageHide(Control):
    ''' 4.2.10.7 Hide '''
    chid = CHID.PGHD
    # One single-bit flag per element, bits 0 through 5.
    Flags = Flags(
        UINT32,
        0, 'header',
        1, 'footer',
        2, 'basepage',
        3, 'pageborder',
        4, 'pagefill',
        5, 'pagenumber',
    )

    @classmethod
    def attributes(cls):
        yield cls.Flags, 'flags'
class PageNumberPosition(Control):
    ''' 4.2.10.9. Page number position '''

    chid = CHID.PGNP

    Position = Enum(
        NONE=0,
        TOP_LEFT=1,
        TOP_CENTER=2,
        TOP_RIGHT=3,
        BOTTOM_LEFT=4,
        BOTTOM_CENTER=5,
        BOTTOM_RIGHT=6,
        OUTSIDE_TOP=7,
        OUTSIDE_BOTTOM=8,
        INSIDE_TOP=9,
        INSIDE_BOTTOM=10,
    )

    Flags = Flags(
        UINT32,
        0, 7, 'shape',
        8, 11, Position, 'position',
    )

    @classmethod
    def attributes(cls):
        ''' Yield ``flags`` followed by four WCHAR members. '''
        yield (cls.Flags, 'flags')
        for wchar_name in ('usersymbol', 'prefix', 'suffix', 'dash'):
            yield (WCHAR, wchar_name)
class PageOddEven(Control):
    ''' 4.2.10.8 Odd/even page adjustment '''

    chid = CHID.PGCT

    OddEven = Enum(
        BOTH_PAGES=0,
        EVEN_PAGE=1,
        ODD_PAGE=2,
    )

    Flags = Flags(UINT32, 0, 1, OddEven, 'pages')

    @classmethod
    def attributes(cls):
        ''' Yield the single ``flags`` member. '''
        member = (cls.Flags, 'flags')
        yield member
class SectionDef(Control):
    ''' 4.2.10.1. Section definition '''

    chid = CHID.SECD

    Flags = Flags(
        UINT32,
        0, 'hide_header',
        1, 'hide_footer',
        2, 'hide_page',
        3, 'hide_border',
        4, 'hide_background',
        5, 'hide_pagenumber',
        8, 'show_border_on_first_page_only',
        9, 'show_background_on_first_page_only',
        16, 18, 'text_direction',
        19, 'hide_blank_line',
        20, 21, 'pagenum_on_split_section',
        22, 'squared_manuscript_paper',
    )

    @classmethod
    def attributes(cls):
        ''' Yield the members of the record in declaration order. '''
        yield (cls.Flags, 'flags')

        for unit16_name in ('columnspacing',
                            'grid_vertical',
                            'grid_horizontal'):
            yield (HWPUNIT16, unit16_name)

        yield (HWPUNIT, 'defaultTabStops')

        for uint16_name in ('numbering_shape_id',
                            'starting_pagenum',
                            'starting_picturenum',
                            'starting_tablenum',
                            'starting_equationnum'):
            yield (UINT16, uint16_name)

        # Two undocumented UINT32 members tagged with version (5, 0, 1, 7).
        for unknown_name in ('unknown1', 'unknown2'):
            yield dict(type=UINT32, name=unknown_name, version=(5, 0, 1, 7))


class SectionDefData(ControlData):
    ''' Control data record whose parent model is ``SectionDef``. '''

    parent_model_type = SectionDef

    @staticmethod
    def attributes():
        ''' Yield one opaque 280-byte member. '''
        opaque = HexBytes(280)
        yield (opaque, 'unknown')
class TableControl(CommonControl):
    ''' Common control identified by ``CHID.TBL``. '''

    chid = CHID.TBL

    @classmethod
    def on_child(cls, attributes, context, child):
        ''' Record in ``context`` that a ``TableBody`` child was seen.

        ``child`` is a two-item sequence; its second item is a mapping
        whose ``'type'`` entry is compared against ``TableBody``.
        '''
        _unused_context, model = child
        if model['type'] is not TableBody:
            return
        # referenced in ListHeader parsing
        context['seen_table_body'] = True
class TCPSControl(Control):
    ''' 4.2.10.12. Character overlap '''

    chid = CHID.TCPS

    @staticmethod
    def attributes():
        ''' Yield the only member currently parsed: a BSTR ``textlength``. '''
        member = (BSTR, 'textlength')
        yield member
        # Remaining members are left disabled, as in the original:
        # yield UINT8, 'frameType'
        # yield INT8, 'internalCharacterSize'
        # yield UINT8, 'internalCharacterFold'
        # yield N_ARRAY(UINT8, UINT32), 'characterShapeIds'
class DocumentProperties(RecordModel):
    ''' 4.1.1. Document properties '''

    tagid = HWPTAG_DOCUMENT_PROPERTIES

    @staticmethod
    def attributes():
        ''' Table 9: document properties '''
        uint16_names = (
            'section_count',
            'page_startnum',
            'footnote_startnum',
            'endnote_startnum',
            'picture_startnum',
            'table_startnum',
            'math_startnum',
        )
        uint32_names = (
            'list_id',
            'paragraph_id',
            'character_unit_loc_in_paragraph',
        )
        for member_name in uint16_names:
            yield (UINT16, member_name)
        for member_name in uint32_names:
            yield (UINT32, member_name)
        # yield UINT32, 'flags',  # DIFFSPEC
class IdMappings(RecordModel):
    ''' 4.1.2. ID mappings header '''

    tagid = HWPTAG_ID_MAPPINGS

    @staticmethod
    def attributes():
        ''' Table 10: ID mappings header '''
        unconditional_names = (
            'bindata',
            'ko_fonts',
            'en_fonts',
            'cn_fonts',
            'jp_fonts',
            'other_fonts',
            'symbol_fonts',
            'user_fonts',
            'borderfills',
            'charshapes',
            'tabdefs',
            'numberings',
            'bullets',
            'parashapes',
            'styles',
        )
        for member_name in unconditional_names:
            yield (UINT32, member_name)

        # memoshapes are found from 5.0.1.7, but not in 5.0.1.6
        yield dict(type=UINT32, name='memoshapes', version=(5, 0, 1, 7))

        # TODO unknown fields:
        # followings are found from 5.0.3.2, but not in 5.0.3.1
        # but some 5.0.3.3 files do not have them:
        # 5.0.3.3/d6dfac424525298119de54410c3b22d74aa85511
        # yield dict(type=UINT32, name='unknown1', version=(5, 0, 3, 2))
        # yield dict(type=UINT32, name='unknown2', version=(5, 0, 3, 2))
class BinDataLink(Struct):
    ''' ``bindata`` payload selected for ``StorageType.LINK``. '''

    @staticmethod
    def attributes():
        ''' Yield the two BSTR path members. '''
        yield BSTR, 'abspath'
        yield BSTR, 'relpath'


class BinDataEmbedding(Struct):
    ''' ``bindata`` payload selected for ``StorageType.EMBEDDING``. '''

    @staticmethod
    def attributes():
        ''' Yield the storage id followed by a BSTR ``ext``. '''
        yield BinStorageId, 'storage_id'
        yield BSTR, 'ext'


class BinDataStorage(Struct):
    ''' ``bindata`` payload selected for ``StorageType.STORAGE``. '''

    @staticmethod
    def attributes():
        ''' Yield the storage id. '''
        yield BinStorageId, 'storage_id'


class BinData(RecordModel):
    ''' 4.1.3. Binary data '''

    tagid = HWPTAG_BIN_DATA

    # Table 13: binary data attributes
    StorageType = Enum(LINK=0, EMBEDDING=1, STORAGE=2)
    CompressionType = Enum(STORAGE_DEFAULT=0, YES=1, NO=2)
    AccessState = Enum(NEVER=0, OK=1, FAILED=2, FAILED_IGNORED=3)
    # The flags word is a UINT16, so every field must lie within bits 0-15.
    # The access state was previously declared at bits 16-17, which are
    # outside a 16-bit value, so it could never read anything but 0.
    # The HWP 5.0 specification (Table 13) places it at bits 8-9
    # (values 0x0000 / 0x0100 / 0x0200 / 0x0300).
    Flags = Flags(UINT16,
                  0, 3, StorageType, 'storage',
                  4, 5, CompressionType, 'compression',
                  8, 9, AccessState, 'access')

    @classmethod
    def attributes(cls):
        ''' Table 12: binary data

        Yields ``flags`` and then ``bindata``, whose concrete struct type
        is selected by the ``storage`` field of ``flags``.
        '''
        yield cls.Flags, 'flags'
        yield (SelectiveType(ref_member_flag('flags', 'storage'),
                             {cls.StorageType.LINK: BinDataLink,
                              cls.StorageType.EMBEDDING: BinDataEmbedding,
                              cls.StorageType.STORAGE: BinDataStorage}),
               'bindata')
class AlternateFont(Struct):
    ''' Struct made of a BYTE ``kind`` and a BSTR ``name``. '''

    @staticmethod
    def attributes():
        ''' Yield the two members in order. '''
        for member in ((BYTE, 'kind'), (BSTR, 'name')):
            yield member


class Panose1(Struct):
    ''' Table 17: font type information '''

    FamilyType = Enum(
        'any', 'no_fit', 'text_display', 'script', 'decorative', 'pictorial',
    )

    SerifStyle = Enum(
        'any', 'no_fit', 'cove', 'obtuse_cove', 'square_cove',
        'obtuse_square_cove', 'square', 'thin', 'bone', 'exaggerated',
        'triangle', 'normal_sans', 'obtuse_sans', 'perp_sans', 'flared',
        'rounded',
    )

    Weight = Enum(
        'any', 'no_fit', 'very_light', 'light', 'thin', 'book', 'medium',
        'demi', 'bold', 'heavy', 'black', 'nord',
    )

    Proportion = Enum(
        'any', 'no_fit', 'old_style', 'modern', 'even_width', 'expanded',
        'condensed', 'very_expanded', 'very_condensed', 'monospaced',
    )

    Contrast = Enum(
        'any', 'no_fit', 'none', 'very_low', 'low', 'medium_low', 'medium',
        'medium_high', 'high', 'very_high',
    )

    StrokeVariation = Enum(
        'any', 'no_fit', 'gradual_diag', 'gradual_tran', 'gradual_vert',
        'gradual_horz', 'rapid_vert', 'rapid_horz', 'instant_vert',
    )

    ArmStyle = Enum(
        'any', 'no_fit', 'straight_horz', 'straight_wedge', 'straight_vert',
        'straight_single_serif', 'straight_double_serif', 'bent_horz',
        'bent_wedge', 'bent_vert', 'bent_single_serif', 'bent_double_serif',
    )

    Letterform = Enum(
        'any', 'no_fit', 'normal_contact', 'normal_weighted', 'normal_boxed',
        'normal_flattened', 'normal_rounded', 'normal_off_center',
        'normal_square', 'oblique_contact', 'oblique_weighted',
        'oblique_boxed', 'oblique_flattened', 'oblique_rounded',
        'oblique_off_center', 'oblique_square',
    )

    Midline = Enum(
        'any', 'no_fit', 'standard_trimmed', 'standard_pointed',
        'standard_serifed', 'high_trimmed', 'high_pointed', 'high_serifed',
        'constant_trimmed', 'constant_pointed', 'constant_serifed',
        'low_trimmed', 'low_pointed', 'low_serifed',
    )

    XHeight = Enum(
        'any', 'no_fit', 'constant_small', 'constant_std', 'constant_large',
        'ducking_small', 'ducking_std', 'ducking_large',
    )

    @staticmethod
    def attributes():
        ''' Yield ten BYTE members, one per classification digit. '''
        byte_names = (
            'family_type',
            'serif_style',
            'weight',
            'proportion',
            'contrast',
            'stroke_variation',
            'arm_style',
            'letterform',
            'midline',
            'x_height',
        )
        for member_name in byte_names:
            yield (BYTE, member_name)


class FaceName(RecordModel):
    ''' 4.1.4. Font face '''

    tagid = HWPTAG_FACE_NAME

    # Table 16: alternate font type
    FontFileType = Enum(UNKNOWN=0, TTF=1, HFT=2)

    # Table 15: font attributes
    Flags = Flags(
        BYTE,
        0, 1, FontFileType, 'font_file_type',
        5, 'default',
        6, 'metric',
        7, 'alternate',
    )

    @classmethod
    def attributes(cls):
        ''' Table 14: font face '''

        # Condition callables; their names and docstrings are kept exactly
        # as they were.
        def has_alternate(context, values):
            ''' flags.alternate == 1 '''
            return values['flags'].alternate

        def has_metric(context, values):
            ''' flags.metric == 1 '''
            return values['flags'].metric

        def has_default(context, values):
            ''' flags.default == 1 '''
            return values['flags'].default

        yield (cls.Flags, 'flags')
        yield (BSTR, 'name')

        # Optional members, each guarded by one of the conditions above.
        yield dict(type=AlternateFont,
                   name='alternate_font',
                   condition=has_alternate)
        yield dict(type=Panose1,
                   name='panose1',
                   condition=has_metric)
        yield dict(type=BSTR,
                   name='default_font',
                   condition=has_default)
class BorderFill(RecordModel):
    ''' 4.1.5. Border / background '''

    tagid = HWPTAG_BORDER_FILL

    # Table 19: border/background attributes
    BorderFlags = Flags(
        UINT16,
        0, 'effect_3d',
        1, 'effect_shadow',
        2, 4, 'slash',
        5, 6, 'backslash',
    )

    # Table 23: fill information
    FillFlags = Flags(
        UINT32,
        0, 'colorpattern',
        1, 'image',
        2, 'gradation',
    )

    @classmethod
    def attributes(cls):
        ''' Table 18: border/background attributes '''

        # Condition callables; their names and docstrings are kept exactly
        # as they were.
        def fill_colorpattern(context, values):
            ''' fillflags.fill_colorpattern '''
            return values['fillflags'].colorpattern

        def fill_image(context, values):
            ''' fillflags.fill_image '''
            return values['fillflags'].image

        def fill_gradation(context, values):
            ''' fillflags.fill_gradation '''
            return values['fillflags'].gradation

        yield (cls.BorderFlags, 'borderflags')
        for side in ('left', 'right', 'top', 'bottom', 'diagonal'):
            yield (Border, side)
        yield (cls.FillFlags, 'fillflags')

        # Conditional members; gradation is declared before image here.
        yield dict(type=FillColorPattern,
                   name='fill_colorpattern',
                   condition=fill_colorpattern)
        yield dict(type=FillGradation,
                   name='fill_gradation',
                   condition=fill_gradation)
        yield dict(type=FillImage,
                   name='fill_image',
                   condition=fill_image)
        yield dict(type=UINT32,
                   name='shape',
                   condition=fill_gradation)
        yield dict(type=BYTE,
                   name='blur_center',
                   condition=fill_gradation)
        # Unlike the description of Table 28 in spec 1.2, at least in
        # 5.0.1.7 this member is sometimes present and sometimes absent.
        # yield N_ARRAY(UINT32, BYTE), 'extended'  # SPEC
def LanguageStruct(name, basetype):
    ''' Table 29: languages for fonts

    Build a ``Struct`` subclass called ``name`` that has one ``basetype``
    member per language slot.
    '''
    language_slots = ('ko', 'en', 'cn', 'jp', 'other', 'symbol', 'user')

    @staticmethod
    def attributes():
        for slot in language_slots:
            yield basetype, slot

    namespace = dict(basetype=basetype, attributes=attributes)
    return StructType(name, (Struct,), namespace)


class ShadowSpace(Struct):
    ''' Struct made of two INT8 members, ``x`` and ``y``. '''

    @staticmethod
    def attributes():
        ''' Yield ``x`` then ``y``. '''
        for axis in ('x', 'y'):
            yield (INT8, axis)


class CharShape(RecordModel):
    ''' 4.1.6. Character shape '''

    tagid = HWPTAG_CHAR_SHAPE

    # Table 30: character shape attributes
    Underline = Enum(NONE=0, UNDERLINE=1, LINE_THROUGH=2, OVERLINE=3)
    UnderlineStyle = Enum(
        SOLID=0,
        DASHED=1,
        DOTTED=2,
        DASH_DOT=3,
        DASH_DOT_DOT=4,
        LONG_DASHED=5,
        LARGE_DOTTED=6,
        DOUBLE=7,
        LOWER_WEIGHTED=8,
        UPPER_WEIGHTED=9,
        MIDDLE_WEIGHTED=10,
    )
    Flags = Flags(
        UINT32,
        0, 'italic',
        1, 'bold',
        2, 3, Underline, 'underline',
        4, 7, UnderlineStyle, 'underline_style',
        8, 10, 'outline',
        11, 13, 'shadow',
    )

    @classmethod
    def attributes(cls):
        ''' Table 28: character shape '''
        # Per-language structs, created in the same order as before.
        yield (LanguageStruct('FontFace', WORD), 'font_face')
        yield (LanguageStruct('LetterWidthExpansion', UINT8),
               'letter_width_expansion')
        yield (LanguageStruct('LetterSpacing', INT8), 'letter_spacing')
        yield (LanguageStruct('RelativeSize', INT8), 'relative_size')
        yield (LanguageStruct('Position', INT8), 'position')

        yield (INT32, 'basesize')
        yield (cls.Flags, 'charshapeflags')
        yield (ShadowSpace, 'shadow_space')

        for color_name in ('text_color',
                           'underline_color',
                           'shade_color',
                           'shadow_color'):
            yield (COLORREF, color_name)
        # yield UINT16, 'borderfill_id',        # DIFFSPEC
        # yield COLORREF, 'strikeoutColor',    # DIFFSPEC
class Tab(Struct):
    ''' One tab entry: a HWPUNIT position plus a flags word. '''

    Kind = Enum(LEFT=0, RIGHT=1, CENTER=2, FLOAT=3)

    Flags = Flags(
        UINT32,
        0, 7, Kind, 'kind',
        8, 15, 'fill_type',
    )

    @classmethod
    def attributes(cls):
        ''' Yield ``pos`` then ``flags``. '''
        members = (
            (HWPUNIT, 'pos'),
            (cls.Flags, 'flags'),
        )
        for member in members:
            yield member


class TabDef(RecordModel):
    ''' 4.1.7. Tab definition '''

    tagid = HWPTAG_TAB_DEF

    # Table 32: tab definition attributes
    Flags = Flags(
        UINT32,
        0, 'autotab_left',
        1, 'autotab_right',
    )

    @classmethod
    def attributes(cls):
        ''' Yield ``flags`` and a UINT32-counted array of ``Tab``. '''
        flags_member = dict(type=cls.Flags, name='flags')
        yield flags_member
        tabs_member = dict(type=N_ARRAY(UINT32, Tab), name='tabs')
        yield tabs_member
class NumberingLevel(Struct):
    ''' One level entry of a ``Numbering`` record. '''

    # Table 35: paragraph head information attributes
    Align = Enum(LEFT=0, CENTER=1, RIGHT=2, UNKNOWN=3)
    DistanceType = Enum(RATIO=0, VALUE=1)
    Flags = Flags(
        UINT32,
        0, 1, Align, 'align',
        2, 'auto_width',
        3, 'auto_indent',
        4, DistanceType, 'space_type',
    )

    @classmethod
    def attributes(cls):
        ''' Table 34: paragraph head information '''
        members = (
            (cls.Flags, 'flags'),
            (HWPUNIT16, 'width_correction'),
            (UINT16, 'space'),
            (INT32, 'charshape_id'),
            # see Table 36: paragraph numbering format
            (BSTR, 'numbering_format'),
        )
        for member in members:
            yield member


class Numbering(RecordModel):
    ''' 4.1.8. Paragraph numbering '''

    tagid = HWPTAG_NUMBERING

    # Re-exported from NumberingLevel under the same names.
    Align = NumberingLevel.Align
    DistanceType = NumberingLevel.DistanceType
    Flags = NumberingLevel.Flags

    @classmethod
    def attributes(cls):
        ''' Table 33: paragraph numbering '''
        yield (ARRAY(NumberingLevel, 7), 'levels')
        yield (UINT16, 'starting_number')
        # Undocumented array, tagged with version (5, 0, 3, 0).
        unknown_member = dict(
            type=ARRAY(UINT32, 7),
            name='unknown',
            version=(5, 0, 3, 0),
        )
        yield unknown_member
BulletAlignEnum = Enum(LEFT=0, CENTER=1, RIGHT=2)
BulletFlags = Flags(
    UINT32,
    0, 1, BulletAlignEnum, 'align',
    3, 'auto_indent',
)


class Bullet(RecordModel):
    ''' 4.1.9. Bullet '''

    tagid = HWPTAG_BULLET

    @staticmethod
    def attributes():
        ''' Yield the members of a bullet record in order. '''
        # TODO: Spec 1.2 is insufficient and incorrect
        members = (
            (BulletFlags, 'flags'),
            (HWPUNIT16, 'width'),       # width, unit: HWPUNIT
            (UINT16, 'space'),          # distance from body text, unit: %
            (INT32, 'charshape_id'),
            (WCHAR, 'char'),
        )
        for member in members:
            yield member
class ParaShape(RecordModel):
    ''' 4.1.10. Paragraph shape '''
    tagid = HWPTAG_PARA_SHAPE

    # Table 39: paragraph shape attribute 1
    LineSpacingType = Enum(RATIO=0, FIXED=1, SPACEONLY=2, MINIMUM=3)
    Align = Enum(
        BOTH=0, LEFT=1, RIGHT=2, CENTER=3, DISTRIBUTE=4, DISTRIBUTE_SPACE=5,
    )
    VAlign = Enum(FONT=0, TOP=1, CENTER=2, BOTTOM=3)
    LineBreakAlphabet = Enum(WORD=0, HYPHEN=1, CHAR=2)
    LineBreakHangul = Enum(WORD=0, CHAR=1)
    HeadShape = Enum(NONE=0, OUTLINE=1, NUMBER=2, BULLET=3)
    Flags1 = Flags(
        UINT32,
        0, 1, LineSpacingType, 'linespacing_type',
        2, 4, Align, 'align',
        5, 6, LineBreakAlphabet, 'linebreak_alphabet',
        7, LineBreakHangul, 'linebreak_hangul',
        8, 'use_paper_grid',
        9, 15, 'minimum_space',             # minimum space value
        16, 'protect_single_line',          # protect a lone (widow/orphan) line
        17, 'with_next_paragraph',          # keep with the next paragraph
        18, 'protect',                      # protect the paragraph
        19, 'start_new_page',               # always break the page before it
        20, 21, VAlign, 'valign',
        22, 'lineheight_along_fontsize',    # line height that suits the font
        23, 24, HeadShape, 'head_shape',    # paragraph head shape
        25, 27, 'level',                    # paragraph level
        28, 'linked_border',                # whether paragraph borders are linked
        29, 'ignore_margin',                # ignore the paragraph margin
        30, 'tail_shape',                   # paragraph tail shape
    )

    # Table 40: paragraph shape attribute 2
    Flags2 = Flags(
        UINT32,
        0, 1, 'in_single_line',
        2, 3, 'reserved',
        4, 'autospace_alphabet',
        5, 'autospace_number',
    )

    # Table 41: line spacing kind
    Flags3 = Flags(
        UINT32,
        0, 4, LineSpacingType, 'linespacing_type3',
    )

    # Must stay after the three Flags(...) calls above: from here on the
    # name ``Flags`` inside this class body refers to Flags1, not to the
    # imported factory.
    Flags = Flags1

    @classmethod
    def attributes(cls):
        ''' Table 38: paragraph shape '''
        yield cls.Flags, 'parashapeflags'
        # The four margins carry the original note "1/7200 * 2"; the first
        # one is additionally tagged DIFFSPEC in the original source.
        yield INT32, 'doubled_margin_left'
        yield INT32, 'doubled_margin_right'
        yield SHWPUNIT, 'indent'
        yield INT32, 'doubled_margin_top'
        yield INT32, 'doubled_margin_bottom'
        yield SHWPUNIT, 'linespacing'
        for id_field in ('tabdef_id', 'numbering_bullet_id', 'borderfill_id'):
            yield UINT16, id_field
        for side in ('left', 'right', 'top', 'bottom'):
            yield HWPUNIT16, 'border_' + side
        # Second flags word is only declared for format version 5.0.1.7.
        yield dict(type=cls.Flags2, name='flags2', version=(5, 0, 1, 7))
        # Left disabled in the original, both tagged DIFFSPEC:
        # yield cls.Flags3, 'flags3'
        # yield UINT32, 'lineSpacing'
class Style(RecordModel):
    ''' 4.1.11. Style '''
    tagid = HWPTAG_STYLE

    # Table 43: style kind
    Kind = Enum(PARAGRAPH=0, CHAR=1)
    Flags = Flags(BYTE, 0, 1, Kind, 'kind')

    @classmethod
    def attributes(cls):
        ''' Table 42: style '''
        for name_field in ('local_name', 'name'):
            yield BSTR, name_field
        yield cls.Flags, 'flags'
        yield BYTE, 'next_style_id'
        yield INT16, 'lang_id'
        yield UINT16, 'parashape_id'
        yield UINT16, 'charshape_id'

        # Unknown trailing field; the original note says the following
        # fields are found from version 5.0.0.0 onwards.
        yield UINT16, 'unknown'
class DocData(RecordModel):
    ''' 4.1.12. Arbitrary document data '''
    tagid = HWPTAG_DOC_DATA

    # TODO: no fields are declared for this record yet.


class DistributeDocData(RecordModel):
    ''' 4.1.13. Document data for distribution '''
    tagid = HWPTAG_DISTRIBUTE_DOC_DATA


class CompatibleDocument(RecordModel):
    ''' 4.1.14. Compatible document '''
    tagid = HWPTAG_COMPATIBLE_DOCUMENT

    # Table 50: target program
    Target = Enum(DEFAULT=0, HWP2007=1, MSWORD=2)
    # NOTE(review): Target is declared above but is not handed to Flags
    # here, unlike the enum-typed fields of the sibling records - confirm
    # whether 'target' is meant to stay a plain integer.
    Flags = Flags(UINT32, 0, 1, 'target')

    @classmethod
    def attributes(cls):
        ''' Table 49: compatible document '''
        yield cls.Flags, 'flags'


class LayoutCompatibility(RecordModel):
    ''' 4.1.15. Layout compatibility '''
    tagid = HWPTAG_LAYOUT_COMPATIBILITY

    @staticmethod
    def attributes():
        ''' Table 51: layout compatibility '''
        # Five UINT32 fields, one per name, in this order.
        for level in ('char', 'paragraph', 'section', 'object', 'field'):
            yield UINT32, level
class TagModel32(RecordModel):
    # Placeholder model for tag HWPTAG_BEGIN_PLUS_16; no fields declared.
    tagid = HWPTAG_BEGIN_PLUS_16
    # TODO


class Paragraph(RecordModel):
    ''' 4.2.1. Paragraph header '''

    tagid = HWPTAG_PARA_HEADER

    # Table 54: kinds of column/page break
    SplitFlags = Flags(
        BYTE,
        0, 'new_section',
        1, 'new_columnsdef',
        2, 'new_page',
        3, 'new_column',
    )
    ControlMask = Flags(
        UINT32,
        2, 'unknown1',
        11, 'control',
        21, 'new_number',
    )
    # Declared last on purpose: after this assignment the name ``Flags``
    # in this class body no longer refers to the imported factory.
    Flags = Flags(
        UINT32,
        31, 'unknown',
        0, 30, 'chars',
    )

    @classmethod
    def attributes(cls):
        ''' Table 53: paragraph header '''
        yield cls.Flags, 'text'
        yield cls.ControlMask, 'controlmask'
        yield UINT16, 'parashape_id'
        yield BYTE, 'style_id'
        yield cls.SplitFlags, 'split'
        # 'charshapes', 'rangetags' and 'linesegs' are the names that the
        # PARA_CHAR_SHAPE / PARA_RANGE_TAG / PARA_LINE_SEG models look up
        # through ref_parent_member().
        for counter in ('charshapes', 'rangetags', 'linesegs'):
            yield UINT16, counter
        yield UINT32, 'instance_id'
class ParaTextChunks(list, metaclass=ArrayType):
    # Splits a paragraph-text payload into alternating runs of decoded
    # text and decoded control characters.

    @classmethod
    def read(cls, f):
        # Read everything that is left in ``f`` and materialize the chunks.
        payload = f.read()
        return list(cls.parse_chunks(payload))

    @staticmethod
    def parse_chunks(bytes):
        # Yields ((start, end), value) pairs. Positions are the byte
        # offsets halved - presumably UTF-16 code-unit indices, since text
        # runs are decoded as UTF-16LE.
        # The parameter keeps its original name ``bytes`` because callers
        # may pass it by keyword.
        from hwp5.dataio import decode_utf16le_with_hypua
        total = len(bytes)
        cursor = 0
        while cursor < total:
            ctrl_start, ctrl_stop = ControlChar.find(bytes, cursor)
            # Plain text between the cursor and the next control char.
            if cursor < ctrl_start:
                decoded = decode_utf16le_with_hypua(bytes[cursor:ctrl_start])
                yield (cursor // 2, ctrl_start // 2), decoded
            # The control character itself, when one was found.
            if ctrl_start < ctrl_stop:
                control = ControlChar.decode(bytes[ctrl_start:ctrl_stop])
                yield (ctrl_start // 2, ctrl_stop // 2), control
            cursor = ctrl_stop


class ParaText(RecordModel):
    ''' 4.2.2. Paragraph text '''
    tagid = HWPTAG_PARA_TEXT

    @staticmethod
    def attributes():
        # The whole record payload is exposed as one 'chunks' field.
        yield ParaTextChunks, 'chunks'
class ParaCharShape(RecordModel):
    ''' 4.2.3. Paragraph character shapes '''
    tagid = HWPTAG_PARA_CHAR_SHAPE

    @staticmethod
    def attributes():
        ''' Table 56: paragraph character shapes '''
        # Array of UINT32 pairs; its length is taken from the parent
        # record's 'charshapes' member.
        yield dict(name='charshapes',
                   type=X_ARRAY(ARRAY(UINT32, 2),
                                ref_parent_member('charshapes')))


class ParaCharShapeList(list, metaclass=ArrayType):
    # Bulk decoder that turns a whole payload into a list of 2-tuples.

    # NOTE(review): itemtype says UINT16 pairs, but decode() below (and
    # ParaCharShape above) work with UINT32 pairs - confirm which is meant.
    itemtype = ARRAY(UINT16, 2)

    @classmethod
    def read(cls, f, context):
        # Consume the rest of the stream and decode it in one go.
        payload = f.read()
        return cls.decode(payload, context)

    @staticmethod
    def decode(payload, context=None):
        ''' Decode ``payload`` into a list of (uint32, uint32) tuples.

        The payload is read as consecutive little-endian pairs of
        unsigned 32-bit integers. ``context`` is accepted for interface
        compatibility and is not used. Raises ``struct.error`` when the
        payload length is not a multiple of 8 bytes.
        '''
        import struct
        # The original computed the pair count with true division, which
        # yields a float on Python 3 and made both the format repetition
        # and range() fail with TypeError. iter_unpack walks the buffer
        # pair by pair, so no count is needed at all.
        return list(struct.iter_unpack('<II', payload))
class LineSeg(Struct):
    # One line segment of a laid-out paragraph.

    Flags = Flags(UINT32,
                  # 0, 'first_in_page',
                  # 1, 'first_in_column',
                  # 16, 'empty',
                  17, 'line_head',
                  18, 'line_tail',
                  # 19, 'auto_hyphen',
                  20, 'indented',
                  21, 'bullet')

    @classmethod
    def attributes(cls):
        yield INT32, 'chpos'
        yield SHWPUNIT, 'y'
        yield SHWPUNIT, 'height'
        yield SHWPUNIT, 'height_text'
        yield SHWPUNIT, 'height_baseline'
        yield SHWPUNIT, 'space_below'
        yield SHWPUNIT, 'x'
        yield SHWPUNIT, 'width'
        yield cls.Flags, 'lineseg_flags'


class ParaLineSeg(RecordModel):
    ''' 4.2.4. Paragraph layout '''

    tagid = HWPTAG_PARA_LINE_SEG

    @classmethod
    def attributes(cls):
        ''' Table 57: paragraph layout '''
        # Array of LineSeg; its length is taken from the parent record's
        # 'linesegs' member.
        yield dict(name='linesegs',
                   type=X_ARRAY(LineSeg, ref_parent_member('linesegs')))


class ParaLineSegList(list, metaclass=ArrayType):
    # Bulk decoder that turns a whole payload into a list of dicts.

    itemtype = LineSeg

    @classmethod
    def read(cls, f, context):
        # NOTE(review): ``f`` is not used; the payload is taken from
        # context['stream'] instead - confirm this is intentional.
        payload = context['stream'].read()
        return cls.decode(context, payload)

    @classmethod
    def decode(cls, context, payload):
        ''' Decode ``payload`` into a list of per-segment dicts.

        Each 36-byte unit is read as eight little-endian signed 32-bit
        integers followed by two unsigned 16-bit halves. The dict keys
        are unchanged: 'chpos', 'y', 'height', 'height2', 'height85',
        'space_below', 'x', 'width', 'a8' (low half) and 'flags'.
        ``context`` is not used. Raises ``struct.error`` when the payload
        length is not a multiple of the unit size.
        '''
        import struct
        unit = struct.Struct('<iiiiiiiiHH')
        names = ('chpos', 'y', 'height', 'height2', 'height85',
                 'space_below', 'x', 'width', 'a8', 'flags')
        segments = [dict(zip(names, values))
                    for values in unit.iter_unpack(payload)]
        for segment in segments:
            # LineSeg.Flags places its fields at bits 17-21 of a UINT32.
            # The two trailing 16-bit values are the low ('a8') and high
            # ('flags') halves of that word in little-endian order, so
            # they are recombined first. Passing only the high half, as
            # before, could never set any of the declared flags.
            word = segment['a8'] | (segment['flags'] << 16)
            segment['flags'] = LineSeg.Flags(word)
        return segments
class RangeTag(Struct):
    ''' Table 58: paragraph range tag '''

    Tag = Flags(
        UINT32,
        0, 23, 'data',
        24, 31, 'kind',
    )

    @classmethod
    def attributes(cls):
        # A range is a start/end pair followed by its tag word.
        for bound in ('start', 'end'):
            yield UINT32, bound
        yield cls.Tag, 'tag'


class ParaRangeTag(RecordModel):
    ''' 4.2.5. Paragraph range tags '''

    tagid = HWPTAG_PARA_RANGE_TAG

    @staticmethod
    def attributes():
        # Array of RangeTag; its length is taken from the parent record's
        # 'rangetags' member.
        count_ref = ref_parent_member('rangetags')
        yield dict(name='range_tags', type=X_ARRAY(RangeTag, count_ref))
+# +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals + +from hwp5.binmodel.controls._shared import Control + +import hwp5.binmodel.controls.bookmark_control +import hwp5.binmodel.controls.columns_def +import hwp5.binmodel.controls.common_controls +import hwp5.binmodel.controls.dutmal +import hwp5.binmodel.controls.field +import hwp5.binmodel.controls.gshape_object_control +import hwp5.binmodel.controls.header_footer +import hwp5.binmodel.controls.hidden_comment +import hwp5.binmodel.controls.index_marker +import hwp5.binmodel.controls.note +import hwp5.binmodel.controls.numbering +import hwp5.binmodel.controls.page_hide +import hwp5.binmodel.controls.page_number_position +import hwp5.binmodel.controls.page_odd_even +import hwp5.binmodel.controls.section_def +import hwp5.binmodel.controls.table_control +import hwp5.binmodel.controls.tcps_control + +hwp5 +Control diff --git a/src/hwp5/binmodel/tagid56_list_header.py b/src/hwp5/binmodel/tagid56_list_header.py new file mode 100644 index 0000000000000000000000000000000000000000..e43f39486e6b18272e32849a97f9f10e2247ddc5 --- /dev/null +++ b/src/hwp5/binmodel/tagid56_list_header.py @@ -0,0 +1,157 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
# Registry of ListHeader subclasses, keyed by
# (parent_model_type, before_tablebody). Filled in by ListHeaderType.
list_header_models = {}


class ListHeaderType(RecordModelType):
    # Metaclass that registers every class whose own body defines
    # ``parent_model_type``.

    def __new__(mcs, name, bases, attrs):
        cls = RecordModelType.__new__(mcs, name, bases, attrs)
        # Only attributes written in the class body count; an inherited
        # parent_model_type does not trigger registration.
        if 'parent_model_type' not in attrs:
            return cls
        registry_key = (attrs['parent_model_type'],
                        attrs.get('before_tablebody', False))
        assert registry_key not in list_header_models
        list_header_models[registry_key] = cls
        return cls


class ListHeader(RecordModel, metaclass=ListHeaderType):
    ''' 4.2.7. Paragraph list header '''

    tagid = HWPTAG_LIST_HEADER

    VAlign = Enum(TOP=0, MIDDLE=1, BOTTOM=2)
    Flags = Flags(
        UINT32,
        0, 2, 'textdirection',
        3, 4, 'linebreak',
        5, 6, VAlign, 'valign',
    )

    extension_types = list_header_models

    @classmethod
    def attributes(cls):
        ''' Table 60: paragraph list header '''
        yield UINT16, 'paragraphs'
        yield UINT16, 'unknown1'
        yield cls.Flags, 'listflags'

    @staticmethod
    def get_extension_key(context, model):
        ''' (parent model type, after TableBody) '''
        # When a parent is recorded, the key is built from the parent's
        # context and model instead of the ones passed in.
        if 'parent' in context:
            context, model = context['parent']
        after_table_body = context.get('seen_table_body', False)
        return model['type'], after_table_body


class TableCaption(ListHeader):
    ''' Table 66: caption list '''
    parent_model_type = TableControl
    before_tablebody = False

    # Table 68: caption attributes
    Position = Enum(LEFT=0, RIGHT=1, TOP=2, BOTTOM=3)
    Flags = Flags(
        UINT32,
        0, 1, Position, 'position',
        # when the caption is long, extend it into the table margin area
        2, 'expand_to_margin',
    )

    @classmethod
    def attributes(cls):
        ''' Table 67: caption '''
        yield cls.Flags, 'flags'
        yield HWPUNIT, 'width'
        yield HWPUNIT16, 'separation'   # gap between caption and frame
        yield HWPUNIT, 'max_width'      # max width, expand_to_margin considered


class TableCell(ListHeader):
    ''' Table 75: cell attributes '''
    parent_model_type = TableControl
    # NOTE(review): this value is matched against the 'seen_table_body'
    # value returned by ListHeader.get_extension_key, so True selects list
    # headers met once the table body has been seen - the attribute name
    # reads the other way round; confirm.
    before_tablebody = True

    @staticmethod
    def attributes():
        for grid_field in ('col', 'row', 'colspan', 'rowspan'):
            yield UINT16, grid_field
        yield SHWPUNIT, 'width'
        yield SHWPUNIT, 'height'
        yield Margin, 'padding'
        yield UINT16, 'borderfill_id'
        yield SHWPUNIT, 'unknown_width'


class GShapeObjectCaption(TableCaption):
    # Same fields as TableCaption, registered for GShapeObjectControl.
    parent_model_type = GShapeObjectControl


class TextboxParagraphList(ListHeader):
    ''' Table 85: text attributes for a drawing-object text box '''
    parent_model_type = ShapeComponent

    @staticmethod
    def attributes():
        yield Margin, 'padding'
        yield HWPUNIT, 'maxwidth'
class HeaderFooterParagraphList(ListHeader):
    ''' Table 129: header/footer '''

    @staticmethod
    def attributes():
        for extent in ('width', 'height'):
            yield HWPUNIT, extent
        for bitmap in ('textrefsbitmap', 'numberrefsbitmap'):
            yield BYTE, bitmap


class HeaderParagraphList(HeaderFooterParagraphList):
    # Header/footer list fields, registered for the Header control.
    parent_model_type = Header


class FooterParagraphList(HeaderFooterParagraphList):
    # Header/footer list fields, registered for the Footer control.
    parent_model_type = Footer
class PageDef(RecordModel):
    # Page definition record: paper size, offsets and orientation flags.
    tagid = HWPTAG_PAGE_DEF
    Orientation = Enum(PORTRAIT=0, LANDSCAPE=1)
    BookBinding = Enum(LEFT=0, RIGHT=1, TOP=2, BOTTOM=3)
    Flags = Flags(
        UINT32,
        0, Orientation, 'orientation',
        1, 2, BookBinding, 'bookbinding',
    )

    @classmethod
    def attributes(cls):
        for length_field in ('width', 'height',
                             'left_offset', 'right_offset',
                             'top_offset', 'bottom_offset',
                             'header_offset', 'footer_offset',
                             'bookbinding_offset'):
            yield HWPUNIT, length_field
        yield cls.Flags, 'attr'
        # yield UINT32, 'attr',

    # NOTE(review): the three helpers below read paper_width, paper_height,
    # offsetLeft/Right/Top/Bottom/Header/Footer and attr.landscape. None of
    # these names is declared by attributes() or Flags above (which use
    # 'width', 'left_offset', ..., 'orientation') - confirm where they are
    # expected to come from before relying on these properties.

    def getDimension(self):
        # Printable (width, height), swapped when the page is landscape.
        across = HWPUNIT(self.paper_width - self.offsetLeft - self.offsetRight)
        down = HWPUNIT(self.paper_height
                       - (self.offsetTop + self.offsetHeader)
                       - (self.offsetBottom + self.offsetFooter))
        return (down, across) if self.attr.landscape else (across, down)
    dimension = property(getDimension)

    def getHeight(self):
        # Landscape pages report the horizontal extent as their height.
        if self.attr.landscape:
            return HWPUNIT(self.paper_width - self.offsetLeft -
                           self.offsetRight)
        return HWPUNIT(self.paper_height
                       - (self.offsetTop + self.offsetHeader)
                       - (self.offsetBottom + self.offsetFooter))
    height = property(getHeight)

    def getWidth(self):
        # Landscape pages report the vertical extent as their width.
        if self.attr.landscape:
            return HWPUNIT(self.paper_height
                           - (self.offsetTop + self.offsetHeader)
                           - (self.offsetBottom + self.offsetFooter))
        return HWPUNIT(self.paper_width - self.offsetLeft -
                       self.offsetRight)
    width = property(getWidth)
a/src/hwp5/binmodel/tagid58_footnote_shape.py b/src/hwp5/binmodel/tagid58_footnote_shape.py new file mode 100644 index 0000000000000000000000000000000000000000..25554723d5e989db7781c26ffad51d1b954a5406 --- /dev/null +++ b/src/hwp5/binmodel/tagid58_footnote_shape.py @@ -0,0 +1,52 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+# +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals + +from hwp5.binmodel._shared import RecordModel +from hwp5.tagids import HWPTAG_FOOTNOTE_SHAPE +from hwp5.dataio import Flags +from hwp5.dataio import WCHAR +from hwp5.dataio import HWPUNIT16 +from hwp5.dataio import UINT32 +from hwp5.dataio import UINT16 +from hwp5.binmodel._shared import COLORREF +from hwp5.binmodel._shared import Border + + +class FootnoteShape(RecordModel): + tagid = HWPTAG_FOOTNOTE_SHAPE + Flags = Flags(UINT32) + + def attributes(cls): + yield cls.Flags, 'flags' + yield WCHAR, 'usersymbol' + yield WCHAR, 'prefix' + yield WCHAR, 'suffix' + yield UINT16, 'starting_number' + yield HWPUNIT16, 'splitter_length' + yield HWPUNIT16, 'splitter_unknown' + yield HWPUNIT16, 'splitter_margin_top' + yield HWPUNIT16, 'splitter_margin_bottom' + yield HWPUNIT16, 'notes_spacing' + yield Border.StrokeType, 'splitter_stroke_type' + yield Border.Width, 'splitter_width' + yield COLORREF, 'splitter_color' + attributes = classmethod(attributes) diff --git a/src/hwp5/binmodel/tagid59_page_border_fill.py b/src/hwp5/binmodel/tagid59_page_border_fill.py new file mode 100644 index 0000000000000000000000000000000000000000..9b2e9e0da472b5614580d256afac6bce65c662c3 --- /dev/null +++ b/src/hwp5/binmodel/tagid59_page_border_fill.py @@ -0,0 +1,46 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals + +from hwp5.binmodel._shared import RecordModel +from hwp5.tagids import HWPTAG_PAGE_BORDER_FILL +from hwp5.dataio import Enum +from hwp5.dataio import Flags +from hwp5.dataio import UINT32 +from hwp5.dataio import UINT16 +from hwp5.binmodel._shared import Margin + + +class PageBorderFill(RecordModel): + tagid = HWPTAG_PAGE_BORDER_FILL + RelativeTo = Enum(BODY=0, PAPER=1) + FillArea = Enum(PAPER=0, PAGE=1, BORDER=2) + Flags = Flags(UINT32, + 0, RelativeTo, 'relative_to', + 1, 'include_header', + 2, 'include_footer', + 3, 4, FillArea, 'fill') + + def attributes(cls): + yield cls.Flags, 'flags' + yield Margin, 'margin' + yield UINT16, 'borderfill_id' + attributes = classmethod(attributes) diff --git a/src/hwp5/binmodel/tagid60_shape_component.py b/src/hwp5/binmodel/tagid60_shape_component.py new file mode 100644 index 0000000000000000000000000000000000000000..f00e1c855860f1fda357c14b836d65303e802402 --- /dev/null +++ b/src/hwp5/binmodel/tagid60_shape_component.py @@ -0,0 +1,193 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. 
+# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals + +from hwp5.binmodel._shared import RecordModel +from hwp5.tagids import HWPTAG_SHAPE_COMPONENT +from hwp5.dataio import Flags +from hwp5.dataio import UINT32 +from hwp5.dataio import UINT16 +from hwp5.dataio import UINT8 +from hwp5.dataio import SHWPUNIT +from hwp5.dataio import WORD +from hwp5.dataio import BYTE +from hwp5.dataio import X_ARRAY +from hwp5.dataio import N_ARRAY +from hwp5.dataio import Struct +from hwp5.dataio import DOUBLE +from hwp5.dataio import ref_member +from hwp5.dataio import HexBytes +from hwp5.binmodel.controlchar import CHID +from hwp5.binmodel._shared import Coord +from hwp5.binmodel._shared import BorderLine +from hwp5.binmodel._shared import FillColorPattern +from hwp5.binmodel._shared import FillGradation +from hwp5.binmodel._shared import FillImage +from hwp5.binmodel.controls.gshape_object_control import GShapeObjectControl + + +class Matrix(Struct): + ''' 표 80 matrix 정보 + + 2D Transform Matrix + + [a c e][x] + [b d f][y] + [0 0 1][1] + ''' + def attributes(): + yield DOUBLE, 'a' + yield DOUBLE, 'c' + yield DOUBLE, 'e' + yield DOUBLE, 'b' + yield DOUBLE, 'd' + yield DOUBLE, 'f' + attributes = staticmethod(attributes) + + +class ScaleRotationMatrix(Struct): + def attributes(): + yield Matrix, 'scaler', + yield Matrix, 'rotator', + attributes = staticmethod(attributes) + + +def parent_must_be_gso(context, values): + ''' parent record type is GShapeObjectControl ''' + # GSO-child ShapeComponent specific: + # it may be a GSO model's attribute, e.g. 
'child_chid' + if 'parent' in context: + parent_context, parent_model = context['parent'] + return parent_model['type'] is GShapeObjectControl + + +def chid_is_container(context, values): + ''' chid == CHID.CONTAINER ''' + return values['chid'] == CHID.CONTAINER + + +def chid_is_rect(context, values): + ''' chid == CHID.RECT ''' + return values['chid'] == CHID.RECT + + +def chid_is_rect_and_fill_colorpattern(context, values): + ''' chid == CHID.RECT and fill_flags.fill_colorpattern ''' + return (values['chid'] == CHID.RECT and + values['fill_flags'].fill_colorpattern) + + +def chid_is_rect_and_fill_image(context, values): + ''' chid == CHID.RECT and fill_flags.fill_image ''' + return (values['chid'] == CHID.RECT and + values['fill_flags'].fill_image) + + +def chid_is_rect_and_fill_gradation(context, values): + ''' chid == CHID.RECT and fill_flags.fill_gradation ''' + return (values['chid'] == CHID.RECT and + values['fill_flags'].fill_gradation) + + +def chid_is_line(context, values): + ''' chid == CHID.LINE ''' + return values['chid'] == CHID.LINE + + +class ShapeComponent(RecordModel): + ''' 4.2.9.2.1. 
개체 요소 ''' + tagid = HWPTAG_SHAPE_COMPONENT + FillFlags = Flags(UINT16, + 8, 'fill_colorpattern', + 9, 'fill_image', + 10, 'fill_gradation') + Flags = Flags(UINT32, + 0, 'flip') + + def attributes(cls): + ''' 표 78 개체 요소 속성 ''' + + yield dict(type=CHID, name='chid0', condition=parent_must_be_gso) + + yield CHID, 'chid' + yield SHWPUNIT, 'x_in_group' + yield SHWPUNIT, 'y_in_group' + yield WORD, 'level_in_group' + yield WORD, 'local_version' + yield SHWPUNIT, 'initial_width' + yield SHWPUNIT, 'initial_height' + yield SHWPUNIT, 'width' + yield SHWPUNIT, 'height' + yield cls.Flags, 'flags' + yield WORD, 'angle' + yield Coord, 'rotation_center' + + ''' 표 79 Rendering 정보 ''' + yield WORD, 'scalerotations_count' + yield Matrix, 'translation' + yield dict(type=X_ARRAY(ScaleRotationMatrix, + ref_member('scalerotations_count')), + name='scalerotations') + + # + # Container + # + + yield dict(type=N_ARRAY(WORD, CHID), + name='controls', + condition=chid_is_container) + + # + # Rectangle + # + + ''' 표 81 테두리 선 정보 ''' + yield dict(type=BorderLine, name='border', condition=chid_is_rect) + ''' 표 83 Outline style ''' + # TODO: Outline ??? 
+ yield dict(type=cls.FillFlags, name='fill_flags', + condition=chid_is_rect) + yield dict(type=UINT16, name='unknown', condition=chid_is_rect) + yield dict(type=UINT8, name='unknown1', condition=chid_is_rect) + yield dict(type=FillColorPattern, name='fill_colorpattern', + condition=chid_is_rect_and_fill_colorpattern) + yield dict(type=FillGradation, name='fill_gradation', + condition=chid_is_rect_and_fill_gradation) + yield dict(type=FillImage, name='fill_image', + condition=chid_is_rect_and_fill_image) + yield dict(type=UINT32, name='fill_shape', + condition=chid_is_rect) + yield dict(type=BYTE, name='fill_blur_center', + condition=chid_is_rect_and_fill_gradation) + + # TODO: 아래 두 필드: chid == $rec일 때만인지 확인 필요 + yield dict(type=HexBytes(5), name='unknown2', + condition=chid_is_rect, version=(5, 0, 2, 4)) + yield dict(type=HexBytes(16), name='unknown3', + condition=chid_is_rect, version=(5, 0, 2, 4)) + + # + # Line + # + + yield dict(type=BorderLine, name='line', + condition=chid_is_line) + attributes = classmethod(attributes) diff --git a/src/hwp5/binmodel/tagid61_table.py b/src/hwp5/binmodel/tagid61_table.py new file mode 100644 index 0000000000000000000000000000000000000000..d04ddb12958e1a695628d4dc102c8683deb7cd0f --- /dev/null +++ b/src/hwp5/binmodel/tagid61_table.py @@ -0,0 +1,74 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. 
+# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals + +from hwp5.binmodel._shared import RecordModel +from hwp5.tagids import HWPTAG_TABLE +from hwp5.dataio import Enum +from hwp5.dataio import Flags +from hwp5.dataio import UINT32 +from hwp5.dataio import UINT16 +from hwp5.dataio import HWPUNIT16 +from hwp5.dataio import N_ARRAY +from hwp5.dataio import X_ARRAY +from hwp5.dataio import ref_member +from hwp5.dataio import Struct +from hwp5.binmodel._shared import Margin + + +class ZoneInfo(Struct): + def attributes(): + ''' 표 73 영역 속성 ''' + yield UINT16, 'starting_column' + yield UINT16, 'starting_row' + yield UINT16, 'end_column' + yield UINT16, 'end_row' + yield UINT16, 'borderfill_id' + attributes = staticmethod(attributes) + + +class TableBody(RecordModel): + ''' 4.2.9.1. 표 개체 ''' + tagid = HWPTAG_TABLE + + # 표 71 표 속성의 속성 + Split = Enum(NONE=0, BY_CELL=1, SPLIT=2) + Flags = Flags(UINT32, + 0, 1, Split, 'split_page', + 2, 'repeat_header') + + def attributes(cls): + ''' 표 70 표 개체 속성 ''' + yield cls.Flags, 'flags' + yield UINT16, 'rows' + yield UINT16, 'cols' + yield HWPUNIT16, 'cellspacing' + + # 표 72 안쪽 여백 정보 + yield Margin, 'padding' + + yield dict(type=X_ARRAY(UINT16, ref_member('rows')), + name='rowcols') + yield UINT16, 'borderfill_id' + yield dict(type=N_ARRAY(UINT16, ZoneInfo), + name='validZones', + version=(5, 0, 0, 7)) + attributes = classmethod(attributes) diff --git a/src/hwp5/binmodel/tagid62_shape_component_line.py b/src/hwp5/binmodel/tagid62_shape_component_line.py new file mode 100644 index 0000000000000000000000000000000000000000..14f5d87a9e45451817c42179129900d97d0968e7 --- /dev/null +++ b/src/hwp5/binmodel/tagid62_shape_component_line.py @@ -0,0 +1,38 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 
mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals + +from hwp5.binmodel._shared import RecordModel +from hwp5.tagids import HWPTAG_SHAPE_COMPONENT_LINE +from hwp5.dataio import UINT16 +from hwp5.binmodel._shared import Coord + + +class ShapeLine(RecordModel): + ''' 4.2.9.2.2. 선 개체 ''' + tagid = HWPTAG_SHAPE_COMPONENT_LINE + + def attributes(): + ''' 표 87 선 개체 속성 ''' + yield Coord, 'p0' + yield Coord, 'p1' + yield UINT16, 'attr' + attributes = staticmethod(attributes) diff --git a/src/hwp5/binmodel/tagid63_shape_component_rectangle.py b/src/hwp5/binmodel/tagid63_shape_component_rectangle.py new file mode 100644 index 0000000000000000000000000000000000000000..02702e99dde593479063d6cf7097aa17fd535bb2 --- /dev/null +++ b/src/hwp5/binmodel/tagid63_shape_component_rectangle.py @@ -0,0 +1,40 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals + +from hwp5.binmodel._shared import RecordModel +from hwp5.tagids import HWPTAG_SHAPE_COMPONENT_RECTANGLE +from hwp5.dataio import BYTE +from hwp5.binmodel._shared import Coord + + +class ShapeRectangle(RecordModel): + ''' 4.2.9.2.3. 사각형 개체 ''' + tagid = HWPTAG_SHAPE_COMPONENT_RECTANGLE + + def attributes(): + ''' 표 89 사각형 개체 속성 ''' + yield BYTE, 'round', + yield Coord, 'p0' + yield Coord, 'p1' + yield Coord, 'p2' + yield Coord, 'p3' + attributes = staticmethod(attributes) diff --git a/src/hwp5/binmodel/tagid64_shape_component_ellipse.py b/src/hwp5/binmodel/tagid64_shape_component_ellipse.py new file mode 100644 index 0000000000000000000000000000000000000000..9355417008222703b14c740052aa68258d0cebe4 --- /dev/null +++ b/src/hwp5/binmodel/tagid64_shape_component_ellipse.py @@ -0,0 +1,50 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. 
+# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals + +from hwp5.binmodel._shared import RecordModel +from hwp5.tagids import HWPTAG_SHAPE_COMPONENT_ELLIPSE +from hwp5.dataio import Flags +from hwp5.dataio import UINT32 +from hwp5.binmodel._shared import Coord + + +class ShapeEllipse(RecordModel): + ''' 4.2.9.2.4. 타원 개체 ''' + tagid = HWPTAG_SHAPE_COMPONENT_ELLIPSE + + ''' 표 92 타원/호 개체 속성 ''' + Flags = Flags(UINT32, + 0, 'arc_recalc_required', + 1, 'arc', + 2, 9, 'arc_kind') + + def attributes(cls): + ''' 표 91 타원 개체 속성 ''' + yield cls.Flags, 'flags' + yield Coord, 'center' + yield Coord, 'axis1' + yield Coord, 'axis2' + yield Coord, 'start1' + yield Coord, 'end1' + yield Coord, 'start2' + yield Coord, 'end2' + attributes = classmethod(attributes) diff --git a/src/hwp5/binmodel/tagid65_shape_component_arc.py b/src/hwp5/binmodel/tagid65_shape_component_arc.py new file mode 100644 index 0000000000000000000000000000000000000000..0f7cba9848c0e242ebf6b9eb3cd95d1a3a425af7 --- /dev/null +++ b/src/hwp5/binmodel/tagid65_shape_component_arc.py @@ -0,0 +1,38 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. 
If not, see . +# +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals + +from hwp5.binmodel._shared import RecordModel +from hwp5.tagids import HWPTAG_SHAPE_COMPONENT_ARC +from hwp5.binmodel._shared import Coord + + +class ShapeArc(RecordModel): + ''' 4.2.9.2.6. 호 개체 ''' + tagid = HWPTAG_SHAPE_COMPONENT_ARC + + def attributes(cls): + ''' 표 96 호 개체 속성 ''' + # yield ShapeEllipse.Flags, 'flags' # SPEC TODO + yield Coord, 'center' + yield Coord, 'axis1' + yield Coord, 'axis2' + attributes = classmethod(attributes) diff --git a/src/hwp5/binmodel/tagid66_shape_component_polygon.py b/src/hwp5/binmodel/tagid66_shape_component_polygon.py new file mode 100644 index 0000000000000000000000000000000000000000..105491c634db5d4eafcdbefc4a756e199a1344a7 --- /dev/null +++ b/src/hwp5/binmodel/tagid66_shape_component_polygon.py @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+# +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals + +from hwp5.binmodel._shared import RecordModel +from hwp5.tagids import HWPTAG_SHAPE_COMPONENT_POLYGON +from hwp5.dataio import N_ARRAY +from hwp5.dataio import UINT16 +from hwp5.binmodel._shared import Coord + + +class ShapePolygon(RecordModel): + ''' 4.2.9.2.5. 다각형 개체 ''' + tagid = HWPTAG_SHAPE_COMPONENT_POLYGON + + def attributes(cls): + ''' 표 94 다각형 개체 속성 ''' + yield N_ARRAY(UINT16, Coord), 'points' + attributes = classmethod(attributes) diff --git a/src/hwp5/binmodel/tagid67_shape_component_curve.py b/src/hwp5/binmodel/tagid67_shape_component_curve.py new file mode 100644 index 0000000000000000000000000000000000000000..6fec0c314f7eb06ec18f277b80ec964db4780a73 --- /dev/null +++ b/src/hwp5/binmodel/tagid67_shape_component_curve.py @@ -0,0 +1,39 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+# +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals + +from hwp5.binmodel._shared import RecordModel +from hwp5.tagids import HWPTAG_SHAPE_COMPONENT_CURVE +from hwp5.dataio import N_ARRAY +from hwp5.dataio import UINT16 +from hwp5.binmodel._shared import Coord + + +class ShapeCurve(RecordModel): + ''' 4.2.9.2.7. 곡선 개체 ''' + + tagid = HWPTAG_SHAPE_COMPONENT_CURVE + + def attributes(cls): + ''' 표 98 곡선 개체 속성 ''' + yield N_ARRAY(UINT16, Coord), 'points' + # TODO: segment type + attributes = classmethod(attributes) diff --git a/src/hwp5/binmodel/tagid68_shape_component_ole.py b/src/hwp5/binmodel/tagid68_shape_component_ole.py new file mode 100644 index 0000000000000000000000000000000000000000..007a2b83b9d940801fb8dd4663887d1361dfa97d --- /dev/null +++ b/src/hwp5/binmodel/tagid68_shape_component_ole.py @@ -0,0 +1,52 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+# +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals + +from hwp5.binmodel._shared import RecordModel +from hwp5.tagids import HWPTAG_SHAPE_COMPONENT_OLE +from hwp5.dataio import Flags +from hwp5.dataio import UINT32 +from hwp5.dataio import INT32 +from hwp5.binmodel._shared import BinStorageId +from hwp5.binmodel._shared import BorderLine + + +class ShapeOLE(RecordModel): + ''' 4.2.9.5 OLE 개체 ''' + + tagid = HWPTAG_SHAPE_COMPONENT_OLE + + Flags = Flags(UINT32, + 0, 7, 'dvaspect', + 8, 'moniker', + # baseline: + # 0 means defaut (85%) + # 1 means 0% + # 101 means 100% + 9, 15, 'baseline') + + @classmethod + def attributes(cls): + yield cls.Flags, 'flags' + yield INT32, 'extent_x' + yield INT32, 'extent_y' + yield BinStorageId, 'storage_id' + yield BorderLine, 'border' diff --git a/src/hwp5/binmodel/tagid69_shape_component_picture.py b/src/hwp5/binmodel/tagid69_shape_component_picture.py new file mode 100644 index 0000000000000000000000000000000000000000..80a4bca63518c4c35c641fa455dd344a0eeb0ac6 --- /dev/null +++ b/src/hwp5/binmodel/tagid69_shape_component_picture.py @@ -0,0 +1,102 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+# +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals + +from hwp5.binmodel._shared import RecordModel +from hwp5.tagids import HWPTAG_SHAPE_COMPONENT_PICTURE +from hwp5.dataio import Struct +from hwp5.dataio import Flags +from hwp5.dataio import SHWPUNIT +from hwp5.dataio import UINT32 +from hwp5.dataio import UINT16 +from hwp5.dataio import INT8 +from hwp5.dataio import BYTE +from hwp5.binmodel._shared import Margin +from hwp5.binmodel._shared import Coord +from hwp5.binmodel._shared import BorderLine + + +class PictureInfo(Struct): + ''' 표 27 그림 정보 ''' + def attributes(): + yield INT8, 'brightness', + yield INT8, 'contrast', + yield BYTE, 'effect', + yield UINT16, 'bindata_id', + attributes = staticmethod(attributes) + + +class PictureEffect(Struct): + ''' 표 103 그림 효과 속성 ''' + + Flags = Flags(UINT32) + + @classmethod + def attributes(cls): + yield cls.Flags, 'flags' + # TODO + + +# HWPML에서의 이름 사용 +class ImageRect(Struct): + ''' 이미지 좌표 정보 ''' + + def attributes(): + yield Coord, 'p0' + yield Coord, 'p1' + yield Coord, 'p2' + yield Coord, 'p3' + attributes = staticmethod(attributes) + + +# HWPML에서의 이름 사용 +class ImageClip(Struct): + ''' 이미지 자르기 정보 ''' + + def attributes(): + yield SHWPUNIT, 'left', + yield SHWPUNIT, 'top', + yield SHWPUNIT, 'right', + yield SHWPUNIT, 'bottom', + attributes = staticmethod(attributes) + + +class ShapePicture(RecordModel): + ''' 4.2.9.4. 
그림 개체 ''' + tagid = HWPTAG_SHAPE_COMPONENT_PICTURE + + def attributes(): + ''' 표 102 그림 개체 속성 ''' + yield BorderLine, 'border' + yield ImageRect, 'rect', + yield ImageClip, 'clip', + yield Margin, 'padding', + yield PictureInfo, 'picture', + yield dict(type=BYTE, name='border_transparency', version=(5, 0, 2, 2)) + yield dict(type=UINT32, name='instance_id', version=(5, 0, 2, 5)) + + # TODO: this choke on 5.0.3.3 d6dfac424525298119de54410c3b22d74aa85511 + # Strangely, its ok on 5.0.3.3 83a0ea1f9da368ff9f0b45f72e9306b776edf38a + # and other 5.0.3.0, 5.0.3.2 and 5.0.3.4 files. + yield dict(type=PictureEffect, name='picture_effect', + version=(5, 0, 3, 4)) + + attributes = staticmethod(attributes) diff --git a/src/hwp5/binmodel/tagid70_shape_component_container.py b/src/hwp5/binmodel/tagid70_shape_component_container.py new file mode 100644 index 0000000000000000000000000000000000000000..edc91b3e8d3dd42ab88f181983839929fbf0c52a --- /dev/null +++ b/src/hwp5/binmodel/tagid70_shape_component_container.py @@ -0,0 +1,29 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+# +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals + +from hwp5.binmodel._shared import RecordModel +from hwp5.tagids import HWPTAG_SHAPE_COMPONENT_CONTAINER + + +class ShapeContainer(RecordModel): + tagid = HWPTAG_SHAPE_COMPONENT_CONTAINER + # TODO diff --git a/src/hwp5/binmodel/tagid71_ctrl_data.py b/src/hwp5/binmodel/tagid71_ctrl_data.py new file mode 100644 index 0000000000000000000000000000000000000000..a0c98295299d44f01d47e4a4b8383a797f147a9d --- /dev/null +++ b/src/hwp5/binmodel/tagid71_ctrl_data.py @@ -0,0 +1,56 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+# +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals + + + +from hwp5.binmodel._shared import RecordModelType +from hwp5.binmodel._shared import RecordModel +from hwp5.tagids import HWPTAG_CTRL_DATA + + +control_data_models = dict() + + +class ControlDataType(RecordModelType): + + def __new__(mcs, name, bases, attrs): + cls = RecordModelType.__new__(mcs, name, bases, attrs) + if 'parent_model_type' in attrs: + parent_model_type = attrs['parent_model_type'] + assert parent_model_type not in control_data_models + control_data_models[parent_model_type] = cls + return cls + + +class ControlData(RecordModel, metaclass=ControlDataType): + ''' 4.2.8. 컨트롤 임의의 데이터 ''' + + tagid = HWPTAG_CTRL_DATA + + extension_types = control_data_models + + def get_extension_key(cls, context, model): + ''' parent model type ''' + parent = context.get('parent') + if parent: + return parent[1]['type'] + get_extension_key = classmethod(get_extension_key) diff --git a/src/hwp5/binmodel/tagid72_ctrl_eqedit.py b/src/hwp5/binmodel/tagid72_ctrl_eqedit.py new file mode 100644 index 0000000000000000000000000000000000000000..a461ef49ed3098f612f9e2cdb54c40a6fa2bc809 --- /dev/null +++ b/src/hwp5/binmodel/tagid72_ctrl_eqedit.py @@ -0,0 +1,49 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. 
+# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals + +from hwp5.binmodel._shared import RecordModel +from hwp5.tagids import HWPTAG_CTRL_EQEDIT +from hwp5.dataio import UINT32 +from hwp5.dataio import Enum +from hwp5.dataio import Flags + + +class EqEdit(RecordModel): + ''' 4.2.9.3. 한글 스크립트 수식 (한글 97 방식 수식) ''' + tagid = HWPTAG_CTRL_EQEDIT + + ScriptScope = Enum(CHAR=0, LINE=1) + Flags = Flags(UINT32, + 0, ScriptScope, 'script_scope') + + @classmethod + def attributes(cls): + ''' 표 100 수식 개체 속성 ''' + + # TODO: followings are not tested against real files + if False: + yield + # yield cls.Flags, 'flags' + # yield BSTR, 'script' + # yield HWPUNIT, 'font_size' + # yield COLORREF, 'color' + # yield INT16, 'baseline' diff --git a/src/hwp5/binmodel/tagid74_shape_component_textart.py b/src/hwp5/binmodel/tagid74_shape_component_textart.py new file mode 100644 index 0000000000000000000000000000000000000000..d804a6481cf0a26ae179d57f7f06f084686cdc60 --- /dev/null +++ b/src/hwp5/binmodel/tagid74_shape_component_textart.py @@ -0,0 +1,29 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
# Record model bound to the HWPTAG_SHAPE_COMPONENT_TEXTART tag id.
# Only the tag id is declared here; no members are defined yet.
class ShapeTextArt(RecordModel):
    tagid = HWPTAG_SHAPE_COMPONENT_TEXTART
    # TODO
# Record model bound to the HWPTAG_FORM_OBJECT tag id.
# Only the tag id is declared here; no members are defined yet.
class FormObject(RecordModel):
    tagid = HWPTAG_FORM_OBJECT
    # TODO
# Record model bound to the HWPTAG_MEMO_SHAPE tag id.
# Only the tag id is declared here; no members are defined yet.
class MemoShape(RecordModel):
    tagid = HWPTAG_MEMO_SHAPE
    # TODO
# Record model bound to the HWPTAG_MEMO_LIST tag id.
# Only the tag id is declared here; no members are defined yet.
class MemoList(RecordModel):
    tagid = HWPTAG_MEMO_LIST
    # TODO
# Record model bound to the HWPTAG_FORBIDDEN_CHAR tag id.
# Only the tag id is declared here; no members are defined yet.
class ForbiddenChar(RecordModel):
    tagid = HWPTAG_FORBIDDEN_CHAR
    # TODO
# Record model bound to the HWPTAG_CHART_DATA tag id.
# Only the tag id is declared here; no members are defined yet.
class ChartData(RecordModel):
    tagid = HWPTAG_CHART_DATA
    # TODO
# Record model bound to the HWPTAG_SHAPE_COMPONENT_UNKNOWN tag id.
# Only the tag id is declared here; no members are defined yet.
class ShapeUnknown(RecordModel):
    tagid = HWPTAG_SHAPE_COMPONENT_UNKNOWN
    # TODO
logger = logging.getLogger(__name__)


def define_enum_type(enum_type):
    ''' Build an ``EnumType`` element for an enum type.

    The element is named after the type, gets a ``scope`` attribute when the
    type has a scoping struct, and holds one ``item`` child per enum
    instance, sorted by (value, name).
    '''
    attrs = dict(name=enum_type.__name__)
    if enum_type.scoping_struct:
        attrs['scope'] = enum_type.scoping_struct.__name__
    elem = ET.Element('EnumType', attrs)
    for value, name in sorted((e, e.name) for e in enum_type.instances):
        ET.SubElement(elem, 'item', dict(name=name, value=str(value)))
    return elem


def define_bitfield(bitgroup_name, bitgroup_desc):
    ''' Build a ``BitField`` element (name, lsb, msb and value type). '''
    attrs = dict(name=bitgroup_name,
                 lsb=str(bitgroup_desc.lsb),
                 msb=str(bitgroup_desc.msb))
    elem = ET.Element('BitField', attrs)
    elem.append(reference_type(bitgroup_desc.valuetype))
    return elem


def define_flags_type(flags_type):
    ''' Build a ``FlagsType`` element: base type plus its bit fields.

    Bit fields are the ``BitGroupDescriptor`` attributes found in the type's
    own ``__dict__``; they are emitted from the highest lsb to the lowest.
    '''
    from hwp5.dataio import BitGroupDescriptor
    elem = ET.Element('FlagsType')
    base = ET.SubElement(elem, 'base')
    base.append(reference_type(flags_type.basetype))
    bitgroups = [(v.lsb, (k, v)) for k, v in flags_type.__dict__.items()
                 if isinstance(v, BitGroupDescriptor)]
    bitgroups.sort()
    for lsb, (k, v) in reversed(bitgroups):
        elem.append(define_bitfield(k, v))
    return elem


def define_fixed_array_type(array_type):
    ''' Build a ``FixedArrayType`` element (size and item type). '''
    attrs = dict(size=str(array_type.size))
    elem = ET.Element('FixedArrayType', attrs)
    item_type_elem = ET.SubElement(elem, 'item-type')
    item_type_elem.append(reference_type(array_type.itemtype))
    return elem


def define_variable_length_array_type(array_type):
    ''' Build a ``VariableLengthArrayType`` element (count and item type). '''
    elem = ET.Element('VariableLengthArrayType')
    count_type_elem = ET.SubElement(elem, 'count-type')
    count_type_elem.append(reference_type(array_type.counttype))
    item_type_elem = ET.SubElement(elem, 'item-type')
    item_type_elem.append(reference_type(array_type.itemtype))
    return elem


def define_x_array_type(t):
    ''' Build an ``XArrayType`` element.

    The ``size`` attribute is the docstring of the type's count reference
    function, used here as a human readable description of the count.
    '''
    elem = ET.Element('XArrayType', dict(size=t.count_reference.__doc__))
    item_type_elem = ET.SubElement(elem, 'item-type')
    item_type_elem.append(reference_type(t.itemtype))
    return elem


def define_selective_type(t):
    ''' Build a ``SelectiveType`` element with one child per selection.

    The ``selector`` attribute is the docstring of the selector reference
    function; each ``selection`` child states the key it applies to.
    '''
    elem = ET.Element('SelectiveType',
                      dict(selector=t.selector_reference.__doc__))
    for k, v in t.selections.items():
        sel = ET.SubElement(elem, 'selection',
                            dict(when=make_condition_value(k)))
        sel.append(reference_type(v))
    return elem


def reference_type(t):
    ''' Build a ``type-ref`` element pointing at type ``t``.

    Enum types are referenced by name (and scope); flags, array and
    selective types get their full definition embedded as a child element.
    '''
    attrs = dict()
    attrs['name'] = t.__name__
    attrs['meta'] = type(t).__name__
    elem = ET.Element('type-ref', attrs)

    from hwp5.dataio import EnumType
    from hwp5.dataio import FlagsType
    from hwp5.dataio import FixedArrayType
    from hwp5.dataio import X_ARRAY
    from hwp5.dataio import VariableLengthArrayType
    from hwp5.dataio import SelectiveType
    if isinstance(t, EnumType):
        if t.scoping_struct:
            elem.attrib['scope'] = t.scoping_struct.__name__
    elif isinstance(t, FlagsType):
        elem.append(define_flags_type(t))
    elif isinstance(t, FixedArrayType):
        elem.append(define_fixed_array_type(t))
    elif isinstance(t, X_ARRAY):
        elem.append(define_x_array_type(t))
    elif isinstance(t, VariableLengthArrayType):
        elem.append(define_variable_length_array_type(t))
    elif isinstance(t, SelectiveType):
        elem.append(define_selective_type(t))
    return elem


def referenced_types_by_member(member):
    ''' Yield the member's type, then every type that type refers to. '''
    t = member.get('type')
    if t:
        yield t
        for x in direct_referenced_types(t):
            yield x


def define_member(struct_type, member):
    ''' Build a ``member`` element for one member description (a dict).

    Optional parts: a dotted ``version`` attribute, a ``type-ref`` child and
    a ``condition`` child whose text is the condition function's docstring
    (or, failing that, its name).
    '''
    attrs = dict(name=member['name'])

    version = member.get('version')
    if version:
        version = '.'.join(str(x) for x in version)
        attrs['version'] = version

    elem = ET.Element('member', attrs)

    t = member.get('type')
    if t:
        elem.append(reference_type(t))

    condition = member.get('condition')
    if condition:
        condition = condition.__doc__ or condition.__name__ or ''
        condition = condition.strip()
        condition_elem = ET.Element('condition')
        condition_elem.text = condition
        elem.append(condition_elem)

    return elem


def direct_referenced_types(t):
    ''' Yield the types that ``t`` refers to, depending on its kind.

    Struct types are expanded through their own members (recursively via
    referenced_types_by_member); other kinds yield their value, item, count
    or selection types.
    '''
    from hwp5.dataio import FlagsType
    from hwp5.dataio import FixedArrayType
    from hwp5.dataio import X_ARRAY
    from hwp5.dataio import VariableLengthArrayType
    from hwp5.dataio import StructType
    from hwp5.dataio import SelectiveType
    if isinstance(t, FlagsType):
        for desc in t.bitfields.values():
            yield desc.valuetype
    elif isinstance(t, FixedArrayType):
        yield t.itemtype
    elif isinstance(t, X_ARRAY):
        yield t.itemtype
    elif isinstance(t, VariableLengthArrayType):
        yield t.counttype
        yield t.itemtype
    elif isinstance(t, StructType):
        # only members declared by the struct itself, not inherited ones
        if 'members' in t.__dict__:
            for member in t.members:
                for x in referenced_types_by_member(member):
                    yield x
    elif isinstance(t, SelectiveType):
        for selection in t.selections.values():
            yield selection


def referenced_types_by_struct_type(t):
    ''' Yield every type referenced by the members ``t`` itself declares. '''
    if 'members' in t.__dict__:
        for member in t.members:
            for x in referenced_types_by_member(member):
                yield x


def extension_sort_key(cls):
    ''' Return the class names of ``cls``'s MRO, root first, as a tuple. '''
    import inspect
    return tuple(reversed([x.__name__ for x in inspect.getmro(cls)]))


def sort_extensions(extension_types):
    ''' Return (key, class) pairs of a mapping, ordered by class lineage.

    Pairs are ordered by extension_sort_key() of the class; pairs whose
    sort keys tie are ordered by comparing the (key, class) pairs.
    '''
    decorated = [(extension_sort_key(cls), (k, cls))
                 for k, cls in extension_types.items()]
    decorated.sort()
    return ((k, cls) for sort_key, (k, cls) in decorated)


def extensions_of_tag_model(tag_model):
    ''' Yield ``((condition, key), extension_type)`` for a tag model.

    Nothing is yielded when the model has no (non-empty)
    ``extension_types``. ``condition`` is the stripped docstring of the
    model's ``get_extension_key``.
    '''
    extension_types = getattr(tag_model, 'extension_types', None)
    if extension_types:
        extension_types = sort_extensions(extension_types)
        key_condition = getattr(tag_model, 'get_extension_key', None)
        key_condition = key_condition.__doc__.strip()
        for key, extension_type in extension_types:
            yield (key_condition, key), extension_type


def define_struct_type(t):
    ''' Build a ``StructType`` element: ``extends`` children, then members. '''
    elem = ET.Element('StructType',
                      dict(name=t.__name__))
    for extend in get_extends(t):
        elem.append(define_extends(extend))

    if 'members' in t.__dict__:
        for member in t.members:
            elem.append(define_member(t, member))
    return elem


def define_tag_model(tag_id):
    ''' Build a ``TagModel`` element for a tag id.

    Contains a ``base`` child naming the tag's model and one ``extension``
    child per extension type of that model.
    '''
    from hwp5.tagids import tagnames
    from hwp5.binmodel import tag_models
    tag_name = tagnames[tag_id]
    tag_model = tag_models[tag_id]
    elem = ET.Element('TagModel',
                      dict(tag_id=str(tag_id),
                           name=tag_name))
    elem.append(define_base_type(tag_model))
    for (name, value), extension_type in extensions_of_tag_model(tag_model):
        elem.append(define_extension(extension_type,
                                     tag_model,
                                     name,
                                     value))
    return elem


def define_base_type(t):
    ''' Build a ``base`` element naming type ``t``. '''
    elem = ET.Element('base', dict(name=t.__name__))
    return elem


def make_condition_value(value):
    ''' Render a selection/extension key as text for a condition.

    Tuples are rendered recursively as ``(a, b)``; instances of enum types
    by ``repr``; classes by name; everything else by ``str``.
    '''
    from hwp5.dataio import EnumType
    if isinstance(value, tuple):
        value = tuple(make_condition_value(v) for v in value)
        return '('+', '.join(value)+')'
    elif isinstance(type(value), EnumType):
        return repr(value)
    elif isinstance(value, type):
        return value.__name__
    else:
        return str(value)


def define_extension(t, up_to_type, name, value):
    ''' Build an ``extension`` element for extension type ``t``.

    The ``condition`` child reads ``<name> == <value>``; ``extends``
    children list the member-declaring ancestors of ``t`` found before
    ``up_to_type`` in its MRO.
    '''
    attrs = dict(name=t.__name__)
    elem = ET.Element('extension', attrs)
    condition = ET.Element('condition')
    condition.text = name + ' == ' + make_condition_value(value)
    elem.append(condition)

    for extend in get_extends(t, up_to_type):
        elem.append(define_extends(extend))

    if 'members' in t.__dict__:
        for member in t.members:
            elem.append(define_member(t, member))
    return elem


def get_extends(t, up_to_type=None):
    ''' Return the ancestors of ``t`` that declare their own ``members``.

    The MRO of ``t`` (without ``t`` itself) is followed until
    ``up_to_type`` is met; ``up_to_type`` and everything after it are left
    out. The result is an iterator ordered from the most distant ancestor
    to the nearest one.
    '''
    from itertools import takewhile
    import inspect

    ancestors = inspect.getmro(t)[1:]  # exclude self
    ancestors = takewhile(lambda cls: cls is not up_to_type, ancestors)
    ancestors = [cls for cls in ancestors if 'members' in cls.__dict__]
    return reversed(ancestors)


def define_extends(t):
    ''' Build an ``extends`` element naming type ``t``. '''
    attrs = dict(name=t.__name__)
    elem = ET.Element('extends', attrs)
    return elem


def define_primitive_type(t):
    ''' Build a ``PrimitiveType`` element.

    ``size`` is set from a truthy ``fixed_size`` attribute and a ``binfmt``
    child is added for a truthy ``binfmt`` attribute, when ``t`` has them.
    '''
    attrs = dict(name=t.__name__)
    fixed_size = getattr(t, 'fixed_size', None)
    if fixed_size:
        attrs['size'] = str(fixed_size)

    elem = ET.Element('PrimitiveType', attrs)

    binfmt = getattr(t, 'binfmt', None)
    if binfmt:
        binfmt_elem = ET.Element('binfmt')
        binfmt_elem.text = binfmt
        elem.append(binfmt_elem)
    return elem


def sort_types_by_name(types):
    ''' Return ``types`` as a list ordered by ``__name__``.

    Only the names are compared. Sorting ``(name, type)`` tuples instead
    would compare the type objects whenever two names are equal, which
    plain Python 3 classes do not support.
    '''
    return sorted(types, key=lambda t: t.__name__)


def main():
    ''' Command line entry point: print the binary spec as XML to stdout.

    Collects the tag models, the struct, enum and primitive types they
    reference (including extension types and their ancestors) and writes
    one ``binspec`` document, UTF-8 encoded, to standard output.
    '''
    from docopt import docopt
    from hwp5 import __version__
    from hwp5.proc import rest_to_docopt

    doc = rest_to_docopt(__doc__)
    args = docopt(doc, version=__version__)

    if '--loglevel' in args:
        loglevel = args['--loglevel'].lower()
        loglevel = dict(error=logging.ERROR,
                        warning=logging.WARNING,
                        info=logging.INFO,
                        debug=logging.DEBUG).get(loglevel, logging.WARNING)
        logger.setLevel(loglevel)
        logger.addHandler(logging.StreamHandler())

    from hwp5 import binmodel
    from hwp5.dataio import EnumType
    from hwp5.dataio import StructType
    from hwp5.dataio import PrimitiveType
    import sys

    enum_types = set()
    extensions = set()
    struct_types = set()
    primitive_types = set()

    root = ET.Element('binspec', dict(version=__version__))
    for tag_id, tag_model in binmodel.tag_models.items():
        logger.debug('TAG_MODEL: %s', tag_model.__name__)
        root.append(define_tag_model(tag_id))
        struct_types.add(tag_model)

        for t in referenced_types_by_struct_type(tag_model):
            if isinstance(t, EnumType):
                enum_types.add(t)
            if isinstance(t, StructType):
                struct_types.add(t)
            if isinstance(t, PrimitiveType):
                logger.debug('- PrimitiveType: %s', t.__name__)
                primitive_types.add(t)

        for _, t in extensions_of_tag_model(tag_model):
            extensions.add(t)

    for t in extensions:
        struct_types.add(t)
        for extends in get_extends(t):
            struct_types.add(extends)

    # second pass: types referenced by the structs gathered above
    for struct_type in struct_types:
        for t in referenced_types_by_struct_type(struct_type):
            if isinstance(t, EnumType):
                enum_types.add(t)
            if isinstance(t, PrimitiveType):
                primitive_types.add(t)

    for t in sort_types_by_name(enum_types):
        root.append(define_enum_type(t))

    for t in sort_types_by_name(struct_types):
        root.append(define_struct_type(t))

    for t in sort_types_by_name(primitive_types):
        root.append(define_primitive_type(t))

    doc = ET.ElementTree(root)
    # ElementTree produces bytes for any encoding other than 'unicode'.
    # On Python 3 sys.stdout is a text stream and rejects bytes, so write
    # to its underlying binary buffer when it has one.
    doc.write(getattr(sys.stdout, 'buffer', sys.stdout), 'utf-8')
logger = logging.getLogger(__name__)


def bintype_map_events(bin_item):
    ''' Flatten a type description into (event, item) pairs.

    ``bin_item`` is a dict with at least a ``type`` entry. Struct, array
    and selective types are wrapped in STARTEVENT/ENDEVENT pairs around the
    events of their parts; every other type yields a single ``None`` event.
    '''
    bin_type = bin_item['type']
    if isinstance(bin_type, StructType):
        yield STARTEVENT, bin_item
        if hasattr(bin_type, 'members'):
            for member in bin_type.members:
                for x in bintype_map_events(member):
                    yield x
        yield ENDEVENT, bin_item
    elif isinstance(bin_type, (FixedArrayType,
                               VariableLengthArrayType,
                               X_ARRAY)):
        # all array kinds are described by the events of one item type
        yield STARTEVENT, bin_item
        item = dict(type=bin_type.itemtype)
        for x in bintype_map_events(item):
            yield x
        yield ENDEVENT, bin_item
    elif isinstance(bin_type, SelectiveType):
        yield STARTEVENT, bin_item
        for k, v in bin_type.selections.items():
            # each alternative inherits the selective item's entries and
            # is tagged with the key that selects it
            item = dict(bin_item, select_when=k, type=v)
            for x in bintype_map_events(item):
                yield x
        yield ENDEVENT, bin_item
    elif isinstance(bin_type, FlagsType):
        # TODO: this should be done in model definitions
        # bin_type: used in binary reading
        # flags_type: binary value to flags type
        bin_item['bin_type'] = bin_type.basetype
        bin_item['flags_type'] = bin_type
        yield None, bin_item
    else:
        yield None, bin_item


def filter_with_version(events, version):
    ''' Drop events whose item requires a version newer than ``version``.

    ``events`` must be an iterator: when a STARTEVENT is dropped, the rest
    of that subtree is consumed from the same iterator through
    iter_subevents() (presumably up to the matching ENDEVENT).
    '''
    for ev, item in events:
        required_version = item.get('version')
        if required_version is not None and version < required_version:
            # just consume and skip this tree
            logger.debug('skip following: (required version: %s)',
                         required_version)
            logger.debug(' %s', (ev, item))
            if ev is STARTEVENT:
                for x in iter_subevents(events):
                    pass
            continue
        yield ev, item


def make_items_immutable(events):
    ''' Replace each item dict by a sorted tuple of its (key, value) pairs.

    An ENDEVENT reuses the tuple made for its matching STARTEVENT.
    '''
    stack = []
    for ev, item in events:
        if ev is None:
            item = tuple(sorted(item.items()))
        elif ev is STARTEVENT:
            item = tuple(sorted(item.items()))
            stack.append(item)
        elif ev is ENDEVENT:
            item = stack.pop()
        yield ev, item


def compile_type_definition(bin_item):
    ''' Return the immutable tuple of events describing ``bin_item``. '''
    events = bintype_map_events(bin_item)
    events = make_items_immutable(events)
    return tuple(events)


# cache: type -> compiled events, regardless of version
master_typedefs = dict()


def get_compiled_typedef(type):
    ''' Return the compiled events of ``type``, compiling them once. '''
    if type not in master_typedefs:
        logger.info('compile typedef of %s', type)
        typedef_events = compile_type_definition(dict(type=type))
        master_typedefs[type] = typedef_events
    return master_typedefs[type]


# cache: version -> (type -> compiled events filtered for that version)
versioned_typedefs = dict()


def get_compiled_typedef_with_version(type, version):
    ''' Return the compiled events of ``type`` filtered for ``version``.

    Results are cached per (version, type).
    '''
    typedefs = versioned_typedefs.setdefault(version, dict())

    if type not in typedefs:
        logger.info('filter compiled typedef of %s with version %s',
                    type, version)
        typedef_events = get_compiled_typedef(type)
        events = static_to_mutable(typedef_events)
        events = filter_with_version(events, version)
        events = make_items_immutable(events)
        events = tuple(events)
        typedefs[type] = events

    return typedefs[type]


class ERROREVENT(object):
    ''' Event marker for a leaf item whose value could not be resolved. '''
    pass


def static_to_mutable(events):
    ''' Turn compiled (tuple) items back into fresh dicts.

    An ENDEVENT carries the very dict created for its matching STARTEVENT.
    '''
    stack = []
    for ev, item in events:
        if ev is None:
            item = dict(item)
        elif ev is STARTEVENT:
            item = dict(item)
            stack.append(item)
        elif ev is ENDEVENT:
            item = stack.pop()
        yield ev, item


def pop_subevents(events_deque):
    ''' Pop and yield events from the left of a deque up to a closing end.

    Nested START/END pairs are passed over; the generator stops after
    yielding the first ENDEVENT that has no matching STARTEVENT among the
    popped events, or when the deque runs empty.
    '''
    level = 0
    while len(events_deque) > 0:
        event, item = events_deque.popleft()
        yield event, item
        if event is STARTEVENT:
            level += 1
        elif event is ENDEVENT:
            if level > 0:
                level -= 1
            else:
                return


def resolve_typedefs(typedef_events, context):
    ''' Expand a type description into the concrete events to be read.

    Works on a deque of events and, using values already stored in the
    enclosing struct's item (``item['value']``), it:

    - keeps only the alternative of a selective type whose key matches,
    - drops members whose ``condition`` is not met,
    - repeats the events of an array item ``count`` times.

    The struct values are expected to be filled in by the consumer of this
    generator while it is being iterated; the same holds for the count
    item yielded for variable length arrays.
    '''

    array_types = (X_ARRAY, VariableLengthArrayType, FixedArrayType)

    stack = []  # enclosing items that have been started and not yet ended
    selective_stack = []  # selective items currently open

    events = static_to_mutable(typedef_events)
    events = deque(events)
    while len(events) > 0:
        ev, item = events.popleft()
        if isinstance(item['type'], SelectiveType):
            # the selective wrapper itself is never yielded
            if ev is STARTEVENT:
                parent_struct = stack[-1]
                struct_value = parent_struct['value']
                selector_reference = item['type'].selector_reference
                select_key = selector_reference(context, struct_value)
                logger.debug('select_key: %s', select_key)
                item['select_key'] = select_key
                selective_stack.append(item)
            elif ev is ENDEVENT:
                selective_stack.pop()
            else:
                assert False
        elif 'select_when' in item:
            assert ev in (None, STARTEVENT)
            select_key = selective_stack[-1]['select_key']
            select_when = item.pop('select_when')
            if select_when != select_key:
                # just consume and skip this tree
                logger.debug('skip following: (select key %r != %r)',
                             select_key, select_when)
                logger.debug(' %s', (ev, item))
                if ev is STARTEVENT:
                    for x in pop_subevents(events):
                        logger.debug(' %s', x)
                continue
            logger.debug('selected for: %r', select_when)
            # re-queue: with 'select_when' removed the item is processed
            # again by the remaining branches
            events.appendleft((ev, item))
        elif 'condition' in item:
            assert ev in (STARTEVENT, None)
            condition = item.pop('condition')
            parent_struct = stack[-1]
            if not condition(context, parent_struct['value']):
                # just consume and skip this tree
                logger.debug('skip following: (not matched condition: %s)',
                             condition)
                logger.debug(' %s', (ev, item))
                if ev is STARTEVENT:
                    for x in pop_subevents(events):
                        logger.debug(' %s', x)
                continue
            # re-queue without the condition
            events.appendleft((ev, item))
        elif isinstance(item['type'], array_types) and 'count' not in item:
            assert ev is STARTEVENT

            if isinstance(item['type'], X_ARRAY):
                parent_struct = stack[-1]
                struct_value = parent_struct['value']

                count_reference = item['type'].count_reference
                count = count_reference(context, struct_value)
            elif isinstance(item['type'], VariableLengthArrayType):
                # the count is a value of its own: hand it out to be
                # resolved, then read the result back from the same dict
                count = dict(type=item['type'].counttype, dontcollect=True)
                yield None, count
                count = count['value']
            elif isinstance(item['type'], FixedArrayType):
                count = item['type'].size
            item['count'] = count

            subevents = list(pop_subevents(events))
            endevent = subevents[-1]
            subevents = subevents[:-1]

            def clone(events):
                # copy the item dicts so each repetition gets its own
                stack = []
                for ev, item in events:
                    if ev in (STARTEVENT, None):
                        item = dict(item)
                        if ev is STARTEVENT:
                            stack.append(item)
                    else:
                        item = stack.pop()
                    yield ev, item

            events.appendleft(endevent)
            for _ in range(0, count):
                cloned = list(clone(subevents))
                events.extendleft(reversed(cloned))
            # re-queue the array start; it now has a 'count'
            events.appendleft((ev, item))
        else:
            if ev is STARTEVENT:
                stack.append(item)
            elif ev is ENDEVENT:
                stack.pop()
            yield ev, item


def evaluate_bin_values(events):
    ''' Convert raw values of items having a ``flags_type`` to that type. '''
    for ev, item in events:
        if 'flags_type' in item:
            flags_type = item['flags_type']
            assert isinstance(flags_type, FlagsType)
            item['value'] = flags_type(item['value'])
        yield ev, item


def construct_composite_values(events):
    ''' Assemble struct and array values from the values of their parts.

    A struct item gets a dict value keyed by member name, an array item a
    list (turned into a tuple at the end of a fixed array). Items flagged
    ``dontcollect`` are not added to their enclosing value.
    '''

    stack = []

    for ev, item in events:
        if ev is STARTEVENT:
            if isinstance(item['type'], StructType):
                item['value'] = dict()
            elif isinstance(item['type'], (X_ARRAY, VariableLengthArrayType,
                                           FixedArrayType)):
                item['value'] = list()
            else:
                assert False
            stack.append(item)
        elif ev in (None, ENDEVENT):
            if ev is ENDEVENT:
                item = stack.pop()
                if isinstance(item['type'], FixedArrayType):
                    item['value'] = tuple(item['value'])

            if len(stack) > 0:
                if not item.get('dontcollect', False):
                    if isinstance(stack[-1]['type'], StructType):
                        # reduce a struct member into struct value
                        stack[-1]['value'][item['name']] = item['value']
                    elif isinstance(stack[-1]['type'],
                                    (X_ARRAY,
                                     VariableLengthArrayType,
                                     FixedArrayType)):
                        stack[-1]['value'].append(item['value'])
        yield ev, item


def log_events(events, log_fn):
    ''' Pass events through unchanged, reporting each one via ``log_fn``.

    ``log_fn`` is called like a logging method: a format string followed
    by its arguments. The stream offset is reported only for items that
    have a ``bin_offset``; items resolved by other means than a stream
    (e.g. typed in interactively) may not have one.
    '''
    for ev, item in events:
        if ev in (STARTEVENT, ENDEVENT):
            fmt = ['%s:']
            val = [ev.__name__]
        elif 'bin_offset' in item:
            fmt = [' %04x:']
            val = [item['bin_offset']]
        else:
            fmt = []
            val = []

        fmt.append('%s')
        val.append(item['type'].__name__)

        if 'name' in item:
            fmt.append('%r')
            val.append(str(item['name']))

        if 'value' in item and ev is None:
            fmt.append('%r')
            val.append(item['value'])

        if 'exception' in item:
            fmt.append('-- Exception: %r')
            val.append(item['exception'])

        log_fn(' '.join(fmt), *val)
        yield ev, item


def eval_typedef_events(typedef_events, context, resolve_values):
    ''' Chain the evaluation stages over compiled typedef events.

    ``resolve_values`` is a callable taking and returning an event
    iterator; it is expected to set ``item['value']`` on leaf items.
    '''
    events = static_to_mutable(typedef_events)
    events = resolve_typedefs(events, context)
    events = resolve_values(events)
    events = evaluate_bin_values(events)
    events = construct_composite_values(events)
    events = log_events(events, logger.debug)
    return events


def resolve_values_from_stream(stream):
    ''' Return a ``resolve_values`` stage that reads values from a stream.

    For each leaf item the stream position is stored as ``bin_offset``
    before reading. When reading fails, the exception is stored in the
    item and the event is replaced by ERROREVENT.
    '''
    def resolve_values(events):
        for ev, item in events:
            if ev is None:
                item['bin_offset'] = stream.tell()
                try:
                    item['value'] = resolve_value_from_stream(item, stream)
                except Exception as e:
                    item['exception'] = e
                    ev = ERROREVENT
            yield ev, item
    return resolve_values


def resolve_value_from_stream(item, stream):
    ''' Read the value of one leaf item from ``stream``.

    The type to read is ``item['bin_type']`` when present, otherwise
    ``item['type']``. How it is read depends on what the type offers, in
    this order: a ``binfmt`` struct format, the special cases CHID, BSTR
    and ParaTextChunks, a ``fixed_size`` (optionally decoded by the type),
    and finally the type's own ``read()``.
    '''
    from hwp5.binmodel import ParaTextChunks
    from hwp5.binmodel import CHID
    if 'bin_type' in item:
        item_type = item['bin_type']
    else:
        item_type = item['type']
    if hasattr(item_type, 'binfmt'):
        binfmt = item_type.binfmt
        binsize = struct.calcsize(binfmt)
        data = readn(stream, binsize)
        unpacked = struct.unpack(binfmt, data)
        return unpacked[0]
    elif item_type is CHID:
        data = readn(stream, 4)
        return CHID.decode(data)
    elif item_type is BSTR:
        return BSTR.read(stream)
    elif item_type is ParaTextChunks:
        return ParaTextChunks.read(stream)
    elif hasattr(item_type, 'fixed_size'):
        data = readn(stream, item_type.fixed_size)
        if hasattr(item_type, 'decode'):
            return item_type.decode(data)
        return data
    else:
        assert hasattr(item_type, 'read')
        logger.warning('%s: item type relies on its read() to resolve a value',
                       item_type.__name__)
        return item_type.read(stream)


def resolve_type_events(type, context, resolve_values):
    ''' Return the evaluated events of ``type`` for the given context. '''
    # get typedef events: if current version is specified in the context,
    # get version specific typedef
    if 'version' in context:
        version = context['version']
        events = get_compiled_typedef_with_version(type, version)
    else:
        events = get_compiled_typedef(type)

    # evaluate with context/stream
    return eval_typedef_events(events, context, resolve_values)


def read_type_events(type, context, stream):
    ''' Yield the events of reading ``type`` from ``stream``.

    Raises ParseError right after yielding an ERROREVENT. The error
    carries the original exception (``cause``), the stream offset of the
    failed item and ``path``, ``treegroup`` and ``record`` from the
    context.
    '''
    resolve_values = resolve_values_from_stream(stream)
    events = resolve_type_events(type, context, resolve_values)
    for ev, item in events:
        yield ev, item
        if ev is ERROREVENT:
            e = item['exception']
            msg = 'can\'t parse %s' % type
            pe = ParseError(msg)
            pe.cause = e
            pe.path = context.get('path')
            pe.treegroup = context.get('treegroup')
            pe.record = context.get('record')
            pe.offset = item.get('bin_offset')
            raise pe


def read_type_item(type, context, stream, binevents=None):
    ''' Read ``type`` from ``stream`` and return the last event's item.

    Events are appended to ``binevents`` (a new list when omitted). On
    ParseError that list is attached to the error as ``binevents`` before
    the error is re-raised.
    '''
    if binevents is None:
        binevents = []
    try:
        binevents.extend(read_type_events(type, context, stream))
    except ParseError as e:
        e.binevents = binevents
        raise
    return binevents[-1][1]


def read_type(type, context, stream, binevents=None):
    ''' Read ``type`` from ``stream`` and return its value. '''
    item = read_type_item(type, context, stream, binevents)
    return item['value']


def dump_events(events):
    ''' Print events as an indented outline, one item per line.

    Types and conditions are shown by name; ENDEVENTs are not printed and
    only close an indentation level.
    '''
    def prefix_level(event_prefixed_items):
        level = 0
        for ev, item in event_prefixed_items:
            if ev is STARTEVENT:
                yield level, item
                level += 1
            elif ev is ENDEVENT:
                level -= 1
            else:
                yield level, item

    def item_to_dict(events):
        for ev, item in events:
            yield ev, dict(item)

    def type_to_string(events):
        for ev, item in events:
            item['type'] = item['type'].__name__
            yield ev, item

    def condition_to_string(events):
        for ev, item in events:
            if 'condition' in item:
                item['condition'] = item['condition'].__name__
            yield ev, item

    events = item_to_dict(events)
    events = type_to_string(events)
    events = condition_to_string(events)
    for level, item in prefix_level(events):
        indents = ''
        if level > 0:
            if level > 1:
                indents = ' ' * (level - 2) + ' '
            indents += '- '
        print('{}{}'.format(indents, item))


def main():
    ''' Interactive helper: evaluate a binmodel type with typed-in values.

    ``sys.argv[1]`` names an attribute of ``hwp5.binmodel``. Its compiled
    events are printed, then every leaf value is asked for on the console
    and the resulting events are printed.
    '''
    logger.setLevel(logging.DEBUG)
    logger.addHandler(logging.StreamHandler())

    import hwp5.binmodel
    name = sys.argv[1]
    type = getattr(hwp5.binmodel, name)
    typedef_events = compile_type_definition(dict(type=type))
    pprint(typedef_events)

    context = {}

    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3: raw_input() was renamed to input()

    def resolve_values(events):
        for ev, item in events:
            if ev is None:
                print('')
                for k, v in sorted(item.items()):
                    print('- {} : {}'.format(k, v))
                value = read_line('>> ')
                # NOTE(review): eval() executes whatever is typed. That is
                # tolerable only because this is a developer console tool;
                # never feed it text that does not come from the operator.
                value = eval(value)
                if isinstance(item['type'], FlagsType):
                    value = item['type'](value)
                item['value'] = value
            yield ev, item
    events = eval_typedef_events(typedef_events, context, resolve_values)
    for ev, item in events:
        print('{} {}'.format(ev, item))
your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals + + +def get_unichr_lang(uch): + # Hangul Syllables + # U+AC00..U+D7AF + # Hangul Jamo Extended-B + # U+D7B0..D7FF + if u'\uAC00' <= uch <= u'\uD7FF': + return 'ko' + + # Control Characters and Numbers in Basic Latin + if u'\u0000' <= uch <= u'\u0040': + return None + + # Hangul Jamo + if u'\u1100' <= uch <= u'\u11FF': + return 'ko' + + # Hangul Compatibility Jamo + if u'\u3130' <= uch <= u'\u318F': + return 'ko' + + # Hangul Jamo Extended-A + if u'\uA960' <= uch <= u'\uA97F': + return 'ko' + + # -- en -- + + # Basic Latin, Latin Extended-A/B + if u'\u0040' <= uch <= u'\u024F': + return 'en' + + # -- cn -- + + # CJK Unified Ideographs + # U+4E00..U+9FFF + if u'\u4E00' <= uch <= u'\u9FFF': + return 'cn' + + # CJK Radicals Supplement + # U+2E80..U+2EFF + # Kangxi Radicals + # U+2F00..U+2FDF + if u'\u2E80' <= uch <= u'\u2FDF': + return 'cn' + + # CJK Unified Ideographs Extension A + # U+3400..U+4DBF + if u'\u3400' <= uch <= u'\u4DBF': + return 'cn' + + # CJK Compatibility Ideographs + # U+F900..U+FAFF + if u'\uF900' <= uch <= u'\uFAFF': + return 'cn' + + # CJK Symbols and Punctuation + # U+3000..U+303F + if u'\u3000' <= uch <= u'\u303F': + return 'symbol' + + # -- jp -- + + # Hiragana + Katakana + if u'\u3040' <= uch <= u'\u30FF': + return 'jp' + + return 'other' + + +def tokenize_unicode_by_lang(text): + buf = [] + buf_lang = None + for uch in text: + lang = get_unichr_lang(uch) + if lang is None: + buf.append(uch) + continue + if buf_lang == 
lang or buf_lang is None: + buf_lang = lang + buf.append(uch) + continue + else: + yield buf_lang or 'ko', ''.join(buf) + buf = [uch] + buf_lang = lang + if buf: + yield buf_lang or 'ko', ''.join(buf) diff --git a/src/hwp5/cli.py b/src/hwp5/cli.py new file mode 100644 index 0000000000000000000000000000000000000000..1fb921e747299ada1af93d9fc6abbb209c4eb480 --- /dev/null +++ b/src/hwp5/cli.py @@ -0,0 +1,109 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+# +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals +import logging +import os + +from .plat import xsltproc +from .plat import xmllint +from .storage import ExtraItemStorage +from .storage import open_storage_item +from .storage.ole import OleStorage +from .xmlmodel import Hwp5File + + +def init_logger(args): + logger = logging.getLogger('hwp5') + try: + from colorlog import ColoredFormatter + except ImportError: + formatter = None + else: + formatter = ColoredFormatter( + '%(log_color)s%(levelname)-8s%(reset)s %(blue)s%(message)s', + datefmt=None, reset=True, + log_colors={ + 'DEBUG': 'cyan', + 'INFO': 'green', + 'WARNING': 'yellow', + 'ERROR': 'red', + 'CRITICAL': 'red' + } + ) + + loglevel = args.loglevel + if not loglevel: + loglevel = os.environ.get('PYHWP_LOGLEVEL') + if loglevel: + levels = dict(debug=logging.DEBUG, + info=logging.INFO, + warning=logging.WARNING, + error=logging.ERROR, + critical=logging.CRITICAL) + loglevel = loglevel.lower() + loglevel = levels.get(loglevel, logging.WARNING) + logger.setLevel(loglevel) + + logfile = args.logfile + if not logfile: + logfile = os.environ.get('PYHWP_LOGFILE') + if logfile: + handler = logging.FileHandler(logfile) + else: + handler = logging.StreamHandler() + if formatter: + handler.setFormatter(formatter) + logger.addHandler(handler) + + +def init_with_environ(): + if 'PYHWP_XSLTPROC' in os.environ: + xsltproc.executable = os.environ['PYHWP_XSLTPROC'] + xsltproc.enable() + + if 'PYHWP_XMLLINT' in os.environ: + xmllint.executable = os.environ['PYHWP_XMLLINT'] + xmllint.enable() + + +def open_hwpfile(args): + filename = args.hwp5file + if args.ole: + hwpfile = OleStorage(filename) + else: + hwpfile = Hwp5File(filename) + if args.vstreams: + hwpfile = ExtraItemStorage(hwpfile) + return hwpfile + + +def parse_recordstream_name(hwpfile, streamname): + if streamname == 'docinfo': + return hwpfile.docinfo + segments = streamname.split('/') + if 
len(segments) == 2: + if segments[0] == 'bodytext': + try: + idx = int(segments[1]) + return hwpfile.bodytext.section(idx) + except ValueError: + pass + return open_storage_item(hwpfile, streamname) diff --git a/src/hwp5/compressed.py b/src/hwp5/compressed.py new file mode 100644 index 0000000000000000000000000000000000000000..033bfeda7da0cd53ba38e4cbd9227cca84d64cbb --- /dev/null +++ b/src/hwp5/compressed.py @@ -0,0 +1,79 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+# +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals +from io import BytesIO +import codecs +import zlib + +from .utils import GeneratorReader + + +class ZLibIncrementalDecoder(codecs.IncrementalDecoder): + def __init__(self, errors='strict', wbits=15): + assert errors == 'strict' + self.errors = errors + self.wbits = wbits + self.reset() + + def decode(self, input, final=False): + c = self.decompressobj.decompress(input) + if final: + c += self.decompressobj.flush() + return c + + def reset(self): + self.decompressobj = zlib.decompressobj(self.wbits) + + +def decompress_gen(source, bufsize=4096): + dec = ZLibIncrementalDecoder(wbits=-15) + exausted = False + while not exausted: + input = source.read(bufsize) + if len(input) < bufsize: + exausted = True + yield dec.decode(input, exausted) + + +def decompress_experimental(source, bufsize=4096): + ''' decompress inputstream + + stream: a file-like readable + returns a file-like readable + ''' + return GeneratorReader(decompress_gen(source, bufsize)) + + +def decompress(stream): + ''' decompress inputstream + + stream: a file-like readable + returns a file-like readable + ''' + + # #176 참고. #175의 임시방편을 사용한다. 
+ compressed_maybe = stream.read() + try: + decompressed = zlib.decompress(compressed_maybe, -15) # without gzip header + except zlib.error: + return BytesIO(compressed_maybe) + else: + return BytesIO(decompressed) diff --git a/src/hwp5/dataio.py b/src/hwp5/dataio.py new file mode 100644 index 0000000000000000000000000000000000000000..3b29efd75146b84f4043d637981791b0528ff81f --- /dev/null +++ b/src/hwp5/dataio.py @@ -0,0 +1,642 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+# + +from array import array +from binascii import b2a_hex +from itertools import takewhile +import inspect +import logging +import struct +import sys + + + + +# Python 3 Compatible Definitions +long = int +unicode = str +basestring = str + + + +logger = logging.getLogger(__name__) + + +class Eof(Exception): + def __init__(self, *args): + self.args = args + + +class OutOfData(Exception): + pass + + +def readn(f, size): + data = f.read(size) + datasize = len(data) + if datasize == 0: + try: + pos = f.tell() + except IOError: + pos = '' + raise Eof(pos) + return data + + +class PrimitiveType(type): + def __new__(mcs, name, bases, attrs): + basetype = bases[0] + attrs['basetype'] = basetype + attrs.setdefault('__slots__', []) + + never_instantiate = attrs.pop('never_instantiate', True) + if never_instantiate and '__new__' not in attrs: + def __new__(cls, *args, **kwargs): + return basetype.__new__(basetype, *args, **kwargs) + attrs['__new__'] = __new__ + + if 'binfmt' in attrs: + binfmt = attrs['binfmt'] + fixed_size = struct.calcsize(binfmt) + + if 'fixed_size' in attrs: + assert fixed_size == attrs['fixed_size'] + else: + attrs['fixed_size'] = fixed_size + + if 'decode' not in attrs: + def decode(cls, s): + return struct.unpack(binfmt, s)[0] + attrs['decode'] = classmethod(decode) + + if 'fixed_size' in attrs and 'read' not in attrs: + fixed_size = attrs['fixed_size'] + + def read(cls, f): + s = readn(f, fixed_size) + decode = getattr(cls, 'decode', None) + if decode: + return decode(s) + return s + attrs['read'] = classmethod(read) + + return type.__new__(mcs, str(name), bases, attrs) + + +def Primitive(name, basetype, binfmt, **attrs): + attrs['binfmt'] = binfmt + return PrimitiveType(name, (basetype,), attrs) + + +UINT32 = Primitive('UINT32', long, ' 2: + lsb, msb, valuetype = bitgroup + else: + lsb, msb = bitgroup + else: + lsb = msb = bitgroup + self.lsb = lsb + self.msb = msb + self.valuetype = valuetype + + def __get__(self, instance, owner): + valuetype = 
self.valuetype + return valuetype(self.get_int_value(instance)) + + def get_int_value(self, instance): + lsb = self.lsb + msb = self.msb + return int(instance >> lsb) & int((2 ** (msb + 1 - lsb)) - 1) + + +class FlagsType(type): + def __new__(mcs, name, bases, attrs): + basetype = attrs.pop('basetype') + bases = (basetype.basetype,) + + bitgroups = dict((k, BitGroupDescriptor(v)) + for k, v in attrs.items()) + + attrs = dict(bitgroups) + attrs['__name__'] = name + attrs['__slots__'] = () + + attrs['basetype'] = basetype + attrs['bitfields'] = bitgroups + + def dictvalue(self): + return dict((name, getattr(self, name)) + for name in bitgroups.keys()) + attrs['dictvalue'] = dictvalue + + return type.__new__(mcs, str(name), bases, attrs) + + +def _lex_flags_args(args): + for idx, arg in enumerate(args): + while True: + pushback = (yield idx, arg) + if pushback is arg: + yield + continue + break + + +def _parse_flags_args(args): + args = _lex_flags_args(args) + try: + idx = -1 + while True: + # lsb + try: + idx, lsb = next(args) + except StopIteration: + break + assert isinstance(lsb, int), ('#%d arg is expected to be' + 'a int: %s' % (idx, repr(lsb))) + + # msb (default: lsb) + idx, x = next(args) + if isinstance(x, int): + msb = x + elif isinstance(x, (type, basestring)): + args.send(x) # pushback + msb = lsb + else: + assert False, '#%d arg is unexpected type: %s' % (idx, repr(x)) + + # type (default: int) + idx, x = next(args) + assert not isinstance(x, int), ('#%d args is expected to be a type' + 'or name: %s' % (idx, repr(x))) + if isinstance(x, type): + t = x + elif isinstance(x, basestring): + args.send(x) # pushback + t = int + else: + assert False, '#%d arg is unexpected type: %s' % (idx, repr(x)) + + # name + idx, name = next(args) + assert isinstance(name, basestring), ('#%d args is expected to be ' + 'a name: %s' % (idx, + repr(name))) + + yield name, (lsb, msb, t) + + except StopIteration: + assert False, '#%d arg is expected' % (idx + 1) + + +def 
Flags(basetype, *args): + attrs = dict(_parse_flags_args(args)) + attrs['basetype'] = basetype + return FlagsType('Flags', (), attrs) + + +enum_type_instances = set() + + +class EnumType(type): + def __new__(mcs, enum_type_name, bases, attrs): + items = attrs.pop('items') + moreitems = attrs.pop('moreitems') + + populate_state = [1] + + names_by_instance = dict() + instances_by_name = dict() + instances_by_value = dict() + + def __new__(cls, value, name=None): + if isinstance(value, cls): + return value + + if name is None: + if value in instances_by_value: + return instances_by_value[value] + else: + logger.warning('undefined %s value: %s', + cls.__name__, value) + logger.warning('defined name/values: %s', + str(instances_by_name)) + return int.__new__(cls, value) + + if len(populate_state) == 0: + raise TypeError() + + assert name not in instances_by_name + + if value in instances_by_value: + self = instances_by_value[value] + else: + # define new instance of this enum + self = int.__new__(cls, value) + instances_by_value[value] = self + names_by_instance[self] = name + + instances_by_name[name] = self + return self + attrs['__new__'] = __new__ + attrs['__slots__'] = [] + attrs['scoping_struct'] = None + + class NameDescriptor(object): + def __get__(self, instance, owner): + if instance is None: + return owner.__name__ + return names_by_instance.get(instance) + + attrs['name'] = NameDescriptor() + + def __repr__(self): + enum_name = type(self).__name__ + item_name = self.name + if item_name is not None: + return enum_name + '.' 
+ item_name + else: + return '%s(%d)' % (enum_name, self) + attrs['__repr__'] = __repr__ + + cls = type.__new__(mcs, str(enum_type_name), bases, attrs) + + for v, k in enumerate(items): + setattr(cls, k, cls(v, k)) + for k, v in moreitems.items(): + setattr(cls, k, cls(v, k)) + + cls.names = set(instances_by_name.keys()) + cls.instances = set(names_by_instance.keys()) + + # no more population + populate_state.pop() + + enum_type_instances.add(cls) + return cls + + def __init__(cls, *args, **kwargs): + pass + + +def Enum(*items, **moreitems): + attrs = dict(items=items, moreitems=moreitems) + return EnumType('Enum', (int,), attrs) + + +class CompoundType(type): + def __new__(mcs, name, bases, attrs): + return type.__new__(mcs, str(name), bases, attrs) + + +class ArrayType(CompoundType): + def __init__(self, *args, **kwargs): + pass + + +class FixedArrayType(ArrayType): + + classes = dict() + + def __new__(mcs, itemtype, size): + key = itemtype, size + + cls = mcs.classes.get(key) + if cls is not None: + return cls + + attrs = dict(itemtype=itemtype, size=size) + name = 'ARRAY(%s,%s)' % (itemtype.__name__, size) + cls = ArrayType.__new__(mcs, str(name), (tuple,), attrs) + mcs.classes[key] = cls + return cls + + +ARRAY = FixedArrayType + + +class VariableLengthArrayType(ArrayType): + + classes = dict() + + def __new__(mcs, counttype, itemtype): + key = counttype, itemtype + + cls = mcs.classes.get(key) + if cls is not None: + return cls + + attrs = dict(itemtype=itemtype, counttype=counttype) + name = 'N_ARRAY(%s,%s)' % (counttype.__name__, itemtype.__name__) + cls = ArrayType.__new__(mcs, str(name), (list,), attrs) + mcs.classes[key] = cls + return cls + + +N_ARRAY = VariableLengthArrayType + + +def ref_member(member_name): + def fn(context, values): + return values[member_name] + fn.__doc__ = member_name + return fn + + +def ref_member_flag(member_name, bitfield_name): + def fn(context, values): + return getattr(values[member_name], bitfield_name) + fn.__doc__ = 
'%s.%s' % (member_name, bitfield_name) + return fn + + +class X_ARRAY(object): + + def __init__(self, itemtype, count_reference): + name = 'ARRAY(%s, \'%s\')' % (itemtype.__name__, + count_reference.__doc__) + self.__doc__ = self.__name__ = name + self.itemtype = itemtype + self.count_reference = count_reference + + def __call__(self, context, values): + count = self.count_reference(context, values) + return ARRAY(self.itemtype, count) + + +class SelectiveType(object): + + def __init__(self, selector_reference, selections): + self.__name__ = 'SelectiveType' + self.selections = selections + self.selector_reference = selector_reference + + def __call__(self, context, values): + selector = self.selector_reference(context, values) + return self.selections.get(selector, Struct) # default: empty struct + + +class ParseError(Exception): + + treegroup = None + + def __init__(self, *args, **kwargs): + Exception.__init__(self, *args, **kwargs) + self.cause = None + self.path = None + self.record = None + self.binevents = None + self.parse_stack_traces = [] + + def print_to_logger(self, logger): + e = self + logger.error('ParseError: %s', e) + logger.error('Caused by: %s', repr(e.cause)) + logger.error('Path: %s', e.path) + if e.treegroup is not None: + logger.error('Treegroup: %s', e.treegroup) + if e.record: + logger.error('Record: %s', e.record['seqno']) + logger.error('Record Payload:') + for line in dumpbytes(e.record['payload'], True): + logger.error(' %s', line) + logger.error('Problem Offset: at %d (=0x%x)', e.offset, e.offset) + if self.binevents: + logger.error('Binary Parse Events:') + from hwp5.bintype import log_events + for ev, item in log_events(self.binevents, logger.error): + pass + logger.error('Model Stack:') + for level, c in enumerate(reversed(e.parse_stack_traces)): + model = c['model'] + if isinstance(model, StructType): + logger.error(' %s', model) + parsed_members = c['parsed'] + for member in parsed_members: + offset = member.get('offset', 0) + 
offset_end = member.get('offset_end', 1) + name = member['name'] + value = member['value'] + logger.error(' %06x:%06x: %s = %s', + offset, offset_end - 1, name, value) + logger.error(' %06x: : %s', c['offset'], c['member']) + pass + else: + logger.error(' %s%s', ' ' * level, c) + + +def typed_struct_attributes(struct, attributes, context): + attributes = dict(attributes) + + def popvalue(member): + name = member['name'] + if name in attributes: + return attributes.pop(name) + else: + return member['type']() + + for member in struct.parse_members_with_inherited(context, popvalue): + yield member + + # remnants + for name, value in attributes.items(): + yield dict(name=name, type=type(value), value=value) + + +class StructType(CompoundType): + def __init__(cls, name, bases, attrs): + super(StructType, cls).__init__(name, bases, attrs) + if 'attributes' in cls.__dict__: + members = (dict(type=member[0], name=member[1]) + if isinstance(member, tuple) + else member + for member in cls.attributes()) + cls.members = list(members) + for k, v in attrs.items(): + if isinstance(v, EnumType): + v.__name__ = k + v.scoping_struct = cls + elif isinstance(v, FlagsType): + v.__name__ = k + + def parse_members(cls, context, getvalue): + if 'attributes' not in cls.__dict__: + return + values = dict() + for member in cls.members: + member = dict(member) + if isinstance(member['type'], X_ARRAY): + member['type'] = member['type'](context, values) + elif isinstance(member['type'], SelectiveType): + member['type'] = member['type'](context, values) + + member_version = member.get('version') + if member_version is None or context['version'] >= member_version: + condition_func = member.get('condition') + if condition_func is None or condition_func(context, values): + try: + value = getvalue(member) + except ParseError as e: + tracepoint = dict(model=cls, member=member['name']) + e.parse_stack_traces.append(tracepoint) + raise + values[member['name']] = member['value'] = value + yield member 
+ + def parse_members_with_inherited(cls, context, getvalue, up_to_cls=None): + mro = inspect.getmro(cls) + mro = takewhile(lambda cls: cls is not up_to_cls, mro) + mro = list(cls for cls in mro if 'attributes' in cls.__dict__) + mro = reversed(mro) + for cls in mro: + for member in cls.parse_members(context, getvalue): + yield member + + +class Struct(object, metaclass=StructType): + pass + + +def dumpbytes(data, crust=False): + if PY3: + _ord = int + else: + _ord = ord + + offsbase = 0 + if crust: + yield '\t 0 1 2 3 4 5 6 7 8 9 A B C D E F' + while len(data) > 16: + if crust: + line = '%05x0: ' % offsbase + else: + line = '' + line += ' '.join(['%02x' % _ord(ch) for ch in data[0:16]]) + yield line + data = data[16:] + offsbase += 1 + + if crust: + line = '%05x0: ' % offsbase + else: + line = '' + line += ' '.join(['%02x' % _ord(ch) for ch in data]) + yield line + + +def hexdump(data, crust=False): + return '\n'.join([line for line in dumpbytes(data, crust)]) + + +class IndentedOutput: + def __init__(self, base, level): + self.base = base + self.level = level + + def write(self, x): + for line in x.split('\n'): + if len(line) > 0: + self.base.write('\t' * self.level) + self.base.write(line) + self.base.write('\n') + + +class Printer: + def __init__(self, baseout): + self.baseout = baseout + + def prints(self, *args): + for x in args: + self.baseout.write(str(x) + ' ') + self.baseout.write('\n') diff --git a/src/hwp5/distdoc.py b/src/hwp5/distdoc.py new file mode 100644 index 0000000000000000000000000000000000000000..c1fc7d9909229933f08af854d2071c29a5391188 --- /dev/null +++ b/src/hwp5/distdoc.py @@ -0,0 +1,100 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any 
later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# +''' Decode distribute docs. + +Based on the algorithm described by Changwoo Ryu +See https://groups.google.com/forum/#!topic/hwp-foss/d2KL2ypR89Q +''' +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals +from io import BytesIO +import logging + +from .plat import get_aes128ecb_decrypt +from .recordstream import read_record +from .tagids import HWPTAG_DISTRIBUTE_DOC_DATA + +logger = logging.getLogger(__name__) + + +def decode(stream): + distdoc_data_record = read_record(stream, 0) + if distdoc_data_record['tagid'] != HWPTAG_DISTRIBUTE_DOC_DATA: + raise IOError('the first record is not an HWPTAG_DISTRIBUTE_DOC_DATA') + distdoc_data = distdoc_data_record['payload'] + key = decode_head_to_key(distdoc_data) + tail = stream.read() + decrypted = decrypt_tail(key, tail) + return BytesIO(decrypted) + + +class Random: + ''' MSVC's srand()/rand() like pseudorandom generator. + ''' + + def __init__(self, seed): + self.seed = seed + + def rand(self): + self.seed = (self.seed * 214013 + 2531011) & 0xffffffff + value = (self.seed >> 16) & 0x7fff + return value + + +def decode_head_to_sha1(record_payload): + ''' Decode HWPTAG_DISTRIBUTE_DOC_DATA. 
+ + It's the sha1 digest of user-supplied password string, i.e., + + '12345' -> hashlib.sha1('12345').digest() + + ''' + if len(record_payload) != 256: + raise ValueError('payload size must be 256 bytes') + + data = bytearray(record_payload) + seed = data[3] << 24 | data[2] << 16 | data[1] << 8 | data[0] + random = Random(seed) + + n = 0 + for i in range(256): + if n == 0: + key = random.rand() & 0xff + n = (random.rand() & 0xf) + 1 + if i >= 4: + data[i] = data[i] ^ key + n -= 1 + + # decoded = b''.join(chr(x) for x in data) + decoded = data + sha1offset = 4 + (seed & 0xf) + + ucs16le = decoded[sha1offset:sha1offset + 80] + return ucs16le + + +def decode_head_to_key(record_payload): + sha1ucs16le = decode_head_to_sha1(record_payload) + return sha1ucs16le[:16] + + +def decrypt_tail(key, encrypted_tail): + decrypt = get_aes128ecb_decrypt() + return decrypt(key, encrypted_tail) diff --git a/src/hwp5/errors.py b/src/hwp5/errors.py new file mode 100644 index 0000000000000000000000000000000000000000..43bb74a53c20746d7c013e2b261b987acc94fca0 --- /dev/null +++ b/src/hwp5/errors.py @@ -0,0 +1,43 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+# +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals + + +class InvalidOleStorageError(Exception): + ''' Invalid OLE2 Compound Binary File. ''' + pass + + +class InvalidHwp5FileError(Exception): + ''' Invalid HWP Document format v5 File. ''' + pass + + +class ImplementationNotAvailable(Exception): + pass + + +class ValidationFailed(Exception): + pass + + +class RelaxNGValidationFailed(ValidationFailed): + pass diff --git a/src/hwp5/filestructure.py b/src/hwp5/filestructure.py new file mode 100644 index 0000000000000000000000000000000000000000..f5fc5286f6d90688c2795781f4c2031b8cec84b3 --- /dev/null +++ b/src/hwp5/filestructure.py @@ -0,0 +1,602 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+# +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals +from io import BytesIO +import logging +import sys + +from .bintype import read_type +from .compressed import decompress +from .dataio import UINT32, Flags, Struct +from .errors import InvalidOleStorageError +from .errors import InvalidHwp5FileError +from .storage import ItemWrapper +from .storage import StorageWrapper +from .storage import ItemConversionStorage +from .storage import is_stream +from .storage.ole import OleStorage +from .summaryinfo import CLSID_HWP_SUMMARY_INFORMATION +from .utils import GeneratorTextReader +from .utils import cached_property +from .utils import transcoder + +PY3 = sys.version_info.major == 3 +if PY3: + basestring = str + + +logger = logging.getLogger(__name__) + + +HWP5_SIGNATURE = b'HWP Document File' + (b'\x00' * 15) + + +class BYTES(type): + def __new__(mcs, size): + decode = staticmethod(lambda bytes, *args, **kwargs: bytes) + return type.__new__(mcs, str('BYTES(%d)') % size, (str,), + dict(fixed_size=size, decode=decode)) + + +class VERSION(object): + fixed_size = 4 + + if PY3: + def decode(cls, bytes): + return (bytes[3], bytes[2], bytes[1], bytes[0]) + else: + def decode(cls, bytes): + return (ord(bytes[3]), ord(bytes[2]), + ord(bytes[1]), ord(bytes[0])) + decode = classmethod(decode) + + +class FileHeader(Struct): + Flags = Flags(UINT32, + 0, 'compressed', + 1, 'password', + 2, 'distributable', + 3, 'script', + 4, 'drm', + 5, 'xmltemplate_storage', + 6, 'history', + 7, 'cert_signed', + 8, 'cert_encrypted', + 9, 'cert_signature_extra', + 10, 'cert_drm', + 11, 'ccl') + + def attributes(cls): + yield BYTES(32), 'signature' + yield VERSION, 'version' + yield cls.Flags, 'flags' + yield BYTES(216), 'reserved' + attributes = classmethod(attributes) + + +def is_hwp5file(filename): + ''' Test whether it is an HWP format v5 file. 
''' + try: + olestg = OleStorage(filename) + except InvalidOleStorageError: + return False + return storage_is_hwp5file(olestg) + + +def storage_is_hwp5file(stg): + try: + fileheader = stg['FileHeader'] + except KeyError: + logger.info('stg has no FileHeader') + return False + fileheader = HwpFileHeader(fileheader) + if fileheader.signature == HWP5_SIGNATURE: + return True + else: + logger.info('fileheader.signature = %r', fileheader.signature) + return False + + +class CompressedStream(ItemWrapper): + + def open(self): + return decompress(self.wrapped.open()) + + +class CompressedStorage(StorageWrapper): + ''' decompress streams in the underlying storage ''' + def __getitem__(self, name): + item = self.wrapped[name] + if is_stream(item): + return CompressedStream(item) + else: + return item + + +class PasswordProtectedStream(ItemWrapper): + + def open(self): + # TODO: 현재로선 암호화된 내용을 그냥 반환 + logger.warning('Password-encrypted stream: currently decryption is ' + 'not supported') + return self.wrapped.open() + + +class PasswordProtectedStorage(StorageWrapper): + def __getitem__(self, name): + item = self.wrapped[name] + if is_stream(item): + return PasswordProtectedStream(item) + else: + return item + + +class Hwp5PasswordProtectedDoc(ItemConversionStorage): + + def resolve_conversion_for(self, name): + if name in ('BinData', 'BodyText', 'Scripts', 'ViewText'): + return PasswordProtectedStorage + elif name in ('DocInfo', ): + return PasswordProtectedStream + + +class VersionSensitiveItem(ItemWrapper): + + def __init__(self, item, version): + ItemWrapper.__init__(self, item) + self.version = version + + def open(self): + return self.wrapped.open() + + def other_formats(self): + return dict() + + +class Hwp5FileBase(ItemConversionStorage): + ''' Base of an Hwp5File. + + Hwp5FileBase checks basic validity of an HWP format v5 and provides + `fileheader` property. + + :param stg: an OLE2 structured storage. 
+ :type stg: an instance of storage, OleFileIO or filename + :raises InvalidHwp5FileError: `stg` is not a valid HWP format v5 document. + ''' + + def __init__(self, stg): + if isinstance(stg, basestring): + try: + stg = OleStorage(stg) + except InvalidOleStorageError: + raise InvalidHwp5FileError('Not an OLE2 Compound Binary File.') + + if not storage_is_hwp5file(stg): + errormsg = 'Not an HWP Document format v5 storage.' + raise InvalidHwp5FileError(errormsg) + + ItemConversionStorage.__init__(self, stg) + + def resolve_conversion_for(self, name): + if name == 'FileHeader': + return HwpFileHeader + + def get_fileheader(self): + return self['FileHeader'] + + fileheader = cached_property(get_fileheader) + + header = fileheader + + +class Hwp5DistDocStream(VersionSensitiveItem): + + def open(self): + from hwp5.distdoc import decode + encodedstream = self.wrapped.open() + return decode(encodedstream) + + def head_record(self): + item = self.wrapped.open() + from .recordstream import read_record + return read_record(item, 0) + + def head_record_stream(self): + from .recordstream import record_to_json + record = self.head_record() + json = record_to_json(record) + return GeneratorTextReader(iter([json])) + + def head(self): + record = self.head_record() + return record['payload'] + + def head_stream(self): + return BytesIO(self.head()) + + def head_sha1(self): + from hwp5.distdoc import decode_head_to_sha1 + payload = self.head() + return decode_head_to_sha1(payload) + + def head_key(self): + from hwp5.distdoc import decode_head_to_key + payload = self.head() + return decode_head_to_key(payload) + + def tail(self): + item = self.wrapped.open() + from .recordstream import read_record + read_record(item, 0) + assert 4 + 256 == item.tell() + return item.read() + + def tail_decrypted(self): + from hwp5.distdoc import decrypt_tail + key = self.head_key() + tail = self.tail() + return decrypt_tail(key, tail) + + def tail_stream(self): + return BytesIO(self.tail()) + + +class 
Hwp5DistDocStorage(ItemConversionStorage): + + def resolve_conversion_for(self, name): + def conversion(item): + return Hwp5DistDocStream(self.wrapped[name], None) # TODO: version + return conversion + + +class Hwp5DistDoc(ItemConversionStorage): + + def resolve_conversion_for(self, name): + if name in ('Scripts', 'ViewText'): + return Hwp5DistDocStorage + + +class Hwp5Compression(ItemConversionStorage): + ''' handle compressed streams in HWPv5 files ''' + + def resolve_conversion_for(self, name): + if name in ('BinData', 'BodyText', 'ViewText'): + return CompressedStorage + elif name == 'DocInfo': + return CompressedStream + elif name == 'Scripts': + return CompressedStorage + + +class PreviewText(object): + + def __init__(self, item): + self.open = item.open + + def other_formats(self): + return {'.utf8': self.open_utf8} + + def open_utf8(self): + transcode = transcoder('utf-16le', 'utf-8') + return transcode(self.open()) + + def get_utf8(self): + f = self.open_utf8() + try: + return f.read() + finally: + f.close() + + utf8 = cached_property(get_utf8) + + def get_text(self): + fp = self.open() + try: + data = fp.read() + finally: + fp.close() + return data.decode('utf-16le') + + text = cached_property(get_text) + + def __str__(self): + if PY3: + return self.text + return self.utf8 + + def __unicode__(self): + return self.text + + +class Sections(ItemConversionStorage): + + section_class = VersionSensitiveItem + + def __init__(self, stg, version): + ItemConversionStorage.__init__(self, stg) + self.version = version + + def resolve_conversion_for(self, name): + def conversion(item): + return self.section_class(self.wrapped[name], self.version) + return conversion + + def other_formats(self): + return dict() + + def section(self, idx): + return self['Section%d' % idx] + + def section_indexes(self): + def gen(): + for name in self: + if name.startswith('Section'): + idx = name[len('Section'):] + try: + idx = int(idx) + except: + pass + else: + yield idx + indexes = 
list(gen()) + indexes.sort() + return indexes + + @property + def sections(self): + return list(self.section(idx) + for idx in self.section_indexes()) + + +class HwpFileHeader(object): + + def __init__(self, item): + self.open = item.open + + def to_dict(self): + f = self.open() + try: + return read_type(FileHeader, dict(), f) + finally: + f.close() + + value = cached_property(to_dict) + + def get_version(self): + return self.value['version'] + + version = cached_property(get_version) + + def get_signature(self): + return self.value['signature'] + + signature = cached_property(get_signature) + + def get_flags(self): + return FileHeader.Flags(self.value['flags']) + + flags = cached_property(get_flags) + + def open_text(self): + signature = self.value['signature'] + signature = signature.decode('latin1') + signature = signature[:len('HWP Document File')] + + d = FileHeader.Flags.dictvalue(self.value['flags']) + d['signature'] = signature + d['version'] = '%d.%d.%d.%d' % self.value['version'] + out = BytesIO() + for k, v in sorted(d.items()): + out.write('{}: {}\n'.format(k, v).encode('utf-8')) + out.seek(0) + return out + + def other_formats(self): + return {'.txt': self.open_text} + + +class HwpSummaryInfo(VersionSensitiveItem): + + def other_formats(self): + return {'.txt': self.open_text} + + def getPropertySetStream(self): + from .msoleprops import PropertySetFormat + from .msoleprops import PropertySetStreamReader + from .summaryinfo import FMTID_HWP_SUMMARY_INFORMATION + from .summaryinfo import HWP_PROPERTIES + + propertySetFormat = PropertySetFormat( + FMTID_HWP_SUMMARY_INFORMATION, + HWP_PROPERTIES + ) + reader = PropertySetStreamReader([propertySetFormat]) + f = self.open() + try: + return reader.read(f) + finally: + f.close() + + propertySetStream = cached_property(getPropertySetStream) + + def getHwpSummaryInfoPropertySet(self): + stream = self.propertySetStream + if stream.clsid == CLSID_HWP_SUMMARY_INFORMATION: + return stream.propertysets[0] + + 
propertySet = cached_property(getHwpSummaryInfoPropertySet) + + @property + def title(self): + from .msoleprops import PIDSI_TITLE + return self.propertySet[PIDSI_TITLE] + + @property + def subject(self): + from .msoleprops import PIDSI_SUBJECT + return self.propertySet[PIDSI_SUBJECT] + + @property + def author(self): + from .msoleprops import PIDSI_AUTHOR + return self.propertySet[PIDSI_AUTHOR] + + @property + def keywords(self): + from .msoleprops import PIDSI_KEYWORDS + return self.propertySet[PIDSI_KEYWORDS] + + @property + def comments(self): + from .msoleprops import PIDSI_COMMENTS + return self.propertySet[PIDSI_COMMENTS] + + @property + def lastSavedBy(self): + from .msoleprops import PIDSI_LASTAUTHOR + return self.propertySet[PIDSI_LASTAUTHOR] + + @property + def revisionNumber(self): + from .msoleprops import PIDSI_REVNUMBER + return self.propertySet[PIDSI_REVNUMBER] + + @property + def lastPrintedTime(self): + from .msoleprops import PIDSI_LASTPRINTED + return self.propertySet[PIDSI_LASTPRINTED] + + @property + def createdTime(self): + from .msoleprops import PIDSI_CREATE_DTM + return self.propertySet[PIDSI_CREATE_DTM] + + @property + def lastSavedTime(self): + from .msoleprops import PIDSI_LASTSAVE_DTM + return self.propertySet[PIDSI_LASTSAVE_DTM] + + @property + def numberOfPages(self): + from .msoleprops import PIDSI_PAGECOUNT + return self.propertySet[PIDSI_PAGECOUNT] + + @property + def dateString(self): + from .summaryinfo import HWPPIDSI_DATE_STR + return self.propertySet[HWPPIDSI_DATE_STR] + + @property + def numberOfParagraphs(self): + from .summaryinfo import HWPPIDSI_PARACOUNT + return self.propertySet[HWPPIDSI_PARACOUNT] + + @property + def plaintext_lines(self): + from .msoleprops import PropertySetStreamTextFormatter + stream = self.getPropertySetStream() + formatter = PropertySetStreamTextFormatter() + return formatter.formatTextLines(stream) + + def open_text(self): + out = BytesIO() + for line in self.plaintext_lines: + line = 
line.encode('utf-8') + out.write(line + b'\n') + out.seek(0) + return out + + +class Hwp5File(ItemConversionStorage): + ''' represents HWPv5 File + + Hwp5File(stg) + + stg: an instance of Storage + ''' + + def __init__(self, stg): + stg = Hwp5FileBase(stg) + + if stg.header.flags.password: + stg = Hwp5PasswordProtectedDoc(stg) + + # TODO: 현재로선 decryption이 구현되지 않았으므로, + # 레코드 파싱은 불가능하다. 적어도 encrypted stream에 + # 직접 접근은 가능하도록, 다음 레이어들은 bypass한다. + ItemConversionStorage.__init__(self, stg) + return + + if stg.header.flags.distributable: + stg = Hwp5DistDoc(stg) + + if stg.header.flags.compressed: + stg = Hwp5Compression(stg) + + ItemConversionStorage.__init__(self, stg) + + def resolve_conversion_for(self, name): + if name == 'DocInfo': + return self.with_version(self.docinfo_class) + if name == 'BodyText': + return self.with_version(self.bodytext_class) + if name == 'ViewText': + return self.with_version(self.bodytext_class) + if name == 'PrvText': + return PreviewText + if name == '\005HwpSummaryInformation': + return self.with_version(self.summaryinfo_class) + + def with_version(self, f): + def wrapped(item): + return f(item, self.header.version) + return wrapped + + summaryinfo_class = HwpSummaryInfo + docinfo_class = VersionSensitiveItem + bodytext_class = Sections + + @cached_property + def summaryinfo(self): + return self['\005HwpSummaryInformation'] + + @cached_property + def docinfo(self): + return self['DocInfo'] + + @cached_property + def preview_text(self): + return self['PrvText'] + + @cached_property + def bodytext(self): + return self['BodyText'] + + @cached_property + def viewtext(self): + return self['ViewText'] + + @property + def text(self): + if self.header.flags.distributable: + return self.viewtext + else: + return self.bodytext diff --git a/src/hwp5/hwp5html.py b/src/hwp5/hwp5html.py new file mode 100644 index 0000000000000000000000000000000000000000..c06aaecd9215e0937c5d19a6a0038cbd9c4e0564 --- /dev/null +++ b/src/hwp5/hwp5html.py @@ -0,0 
+1,315 @@ +# -*- coding: utf-8 -*- +# + +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals +from argparse import ArgumentParser +from contextlib import contextmanager +from contextlib import closing +from functools import partial +import gettext +import io +import logging +import os.path +import shutil +import shutil +import sys +import base64 +import re +import tempfile +import mimetypes + +from . 
import __version__ as version +from .cli import init_logger +from .transforms import BaseTransform +from .utils import cached_property + + +PY3 = sys.version_info.major == 3 +logger = logging.getLogger(__name__) +locale_dir = os.path.join(os.path.dirname(__file__), 'locale') +locale_dir = os.path.abspath(locale_dir) +t = gettext.translation('hwp5html', locale_dir, fallback=True) +_ = t.gettext + + +RESOURCE_PATH_XSL_CSS = 'xsl/hwp5css.xsl' +RESOURCE_PATH_XSL_XHTML = 'xsl/hwp5html.xsl' + + +class HTMLTransform(BaseTransform): + + @property + def transform_hwp5_to_css(self): + ''' + >>> T.transform_hwp5_to_css(hwp5file, 'styles.css') + ''' + transform_xhwp5 = self.transform_xhwp5_to_css + return self.make_transform_hwp5(transform_xhwp5) + + @property + def transform_hwp5_to_xhtml(self): + ''' + >>> T.transform_hwp5_to_xhtml(hwp5file, 'index.xhtml') + ''' + transform_xhwp5 = self.transform_xhwp5_to_xhtml + return self.make_transform_hwp5(transform_xhwp5) + + def transform_hwp5_to_dir(self, hwp5file, outdir): + ''' + >>> T.transform_hwp5_to_dir(hwp5file, 'output') + ''' + with self.transformed_xhwp5_at_temp(hwp5file) as xhwp5path: + self.transform_xhwp5_to_dir(xhwp5path, outdir) + + bindata_dir = os.path.join(outdir, 'bindata') + self.extract_bindata_dir(hwp5file, bindata_dir) + + @cached_property + def transform_xhwp5_to_css(self): + ''' + >>> T.transform_xhwp5_to_css('hwp5.xml', 'styles.css') + ''' + resource_path = RESOURCE_PATH_XSL_CSS + return self.make_xsl_transform(resource_path) + + @cached_property + def transform_xhwp5_to_xhtml(self): + ''' + >>> T.transform_xhwp5_to_xhtml('hwp5.xml', 'index.xhtml') + ''' + resource_path = RESOURCE_PATH_XSL_XHTML + return self.make_xsl_transform(resource_path) + + def transform_xhwp5_to_dir(self, xhwp5path, outdir): + ''' + >>> T.transform_xhwp5_to_dir('hwp5.xml', 'output') + ''' + html_path = os.path.join(outdir, 'index.xhtml') + with io.open(html_path, 'wb') as f: + self.transform_xhwp5_to_xhtml(xhwp5path, f) + + css_path = 
os.path.join(outdir, 'styles.css') + with io.open(css_path, 'wb') as f: + self.transform_xhwp5_to_css(xhwp5path, f) + + def transform_hwp5_to_single(self, hwp5file, outpath): + """ + Convert HWP file to a single HTML file with embedded CSS and images. + """ + # Create a temporary directory for intermediate conversion + with tempfile.TemporaryDirectory() as temp_dir: + # 1. Perform standard conversion to temp dir + self.transform_hwp5_to_dir(hwp5file, temp_dir) + + # Paths to generated files + html_path = os.path.join(temp_dir, 'index.xhtml') + css_path = os.path.join(temp_dir, 'styles.css') + bindata_dir = os.path.join(temp_dir, 'bindata') + + # 2. Read HTML and CSS + if os.path.exists(html_path): + with io.open(html_path, 'r', encoding='utf-8') as f: + html_content = f.read() + else: + raise RuntimeError("HTML generation failed") + + css_content = "" + if os.path.exists(css_path): + with io.open(css_path, 'r', encoding='utf-8') as f: + css_content = f.read() + + # 3. Embed CSS + # Insert \n' + html_content = html_content.replace('', f'{style_tag}') + # Remove external link to css if present (optional, but good practice) + # + html_content = re.sub(r']+href="styles.css"[^>]*/>', '', html_content) + + # 4. Embed Images + if os.path.exists(bindata_dir): + # Function to replace image src with base64 data + def replace_image(match): + src = match.group(1) + if src.startswith('bindata/'): + image_filename = os.path.basename(src) + image_path = os.path.join(bindata_dir, image_filename) + if os.path.exists(image_path): + # Guess mime type + mime_type, _ = mimetypes.guess_type(image_path) + if not mime_type: + mime_type = 'image/png' # Default fallback + + with open(image_path, 'rb') as img_f: + img_data = img_f.read() + b64_data = base64.b64encode(img_data).decode('ascii') + return f'src="data:{mime_type};base64,{b64_data}"' + return match.group(0) # Return original if not matched/found + + # Replace src="bindata/..." 
with data URI + # Regex looks for src="bindata/[^"]+" + html_content = re.sub(r'src="(bindata/[^"]+)"', replace_image, html_content) + + # 5. Write final output + with io.open(outpath, 'w', encoding='utf-8') as f: + f.write(html_content) + + def extract_bindata_dir(self, hwp5file, bindata_dir): + if 'BinData' not in hwp5file: + return + bindata_stg = hwp5file['BinData'] + if not os.path.exists(bindata_dir): + os.mkdir(bindata_dir) + + from hwp5.storage import unpack + unpack(bindata_stg, bindata_dir) + + +def main(): + from .dataio import ParseError + from .errors import InvalidHwp5FileError + from .utils import make_open_dest_file + from .xmlmodel import Hwp5File + + argparser = main_argparser() + args = argparser.parse_args() + init_logger(args) + + hwp5path = args.hwp5file + + html_transform = HTMLTransform() + + open_dest = make_open_dest_file(args.output) + if args.css: + transform = html_transform.transform_hwp5_to_css + open_dest = wrap_for_css(open_dest) + elif args.html: + transform = html_transform.transform_hwp5_to_xhtml + open_dest = wrap_for_xml(open_dest) + elif args.embed_image: + transform = html_transform.transform_hwp5_to_single + # For single file, we need a file path, not a file object + # transform_hwp5_to_single expects a path string + if not args.output: + args.output = os.path.splitext(os.path.basename(hwp5path))[0] + '.html' + open_dest = lambda: contextmanager(lambda: (yield args.output))() + else: + transform = html_transform.transform_hwp5_to_dir + dest_path = args.output + if not dest_path: + dest_path = os.path.splitext(os.path.basename(hwp5path))[0] + open_dest = partial(open_dir, dest_path) + + print(f"DEBUG: Input file: {hwp5path}") + print(f"DEBUG: Args: css={args.css}, html={args.html}, embed_image={getattr(args, 'embed_image', False)}") + + try: + with closing(Hwp5File(hwp5path)) as hwp5file: + with open_dest() as dest: + print(f"DEBUG: Starting transformation using {transform}") + transform(hwp5file, dest) + print("DEBUG: 
Transformation finished") + except Exception as e: + import traceback + traceback.print_exc() + logger.error('%s', e) + sys.exit(1) + + +def main_argparser(): + parser = ArgumentParser( + prog='hwp5html', + description=_('HWPv5 to HTML converter'), + ) + parser.add_argument( + '--version', + action='version', + version='%(prog)s {}'.format(version) + ) + parser.add_argument( + '--loglevel', + help=_('Set log level.'), + ) + parser.add_argument( + '--logfile', + help=_('Set log file.'), + ) + parser.add_argument( + '--output', + help=_('Output file'), + ) + parser.add_argument( + 'hwp5file', + metavar='', + help=_('.hwp file to convert'), + ) + generator_group = parser.add_mutually_exclusive_group() + generator_group.add_argument( + '--css', + action='store_true', + help=_('Generate CSS'), + ) + generator_group.add_argument( + '--html', + action='store_true', + help=_('Generate HTML'), + ) + generator_group.add_argument( + '--embed-image', + action='store_true', + help=_('Embed images and CSS into a single HTML file'), + ) + return parser + + +@contextmanager +def open_dir(path): + if os.path.exists(path): + shutil.rmtree(path) + os.mkdir(path) + yield path + + +def wrap_for_css(open_dest): + from .utils import wrap_open_dest_for_tty + from .utils import pager + from .utils import syntaxhighlight + return wrap_open_dest_for_tty(open_dest, [ + pager(), + syntaxhighlight('text/css'), + ]) + + +def wrap_for_xml(open_dest): + from .utils import wrap_open_dest_for_tty + from .utils import pager + from .utils import syntaxhighlight + from .utils import xmllint + return wrap_open_dest_for_tty(open_dest, [ + pager(), + syntaxhighlight('application/xml'), + xmllint(format=True, nonet=True), + ]) + + +if __name__ == '__main__': + main() diff --git a/src/hwp5/hwp5odt.py b/src/hwp5/hwp5odt.py new file mode 100644 index 0000000000000000000000000000000000000000..d020702fc2752db4dc0459e628d56b483b2d068a --- /dev/null +++ b/src/hwp5/hwp5odt.py @@ -0,0 +1,449 @@ +# -*- coding: utf-8 
-*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals +from argparse import ArgumentParser +from contextlib import contextmanager +from contextlib import closing +from functools import partial +from io import BytesIO +import gettext +import io +import logging +import os.path +import sys + +from . 
import __version__ as version +from .cli import init_logger +from .cli import init_with_environ +from .errors import ImplementationNotAvailable +from .utils import mkstemp_open +from .utils import hwp5_resources_path +from .transforms import BaseTransform +from .plat import get_relaxng_compile +from .utils import cached_property + + +PY3 = sys.version_info.major == 3 +if PY3: + basestring = str + unicode = str +logger = logging.getLogger(__name__) +locale_dir = os.path.join(os.path.dirname(__file__), 'locale') +locale_dir = os.path.abspath(locale_dir) +t = gettext.translation('hwp5odt', locale_dir, fallback=True) +_ = t.gettext + + +RESOURCE_PATH_RNG = 'odf-relaxng/OpenDocument-v1.2-os-schema.rng' +RESOURCE_PATH_XSL_SINGLE_DOCUMENT = 'xsl/odt/document.xsl' +RESOURCE_PATH_XSL_STYLE = 'xsl/odt/styles.xsl' +RESOURCE_PATH_XSL_CONTENT = 'xsl/odt/content.xsl' + + +class ODFValidate: + + def __init__(self, relaxng_compile=None): + ''' + >>> V = ODFValidate() + ''' + if relaxng_compile is None: + try: + relaxng_compile = self.get_default_relaxng_compile() + except ImplementationNotAvailable: + relaxng_compile = None + self.relaxng_compile = relaxng_compile + + @classmethod + def get_default_relaxng_compile(cls): + relaxng_compile = get_relaxng_compile() + if not relaxng_compile: + raise ImplementationNotAvailable('relaxng') + return relaxng_compile + + @cached_property + def odf_validator(self): + ''' + >>> with V.odf_validator(sys.stdout) as output: + ... 
output.write(xml) + ''' + return self.make_odf_validator() + + def make_odf_validator(self): + if self.relaxng_compile: + with hwp5_resources_path(RESOURCE_PATH_RNG) as rng_path: + return self.relaxng_compile(rng_path) + + +class ODTTransform(BaseTransform, ODFValidate): + + def __init__(self, xslt_compile=None, relaxng_compile=None, + embedbin=False): + ''' + >>> from hwp5.hwp5odt import ODTTransform + >>> T = ODTTransform() + ''' + BaseTransform.__init__(self, xslt_compile=xslt_compile, + embedbin=embedbin) + ODFValidate.__init__(self, relaxng_compile) + + @property + def transform_hwp5_to_styles(self): + ''' + >>> with io.open('styles.xml', 'wb') as f: + ... T.transform_hwp5_to_styles(hwp5file, f) + ''' + transform_xhwp5 = self.transform_xhwp5_to_styles + return self.make_transform_hwp5(transform_xhwp5) + + @property + def transform_hwp5_to_content(self): + ''' + >>> with io.open('content.xml', 'wb') as f: + ... T.transform_hwp5_to_content(hwp5file, f) + ''' + transform_xhwp5 = self.transform_xhwp5_to_content + return self.make_transform_hwp5(transform_xhwp5) + + @property + def transform_hwp5_to_single_document(self): + ''' + >>> with io.open('transformed.fodt', 'wb') as f: + ... T.transform_hwp5_to_single_document(hwp5file, f) + ''' + transform_xhwp5 = self.transform_xhwp5_to_single_document + return self.make_transform_hwp5(transform_xhwp5) + + def transform_hwp5_to_package(self, hwp5file, odtpkg): + ''' + >>> with open_odtpkg('transformed.odt') as odtpkg: + ... T.transform_hwp5_to_package(hwp5file, odtpkg) + ''' + with self.transformed_xhwp5_at_temp(hwp5file) as xml_path: + self.transform_xhwp5_into_package(xml_path, odtpkg) + + if 'BinData' in hwp5file: + bindata = hwp5file['BinData'] + for name in bindata: + f = bindata[name].open() + path = 'bindata/' + name + mimetype = 'application/octet-stream' + odtpkg.insert_stream(f, path, mimetype) + + @cached_property + def transform_xhwp5_to_styles(self): + ''' + >>> with io.open('styles.xml', 'wb') as f: + ... 
T.transform_xhwp5_to_styles('input.xml', f) + ''' + resource_path = RESOURCE_PATH_XSL_STYLE + return self.make_odf_transform(resource_path) + + @cached_property + def transform_xhwp5_to_content(self): + ''' + >>> with io.open('content.xml', 'wb') as f: + ... T.transform_xhwp5_to_content('input.xml', f) + ''' + resource_path = RESOURCE_PATH_XSL_CONTENT + return self.make_odf_transform(resource_path) + + @cached_property + def transform_xhwp5_to_single_document(self): + ''' + >>> with io.open('transformed.fodf', 'wb') as f: + ... T.transform_xhwp5_to_single_document('input.xml', f) + ''' + resource_path = RESOURCE_PATH_XSL_SINGLE_DOCUMENT + return self.make_odf_transform(resource_path) + + @property + def transform_xhwp5_into_package(self): + ''' + >>> with open_odtpkg('transformed.odt') as odtpkg: + >>> T.transform_xhwp5_into_package('input.xml', odtpkg) + ''' + def transform(xhwp5path, odtpkg): + with self.transformed_styles_at_temp(xhwp5path) as path: + odtpkg.insert_path(path, 'styles.xml', 'text/xml') + with self.transformed_content_at_temp(xhwp5path) as path: + odtpkg.insert_path(path, 'content.xml', 'text/xml') + + rdf = BytesIO() + manifest_rdf(rdf) + rdf.seek(0) + odtpkg.insert_stream(rdf, 'manifest.rdf', + 'application/rdf+xml') + return transform + + def transformed_styles_at_temp(self, xhwp5path): + ''' + >>> with T.transformed_styles_at_temp('input.xml') as styles_path: + ... pass + ''' + transform_xhwp5 = self.transform_xhwp5_to_styles + return transformed_at_temp_path(xhwp5path, transform_xhwp5) + + def transformed_content_at_temp(self, xhwp5path): + ''' + >>> with T.transformed_content_at_temp('input.xml') as content_path: + ... pass + ''' + transform_xhwp5 = self.transform_xhwp5_to_content + return transformed_at_temp_path(xhwp5path, transform_xhwp5) + + def transformed_single_document_at_temp(self, xhwp5path): + ''' + >>> with T.transformed_single_document_at_temp('input.xml') as path: + ... 
pass + ''' + transform_xhwp5 = self.transform_xhwp5_to_single_document + return transformed_at_temp_path(xhwp5path, transform_xhwp5) + + def make_odf_transform(self, resource_path): + transform = self.make_xsl_transform(resource_path) + validator = self.odf_validator + if validator: + def validating_transform(input, output): + with validator.validating_output(output) as output: + transform(input, output) + return validating_transform + else: + return transform + + +@contextmanager +def transformed_at_temp_path(inp_path, transform): + with mkstemp_open() as (tmp_path, f): + transform(inp_path, f) + f.flush() + yield tmp_path + + +class ODTPackage(object): + def __init__(self, path_or_zipfile): + self.files = [] + + if isinstance(path_or_zipfile, basestring): + from zipfile import ZipFile + zipfile = ZipFile(path_or_zipfile, 'w') + else: + zipfile = path_or_zipfile + self.zf = zipfile + + def insert_path(self, src_path, path, media_type): + with io.open(src_path, 'rb') as f: + self.insert_stream(f, path, media_type) + + def insert_stream(self, f, path, media_type): + if not isinstance(path, unicode): + path = path.decode('utf-8') + self.zf.writestr(path, f.read()) + self.files.append(dict(full_path=path, media_type=media_type)) + + def close(self): + + manifest = BytesIO() + manifest_xml(manifest, self.files) + manifest.seek(0) + self.zf.writestr('META-INF/manifest.xml', manifest.getvalue()) + self.zf.writestr('mimetype', 'application/vnd.oasis.opendocument.text') + + self.zf.close() + + +def manifest_xml(f, files): + from xml.sax.saxutils import XMLGenerator + xml = XMLGenerator(f, 'utf-8') + xml.startDocument() + + uri = 'urn:oasis:names:tc:opendocument:xmlns:manifest:1.0' + prefix = 'manifest' + xml.startPrefixMapping(prefix, uri) + + def startElement(name, attrs): + attrs = dict(((uri, n), v) for n, v in attrs.items()) + xml.startElementNS((uri, name), prefix + ':' + name, attrs) + + def endElement(name): + xml.endElementNS((uri, name), prefix + ':' + name) + + 
def file_entry(full_path, media_type, **kwargs): + attrs = {'media-type': media_type, 'full-path': full_path} + attrs.update(dict((n.replace('_', '-'), v) + for n, v in kwargs.items())) + startElement('file-entry', attrs) + endElement('file-entry') + + startElement('manifest', dict(version='1.2')) + file_entry('/', 'application/vnd.oasis.opendocument.text', version='1.2') + for e in files: + e = dict(e) + full_path = e.pop('full_path') + media_type = e.pop('media_type', 'application/octet-stream') + file_entry(full_path, media_type) + endElement('manifest') + + xml.endPrefixMapping(prefix) + xml.endDocument() + + +def manifest_rdf(f): + f.write(b''' + + + + + + + +''') + + +def main(): + from .dataio import ParseError + from .errors import InvalidHwp5FileError + from .utils import make_open_dest_file + from .xmlmodel import Hwp5File + + argparser = main_argparser() + args = argparser.parse_args() + init_logger(args) + + init_with_environ() + + hwp5path = args.hwp5file + + odt_transform = ODTTransform() + + open_dest = make_open_dest_file(args.output) + if args.document: + odt_transform.embedbin = not args.no_embed_image + transform = odt_transform.transform_hwp5_to_single_document + open_dest = wrap_for_xml(open_dest) + elif args.styles: + odt_transform.embedbin = args.embed_image + transform = odt_transform.transform_hwp5_to_styles + open_dest = wrap_for_xml(open_dest) + elif args.content: + odt_transform.embedbin = args.embed_image + transform = odt_transform.transform_hwp5_to_content + open_dest = wrap_for_xml(open_dest) + else: + odt_transform.embedbin = args.embed_image + transform = odt_transform.transform_hwp5_to_package + dest_path = args.output + dest_path = dest_path or replace_ext(hwp5path, '.odt') + open_dest = partial(open_odtpkg, dest_path) + + try: + with closing(Hwp5File(hwp5path)) as hwp5file: + with open_dest() as dest: + transform(hwp5file, dest) + except ParseError as e: + e.print_to_logger(logger) + except InvalidHwp5FileError as e: + 
logger.error('%s', e) + sys.exit(1) + + +def main_argparser(): + parser = ArgumentParser( + prog='hwp5odt', + description=_('HWPv5 to odt converter'), + ) + parser.add_argument( + '--version', + action='version', + version='%(prog)s {}'.format(version) + ) + parser.add_argument( + '--loglevel', + help=_('Set log level.'), + ) + parser.add_argument( + '--logfile', + help=_('Set log file.'), + ) + parser.add_argument( + '--output', + help=_('Output file'), + ) + parser.add_argument( + 'hwp5file', + metavar='', + help=_('.hwp file to convert'), + ) + generator_group = parser.add_mutually_exclusive_group() + generator_group.add_argument( + '--styles', + action='store_true', + help=_('Generate styles.xml'), + ) + generator_group.add_argument( + '--content', + action='store_true', + help=_('Generate content.xml'), + ) + generator_group.add_argument( + '--document', + action='store_true', + help=_('Generate .fodt'), + ) + embedimage = parser.add_mutually_exclusive_group() + embedimage.add_argument( + '--embed-image', + action='store_true', + help=_('Embed images in output xml.'), + ) + embedimage.add_argument( + '--no-embed-image', + action='store_true', + help=_('Do not embed images in output xml.'), + ) + return parser + + +def replace_ext(path, ext): + name = os.path.basename(path) + root = os.path.splitext(name)[0] + return root + ext + + +@contextmanager +def open_odtpkg(path): + odtpkg = ODTPackage(path) + with closing(odtpkg): + yield odtpkg + + +def wrap_for_xml(open_dest): + from .utils import wrap_open_dest_for_tty + from .utils import pager + from .utils import syntaxhighlight + from .utils import xmllint + return wrap_open_dest_for_tty(open_dest, [ + pager(), + syntaxhighlight('application/xml'), + xmllint(format=True), + ]) diff --git a/src/hwp5/hwp5proc.py b/src/hwp5/hwp5proc.py new file mode 100644 index 0000000000000000000000000000000000000000..01af0eaeff92bb67c51e1fc9d09076f055e50df3 --- /dev/null +++ b/src/hwp5/hwp5proc.py @@ -0,0 +1,129 @@ +# -*- 
coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals +from argparse import ArgumentParser +import gettext +import logging +import os +import sys + +from . import __version__ +from .cli import init_logger +from .dataio import ParseError +from .errors import InvalidHwp5FileError + + +PY3 = sys.version_info.major == 3 +logger = logging.getLogger(__name__) + +locale_dir = os.path.join(os.path.dirname(__file__), 'locale') +locale_dir = os.path.abspath(locale_dir) +t = gettext.translation('hwp5proc', locale_dir, fallback=True) +_ = t.gettext + + +program = 'hwp5proc (pyhwp) {version}'.format(version=__version__) + +copyright = 'Copyright (C) 2010-2023 mete0r ' + +license = _('''License AGPLv3+: GNU Affero GPL version 3 or any later +. +This is free software: you are free to change and redistribute it. +There is NO WARRANTY, to the extent permitted by law.''') + +disclosure = _('''Disclosure: This program has been developed in accordance with a public +document named "HWP Binary Specification 1.1" published by Hancom Inc. 
+.''') # noqa + +version = '''{program} +{copyright} +{license} +{disclosure}'''.format( + program=program, + copyright=copyright, + license=license, + disclosure=disclosure, +) + + +def main(): + argparser = main_argparser() + args = argparser.parse_args() + init_logger(args) + + try: + subcommand_fn = args.func + except AttributeError: + argparser.print_help() + raise SystemExit(1) + + try: + return subcommand_fn(args) + except InvalidHwp5FileError as e: + logger.error('%s', e) + raise SystemExit(1) + except ParseError as e: + e.print_to_logger(logger) + raise SystemExit(1) + + +def main_argparser(): + from .proc.version import version_argparser + from .proc.header import header_argparser + from .proc.summaryinfo import summaryinfo_argparser + from .proc.ls import ls_argparser + from .proc.cat import cat_argparser + from .proc.unpack import unpack_argparser + from .proc.records import records_argparser + from .proc.models import models_argparser + from .proc.find import find_argparser + from .proc.xml import xml_argparser + from .proc.rawunz import rawunz_argparser + from .proc.diststream import diststream_argparser + parser = ArgumentParser( + prog='hwp5proc', + description=_('Do various operations on HWPv5 files.'), + ) + parser.add_argument( + '--loglevel', + help=_('Set log level.'), + ) + parser.add_argument( + '--logfile', + help=_('Set log file.'), + ) + subcommands = parser.add_subparsers( + title=_('subcommands'), + description=_('valid subcommands'), + ) + version_argparser(subcommands, _) + header_argparser(subcommands, _) + summaryinfo_argparser(subcommands, _) + ls_argparser(subcommands, _) + cat_argparser(subcommands, _) + unpack_argparser(subcommands, _) + records_argparser(subcommands, _) + models_argparser(subcommands, _) + find_argparser(subcommands, _) + xml_argparser(subcommands, _) + rawunz_argparser(subcommands, _) + diststream_argparser(subcommands, _) + return parser diff --git a/src/hwp5/hwp5txt.py b/src/hwp5/hwp5txt.py new file mode 
100644 index 0000000000000000000000000000000000000000..a8a630e415387432d5cdd20e00b6610406ff7063 --- /dev/null +++ b/src/hwp5/hwp5txt.py @@ -0,0 +1,116 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals +from argparse import ArgumentParser +from contextlib import closing +import gettext +import logging +import os.path +import sys + +from . 
import __version__ as version +from .cli import init_logger +from .dataio import ParseError +from .errors import InvalidHwp5FileError +from .utils import make_open_dest_file +from .utils import cached_property +from .transforms import BaseTransform +from .xmlmodel import Hwp5File + + +PY3 = sys.version_info.major == 3 +logger = logging.getLogger(__name__) +locale_dir = os.path.join(os.path.dirname(__file__), 'locale') +locale_dir = os.path.abspath(locale_dir) +t = gettext.translation('hwp5txt', locale_dir, fallback=True) +_ = t.gettext + + +RESOURCE_PATH_XSL_TEXT = 'xsl/plaintext.xsl' + + +class TextTransform(BaseTransform): + + @property + def transform_hwp5_to_text(self): + transform_xhwp5 = self.transform_xhwp5_to_text + return self.make_transform_hwp5(transform_xhwp5) + + @cached_property + def transform_xhwp5_to_text(self): + ''' + >>> T.transform_xhwp5_to_css('hwp5.xml', 'styles.css') + ''' + resource_path = RESOURCE_PATH_XSL_TEXT + return self.make_xsl_transform(resource_path) + + +def main(): + argparser = main_argparser() + args = argparser.parse_args() + init_logger(args) + + hwp5path = args.hwp5file + + text_transform = TextTransform() + + open_dest = make_open_dest_file(args.output) + transform = text_transform.transform_hwp5_to_text + + try: + with closing(Hwp5File(hwp5path)) as hwp5file: + with open_dest() as dest: + transform(hwp5file, dest) + except ParseError as e: + e.print_to_logger(logger) + except InvalidHwp5FileError as e: + logger.error('%s', e) + sys.exit(1) + + +def main_argparser(): + parser = ArgumentParser( + prog='hwp5txt', + description=_('HWPv5 to txt converter'), + ) + parser.add_argument( + '--version', + action='version', + version='%(prog)s {}'.format(version) + ) + parser.add_argument( + '--loglevel', + help=_('Set log level.'), + ) + parser.add_argument( + '--logfile', + help=_('Set log file.'), + ) + parser.add_argument( + '--output', + help=_('Output file'), + ) + parser.add_argument( + 'hwp5file', + metavar='', + help=_('.hwp 
file to convert'), + ) + return parser diff --git a/src/hwp5/hwp5view.py b/src/hwp5/hwp5view.py new file mode 100644 index 0000000000000000000000000000000000000000..49c975c55860c84822c79c5963cb78f9b5be838a --- /dev/null +++ b/src/hwp5/hwp5view.py @@ -0,0 +1,233 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# +'''HWP5 viewer (Experimental, gtk only) + +Usage:: + + hwp5view [options] + hwp5view -h | --help + hwp5view --version + +Options:: + + -h --help Show this screen + --version Show version + --loglevel= Set log level. + --logfile= Set log file. +''' +from __future__ import absolute_import +from __future__ import unicode_literals +from __future__ import print_function +from argparse import ArgumentParser +from contextlib import closing +from contextlib import contextmanager +from tempfile import mkdtemp +import gettext +import io +import logging +import os.path +import shutil +import sys +import urllib + +from . 
import __version__ as version +from .cli import init_logger +from .xmlmodel import Hwp5File +from .hwp5html import HTMLTransform + + +PY3 = sys.version_info.major == 3 +logger = logging.getLogger(__name__) +locale_dir = os.path.join(os.path.dirname(__file__), 'locale') +locale_dir = os.path.abspath(locale_dir) +t = gettext.translation('hwp5view', locale_dir, fallback=True) +_ = t.gettext + + +def main(): + argparser = main_argparser() + args = argparser.parse_args() + init_logger(args) + + runner = runner_factory() + + with make_temporary_directory() as out_dir: + with hwp5html(args.hwp5file, out_dir) as index_path: + runner(args, index_path, out_dir) + + +def main_argparser(): + parser = ArgumentParser( + prog='hwp5view', + description=_( + 'HWPv5 viewer (Experimental, gtk only)' + ) + ) + parser.add_argument( + '--version', + action='version', + version='%(prog)s {}'.format(version) + ) + parser.add_argument( + '--loglevel', + help=_('Set log level.'), + ) + parser.add_argument( + '--logfile', + help=_('Set log file.'), + ) + parser.add_argument( + 'hwp5file', + metavar='', + help=_('.hwp file to view'), + ) + return parser + + +def runner_factory(): + try: + return runner_factory_gi() + except ImportError: + pass + + try: + return runner_factory_pyside() + except ImportError: + pass + + raise NotImplementedError( + 'Neither gi.repository.WebKit nor pyside is found' + ) + + +def runner_factory_gi(): + from gi.repository import Gtk + from gi.repository import WebKit + + def runner(args, index_path, out_dir): + base_uri = fspath2url(out_dir) + '/' + # index_uri = fspath2url(index_path) + with io.open(index_path, 'rb') as f: + content = f.read() + + view = WebKit.WebView() + # view.load_uri(index_uri) + view.load_string(content, 'text/html', 'utf-8', base_uri) + + def on_load(webview, webframe): + script = ('window.location.href = "dimension:" ' + '+ document.body.scrollWidth + "x"' + '+ document.body.scrollHeight') + webview.execute_script(script) + + MIN_WIDTH = 
300 + MIN_HEIGHT = 400 + MAX_WIDTH = 1024 + MAX_HEIGHT = 800 + + view.connect('load-finished', on_load) + + def on_navigation_requested(webview, frame, req, data=None): + uri = req.get_uri() + scheme, path = uri.split(':', 1) + if scheme == 'dimension': + width, height = path.split('x', 1) + width = int(width) + height = int(height) + width = min(width, MAX_WIDTH) + height = min(height, MAX_HEIGHT) + width = max(width, MIN_WIDTH) + height = max(height, MIN_HEIGHT) + window.resize(width + 4, height) + return True + return False + + view.connect('navigation-requested', on_navigation_requested) + + scrolled_window = Gtk.ScrolledWindow() + scrolled_window.add(view) + + vbox = Gtk.VBox() + vbox.pack_start(scrolled_window, expand=True, fill=True, padding=0) + + window = Gtk.Window() + window.add(vbox) + window.connect('delete-event', Gtk.main_quit) + window.set_default_size(600, 800) + window.show_all() + + Gtk.main() + + return runner + + +def runner_factory_pyside(): + from PySide.QtCore import QUrl + from PySide.QtGui import QApplication + from PySide.QtGui import QMainWindow + from PySide.QtWebKit import QWebView + + class MainWindow(QMainWindow): + pass + + def runner(args, index_path, out_dir): + app = QApplication(sys.argv) + + frame = MainWindow() + frame.setWindowTitle('hwp5view') + frame.setMinimumWidth(400) + + url = fspath2url(index_path) + url = QUrl(url) + view = QWebView(frame) + + logger.info('Loading...') + view.load(url) + + @view.loadFinished.connect + def onLoadFinished(): + frame.show() + + frame.setCentralWidget(view) + + app.exec_() + + return runner + + +@contextmanager +def make_temporary_directory(*args, **kwargs): + path = mkdtemp(*args, **kwargs) + try: + logger.warning('temporary directory for contents: %s', path) + yield path + finally: + shutil.rmtree(path) + + +@contextmanager +def hwp5html(filename, out_dir): + with closing(Hwp5File(filename)) as hwp5file: + HTMLTransform().transform_hwp5_to_dir(hwp5file, out_dir) + yield 
os.path.join(out_dir, 'index.xhtml') + + +def fspath2url(path): + path = os.path.abspath(path) + return 'file://' + urllib.pathname2url(path) diff --git a/src/hwp5/importhelper.py b/src/hwp5/importhelper.py new file mode 100644 index 0000000000000000000000000000000000000000..1a735054e7c11300617ec9746cbbf2495fa4b887 --- /dev/null +++ b/src/hwp5/importhelper.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+# +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals +import os.path + + +def pkg_resources_filename(pkg_name, path): + ''' the equivalent of pkg_resources.resource_filename() ''' + try: + import pkg_resources + except ImportError: + return pkg_resources_filename_fallback(pkg_name, path) + else: + return pkg_resources.resource_filename(pkg_name, path) + + +def pkg_resources_filename_fallback(pkg_name, path): + ''' a fallback implementation of pkg_resources_filename() ''' + pkg_module = __import__(pkg_name) + pkg_name = pkg_name.split('.') + for x in pkg_name[1:]: + pkg_module = getattr(pkg_module, x) + pkg_dir = os.path.dirname(pkg_module.__file__) + return os.path.join(pkg_dir, path) diff --git a/src/hwp5/locale/hwp5html.pot b/src/hwp5/locale/hwp5html.pot new file mode 100644 index 0000000000000000000000000000000000000000..b6f857266bdac98eb98fdea3d35d096a166f94db --- /dev/null +++ b/src/hwp5/locale/hwp5html.pot @@ -0,0 +1,47 @@ +# Translations template for pyhwp. +# Copyright (C) 2019 https://github.com/mete0r +# This file is distributed under the same license as the pyhwp project. +# FIRST AUTHOR , 2019. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: pyhwp 0.1b13.dev0\n" +"Report-Msgid-Bugs-To: https://github.com/mete0r\n" +"POT-Creation-Date: 2019-04-10 22:55+0900\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.5.0\n" + +#: pyhwp/hwp5/hwp5html.py:163 +msgid "HWPv5 to HTML converter" +msgstr "" + +#: pyhwp/hwp5/hwp5html.py:172 +msgid "Set log level." +msgstr "" + +#: pyhwp/hwp5/hwp5html.py:176 +msgid "Set log file." 
+msgstr "" + +#: pyhwp/hwp5/hwp5html.py:180 +msgid "Output file" +msgstr "" + +#: pyhwp/hwp5/hwp5html.py:185 +msgid ".hwp file to convert" +msgstr "" + +#: pyhwp/hwp5/hwp5html.py:191 +msgid "Generate CSS" +msgstr "" + +#: pyhwp/hwp5/hwp5html.py:196 +msgid "Generate HTML" +msgstr "" + diff --git a/src/hwp5/locale/hwp5odt.pot b/src/hwp5/locale/hwp5odt.pot new file mode 100644 index 0000000000000000000000000000000000000000..50f8b080126bdc3baff5a2c01e6ec3114ac49570 --- /dev/null +++ b/src/hwp5/locale/hwp5odt.pot @@ -0,0 +1,59 @@ +# Translations template for pyhwp. +# Copyright (C) 2019 https://github.com/mete0r +# This file is distributed under the same license as the pyhwp project. +# FIRST AUTHOR , 2019. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: pyhwp 0.1b13.dev0\n" +"Report-Msgid-Bugs-To: https://github.com/mete0r\n" +"POT-Creation-Date: 2019-04-10 22:55+0900\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.5.0\n" + +#: pyhwp/hwp5/hwp5odt.py:376 +msgid "HWPv5 to odt converter" +msgstr "" + +#: pyhwp/hwp5/hwp5odt.py:385 +msgid "Set log level." +msgstr "" + +#: pyhwp/hwp5/hwp5odt.py:389 +msgid "Set log file." +msgstr "" + +#: pyhwp/hwp5/hwp5odt.py:393 +msgid "Output file" +msgstr "" + +#: pyhwp/hwp5/hwp5odt.py:398 +msgid ".hwp file to convert" +msgstr "" + +#: pyhwp/hwp5/hwp5odt.py:404 +msgid "Generate styles.xml" +msgstr "" + +#: pyhwp/hwp5/hwp5odt.py:409 +msgid "Generate content.xml" +msgstr "" + +#: pyhwp/hwp5/hwp5odt.py:414 +msgid "Generate .fodt" +msgstr "" + +#: pyhwp/hwp5/hwp5odt.py:420 +msgid "Embed images in output xml." +msgstr "" + +#: pyhwp/hwp5/hwp5odt.py:425 +msgid "Do not embed images in output xml." 
+msgstr "" + diff --git a/src/hwp5/locale/hwp5proc.pot b/src/hwp5/locale/hwp5proc.pot new file mode 100644 index 0000000000000000000000000000000000000000..dea5c424eb83f1ec4bf571e58149c37d7e6f14c3 --- /dev/null +++ b/src/hwp5/locale/hwp5proc.pot @@ -0,0 +1,279 @@ +# Translations template for pyhwp. +# Copyright (C) 2019 https://github.com/mete0r +# This file is distributed under the same license as the pyhwp project. +# FIRST AUTHOR , 2019. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: pyhwp 0.1b13.dev0\n" +"Report-Msgid-Bugs-To: https://github.com/mete0r\n" +"POT-Creation-Date: 2019-04-10 22:55+0900\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.5.0\n" + +#: pyhwp/hwp5/proc/__init__.py:75 +msgid "" +"License AGPLv3+: GNU Affero GPL version 3 or any later\n" +".\n" +"This is free software: you are free to change and redistribute it.\n" +"There is NO WARRANTY, to the extent permitted by law." +msgstr "" + +#: pyhwp/hwp5/proc/__init__.py:80 +msgid "" +"Disclosure: This program has been developed in accordance with a public\n" +"document named \"HWP Binary Specification 1.1\" published by Hancom Inc.\n" +"." +msgstr "" + +#: pyhwp/hwp5/proc/__init__.py:127 +msgid "Do various operations on HWPv5 files." +msgstr "" + +#: pyhwp/hwp5/proc/__init__.py:131 +msgid "Set log level." +msgstr "" + +#: pyhwp/hwp5/proc/__init__.py:135 +msgid "Set log file." +msgstr "" + +#: pyhwp/hwp5/proc/__init__.py:138 +msgid "subcommands" +msgstr "" + +#: pyhwp/hwp5/proc/__init__.py:139 +msgid "valid subcommands" +msgstr "" + +#: pyhwp/hwp5/proc/cat.py:55 +msgid "Extract out internal streams of .hwp files" +msgstr "" + +#: pyhwp/hwp5/proc/cat.py:58 +msgid "Extract out the specified stream in the to the standard output." 
+msgstr "" + +#: pyhwp/hwp5/proc/cat.py:66 pyhwp/hwp5/proc/header.py:53 +#: pyhwp/hwp5/proc/ls.py:107 pyhwp/hwp5/proc/models.py:91 +#: pyhwp/hwp5/proc/records.py:99 pyhwp/hwp5/proc/summaryinfo.py:61 +#: pyhwp/hwp5/proc/unpack.py:54 pyhwp/hwp5/proc/version.py:45 +#: pyhwp/hwp5/proc/xml.py:91 +msgid ".hwp file to analyze" +msgstr "" + +#: pyhwp/hwp5/proc/cat.py:71 +msgid "Internal path of a stream to extract" +msgstr "" + +#: pyhwp/hwp5/proc/cat.py:77 pyhwp/hwp5/proc/ls.py:113 +#: pyhwp/hwp5/proc/unpack.py:66 +msgid "Process with virtual streams (i.e. parsed/converted form of real streams)" +msgstr "" + +#: pyhwp/hwp5/proc/cat.py:85 pyhwp/hwp5/proc/ls.py:121 +#: pyhwp/hwp5/proc/unpack.py:74 +msgid "" +"Treat as an OLE Compound File. As a result, some streams will " +"be presented as-is. (i.e. not decompressed)" +msgstr "" + +#: pyhwp/hwp5/proc/diststream.py:93 pyhwp/hwp5/proc/diststream.py:96 +msgid "Decode a distribute document stream." +msgstr "" + +#: pyhwp/hwp5/proc/diststream.py:104 +msgid "Print SHA-1 value for decryption." +msgstr "" + +#: pyhwp/hwp5/proc/diststream.py:109 +msgid "Print decrypted key." +msgstr "" + +#: pyhwp/hwp5/proc/diststream.py:114 +msgid "Print raw binary objects as is." +msgstr "" + +#: pyhwp/hwp5/proc/find.py:107 pyhwp/hwp5/proc/find.py:110 +msgid "Find record models with specified predicates." +msgstr "" + +#: pyhwp/hwp5/proc/find.py:118 +msgid ".hwp files to analyze" +msgstr "" + +#: pyhwp/hwp5/proc/find.py:123 +msgid "get filenames from stdin" +msgstr "" + +#: pyhwp/hwp5/proc/find.py:129 +msgid "filter with record model name" +msgstr "" + +#: pyhwp/hwp5/proc/find.py:136 +msgid "filter with record HWPTAG" +msgstr "" + +#: pyhwp/hwp5/proc/find.py:143 +msgid "filter with incompletely parsed content" +msgstr "" + +#: pyhwp/hwp5/proc/find.py:148 +msgid "record output format" +msgstr "" + +#: pyhwp/hwp5/proc/find.py:155 +msgid "dump record" +msgstr "" + +#: pyhwp/hwp5/proc/header.py:47 +msgid "Print file headers of .hwp files." 
+msgstr "" + +#: pyhwp/hwp5/proc/header.py:48 +msgid "Print the file header of ." +msgstr "" + +#: pyhwp/hwp5/proc/ls.py:101 +msgid "List streams in .hwp files." +msgstr "" + +#: pyhwp/hwp5/proc/ls.py:102 +msgid "List streams in the ." +msgstr "" + +#: pyhwp/hwp5/proc/models.py:80 +msgid "Print parsed binary models of .hwp file record streams." +msgstr "" + +#: pyhwp/hwp5/proc/models.py:83 +msgid "Print parsed binary models in the specified ." +msgstr "" + +#: pyhwp/hwp5/proc/models.py:97 pyhwp/hwp5/proc/records.py:105 +msgid "" +"Record-structured internal streams.\n" +"(e.g. DocInfo, BodyText/*)\n" +msgstr "" + +#: pyhwp/hwp5/proc/models.py:106 +msgid "Specifies HWPv5 file format version of the standard input stream" +msgstr "" + +#: pyhwp/hwp5/proc/models.py:114 pyhwp/hwp5/proc/records.py:114 +msgid "Print records as simple tree" +msgstr "" + +#: pyhwp/hwp5/proc/models.py:121 pyhwp/hwp5/proc/records.py:121 +msgid "Print records as json" +msgstr "" + +#: pyhwp/hwp5/proc/models.py:128 +msgid "Print records formatted" +msgstr "" + +#: pyhwp/hwp5/proc/models.py:135 +msgid "Print records as events" +msgstr "" + +#: pyhwp/hwp5/proc/models.py:143 pyhwp/hwp5/proc/records.py:159 +msgid "Specifies the N-th subtree of the record structure." +msgstr "" + +#: pyhwp/hwp5/proc/models.py:150 +msgid "Print a model of -th record" +msgstr "" + +#: pyhwp/hwp5/proc/rawunz.py:46 pyhwp/hwp5/proc/rawunz.py:49 +msgid "Deflate an headerless zlib-compressed stream" +msgstr "" + +#: pyhwp/hwp5/proc/records.py:88 +msgid "Print the record structure of .hwp file record streams." +msgstr "" + +#: pyhwp/hwp5/proc/records.py:91 +msgid "Print the record structure of the specified stream." 
+msgstr "" + +#: pyhwp/hwp5/proc/records.py:128 +msgid "Print records as is" +msgstr "" + +#: pyhwp/hwp5/proc/records.py:135 +msgid "Print record headers as is" +msgstr "" + +#: pyhwp/hwp5/proc/records.py:142 +msgid "Print record payloads as is" +msgstr "" + +#: pyhwp/hwp5/proc/records.py:150 +msgid "" +"Specifies the range of the records.\n" +"N-M means \"from the record N to M-1 (excluding M)\"\n" +"N means just the record N\n" +msgstr "" + +#: pyhwp/hwp5/proc/summaryinfo.py:51 +msgid "Print summary informations of .hwp files." +msgstr "" + +#: pyhwp/hwp5/proc/summaryinfo.py:54 +msgid "Print the summary information of ." +msgstr "" + +#: pyhwp/hwp5/proc/unpack.py:43 +msgid "Extract out internal streams of .hwp files into a directory." +msgstr "" + +#: pyhwp/hwp5/proc/unpack.py:46 +msgid "Extract out streams in the specified to a directory." +msgstr "" + +#: pyhwp/hwp5/proc/unpack.py:60 +msgid "Output directory" +msgstr "" + +#: pyhwp/hwp5/proc/version.py:35 +msgid "Print the file format version of .hwp files." +msgstr "" + +#: pyhwp/hwp5/proc/version.py:38 +msgid "Print the file format version of ." +msgstr "" + +#: pyhwp/hwp5/proc/xml.py:81 +msgid "Transform .hwp files into an XML." +msgstr "" + +#: pyhwp/hwp5/proc/xml.py:84 +msgid "Transform into an XML." +msgstr "" + +#: pyhwp/hwp5/proc/xml.py:96 +msgid "Embed BinData/* streams in the output XML." +msgstr "" + +#: pyhwp/hwp5/proc/xml.py:101 +msgid "Do not output XML declaration." +msgstr "" + +#: pyhwp/hwp5/proc/xml.py:106 +msgid "Output filename." +msgstr "" + +#: pyhwp/hwp5/proc/xml.py:111 +msgid "\"flat\", \"nested\" (default: \"nested\")" +msgstr "" + +#: pyhwp/hwp5/proc/xml.py:116 +msgid "Do not validate well-formedness of output." 
+msgstr "" + diff --git a/src/hwp5/locale/hwp5txt.pot b/src/hwp5/locale/hwp5txt.pot new file mode 100644 index 0000000000000000000000000000000000000000..32c1bec4144abdeda4add9af28cb7f9b7c1eeaf0 --- /dev/null +++ b/src/hwp5/locale/hwp5txt.pot @@ -0,0 +1,39 @@ +# Translations template for pyhwp. +# Copyright (C) 2019 https://github.com/mete0r +# This file is distributed under the same license as the pyhwp project. +# FIRST AUTHOR , 2019. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: pyhwp 0.1b13.dev0\n" +"Report-Msgid-Bugs-To: https://github.com/mete0r\n" +"POT-Creation-Date: 2019-04-10 22:55+0900\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.5.0\n" + +#: pyhwp/hwp5/hwp5txt.py:95 +msgid "HWPv5 to txt converter" +msgstr "" + +#: pyhwp/hwp5/hwp5txt.py:104 +msgid "Set log level." +msgstr "" + +#: pyhwp/hwp5/hwp5txt.py:108 +msgid "Set log file." +msgstr "" + +#: pyhwp/hwp5/hwp5txt.py:112 +msgid "Output file" +msgstr "" + +#: pyhwp/hwp5/hwp5txt.py:117 +msgid ".hwp file to convert" +msgstr "" + diff --git a/src/hwp5/locale/hwp5view.pot b/src/hwp5/locale/hwp5view.pot new file mode 100644 index 0000000000000000000000000000000000000000..7c995c3dfc52b474fcfd6520f94732ee6a3c1014 --- /dev/null +++ b/src/hwp5/locale/hwp5view.pot @@ -0,0 +1,35 @@ +# Translations template for pyhwp. +# Copyright (C) 2019 https://github.com/mete0r +# This file is distributed under the same license as the pyhwp project. +# FIRST AUTHOR , 2019. 
+# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: pyhwp 0.1b13.dev0\n" +"Report-Msgid-Bugs-To: https://github.com/mete0r\n" +"POT-Creation-Date: 2019-04-10 22:55+0900\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.5.0\n" + +#: pyhwp/hwp5/hwp5view.py:81 +msgid "HWPv5 viewer (Experimental, gtk only)" +msgstr "" + +#: pyhwp/hwp5/hwp5view.py:92 +msgid "Set log level." +msgstr "" + +#: pyhwp/hwp5/hwp5view.py:96 +msgid "Set log file." +msgstr "" + +#: pyhwp/hwp5/hwp5view.py:101 +msgid ".hwp file to view" +msgstr "" + diff --git a/src/hwp5/locale/ko/LC_MESSAGES/hwp5html.po b/src/hwp5/locale/ko/LC_MESSAGES/hwp5html.po new file mode 100644 index 0000000000000000000000000000000000000000..9be2af71679368cc216e4f53006fd4f54c573961 --- /dev/null +++ b/src/hwp5/locale/ko/LC_MESSAGES/hwp5html.po @@ -0,0 +1,48 @@ +# Korean translations for pyhwp. +# Copyright (C) 2017 https://github.com/mete0r +# This file is distributed under the same license as the pyhwp project. +# FIRST AUTHOR , 2017. +# +msgid "" +msgstr "" +"Project-Id-Version: pyhwp 0.1b9.dev0\n" +"Report-Msgid-Bugs-To: https://github.com/mete0r\n" +"POT-Creation-Date: 2019-04-10 22:55+0900\n" +"PO-Revision-Date: 2017-10-09 00:54+0900\n" +"Last-Translator: FULL NAME \n" +"Language: ko\n" +"Language-Team: ko \n" +"Plural-Forms: nplurals=1; plural=0\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.5.0\n" + +#: pyhwp/hwp5/hwp5html.py:163 +msgid "HWPv5 to HTML converter" +msgstr "HWPv5 HTML 변환기" + +#: pyhwp/hwp5/hwp5html.py:172 +msgid "Set log level." +msgstr "로그 수준 지정" + +#: pyhwp/hwp5/hwp5html.py:176 +msgid "Set log file." 
+msgstr "로그 파일 지정" + +#: pyhwp/hwp5/hwp5html.py:180 +msgid "Output file" +msgstr "출력 파일" + +#: pyhwp/hwp5/hwp5html.py:185 +msgid ".hwp file to convert" +msgstr "변환할 .hwp 파일" + +#: pyhwp/hwp5/hwp5html.py:191 +msgid "Generate CSS" +msgstr "CSS 생성" + +#: pyhwp/hwp5/hwp5html.py:196 +msgid "Generate HTML" +msgstr "HTML 생성" + diff --git a/src/hwp5/locale/ko/LC_MESSAGES/hwp5odt.po b/src/hwp5/locale/ko/LC_MESSAGES/hwp5odt.po new file mode 100644 index 0000000000000000000000000000000000000000..fb5388da613ffdb2fd8bc0c47581aaa09a285df3 --- /dev/null +++ b/src/hwp5/locale/ko/LC_MESSAGES/hwp5odt.po @@ -0,0 +1,60 @@ +# Korean translations for pyhwp. +# Copyright (C) 2017 https://github.com/mete0r +# This file is distributed under the same license as the pyhwp project. +# FIRST AUTHOR , 2017. +# +msgid "" +msgstr "" +"Project-Id-Version: pyhwp 0.1b9.dev0\n" +"Report-Msgid-Bugs-To: https://github.com/mete0r\n" +"POT-Creation-Date: 2019-04-10 22:55+0900\n" +"PO-Revision-Date: 2017-10-09 00:54+0900\n" +"Last-Translator: FULL NAME \n" +"Language: ko\n" +"Language-Team: ko \n" +"Plural-Forms: nplurals=1; plural=0\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.5.0\n" + +#: pyhwp/hwp5/hwp5odt.py:376 +msgid "HWPv5 to odt converter" +msgstr "HWPv5 ODT 변환기" + +#: pyhwp/hwp5/hwp5odt.py:385 +msgid "Set log level." +msgstr "로그 수준 지정" + +#: pyhwp/hwp5/hwp5odt.py:389 +msgid "Set log file." +msgstr "로그 파일 지정" + +#: pyhwp/hwp5/hwp5odt.py:393 +msgid "Output file" +msgstr "출력 파일" + +#: pyhwp/hwp5/hwp5odt.py:398 +msgid ".hwp file to convert" +msgstr "변환할 .hwp 파일" + +#: pyhwp/hwp5/hwp5odt.py:404 +msgid "Generate styles.xml" +msgstr "styles.xml 생성" + +#: pyhwp/hwp5/hwp5odt.py:409 +msgid "Generate content.xml" +msgstr "content.xml 생성" + +#: pyhwp/hwp5/hwp5odt.py:414 +msgid "Generate .fodt" +msgstr ".fodt 생성" + +#: pyhwp/hwp5/hwp5odt.py:420 +msgid "Embed images in output xml." 
+msgstr "화상을 출력 xml에 내장" + +#: pyhwp/hwp5/hwp5odt.py:425 +msgid "Do not embed images in output xml." +msgstr "화상을 출력 xml에 내장하지 않음" + diff --git a/src/hwp5/locale/ko/LC_MESSAGES/hwp5proc.po b/src/hwp5/locale/ko/LC_MESSAGES/hwp5proc.po new file mode 100644 index 0000000000000000000000000000000000000000..2a0d53f1c03d2760452d6c41cfadadaccdff539f --- /dev/null +++ b/src/hwp5/locale/ko/LC_MESSAGES/hwp5proc.po @@ -0,0 +1,300 @@ +# Korean translations for pyhwp. +# pyhwp 한국/조선어 번역 +# Copyright (C) 2017 https://github.com/mete0r +# This file is distributed under the same license as the pyhwp project. +# mete0r , 2017. +# +msgid "" +msgstr "" +"Project-Id-Version: pyhwp 0.1b9.dev0\n" +"Report-Msgid-Bugs-To: https://github.com/mete0r\n" +"POT-Creation-Date: 2019-04-10 22:55+0900\n" +"PO-Revision-Date: 2017-10-09 00:41+0900\n" +"Last-Translator: FULL NAME \n" +"Language: ko\n" +"Language-Team: ko \n" +"Plural-Forms: nplurals=1; plural=0\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.5.0\n" + +#: pyhwp/hwp5/proc/__init__.py:75 +msgid "" +"License AGPLv3+: GNU Affero GPL version 3 or any later\n" +".\n" +"This is free software: you are free to change and redistribute it.\n" +"There is NO WARRANTY, to the extent permitted by law." +msgstr "" +"AGPLv3+ 사용허락조건: GNU Affero GPL version 3 혹은 그 이후 판본에 의거하여\n" +"이 프로그램의 사용을 허락합니다.\n" +".\n" +"이 프로그램은 자유 소프트웨어입니다: 자유롭게 변경하고 재배포할 수 있습니다.\n" +"또한 이 프로그램은 아무런 보증 없이 제공됩니다. 이 프로그램을 사용하여\n" +"발생한 결과에 대해 저자는 법률이 정한 바 외에는 아무런 책임을 지지 않습니다." + +#: pyhwp/hwp5/proc/__init__.py:80 +msgid "" +"Disclosure: This program has been developed in accordance with a public\n" +"document named \"HWP Binary Specification 1.1\" published by Hancom Inc.\n" +"." +msgstr "" +"공개: 본 제품은 한글과컴퓨터의 한글 문서 파일(.hwp) 공개 문서를\n" +"참고하여 개발하였습니다.\n" +"." + +#: pyhwp/hwp5/proc/__init__.py:127 +msgid "Do various operations on HWPv5 files." 
+msgstr "HWPv5 파일로 작업 수행" + +#: pyhwp/hwp5/proc/__init__.py:131 +msgid "Set log level." +msgstr "로그 수준 지정" + +#: pyhwp/hwp5/proc/__init__.py:135 +msgid "Set log file." +msgstr "로그 파일 지정" + +#: pyhwp/hwp5/proc/__init__.py:138 +msgid "subcommands" +msgstr "하위 명령" + +#: pyhwp/hwp5/proc/__init__.py:139 +msgid "valid subcommands" +msgstr "유효한 하위 명령" + +#: pyhwp/hwp5/proc/cat.py:55 +msgid "Extract out internal streams of .hwp files" +msgstr ".hwp 파일의 내부 스트림을 출력합니다." + +#: pyhwp/hwp5/proc/cat.py:58 +msgid "Extract out the specified stream in the to the standard output." +msgstr "의 지정된 스트림을 표준 출력으로 추출합니다." + +#: pyhwp/hwp5/proc/cat.py:66 pyhwp/hwp5/proc/header.py:53 +#: pyhwp/hwp5/proc/ls.py:107 pyhwp/hwp5/proc/models.py:91 +#: pyhwp/hwp5/proc/records.py:99 pyhwp/hwp5/proc/summaryinfo.py:61 +#: pyhwp/hwp5/proc/unpack.py:54 pyhwp/hwp5/proc/version.py:45 +#: pyhwp/hwp5/proc/xml.py:91 +msgid ".hwp file to analyze" +msgstr "분석할 .hwp 파일" + +#: pyhwp/hwp5/proc/cat.py:71 +msgid "Internal path of a stream to extract" +msgstr "추출할 스트림 내부 경로" + +#: pyhwp/hwp5/proc/cat.py:77 pyhwp/hwp5/proc/ls.py:113 +#: pyhwp/hwp5/proc/unpack.py:66 +msgid "Process with virtual streams (i.e. parsed/converted form of real streams)" +msgstr "가상 스트림 처리 (실제 스트림의 파싱/변환 형)" + +#: pyhwp/hwp5/proc/cat.py:85 pyhwp/hwp5/proc/ls.py:121 +#: pyhwp/hwp5/proc/unpack.py:74 +msgid "" +"Treat as an OLE Compound File. As a result, some streams will " +"be presented as-is. (i.e. not decompressed)" +msgstr "" +"을 OLE Compound File로 취급합니다. 결과적으로 몇몇 스트림은 있는 그대로 나타납니다. 가령 압축해제" +" 되지 않습니다." + +#: pyhwp/hwp5/proc/diststream.py:93 pyhwp/hwp5/proc/diststream.py:96 +msgid "Decode a distribute document stream." +msgstr "배포 문서 스트림을 디코드합니다." + +#: pyhwp/hwp5/proc/diststream.py:104 +msgid "Print SHA-1 value for decryption." +msgstr "복호화를 위한 SHA-1 값을 출력합니다." + +#: pyhwp/hwp5/proc/diststream.py:109 +msgid "Print decrypted key." +msgstr "복호화된 키를 출력합니다." + +#: pyhwp/hwp5/proc/diststream.py:114 +msgid "Print raw binary objects as is." 
+msgstr "바이너리 객체를 있는 그대로 출력합니다." + +#: pyhwp/hwp5/proc/find.py:107 pyhwp/hwp5/proc/find.py:110 +msgid "Find record models with specified predicates." +msgstr "지정한 조건으로 레코드 모형을 검색합니다." + +#: pyhwp/hwp5/proc/find.py:118 +msgid ".hwp files to analyze" +msgstr "분석할 .hwp 파일" + +#: pyhwp/hwp5/proc/find.py:123 +msgid "get filenames from stdin" +msgstr "표준 입력에서 파일 이름을 얻습니다." + +#: pyhwp/hwp5/proc/find.py:129 +msgid "filter with record model name" +msgstr "레코드 모형 이름으로 찾습니다." + +#: pyhwp/hwp5/proc/find.py:136 +msgid "filter with record HWPTAG" +msgstr "레코드 HWPTAG 값으로 찾습니다." + +#: pyhwp/hwp5/proc/find.py:143 +msgid "filter with incompletely parsed content" +msgstr "내용이 완전히 다 파싱되지 않은 레코드를 찾습니다." + +#: pyhwp/hwp5/proc/find.py:148 +msgid "record output format" +msgstr "레코드 출력 형식 지정" + +#: pyhwp/hwp5/proc/find.py:155 +msgid "dump record" +msgstr "레코드 덤프" + +#: pyhwp/hwp5/proc/header.py:47 +msgid "Print file headers of .hwp files." +msgstr ".hwp 파일의 헤더를 출력합니다." + +#: pyhwp/hwp5/proc/header.py:48 +msgid "Print the file header of ." +msgstr "의 파일 헤더를 출력합니다." + +#: pyhwp/hwp5/proc/ls.py:101 +msgid "List streams in .hwp files." +msgstr ".hwp 파일의 내부 스트림 목록을 출력합니다." + +#: pyhwp/hwp5/proc/ls.py:102 +msgid "List streams in the ." +msgstr "의 스트림 목록을 출력합니다." + +#: pyhwp/hwp5/proc/models.py:80 +msgid "Print parsed binary models of .hwp file record streams." +msgstr "지정한 .hwp 파일 레코드 스트림을 파싱한 레코드 모형을 출력합니다." + +#: pyhwp/hwp5/proc/models.py:83 +msgid "Print parsed binary models in the specified ." +msgstr "지정한 을 파싱한 레코드 모형을 출력합니다." + +#: pyhwp/hwp5/proc/models.py:97 pyhwp/hwp5/proc/records.py:105 +msgid "" +"Record-structured internal streams.\n" +"(e.g. DocInfo, BodyText/*)\n" +msgstr "" +"레코드 구조를 갖는 내부 스트림.\n" +"(예: DocInfo, BodyText/*)\n" + +#: pyhwp/hwp5/proc/models.py:106 +msgid "Specifies HWPv5 file format version of the standard input stream" +msgstr "표준 입력의 HWPv5 파일 포맷 버젼을 지정합니다." 
+ +#: pyhwp/hwp5/proc/models.py:114 pyhwp/hwp5/proc/records.py:114 +msgid "Print records as simple tree" +msgstr "레코드열을 단순한 수형도樹形圖로 출력합니다." + +#: pyhwp/hwp5/proc/models.py:121 pyhwp/hwp5/proc/records.py:121 +msgid "Print records as json" +msgstr "레코드를 JSON 형식으로 출력합니다." + +#: pyhwp/hwp5/proc/models.py:128 +msgid "Print records formatted" +msgstr "레코드를 포맷팅하여 출력합니다." + +#: pyhwp/hwp5/proc/models.py:135 +msgid "Print records as events" +msgstr "레코드를 이벤트로 출력합니다." + +#: pyhwp/hwp5/proc/models.py:143 pyhwp/hwp5/proc/records.py:159 +msgid "Specifies the N-th subtree of the record structure." +msgstr "레코드 구조의 N번째 하위 트리를 지정합니다." + +#: pyhwp/hwp5/proc/models.py:150 +msgid "Print a model of -th record" +msgstr "번째 레코드의 모형을 출력합니다." + +#: pyhwp/hwp5/proc/rawunz.py:46 pyhwp/hwp5/proc/rawunz.py:49 +msgid "Deflate an headerless zlib-compressed stream" +msgstr "헤더 없는 zlib 압축 스트림을 복원합니다." + +#: pyhwp/hwp5/proc/records.py:88 +msgid "Print the record structure of .hwp file record streams." +msgstr ".hwp 파일 레코드 스트림의 레코드 구조를 출력합니다." + +#: pyhwp/hwp5/proc/records.py:91 +msgid "Print the record structure of the specified stream." +msgstr "지정한 스트림의 레코드 구조를 출력합니다." + +#: pyhwp/hwp5/proc/records.py:128 +msgid "Print records as is" +msgstr "레코드를 있는 그대로 출력합니다." + +#: pyhwp/hwp5/proc/records.py:135 +msgid "Print record headers as is" +msgstr "레코드 헤더를 있는 그대로 출력합니다." + +#: pyhwp/hwp5/proc/records.py:142 +msgid "Print record payloads as is" +msgstr "레코드 페이로드를 있는 그대로 출력합니다." + +#: pyhwp/hwp5/proc/records.py:150 +msgid "" +"Specifies the range of the records.\n" +"N-M means \"from the record N to M-1 (excluding M)\"\n" +"N means just the record N\n" +msgstr "" +"레코드의 범위를 지정합니다.\n" +"N-M은 \"N번째 레코드로부터 M-1번째 레코드까지를 뜻합니다(M번째 레코드 제외)\"\n" +"N은 N번째 레코드를 뜻합니다.\n" + +#: pyhwp/hwp5/proc/summaryinfo.py:51 +msgid "Print summary informations of .hwp files." +msgstr ".hwp 파일의 요약 정보를 출력합니다." + +#: pyhwp/hwp5/proc/summaryinfo.py:54 +msgid "Print the summary information of ." +msgstr "의 요약 정보를 출력합니다." 
+ +#: pyhwp/hwp5/proc/unpack.py:43 +msgid "Extract out internal streams of .hwp files into a directory." +msgstr ".hwp 파일의 내부 스트림들을 디렉토리에 추출합니다." + +#: pyhwp/hwp5/proc/unpack.py:46 +msgid "Extract out streams in the specified to a directory." +msgstr "지정한 의 내부 스트림들을 디렉토리에 추출합니다." + +#: pyhwp/hwp5/proc/unpack.py:60 +msgid "Output directory" +msgstr "출력 디렉토리" + +#: pyhwp/hwp5/proc/version.py:35 +msgid "Print the file format version of .hwp files." +msgstr ".hwp 파일의 형식 판본을 출력합니다." + +#: pyhwp/hwp5/proc/version.py:38 +msgid "Print the file format version of ." +msgstr "의 파일 형식 판본을 출력합니다." + +#: pyhwp/hwp5/proc/xml.py:81 +msgid "Transform .hwp files into an XML." +msgstr ".hwp 파일을 XML로 변환합니다." + +#: pyhwp/hwp5/proc/xml.py:84 +msgid "Transform into an XML." +msgstr "을 XML로 변환합니다." + +#: pyhwp/hwp5/proc/xml.py:96 +msgid "Embed BinData/* streams in the output XML." +msgstr "BinData/* 스트림을 출력 XML에 내장합니다." + +#: pyhwp/hwp5/proc/xml.py:101 +msgid "Do not output XML declaration." +msgstr " XML 선언을 출력하지 않습니다." + +#: pyhwp/hwp5/proc/xml.py:106 +msgid "Output filename." +msgstr "출력 파일이름" + +#: pyhwp/hwp5/proc/xml.py:111 +msgid "\"flat\", \"nested\" (default: \"nested\")" +msgstr "\"flat\", \"nested\" (기본값: \"nested\")" + +#: pyhwp/hwp5/proc/xml.py:116 +msgid "Do not validate well-formedness of output." +msgstr "출력의 well-formed 여부를 검증하지 않습니다." + +#~ msgid "Print HWP file header." +#~ msgstr "HWP 파일 헤더를 출력합니다." + diff --git a/src/hwp5/locale/ko/LC_MESSAGES/hwp5txt.po b/src/hwp5/locale/ko/LC_MESSAGES/hwp5txt.po new file mode 100644 index 0000000000000000000000000000000000000000..f2357646633423e56c23bd895fd96a6f850adaf3 --- /dev/null +++ b/src/hwp5/locale/ko/LC_MESSAGES/hwp5txt.po @@ -0,0 +1,40 @@ +# Korean translations for pyhwp. +# Copyright (C) 2017 https://github.com/mete0r +# This file is distributed under the same license as the pyhwp project. +# FIRST AUTHOR , 2017. 
+# +msgid "" +msgstr "" +"Project-Id-Version: pyhwp 0.1b9.dev0\n" +"Report-Msgid-Bugs-To: https://github.com/mete0r\n" +"POT-Creation-Date: 2019-04-10 22:55+0900\n" +"PO-Revision-Date: 2017-10-09 00:54+0900\n" +"Last-Translator: FULL NAME \n" +"Language: ko\n" +"Language-Team: ko \n" +"Plural-Forms: nplurals=1; plural=0\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.5.0\n" + +#: pyhwp/hwp5/hwp5txt.py:95 +msgid "HWPv5 to txt converter" +msgstr "HWPv5 txt 변환기" + +#: pyhwp/hwp5/hwp5txt.py:104 +msgid "Set log level." +msgstr "로그 수준 지정" + +#: pyhwp/hwp5/hwp5txt.py:108 +msgid "Set log file." +msgstr "로그 파일 지정" + +#: pyhwp/hwp5/hwp5txt.py:112 +msgid "Output file" +msgstr "출력 파일" + +#: pyhwp/hwp5/hwp5txt.py:117 +msgid ".hwp file to convert" +msgstr "변환할 .hwp 파일" + diff --git a/src/hwp5/locale/ko/LC_MESSAGES/hwp5view.po b/src/hwp5/locale/ko/LC_MESSAGES/hwp5view.po new file mode 100644 index 0000000000000000000000000000000000000000..27df7524a164218e9fb353f9c04b09e39d89bfb0 --- /dev/null +++ b/src/hwp5/locale/ko/LC_MESSAGES/hwp5view.po @@ -0,0 +1,36 @@ +# Korean translations for pyhwp. +# Copyright (C) 2017 https://github.com/mete0r +# This file is distributed under the same license as the pyhwp project. +# FIRST AUTHOR , 2017. +# +msgid "" +msgstr "" +"Project-Id-Version: pyhwp 0.1b9.dev0\n" +"Report-Msgid-Bugs-To: https://github.com/mete0r\n" +"POT-Creation-Date: 2019-04-10 22:55+0900\n" +"PO-Revision-Date: 2017-10-09 02:08+0900\n" +"Last-Translator: FULL NAME \n" +"Language: ko\n" +"Language-Team: ko \n" +"Plural-Forms: nplurals=1; plural=0\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.5.0\n" + +#: pyhwp/hwp5/hwp5view.py:81 +msgid "HWPv5 viewer (Experimental, gtk only)" +msgstr "HWPv5 뷰어 (실험적, GTK만 지원)" + +#: pyhwp/hwp5/hwp5view.py:92 +msgid "Set log level." 
+msgstr "로그 수준 지정" + +#: pyhwp/hwp5/hwp5view.py:96 +msgid "Set log file." +msgstr "로그 파일 지정" + +#: pyhwp/hwp5/hwp5view.py:101 +msgid ".hwp file to view" +msgstr "표시할 .hwp 파일" + diff --git a/src/hwp5/msoleprops.py b/src/hwp5/msoleprops.py new file mode 100644 index 0000000000000000000000000000000000000000..c6b6dad124f68a14bcf0724965cca4b7d606c047 --- /dev/null +++ b/src/hwp5/msoleprops.py @@ -0,0 +1,677 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+# +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals + +from collections import namedtuple +from datetime import datetime +from datetime import timedelta +from uuid import UUID +import logging +import struct + +from hwp5.dataio import Struct +from hwp5.dataio import Flags +from hwp5.dataio import N_ARRAY +from hwp5.dataio import ARRAY +from hwp5.dataio import BYTE +from hwp5.dataio import UINT16 +from hwp5.dataio import UINT32 +from hwp5.dataio import INT32 +from hwp5.bintype import read_type + + +logger = logging.getLogger(__name__) + + +vt_types = dict() + + +def PropertyType(code): + + def decorator(cls): + cls.code = code + vt_types[code] = cls + return cls + + return decorator + + +@PropertyType(code=0x0003) +class VT_I4(object): + + @classmethod + def read_value(cls, context, f): + return read_type(INT32, context, f) + + +@PropertyType(code=0x001F) +class VT_LPWSTR(object): + + @classmethod + def read_value(cls, context, f): + length = read_type(UINT32, context, f) + data = f.read(length * 2) + return data.decode('utf-16le')[:-1] # remove null character + + +@PropertyType(code=0x0040) +class VT_FILETIME(object): + + @classmethod + def read_value(cls, context, f): + lword = read_type(UINT32, context, f) + hword = read_type(UINT32, context, f) + value = hword << 32 | lword + value = FILETIME(value) + return value + + +class FILETIME(object): + __slots__ = ('value', ) + + def __init__(self, value): + self.value = value + + def __str__(self): + return str(self.datetime) + + @property + def datetime(self): + return ( + datetime(1601, 1, 1, 0, 0, 0) + + timedelta(microseconds=self.value / 10) + ) + + +PropertyIdentifier = namedtuple('PropertyIdentifier', [ + 'id', + 'label', +]) + + +PID_DICTIONARY = PropertyIdentifier( + id=0x00000000, + label='PID_DICTIONARY', +) +PID_CODEPAGE = PropertyIdentifier( + id=0x00000001, + label='PID_CODEPAGE', +) +PID_LOCALE = PropertyIdentifier( + id=0x80000000, + 
label='PID_LOCALE', +) +PID_BEHAVIOR = PropertyIdentifier( + id=0x80000003, + label='PID_BEHAVIOR', +) +PIDSI_TITLE = PropertyIdentifier( + id=0x02, + label='PIDSI_TITLE' +) +PIDSI_SUBJECT = PropertyIdentifier( + id=0x03, + label='PIDSI_SUBJECT' +) +PIDSI_AUTHOR = PropertyIdentifier( + id=0x04, + label='PIDSI_AUTHOR' +) +PIDSI_KEYWORDS = PropertyIdentifier( + id=0x05, + label='PIDSI_KEYWORDS' +) +PIDSI_COMMENTS = PropertyIdentifier( + id=0x06, + label='PIDSI_COMMENTS' +) +PIDSI_TEMPLATE = PropertyIdentifier( + id=0x07, + label='PIDSI_TEMPLATE' +) +PIDSI_LASTAUTHOR = PropertyIdentifier( + id=0x08, + label='PIDSI_LASTAUTHOR' +) +PIDSI_REVNUMBER = PropertyIdentifier( + id=0x09, + label='PIDSI_REVNUMBER' +) +PIDSI_EDITTIME = PropertyIdentifier( + id=0x0a, + label='PIDSI_EDITTIME' +) +PIDSI_LASTPRINTED = PropertyIdentifier( + id=0x0b, + label='PIDSI_LASTPRINTED' +) +PIDSI_CREATE_DTM = PropertyIdentifier( + id=0x0c, + label='PIDSI_CREATE_DTM' +) +PIDSI_LASTSAVE_DTM = PropertyIdentifier( + id=0x0d, + label='PIDSI_LASTSAVE_DTM' +) +PIDSI_PAGECOUNT = PropertyIdentifier( + id=0x0e, + label='PIDSI_PAGECOUNT' +) +PIDSI_WORDCOUNT = PropertyIdentifier( + id=0x0f, + label='PIDSI_WORDCOUNT' +) +PIDSI_CHARCOUNT = PropertyIdentifier( + id=0x10, + label='PIDSI_CHARCOUNT' +) +PIDSI_THUMBNAIL = PropertyIdentifier( + id=0x11, + label='PIDSI_THUMBNAIL' +) +PIDSI_APPNAME = PropertyIdentifier( + id=0x12, + label='PIDSI_APPNAME' +) +PIDSI_SECURITY = PropertyIdentifier( + id=0x13, + label='PIDSI_SECURITY' +) + + +RESERVED_PROPERTIES = ( + PID_DICTIONARY, + PID_CODEPAGE, + PID_LOCALE, + PID_BEHAVIOR, +) + + +SUMMARY_INFORMATION_PROPERTIES = ( + PIDSI_TITLE, + PIDSI_SUBJECT, + PIDSI_AUTHOR, + PIDSI_KEYWORDS, + PIDSI_COMMENTS, + PIDSI_TEMPLATE, + PIDSI_LASTAUTHOR, + PIDSI_REVNUMBER, + PIDSI_EDITTIME, + PIDSI_LASTPRINTED, + PIDSI_CREATE_DTM, + PIDSI_LASTSAVE_DTM, + PIDSI_PAGECOUNT, + PIDSI_WORDCOUNT, + PIDSI_CHARCOUNT, + PIDSI_THUMBNAIL, + PIDSI_APPNAME, + PIDSI_SECURITY, +) + + +class 
Property(object): + + def __init__(self, desc, idLabel, type, value): + self.desc = desc + self.idLabel = idLabel + self.type = type + self.value = value + + @property + def id(self): + return self.desc.id + + +class PropertyDesc(Struct): + + def __init__(self, id, offset): + self.id = id + self.offset = offset + + @classmethod + def fromDict(cls, d): + return cls(id=d['id'], offset=d['offset']) + + def attributes(): + yield UINT32, 'id' + yield UINT32, 'offset' # offset from section start + attributes = staticmethod(attributes) + + +class PropertyReader(object): + + def __init__(self, propsetDesc, propDesc, idLabel, codepage, + displayName=None): + self.propsetDesc = propsetDesc + self.propDesc = propDesc + self.idLabel = idLabel + self.codepage = codepage + self.displayName = displayName + + def read(self, f): + f.seek(self.propsetDesc.offset + self.propDesc.offset) + + context = {} + propType = read_type(TypedPropertyValue, context, f) + propType = TypedPropertyValue.fromDict(propType) + vt_type = vt_types[propType.code] + propValue = vt_type.read_value(context, f) + + return Property( + desc=self.propDesc, + idLabel=self.idLabel, + type=propType, + value=propValue, + ) + + +class TypedPropertyValue(Struct): + ''' + [MS-OLEPS] 2.15 TypedPropertyValue + ''' + + def __init__(self, code): + self.code = code + + @classmethod + def fromDict(cls, d): + return cls(code=d['type'].code) + + TypeFlags = Flags(UINT32, + 0, 16, 'code') + + def attributes(cls): + yield cls.TypeFlags, 'type' + attributes = classmethod(attributes) + + @property + def vt_type(self): + try: + return vt_types[self.code] + except KeyError: + return None + + +class DictionaryEntry(Struct): + ''' + [MS-OLEPS] 2.16 DictionaryEntry + ''' + + def __init__(self, id, name): + self.id = id + self.name = name + + @classmethod + def fromDict(cls, d): + return cls( + id=d['id'], + name=nullterminated_string(d['name']), + ) + + def attributes(): + from hwp5.dataio import N_ARRAY + from hwp5.dataio import BYTE 
+ yield UINT32, 'id' + yield N_ARRAY(UINT32, BYTE), 'name' + attributes = staticmethod(attributes) + + +class Dictionary(Struct): + ''' + [MS-OLEPS] 2.17 Dictionary + ''' + + def __init__(self, entries): + self.entries = entries + + @classmethod + def fromDict(cls, d): + entries = tuple( + DictionaryEntry.fromDict(entry) + for entry in d['entries'] + ) + return cls(entries=entries) + + def attributes(): + from hwp5.dataio import N_ARRAY + yield N_ARRAY(UINT32, DictionaryEntry), 'entries' + attributes = staticmethod(attributes) + + def get(self, id, defvalue=None): + for entry in self.entries: + if id == entry.id: + return entry.name + return defvalue + + +class DictionaryReader(object): + + def __init__(self, propsetDesc, propDesc, idLabel, codepage): + self.propsetDesc = propsetDesc + self.propDesc = propDesc + self.idLabel = idLabel + self.codepage = codepage + + def read(self, f): + propsetDesc = self.propsetDesc + propDesc = self.propDesc + idLabel = self.idLabel + + f.seek(propsetDesc.offset + propDesc.offset) + context = {} + propType = None + propValue = read_type(Dictionary, context, f) + propValue = Dictionary.fromDict(propValue) + return Property( + desc=propDesc, + idLabel=idLabel, + type=propType, + value=propValue, + ) + + +class PropertySet(object): + ''' + [MS-OLEPS] 2.20 PropertySet + ''' + + def __init__(self, desc, header, properties): + self.desc = desc + self.header = header + self.properties = properties + + @property + def fmtid(self): + return self.desc.fmtid + + def __getitem__(self, propertyIdentifier): + for property in self.properties: + if property.id == propertyIdentifier.id: + return property.value + raise KeyError(propertyIdentifier) + + +class PropertySetHeader(Struct): + + def __init__(self, bytesize, propDescList): + self.bytesize = bytesize, + self.propDescList = propDescList + + @classmethod + def fromDict(cls, d): + return cls( + bytesize=d['bytesize'], + propDescList=tuple( + PropertyDesc.fromDict( + propDesc + ) + for propDesc 
in d['propDescList'] + ), + ) + + def attributes(): + from hwp5.dataio import N_ARRAY + yield UINT32, 'bytesize' + yield N_ARRAY(UINT32, PropertyDesc), 'propDescList' + attributes = staticmethod(attributes) + + +class PropertySetDesc(Struct): + + def __init__(self, fmtid, offset): + self.fmtid = fmtid + self.offset = offset + + def attributes(): + yield ARRAY(BYTE, 16), 'fmtid' + yield UINT32, 'offset' + attributes = staticmethod(attributes) + + @classmethod + def fromDict(cls, d): + return cls( + fmtid=uuid_from_bytes_tuple(d['fmtid']), + offset=d['offset'], + ) + + +class PropertySetStreamHeader(Struct): + + def __init__(self, byteOrder, version, systemIdentifier, clsid, + propsetDescList): + self.byteOrder = byteOrder + self.version = version + self.systemIdentifier = systemIdentifier + self.clsid = clsid + self.propsetDescList = propsetDescList + + @classmethod + def fromDict(cls, d): + return cls( + byteOrder=d['byteOrder'], + version=d['version'], + systemIdentifier=d['systemIdentifier'], + clsid=uuid_from_bytes_tuple(d['clsid']), + propsetDescList=tuple( + PropertySetDesc.fromDict( + propsetDesc + ) + for propsetDesc in d['propsetDescList'] + ) + ) + + def attributes(): + yield UINT16, 'byteOrder' + yield UINT16, 'version' + yield UINT32, 'systemIdentifier' + yield ARRAY(BYTE, 16), 'clsid' + yield N_ARRAY(UINT32, PropertySetDesc), 'propsetDescList' + attributes = staticmethod(attributes) + + +class PropertySetStream(object): + ''' + [MS-OLEPS] 2.21 PropertySetStream + ''' + + def __init__(self, header, propertysets): + self.header = header + self.propertysets = propertysets + + @property + def byteOrder(self): + return self.header.byteOrder + + @property + def version(self): + return self.header.version + + @property + def systemIdentifier(self): + return self.header.systemIdentifier + + @property + def clsid(self): + return self.header.clsid + + +class PropertySetFormat(object): + + def __init__(self, fmtid, propertyIdentifiers): + self.fmtid = fmtid + 
self.propertyIdentifiers = propertyIdentifiers + + @property + def idLabels(self): + return { + p.id: p.label + for p in self.propertyIdentifiers + } + + +class PropertySetStreamReader(object): + + def __init__(self, propertySetFormats): + self.propertySetFormats = { + propsetFormat.fmtid: propsetFormat + for propsetFormat in propertySetFormats + } + + def read(self, f): + context = {} + streamHeader = read_type(PropertySetStreamHeader, context, f) + streamHeader = PropertySetStreamHeader.fromDict(streamHeader) + propertysetList = list() + for propsetDesc in streamHeader.propsetDescList: + f.seek(propsetDesc.offset) + propsetHeader = read_type(PropertySetHeader, context, f) + propsetHeader = PropertySetHeader.fromDict( + propsetHeader, + ) + try: + propsetFormat = self.propertySetFormats[propsetDesc.fmtid] + except KeyError: + idLabels = {} + else: + idLabels = propsetFormat.idLabels + + properties = [] + propDescMap = { + propDesc.id: propDesc + for propDesc in propsetHeader.propDescList + } + + propDesc = propDescMap.pop(PID_CODEPAGE.id, None) + if propDesc is not None: + idLabel = idLabels.get(propDesc.id) + propReader = PropertyReader( + propsetDesc=propsetDesc, + propDesc=propDesc, + idLabel=idLabel, + codepage=None, + displayName=None, + ) + prop = propReader.read(f) + properties.append(prop) + + codepage = prop.value + else: + codepage = None + + propDesc = propDescMap.pop(PID_DICTIONARY.id, None) + if propDesc is not None: + idLabel = idLabels.get(propDesc.id) + propReader = DictionaryReader( + propsetDesc, + propDesc, + idLabel, + codepage, + ) + prop = propReader.read(f) + properties.append(prop) + + dictionary = prop.value + else: + dictionary = None + + for propDesc in propDescMap.values(): + idLabel = idLabels.get(propDesc.id) + displayName = dictionary.get(propDesc.id, None) + propReader = PropertyReader( + propsetDesc=propsetDesc, + propDesc=propDesc, + idLabel=idLabel, + codepage=codepage, + displayName=displayName, + ) + prop = propReader.read(f) + 
properties.append(prop) + + propertyset = PropertySet( + desc=propsetDesc, + header=propsetHeader, + properties=properties, + ) + propertysetList.append(propertyset) + + return PropertySetStream( + header=streamHeader, + propertysets=propertysetList, + ) + + +class PropertySetStreamTextFormatter(object): + + def formatTextLines(self, stream): + yield '- ByteOrder: 0x%x' % stream.byteOrder + yield '- Version: %d' % stream.version + yield '- SystemIdentifier: 0x%08x' % stream.systemIdentifier + yield '- CLSID: %s' % stream.clsid + yield '' + + for propertyset in stream.propertysets: + title = 'Property Set {}'.format( + propertyset.fmtid, + ) + yield '- {:08x}: {}'.format( + propertyset.desc.offset, + title, + ) + yield ' {}'.format( + '-' * len(title) + ) + + properties = sorted( + propertyset.properties, + key=lambda property: property.desc.offset, + ) + for property in properties: + if property.id == PID_DICTIONARY.id: + yield '- {:08x}: {}(=0x{:08x}):'.format( + propertyset.desc.offset + property.desc.offset, + property.idLabel if property.idLabel is not None + else '', + property.id, + ) + for entry in property.value.entries: + yield ' - {}: {}'.format( + entry.id, + entry.name, + ) + else: + yield '- {:08x}: {}(=0x{:08x}): {}'.format( + propertyset.desc.offset + property.desc.offset, + property.idLabel if property.idLabel is not None + else '', + property.id, + property.value + ) + + +def uuid_from_bytes_tuple(t): + fmt = 'B' * len(t) + fmt = '<' + fmt + bytes_le = struct.pack(fmt, *t) + return UUID(bytes_le=bytes_le) + + +def nullterminated_string(bs): + return ''.join(chr(x) for x in bs)[:-1] diff --git a/src/hwp5/odf-relaxng/OpenDocument-manifest-schema-v1.0-os.rng b/src/hwp5/odf-relaxng/OpenDocument-manifest-schema-v1.0-os.rng new file mode 100644 index 0000000000000000000000000000000000000000..97fe580eab9a4a690ee8ea9bf719a57dac9d7bde --- /dev/null +++ b/src/hwp5/odf-relaxng/OpenDocument-manifest-schema-v1.0-os.rng @@ -0,0 +1,111 @@ + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/hwp5/odf-relaxng/OpenDocument-manifest-schema-v1.1.rng b/src/hwp5/odf-relaxng/OpenDocument-manifest-schema-v1.1.rng new file mode 100644 index 0000000000000000000000000000000000000000..4082d4ba95d67d9102b9b932ac88fe3283b44193 --- /dev/null +++ b/src/hwp5/odf-relaxng/OpenDocument-manifest-schema-v1.1.rng @@ -0,0 +1,111 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/hwp5/odf-relaxng/OpenDocument-schema-v1.0-os.rng b/src/hwp5/odf-relaxng/OpenDocument-schema-v1.0-os.rng new file mode 100644 index 0000000000000000000000000000000000000000..cf4ee51741a5079c863043490347436739e50433 --- /dev/null +++ b/src/hwp5/odf-relaxng/OpenDocument-schema-v1.0-os.rng @@ -0,0 +1,17666 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + boolean + short + int + long + double + string + datetime + base64Binary + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + onRequest + + + + + + + + + + + + + + + + + + + simple + + + + + replace + + + + + onLoad + + + + + + + + + + + + + + + + + + + + + + + + + new + replace + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + float + + + + + + date + + + + + + time + + + + + + boolean + + + + + + string + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true + none + + + + + condition + + + + + + + + + + + + + + + + + + + + + simple + + + + + embed + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + onRequest + + + + + + + + + + + + + new + replace + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + footnote + endnote + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + previous + current + next + + + + + + + + + + + + + + previous + next + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + name + number + number-and-name + plain-number-and-name + plain-number + + + + + + + + + + + + + + + + + + + full + path + name + name-and-extension + + + + + + + + + + + + + + + + + + full + path + name + name-and-extension + area + title + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text:page-count + text:paragraph-count + text:word-count + text:character-count + text:table-count + text:image-count + text:object-count + + + + + + + + + + + + + + + + + + + + + + + + + + + + + table + query + command + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text + table + text-box + image + object + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + text:reference-ref + text:bookmark-ref + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + page + chapter + direction + text + + + + + + + + + page + chapter + direction + text + category-and-value + caption + value + + + + + + + + + + + + + + + simple + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + value + unit + gap + + + + + + + + + + + + + + + + + + + + + + + + + float + + + + + + + + percentage + + + + + + + + currency + + + + + + + + + + + + + date + + + + + + + + time + + + + + + + + boolean + + + + + + + + string + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + value + none + + + + + + + + + value + formula + none + + + + + + + + + value + formula + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text:identifier + text:address + text:annote + text:author + text:booktitle + text:chapter + text:edition + text:editor + text:howpublished + text:institution + text:journal + text:month + text:note + text:number + text:organizations + text:pages + text:publisher + text:school + text:series + text:title + text:report-type + text:volume + text:year + text:url + text:custom1 + text:custom2 + text:custom3 + text:custom4 + text:custom5 + text:isbn + text:issn + + + + + + + + + + article + book + booklet + conference + custom1 + custom2 + custom3 + custom4 + custom5 + email + inbook + incollection + inproceedings + journal + manual + mastersthesis + misc + phdthesis + proceedings + techreport + unpublished + www + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + document + chapter + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + document + chapter + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text + category-and-value + caption + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + + + + + + + + + + + + + + + + + + 1 + 2 + 3 + separator + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + name + number + number-and-name + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + address + annote + author + bibliography-type + booktitle + chapter + custom1 + custom2 + custom3 + custom4 + custom5 + edition + editor + howpublished + identifier + institution + isbn + issn + journal + month + note + number + organizations + pages + publisher + report-type + school + series + title + url + volume + year + + + + + + + + + + + + + + + + + + + + + + + + right + + + + left + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + visible + collapse + filter + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ($?([^\. ']+|'[^']+'))?\.$?[A-Z]+$?[0-9]+ + + + + + ($?([^\. ']+|'[^']+'))?\.$?[A-Z]+$?[0-9]+(:($?([^\. ']+|'[^']+'))?\.$?[A-Z]+$?[0-9]+)? + + + + + + + + + + + + + + + + + + + copy-all + copy-results-only + + + + + + + + + + + + + + + simple + + + + + onRequest + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + trace-dependents + remove-dependents + trace-precedents + remove-precedents + trace-errors + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + from-another-table + to-another-table + from-same-table + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + enable + disable + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + unsorted + sort-ascending + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + stop + warning + information + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + column + row + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + + + + print-range + filter + repeat-row + repeat-column + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + column + row + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text + number + automatic + + + + + + + + + + ascending + descending + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text + number + automatic + + + + + + + + + + ascending + descending + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + auto + average + count + countnums + max + min + product + stdev + stdevp + sum + var + varp + + + + + + + + + + + + + + + + + + + + + + + + + + self + cell-range + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text + number + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + row + column + both + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + row + column + data + hidden + + + + + page + + + + + + + + + + + + + + + + + + + auto + average + count + countnums + max + min + product + stdev + stdevp + sum + var + varp + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + auto + average + count + countnums + max + min + product + stdev + stdevp + sum + var + varp + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + from-top + from-bottom + + + + + + + + + + + + + + data + + + + + + + + none + manual 
+ name + + + + + + + + ascending + descending + + + + + + + + + + + + + tabular-layout + outline-subtotals-top + outline-subtotals-bottom + + + + + + + + + + + + + + + + + + + + + + + named + + + + + + + + previous + next + + + + + + + + none + member-difference + member-percentage + member-percentage-difference + running-total + row-percentage + column-percentage + total-percentage + index + + + + + + + + + + + + + + + + + + + + + + auto + + + + + + auto + + + + + + + + + + auto + + + + + + auto + + + + + + + + + + + + + seconds + minutes + hours + days + months + quarters + years + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + auto + average + count + countnums + max + min + product + stdev + stdevp + sum + var + varp + + + + + + + + + + + + + + + + + + + none + row + column + both + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + row + column + table + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + row + column + table + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + accepted + rejected + pending + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + always + screen + printer + none + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + false + + + + true + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + full + section + cut + arc + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + standard + lines + line + curve + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + page + frame + paragraph + char + as-char + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + top-left + top + top-right + left + center + right + bottom-left + bottom-right + + + + + + + auto + left + right + up + down + horizontal + vertical + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + scale + scale-min + + + + + + + + scale + scale-min + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + + + embed + + + + + + + onLoad + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + + onRequest + + + + + + + + + + + + + + new + replace + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + + + + + + + + new + replace + + + + + + + + + + + + + + + + nohref + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + parallel + perspective + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + flat + phong + gouraud + draft + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + non-primitive + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + flat + phong + gouraud + draft + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + parallel + perspective + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + segments + rectangle + + + + + + + + + + + + + + + + + + + + + normal + path + shape + + + + + + + + + path + shape + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + title + outline + subtitle + text + graphic + object + chart + table + orgchart + page + notes + handout + header + footer + date-time + page-number + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + + + onRequest + + + + + + + new + replace + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + fade + move + stripes + open + close + dissolve + wavyline + random + lines + laser + appear + hide + move-short + checkerboard + rotate + stretch + + + + + + + + + + + + none + from-left + from-top + from-right + from-bottom + from-center + from-upper-left + from-upper-right + from-lower-left + from-lower-right + to-left + to-top + to-right + to-bottom + to-upper-left + to-upper-right + to-lower-right + to-lower-left + path + spiral-inward-left + spiral-inward-right + spiral-outward-left + spiral-outward-right + vertical + horizontal + to-center + clockwise + counter-clockwise + + + + + + + + + + + + slow + medium + fast + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + default + on-click + with-previous + after-previous + timing-root + main-sequence + interactive-sequence + + + + + + + + + + + + + + + + + + + + + + + custom + entrance + exit + emphasis + motion-path + ole-action + media-call + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + previous-page + next-page + first-page + last-page + hide + stop + execute + show + verb + fade-out + sound + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + + + embed + + + + + + + onRequest + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + fixed + current-date + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + enabled + disabled + + + + + + + + + enabled + disabled + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + start + end + top + bottom + + + + + + start + center + end + + + + + + + top-start + bottom-start + top-end + bottom-end + + + + + + + + + + + + + wide + high + balanced + + + + + custom + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + row + column + both + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + x + y + z + + + + + + + + + + + + + + + + + + + + + + + + + + + major + minor + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + onRequest + + + + + + + + + + + + + + + + get + post + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + table + query + command + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + current + parent + + + + + + + + + + + + + + + + + + + records + current + page + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + unchecked + checked + unknown + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + horizontal + vertical + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + submit + reset + push + url + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + flat + 3d + + + + + + + + + center + + + + + + start + end + top + bottom + + + + + + start + center + end + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + table + query + sql + sql-pass-through + value-list + table-fields + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + void + + + + + + + + + + + + + + float + + + + + + + + + + + + percentage + + + + + + + + + + + + currency + + + + + + + + + + + + + + + + + date + + + + + + + + + + + + time + + + + + + + + + + + + boolean + + + + + + + + + + + + string + + + + + + + + + + + void + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1 + i + I + + + + + + + + a + A + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + onRequest + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + into-default-style-data-style + into-english-number + keep-text + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + discrete + linear + paced + spline + + + + + + + + + + + + + + + + + + + + + rgb + hsl + + + + + + + + + clockwise + counter-clockwise + + + + + + + + + + + + + + + + + + translate + scale + rotate + skewX + skewY + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + forward + reverse + + + + + + + + + forward + reverse + + + + + + + + + in + out + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + discrete + linear + paced + spline + + + + + + + + + + + + + + + + + + + + + + + none + sum + + + + + + + + + replace + sum + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + first + last + all + media + + + + + + + + + + + + + + + + + + + remove + freeze + hold + auto + default + transition + + + + + + + + + remove + freeze + hold + transition + auto + inherit + + + + + + + + + never + always + whenNotActive + default + + + + + + + + + never + always + whenNotActive + inherit + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + all + left + right + mirrored + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + row + column + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + normal + ultra-condensed + extra-condensed + condensed + semi-condensed + semi-expanded + expanded + extra-expanded + ultra-expanded + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + onRequest + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + short + long + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + short + long + + + + + + + + + + + + + + + + short + long + + + + + + + + + + + + + + + + short + long + + + + + + + + + + + + + + + + short + long + + + + + + + + + + + + + + + + + + + + + + short + long + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + short + long + + + + + + + + + + + + + + + short + long + + + + + + + + + + + + + + + short + long + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + fixed + language + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + short + medium + long + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + gregorian + gengou + ROC + hanja_yoil + hanja + hijri + jewish + buddhist + + + + + + + + + text + + + + + + + + + + paragraph + + + + + + + + + + + + + section + + + + + + + + + + ruby + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + left + rigth + inner + outer + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + document + chapter + page + + + + + + + + + text + page + section + document + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + address + annote + author + bibliography-type + booktitle + chapter + custom1 + custom2 + custom3 + custom4 + custom5 + 
edition + editor + howpublished + identifier + institution + isbn + issn + journal + month + note + number + organizations + pages + publisher + report-type + school + series + title + url + volume + year + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + table + + + + + + + + + + table-column + + + + + + + + + + table-row + + + + + + + + + + table-cell + + + + + + + + + + + + + + + + + graphic + presentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + drawing-page + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + linear + axial + radial + ellipsoid + square + rectangular + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + objectBoundingBox + + + + + + + + + + + pad + reflect + repeat + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + single + double + triple + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + + + embed + + + + + + + onLoad + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + rect + round + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + chart + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + default + + + + + + + + + + portrait + landscape + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + headers + grid + annotations + objects + charts + drawings + formulas + zero-values + + + + + + + + + + + ttb + ltr + + + + + + + + + + continue + + + + + + + + + + + + + + + + + + + + + horizontal + vertical + both + none + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + left + center + right + + + + + + + + + + + + + + + + + + + none + line + both + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + normal + small-caps + + + + + + + none + lowercase + uppercase + capitalize + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + font-color + + + + + + + + + + + + + + + + + + + + + + + + + + super + sub + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + roman + swiss + modern + decorative + script + system + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + fixed + variable + + + + + + + + + + + + + [A-Za-z][A-Za-z0-9._\-]* + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + latin + asian + complex + ignore + + + + + + + + + + normal + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + normal + italic + oblique + + + + + + + none + embossed + engraved + + + + + + + + + + + + + + + none + + + + + + + + + + + + + + + none + single + double + + + + + + + + + + + + + none + solid + dotted + dash + long-dash + dot-dash + dot-dot-dash + wave + + + 
+ + + + + + + + + + auto + normal + bold + thin + dash + medium + thick + + + + + + + + + + font-color + + + + + + + + + + + + + + + + + + + + + + + + + + normal + bold + 100 + 200 + 300 + 400 + 500 + 600 + 700 + 800 + 900 + + + + + + + + + + + + + continuous + skip-white-space + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + letters + lines + + + + + + + + + + + + + + + + + + + + + none + + + none + accent + dot + circle + disc + + + above + below + + + + + + + + + + + + + + + + + + + + + + + + + fixed + line-height + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true + + + none + + + + condition + + + none + + + + + + + + + + + + + + + + + + + + + + + + normal + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + start + end + left + right + center + justify + + + + + + + + + start + center + justify + + + + + + + + + + + + + + + + auto + always + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + left + center + right + + + + + + char + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + font-color + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + auto + page + + + + + + + + + no-limit + + + + + + + + + + + + + + + + + + + + + + word + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + auto + column + page + + + + + + + auto + column + page + + + + + + + + + + + + + transparent + + + + + + + + + + + + + + + + + + + + + + + + + + no-repeat + repeat + stretch + + + + + + + + + left + center + right + top + bottom + + + + + + + + + + + + + + + + left + center + right + + + + + top + center + bottom + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + auto + always + + + + + + + + + + + + + + + + + + + + + + + none + ideograph-alpha + + + + + + + + + simple + hanging + + + + + + + + + normal + strict + + + + + + + + + top + middle + bottom + auto + + + + + + + + + + + + + lr-tb + rl-tb + tb-rl + tb-lr + lr + rl + tb + page + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + above + below + + + + + + + + + left + center + right + distribute-letter + distribute-space + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + solid + dotted + dashed + dot-dashed + + + + + + + + + + + + + + + + + + + + + top + middle + bottom + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + left + center + right + margins + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + collapsing + separating + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + auto + always + + + + + + + + + + + + + + + + + + + + + + + top + middle + bottom + automatic + + + + + + + + + fix + value-type + + + + + + + + + + + + + ltr + ttb + + + + + + + + + auto + 0 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + no-wrap + wrap + + + + + + + + + + + + + + + + + + + + none + bottom + top + center + + + + + + + + + none + hidden-and-protected + + + + protected + formula-hidden + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + none + dash + solid + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0 + 1 + + + + + + + + + + + miter + round + bevel + middle + none + inherit + + + + + + + + + none + solid + bitmap + gradient + hatch + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + no-repeat + repeat + stretch + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + top-left + top + top-right + left + center + right + bottom-left + bottom + bottom-right + + + + + + + + + + + + + + + + + + + + + + + + + + + + nonzero + evenodd + + + + + + + + + + + + + + + + none + scroll + alternate + slide + + + + + + + + + left + right + up + down + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + top + middle + bottom + justify + + + + + + + + + left + center + right + justify + + + + + + + + + no-wrap + wrap + + + + + + + + + + + + + + greyscale + mono + watermark + standard + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + visible + hidden + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + below + above + + + + + + + + + + + + + + + + automatic + left-outside + inside + right-outside + + + + + + + automatic + above + below + center + + + + + + + + + automatic + mm + cm + m + km + pt + pc + inch + ft + mi + + + + + + + + + + + + + + + + + + + + + + + straight-line + angled-line + angled-connector-line + + + + + + + + + fixed + free + + + + + + + + + + + + + + + + + + + + + + + horizontal + vertical + auto + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + correct + attractive + + + + + + + + + + + + + + + + + + + + + + + enabled + disabled + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + standard + double-sided + + + + + + + + + object + flat + sphere + + + + + + + + + normal + inverse + + + + + + + + + object + parallel + sphere + + + + + + + object + parallel + sphere + + + + + + + + + luminance + intesity + color + + + + + + + + + enabled + disabled + + + + + + + + + replace + modulate + blend + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + visible + hidden + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + + + + content + position + size + + + + + + + + + + + + left + center + right + from-left + inside + outside + from-inside + + + + + + + + + + + + + + page + page-content + page-start-margin + page-end-margin + frame + frame-content + frame-start-margin + frame-end-margin + paragraph + paragraph-content + paragraph-start-margin + paragraph-end-margin + char + + + + + + + + + + + + + top + middle + bottom + from-top + below + + + + + + + + + + + + + + + + + + page + page-content + frame + frame-content + paragraph + paragraph-content + char + line + baseline + text + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + left + right + parallel + dynamic + run-through + biggest + + + + + + + + + + + + + + + + no-limit + + + + + + + + + + + + + + + + + full + outside + + + + + + + + + foreground + background + + + + + + + + + + + + + + + + clip + auto-create-new-frame + + + + + + + + + none + vertical + + + vertical + + + + + vertical + + + + + + + + + horizontal + horizontal-on-odd + horizontal-on-even + + + + + + + + + + + + + + + iterative + once-concurrent + once-successive + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ([0-9]+(\.[0-9]*)?|\.[0-9]+)(px) + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + + + automatic + + + + named-symbol + + + + square + diamond + arrow-down + arrow-up + arrow-right + arrow-left + bow-tie + hourglass + circle + star + x + plus + asterisk + horizontal-bar + vertical-bar + + + + + + image + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + cubic-spline + b-spline + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + cuboid + cylinder + cone + pyramid + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + side-by-side + stagger-even + stagger-odd + + + + + + + + + + + + + + + none + value + percentage + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + variance + standard-deviation + percentage + error-margin + constant + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + columns + rows + + + + + + + + + none + linear + logarithmic + exponential + power + + + + + + + + + manual + automatic + semi-automatic + + + + + + + + + none + fade-from-left + fade-from-top + fade-from-right + fade-from-bottom + fade-from-upperleft + fade-from-upperright + fade-from-lowerleft + fade-from-lowerright + move-from-left + move-from-top + move-from-right + move-from-bottom + move-from-upperleft + move-from-upperright + move-from-lowerleft + move-from-lowerright + uncover-to-left + uncover-to-top + uncover-to-right + uncover-to-bottom + uncover-to-upperleft + uncover-to-upperright + uncover-to-lowerleft + uncover-to-lowerright + fade-to-center + fade-from-center + vertical-stripes + horizontal-stripes + clockwise + counterclockwise + open-vertical + open-horizontal + close-vertical + close-horizontal + wavyline-from-left + wavyline-from-top + 
wavyline-from-right + wavyline-from-bottom + spiralin-left + spiralin-right + spiralout-left + spiralout-right + roll-from-top + roll-from-left + roll-from-right + roll-from-bottom + stretch-from-left + stretch-from-top + stretch-from-right + stretch-from-bottom + + vertical-lines + horizontal-lines + dissolve + random + vertical-checkerboard + horizontal-checkerboard + interlocking-horizontal-left + interlocking-horizontal-right + interlocking-vertical-top + interlocking-vertical-bottom + fly-away + open + close + melt + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + forward + reverse + + + + + + + + + forward + reverse + + + + + + + + + + + + + + + + visible + hidden + + + + + + + + + + + + + + full + border + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true + false + + + + + + + + + + + + + + + + + [A-Za-z]{1,8}(-[A-Za-z0-9]{1,8})* + + + + + [A-Za-z0-9]{1,8} + + + + + [A-Za-z]{1,8} + + + + + 1 + + + + + -?([0-9]+(\.[0-9]*)?|\.[0-9]+)((cm)|(mm)|(in)|(pt)|(pc)|(px)) + + + + + + ([0-9]+(\.[0-9]*)?|\.[0-9]+)((cm)|(mm)|(in)|(pt)|(pc)|(px)) + + + + + + + ([0-9]+(\.[0-9]*)?|\.[0-9]+)((cm)|(mm)|(in)|(pt)|(pc)|(px)) + + + + + + -?([0-9]+(\.[0-9]*)?|\.[0-9]+)% + + + + + [0-9]+\* + + + + + + + + + + + #[0-9a-fA-F]{6} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + _self + _blank + _parent + _top + + + + + + + float + time + date + percentage + currency + boolean + string + + + + + + -?[0-9]+,-?[0-9]+([ ]+-?[0-9]+,-?[0-9]+)* + + + + + + + + + \([ ]*-?([0-9]+(\.[0-9]*)?|\.[0-9]+)([ ]+-?([0-9]+(\.[0-9]*)?|\.[0-9]+)){2}[ ]*\) + + + + + + + [0-9a-zA-Z_]+:[0-9a-zA-Z._\-]+ + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/hwp5/odf-relaxng/OpenDocument-schema-v1.1.rng b/src/hwp5/odf-relaxng/OpenDocument-schema-v1.1.rng new file mode 100644 index 
0000000000000000000000000000000000000000..3ba6a687c49e5b1637a01c8ac295804183538b2b --- /dev/null +++ b/src/hwp5/odf-relaxng/OpenDocument-schema-v1.1.rng @@ -0,0 +1,17891 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + boolean + short + int + long + double + string + datetime + base64Binary + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + onRequest + + + + + + + + + + + + + + + + + + + simple + + + + + replace + + + + + onLoad + + + + + + + + + + + + + + + + + + + + + + + + + new + replace + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + float + + + + + + date + + + + + + time + + + + + + boolean + + + + + + string + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true + none + + + + + condition + + + + + + + + + + + + + + + + + + + + + simple + + + + + embed + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + onRequest + + + + + + + + + + + + + new + replace + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + footnote + endnote + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + previous + current + next + + + + + + + + + + + + + + previous + next + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + name + number + number-and-name + plain-number-and-name + plain-number + + + + + + + + + + + + + + + + + + + full + path + name + name-and-extension + + + + + + + + + + + + + + + + + + full + path + name + name-and-extension + area + title + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text:page-count + text:paragraph-count + text:word-count + text:character-count + text:table-count + text:image-count + text:object-count + + + + + + + + + + + + + + + + + + + + + + + + + + + + + table + query + command + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text + table + text-box + image + object + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text:reference-ref + text:bookmark-ref + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + page + chapter + direction + text + + + + + + + + + page + chapter + direction + text + category-and-value + caption + value + + + + + + + + + + + + + + + simple + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + value + unit + gap + + + + + + + + + + + + + + + + + + + + + + + + + float + + + + + + + + percentage + + + + + + + + currency + + + + + + + + + + + + + date + + + + + + + + time + + + + + + + + boolean + + + + + + + + string + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + value + none + + + + + + + + + value + formula + none + + + + + + + + + value + formula + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text:identifier + text:address + text:annote + text:author + text:booktitle + text:chapter + text:edition + text:editor + text:howpublished + text:institution + text:journal + text:month + text:note + text:number + text:organizations + text:pages + text:publisher + text:school + text:series + text:title + text:report-type + text:volume + text:year + text:url + text:custom1 + text:custom2 + text:custom3 + text:custom4 + text:custom5 + text:isbn + text:issn + + + + + + + + + + article + book + booklet + conference + custom1 + custom2 + custom3 + custom4 + custom5 + email + inbook + incollection + inproceedings + journal + manual + mastersthesis + misc + phdthesis + proceedings + techreport + unpublished + www + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + document + chapter + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + document + chapter + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text + category-and-value + caption + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + simple + + + + + + + + + + + + + + + + + + + + + + 1 + 2 + 3 + separator + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + name + number + number-and-name + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + address + annote + author + bibliography-type + booktitle + chapter + custom1 + custom2 + custom3 + custom4 + custom5 + edition + editor + howpublished + identifier + institution + isbn + issn + journal + month + note + number + organizations + pages + publisher + report-type + school + series + title + url + volume + year + + + + + + + + + + + + + + + + + + + + + + + + right + + + + left + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + visible + collapse + filter + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ($?([^\. ']+|'([^']|'')+'))?\.$?[A-Z]+$?[0-9]+ + + + + + + ($?([^\. ']+|'([^']|'')+'))?\.$?[A-Z]+$?[0-9]+(:($?([^\. ']+|'([^']|'')+'))?\.$?[A-Z]+$?[0-9]+)? 
+ + + + + + + + + + + + + + + + + + + copy-all + copy-results-only + + + + + + + + + + + + + + + simple + + + + + onRequest + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + trace-dependents + remove-dependents + trace-precedents + remove-precedents + trace-errors + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + from-another-table + to-another-table + from-same-table + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + enable + disable + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + unsorted + sort-ascending + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + stop + warning + information + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + column + row + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + + + + print-range + filter + repeat-row + repeat-column + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + column + row + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text + number + automatic + + + + + + + + + + ascending + descending + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + text + number + automatic + + + + + + + + + + ascending + descending + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + auto + average + count + countnums + max + min + product + stdev + stdevp + sum + var + varp + + + + + + + + + + + + + + + + + + + + + + + + + + self + cell-range + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text + number + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + row + column + both + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + row + column + data + hidden + + + + + page + + + + + + + + + + + + + + + + + + + auto + average + count + countnums + max + min + product + stdev + stdevp + sum + var + varp + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + auto + average + count + countnums + max + min + product + stdev + stdevp + sum + var + varp + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + from-top + from-bottom + + + + + + + + + + + + + + data + + + + + + + + none + manual + name + + + + + + + + ascending + descending + + + + + + + + + + + + + tabular-layout + outline-subtotals-top + outline-subtotals-bottom + + + + + + + + + + + + + + + + + + + + + + + named + + + + + + + + previous + next + + + + + + + + none + member-difference + member-percentage + member-percentage-difference + running-total + row-percentage + column-percentage + total-percentage + index + + + + + + + + + + + + + + + + + + + + + + auto + + + + + + auto + + + + + + + + + + auto + + + + + + auto + + + + + + + + + + + + + seconds + minutes + hours + days + months + quarters + 
years + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + auto + average + count + countnums + max + min + product + stdev + stdevp + sum + var + varp + + + + + + + + + + + + + + + + + + + none + row + column + both + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + row + column + table + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + row + column + table + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + accepted + rejected + pending + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + always + screen + printer + none + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + false + + + + true + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + full + section + cut + arc + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + standard + lines + line + curve + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + page + frame + paragraph + char + as-char + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + top-left + top + top-right + left + center + right + bottom-left + bottom-right + + + + + + + + auto + left + right + up + down + horizontal + vertical + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + scale + scale-min + + + + + + + + scale + scale-min + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + + + embed + + + + + + + onLoad + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + + onRequest + + + + + + + + + + + + + + new + replace + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + + + + + + + + new + replace + + + + + + + + + + + + + + + + nohref + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + parallel + perspective + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + flat + phong + gouraud + draft + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + non-primitive + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + flat + phong + gouraud + draft + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + parallel + perspective + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + segments + rectangle + + + + + + + + + + + + + + + + + + + + + normal + path + shape + + + + + + + + + path + shape + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + title + outline + subtitle + text + graphic + object + chart + 
table + orgchart + page + notes + handout + header + footer + date-time + page-number + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + + + onRequest + + + + + + + new + replace + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + fade + move + stripes + open + close + dissolve + wavyline + random + lines + laser + appear + hide + move-short + checkerboard + rotate + stretch + + + + + + + + + + + + none + from-left + from-top + from-right + from-bottom + from-center + from-upper-left + from-upper-right + from-lower-left + from-lower-right + to-left + to-top + to-right + to-bottom + to-upper-left + to-upper-right + to-lower-right + to-lower-left + path + spiral-inward-left + spiral-inward-right + spiral-outward-left + spiral-outward-right + vertical + horizontal + to-center + clockwise + counter-clockwise + + + + + + + + + + + + slow + medium + fast + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + default + on-click + with-previous + after-previous + timing-root + main-sequence + interactive-sequence + + + + + + + + + + + + + + + + + + + + + + + custom + entrance + exit + emphasis + motion-path + ole-action + media-call + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + previous-page + next-page + first-page + last-page + hide + stop + execute + show + verb + fade-out + sound + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + + + embed + + + + + + + onRequest + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + fixed + current-date + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + enabled + disabled + + + + + + + + + enabled + disabled + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + start + end + top + bottom + + + + + + start + center + end + + + + + + + top-start + bottom-start + top-end + bottom-end + + + + + + + + + + + + + wide + high + balanced + + + + + custom + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + row + column + both + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + x + y + z + + + + + + + + + + + + + + + + + + + + + + + + + + + major + minor + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + onRequest + + + + + + + + + + + + + + + + get + post + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + table + query + command + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + current + parent + + + + + + + + + + + + + + + + + + + records + current + page + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + unchecked + checked + unknown + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + horizontal + vertical + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + submit + reset + push + url + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + flat + 3d + + + + + + + + + center + + + + + + start + end + top + bottom + + + + + + start + center + end + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + table + query + sql + sql-pass-through + value-list + table-fields + + + + + + + + + + + + + + + + + + + + + + + + + + + void + + + + + + + + + + + + + + float + + + + + + + + + + + + percentage + + + + + + + + + + + + currency + + + + + + + + + + + + + + + + + date + + + + + + + + + + + + time + + + + + + + + + + + + boolean + + + + + + + + + + 
+ + string + + + + + + + + + + + void + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1 + i + I + + + + + + + + a + A + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + onRequest + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + into-default-style-data-style + into-english-number + keep-text + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + discrete + linear + paced + spline + + + + + + + + + + + + + + + + + + + + + rgb + hsl + + + + + + + + + clockwise + counter-clockwise + + + + + + + + + + + + + + + + + + translate + scale + rotate + skewX + skewY + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + forward + reverse + + + + + + + + + forward + reverse + + + + + + + + + in + out + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + discrete + linear + paced + spline + + + + + + + + + + + + + + + + + + + + + + + none + sum + + + + + + + + + replace + sum + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + first + last + all + media + + + + + + + + + + + + + + + indefinite + + + + + + + + + remove + freeze + hold + auto + default + transition + + + + + + + + + remove + freeze + hold + transition + auto + inherit + + + + + + + + + never + always + whenNotActive + default + + + + + + + + + never + always + whenNotActive + inherit + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + all + left + right + mirrored + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + row + column + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + normal + ultra-condensed + extra-condensed + condensed + semi-condensed + semi-expanded + expanded + extra-expanded + ultra-expanded + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + onRequest + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + short + long + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + short + long + + + + + + + + + + + + + + + + short + long + + + + + + + + + + + + + + + + short + long + + + + + + + + + + + + + + + + short + long + + + + + + + + + + + + + + + + + + + + + + short + long + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + short + long + + + + + + + + + + + + + + + short + long + + + + + + + + + + + + + + + short + long + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + fixed + language + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + short + medium + long + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + gregorian + gengou + ROC + hanja_yoil + hanja + hijri + jewish + buddhist + + + + + + + + + text + + + + + + + + + + paragraph + + + + + + + + + + + + + section + + + + + + + + + + ruby + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + left + right + inner + outer + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + document + chapter + page + + + + + + + + + text + page + section + document + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + address + annote + author + bibliography-type + booktitle + chapter + custom1 + custom2 + custom3 + custom4 + custom5 + edition + editor + howpublished + identifier + institution + isbn + issn + journal + month + note + number + organizations + pages + publisher + report-type + school + series + title + url + volume + year + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + table + + + + + + + + + + table-column + + + + + + + + + + table-row + + + + + + + + + + table-cell + + + + + + + + + + + + + + + + + graphic + presentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + drawing-page + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + linear + axial + radial + ellipsoid + square + rectangular + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + objectBoundingBox + + + + + + + + + + + pad + reflect + repeat + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + single + double + triple + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + + + embed + + + + + + + onLoad + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + rect + round + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + chart + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + default + + + + + + + + + + portrait + landscape + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + headers + grid + 
annotations + objects + charts + drawings + formulas + zero-values + + + + + + + + + + + ttb + ltr + + + + + + + + + + continue + + + + + + + + + + + + + + + + + + + + + horizontal + vertical + both + none + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + left + center + right + + + + + + + + + + + + + + + + + + + none + line + both + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + normal + small-caps + + + + + + + none + lowercase + uppercase + capitalize + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + font-color + + + + + + + + + + + + + + + + + + + + + + + + + + super + sub + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + roman + swiss + modern + decorative + script + system + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + fixed + variable + + + + + + + + + + + + + + + + + + + + + + + [A-Za-z][A-Za-z0-9._\-]* + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + latin + asian + complex + ignore + + + + + + + + + + normal + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + normal + italic + oblique + + + + + + + none + embossed + engraved + + + + + + + + + + + + + + + none + + + + + + + + + + + + + + + none + single + double + + + + + + + + + + + + + none + solid + dotted + dash + long-dash + dot-dash + dot-dot-dash + wave + + + + + + + + + + + + + auto + normal + bold + thin + dash + medium + thick + + + + + + + + + + font-color + + + + + + + + + + + + + + + + + + + + + + + + + + normal + bold + 100 + 200 + 300 + 400 + 
500 + 600 + 700 + 800 + 900 + + + + + + + + + + + + + continuous + skip-white-space + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + letters + lines + + + + + + + + + + + + + + + + + + + + + none + + + none + accent + dot + circle + disc + + + above + below + + + + + + + + + + + + + + + + + + + + + + + + + fixed + line-height + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true + + + none + + + + condition + + + none + + + + + + + + + + + + + + + + + + + + + + + + normal + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + start + end + left + right + center + justify + + + + + + + + + start + center + justify + + + + + + + + + + + + + + + + auto + always + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + left + center + right + + + + + + char + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + font-color + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + auto + page + + + + + + + + + no-limit + + + + + + + + + + + + + + + + + + + + + + word + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + auto + column + page + + + + + + + auto + column + page + + + + + + + + + + + + + transparent + + + + + + + + + + + + + + + + + + + + + + + + + + no-repeat + repeat + stretch + + + + + + + + + left + center + right + top + bottom + + + + + + + + + + + + + + + + left + center + right + + + + + top + center + bottom + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + auto + always + + + + + + + + + + + + + + + + + + + + + + + none + ideograph-alpha + + + + + 
+ + + + simple + hanging + + + + + + + + + normal + strict + + + + + + + + + top + middle + bottom + auto + baseline + + + + + + + + + + + + + lr-tb + rl-tb + tb-rl + tb-lr + lr + rl + tb + page + + + + + + + + + + + + + + + + + + + + + + + + + + auto + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + above + below + + + + + + + + + left + center + right + distribute-letter + distribute-space + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + solid + dotted + dashed + dot-dashed + + + + + + + + + + + + + + + + + + + + + top + middle + bottom + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + left + center + right + margins + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + collapsing + separating + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + auto + always + + + + + + + + + + + + + + + + + + + + + + + top + middle + bottom + automatic + + + + + + + + + fix + value-type + + + + + + + + + + + + + ltr + ttb + + + + + + + + + auto + 0 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + no-wrap + wrap + + + + + + + + + + + + + + + + + + + + none + bottom + top + center + + + + + + + + + none + hidden-and-protected + + + + protected + formula-hidden + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + dash + solid + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + 0 + 1 + + + + + + + + + + + miter + round + bevel + middle + none + inherit + + + + + + + + + none + solid + bitmap + gradient + hatch + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + no-repeat + repeat + stretch + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + top-left + top + top-right + left + center + right + bottom-left + bottom + bottom-right + + + + + + + + + + + + + + + + + + + + + + + + + + + + nonzero + evenodd + + + + + + + + + + + + + + + + none + scroll + alternate + slide + + + + + + + + + left + right + up + down + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + top + middle + bottom + justify + + + + + + + + + left + center + right + justify + + + + + + + + + no-wrap + wrap + + + + + + + + + + + + + + greyscale + mono + watermark + standard + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + visible + hidden + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + below + above + + + + + + + + + + + + + + + + automatic + left-outside + inside + right-outside + + + + + + + automatic + above + below + center + + + + + + + + + automatic + mm + cm + m + km + pt + pc + inch + ft + mi + + + + + + + + + + + + + + + + + + + + + + + straight-line + angled-line + angled-connector-line + + + + + + + + + fixed + free + + + + + + + + + + + + + + + + + + + + + + + horizontal + vertical + auto + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + correct + attractive + + + + + + + + + + + + + + + + + + + + + + + enabled + disabled + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + standard + double-sided + + + + + + + + + object + flat + sphere + + + + + + + + + normal + inverse + + + + + + + + + object + parallel + sphere + + + + + + + object + parallel + sphere + + + + + + + + + luminance + intensity + color + + + + + + + + + enabled + disabled + + + + + + + + + replace + modulate + blend + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + visible + hidden + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + + + + content + position + size + + + + + + + + + + + + left + center + right + from-left + inside + outside + from-inside + + + + + + + + + + + + + + page + page-content + page-start-margin + page-end-margin + frame + frame-content + frame-start-margin + frame-end-margin + paragraph + paragraph-content + paragraph-start-margin + paragraph-end-margin + char + + + + + + + + + + + + + top + middle + bottom + from-top + below + + + + + + + + + + + + + + + + + + page + page-content + frame + frame-content + paragraph + paragraph-content + char + line + baseline + text + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + left + right + parallel + dynamic + run-through + biggest + + + + + + + + + + + + + + + + no-limit + + + + + + + + + + + + + + + + + full + outside + + + + + + + + + foreground + background + + + + + + + + + + + + + + + + clip + auto-create-new-frame + + + + + + + + + none + vertical + + + vertical + + + + + vertical + + + + + + + + + horizontal + horizontal-on-odd + horizontal-on-even + + + + + + + + + + + + + + + iterative + once-concurrent + once-successive + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ([0-9]+(\.[0-9]*)?|\.[0-9]+)(px) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + + + automatic + + + + named-symbol + + + + square + diamond + 
arrow-down + arrow-up + arrow-right + arrow-left + bow-tie + hourglass + circle + star + x + plus + asterisk + horizontal-bar + vertical-bar + + + + + + image + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + cubic-spline + b-spline + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + cuboid + cylinder + cone + pyramid + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + side-by-side + stagger-even + stagger-odd + + + + + + + + + + + + + + + none + value + percentage + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + variance + standard-deviation + percentage + error-margin + constant + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + columns + rows + + + + + + + + + none + linear + logarithmic + exponential + power + + + + + + + + + manual + automatic + semi-automatic + + + + + + + + + none + fade-from-left + fade-from-top + fade-from-right + fade-from-bottom + fade-from-upperleft + fade-from-upperright + fade-from-lowerleft + fade-from-lowerright + move-from-left + move-from-top + move-from-right + move-from-bottom + move-from-upperleft + move-from-upperright + move-from-lowerleft + move-from-lowerright + uncover-to-left + uncover-to-top + uncover-to-right + uncover-to-bottom + uncover-to-upperleft + uncover-to-upperright + uncover-to-lowerleft + uncover-to-lowerright + fade-to-center + fade-from-center + vertical-stripes + horizontal-stripes + clockwise + counterclockwise + open-vertical + open-horizontal + close-vertical + close-horizontal + wavyline-from-left + wavyline-from-top + wavyline-from-right + wavyline-from-bottom + spiralin-left + spiralin-right + spiralout-left + spiralout-right + roll-from-top + roll-from-left + roll-from-right + roll-from-bottom 
+ stretch-from-left + stretch-from-top + stretch-from-right + stretch-from-bottom + + vertical-lines + horizontal-lines + dissolve + random + vertical-checkerboard + horizontal-checkerboard + interlocking-horizontal-left + interlocking-horizontal-right + interlocking-vertical-top + interlocking-vertical-bottom + fly-away + open + close + melt + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + forward + reverse + + + + + + + + + + + + + + + + + + + + + + + visible + hidden + + + + + + + + + + + + + + full + border + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true + false + + + + + + + + + + + + + + + + + + + + [A-Za-z0-9]{1,8} + + + + + [A-Za-z]{1,8} + + + + + 1 + + + + + -?([0-9]+(\.[0-9]*)?|\.[0-9]+)((cm)|(mm)|(in)|(pt)|(pc)|(px)) + + + + + + ([0-9]+(\.[0-9]*)?|\.[0-9]+)((cm)|(mm)|(in)|(pt)|(pc)|(px)) + + + + + + ([0-9]*[1-9][0-9]*(\.[0-9]*)?|0+\.[0-9]*[1-9][0-9]*|\.[0-9]*[1-9][0-9]*)((cm)|(mm)|(in)|(pt)|(pc)|(px)) + + + + + + -?([0-9]+(\.[0-9]*)?|\.[0-9]+)% + + + + + [0-9]+\* + + + + + + + + + + + #[0-9a-fA-F]{6} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + _self + _blank + _parent + _top + + + + + + + float + time + date + percentage + currency + boolean + string + + + + + + -?[0-9]+,-?[0-9]+([ ]+-?[0-9]+,-?[0-9]+)* + + + + + + + + + \([ ]*-?([0-9]+(\.[0-9]*)?|\.[0-9]+)([ ]+-?([0-9]+(\.[0-9]*)?|\.[0-9]+)){2}[ ]*\) + + + + + + + [0-9a-zA-Z_]+:[0-9a-zA-Z._\-]+ + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/hwp5/odf-relaxng/OpenDocument-strict-schema-v1.1.rng b/src/hwp5/odf-relaxng/OpenDocument-strict-schema-v1.1.rng new file mode 100644 index 0000000000000000000000000000000000000000..e77fe4ba6efdffdda1d0b1c508567ddec1571c19 --- /dev/null +++ b/src/hwp5/odf-relaxng/OpenDocument-strict-schema-v1.1.rng @@ -0,0 +1,61 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + diff --git a/src/hwp5/odf-relaxng/OpenDocument-v1.2-os-dsig-schema.rng b/src/hwp5/odf-relaxng/OpenDocument-v1.2-os-dsig-schema.rng new file mode 100644 index 0000000000000000000000000000000000000000..baab69981e8d20aff0cdce45c5e486f503089fe9 --- /dev/null +++ b/src/hwp5/odf-relaxng/OpenDocument-v1.2-os-dsig-schema.rng @@ -0,0 +1,84 @@ + + + + + + + + + + + + + + + + + + + + 1.2 + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/hwp5/odf-relaxng/OpenDocument-v1.2-os-manifest-schema.rng b/src/hwp5/odf-relaxng/OpenDocument-v1.2-os-manifest-schema.rng new file mode 100644 index 0000000000000000000000000000000000000000..af13a26c71ad6826f9a5a6ea4a40b619cd9ae4a7 --- /dev/null +++ b/src/hwp5/odf-relaxng/OpenDocument-v1.2-os-manifest-schema.rng @@ -0,0 +1,224 @@ + + + + + + + + + + + + + + + + + + + 1.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + edit + presentation-slide-show + read-only + + + + + + + + + + + + + + + + + + + + + + + + + + + SHA1/1K + + + + + + + + + + + + + + + + + + + Blowfish CFB + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + PBKDF2 + + + + + + + + + + + + + + + + + + + + + + + + + + + SHA1 + + + + + + + + + + + + + + + + [^:]+:[^:]+ + + + + + + + + + + + + diff --git a/src/hwp5/odf-relaxng/OpenDocument-v1.2-os-metadata.owl b/src/hwp5/odf-relaxng/OpenDocument-v1.2-os-metadata.owl new file mode 100644 index 0000000000000000000000000000000000000000..a1eff11e4f1879487599ede3e8eca20fbc3a598d --- /dev/null +++ b/src/hwp5/odf-relaxng/OpenDocument-v1.2-os-metadata.owl @@ -0,0 +1,86 @@ + + + + + + + + + Open Document Schema Metadata Manifest Ontology + + + + + + The unique content.xml from the root path of the document + + + + + + The unique styles.xml from the root path of the document + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/hwp5/odf-relaxng/OpenDocument-v1.2-os-package-metadata.owl b/src/hwp5/odf-relaxng/OpenDocument-v1.2-os-package-metadata.owl 
new file mode 100644 index 0000000000000000000000000000000000000000..24169558d0f4931fc5d7fa0444b31729acfcc454 --- /dev/null +++ b/src/hwp5/odf-relaxng/OpenDocument-v1.2-os-package-metadata.owl @@ -0,0 +1,81 @@ + + + + + + + + Open Document Package Metadata Manifest Ontology + + + + + + + + + + + + + + + + Related to dcterms:hasPart of the Dublin Core Metadata Initiative + + + + + Used for any metadata file in the document + + + + + + + + + A string representing the MIME media type of a file (see RFC4288). + + + + + + + diff --git a/src/hwp5/odf-relaxng/OpenDocument-v1.2-os-schema.rng b/src/hwp5/odf-relaxng/OpenDocument-v1.2-os-schema.rng new file mode 100644 index 0000000000000000000000000000000000000000..538c335b3bf69e3b24702357cbb67ff6821d4449 --- /dev/null +++ b/src/hwp5/odf-relaxng/OpenDocument-v1.2-os-schema.rng @@ -0,0 +1,18127 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + boolean + short + int + long + double + string + datetime + base64Binary + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + + + onRequest + + + + + + + + + + + + + + + + + simple + + + + + + + replace + + + + + onLoad + + + + + + + + + + + + + + + + + + + new + replace + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + float + + + + + + date + + + + + + time + + + + + + boolean + + + + + + string + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true + none + + + + + condition + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + + + embed + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text:page-count + text:paragraph-count + text:word-count + text:character-count + text:table-count + text:image-count + text:object-count + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text:reference-ref + text:bookmark-ref + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + value + unit + gap + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text:identifier + text:address + text:annote + text:author + text:booktitle + text:chapter + text:edition + text:editor + text:howpublished + text:institution + text:journal + text:month + text:note + text:number + text:organizations + text:pages + text:publisher + text:school + text:series + text:title + text:report-type + text:volume + text:year + text:url + text:custom1 + text:custom2 + text:custom3 + text:custom4 + text:custom5 + text:isbn + text:issn + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + + + onRequest + + + + + + + + + + + new + replace + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + footnote + endnote + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + previous + current + next + + + + + + + + + + previous + next + + + + + + + + + + + + + + name + number + number-and-name + plain-number-and-name + plain-number + + + + + + + + + + + + + full + path + name + name-and-extension + + + + + + + + + 
+ + full + path + name + name-and-extension + area + title + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + table + query + command + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text + table + text-box + image + object + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + number-no-superior + number-all-superior + number + + + + + + + + + + + + + + + + + + + + + + category-and-value + caption + value + + + + + + + page + chapter + direction + text + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + float + + + + + + + + percentage + + + + + + + + currency + + + + + + + + + + + + + date + + + + + + + + time + + + + + + + + boolean + + + + + + + + string + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + value + none + + + + + + + + + value + formula + none + + + + + + + + + value + formula + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + article + book + booklet + conference + custom1 + custom2 + custom3 + custom4 + custom5 + email + inbook + incollection + inproceedings + journal + manual + mastersthesis + misc + phdthesis + proceedings + techreport + unpublished + www + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + document + chapter + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text + category-and-value + caption + + + + + + + + + + document + chapter + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + + + + + + + + + + + + + + + + + + + + + 1 + 2 + 3 + separator + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + name + number + number-and-name + plain-number + plain-number-and-name + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + address + annote + author + bibliography-type + booktitle + chapter + custom1 + custom2 + custom3 + custom4 + custom5 + edition + editor + howpublished + identifier + institution + isbn + issn + journal + month + note + number + organizations + pages + publisher + report-type + school + series + title + url + volume + year + + + + + + + + + + + + + + + + + + + + + + + + right + + + + left + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + visible + collapse + filter + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ($?([^\. ']+|'([^']|'')+'))?\.$?[A-Z]+$?[0-9]+ + + + + + + ($?([^\. ']+|'([^']|'')+'))?\.$?[A-Z]+$?[0-9]+(:($?([^\. ']+|'([^']|'')+'))?\.$?[A-Z]+$?[0-9]+)? + + + ($?([^\. ']+|'([^']|'')+'))?\.$?[0-9]+:($?([^\. ']+|'([^']|'')+'))?\.$?[0-9]+ + + + ($?([^\. ']+|'([^']|'')+'))?\.$?[A-Z]+:($?([^\. ']+|'([^']|'')+'))?\.$?[A-Z]+ + + + + + + Value is a space separated list of "cellRangeAddress" patterns + + + + + + + + + + + + + + copy-all + copy-results-only + + + + + + + + + + + + + + simple + + + + + + + onRequest + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + trace-dependents + remove-dependents + trace-precedents + remove-precedents + trace-errors + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + from-another-table + to-another-table + from-same-table + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + date + + + + + + + + + + + + + + + + enable + disable + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + unsorted + sort-ascending + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + stop + warning + information + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + column + row + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + + + + print-range + filter + repeat-row + repeat-column + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + column + row + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + alpha-numeric + integer + double + + + + + + + + + + + + + + + + + + + + text + number + automatic + + + + + + + + ascending + descending + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text + number + automatic + + + + + + + + ascending + descending + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + average + count + countnums + max + min + product + stdev + stdevp + sum + var + varp + + + + + + + + + + + + + + + + + + + + + + + + + + self + cell-range + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text + number + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + row + column + both + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + row + column + data + hidden + + + + + page + + + + + + + + + + + + + + + auto + average + count + countnums + max + min + product + stdev + stdevp + sum + var + varp + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + auto + average + count + countnums + max + min + product + stdev + stdevp + sum + var + varp + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + from-top + from-bottom + + + + + + + + + + + + + + + + data + + + + + + + + none + manual + name + + + + + + ascending + descending + + + + + + + + + + + + + + + tabular-layout + outline-subtotals-top + outline-subtotals-bottom 
+ + + + + + + + + + + + + + + + + + + + + named + + + + + + + + previous + next + + + + + + none + member-difference + member-percentage + member-percentage-difference + running-total + row-percentage + column-percentage + total-percentage + index + + + + + + + + + + + + + + + + + + + + + + auto + + + + + + auto + + + + + + + + auto + + + + + + auto + + + + + + + + + seconds + minutes + hours + days + months + quarters + years + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + average + count + countnums + max + min + product + stdev + stdevp + sum + var + varp + + + + + + + + + + + + + none + row + column + both + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + row + column + table + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + row + column + table + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + accepted + rejected + pending + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + always + screen + printer + none + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + false + + + + true + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + full + section + cut + arc + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + standard + lines + line + curve + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + page + frame + paragraph + char + as-char + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + top-left + top + top-right + left + center + right + bottom-left + bottom-right + + + + + + auto + left + right + up + down + horizontal + vertical + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + scale + scale-min + + + + + + + + scale + scale-min + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + + + embed + + + + + 
onLoad + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + + + onRequest + + + + + + + + + + + new + replace + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + + + + + + + + + new + replace + + + + + + + + + + + + nohref + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + parallel + perspective + + + + + + + + + + + + + + + + + + + + + + flat + phong + gouraud + draft + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + flat + phong + gouraud + draft + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + parallel + perspective + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + segments + rectangle + + + + + + + + + + + + + + + + + normal + path + shape + + + + + + + path + shape + + + + + + + + + + + + + + + + + + non-primitive + + + + + + \([ 
]*-?([0-9]+(\.[0-9]*)?|\.[0-9]+)((cm)|(mm)|(in)|(pt)|(pc))([ ]+-?([0-9]+(\.[0-9]*)?|\.[0-9]+)((cm)|(mm)|(in)|(pt)|(pc))){2}[ ]*\) + + + + + -0.5 + 0.5 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + title + outline + subtitle + text + graphic + object + chart + table + orgchart + page + notes + handout + header + footer + date-time + page-number + + + + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + + + onRequest + + + + + + new + replace + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + fade + move + stripes + open + close + dissolve + wavyline + random + lines + laser + appear + hide + move-short + checkerboard + rotate + stretch + + + + + none + from-left + from-top + from-right + from-bottom + from-center + from-upper-left + from-upper-right + from-lower-left + from-lower-right + to-left + to-top + to-right + to-bottom + to-upper-left + to-upper-right + to-lower-right + to-lower-left + path + spiral-inward-left + spiral-inward-right + spiral-outward-left + spiral-outward-right + vertical + horizontal + to-center + clockwise + counter-clockwise + + + + + slow + medium + fast + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + default + on-click + with-previous + after-previous + timing-root + main-sequence + interactive-sequence + + + + + + + + + + + + + + + + + custom + entrance + exit + emphasis + motion-path + ole-action + media-call + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + previous-page + next-page + first-page + last-page + hide + stop + execute + show + verb + fade-out + sound + last-visited-page + + + + + + + + + + + + + + + + + + + + + + + + + 
simple + + + + + + + embed + + + + + onRequest + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + fixed + current-date + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + enabled + disabled + + + + + + + enabled + disabled + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + start + end + top + bottom + + + + + + start + center + end + + + + + + + top-start + bottom-start + top-end + bottom-end + + + + + + + + + wide + high + balanced + + + + + custom + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + row + column + both + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + x + y + z + + + + + + + + + + + + + + + + + + + + + + major + minor + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + + + none + + + + + onRequest + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + equal-integer + is-boolean + equal-boolean + equal-use-only-zero + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + boolean + short + int + long + double + string + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + + + none + + + + + onRequest + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + no-nulls + nullable + + + + + + + + + + + + + + + + + + + + bit + boolean + tinyint + smallint + integer + bigint + float + real + double + numeric + decimal + char + varchar + longvarchar + date + time + timestmp + binary + varbinary + longvarbinary + sqlnull + other + object + distinct + struct + array + blob + clob + ref + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + primary + 
unique + foreign + + + + + + + + + + + + + cascade + restrict + set-null + no-action + set-default + + + + + + + + cascade + restrict + set-null + no-action + set-default + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + + + onRequest + + + + + + + + + + + + get + post + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + table + query + command + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + current + parent + + + + + records + current + page + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + selection + selection-indices + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + unchecked + checked + unknown + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + horizontal + vertical + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + submit + reset + push + url + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + flat + 3d + + + + + + + + + center + + + + + + start + end + top + bottom + + + + + + start + center + end + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + table + query + sql + sql-pass-through + value-list + table-fields + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + void + + + + + + + + float + + + + + + + + + + + + percentage + + + + + + + + + + + + currency + + + + + + + + + + + + + + + + + date + + + + + + + + + + + + time + + + + + + + + + + + + boolean + + + + + + + + + + + + string + + + + + + + + + + + void + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1 + i + I + + + + + + + + a + A + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + + + onRequest + + + + + + + + + + + + + To avoid inclusion of the complete MathML schema, anything is allowed within a math:math top-level element + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + into-default-style-data-style + into-english-number + keep-text + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + discrete + linear + paced + spline + + + + + + + + + + + rgb + hsl + + + + + + + clockwise + counter-clockwise + + + + + + + + + translate + scale + rotate + skewX + skewY + + + + + + + + + + + + + + + + + forward + reverse + + + + + + + + + + + + in + out + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + discrete + linear + paced + spline + + + + + + + + + + + + + + + + + + + + + + + + none + sum + + + + + + + replace + sum + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + first + last + all + media + + + + + + + + + + + + + + + + indefinite + + + + + + + 0.0 + + + + + + + remove + freeze + hold + auto + default + transition + + + + + + + + + remove + freeze + hold + transition + auto + inherit + + + + + + + + + never + always + whenNotActive + default + + + + + + + + + never + always + whenNotActive + inherit + + + + + + + + + + + + + + + + + + + + + + + + + + 0 + 1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + all + left + right + mirrored + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + row + column + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + normal + ultra-condensed + extra-condensed + condensed + semi-condensed + semi-expanded + expanded + extra-expanded + ultra-expanded + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + + + onRequest + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + short + long + + + + + + + + + + + + + + + + + + + + + + + + + + + short + long + + + + + + + + + + + + + + + + + short + long + + + + + + + + + + + + + + + + short + long + + + + + + + + + + + + + + + + short + long + + + + + + + + + + + + + + + + + + + + + + short + long + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + short + long + + + + + + + + + + + + + + + short + long + + + + + + + + + + + + + + + + short + long + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + short + medium + long + + + + + + + + + + + + + + + + + fixed + language + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + gregorian + gengou + ROC + hanja_yoil + hanja + hijri + jewish + buddhist + + + + + + + + + + text + + + + + + + + paragraph + + + + + + + + + + + section + + + + + + + + ruby + + + + + + + + table + + + + + + + + table-column + + + + + + + + table-row + + + + + + + + table-cell + + + + + + + + + + + + + + + graphic + presentation + + + + + + + + + + + + + + + drawing-page + + + + + + + + chart + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + left + right + inner + outer + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + document + chapter + page + + + + + + + text + page + section + document + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + address + annote + author + bibliography-type + booktitle + chapter + custom1 + custom2 + custom3 + custom4 + custom5 + edition + editor + howpublished + identifier + institution + isbn + issn + journal + month + note + number + organizations + pages + publisher + report-type + school + series + title + url + volume + year + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + linear + axial + radial + ellipsoid + square + rectangular + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + objectBoundingBox + + + + + + + + + + + pad + reflect + repeat + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + single + double + triple + + + + + + + + + + + + + + + + + + + + + + + + simple + + + + + + + embed + + + + + onLoad + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + rect + round + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + default + + + + + + + + portrait + landscape + + + + + + + + + + + + + + + + + + + + + + headers + grid + annotations + objects + charts + drawings + formulas + zero-values + + + + + + + + + ttb + ltr + + + + + + + + continue + + + + + + + + + + + + + + + + + horizontal + vertical + both + none + + + + + + + + + + + + + none + line + both + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + left + center + right + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + lowercase + uppercase + capitalize + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + font-color + + + + + + + + + + + + + + + + + + + + super + sub + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + latin + asian + complex + ignore + + + + + + + + normal + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + embossed + engraved + + + + + + + + + + + + + + + + + + + + + + + + + + + font-color + + + + + + + + + + + + + + + + + + + + + + + font-color + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + letters + lines + + + + + + + + + + + + + + + + + none + + + none + accent + dot + circle + disc + + + above + below + + + + + + + + + + + + + + + + + + + fixed + line-height + + + + + + + + + + + + + + + + + + + + + true + + + none + + + + condition + + + none + + + + + + + + + normal + small-caps + + + + + roman + swiss + modern + decorative + script + system + + + + + fixed + variable + + + + + [A-Za-z][A-Za-z0-9._\-]* + + + + + normal + italic + oblique + + + + + none + + + + + + none + single + double + + + + + none + solid + dotted + dash + long-dash + dot-dash + dot-dot-dash + wave + + + + + auto + normal + bold + thin + medium + thick + + + + + + + + normal + bold + 100 + 200 + 300 + 400 + 500 + 600 + 700 + 800 + 900 + + + + + continuous + skip-white-space + + + + + + + + + + + + + + + + + normal + + + + + + + + + + + + + + + + + + + + + + + + + start + center + justify + + + + + + + + + + + + auto + always + + + + + + + + + + + + + + + + + + + + + + auto + page + + + + + + + no-limit + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + ideograph-alpha + + + + + + + simple + hanging + + + + + + + normal + strict + + + + + + + top + middle + bottom + auto + baseline + + + + + + + + + + + + + + + + + + + + + + + start + end + left + right + center + justify + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + left + center + right + + + + + + char + + + + + + + + + + + + + + + + + + + + + + + font-color + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + word + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + auto + column + page + + + + + + + auto + column + page + + + + + + + + + transparent + + + + + + + + + + + + + + + + + + + + + + + no-repeat + repeat + stretch + + + + + + + left + center + right + top + bottom + + + + + + + + + + + + + + + + + + + + + + + + + + left + center + right + + + + + top + center + bottom + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + auto + always + + + + + + + + + lr-tb + rl-tb + tb-rl + tb-lr + lr + rl + tb + page + + + + + + + + + + auto + + + + + + + + + + + + + + + + + + + + + + + + + + + + + above + below + + + + + + + left + center + right + distribute-letter + distribute-space + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + solid + dotted + dashed + dot-dashed + + + + + + + + + + + + + + + top + middle + bottom + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + left + center + right + margins + + + + + + + + + + + + + + + + + + + + collapsing + separating + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + auto + always + + + + + + + + + + + + + + + + + + + + + + + top + middle + bottom + automatic + + + + + + + fix + value-type + + + + + + + + auto + 0 + 0deg + 0rad + 0grad + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + no-wrap + wrap + + + + + + + + none + bottom + top + center + + + + + + + none + hidden-and-protected + + + + protected + 
formula-hidden + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ltr + ttb + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + label-width-and-position + label-alignment + + + + + + + + + + + + + + + + + + + + + listtab + space + nothing + + + + + + + + + + + + + + + + + + + + + + + + + none + dash + solid + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0 + 1 + + + + + + + + + miter + round + bevel + middle + none + + + + + + + butt + square + round + + + + + + + + + + + + none + scroll + alternate + slide + + + + + + + left + right + up + down + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + top + middle + bottom + justify + + + + + + + left + center + right + justify + + + + + + + no-wrap + wrap + + + + + + + + + + + + greyscale + mono + watermark + standard + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + visible + hidden + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + below + above + + + + + + + + + + + + automatic + left-outside + inside + right-outside + + + + + + + automatic + above + below + center + + + + + + + automatic + mm + cm + m + km + pt + pc + inch + ft + mi + + + + + + + + + + + + + + + + + straight-line + angled-line + angled-connector-line + + + + + + + fixed + free + + + + + + + + + + + + + + + + + horizontal + vertical + auto + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + correct + attractive + + + + + + + + + + + + + + + + + enabled + disabled + + + + + + + + + + + + + + + + + + + + + + standard + double-sided + + + + + + + object + flat + sphere + + + + + + + normal + inverse + + + + + + + object + parallel + sphere + + + + + + + object + parallel + sphere + + + + 
+ + + luminance + intensity + color + + + + + + + enabled + disabled + + + + + + + replace + modulate + blend + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + visible + hidden + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + + + + content + position + size + + + + + + + + + + left + center + right + from-left + inside + outside + from-inside + + + + + + + + + + + + page + page-content + page-start-margin + page-end-margin + frame + frame-content + frame-start-margin + frame-end-margin + paragraph + paragraph-content + paragraph-start-margin + paragraph-end-margin + char + + + + + + + + + + + + + + + + + none + left + right + parallel + dynamic + run-through + biggest + + + + + + + + + + + + no-limit + + + + + + + + + + + + + full + outside + + + + + + + foreground + background + + + + + + + + + + + + clip + auto-create-new-frame + + + + + + + none + vertical + + + vertical + + + + + vertical + + + + + + + + auto + + + + + + + + iterative + once-concurrent + once-successive + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + content + thumbnail + icon + print-view + + + + + + + + + + + + + + + + none + solid + bitmap + gradient + hatch + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + no-repeat + repeat + stretch + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + top-left + top + top-right + left + center + right + bottom-left + bottom + bottom-right + + + + + + + + + horizontal + vertical + + + + + + + + + + + + + + + + + + nonzero + evenodd + + + + + + + + + + + + + + + + + + + top + middle + bottom + from-top + below + + + + + + + + + + + + + + page + page-content + frame + frame-content + paragraph + paragraph-content + char + line + baseline + text + + + + + + + + + + + + + + horizontal + horizontal-on-odd + horizontal-on-even + + + + + rect\([ 
]*((-?([0-9]+(\.[0-9]*)?|\.[0-9]+)((cm)|(mm)|(in)|(pt)|(pc)))|(auto))([ ]*,[ ]*((-?([0-9]+(\.[0-9]*)?|\.[0-9]+)((cm)|(mm)|(in)|(pt)|(pc))))|(auto)){3}[ ]*\) + + + + + ([0-9]+(\.[0-9]*)?|\.[0-9]+)(px) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + + + automatic + + + + named-symbol + + + + square + diamond + arrow-down + arrow-up + arrow-right + arrow-left + bow-tie + hourglass + circle + star + x + plus + asterisk + horizontal-bar + vertical-bar + + + + + + image + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + cubic-spline + b-spline + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + cuboid + cylinder + cone + pyramid + + + + + + + + + + + + + + + + + use-zero + leave-gap + ignore + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + side-by-side + stagger-even + stagger-odd + + + + + + + + + none + value + percentage + value-and-percentage + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + none + variance + standard-deviation + percentage + error-margin + constant + standard-error + cell-range + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + columns + rows + + + + + + + none + linear + logarithmic + exponential + power + + + + + + + start + end + + + + + + + + near-axis + near-axis-other-side + outside-start + outside-end + + + + + + + at-labels + at-axis + at-labels-and-axis + + + + + + + + + + + + + avoid-overlap + center + top + top-right + right + bottom-right + bottom + bottom-left + left + top-left + inside + outside + near-origin + + + + + + + + manual + automatic + semi-automatic + + + + + + + none + fade-from-left + fade-from-top + fade-from-right + fade-from-bottom + fade-from-upperleft + fade-from-upperright + 
fade-from-lowerleft + fade-from-lowerright + move-from-left + move-from-top + move-from-right + move-from-bottom + move-from-upperleft + move-from-upperright + move-from-lowerleft + move-from-lowerright + uncover-to-left + uncover-to-top + uncover-to-right + uncover-to-bottom + uncover-to-upperleft + uncover-to-upperright + uncover-to-lowerleft + uncover-to-lowerright + fade-to-center + fade-from-center + vertical-stripes + horizontal-stripes + clockwise + counterclockwise + open-vertical + open-horizontal + close-vertical + close-horizontal + wavyline-from-left + wavyline-from-top + wavyline-from-right + wavyline-from-bottom + spiralin-left + spiralin-right + spiralout-left + spiralout-right + roll-from-top + roll-from-left + roll-from-right + roll-from-bottom + stretch-from-left + stretch-from-top + stretch-from-right + stretch-from-bottom + vertical-lines + horizontal-lines + dissolve + random + vertical-checkerboard + horizontal-checkerboard + interlocking-horizontal-left + interlocking-horizontal-right + interlocking-vertical-top + interlocking-vertical-bottom + fly-away + open + close + melt + + + + + + + + + + + + + + + + + + + + + + forward + reverse + + + + + + + + + + + + + + + + + visible + hidden + + + + + + + full + border + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true + false + + + + + + + + + + + + + + + + + + + + [A-Za-z0-9]{1,8} + + + + + [A-Za-z]{1,8} + + + + + [A-Za-z0-9]{1,8} + + + + + 1 + + + + + -?([0-9]+(\.[0-9]*)?|\.[0-9]+)((cm)|(mm)|(in)|(pt)|(pc)|(px)) + + + + + ([0-9]+(\.[0-9]*)?|\.[0-9]+)((cm)|(mm)|(in)|(pt)|(pc)|(px)) + + + + + ([0-9]*[1-9][0-9]*(\.[0-9]*)?|0+\.[0-9]*[1-9][0-9]*|\.[0-9]*[1-9][0-9]*)((cm)|(mm)|(in)|(pt)|(pc)|(px)) + + + + + -?([0-9]+(\.[0-9]*)?|\.[0-9]+)% + + + + + ([0-9]?[0-9](\.[0-9]*)?|100(\.0*)?|\.[0-9]+)% + + + + + -?([0-9]?[0-9](\.[0-9]*)?|100(\.0*)?|\.[0-9]+)% + + + + + [0-9]+\* + + 
+ + + + + + + + + #[0-9a-fA-F]{6} + + + + + + + + (([\i-[:]][\c-[:]]*)?:)?.+ + 1 + + + + + + + + + + + + \[(([\i-[:]][\c-[:]]*)?:)?.+\] + 3 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + _self + _blank + _parent + _top + + + + + + float + time + date + percentage + currency + boolean + string + + + + + -?[0-9]+,-?[0-9]+([ ]+-?[0-9]+,-?[0-9]+)* + + + + + + + + \([ ]*-?([0-9]+(\.[0-9]*)?|\.[0-9]+)([ ]+-?([0-9]+(\.[0-9]*)?|\.[0-9]+)){2}[ ]*\) + + + + + [^:]+:[^:]+ + + + + + An IRI-reference as defined in [RFC3987]. See ODF 1.2 Part 1 section 18.3. + + + + + + + + + + + + + + + + + + + + + diff --git a/src/hwp5/plat/__init__.py b/src/hwp5/plat/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a23c29956b9c417c73f4a761011eeafe2c3bb0c1 --- /dev/null +++ b/src/hwp5/plat/__init__.py @@ -0,0 +1,203 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals +from binascii import b2a_hex +from functools import partial +from subprocess import CalledProcessError +from subprocess import Popen +import logging +import os +import subprocess +import tempfile + +from . import _lxml +from . import _uno +from . import gir_gsf +from . 
import javax_transform +from . import jython_poifs +from . import olefileio +from . import xmllint +from . import xsltproc + + +logger = logging.getLogger(__name__) + + +def get_xslt(): + if javax_transform.is_enabled(): + return javax_transform.xslt + if _lxml.is_enabled(): + return _lxml.xslt + if xsltproc.is_enabled(): + return xsltproc.xslt + if _uno.is_enabled(): + return _uno.xslt + + +def get_xslt_compile(): + modules = [ + javax_transform, + _lxml, + xsltproc, + _uno + ] + for module in modules: + if module.is_enabled(): + xslt_compile = getattr(module, 'xslt_compile', None) + if xslt_compile: + return xslt_compile + xslt = getattr(module, 'xslt', None) + if xslt: + def xslt_compile(xsl_path): + return partial(xslt, xsl_path) + + +def get_relaxng(): + if _lxml.is_enabled(): + return _lxml.relaxng + if xmllint.is_enabled(): + return xmllint.relaxng + + +def get_relaxng_compile(): + modules = [ + _lxml, + xmllint, + ] + for module in modules: + if module.is_enabled(): + relaxng_compile = getattr(module, 'relaxng_compile', None) + if relaxng_compile: + return relaxng_compile + relaxng = getattr(module, 'relaxng', None) + if relaxng: + def relaxng_compile(rng_path): + return partial(relaxng, rng_path) + + +def get_olestorage_class(): + if jython_poifs.is_enabled(): + return jython_poifs.OleStorage + if olefileio.is_enabled(): + return olefileio.OleStorage + if _uno.is_enabled(): + return _uno.OleStorage + if gir_gsf.is_enabled(): + return gir_gsf.OleStorage + + +def get_aes128ecb_decrypt(): + try: + return get_aes128ecb_decrypt_cryptography() + except Exception: + pass + + try: + return get_aes128ecb_decrypt_javax() + except Exception: + pass + + try: + return get_aes128ecb_decrypt_openssl() + except Exception: + pass + + raise NotImplementedError('aes128ecb_decrypt') + + +def get_aes128ecb_decrypt_cryptography(): + from cryptography.hazmat.primitives.ciphers import Cipher + from cryptography.hazmat.primitives.ciphers import algorithms + from 
cryptography.hazmat.primitives.ciphers import modes + from cryptography.hazmat.backends import default_backend + + def decrypt(key, ciphertext): + backend = default_backend() + cipher = Cipher(algorithms.AES(key), modes.ECB(), backend=backend) + decryptor = cipher.decryptor() + return decryptor.update(ciphertext) + decryptor.finalize() + + return decrypt + + +def get_aes128ecb_decrypt_javax(): + from javax.crypto import Cipher + from javax.crypto.spec import SecretKeySpec + + def decrypt(key, ciphertext): + secretkey = SecretKeySpec(key, 'AES') + cipher = Cipher.getInstance('AES/ECB/NoPadding') + cipher.init(Cipher.DECRYPT_MODE, secretkey) + decrypted = cipher.doFinal(ciphertext) + return decrypted.tostring() + + return decrypt + + +def get_aes128ecb_decrypt_openssl(): + if not openssl_reachable(): + raise NotImplementedError() + + def decrypt(key, ciphertext): + fd, name = tempfile.mkstemp() + fp = os.fdopen(fd, 'wb') + try: + fp.write(ciphertext) + finally: + fp.close() + + args = [ + 'openssl', + 'enc', + '-d', + '-in', + name, + '-aes-128-ecb', + '-K', + b2a_hex(key), + '-nopad', + ] + try: + p = Popen(args, stdout=subprocess.PIPE) + try: + return p.stdout.read() + finally: + p.wait() + p.stdout.close() + finally: + os.unlink(name) + + return decrypt + + +def openssl_reachable(): + args = ['openssl', 'version'] + try: + subprocess.check_output(args) + except OSError: + return False + except CalledProcessError: + return False + except Exception as e: + logger.exception(e) + return False + else: + return True diff --git a/src/hwp5/plat/_lxml.py b/src/hwp5/plat/_lxml.py new file mode 100644 index 0000000000000000000000000000000000000000..8ef12dc78e9fd2e148b5ff565e02a6ef815a38bd --- /dev/null +++ b/src/hwp5/plat/_lxml.py @@ -0,0 +1,192 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General 
Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals +from contextlib import contextmanager +import io +import logging +import os.path +import shutil +import sys +import tempfile + +from ..errors import ValidationFailed + + +PY3 = sys.version_info.major == 3 + + +logger = logging.getLogger(__name__) + + +def is_enabled(): + try: + from lxml import etree # noqa + except ImportError: + return False + else: + return True + + +def xslt(xsl_path, inp_path, out_path): + ''' Transform XML with XSL + + :param xsl_path: stylesheet path + :param inp_path: input path + :param out_path: output path + ''' + transform = xslt_compile(xsl_path) + with io.open(out_path, 'wb') as f: + return transform(inp_path, f) + + +def xslt_compile(xsl_path, **params): + xslt = XSLT(xsl_path, **params) + return xslt.transform_into_stream + + +class XSLT: + + def __init__(self, xsl_path, **params): + ''' Compile XSL Transform function. 
+ :param xsl_path: stylesheet path + :returns: a transform function + ''' + from lxml import etree + + with io.open(xsl_path, 'rb') as xsl_file: + xsl_doc = etree.parse(xsl_file) + + self.xsl_path = xsl_path + self.etree_xslt = etree.XSLT(xsl_doc) + self.params = dict((name, etree.XSLT.strparam(value)) + for name, value in params.items()) + + def transform(self, input, output): + ''' + >>> T.transform('input.xml', 'output.xml') + ''' + with io.open(input, 'rb') as inp_file: + with io.open(output, 'wb') as out_file: + return self._transform(inp_file, out_file) + + def transform_into_stream(self, input, output): + ''' + >>> T.transform_into_stream('input.xml', sys.stdout) + ''' + with io.open(input, 'rb') as inp_file: + return self._transform(inp_file, output) + + def _transform(self, input, output): + # input, output: binary stream + + from lxml import etree + source = etree.parse(input) + logger.info('_lxml.xslt(%s) start', + os.path.basename(self.xsl_path)) + result = self.etree_xslt(source, **self.params) + logger.info('_lxml.xslt(%s) end', + os.path.basename(self.xsl_path)) + # https://lxml.de/1.3/FAQ.html#what-is-the-difference-between-str-xslt-doc-and-xslt-doc-write + result = bytes(result) + output.write(result) + return dict() + + +def relaxng(rng_path, inp_path): + relaxng = RelaxNG(rng_path) + return relaxng.validate(inp_path) + + +def relaxng_compile(rng_path): + ''' Compile RelaxNG file + + :param rng_path: RelaxNG path + :returns: a validation function + ''' + return RelaxNG(rng_path) + + +class RelaxNG: + + def __init__(self, rng_path): + from lxml import etree + + with io.open(rng_path, 'rb') as rng_file: + rng = etree.parse(rng_file) + + self.rng_path = rng_path + self.etree_relaxng = etree.RelaxNG(rng) + + @contextmanager + def validating_output(self, output): + fd, name = tempfile.mkstemp() + try: + with os.fdopen(fd, 'wb+') as f: + yield f + f.seek(0) + if not self.validate_stream(f): + raise ValidationFailed('RelaxNG') + f.seek(0) + 
shutil.copyfileobj(f, output) + finally: + try: + os.unlink(name) + except Exception as e: + logger.warning('%s: can\'t unlink %s', e, name) + + def validate(self, input): + from lxml import etree + with io.open(input, 'rb') as f: + doc = etree.parse(f) + return self._validate(doc) + + def validate_stream(self, input): + from lxml import etree + doc = etree.parse(input) + return self._validate(doc) + + def _validate(self, doc): + logger.info('_lxml.relaxng(%s) start', os.path.basename(self.rng_path)) + try: + valid = self.etree_relaxng.validate(doc) + except Exception as e: + logger.exception(e) + raise + else: + if not valid: + for error in self.etree_relaxng.error_log: + logger.error('%s', error) + return valid + finally: + logger.info( + '_lxml.relaxng(%s) end', + os.path.basename(self.rng_path) + ) + + +def errlog_to_dict(error): + return dict(message=error.message, + filename=error.filename, + line=error.line, + column=error.column, + domain=error.domain_name, + type=error.type_name, + level=error.level_name) diff --git a/src/hwp5/plat/_uno/__init__.py b/src/hwp5/plat/_uno/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..050a2139bd7e946f4262f57c684e293599590229 --- /dev/null +++ b/src/hwp5/plat/_uno/__init__.py @@ -0,0 +1,247 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. 
+# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals +import io +import logging +import os.path +import sys + +from ...errors import InvalidOleStorageError + + +PY3 = sys.version_info.major == 3 +if PY3: + basestring = str + + +logger = logging.getLogger(__name__) + +enabled = False + + +def is_enabled(): + if 'PYHWP_PLAT_UNO' in os.environ: + PYHWP_PLAT_UNO = os.environ['PYHWP_PLAT_UNO'].strip() + try: + forced = bool(int(PYHWP_PLAT_UNO)) + logger.debug('%s: forced to be %s by PYHWP_PLAT_UNO', __name__, + 'enabled' if forced else 'disabled') + return forced + except: + logger.warning('PYHWP_PLAT_UNO=%s (invalid)', PYHWP_PLAT_UNO) + logger.debug('%s: is %s', __name__, 'enabled' if enabled else 'disabled') + return enabled + + +def enable(): + import uno + import unohelper + + g = globals() + g['uno'] = uno + g['unohelper'] = unohelper + g['enabled'] = True + logger.info('%s: enabled.', __name__) + + +def disable(): + global enabled + enabled = False + logger.info('%s: disabled.', __name__) + + +def XSLTTransformer(context, stylesheet_url, source_url, source_url_base): + from com.sun.star.beans import NamedValue + from hwp5.plat._uno.services import css + args = (NamedValue('StylesheetURL', stylesheet_url), + NamedValue('SourceURL', source_url), + NamedValue('SourceBaseURL', source_url_base)) + select = os.environ.get('PYHWP_PLAT_UNO_XSLT', 'libxslt') + logger.debug('PYHWP_PLAT_UNO_XSLT = %s', select) + if select == 'jaxthelper': + logger.debug('%s.xslt: using css.comp.JAXTHelper', __name__) + return css.comp.JAXTHelper(context, *args) + else: + logger.debug('%s.xslt: using %s', __name__, + 'css.comp.documentconversion.LibXSLTTransformer') + return css.comp.documentconversion.LibXSLTTransformer(context, *args) + + +class OneshotEvent(object): + + def __init__(self): + pin, pout 
= os.pipe() + self.pin = os.fdopen(pin, 'r') + self.pout = os.fdopen(pout, 'w') + + def wait(self): + self.pin.read() + self.pin.close() + + def signal(self): + self.pout.close() + + +class XSLT(object): + + def __init__(self, context): + self.context = context + + def __call__(self, xsl_path, inp_path, out_path): + import uno + import unohelper + from hwp5.plat._uno import ucb + from hwp5.plat._uno.adapters import OutputStreamToFileLike + xsl_path = os.path.abspath(xsl_path) + xsl_url = uno.systemPathToFileUrl(xsl_path) + + inp_path = os.path.abspath(inp_path) + inp_url = uno.systemPathToFileUrl(inp_path) + inp_stream = ucb.open_url(self.context, inp_url) + + out_path = os.path.abspath(out_path) + with io.open(out_path, 'wb') as out_file: + out_stream = OutputStreamToFileLike(out_file, dontclose=True) + + from com.sun.star.io import XStreamListener + + class XSLTListener(unohelper.Base, XStreamListener): + def __init__(self): + self.event = OneshotEvent() + + def started(self): + logger.info('XSLT started') + + def closed(self): + logger.info('XSLT closed') + self.event.signal() + + def terminated(self): + logger.info('XSLT terminated') + self.event.signal() + + def error(self, exception): + logger.error('XSLT error: %s', exception) + self.event.signal() + + def disposing(self, source): + logger.info('XSLT disposing: %s', source) + self.event.signal() + + listener = XSLTListener() + transformer = XSLTTransformer(self.context, xsl_url, '', '') + transformer.InputStream = inp_stream + transformer.OutputStream = out_stream + transformer.addListener(listener) + + transformer.start() + xsl_name = os.path.basename(xsl_path) + logger.info('xslt.soffice(%s) start', xsl_name) + try: + listener.event.wait() + finally: + logger.info('xslt.soffice(%s) end', xsl_name) + + transformer.removeListener(listener) + return dict() + + +def xslt(xsl_path, inp_path, out_path): + import uno + context = uno.getComponentContext() + xslt = XSLT(context) + return xslt(xsl_path, inp_path, 
out_path) + + +def oless_from_filename(filename): + inputstream = inputstream_from_filename(filename) + return oless_from_inputstream(inputstream) + + +def inputstream_from_filename(filename): + f = io.open(filename, 'rb') + from hwp5.plat._uno.adapters import InputStreamFromFileLike + return InputStreamFromFileLike(f) + + +def oless_from_inputstream(inputstream): + import uno + context = uno.getComponentContext() + sm = context.ServiceManager + name = 'com.sun.star.embed.OLESimpleStorage' + args = (inputstream, ) + return sm.createInstanceWithArgumentsAndContext(name, args, context) + + +class OleStorage(object): + + def __init__(self, stg): + ''' an OLESimpleStorage to hwp5 storage adapter. + + :param stg: a filename or an instance of OLESimpleStorage + ''' + if isinstance(stg, basestring): + self.oless = oless_from_filename(stg) + try: + self.oless.getElementNames() + except: + errormsg = 'Not a valid OLE2 Compound Binary File.' + raise InvalidOleStorageError(errormsg) + else: + # TODO assert stg is an instance of OLESimpleStorage + self.oless = stg + + def __iter__(self): + return iter(self.oless.getElementNames()) + + def __getitem__(self, name): + from com.sun.star.container import NoSuchElementException + try: + elem = self.oless.getByName(name) + except NoSuchElementException: + raise KeyError(name) + services = elem.SupportedServiceNames + if 'com.sun.star.embed.OLESimpleStorage' in services: + return OleStorage(elem) + else: + elem.closeInput() + return OleStorageStream(self.oless, name) + + def close(self): + return + # TODO + # if this is root, close underlying olefile + if self.path == '': + # old version of OleFileIO has no close() + if hasattr(self.olefile, 'close'): + self.olefile.close() + + +class OleStorageStream(object): + + def __init__(self, oless, name): + self.oless = oless + self.name = name + + def open(self): + from hwp5.plat._uno.adapters import FileFromStream + stream = self.oless.getByName(self.name) + return FileFromStream(stream) 
diff --git a/src/hwp5/plat/_uno/adapters.py b/src/hwp5/plat/_uno/adapters.py new file mode 100644 index 0000000000000000000000000000000000000000..9c78ff9dae1e6b57f0a1d307a60cb41ab72c5bf8 --- /dev/null +++ b/src/hwp5/plat/_uno/adapters.py @@ -0,0 +1,141 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# +import uno +import unohelper +from com.sun.star.io import XInputStream, XSeekable, XOutputStream + + +class InputStreamFromFileLike(unohelper.Base, XInputStream, XSeekable): + ''' Implementation of XInputStream, XSeekable based on a file-like object + + Implements com.sun.star.io.XInputStream and com.sun.star.io.XSeekable + + :param f: a file-like object + ''' + def __init__(self, f, dontclose=False): + self.f = f + self.dontclose = dontclose + + def readBytes(self, aData, nBytesToRead): + data = self.f.read(nBytesToRead) + return len(data), uno.ByteSequence(data) + + readSomeBytes = readBytes + + def skipBytes(self, nBytesToSkip): + self.f.read(nBytesToSkip) + + def available(self): + return 0 + + def closeInput(self): + if not self.dontclose: + self.f.close() + + def seek(self, location): + self.f.seek(location) + + def getPosition(self): + pos = self.f.tell() + return pos + + def getLength(self): + pos = self.f.tell() + try: + self.f.seek(0, 2) + length = self.f.tell() + return 
length + finally: + self.f.seek(pos) + + +class OutputStreamToFileLike(unohelper.Base, XOutputStream): + ''' Implementation of XOutputStream based on a file-like object. + + Implements com.sun.star.io.XOutputStream. + + :param f: a file-like object + ''' + def __init__(self, f, dontclose=False): + self.f = f + self.dontclose = dontclose + + def writeBytes(self, bytesequence): + self.f.write(bytesequence.value) + + def flush(self): + self.f.flush() + + def closeOutput(self): + if not self.dontclose: + self.f.close() + + +class FileFromStream(object): + ''' A file-like object based on XInputStream/XOuputStream/XSeekable + + :param stream: a stream object which implements + com.sun.star.io.XInputStream, com.sun.star.io.XOutputStream or + com.sun.star.io.XSeekable + ''' + def __init__(self, stream): + self.stream = stream + + if hasattr(stream, 'readBytes'): + def read(size=None): + if size is None: + data = '' + while True: + bytes = uno.ByteSequence('') + n_read, bytes = stream.readBytes(bytes, 4096) + if n_read == 0: + return data + data += bytes.value + bytes = uno.ByteSequence('') + n_read, bytes = stream.readBytes(bytes, size) + return bytes.value + self.read = read + + if hasattr(stream, 'seek'): + self.tell = stream.getPosition + + def seek(offset, whence=0): + if whence == 0: + pass + elif whence == 1: + offset += stream.getPosition() + elif whence == 2: + offset += stream.getLength() + stream.seek(offset) + self.seek = seek + + if hasattr(stream, 'writeBytes'): + def write(s): + stream.writeBytes(uno.ByteSequence(s)) + self.write = write + + def flush(): + stream.flush() + self.flush = flush + + def close(self): + if hasattr(self.stream, 'closeInput'): + self.stream.closeInput() + elif hasattr(self.stream, 'closeOutput'): + self.stream.closeOutput() diff --git a/src/hwp5/plat/_uno/services.py b/src/hwp5/plat/_uno/services.py new file mode 100644 index 0000000000000000000000000000000000000000..c952f8a8dad3584fa99dd14712adacd34134c98e --- /dev/null +++ 
b/src/hwp5/plat/_uno/services.py @@ -0,0 +1,69 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# + + +def create_service(context, name, *args): + sm = context.ServiceManager + if len(args) > 0: + return sm.createInstanceWithArgumentsAndContext(name, args, context) + else: + return sm.createInstanceWithContext(name, context) + + +class Namespace(object): + def __init__(self, dotted_name): + self.dotted_name = dotted_name + + def __getattr__(self, name): + return Namespace(self.dotted_name + '.' + name) + + def __call__(self, context, *args): + return create_service(context, self.dotted_name, *args) + + def bind(self, context): + return ContextBoundNamespace(self, context) + + +class ContextBoundNamespace(object): + + def __init__(self, namespace, context): + self.namespace = namespace + self.context = context + + def __getattr__(self, name): + obj = getattr(self.namespace, name, None) + if isinstance(obj, Namespace): + return obj.bind(self.context) + return obj + + def __call__(self, *args): + return self.namespace(self.context, *args) + + def __iter__(self): + context = self.context + sm = context.ServiceManager + prefix = self.dotted_name + '.' 
+ for name in sm.AvailableServiceNames: + if name.startswith(prefix): + basename = name[len(prefix):] + if basename.find('.') == -1: + yield basename + + +css = Namespace('com.sun.star') diff --git a/src/hwp5/plat/_uno/ucb.py b/src/hwp5/plat/_uno/ucb.py new file mode 100644 index 0000000000000000000000000000000000000000..63abe89645bcc650d7e1311fda5b959e6c12ec17 --- /dev/null +++ b/src/hwp5/plat/_uno/ucb.py @@ -0,0 +1,61 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# + + +def open_url(context, url): + ''' open InputStream from a URL. + + :param url: a URL to open an InputStream. 
+ :returns: an instance of InputStream + ''' + + # see http://wiki.openoffice.org + # /wiki/Documentation/DevGuide/UCB/Using_the_UCB_API + + from hwp5.plat._uno.services import css + css = css.bind(context) + ucb = css.ucb.UniversalContentBroker('Local', 'Office') + content_id = ucb.createContentIdentifier(url) + content = ucb.queryContent(content_id) + + import unohelper + from com.sun.star.io import XActiveDataSink + + class DataSink(unohelper.Base, XActiveDataSink): + def setInputStream(self, stream): + self.stream = stream + + def getInputStream(self): + return self.stream + + datasink = DataSink() + + from com.sun.star.ucb import Command, OpenCommandArgument2 + openargs = OpenCommandArgument2() + openargs.Mode = 2 # OpenMode.DOCUMENT + openargs.Priority = 32768 + openargs.Sink = datasink + + command = Command() + command.Name = 'open' + command.Handle = -1 + command.Argument = openargs + + content.execute(command, 0, None) + return datasink.stream diff --git a/src/hwp5/plat/gir_gsf.py b/src/hwp5/plat/gir_gsf.py new file mode 100644 index 0000000000000000000000000000000000000000..a3e6a47a1092f7318aba6254c2256405dba6c8c8 --- /dev/null +++ b/src/hwp5/plat/gir_gsf.py @@ -0,0 +1,144 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
try:
    bytes
except NameError:
    # interpreters without the ``bytes`` name
    bytes = str


logger = logging.getLogger(__name__)


def is_enabled():
    ''' Tell whether this backend can be used.

    :returns: True if ``gi.repository.Gsf`` can be imported, False otherwise
    '''
    try:
        from gi.repository import Gsf
    except Exception:
        return False
    else:
        Gsf  # silencing
        return True


def open(path):
    ''' Open the file at `path` as an MS-OLE infile.

    :param path: filesystem path, handed to ``InputGio.new_for_path()``
    :returns: the result of ``InfileMSOle.new()``
    '''
    from gi.repository.Gsf import InputGio
    from gi.repository.Gsf import InfileMSOle

    inp = InputGio.new_for_path(path)
    return InfileMSOle.new(inp)


def listdir(gsfole):
    ''' Iterate over the names of the children of `gsfole`.

    :param gsfole: an object providing ``num_children()`` and
        ``name_by_index()``
    '''
    # ``range`` rather than ``xrange``: the latter does not exist on Python 3
    for i in range(gsfole.num_children()):
        yield gsfole.name_by_index(i)


class OleStorage:
    ''' A storage (directory-like node) of an OLE2 compound file.

    :param gsfole: an ``InfileMSOle``, a ``Gsf.Input`` to be wrapped by one,
        or anything else, which is handed to :func:`open` as a path
    :raises: `InvalidOleStorageError` when the input cannot be opened as an
        OLE2 storage
    '''

    def __init__(self, gsfole):
        from gi.repository.Gsf import Input
        from gi.repository.Gsf import InfileMSOle

        if isinstance(gsfole, InfileMSOle):
            self.gsfole = gsfole
        elif isinstance(gsfole, Input):
            try:
                self.gsfole = InfileMSOle.new(gsfole)
            except Exception:
                raise InvalidOleStorageError()
        else:
            try:
                self.gsfole = open(gsfole)
            except Exception:
                raise InvalidOleStorageError()

    def __iter__(self):
        return listdir(self.gsfole)

    def __getitem__(self, name):
        ''' Return the child called `name`.

        :raises KeyError: when there is no such child
        '''
        child = self.gsfole.child_by_name(name)
        if child:
            # a child reporting -1 children is treated as a stream
            if child.num_children() == -1:
                return OleStreamItem(self.gsfole, name)
            else:
                return OleStorage(child)
        else:
            raise KeyError(name)

    def close(self):
        del self.gsfole


class OleStreamItem:
    ''' A stream entry of a storage; ``open()`` yields a readable object. '''

    def __init__(self, parent, name):
        self.__parent = parent
        self.__name = name

    def open(self):
        ''' Open the stream.

        :returns: an :class:`OleStream`
        :raises IOError: when the parent has no child with the stored name
        '''
        gsfole = self.__parent.child_by_name(self.__name)
        if gsfole:
            return OleStream(gsfole)
        else:
            raise IOError(self.__name)


class OleStream:
    ''' File-like wrapper (read/seek/tell/close) around a Gsf input. '''

    def __init__(self, gsfole):
        self.gsfole = gsfole

    def close(self):
        pass

    def read(self, size=None):
        ''' Read up to `size` bytes from the current position.

        :param size: number of bytes to read; ``None`` or a negative value
            reads everything up to the end of the stream
        :returns: the bytes read; an empty ``bytes`` at the end of the stream
        '''
        remaining = self.gsfole.size - self.gsfole.tell()
        # never ask the underlying input for more than what is left
        if size is None or size < 0 or size > remaining:
            size = remaining

        if size > 0:
            return self.gsfole.read(size)
        return bytes()

    def seek(self, offset, whence=0):
        ''' Move the stream position.

        :param offset: offset relative to the position selected by `whence`
        :param whence: 0 (start), 1 (current position) or 2 (end)
        :raises ValueError: on any other `whence`
        '''
        from gi.repository.GLib import SeekType
        if whence == 0:
            whence = SeekType.SET
        elif whence == 1:
            whence = SeekType.CUR
        elif whence == 2:
            whence = SeekType.END
        else:
            raise ValueError(whence)

        self.gsfole.seek(offset, whence)

    def tell(self):
        return self.gsfole.tell()
logger = logging.getLogger(__name__)


def is_enabled():
    ''' Tell whether this backend can be used.

    :returns: True only on a ``java`` platform where ``javax.xml.transform``
        can be imported
    '''
    if not sys.platform.startswith('java'):
        logger.info('%s: disabled', __name__)
        return False
    try:
        import javax.xml.transform
        javax
    except ImportError:
        logger.info('%s: disabled', __name__)
        return False
    else:
        logger.info('%s: enabled', __name__)
        return True


def xslt(xsl_path, inp_path, out_path):
    ''' Transform the file `inp_path` with the stylesheet `xsl_path` and
    write the result to the file `out_path`.

    :returns: the result of the compiled transform (a dict)
    '''
    transform = xslt_compile(xsl_path)
    with io.open(out_path, 'wb') as f:
        return transform(inp_path, f)


class XSLT:
    ''' A stylesheet compiled into a ``javax.xml.transform`` transformer.

    :param xsl_path: path of the stylesheet
    :param params: stylesheet parameters; values are converted with
        ``unicode()`` before being set on the transformer
    '''

    def __init__(self, xsl_path, **params):
        from javax.xml.transform import URIResolver
        from javax.xml.transform import TransformerFactory
        from javax.xml.transform.stream import StreamSource
        from java.io import FileInputStream

        xsl_path = os.path.abspath(xsl_path)
        xsl_base = os.path.dirname(xsl_path)

        class BaseURIResolver(URIResolver):
            ''' Resolves hrefs relative to the stylesheet directory. '''

            def __init__(self, base):
                self.base = base

            def resolve(self, href, base):
                path = os.path.join(self.base, href)
                path = os.path.abspath(path)
                fis = FileInputStream(path)
                return StreamSource(fis)

        uri_resolver = BaseURIResolver(xsl_base)

        xslt_factory = TransformerFactory.newInstance()
        xslt_factory.setURIResolver(uri_resolver)

        xsl_fis = FileInputStream(xsl_path)
        try:
            xsl_source = StreamSource(xsl_fis)
            self.transformer = xslt_factory.newTransformer(xsl_source)
        finally:
            # the stylesheet stream is not needed once the transformer exists
            xsl_fis.close()

        for k, v in params.items():
            self.transformer.setParameter(k, unicode(v))

    def transform(self, input, output):
        '''
        >>> T.transform('input.xml', 'output.xml')
        '''
        from java.io import FileInputStream
        from java.io import FileOutputStream
        out_path = os.path.abspath(output)
        inp_path = os.path.abspath(input)
        with closing(FileInputStream(inp_path)) as inp_fis:
            with closing(FileOutputStream(out_path)) as out_fos:
                return self._transform(inp_fis, out_fos)

    def transform_into_stream(self, input, output):
        '''
        >>> T.transform_into_stream('input.xml', sys.stdout)
        '''
        from java.io import FileInputStream
        inp_path = os.path.abspath(input)
        with closing(FileInputStream(inp_path)) as inp_fis:
            out_fos = wrap_filelike_outputstream(output)
            return self._transform(inp_fis, out_fos)

    def _transform(self, input, output):
        ''' Run the transformer from a Java input stream to a Java output
        stream.

        :returns: an empty dict
        '''
        from javax.xml.transform.stream import StreamSource
        from javax.xml.transform.stream import StreamResult
        inp_source = StreamSource(input)
        out_result = StreamResult(output)
        self.transformer.transform(inp_source, out_result)
        return dict()


def xslt_compile(xsl_path, **params):
    ''' Compile a stylesheet.

    :returns: a callable ``(input_path, output_file)``, namely the bound
        :meth:`XSLT.transform_into_stream`
    '''
    xslt = XSLT(xsl_path, **params)
    return xslt.transform_into_stream


def wrap_filelike_inputstream(f):
    ''' Wrap a Python file-like object with ``FilelikeInputStream``. '''
    from org.python.core import FilelikeInputStream
    return FilelikeInputStream(f)


def wrap_filelike_outputstream(f):
    ''' Wrap a Python file-like object `f` into a ``java.io.OutputStream``
    subclass whose writes and flushes are forwarded to `f`.

    ``close()`` on the wrapper does nothing.
    '''
    from java.io import OutputStream

    class FilelikeOutputStream(OutputStream):

        def write(self, *args):
            # NOTE(review): a lone byte[] argument is treated like an int
            # here -- confirm whether that overload can reach this method.
            if len(args) == 1:
                # byte
                ch = chr(args[0] & 0xff)
                f.write(ch)
            elif len(args) == 3:
                # array.array, offset, length
                array, offset, length = args
                buf = array.tostring()
                f.write(buf[offset:offset+length])
            else:
                # ``elif`` above matters: a single-byte write must not also
                # end up here
                logger.debug('%r', args)
                self.super__write(*args)

        def flush(self):
            f.flush()

        def close(self):
            pass
    return FilelikeOutputStream()
PY3 = sys.version_info.major == 3
if PY3:
    basestring = str


def is_enabled():
    ''' Tell whether this backend can be used.

    :returns: True if ``POIFSFileSystem`` can be imported, False otherwise
    '''
    try:
        from org.apache.poi.poifs.filesystem import POIFSFileSystem
        POIFSFileSystem  # silencing
        return True
    except ImportError:
        return False


class OleStorage(object):
    ''' Create an OleStorage instance.

    :param olefile: an OLE2 Compound Binary File: either a path string or a
        POIFS ``DirectoryEntry``.
    :raises: `InvalidOleStorageError` when `olefile` is not valid OLE2 format.
    :raises ValueError: when `olefile` is of neither supported type.
    '''

    def __init__(self, olefile):
        from java.io import FileInputStream
        from java.io import IOException
        from org.apache.poi.poifs.filesystem import POIFSFileSystem
        from org.apache.poi.poifs.filesystem import DirectoryEntry

        if isinstance(olefile, basestring):
            path = os.path.abspath(olefile)
            fis = FileInputStream(path)
            try:
                fs = POIFSFileSystem(fis)
            except IOException as e:
                raise InvalidOleStorageError(e.getMessage())
            entry = fs.getRoot()
        elif isinstance(olefile, DirectoryEntry):
            entry = olefile
        else:
            raise ValueError('invalid olefile')

        self.entry = entry

    def __iter__(self):
        return (entry.getName() for entry in self.entry.getEntries())

    def __getitem__(self, name):
        ''' Return the child storage or stream called `name`.

        :raises KeyError: when the entry is missing or is neither a
            directory nor a document entry
        '''
        from java.io import FileNotFoundException
        try:
            entry = self.entry.getEntry(name)
        except FileNotFoundException:
            raise KeyError('%s not found' % name)

        if entry.directoryEntry:
            return OleStorage(entry)
        elif entry.documentEntry:
            return OleStream(entry)
        else:
            raise KeyError('%s is invalid' % name)

    def close(self):
        return


class OleStream(object):
    ''' A document entry; ``open()`` yields a file-like object. '''

    def __init__(self, entry):
        self.entry = entry

    def open(self):
        from org.apache.poi.poifs.filesystem import DocumentInputStream
        dis = DocumentInputStream(self.entry)
        return FileFromDocumentInputStream(dis)


class FileFromDocumentInputStream(object):
    ''' File-like adapter (read/seek/tell/close) over a document input
    stream.

    The stream is marked at construction time, so that ``reset()`` rewinds
    to the initial position, and the number of bytes available at that
    point is remembered as the total size.
    '''

    def __init__(self, dis):
        self.dis = dis
        self.size = dis.available()
        dis.mark(0)

    def read(self, size=None):
        ''' Read up to `size` bytes.

        :param size: number of bytes; ``None`` or a negative value reads
            everything still available
        '''
        import jarray
        dis = self.dis
        available = dis.available()
        if size is None or size < 0 or size > available:
            size = available
        buf = jarray.zeros(size, 'b')
        n_read = dis.read(buf)
        data = buf.tostring()
        if n_read < size:
            # a negative count (end of stream) must not slice from the end
            return data[:max(n_read, 0)]
        return data

    def seek(self, offset, whence=0):
        ''' Move the stream position.

        :param offset: offset relative to the position selected by `whence`;
            may be negative for `whence` 1 and 2
        :param whence: 0 (start), 1 (current position) or 2 (end)
        :raises ValueError: on any other `whence`
        '''
        if whence == 0:
            target = offset
        elif whence == 1:
            target = self.tell() + offset
        elif whence == 2:
            target = self.size + offset
        else:
            raise ValueError('invalid whence: %s' % whence)

        # the stream is only skipped forward: always rewind to the mark and
        # skip to the absolute target, which also covers backward seeks
        dis = self.dis
        dis.reset()
        dis.skip(target)

    def tell(self):
        return self.size - self.dis.available()

    def close(self):
        return self.dis.close()
def is_enabled():
    ''' Tell whether this backend can be used.

    :returns: True if either ``olefile`` or ``OleFileIO_PL`` is importable
    '''
    try:
        import olefile  # noqa
    except Exception:
        pass
    else:
        return True

    try:
        import OleFileIO_PL  # noqa
    except ImportError:
        pass
    else:
        return True

    return False


def import_isOleFile():
    ''' Return ``isOleFile`` from ``olefile``, else from ``OleFileIO_PL``.

    :returns: the function, or None when neither module can be imported
    '''
    try:
        from olefile import isOleFile
    except ImportError:
        pass
    else:
        return isOleFile

    try:
        from OleFileIO_PL import isOleFile
    except ImportError:
        pass
    else:
        return isOleFile


def import_OleFileIO():
    ''' Return ``OleFileIO`` from ``olefile``, else from ``OleFileIO_PL``.

    :returns: the class, or None when neither module can be imported
    '''
    try:
        from olefile import OleFileIO
    except ImportError:
        pass
    else:
        return OleFileIO

    try:
        from OleFileIO_PL import OleFileIO
    except ImportError:
        pass
    else:
        return OleFileIO


class OleStorageItem(object):
    ''' Common base of storages and streams: an olefile plus an internal
    path.
    '''

    def __init__(self, olefile, path, parent=None):
        self.olefile = olefile
        self.path = path  # path DOES NOT end with '/'

    def get_name(self):
        ''' Return the last path segment, or None for the root (``''``). '''
        if self.path == '':
            return None
        segments = self.path.split('/')
        return segments[-1]

    name = cached_property(get_name)


class OleStream(OleStorageItem):

    def open(self):
        return self.olefile.openstream(self.path)


class OleStorage(OleStorageItem):
    ''' Create an OleStorage instance.

    :param olefile: an OLE2 Compound Binary File.
    :type olefile: an OleFileIO instance or an argument to OleFileIO()
    :param path: internal path in the olefile. Should not end with '/'.
    :raises: `InvalidOleStorageError` when `olefile` is not valid OLE2 format.
    '''

    def __init__(self, olefile, path='', parent=None):
        # anything without ``openstream`` is taken as an argument to
        # OleFileIO() and opened here
        if not hasattr(olefile, 'openstream'):
            isOleFile = import_isOleFile()
            OleFileIO = import_OleFileIO()

            if not isOleFile(olefile):
                errormsg = 'Not an OLE2 Compound Binary File.'
                raise InvalidOleStorageError(errormsg)
            olefile = OleFileIO(olefile)
        OleStorageItem.__init__(self, olefile, path, parent)

    def __iter__(self):
        return olefile_listdir(self.olefile, self.path)

    def __getitem__(self, name):
        ''' Return the child storage or stream called `name`.

        :raises KeyError: when the child does not exist or is neither a
            storage nor a stream
        '''
        if self.path == '' or self.path == '/':
            path = name
        else:
            path = self.path + '/' + name
        if not self.olefile.exists(path):
            raise KeyError('%s not found' % path)
        t = self.olefile.get_type(path)
        if t == 1:  # Storage
            return OleStorage(self.olefile, path, self)
        elif t == 2:  # Stream
            return OleStream(self.olefile, path, self)
        else:
            raise KeyError('%s is invalid' % path)

    def close(self):
        # if this is root, close underlying olefile
        if self.path == '':
            # old version of OleFileIO has no close()
            if hasattr(self.olefile, 'close'):
                self.olefile.close()


def olefile_listdir(olefile, path):
    ''' Iterate over the names of the direct children of a storage.

    Every entry of ``olefile.listdir()`` that lies below `path` contributes
    the path segment right under `path`; each name is yielded once, in
    order of first appearance. Nested storages are therefore listed too,
    for the root as well as for any other storage.

    :param olefile: an object providing ``listdir()``, ``exists()`` and
        ``get_type()``
    :param path: internal path of the storage; ``''`` or ``'/'`` for root
    :raises IOError: when `path` does not exist or is not a storage
    '''
    if path == '' or path == '/':
        prefix = []
    else:
        if not olefile.exists(path):
            raise IOError('%s not exists' % path)
        if olefile.get_type(path) != 1:
            raise IOError('%s not a storage' % path)
        prefix = path.split('/')

    depth = len(prefix)
    seen = set()
    for stream in olefile.listdir():
        if len(stream) <= depth or list(stream[:depth]) != prefix:
            continue
        child = stream[depth]
        if child in seen:
            continue
        seen.add(child)
        yield child
logger = logging.getLogger(__name__)

executable = 'xmllint'
enabled = None


def xmllint_reachable():
    ''' Tell whether ``<executable> --version`` can be run successfully.

    Output of the probe, including stderr, is captured and discarded.
    '''
    args = [executable, '--version']
    try:
        subprocess.check_output(args, stderr=subprocess.STDOUT)
    except (OSError, CalledProcessError):
        return False
    except Exception as e:
        logger.exception(e)
        return False
    else:
        return True


def is_enabled():
    ''' Return the enabled state, probing the executable on first use. '''
    global enabled
    if enabled is None:
        enabled = xmllint_reachable()
    return enabled


def enable():
    ''' Force the backend on, without probing the executable. '''
    global enabled
    enabled = True


def disable():
    ''' Force the backend off. '''
    global enabled
    enabled = False


def relaxng(rng_path, inp_path):
    ''' Validate the file `inp_path` against the RelaxNG schema `rng_path`.

    :returns: True if the process exits with status 0
    '''
    args = [executable, '--noout', '--relaxng', rng_path, inp_path]
    return subprocess.call(args) == 0


def relaxng_compile(rng_path):
    ''' Return a :class:`RelaxNG` bound to `rng_path`. '''
    return RelaxNG(rng_path)


class RelaxNG:
    ''' Validator for one RelaxNG schema file. '''

    def __init__(self, rng_path):
        self.rng_path = rng_path

    @contextmanager
    def validating_output(self, output):
        ''' Context manager yielding the stdin of a validating process.

        Whatever is written to the yielded pipe is validated; the process
        writes its own standard output to `output`.

        :param output: passed as ``stdout`` to ``subprocess.Popen``
        :raises Exception: when the body succeeded but the process exited
            with a non-zero status
        '''
        args = [executable, '--relaxng', self.rng_path, '-']
        p = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=output)
        try:
            yield p.stdin
        finally:
            # always finish the child, whether or not the body failed
            p.stdin.close()
            p.wait()
        # only reached when the body did not raise
        if p.returncode != 0:
            raise Exception('RelaxNG validation failed')
logger = logging.getLogger(__name__)

executable = 'xsltproc'
enabled = None


def xslt_reachable():
    ''' Tell whether ``<executable> --version`` can be run successfully. '''
    args = [executable, '--version']
    try:
        subprocess.check_output(args)
    except (OSError, CalledProcessError):
        return False
    except Exception as e:
        logger.exception(e)
        return False
    else:
        return True


def is_enabled():
    ''' Return the enabled state, probing the executable on first use. '''
    global enabled
    if enabled is None:
        enabled = xslt_reachable()
    return enabled


def enable():
    ''' Force the backend on, without probing the executable. '''
    global enabled
    enabled = True


def disable():
    ''' Force the backend off. '''
    global enabled
    enabled = False


def xslt(xsl_path, inp_path, out_path):
    ''' Transform the file `inp_path` with the stylesheet `xsl_path` into
    the file `out_path`.

    :returns: the result dict of :meth:`XSLT.transform`
    '''
    xslt = XSLT(xsl_path)
    return xslt.transform(inp_path, out_path)


class XSLT:
    ''' A stylesheet together with the command line used to apply it.

    :param xsl_path: path of the stylesheet
    :param params: stylesheet parameters, passed as ``--stringparam``
    '''

    def __init__(self, xsl_path, **params):
        self.xsl_path = xsl_path
        self.cmd = [executable]
        for name, value in params.items():
            self.cmd.extend(['--stringparam', name, value])

    def _run(self, cmd, **popen_kwargs):
        ''' Run `cmd` and map its exit status to a result dict.

        :returns: ``{}`` on exit status 0, ``{'errors': []}`` otherwise
        '''
        logger.info('%r', cmd)
        if subprocess.call(cmd, **popen_kwargs) == 0:
            return dict()
        return dict(errors=[])

    def transform(self, input, output):
        '''
        >>> T.transform('input.xml', 'output.xml')
        '''
        return self._run(self.cmd + ['-o', output, self.xsl_path, input])

    def transform_into_stream(self, input, output):
        '''
        >>> T.transform_into_stream('input.xml', sys.stdout)
        '''
        return self._run(self.cmd + [self.xsl_path, input], stdout=output)


def xslt_compile(xsl_path, **params):
    ''' Compile a stylesheet.

    :returns: a callable ``(input_path, output_file)``, namely the bound
        :meth:`XSLT.transform_into_stream`
    '''
    xslt = XSLT(xsl_path, **params)
    return xslt.transform_into_stream
PY2 = sys.version_info.major == 2


def main(args):
    ''' Copy one internal stream of a .hwp file to the standard output.

    :param args: parsed arguments; ``args.stream`` is the internal path of
        the stream, the whole object is handed to ``open_hwpfile()``
    '''
    import shutil

    # bytes are written: on Python 3 use the underlying binary buffer
    if PY2:
        output_fp = sys.stdout
    else:
        output_fp = sys.stdout.buffer

    hwp5file = open_hwpfile(args)
    stream = open_storage_item(hwp5file, args.stream)
    f = stream.open()
    try:
        shutil.copyfileobj(f, output_fp, 4096)
    finally:
        if hasattr(f, 'close'):
            f.close()


def cat_argparser(subparsers, _):
    ''' Register the ``cat`` sub-command.

    :param subparsers: the object returned by ``add_subparsers()``
    :param _: translation function applied to every help text
    :returns: the parser created for the sub-command
    '''
    parser = subparsers.add_parser(
        'cat',
        help=_(
            'Extract out internal streams of .hwp files'
        ),
        description=_(
            'Extract out the specified stream in the '
            'to the standard output.'
        )
    )
    parser.add_argument(
        'hwp5file',
        metavar='',
        help=_('.hwp file to analyze'),
    )
    parser.add_argument(
        'stream',
        metavar='',
        help=_('Internal path of a stream to extract'),
    )
    mutex_group = parser.add_mutually_exclusive_group()
    mutex_group.add_argument(
        '--vstreams',
        action='store_true',
        help=_(
            'Process with virtual streams (i.e. parsed/converted form of '
            'real streams)'
        )
    )
    mutex_group.add_argument(
        '--ole',
        action='store_true',
        help=_(
            'Treat as an OLE Compound File. As a result, '
            'some streams will be presented as-is. (i.e. not decompressed)'
        )
    )
    parser.set_defaults(func=main)
    return parser
PY2 = sys.version_info.major == 2
logger = logging.getLogger(__name__)


def main(args):
    ''' Decode a distribute document stream read from the standard input.

    With ``args.sha1`` or ``args.key`` only the head record is read and the
    value decoded from its payload is written, hex-encoded unless
    ``args.raw`` is set. Otherwise the whole decoded stream is copied to
    the standard output.
    '''
    if PY2:
        source = sys.stdin
        sink = sys.stdout
    else:
        source = sys.stdin.buffer
        sink = sys.stdout.buffer

    if args.sha1:
        record = read_record(source, 0)
        encoded = decode_head_to_sha1(record['payload'])
        digest = a2b_hex(encoded.decode('utf-16le'))
        sink.write(digest if args.raw else b2a_hex(digest))
        return

    if args.key:
        record = read_record(source, 0)
        secret = decode_head_to_key(record['payload'])
        sink.write(secret if args.raw else b2a_hex(secret))
        return

    shutil.copyfileobj(decode(source), sink)


def diststream_argparser(subparsers, _):
    ''' Register the ``diststream`` sub-command.

    :param subparsers: the object returned by ``add_subparsers()``
    :param _: translation function applied to every help text
    :returns: the parser created for the sub-command
    '''
    summary = _(
        'Decode a distribute document stream.'
    )
    details = _(
        'Decode a distribute document stream.'
    )
    parser = subparsers.add_parser(
        'diststream',
        help=summary,
        description=details,
    )

    # --sha1 and --key exclude each other
    operations = parser.add_mutually_exclusive_group()
    operations.add_argument(
        '--sha1',
        action='store_true',
        help=_('Print SHA-1 value for decryption.'),
    )
    operations.add_argument(
        '--key',
        action='store_true',
        help=_('Print decrypted key.'),
    )

    parser.add_argument(
        '--raw',
        action='store_true',
        help=_('Print raw binary objects as is.'),
    )
    parser.set_defaults(func=main)
    return parser
PY2 = sys.version_info.major == 2
if PY2:
    ifilter = itertools.ifilter
    imap = itertools.imap
else:
    ifilter = filter
    imap = map


logger = logging.getLogger(__name__)


def main(args):
    ''' Print every record model of the given files that satisfies all the
    conditions built from `args`.

    A ``ParseError`` raised while processing a file is logged and the
    remaining files are still processed.
    '''
    filenames = filenames_from_args(args)

    conditions = list(conditions_from_args(args))
    filter_conditions = partial(
        ifilter, lambda m: all(condition(m) for condition in conditions)
    )

    print_model = printer_from_args(args)

    for filename in filenames:
        try:
            models = hwp5file_models(filename)
            models = filter_conditions(models)
            for model in models:
                print_model(model)
        except ParseError as e:
            logger.error('---- On processing %s:', filename)
            e.print_to_logger(logger)


def find_argparser(subparsers, _):
    ''' Register the ``find`` sub-command.

    :param subparsers: the object returned by ``add_subparsers()``
    :param _: translation function applied to every help text
    :returns: the parser created for the sub-command
    '''
    parser = subparsers.add_parser(
        'find',
        help=_(
            'Find record models with specified predicates.'
        ),
        description=_(
            'Find record models with specified predicates.'
        ),
    )
    parser.add_argument(
        'hwp5files',
        nargs='*',
        metavar='',
        help=_('.hwp files to analyze'),
    )
    parser.add_argument(
        '--from-stdin',
        action='store_true',
        help=_('get filenames from stdin'),
    )
    filter_group = parser.add_mutually_exclusive_group()
    filter_group.add_argument(
        '--model',
        metavar='',
        help=_(
            'filter with record model name'
        ),
    )
    filter_group.add_argument(
        '--tag',
        metavar='',
        help=_(
            'filter with record HWPTAG'
        ),
    )
    parser.add_argument(
        '--incomplete',
        action='store_true',
        help=_('filter with incompletely parsed content'),
    )
    parser.add_argument(
        '--format',
        metavar='',
        help=_(
            'record output format'
        ),
    )
    parser.add_argument(
        '--dump',
        action='store_true',
        help=_('dump record'),
    )
    parser.set_defaults(func=main)
    return parser


def filenames_from_args(args):
    ''' Return the filenames to process: lines of the standard input when
    ``args.from_stdin`` is set, ``args.hwp5files`` otherwise.
    '''
    if args.from_stdin:
        return filenames_from_stdin(args)
    return args.hwp5files


def filenames_from_stdin(args):
    ''' Iterate over the filenames given one per line on standard input.

    Only the line terminator is removed, so a final line without a newline
    keeps its last character. Empty lines are skipped.
    '''
    names = imap(lambda line: line.rstrip('\r\n'), sys.stdin)
    return ifilter(None, names)


def conditions_from_args(args):
    ''' Yield one predicate over a model dict for each filter option set in
    `args` (``model``, ``tag``, ``incomplete``).
    '''

    if args.model:
        def with_model_name(model):
            return args.model == model['type'].__name__
        yield with_model_name

    if args.tag:
        tag = args.tag
        try:
            tag = int(tag)
        except ValueError:
            # not a number: compared as a tag name as is
            pass
        else:
            tag = tagnames[tag]

        def with_tag(model):
            return model['tagname'] == tag
        yield with_tag

    if args.incomplete:
        def with_incomplete(model):
            return 'unparsed' in model
        yield with_incomplete


def hwp5file_models(filename):
    ''' Iterate over all models of the file, each one annotated with a
    ``filename`` key.
    '''
    # NOTE(review): the Hwp5File is never closed here -- confirm whether it
    # should be.
    hwp5file = Hwp5File(filename)
    for model in flat_models(hwp5file):
        model['filename'] = filename
        yield model


def flat_models(hwp5file, **kwargs):
    ''' Iterate over the models of ``DocInfo`` and then of every section in
    ``BodyText``, each one annotated with a ``stream`` key.

    :param kwargs: passed through to each ``models()`` call
    '''
    for model in hwp5file.docinfo.models(**kwargs):
        model['stream'] = 'DocInfo'
        yield model

    for section in hwp5file.bodytext:
        for model in hwp5file.bodytext[section].models(**kwargs):
            model['stream'] = 'BodyText/' + section
            yield model


def printer_from_args(args):
    ''' Build the function that prints one model.

    The line format is ``args.format`` when given, a default otherwise;
    in it ``%(type)s`` is the name of the model type. With ``args.dump``
    the model is additionally printed as JSON, followed by its binary
    parse events.
    '''

    if args.format:
        fmt = args.format
    else:
        fmt = '%(filename)s %(stream)s %(seqno)s %(tagname)s %(type)s'

    dump = args.dump

    def print_model(model):
        printable_model = dict(model, type=model['type'].__name__)
        print(fmt % printable_model)
        if dump:
            print(model_to_json(model, sort_keys=True, indent=2))

            def print_log(log_fmt, *log_args):
                print(log_fmt % log_args)
            list(log_events(model['binevents'], print_log))
    return print_model
def main(args):
    ''' Copy the text form of the file header to standard output.

    :param args: parsed arguments of the ``header`` subcommand;
        ``args.hwp5file`` is the path of the file to open
    '''
    # The header text is copied as-is, so on Python 3 write to the
    # underlying byte stream of stdout.
    sink = sys.stdout if PY2 else sys.stdout.buffer

    from hwp5.filestructure import Hwp5File
    document = Hwp5File(args.hwp5file)
    header_text = document.header.open_text()
    try:
        shutil.copyfileobj(header_text, sink)
    finally:
        document.close()
def main(args):
    ''' Print the stream names of the file selected by the arguments.

    :param args: parsed arguments of the ``ls`` subcommand, passed on to
        ``open_hwpfile``
    '''
    printstorage(open_hwpfile(args))
not decompressed)' + ) + ) + parser.set_defaults(func=main) + return parser diff --git a/src/hwp5/proc/models.py b/src/hwp5/proc/models.py new file mode 100644 index 0000000000000000000000000000000000000000..4d9562613819f66d50ad9b75c073dfca5c106eba --- /dev/null +++ b/src/hwp5/proc/models.py @@ -0,0 +1,234 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+# +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals +from itertools import islice +import sys + +from ..binmodel import Hwp5File +from ..binmodel import ModelStream +from ..binmodel import RecordModel +from ..binmodel import model_to_json +from ..cli import parse_recordstream_name +from ..dataio import hexdump +from ..storage import Open2Stream +from ..treeop import ENDEVENT +from ..utils import generate_json_array +from ..utils import unicode_unescape + + +PY2 = sys.version_info.major == 2 + + +def main(args): + stream = stream_from_args(args) + if args.events: + for event, item in stream.parse_model_events(): + type = item['type'].__name__ + if event is not None: + if item['type'] is RecordModel: + record = item['record'] + fmt = ' %s Record %s level=%s %s' + print(fmt % (event.__name__, + record['seqno'], + record['level'], + record['tagname'])) + if event is ENDEVENT: + leftover = item['leftover'] + print('%04x' % leftover['offset']) + if len(leftover['bytes']): + print('') + print('leftover:') + print(hexdump(leftover['bytes'])) + print('-' * 20) + else: + print(' ', event.__name__, type, item.get('name', '')) + else: + offset = item['bin_offset'] + name = item.get('name', '-') + value = item.get('value', '-') + print('%04x' % offset, type, name, repr(value)) + return + + models_from_stream = models_from_args(args) + models = models_from_stream(stream) + + print_models = print_models_from_args(args) + print_models(models) + + +def models_argparser(subparsers, _): + parser = subparsers.add_parser( + 'models', + help=_( + 'Print parsed binary models of .hwp file record streams.' + ), + description=_( + 'Print parsed binary models in the specified .' + ), + ) + parser.add_argument( + 'hwp5file', + nargs='?', + metavar='', + help=_('.hwp file to analyze'), + ) + parser.add_argument( + 'record_stream', + nargs='?', + metavar='', + help=_( + 'Record-structured internal streams.\n' + '(e.g. 
def models_from_args(args):
    ''' Build a function that selects models from a stream.

    :param args: parsed arguments; ``args.treegroup`` and ``args.seqno`` are
        numeric strings or empty/None
    :returns: a one-argument callable taking a stream and returning an
        iterable of its models: the requested tree group, the single model
        at the requested sequence number, or every model
    '''
    if args.treegroup:
        group_index = int(args.treegroup)

        def select_treegroup(stream):
            return stream.models(treegroup=group_index)
        return select_treegroup

    if args.seqno:
        first = int(args.seqno)
        stop = first + 1

        def select_single(stream):
            # slice out exactly one model at position ``first``
            return islice(stream.models(), first, stop)
        return select_single

    def select_all(stream):
        return stream.models()
    return select_all
def transform_model_formattable(model):
    ''' Return a copy of the model usable with ``%``-formatting.

    The copy has its ``type`` entry replaced by the name of the type; the
    given model itself is left untouched.
    '''
    formattable = dict(model)
    formattable['type'] = model['type'].__name__
    return formattable
def main(args):
    ''' Decode standard input through ``StreamReader`` onto standard output.

    :param args: parsed arguments of the ``rawunz`` subcommand (unused)
    '''
    # Both ends carry raw bytes: on Python 3 use the underlying byte streams.
    if PY2:
        source, sink = sys.stdin, sys.stdout
    else:
        source, sink = sys.stdin.buffer, sys.stdout.buffer

    shutil.copyfileobj(StreamReader(source), sink)
def main(args):
    ''' Print the records of a record stream in the requested format.

    The stream is looked up by ``args.record_stream`` in ``args.hwp5file``
    when a filename is given; otherwise record data is read from standard
    input.

    :param args: parsed arguments of the ``records`` subcommand
    '''
    stdout_text = sys.stdout
    # Raw record output is binary: on Python 3 use the underlying byte stream.
    stdout_binary = sys.stdout if PY2 else sys.stdout.buffer

    filename = args.hwp5file
    if filename:
        hwpfile = Hwp5File(filename)
        # TODO: args.record_stream is None
        streamname = args.record_stream
        stream = parse_recordstream_name(hwpfile, streamname)
    else:
        # Record data is binary as well, so read bytes from stdin instead of
        # the text wrapper on Python 3 (as the ``models`` and ``rawunz``
        # commands do).
        stdin_binary = sys.stdin if PY2 else sys.stdin.buffer
        stream = RecordStream(Open2Stream(lambda: stdin_binary), None)

    opts = dict()
    rng = args.range
    if rng:
        # 'N-M' selects records N..M-1; a single 'N' selects just record N
        rng = rng.split('-', 1)
        rng = tuple(int(x) for x in rng)
        if len(rng) == 1:
            rng = (rng[0], rng[0] + 1)
        opts['range'] = rng
    treegroup = args.treegroup
    if treegroup is not None:
        opts['treegroup'] = int(treegroup)

    if args.simple:
        for record in stream.records(**opts):
            stdout_text.write('{:04d} {} {}\n'.format(
                record['seqno'],
                ' ' * record['level'],
                record['tagname'],
            ))
    elif args.raw:
        for record in stream.records(**opts):
            dump_record(stdout_binary, record)
    elif args.raw_header:
        for record in stream.records(**opts):
            hdr = encode_record_header(record)
            stdout_binary.write(hdr)
    elif args.raw_payload:
        for record in stream.records(**opts):
            stdout_binary.write(record['payload'])
    else:
        # default output: JSON
        stream.records_json(**opts).dump(stdout_text)
+ ), + ) + parser.add_argument( + 'hwp5file', + nargs='?', + metavar='', + help=_('.hwp file to analyze'), + ) + parser.add_argument( + 'record_stream', + nargs='?', + metavar='', + help=_( + 'Record-structured internal streams.\n' + '(e.g. DocInfo, BodyText/*)\n' + ), + ) + output_formats = parser.add_mutually_exclusive_group() + output_formats.add_argument( + '--simple', + action='store_true', + help=_( + 'Print records as simple tree' + ) + ) + output_formats.add_argument( + '--json', + action='store_true', + help=_( + 'Print records as json' + ) + ) + output_formats.add_argument( + '--raw', + action='store_true', + help=_( + 'Print records as is' + ) + ) + output_formats.add_argument( + '--raw-header', + action='store_true', + help=_( + 'Print record headers as is' + ) + ) + output_formats.add_argument( + '--raw-payload', + action='store_true', + help=_( + 'Print record payloads as is' + ) + ) + subset = parser.add_mutually_exclusive_group() + subset.add_argument( + '--range', + metavar='', + help=_( + 'Specifies the range of the records.\n' + 'N-M means "from the record N to M-1 (excluding M)"\n' + 'N means just the record N\n' + ) + ) + subset.add_argument( + '--treegroup', + metavar='', + help=_( + 'Specifies the N-th subtree of the record structure.' + ) + ) + parser.set_defaults(func=main) + return parser diff --git a/src/hwp5/proc/summaryinfo.py b/src/hwp5/proc/summaryinfo.py new file mode 100644 index 0000000000000000000000000000000000000000..08b9afac4b20d07cb7d38c230032411648f81560 --- /dev/null +++ b/src/hwp5/proc/summaryinfo.py @@ -0,0 +1,64 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
def main(args):
    ''' Write the summary information of a file as UTF-8 text lines.

    :param args: parsed arguments of the ``summaryinfo`` subcommand;
        ``args.hwp5file`` is the path of the file to open
    '''
    # Lines are encoded here, so on Python 3 write to the byte stream.
    sink = sys.stdout if PY2 else sys.stdout.buffer

    text_formatter = HwpSummaryInfoTextFormatter()
    document = Hwp5File(args.hwp5file)
    try:
        lines = text_formatter.formatTextLines(document.summaryinfo)
        for text in lines:
            sink.write(text.encode('utf-8'))
            sink.write(b'\n')
    finally:
        document.close()
def main(args):
    ''' Unpack the streams of a file into a directory.

    When ``args.out_directory`` is not given, the directory is named after
    the input file: its basename without the extension.

    :param args: parsed arguments of the ``unpack`` subcommand
    '''
    filename = args.hwp5file
    hwp5file = open_hwpfile(args)

    outdir = args.out_directory
    if outdir is None:
        outdir = os.path.splitext(os.path.basename(filename))[0]
    if not os.path.exists(outdir):
        # makedirs() creates missing parent directories too, so a nested
        # output directory such as 'out/sample' works; mkdir() would fail.
        os.makedirs(outdir)
    storage.unpack(hwp5file, outdir)
def main(args):
    ''' Print the file format version as four dot-separated numbers.

    :param args: parsed arguments of the ``version`` subcommand;
        ``args.hwp5file`` is the path of the file to open
    '''
    hwp5file = Hwp5File(args.hwp5file)
    try:
        h = hwp5file.fileheader
        print('%d.%d.%d.%d' % h.version)
    finally:
        # Release the opened file, as the other subcommands working on
        # filestructure.Hwp5File do.
        hwp5file.close()
def main(args):
    ''' Transform a .hwp file into an XML.

    :param args: parsed arguments of the ``xml`` subcommand
    :raises ValueError: if ``args.format`` is neither "flat" nor "nested"
    '''

    fmt = args.format or 'nested'
    if fmt == 'flat':
        xmldump = partial(
            xmldump_flat,
            xml_declaration=not args.no_xml_decl
        )
    elif fmt == 'nested':
        xmldump = partial(
            xmldump_nested,
            xml_declaration=not args.no_xml_decl,
            embedbin=args.embedbin,
        )
    else:
        # Reject unknown formats up front. Otherwise ``xmldump`` stays
        # unbound and the failure only surfaces as an UnboundLocalError
        # after the destination has already been opened.
        raise ValueError(
            'unsupported format: %r (expected "flat" or "nested")' % (fmt,)
        )

    validate = not args.no_validate_wellformed

    open_dest = make_open_dest_file(args.output)
    # Filters applied only when writing to a terminal.
    open_dest = wrap_open_dest_for_tty(open_dest, [
        pager(),
        syntaxhighlight('application/xml'),
    ] + ([
        xmllint(format=True),
    ] if validate else []))
    # Filters applied regardless of the destination.
    open_dest = wrap_open_dest(open_dest, [
        xmllint(encode='utf-8'),
        xmllint(c14n=True),
    ] if validate else [])

    hwp5file = Hwp5File(args.hwp5file)
    with open_dest() as output:
        xmldump(hwp5file, output)
def Record(tagid, level, payload, size=None, seqno=None):
    ''' Build a record as a plain dict.

    :param tagid: numeric tag id; its name is stored under ``tagname``
    :param level: level of the record
    :param payload: payload of the record
    :param size: payload size; defaults to ``len(payload)``
    :param seqno: sequence number; the key is omitted when None
    :returns: a dict with the keys ``tagid``, ``tagname``, ``level``,
        ``size``, ``payload`` and optionally ``seqno``
    '''
    effective_size = len(payload) if size is None else size
    record = {
        'tagid': tagid,
        'tagname': tagname(tagid),
        'level': level,
        'size': effective_size,
        'payload': payload,
    }
    if seqno is None:
        return record
    record['seqno'] = seqno
    return record
def is_stream(item):
    ''' Tell whether the item has a callable ``open`` attribute. '''
    if not hasattr(item, 'open'):
        return False
    return callable(item.open)
def iter_storage_leafs(stg, basepath=''):
    ''' Yield the path of every leaf (non-storage item) in the storage.

    stg: an instance of Storage
    basepath: prefix prepended to every yielded path

    Nested storages are walked depth-first, in the iteration order of each
    storage; path segments are joined with '/'.
    '''
    for name in stg:
        path = basepath + name
        child = stg[name]
        if not is_storage(child):
            yield path
            continue
        for leaf in iter_storage_leafs(child, path + '/'):
            yield leaf
path_segments = path + + item = stg + for name in path_segments: + item = item[name] + return item + + +def printstorage(stg, basepath=''): + names = list(stg) + names.sort() + for name in names: + path = basepath + name + item = stg[name] + if is_storage(item): + printstorage(item, path + '/') + elif is_stream(item): + print(path.encode('unicode_escape').decode('utf-8')) diff --git a/src/hwp5/storage/fs.py b/src/hwp5/storage/fs.py new file mode 100644 index 0000000000000000000000000000000000000000..226311d8b1f4831068910cf43cbd899f372ccfaf --- /dev/null +++ b/src/hwp5/storage/fs.py @@ -0,0 +1,52 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals +import io +import os.path + + +class FileSystemStorage(object): + ''' Directory-based stroage. ''' + + def __init__(self, path): + self.path = path + + def __iter__(self): + return iter(sorted(os.listdir(self.path))) + + def __getitem__(self, name): + path = os.path.join(self.path, name) + if os.path.isdir(path): + return FileSystemStorage(path) + elif os.path.exists(path): + return FileSystemStream(path) + else: + raise KeyError(name) + + +class FileSystemStream(object): + ''' File-based stream. 
''' + + def __init__(self, path): + self.path = path + + def open(self): + return io.open(self.path, 'rb') diff --git a/src/hwp5/storage/ole.py b/src/hwp5/storage/ole.py new file mode 100644 index 0000000000000000000000000000000000000000..f35e44732a198253c847167dbeab7dd6917fee18 --- /dev/null +++ b/src/hwp5/storage/ole.py @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
#
from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals
import logging

from ..plat import get_olestorage_class


logger = logging.getLogger(__name__)


class OleStorage(object):
    ''' facade over the OLE storage implementation chosen by the platform

    Constructor arguments are passed unchanged to the class returned by
    get_olestorage_class(); iteration, item access and every other
    attribute are delegated to that instance.
    '''

    def __init__(self, *args, **kwargs):
        impl_class = get_olestorage_class()
        # Raised explicitly rather than with an `assert` statement so the
        # check is not stripped under `python -O`; the exception type that
        # callers observe is unchanged.
        if impl_class is None:
            raise AssertionError('no OleStorage implementation available')
        self.impl = impl_class(*args, **kwargs)

    def __iter__(self):
        return self.impl.__iter__()

    def __getitem__(self, name):
        return self.impl.__getitem__(name)

    def __getattr__(self, name):
        # `impl` is missing when __init__ failed or was bypassed; without
        # this guard the lookup of self.impl would re-enter __getattr__
        # without bound.
        if name == 'impl':
            raise AttributeError(name)
        return getattr(self.impl, name)
# diff --git a/src/hwp5/summaryinfo.py b/src/hwp5/summaryinfo.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..f9b9fbc7cbf182fa6befcd3d1ef4805443db5c0b
# --- /dev/null
# +++ b/src/hwp5/summaryinfo.py
# @@ -0,0 +1,68 @@
# -*- coding: utf-8 -*-
#
# pyhwp : hwp file format parser in python
# Copyright (C) 2010-2023 mete0r
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals
from uuid import UUID

from .msoleprops import PropertyIdentifier
from .msoleprops import RESERVED_PROPERTIES
from .msoleprops import SUMMARY_INFORMATION_PROPERTIES


CLSID_HWP_SUMMARY_INFORMATION = UUID(
    '9fa2b660-1061-11d4-b4c6-006097c09d8c'
)

# The format id is the same value as the class id.
FMTID_HWP_SUMMARY_INFORMATION = CLSID_HWP_SUMMARY_INFORMATION

HWPPIDSI_DATE_STR = PropertyIdentifier(
    id=0x00000014,
    label='HWPPIDSI_DATE_STR',
)

HWPPIDSI_PARACOUNT = PropertyIdentifier(
    id=0x00000015,
    label='HWPPIDSI_PARACOUNT',
)

# Reserved and standard summary properties, followed by the HWP ones.
HWP_PROPERTIES = RESERVED_PROPERTIES + SUMMARY_INFORMATION_PROPERTIES + (
    HWPPIDSI_DATE_STR,
    HWPPIDSI_PARACOUNT,
)


class HwpSummaryInfoTextFormatter(object):
    ''' render a summary information object as labelled text lines
    '''

    def formatTextLines(self, hwpsummaryinfo):
        ''' yield one 'Label: value' line per summary field, in fixed order

        Each attribute is read only when its line is requested.
        '''
        rows = (
            ('Title: {}', 'title'),
            ('Subject: {}', 'subject'),
            ('Author: {}', 'author'),
            ('Keywords: {}', 'keywords'),
            ('Comments: {}', 'comments'),
            ('Last saved by: {}', 'lastSavedBy'),
            ('Revision Number: {}', 'revisionNumber'),
            ('Last Printed at: {}', 'lastPrintedTime'),
            ('Created at: {}', 'createdTime'),
            ('Last saved at: {}', 'lastSavedTime'),
            ('Number of pages: {}', 'numberOfPages'),
            ('Date: {}', 'dateString'),
            ('Number of paragraphs: {}', 'numberOfParagraphs'),
        )
        for template, attrname in rows:
            yield template.format(getattr(hwpsummaryinfo, attrname))
# diff --git a/src/hwp5/tagids.py b/src/hwp5/tagids.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..1e773edbe9ab5b1cbb67634e1e2d7c156289345c
# --- /dev/null
# +++ b/src/hwp5/tagids.py
# @@ -0,0 +1,81 @@
# -*- coding: utf-8 -*-
#
# pyhwp : hwp file format parser in python
# Copyright (C) 2010-2023 mete0r
#
# This program is free
# software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals


HWPTAG_BEGIN = 0x010

# Mapping of tag id to tag name; ids are offsets from HWPTAG_BEGIN.
tagnames = {
    HWPTAG_BEGIN + offset: tagname
    for offset, tagname in (
        # DocInfo Records
        (0, 'HWPTAG_DOCUMENT_PROPERTIES'),
        (1, 'HWPTAG_ID_MAPPINGS'),
        (2, 'HWPTAG_BIN_DATA'),
        (3, 'HWPTAG_FACE_NAME'),
        (4, 'HWPTAG_BORDER_FILL'),
        (5, 'HWPTAG_CHAR_SHAPE'),
        (6, 'HWPTAG_TAB_DEF'),
        (7, 'HWPTAG_NUMBERING'),
        (8, 'HWPTAG_BULLET'),
        (9, 'HWPTAG_PARA_SHAPE'),
        (10, 'HWPTAG_STYLE'),
        (11, 'HWPTAG_DOC_DATA'),
        (12, 'HWPTAG_DISTRIBUTE_DOC_DATA'),
        # offset 13: RESERVED
        (14, 'HWPTAG_COMPATIBLE_DOCUMENT'),
        (15, 'HWPTAG_LAYOUT_COMPATIBILITY'),
        (16, 'HWPTAG_BEGIN_PLUS_16'),

        # Section Records
        (50, 'HWPTAG_PARA_HEADER'),
        (51, 'HWPTAG_PARA_TEXT'),
        (52, 'HWPTAG_PARA_CHAR_SHAPE'),
        (53, 'HWPTAG_PARA_LINE_SEG'),
        (54, 'HWPTAG_PARA_RANGE_TAG'),
        (55, 'HWPTAG_CTRL_HEADER'),
        (56, 'HWPTAG_LIST_HEADER'),
        (57, 'HWPTAG_PAGE_DEF'),
        (58, 'HWPTAG_FOOTNOTE_SHAPE'),
        (59, 'HWPTAG_PAGE_BORDER_FILL'),
        (60, 'HWPTAG_SHAPE_COMPONENT'),
        (61, 'HWPTAG_TABLE'),
        (62, 'HWPTAG_SHAPE_COMPONENT_LINE'),
        (63, 'HWPTAG_SHAPE_COMPONENT_RECTANGLE'),
        (64, 'HWPTAG_SHAPE_COMPONENT_ELLIPSE'),
        (65, 'HWPTAG_SHAPE_COMPONENT_ARC'),
        (66, 'HWPTAG_SHAPE_COMPONENT_POLYGON'),
        (67, 'HWPTAG_SHAPE_COMPONENT_CURVE'),
        (68, 'HWPTAG_SHAPE_COMPONENT_OLE'),
        (69, 'HWPTAG_SHAPE_COMPONENT_PICTURE'),
        (70, 'HWPTAG_SHAPE_COMPONENT_CONTAINER'),
        (71, 'HWPTAG_CTRL_DATA'),
        (72, 'HWPTAG_CTRL_EQEDIT'),
        # offset 73: RESERVED
        (74, 'HWPTAG_SHAPE_COMPONENT_TEXTART'),
        (75, 'HWPTAG_FORM_OBJECT'),
        (76, 'HWPTAG_MEMO_SHAPE'),
        (77, 'HWPTAG_MEMO_LIST'),
        (78, 'HWPTAG_FORBIDDEN_CHAR'),
        (79, 'HWPTAG_CHART_DATA'),
        # ...
        (99, 'HWPTAG_SHAPE_COMPONENT_UNKNOWN'),
    )
}

# Publish every tag name as a module-level constant bound to its tag id.
globals().update((tagname, tagid) for tagid, tagname in tagnames.items())
# diff --git a/src/hwp5/transforms/__init__.py b/src/hwp5/transforms/__init__.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..9306a5c446091ea6f472b55fa43b725c16bd7d56
# --- /dev/null
# +++ b/src/hwp5/transforms/__init__.py
# @@ -0,0 +1,59 @@
# -*- coding: utf-8 -*-
#
# pyhwp : hwp file format parser in python
# Copyright (C) 2010-2023 mete0r
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from contextlib import contextmanager
import logging

from ..errors import ImplementationNotAvailable
from ..plat import get_xslt_compile
from ..utils import hwp5_resources_path
from ..utils import mkstemp_open


logger = logging.getLogger(__name__)


class BaseTransform:
    ''' common plumbing for XSLT based HWP5 transforms

    xslt_compile: callable compiling an XSL file path (plus keyword
        parameters) into a transform; defaults to the platform one
    embedbin: passed to hwp5file.xmlevents() when dumping the document
    '''

    def __init__(self, xslt_compile=None, embedbin=False):
        self.xslt_compile = xslt_compile or self.get_default_xslt_compile()
        self.embedbin = embedbin

    @classmethod
    def get_default_xslt_compile(cls):
        ''' return the platform XSLT compiler

        raises ImplementationNotAvailable when the platform offers none
        '''
        xslt_compile = get_xslt_compile()
        if not xslt_compile:
            raise ImplementationNotAvailable('xslt')
        return xslt_compile

    def make_transform_hwp5(self, transform_xhwp5):
        ''' lift a transform taking an XML file path into one taking a
        hwp5file, going through a temporary XML dump
        '''
        def transform_hwp5(hwp5file, output):
            with self.transformed_xhwp5_at_temp(hwp5file) as xhwp5path:
                return transform_xhwp5(xhwp5path, output)
        return transform_hwp5

    def make_xsl_transform(self, resource_path, **params):
        ''' compile the packaged XSL resource with the given parameters '''
        with hwp5_resources_path(resource_path) as xsl_path:
            return self.xslt_compile(xsl_path, **params)

    @contextmanager
    def transformed_xhwp5_at_temp(self, hwp5file):
        ''' dump `hwp5file` as XML into a temporary file and yield its path

        The file is removed when the context exits.
        '''
        with mkstemp_open() as (tmp_path, f):
            hwp5file.xmlevents(embedbin=self.embedbin).dump(f)
            # The consumer reopens the file by path, so anything still in
            # this file object's buffer must reach the disk first.
            # NOTE(review): dump() may already flush; flushing again is
            # harmless.
            f.flush()
            yield tmp_path
# diff --git a/src/hwp5/treeop.py b/src/hwp5/treeop.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..f093b2f4ef292cc186281799b04fb6ac8eea2f01
# --- /dev/null
# +++ b/src/hwp5/treeop.py
# @@ -0,0 +1,154 @@
# -*- coding: utf-8 -*-
#
# pyhwp : hwp file format parser in python
# Copyright (C) 2010-2023 mete0r
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals


class STARTEVENT:
    ''' sentinel: an item is being entered '''
    pass


class ENDEVENT:
    ''' sentinel: an item is being left '''
    pass


def prefix_event(level_prefixed_items, root_item=None):
    ''' convert iterable of (level, item) into iterable of (event, item)

    Levels are taken relative to the level of the first item. An item may
    be at most one level deeper than its predecessor.

    raises ValueError when an item skips a level
    '''
    baselevel = None
    stack = [root_item]
    for level, item in level_prefixed_items:
        if baselevel is None:
            baselevel = level
        level -= baselevel

        # Close every open item at the same depth or deeper.
        while level + 1 < len(stack):
            yield ENDEVENT, stack.pop()
        if len(stack) < level + 1:
            raise ValueError('invalid level: %d, %d, %s' %
                             (level, len(stack) - 1, item))

        stack.append(item)
        yield STARTEVENT, item

    # Close whatever is still open; root_item itself is never emitted.
    while 1 < len(stack):
        yield ENDEVENT, stack.pop()


def prefix_ancestors(event_prefixed_items, root_item=None):
    ''' convert iterable of (event, item) into iterable of (ancestors, item)

    The yielded `ancestors` is the generator's own working list, mutated
    as iteration proceeds; copy it if it must outlive the current step.
    '''
    stack = [root_item]
    for event, item in event_prefixed_items:
        if event is STARTEVENT:
            yield stack, item
            stack.append(item)
        elif event is ENDEVENT:
            stack.pop()


def prefix_ancestors_from_level(level_prefixed_items, root_item=None):
    ''' convert iterable of (level, item) into iterable of (ancestors, item)

    @param level_prefixed items: iterable of tuple(level, item)
    @return iterable of tuple(ancestors, item)

    The yielded `ancestors` is the generator's own working list, mutated
    as iteration proceeds; copy it if it must outlive the current step.

    raises ValueError when an item skips a level
    '''
    baselevel = None
    stack = [root_item]
    for level, item in level_prefixed_items:
        if baselevel is None:
            baselevel = level
        level -= baselevel

        # Drop ancestors that are not above the current item.
        while level + 1 < len(stack):
            stack.pop()
        if len(stack) < level + 1:
            raise ValueError('invalid level: %d, %d, %s' %
                             (level, len(stack) - 1, item))

        yield stack, item
        stack.append(item)


def build_subtree(event_prefixed_items):
    ''' build a tree from (event, item) stream

    The argument must be an iterator (not a re-iterable container): the
    recursive calls continue consuming the same stream.

    Example Scenario::

        ...
        (STARTEVENT, rootitem)          # should be consumed by the caller
        --- call build_subtree() ---
        (STARTEVENT, child1)            # consumed by build_subtree()
        (STARTEVENT, grandchild)        # (same)
        (ENDEVENT, grandchild)          # (same)
        (ENDEVENT, child1)              # (same)
        (STARTEVENT, child2)            # (same)
        (ENDEVENT, child2)              # (same)
        (ENDEVENT, rootitem)            # same, buildsubtree() returns
        --- build_subtree() returns ---
        (STARTEVENT, another_root)
        ...

    result will be (rootitem, [(child1, [(grandchild, [])]),
                               (child2, [])])

    Returns None when the stream ends before the closing ENDEVENT.
    '''
    childs = []
    for event, item in event_prefixed_items:
        if event is STARTEVENT:
            childs.append(build_subtree(event_prefixed_items))
        elif event is ENDEVENT:
            return item, childs


def iter_subevents(event_prefixed_items):
    ''' yield events up to and including the ENDEVENT that closes the
    enclosing item (the first ENDEVENT without a matching STARTEVENT)
    '''
    level = 0
    for event, item in event_prefixed_items:
        yield event, item
        if event is STARTEVENT:
            level += 1
        elif event is ENDEVENT:
            if level > 0:
                level -= 1
            else:
                return


def tree_events(rootitem, childs):
    ''' generate tuples of (event, item) from a tree
    '''
    yield STARTEVENT, rootitem
    for k in tree_events_multi(childs):
        yield k
    yield ENDEVENT, rootitem


def tree_events_multi(trees):
    ''' generate tuples of (event, item) from trees
    '''
    for rootitem, childs in trees:
        for k in tree_events(rootitem, childs):
            yield k
# diff --git a/src/hwp5/utils.py b/src/hwp5/utils.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..83138f9a47df1e37e96a50a8121330486ad57aed
# --- /dev/null
# +++ b/src/hwp5/utils.py
# @@ -0,0 +1,421 @@
# -*- coding: utf-8 -*-
#
# pyhwp : hwp file format parser in python
# Copyright (C) 2010-2023 mete0r
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals
from contextlib import contextmanager
from functools import partial
import codecs
import logging
import os
import shlex
import shutil
import subprocess
import sys
import tempfile

from .importhelper import pkg_resources_filename


PY3 = sys.version_info.major == 3
logger = logging.getLogger(__name__)


class NIL:
    ''' sentinel for "no cached value"; distinct from None '''
    pass


class cached_property(object):
    ''' property computed once per instance and kept in its __dict__

    Being a data descriptor (it defines __set__), it is consulted on every
    access; the cached value lives in obj.__dict__ under the function name.
    '''

    def __init__(self, func):
        self.func = func
        self.__name__ = func.__name__
        self.__doc__ = func.__doc__

    def __get__(self, obj, type=None):
        if obj is None:
            # Accessed on the class: expose the descriptor itself.
            return self
        value = obj.__dict__.get(self.__name__, NIL)
        if value is NIL:
            value = self.func(obj)
            obj.__dict__[self.__name__] = value
        return value

    def __set__(self, obj, value):
        obj.__dict__[self.__name__] = value


def generate_json_array(tokens):
    ''' generate json array with given tokens

    tokens: iterable of already serialized JSON values
    yields the text chunks of a JSON array, one element per line; an
    empty input yields the chunks of an empty array
    '''
    first = True
    for token in tokens:
        if first:
            yield '[\n'
            first = False
        else:
            yield ',\n'
        yield token
    if first:
        # No element was seen, so the opening bracket is still owed;
        # without it the output would be just '\n]', which is not JSON.
        yield '['
    yield '\n]'


class JsonObjects(object):
    ''' lazily serialize an iterable of objects as one JSON array

    objects: iterable of objects
    object_to_json: callable(obj, **kwargs) returning a JSON text
    '''

    def __init__(self, objects, object_to_json):
        self.objects = objects
        self.object_to_json = object_to_json

    def generate(self, **kwargs):
        ''' return a generator of JSON text chunks

        kwargs go to object_to_json; sort_keys and indent default to
        True and 2.
        '''
        kwargs.setdefault('sort_keys', True)
        kwargs.setdefault('indent', 2)

        tokens = (self.object_to_json(obj, **kwargs)
                  for obj in self.objects)
        return generate_json_array(tokens)

    def open(self, **kwargs):
        ''' return a file-like reader over the UTF-8 encoded JSON '''
        chunks = self.generate(**kwargs)
        chunks = (chunk.encode('utf-8') for chunk in chunks)
        return GeneratorReader(chunks)

    def dump(self, outfile, **kwargs):
        ''' write the JSON text chunks to `outfile` '''
        for s in self.generate(**kwargs):
            outfile.write(s)


def unicode_escape(s):
    '''
    Escape a string.

    :param s:
        a string to escape
    :type s:
        unicode
    :returns:
        escaped string
    :rtype:
        unicode
    '''
    return s.encode('unicode_escape').decode('utf-8')


def unicode_unescape(s):
    '''
    Unescape a string.

    :param s:
        a string to unescape
    :type s:
        unicode
    :returns:
        unescaped string
    :rtype:
        unicode
    '''
    return s.encode('utf-8').decode('unicode_escape')


def transcode(backend_stream, backend_encoding, frontend_encoding,
              errors='strict'):
    ''' wrap `backend_stream` in a codecs.StreamRecoder

    The frontend codec supplies the encode/decode pair and the backend
    codec the stream reader/writer, matching the StreamRecoder signature.
    '''
    enc = codecs.getencoder(frontend_encoding)
    dec = codecs.getdecoder(frontend_encoding)
    rd = codecs.getreader(backend_encoding)
    wr = codecs.getwriter(backend_encoding)
    return codecs.StreamRecoder(backend_stream, enc, dec, rd, wr, errors)


def transcoder(backend_encoding, frontend_encoding, errors='strict'):
    ''' return transcode() with the encodings and error policy bound '''
    return partial(transcode,
                   backend_encoding=backend_encoding,
                   frontend_encoding=frontend_encoding,
                   errors=errors)


class GeneratorReader(object):
    ''' convert a string generator into file-like reader

        def gen():
            yield b'hello'
            yield b'world'

        f = GeneratorReader(gen())
        assert b'hell' == f.read(4)
        assert b'oworld' == f.read()
    '''

    def __init__(self, gen):
        # iter() is a no-op on generators and lets read() use next() on
        # any iterable.
        self.gen = iter(gen)
        self.buffer = b''

    def read(self, size=None):
        ''' read at most `size` bytes; everything left when `size` is
        None or negative

        Returns b'' once the generator is exhausted and the buffer empty.
        '''
        if size is None or size < 0:
            d, self.buffer = self.buffer, b''
            return d + b''.join(self.gen)

        # Pull chunks only until the request can be satisfied.
        while len(self.buffer) < size:
            try:
                self.buffer += next(self.gen)
            except StopIteration:
                break

        # Slice unconditionally: after exhaustion the buffer may still
        # hold more than `size` bytes, and returning it whole would break
        # the read(size) contract.
        d, self.buffer = self.buffer[:size], self.buffer[size:]
        return d

    def close(self):
        self.gen = self.buffer = None
class GeneratorTextReader(object):
    ''' convert a string generator into file-like reader

        def gen():
            yield 'hello'
            yield 'world'

        f = GeneratorTextReader(gen())
        assert 'hell' == f.read(4)
        assert 'oworld' == f.read()
    '''

    def __init__(self, gen):
        # iter() is a no-op on generators and lets read() use next() on
        # any iterable.
        self.gen = iter(gen)
        self.buffer = ''

    def read(self, size=None):
        ''' read at most `size` characters; everything left when `size`
        is None or negative

        Returns '' once the generator is exhausted and the buffer empty.
        '''
        if size is None or size < 0:
            d = self.buffer
            self.buffer = ''
            return d + ''.join(self.gen)

        # Pull chunks only until the request can be satisfied.
        while len(self.buffer) < size:
            try:
                self.buffer += next(self.gen)
            except StopIteration:
                break

        # Slice unconditionally: after exhaustion the buffer may still
        # hold more than `size` characters, and returning it whole would
        # break the read(size) contract.
        d, self.buffer = self.buffer[:size], self.buffer[size:]
        return d

    def close(self):
        self.gen = self.buffer = None


@contextmanager
def hwp5_resources_path(res_path):
    ''' yield a filesystem path for the packaged resource `res_path`

    When pkg_resources_filename() fails, the resource is copied into a
    temporary file which is removed on exit.
    '''
    try:
        path = pkg_resources_filename('hwp5', res_path)
    except Exception:
        logger.info('%s: pkg_resources_filename failed; using resource_stream',
                    res_path)
        with mkstemp_open() as (path, g):
            import pkg_resources
            f = pkg_resources.resource_stream('hwp5', res_path)
            try:
                shutil.copyfileobj(f, g)
                # Close first so the copy is fully written before the
                # consumer opens the path.
                g.close()
                yield path
            finally:
                f.close()
    else:
        yield path


def make_open_dest_file(path):
    ''' return a context manager factory yielding a binary output

    With a truthy `path` the file is opened for binary writing; otherwise
    the output is standard output (its byte buffer on Python 3).
    '''
    if path:
        @contextmanager
        def open_dest_path():
            with open(path, 'wb') as f:
                yield f
        return open_dest_path
    else:
        if PY3:
            @contextmanager
            def open_stdout():
                yield sys.stdout.buffer
            return open_stdout
        else:
            @contextmanager
            def open_stdout():
                yield sys.stdout
            return open_stdout


def wrap_open_dest_for_tty(open_dest, wrappers):
    ''' like wrap_open_dest(), but only when the output is a terminal '''
    @contextmanager
    def open_dest_wrapped():
        with open_dest() as output:
            if output.isatty():
                with cascade_contextmanager_filters(output,
                                                    wrappers) as output:
                    yield output
            else:
                yield output
    return open_dest_wrapped


def wrap_open_dest(open_dest, wrappers):
    ''' return an opener whose output goes through the `wrappers` chain '''
    @contextmanager
    def open_dest_wrapped():
        with open_dest() as output:
            with cascade_contextmanager_filters(output, wrappers) as output:
                yield output
    return open_dest_wrapped


@contextmanager
def cascade_contextmanager_filters(arg, filters):
    ''' enter each filter in order, feeding each one the value yielded by
    the previous, and yield the final value
    '''
    if len(filters) == 0:
        yield arg
    else:
        flt, filters = filters[0], filters[1:]
        with flt(arg) as ret:
            with cascade_contextmanager_filters(ret, filters) as ret:
                yield ret


@contextmanager
def null_contextmanager_filter(output):
    ''' filter that passes the output through unchanged '''
    yield output


def output_thru_subprocess(cmd):
    ''' return a filter piping what is written into `cmd`'s stdin, with
    the command's stdout connected to the wrapped output

    cmd: argument list for subprocess.Popen
    '''
    @contextmanager
    def filter(output):
        logger.debug('%r', cmd)
        try:
            p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=output)
        except Exception as e:
            # Best effort: if the command cannot be started, write to the
            # unfiltered output instead of failing.
            logger.error('%r: %s', ' '.join(cmd), e)
            yield output
        else:
            try:
                yield p.stdin
            except IOError as e:
                import errno
                # A broken pipe only means the command exited early
                # (e.g. a pager was quit); anything else is a real error.
                if e.errno != errno.EPIPE:
                    raise
            finally:
                p.stdin.close()
                p.wait()
                retcode = p.returncode
                logger.debug('%r exit %d', cmd, retcode)
    return filter


def xmllint(c14n=False, encode=None, format=False, nonet=True):
    ''' return a filter running the output through the xmllint command '''
    cmd = ['xmllint']
    if c14n:
        cmd.append('--c14n')
    if encode:
        cmd += ['--encode', encode]
    if format:
        cmd.append('--format')
    if nonet:
        cmd.append('--nonet')
    cmd.append('-')
    return output_thru_subprocess(cmd)


def syntaxhighlight(mimetype):
    ''' return a highlighting filter, or a pass-through filter when the
    pygments based one cannot be created
    '''
    try:
        return syntaxhighlight_pygments(mimetype)
    except Exception as e:
        logger.info(e)
        return null_contextmanager_filter


def syntaxhighlight_pygments(mimetype):
    ''' return a filter highlighting its input for a terminal

    What is written is collected in a temporary file and highlighted into
    the wrapped output when the context exits.
    '''
    from pygments import highlight
    from pygments.lexers import get_lexer_for_mimetype
    from pygments.formatters import TerminalFormatter

    lexer = get_lexer_for_mimetype(mimetype, encoding='utf-8')
    formatter = TerminalFormatter(encoding='utf-8')

    @contextmanager
    def filter(output):
        with make_temp_file() as f:
            yield f
            f.seek(0)
            code = f.read()
        highlight(code, lexer, formatter, output)
    return filter


@contextmanager
def make_temp_file():
    ''' yield a temporary file opened in 'w+' mode; removed on exit '''
    fd, name = tempfile.mkstemp()
    with unlink_path(name):
        with os.fdopen(fd, 'w+') as f:
            yield f


@contextmanager
def unlink_path(path):
    ''' remove `path` when the context exits, even on error '''
    try:
        yield
    finally:
        os.unlink(path)


def pager():
    ''' return a filter piping into $PAGER, or into ``less -R`` when the
    variable is unset or empty
    '''
    pager_cmd = os.environ.get('PAGER')
    if pager_cmd:
        pager_cmd = shlex.split(pager_cmd)
        return output_thru_subprocess(pager_cmd)
    return pager_less


pager_less = output_thru_subprocess(['less', '-R'])


@contextmanager
def mkstemp_open(*args, **kwargs):
    ''' create a temporary file and yield (path, file object)

    Arguments are those of tempfile.mkstemp(). The file is opened in
    'w+' mode when `text` is requested (by keyword or as the fourth
    positional argument) and in 'wb+' mode otherwise. It is closed and
    removed on exit.
    '''
    text = bool(kwargs.get('text', False) or (len(args) >= 4 and args[3]))
    mode = 'w+' if text else 'wb+'
    fd, path = tempfile.mkstemp(*args, **kwargs)
    try:
        try:
            f = os.fdopen(fd, mode)
        except Exception:
            # No file object owns the descriptor yet, so release it here.
            os.close(fd)
            raise
        try:
            yield path, f
        finally:
            # Deliberately best effort: the caller may already have closed
            # the file, and the path is removed right after anyway.
            try:
                f.close()
            except Exception:
                pass
    finally:
        unlink_or_warning(path)


def unlink_or_warning(path):
    ''' remove `path`; log instead of raising when that fails '''
    try:
        os.unlink(path)
    except Exception as e:
        logger.exception(e)
        logger.warning('%s cannot be deleted', path)
# diff --git a/src/hwp5/xmldump_flat.py b/src/hwp5/xmldump_flat.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..36a77575aa3cc6da0ce2b97a0c66b5487b187cc5
# --- /dev/null
# +++ b/src/hwp5/xmldump_flat.py
# @@ -0,0 +1,211 @@
# -*- coding: utf-8 -*-
#
# pyhwp : hwp file format parser in python
# Copyright (C) 2015 mete0r
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals
import sys

from .binmodel import ControlChar
from .binmodel import ParaTextChunks
from .binmodel import Text
from .bintype import resolve_type_events
from .bintype import resolve_values_from_stream
from .dataio import ArrayType
from .dataio import EnumType
from .dataio import FlagsType
from .dataio import SelectiveType
from .dataio import StructType
from .dataio import X_ARRAY
from .filestructure import FileHeader
from .treeop import ENDEVENT
from .treeop import STARTEVENT
from .xmlformat import xmlevents_to_bytechunks


# `unicode` does not exist on Python 3; alias it the same way the sibling
# modules alias `basestring`.
PY3 = sys.version_info.major == 3
if PY3:
    unicode = str


def xmldump_flat(hwp5file, output, xml_declaration=True):
    ''' write a flat XML dump of `hwp5file` to the binary stream `output`

    xml_declaration: whether to start the output with an XML declaration
    '''
    xmlevents = xmlevents_from_hwp5file(hwp5file)
    bytechunks = xmlevents_to_bytechunks(xmlevents)
    if xml_declaration:
        # NOTE(review): the encoding named here assumes that
        # xmlevents_to_bytechunks() emits UTF-8 by default - confirm.
        output.write(b'<?xml version="1.0" encoding="utf-8"?>\n')
    for x in bytechunks:
        output.write(x)


def xmlevents_from_hwp5file(hwp5file):
    ''' generate the XML events of the whole document: the file header,
    the document info and every body text section
    '''
    version = '%d.%d.%d.%d' % hwp5file.fileheader.version
    hexversion = '%02x%02x%02x%02x' % hwp5file.fileheader.version
    yield STARTEVENT, ('Hwp5Doc', {
        'version': version,
        'hexversion': hexversion
    })

    with hwp5file.fileheader.open() as f:
        yield STARTEVENT, ('FileHeader', {
        })
        resolve_values = resolve_values_from_stream(f)
        model_events = resolve_type_events(FileHeader, {}, resolve_values)
        for x in xmlevents_from_modelevents(model_events):
            yield x
        yield ENDEVENT, 'FileHeader'

    yield STARTEVENT, ('DocInfo', {
    })
    model_events = hwp5file.docinfo.parse_model_events()
    for x in xmlevents_from_modelevents(model_events):
        yield x
    yield ENDEVENT, 'DocInfo'

    for section_name in hwp5file.bodytext:
        section = hwp5file.bodytext[section_name]
        yield STARTEVENT, ('Section', {
            'name': section_name
        })
        model_events = section.parse_model_events()
        for x in xmlevents_from_modelevents(model_events):
            yield x
        yield ENDEVENT, 'Section'

    yield ENDEVENT, 'Hwp5Doc'


def expand_item_value(ev, data):
    ''' expand a ParaTextChunks value into one event per chunk

    A ParaTextChunks item becomes a start/end pair around one Text or
    ControlChar item per chunk; any other event is passed through.
    '''
    if ev is None and data['type'] is ParaTextChunks:
        yield STARTEVENT, data
        for (start, end), item in data['value']:
            # The chunk offset is doubled to obtain the binary offset.
            # NOTE(review): presumably two bytes per character - confirm.
            bin_offset = data['bin_offset'] + start * 2
            if isinstance(item, unicode):
                yield None, {
                    'bin_offset': bin_offset,
                    'type': Text,
                    'value': item,
                }
            else:
                yield None, {
                    'bin_offset': bin_offset,
                    'type': ControlChar,
                    'value': item,
                }
        yield ENDEVENT, data
    else:
        yield ev, data


def expand_item_values(model_events):
    ''' apply expand_item_value() to every model event '''
    for ev, item in model_events:
        for x in expand_item_value(ev, item):
            yield x


def xmlevents_from_modelevents(model_events):  # noqa
    ''' convert model events into (event, element) XML events

    Records become 'Record' elements, arrays and structs become 'array'
    and 'struct' elements, and plain values become 'item' elements; flag
    values additionally get nested 'bitflags'/'bits'/'enum' elements.
    '''
    expanded_events = expand_item_values(model_events)
    for ev, data in expanded_events:
        record = data.get('record')
        if record:
            if ev is STARTEVENT:
                yield ev, ('Record', {
                    'tagname': record['tagname'],
                    'tagid': unicode(record['tagid']),
                    'seqno': unicode(record['seqno']),
                    'level': unicode(record['level']),
                    'size': unicode(record['size']),
                })
            elif ev is ENDEVENT:
                yield ev, 'Record'
            else:
                assert False
        else:
            datatype = data['type']
            typename = datatype.__name__

            if ev in (STARTEVENT, ENDEVENT):
                if isinstance(datatype, (ArrayType, X_ARRAY)):
                    elem = 'array'
                    atrs = {
                    }
                elif isinstance(datatype, (StructType, SelectiveType)):
                    elem = 'struct'
                    atrs = {
                        'type': typename
                    }
                else:
                    raise Exception(datatype.__name__)

                if 'name' in data:
                    atrs['name'] = data['name']
                if ev is STARTEVENT:
                    yield ev, (elem, atrs)
                else:
                    yield ev, elem
            elif ev is None:
                atrs = {
                    'type': typename,
                    'value': unicode(data['value'])
                }
                if 'name' in data:
                    atrs['name'] = data['name']
                if 'bin_offset' in data:
                    atrs['offset'] = unicode(data['bin_offset'])
                if 'bin_value' in data:
                    atrs['bin_value'] = unicode(data['bin_value'])
                if 'bin_type' in data:
                    atrs['type'] = data['bin_type'].__name__
                yield STARTEVENT, ('item', atrs)

                if isinstance(datatype, FlagsType):
                    fixed_size = datatype.basetype.fixed_size
                    b = bin(data['value'])[2:].zfill(fixed_size * 8)
                    # Drop the Python 2 long suffix before padding, so it
                    # is not counted as a digit.
                    h = hex(data['value'])[2:].rstrip('L')
                    h = h.zfill(fixed_size * 2)
                    atrs = {
                        'hex': h,
                        'bin': b
                    }
                    yield STARTEVENT, ('bitflags', atrs)

                    for bitfield_name in datatype.bitfields:
                        desc = datatype.bitfields[bitfield_name]
                        bitfield_type = desc.valuetype
                        value = desc.__get__(data['value'], None)

                        atrs = {
                            'type': datatype.basetype.__name__,
                            'name': bitfield_name,
                            'msb': unicode(desc.msb),
                            'lsb': unicode(desc.lsb),
                            'value': unicode(int(value))
                        }
                        yield STARTEVENT, ('bits', atrs)

                        if isinstance(bitfield_type, EnumType):
                            atrs = {
                                'type': bitfield_type.__name__,
                                'value': value.name
                            }
                            yield STARTEVENT, ('enum', atrs)
                            yield ENDEVENT, 'enum'

                        yield ENDEVENT, 'bits'

                    yield ENDEVENT, 'bitflags'

                yield ENDEVENT, 'item'
# diff --git a/src/hwp5/xmlformat.py b/src/hwp5/xmlformat.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..7a3a8e66e9870b529be85f198982608fe8a68daa
# --- /dev/null
# +++ b/src/hwp5/xmlformat.py
# @@ -0,0 +1,269 @@
# -*- coding: utf-8 -*-
#
# pyhwp : hwp file format parser in python
# Copyright (C) 2010-2023 mete0r
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
+# +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals +from itertools import chain +from xml.sax.saxutils import escape +from xml.sax.saxutils import quoteattr +import logging +import sys + +from .filestructure import VERSION +from .dataio import typed_struct_attributes +from .dataio import Struct +from .dataio import StructType +from .dataio import ArrayType +from .dataio import FlagsType +from .dataio import EnumType +from .dataio import WCHAR +from .dataio import HWPUNIT +from .dataio import HWPUNIT16 +from .dataio import SHWPUNIT +from .binmodel import COLORREF +from .binmodel import BinStorageId +from .binmodel import Margin +from .binmodel import Text +from .treeop import STARTEVENT +from .treeop import ENDEVENT + + +PY3 = sys.version_info.major == 3 +if PY3: + basestring = str + unichr = chr + + +logger = logging.getLogger(__name__) + + +def xmlattrval(value): + if isinstance(value, basestring): + return value + elif isinstance(value, float): + # https://stackoverflow.com/questions/25898733/why-does-strfloat-return-more-digits-in-python-3-than-python-2 + return repr(value) + elif isinstance(type(value), EnumType): + return value.name.lower() if value.name else str(int(value)) + elif isinstance(value, type): + return value.__name__ + else: + return str(value) + + +def expanded_xmlattribute(ntv): + name, (t, value) = ntv + if isinstance(t, FlagsType): + fmt = '%0' + fmt += '%d' % (t.basetype.fixed_size * 2) + fmt += 'X' + yield name, fmt % int(value) + for k, v in t.dictvalue(t(value)).items(): + yield k, xmlattrval(v) + elif t is Margin: + for pos in ('left', 'right', 'top', 'bottom'): + yield '-'.join([name, pos]), xmlattrval(value.get(pos)) + elif t is COLORREF: + yield name, xmlattrval(t(value)) + elif t is VERSION: + yield name, '.'.join(str(x) for x in value) + elif t in (HWPUNIT, SHWPUNIT, HWPUNIT16): + yield name, str(value) + elif t is WCHAR: + if value == 0: + yield name, u'' + 
else: + if value in PUA_SYMBOLS: + yield name, PUA_SYMBOLS[value] + else: + yield name, unichr(value) + elif t is BinStorageId: + yield name, 'BIN%04X' % value + else: + yield name, xmlattrval(value) + + +# TODO: arbitrary assignment; not based on any standards +PUA_SYMBOLS = { + 0xF046: u'☞', # U+261E WHITE RIGHT POINTING INDEX + 0xF06C: u'●', # U+25CF BLACK CIRCLE + # F06C: u'⚫', # U+26AB MEDIUM BLACK CIRCLE + 0xF09F: u'•', # U+2022 BULLET = black small circle + 0xF0A1: u'○', # U+25CB WHITE CIRCLE + # F0A1: u'⚪', # U+26AA MEDIUM WHITE CIRCLE + # F0A1: u'⚬', # U+26AC MEDIUM SMALL WHITE CIRCLE + # F0A1: u' ', # U+25E6 WHITE BULLET + 0xF06E: u'■', # U+25A0 BLACK SQUARE = molding mark + 0xF0A7: u'▪', # U+25AA BLACK SMALL SQUARE = square bullet + 0xF06F: u'☐', # U+2610 BALLOT BOX + # F06F: u'□', # U+25A1 WHITE SQUARE = quadrature + 0xF075: u'◆', # U+25C6 BLACK DIAMOND + 0xF077: u'⬩', # U+2B29 BLACK SMALL DIAMOND + # F077: u'⬥', # U+2B25 BLACK MEDIUM DIAMOND + # F077: u'⬦', # U+2B26 WHITE MEDIUM DIAMOND + 0xF076: u'❖', # U+2756 BLACK DIAMOND MINUS WHITE X + 0xF0A4: u'◉', # U+25C9 FISHEYE + # F0A4: u'⦿ ', # U+29BF CIRCLED BULLET + 0xF0AB: u'★', # U+2605 BLACK STAR + 0xF0Fc: u'✓', # U+2713 CHECK MARK + 0xF0FE: u'☑', # U+2611 BALLOT BOX WITH CHECK +} + + +def xmlattr_dashednames(attrs): + for k, v in attrs: + yield k.replace('_', '-'), v + + +def xmlattr_uniqnames(attrs): + names = set([]) + for k, v in attrs: + assert k not in names, 'name clashes: %s' % k + yield k, v + names.add(k) + + +def xmlattributes_for_plainvalues(context, plainvalues): + ntvs = plainvalues.items() + ntvs = chain(*(expanded_xmlattribute(ntv) for ntv in ntvs)) + return dict(xmlattr_uniqnames(xmlattr_dashednames(ntvs))) + + +def is_complex_type(type, value): + if isinstance(value, dict): + return True + elif isinstance(type, ArrayType) and issubclass(type.itemtype, Struct): + return True + elif isinstance(type, ArrayType) and issubclass(type.itemtype, COLORREF): + return True + else: + return False 


def separate_plainvalues(typed_attributes):
    ''' Split typed attributes into complex items and plain values.

    ``typed_attributes`` yields ``(name, (type, value))``.  Returns
    ``(complex_items, plainvalues)``: a list of the items that
    ``is_complex_type()`` accepts (in input order) and a dict mapping the
    remaining names to their ``(type, value)``.  ``Margin`` is always plain.
    '''
    d = []
    p = dict()
    for named_item in typed_attributes:
        name, item = named_item
        t, value = item
        try:
            if t is not Margin and is_complex_type(t, value):
                d.append(named_item)
            else:
                p[name] = item
        except Exception as e:
            logger.error('%s', (name, t, value))
            logger.error('%s', t.__dict__)
            logger.exception(e)
            # bare raise keeps the original traceback intact
            raise
    return d, p


def startelement(context, ma):
    ''' Generate the XML events that open the element for ``(model, attributes)``.

    Yields a STARTEVENT carrying the element name and its plain attributes,
    an optional ``Text`` event, then complete child elements for every
    complex attribute.  The matching ENDEVENT is *not* produced here; see
    ``element()``.
    '''
    model, attributes = ma
    if isinstance(model, StructType):
        typed_attributes = ((v['name'], (v['type'], v['value']))
                            for v in typed_struct_attributes(model, attributes,
                                                             context))
    else:
        typed_attributes = ((k, (type(v), v))
                            for k, v in attributes.items())

    typed_attributes, plainvalues = separate_plainvalues(typed_attributes)

    # text content lives under 'text' for Text models, otherwise under ''
    if model is Text:
        text = plainvalues.pop('text')[1]
    elif '' in plainvalues:
        text = plainvalues.pop('')[1]
    else:
        text = None

    yield STARTEVENT, (model.__name__,
                       xmlattributes_for_plainvalues(context, plainvalues))
    if text:
        yield Text, text

    for _name, (_type, _value) in typed_attributes:
        if isinstance(_value, dict):
            # copy before annotating so the caller's dict is left untouched
            _value = dict(_value)
            _value['attribute-name'] = _name
            for x in element(context, (_type, _value)):
                yield x
            continue

        assert isinstance(_value, (tuple, list)), (_value, _type)
        if issubclass(_type.itemtype, Struct):
            yield STARTEVENT, ('Array', {'name': _name})
            for _itemvalue in _value:
                for x in element(context, (_type.itemtype, _itemvalue)):
                    yield x
            yield ENDEVENT, 'Array'
        elif issubclass(_type.itemtype, COLORREF):
            for _itemvalue in _value:
                yield STARTEVENT, (_name, {
                    'r': '%d' % ((_itemvalue >> 0) & 0xff),
                    'g': '%d' % ((_itemvalue >> 8) & 0xff),
                    'b': '%d' % ((_itemvalue >> 16) & 0xff),
                    'alpha': '%d' % ((_itemvalue >> 24) & 0xff),
                    'hex': xmlattrval(_type.itemtype(_itemvalue))
                })
                yield ENDEVENT, _name
        else:
            assert False, (_value, _type)


def element(context, ma):
    ''' Generate the complete (start .. end) XML events for one model. '''
    model, attributes = ma
    for x in startelement(context, ma):
        yield x
    yield ENDEVENT, model.__name__


def xmlevents_to_bytechunks(xmlevents, encoding='utf-8'):
    ''' Serialize XML events to byte chunks in the given encoding. '''
    for textchunk in xmlevents_to_textchunks(xmlevents):
        yield textchunk.encode(encoding)


def xmlevents_to_textchunks(xmlevents):
    ''' Serialize XML events to a stream of text chunks.

    STARTEVENT items are ``(name, attrs)`` and produce ``<name a="v" ...>``;
    ``Text`` items are escaped character data; ENDEVENT items are the element
    name and produce ``</name>``.  NUL characters are dropped because they
    are not allowed in XML.
    '''
    # CR/LF/TAB inside attribute values must be written as character
    # references, otherwise XML attribute-value normalization turns them
    # into plain spaces on the reading side.
    entities = {'\r': '&#13;',
                '\n': '&#10;',
                '\t': '&#9;'}
    for event, item in xmlevents:
        if event is STARTEVENT:
            yield '<'
            yield item[0]
            for n, v in item[1].items():
                yield ' '
                yield n
                yield '='
                v = quoteattr(v, entities)
                v = v.replace('\x00', '')
                yield v
            yield '>'
        elif event is Text:
            text = escape(item)
            text = text.replace('\x00', '')
            yield text
        elif event is ENDEVENT:
            # close the element; without this the output is not well-formed
            yield '</'
            yield item
            yield '>'


# ---------------------------------------------------------------------------
# Patch metadata kept from the flattened dump (the next file starts here):
# diff --git a/src/hwp5/xmlmodel.py b/src/hwp5/xmlmodel.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..84024c2a09adc810b09edebb6155d66bb3449908
# --- /dev/null
# +++ b/src/hwp5/xmlmodel.py
# @@ -0,0 +1,816 @@
# ---------------------------------------------------------------------------
# -*- coding: utf-8 -*-
#
#   pyhwp : hwp file format parser in python
#   Copyright (C) 2010-2023 mete0r
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU Affero General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU Affero General Public License for more details.
#
#   You should have received a copy of the GNU Affero General Public License
#   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+# +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals +from collections import deque +from itertools import chain +from pprint import pformat +from tempfile import TemporaryFile +import base64 +import logging +import sys + +from . import binmodel +from . import filestructure +from .binmodel.controls import SectionDef +from .binmodel.controls import TableControl +from .binmodel.controls import GShapeObjectControl +from .binmodel import BinData +from .binmodel import ListHeader +from .binmodel import Paragraph +from .binmodel import Text +from .binmodel import ShapeComponent +from .binmodel import TableBody +from .binmodel import TableCell +from .binmodel import ParaText +from .binmodel import ParaLineSeg +from .binmodel import ParaCharShape +from .binmodel import LineSeg +from .binmodel import ParaRangeTag +from .binmodel import Field +from .binmodel import ControlChar +from .binmodel import Control +from .charsets import tokenize_unicode_by_lang +from .dataio import Struct +from .filestructure import VERSION +from .treeop import STARTEVENT, ENDEVENT +from .treeop import prefix_event +from .treeop import build_subtree +from .treeop import tree_events +from .treeop import tree_events_multi +from .xmlformat import startelement +from .xmlformat import xmlevents_to_bytechunks + + +PY3 = sys.version_info.major == 3 +if PY3: + basestring = str + unichr = chr + + +logger = logging.getLogger(__name__) + + +def give_elements_unique_id(event_prefixed_mac): + paragraph_id = 0 + table_id = 0 + gshape_id = 0 + shape_id = 0 + for event, item in event_prefixed_mac: + (model, attributes, context) = item + if event == STARTEVENT: + if model == Paragraph: + attributes['paragraph_id'] = paragraph_id + paragraph_id += 1 + elif model == TableControl: + attributes['table_id'] = table_id + table_id += 1 + elif model == GShapeObjectControl: + attributes['gshape_id'] = gshape_id + gshape_id += 1 + elif model == 
ShapeComponent: + attributes['shape_id'] = shape_id + shape_id += 1 + yield event, item + + +def make_ranged_shapes(shapes): + last = None + for item in shapes: + if last is not None: + yield (last[0], item[0]), last[1] + last = item + yield (item[0], 0x7fffffff), item[1] + + +def split_and_shape(chunks, ranged_shapes): + try: + (chunk_start, chunk_end), chunk_attr, chunk = next(chunks) + except StopIteration: + return + for (shape_start, shape_end), shape in ranged_shapes: + while True: + # case 0: chunk has left intersection + # vvvv + # ----... + if chunk_start < shape_start: + assert False + + # case 1: chunk is far right: get next shape + # vvvv + # ---- + if shape_end <= chunk_start: # (1) + break + + assert chunk_start < shape_end # by (1) + assert shape_start <= chunk_start + # case 2: chunk has left intersection + # vvvv + # ..---- + if shape_end < chunk_end: # (2) + prev = ((chunk_start, shape_end), + chunk[:shape_end - chunk_start]) + nexT = ((shape_end, chunk_end), + chunk[shape_end - chunk_start:]) + (chunk_start, chunk_end), chunk = prev + else: + nexT = None + + assert chunk_end <= shape_end # by (2) + yield (chunk_start, chunk_end), (shape, chunk_attr), chunk + + if nexT is not None: + (chunk_start, chunk_end), chunk = nexT + continue + + try: + (chunk_start, chunk_end), chunk_attr, chunk = next(chunks) + except StopIteration: + return + + +def line_segmented(chunks, ranged_linesegs): + prev_lineseg = None + line = None + for ((chunk_start, chunk_end), + (lineseg, chunk_attr), + chunk) in split_and_shape(chunks, ranged_linesegs): + if lineseg is not prev_lineseg: + if line is not None: + yield prev_lineseg, line + line = [] + line.append(((chunk_start, chunk_end), chunk_attr, chunk)) + prev_lineseg = lineseg + if line is not None: + yield prev_lineseg, line + + +def make_texts_linesegmented_and_charshaped(event_prefixed_mac): + ''' lineseg/charshaped text chunks ''' + + stack = [] # stack of ancestor Paragraphs + for event, item in 
event_prefixed_mac: + model, attributes, context = item + if model is Paragraph: + if event == STARTEVENT: + stack.append(dict()) + yield STARTEVENT, item + else: + paratext = stack[-1].get(ParaText) + paracharshape = stack[-1].get(ParaCharShape) + paralineseg = stack[-1].get(ParaLineSeg) + # TODO: RangeTags are not used for now + # pararangetag = stack[-1].get(ParaRangeTag) + if paratext is None: + paratext = (ParaText, + dict(chunks=[((0, 0), '')]), + dict(context)) + for x in merge_paragraph_text_charshape_lineseg(paratext, + paracharshape, + paralineseg): + yield x + + yield ENDEVENT, (model, attributes, context) + stack.pop() + elif model in (ParaText, ParaCharShape, ParaLineSeg, ParaRangeTag): + if event == STARTEVENT: + stack[-1][model] = model, attributes, context + else: + yield event, (model, attributes, context) + + +def merge_paragraph_text_charshape_lineseg(paratext, paracharshape, + paralineseg): + + paratext_model, paratext_attributes, paratext_context = paratext + + chunks = ((range, None, chunk) + for range, chunk in paratext_attributes['chunks']) + charshapes = paracharshape[1]['charshapes'] + shaped_chunks = split_and_shape(chunks, make_ranged_shapes(charshapes)) + + if paralineseg: + paralineseg_content = paralineseg[1] + paralineseg_context = paralineseg[2] + else: + # 배포용 문서의 더미 BodyText 에는 LineSeg 정보가 없음 + # (see https://github.com/mete0r/pyhwp/issues/33) + # 더미 LineSeg를 만들어 준다 + lineseg = dict(chpos=0, y=0, height=0, height2=0, height85=0, + space_below=0, x=0, width=0, a8=0, flags=0) + paralineseg_content = dict(linesegs=[lineseg]) + paralineseg_context = dict() + linesegs = ((lineseg['chpos'], lineseg) + for lineseg in paralineseg_content['linesegs']) + lined_shaped_chunks = line_segmented(shaped_chunks, + make_ranged_shapes(linesegs)) + for lineseg_content, shaped_chunks in lined_shaped_chunks: + lineseg = (LineSeg, lineseg_content, paralineseg_context) + chunk_events = range_shaped_textchunk_events(paratext_context, + shaped_chunks) + 
for x in wrap_modelevents(lineseg, chunk_events): + yield x + + +def range_shaped_textchunk_events(paratext_context, range_shaped_textchunks): + for (startpos, endpos), (shape, none), chunk in range_shaped_textchunks: + if isinstance(chunk, basestring): + textitem = (Text, + dict(text=chunk, charshape_id=shape), + paratext_context) + yield STARTEVENT, textitem + yield ENDEVENT, textitem + elif isinstance(chunk, dict): + code = chunk['code'] + uch = unichr(code) + name = ControlChar.get_name_by_code(code) + kind = ControlChar.kinds[uch] + chunk_attributes = dict(name=name, + code=code, + kind=kind, + charshape_id=shape) + if code in (0x9, 0xa, 0xd): # http://www.w3.org/TR/xml/#NT-Char + chunk_attributes['char'] = uch + ctrlch = (ControlChar, chunk_attributes, paratext_context) + yield STARTEVENT, ctrlch + yield ENDEVENT, ctrlch + + +def wrap_section(event_prefixed_mac, sect_id=None): + ''' wrap a section with SectionDef ''' + starting_buffer = list() + started = False + sectiondef = None + for event, item in event_prefixed_mac: + if started: + yield event, item + else: + model, attributes, context = item + if model is SectionDef and event is STARTEVENT: + sectiondef, sectdef_child = build_subtree(event_prefixed_mac) + if sect_id is not None: + attributes['section_id'] = sect_id + yield STARTEVENT, sectiondef + for k in tree_events_multi(sectdef_child): + yield k + for evented_item in starting_buffer: + yield evented_item + started = True + else: + starting_buffer.append((event, item)) + yield ENDEVENT, sectiondef + + +class ColumnSet: + pass + + +def wrap_columns(event_prefixed_mac): + + stack = [] + + for event, item in event_prefixed_mac: + model, attributes, context = item + + if model is Paragraph: + if event is STARTEVENT: + + split = attributes['split'] + split = Paragraph.SplitFlags(split) + + if split.new_columnsdef: + if stack[-1][0] is ColumnSet: + yield ENDEVENT, stack.pop() + + columns = (ColumnSet, {}, {}) + stack.append(columns) + yield STARTEVENT, 
columns + + else: + if event is STARTEVENT: + stack.append(item) + else: + if model != stack[-1][0]: + assert stack[-1][0] is ColumnSet + yield ENDEVENT, stack.pop() + stack.pop() + + yield event, item + + +def make_extended_controls_inline(event_prefixed_mac, stack=None): + ''' inline extended-controls into paragraph texts ''' + if stack is None: + stack = [] # stack of ancestor Paragraphs + for event, item in event_prefixed_mac: + model, attributes, context = item + if model is Paragraph: + for x in meci_paragraph(event, stack, item): + yield x + elif model is ControlChar: + for x in meci_controlchar(event, stack, item, attributes): + yield x + elif issubclass(model, Control) and event == STARTEVENT: + control_subtree = build_subtree(event_prefixed_mac) + paragraph = stack[-1] + paragraph_controls = paragraph.setdefault(Control, []) + paragraph_controls.append(control_subtree) + else: + yield event, item + + +def meci_paragraph(event, stack, item): + if event == STARTEVENT: + stack.append(dict()) + yield STARTEVENT, item + else: + yield ENDEVENT, item + stack.pop() + + +def meci_controlchar(event, stack, item, attributes): + if event is STARTEVENT: + if attributes['kind'] is ControlChar.EXTENDED: + paragraph = stack[-1] + paragraph_controls = paragraph.get(Control) + control_subtree = paragraph_controls.pop(0) + tev = tree_events(*control_subtree) + # to evade the Control/STARTEVENT trigger + # in parse_models_pass3() + yield next(tev) + + for k in make_extended_controls_inline(tev, stack): + yield k + else: + yield STARTEVENT, item + yield ENDEVENT, item + + +def make_paragraphs_children_of_listheader(event_prefixed_mac, + parentmodel=ListHeader, + childmodel=Paragraph): + ''' make paragraphs children of the listheader ''' + stack = [] + level = 0 + for event, item in event_prefixed_mac: + model, attributes, context = item + if event is STARTEVENT: + level += 1 + if len(stack) > 0 and ((event is STARTEVENT + and stack[-1][0] == level + and model is not 
childmodel) or + (event is ENDEVENT + and stack[-1][0] - 1 == level)): + lh_level, lh_item = stack.pop() + yield ENDEVENT, lh_item + + if issubclass(model, parentmodel): + if event is STARTEVENT: + stack.append((level, item)) + yield event, item + else: + pass + else: + yield event, item + + if event is ENDEVENT: + level -= 1 + + +def match_field_start_end(event_prefixed_mac): + stack = [] + for event, item in event_prefixed_mac: + (model, attributes, context) = item + if issubclass(model, Field): + for x in mfse_field(event, stack, item): + yield x + elif model is LineSeg: + for x in mfse_lineseg(event, stack, item): + yield x + elif model is ControlChar and attributes['name'] == 'FIELD_END': + for x in mfse_field_end(event, stack, item): + yield x + else: + yield event, item + + +def mfse_field(event, stack, item): + if event is STARTEVENT: + stack.append(item) + yield event, item + else: + pass + + +def mfse_lineseg(event, stack, item): + if event is ENDEVENT: + # fields still not closed; temporarily close them + for field_item in reversed(stack): + yield ENDEVENT, field_item + yield event, item + elif event is STARTEVENT: + yield event, item + # fields temporarily closed; open them again + for field_item in stack: + yield STARTEVENT, field_item + + +def mfse_field_end(event, stack, item): + if event is ENDEVENT: + if len(stack) > 0: + yield event, stack.pop() + else: + logger.warning('unmatched field end') + + +class TableRow: + pass + + +ROW_OPEN = 1 +ROW_CLOSE = 2 + + +def restructure_tablebody(event_prefixed_mac): + ''' Group table columns in each rows and wrap them with TableRow. 
''' + stack = [] + for event, item in event_prefixed_mac: + (model, attributes, context) = item + if model is TableBody: + for x in rstbody_tablebody(event, stack, item, attributes, + context): + yield x + elif model is TableCell: + for x in rstbody_tablecell(event, stack, item): + yield x + else: + yield event, item + + +def rstbody_tablebody(event, stack, item, attributes, context): + if event is STARTEVENT: + rowcols = deque() + for cols in attributes.pop('rowcols'): + if cols == 1: + rowcols.append(ROW_OPEN | ROW_CLOSE) + else: + rowcols.append(ROW_OPEN) + for i in range(0, cols - 2): + rowcols.append(0) + rowcols.append(ROW_CLOSE) + stack.append((context, rowcols)) + yield event, item + else: + yield event, item + stack.pop() + + +def rstbody_tablecell(event, stack, item): + table_context, rowcols = stack[-1] + row_context = dict(table_context) + if event is STARTEVENT: + how = rowcols[0] + if how & ROW_OPEN: + yield STARTEVENT, (TableRow, dict(), row_context) + yield event, item + if event is ENDEVENT: + how = rowcols.popleft() + if how & ROW_CLOSE: + yield ENDEVENT, (TableRow, dict(), row_context) + + +def tokenize_text_by_lang(event_prefixed_mac): + ''' Group table columns in each rows and wrap them with TableRow. ''' + for event, item in event_prefixed_mac: + (model, attributes, context) = item + if model is Text: + if event is STARTEVENT: + charshape_id = attributes['charshape_id'] + for lang, text in tokenize_unicode_by_lang(attributes['text']): + token = (Text, { + 'charshape_id': charshape_id, + 'lang': lang, + 'text': text, + }, context) + yield STARTEVENT, token + yield ENDEVENT, token + else: + yield event, item + + +def embed_bindata(event_prefixed_mac, bindata): + for event, item in event_prefixed_mac: + (model, attributes, context) = item + if event is STARTEVENT and model is BinData: + if attributes['flags'].storage is BinData.StorageType.EMBEDDING: + name = ('BIN%04X' % attributes['bindata']['storage_id'] + + '.' 
+ + attributes['bindata']['ext']) + bin_stream = bindata[name].open() + try: + binary = bin_stream.read() + finally: + bin_stream.close() + b64 = base64.b64encode(binary) + b64 = b64.decode('ascii') + truncated = [] + while b64: + if len(b64) > 64: + truncated.append(b64[:64]) + b64 = b64[64:] + else: + truncated.append(b64) + b64 = '' + b64 = '\n'.join(truncated) + b64 = '\n' + b64 + '\n' + attributes['bindata'][''] = b64 + attributes['bindata']['inline'] = 'true' + yield event, item + + +def prefix_binmodels_with_event(context, models): + level_prefixed = ((model['level'], + (model['type'], model['content'], context)) + for model in models) + return prefix_event(level_prefixed) + + +def wrap_modelevents(wrapper_model, modelevents): + yield STARTEVENT, wrapper_model + for mev in modelevents: + yield mev + yield ENDEVENT, wrapper_model + + +def modelevents_to_xmlevents(modelevents): + for event, (model, attributes, context) in modelevents: + try: + if event is STARTEVENT: + for x in startelement(context, (model, attributes)): + yield x + elif event is ENDEVENT: + yield ENDEVENT, model.__name__ + except: + logger.error('model: %s', pformat({ + 'event': event, + 'model': model, + 'attributes': attributes, + 'context': context + })) + raise + + +class XmlEvents(object): + + def __init__(self, events): + self.events = events + + def __iter__(self): + return modelevents_to_xmlevents(self.events) + + def bytechunks(self, xml_declaration=True, **kwargs): + encoding = kwargs.get('xml_encoding', 'utf-8') + if xml_declaration: + yield '\n'.format( + encoding + ).encode( + encoding + ) + bytechunks = xmlevents_to_bytechunks(self, encoding) + for chunk in bytechunks: + yield chunk + + def dump(self, outfile, **kwargs): + bytechunks = self.bytechunks(**kwargs) + for chunk in bytechunks: + outfile.write(chunk) + if hasattr(outfile, 'flush'): + outfile.flush() + + def open(self, **kwargs): + tmpfile = TemporaryFile() + try: + self.dump(tmpfile, **kwargs) + except: + 
tmpfile.close() + raise + + tmpfile.seek(0) + return tmpfile + + +class XmlEventsMixin(object): + + def xmlevents(self, **kwargs): + return XmlEvents(self.events(**kwargs)) + + +class ModelEventStream(binmodel.ModelStream, XmlEventsMixin): + + def modelevents(self, **kwargs): + models = self.models(**kwargs) + + # prepare modelevents context + kwargs.setdefault('version', self.version) + return prefix_binmodels_with_event(kwargs, models) + + def other_formats(self): + d = super(ModelEventStream, self).other_formats() + d['.xml'] = self.xmlevents().open + return d + + +class HwpSummaryInfo(filestructure.HwpSummaryInfo, XmlEventsMixin): + + def events(self, **context): + generator = PropertySetStreamModelEventsGenerator(context) + events = generator.generateModelEvents(self.propertySetStream) + element = HwpSummaryInfo, {}, context + return wrap_modelevents(element, events) + + +class PropertySetStreamModelEventsGenerator(object): + + def __init__(self, context): + self.context = context + + def generateModelEvents(self, stream): + return self.getPropertySetStreamEvents(stream) + + def getPropertySetStreamEvents(self, stream): + from .msoleprops import PropertySetStream + sectionEvents = [ + self.getPropertySetEvents(propertyset) + for propertyset in stream.propertysets + ] + events = chain(*sectionEvents) + + content = dict( + byte_order='{:04x}'.format( + stream.byteOrder, + ), + version=str(stream.version), + system_identifier='{:08x}'.format( + stream.systemIdentifier, + ), + clsid=str(stream.clsid) + ) + element = PropertySetStream, content, self.context + return wrap_modelevents(element, events) + + def getPropertySetEvents(self, propertyset): + from .msoleprops import PropertySet + propertyEvents = [ + self.getPropertyEvents(property) + for property in sorted( + propertyset.properties, + key=lambda property: property.desc.offset + ) + ] + events = chain(*propertyEvents) + + content = dict( + fmtid=propertyset.fmtid, + offset=propertyset.desc.offset, + ) + 
element = PropertySet, content, self.context + return wrap_modelevents(element, events) + + def getPropertyEvents(self, property): + from .msoleprops import PID_DICTIONARY + from .msoleprops import Property + content = dict( + id=property.desc.id, + offset=property.desc.offset, + ) + if property.idLabel is not None: + content['id_label'] = property.idLabel + if property.type is not None: + content['type'] = str(property.type.vt_type.__name__) + content['type_code'] = '0x{:04x}'.format(property.type.code) + if property.id == PID_DICTIONARY.id: + events = self.getDictionaryEvents(property.value) + else: + events = () + content['value'] = property.value + element = Property, content, self.context + return wrap_modelevents(element, events) + + def getDictionaryEvents(self, dictionary): + events = list(self.getDictionaryEntryEvents(entry) + for entry in dictionary.entries) + return chain(*events) + + def getDictionaryEntryEvents(self, entry): + from .msoleprops import DictionaryEntry + content = dict( + id=entry.id, + name=entry.name, + ) + element = DictionaryEntry, content, self.context + return wrap_modelevents(element, ()) + + +class DocInfo(ModelEventStream): + + def events(self, **kwargs): + docinfo = DocInfo, dict(), dict() + events = self.modelevents(**kwargs) + if 'embedbin' in kwargs: + events = embed_bindata(events, kwargs['embedbin']) + events = wrap_modelevents(docinfo, events) + return events + + +class Section(ModelEventStream): + + def events(self, **kwargs): + events = self.modelevents(**kwargs) + + events = make_texts_linesegmented_and_charshaped(events) + events = make_extended_controls_inline(events) + events = match_field_start_end(events) + events = make_paragraphs_children_of_listheader(events) + events = make_paragraphs_children_of_listheader(events, TableBody, + TableCell) + events = restructure_tablebody(events) + events = tokenize_text_by_lang(events) + + section_idx = kwargs.get('section_idx') + events = wrap_section(events, section_idx) + 
events = wrap_columns(events) + + return events + + +class Sections(binmodel.Sections, XmlEventsMixin): + + section_class = Section + + def events(self, **kwargs): + bodytext_events = [] + for idx in self.section_indexes(): + kwargs['section_idx'] = idx + section = self.section(idx) + events = section.events(**kwargs) + bodytext_events.append(events) + + class BodyText(object): + pass + bodytext_events = chain(*bodytext_events) + bodytext = BodyText, dict(), dict() + return wrap_modelevents(bodytext, bodytext_events) + + def other_formats(self): + d = super(Sections, self).other_formats() + d['.xml'] = self.xmlevents().open + return d + + +class HwpDoc(Struct): + + def attributes(): + yield VERSION, 'version' + attributes = staticmethod(attributes) + + +class Hwp5File(binmodel.Hwp5File, XmlEventsMixin): + + summaryinfo_class = HwpSummaryInfo + docinfo_class = DocInfo + bodytext_class = Sections + + def events(self, **kwargs): + if 'embedbin' in kwargs and kwargs['embedbin'] and 'BinData' in self: + kwargs['embedbin'] = self['BinData'] + else: + kwargs.pop('embedbin', None) + + events = chain(self.summaryinfo.events(**kwargs), + self.docinfo.events(**kwargs), + self.text.events(**kwargs)) + + hwpdoc = HwpDoc, dict(version=self.header.version), dict() + events = wrap_modelevents(hwpdoc, events) + + # for easy references in styles + events = give_elements_unique_id(events) + + return events diff --git a/src/hwp5/xsl/binspec2html.xsl b/src/hwp5/xsl/binspec2html.xsl new file mode 100644 index 0000000000000000000000000000000000000000..6b599426826872dd65970f9b6264daf5629c82ea --- /dev/null +++ b/src/hwp5/xsl/binspec2html.xsl @@ -0,0 +1,453 @@ + + + + + + + + Content-Type + text/xhtml; charset=utf-8 + + + + text/javascript + + $(document).ready(function(){ + $('a.toggle-definition').parent().parent().siblings().css('display', 'none'); + $('a.toggle-definition').click(function(){ + $(this).parent().parent().siblings().toggle(); + }); + }); + + + + text/css + + 
table.StructType { + width: 100%; + } + table.simple { + border: 1px solid black; + border-collapse: collapse; + } + table.simple th , + table.simple td { + border: 1px solid black; + padding: 1em; + } + thead tr.StructType-name th { + background-color: #ccc; + } + tr.extension-header th { + background-color: #ddd; + } + tr.extension-header th .condition { + font-weight: normal; + } + tr.extends-header th { + background-color: #eee; + color: #777; + font-weight: normal; + } + table.SelectiveType, + table.SelectiveType th, + table.SelectiveType td { + border: 0; + padding: 0; + } + table.EnumType { + border: 0; + width: 100%; + } + table.EnumType tr.name th { + border: 0; + } + table.FlagsType { + border: 0; + width: 100%; + } + table.FlagsType tr.name th { + border: 0; + } + a.toggle-definition { + text-decoration: underline; + cursor: pointer; + color: blue; + } + + + + + + hwp5spec + + + + + Version: + + + + + + + Records + + + Structs + + Primitives + + simple + + + name + + + size + + + binfmt + + + + + + + + + + toc simple + + + + + + + + + # + + + + + + + + + + + + + + + + + StructType simple + + + + + + + + + + + + + + + + + + + + + + + + + + + StructType simple + + + + + + + + + + + StructType-name + + 4 + + + + + name + type + condition + version + + + + + + + + + + + + + + + + + + + + + + extends-header + + 4 + (see + + + + members) + + + + + + + extension-header + + 4 + Extension: + + + + + condition + (if ) + + + + + + + + member + + + + + + + + + + + + + + + + + + + + + + # + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + # + + + + + + + + # + + + + + + + FlagsType simple + + name + + 3 + + + + + + + + bits + name + type + + + + + + + + + + ~ + + + + + + + + + + + + + + + + ARRAY(, ) + + + + ARRAY(, ) + + + + N_ARRAY(, + ) + + + + if + + is: + + SelectiveType + + + ; then + + + + + + + + + EnumType simple + + name + + 2 + Enum + + + + . 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + toggle-definition + + + + diff --git a/src/hwp5/xsl/hwp5css-common.xsl b/src/hwp5/xsl/hwp5css-common.xsl new file mode 100644 index 0000000000000000000000000000000000000000..c8943ade8513ef826cf6b33648e7182b70907e20 --- /dev/null +++ b/src/hwp5/xsl/hwp5css-common.xsl @@ -0,0 +1,1008 @@ + + + + + + + + + + + + + + /* Styles */ + + /* Paragraph attributes */ + + /* Text attributes */ + + + + + + + + . + + + + + > + span + + + + + + + + /* + @parashape-id = + + */ + + + + + + + + + + + + /* + @charshape-id = + + */ + + + + + + + + + + + + p.parashape- + + + + + > + span + + + + + + + + + + + + + + + + + + + margin + + + + + + + + + + + + + + + + + + + + + + + + min-height + + + em + + + + + + + + + + + line-height + + + + + + + + + + + + . + Bullet- + + ::before + + + + content + + " + + " + + + + display + inline-block + + + text-align + + + + + + width + 1em + + + margin-right + + + em + + + + + + width + + + + + + + + margin-right + + + + + + + + + + + + + + + + + + + Bullet- + + + + + + + + span.charshape- + + + + + + + + + + + + + + + + + + + + + + + ko + + + + + + en + + + + + + cn + + + + + + jp + + + + + + other + + + + + + symbol + + + + + + user + + + + + + + + color + + + + + + font-style + italic + + + + + + font-weight + bold + + + + + + + + + + + + + + + + .lang- + + + + + font-family + + + + + + + font-size + + + + + + + + + + + + " + + " + + + + + , serif + , sans-serif + , serif + , sans-serif + + + + + + + + + + , monospace + , sans-serif + , serif + + , cursive + + , fantasy + + + + + + + + + .borderfill- + + + + + border-top + + + + + + + border-right + + + + + + + border-bottom + + + + + + + border-left + + + + + + + + + + + + + + + + + + + + + + + 1px + 1px + 1px + 1px + 1px + 2px + 2px + + + + + + + + + none + solid + dashed + dotted + dashed + dashed + dahsed + dotted + double + double + double + double + solid + double + inset + outset + groove + ridge + solid + + + + + + 
background-color + + + + + + + + + + + + + background-image + + url( +  + ) + /* + + */ + + + + + + background-image + + url( +  + ) + + + + + + background-image + + url( +  + ) + /* + + */ + + + + + + background-image + + url( +  + ) + /* + + */ + + + + + + background-image + + url( +  + ) + /* + + */ + + + + + + background-image + + url( +  + ) + /* + + */ + + + + + /* + unrecognized @pattern-type: + + */ + + + + + + + background-image + + linear-gradient + ( + + deg + + , + + + ) + + + + + background-image + + -webkit-linear-gradient + ( + + deg + + , + + + ) + + + + + background-image + + -moz-linear-gradient + ( + + deg + + , + + + ) + + + + + + + + + + + background-image + + url( + + ) + + + + + background-size + + 100% 100% + + + + + /* + unsupported @fillimage-type: + + */ + + + + + + + + + + bindata/ + + . + + + + + + .Section- + + + + + + + + + + + + + + .HeaderPageFooter + + + + + + + + + + + .Page + + + + + + + + + + position + relative + + + + margin-top + + + + + + + + + margin-right + + + + + + + + + margin-bottom + + + + + + + + + margin-left + + + + + + + + + + + padding-top + + + + + + + + + padding-bottom + + + + + + + + + + + + + + + + + + + + + + width + + + + + + + + + + + width + + + + + + + + + + + .HeaderArea + + + position + absolute + + + left + 0 + + + top + 0 + + + width + + + + + + + + height + + + + + + + + + + + + + .FooterArea + + + position + absolute + + + left + 0 + + + bottom + 0 + + + width + + + + + + + + height + + + + + + + + + + + + + text-align + + + center + left + right + justify + justify + + + + + + + + text-indent + + + + + + + + + + + padding-left + + + + + + + + + + + + + + text-decoration + underline + + + + + + + text-decoration + overline + + + + + + + text-decoration + line-through + + + + + + + + + + text-decoration-color + + + + + + + -moz-text-decoration-color + + + + + + + -webkit-text-decoration-color + + + + + + + + + text-decoration-style + + + + + + + -moz-text-decoration-style + + + + + + + 
-webkit-text-decoration-style + + + + + + + + + solid + dashed + dotted + dashed + dashed + dashed + dotted + double + double + double + double + + + + + + + mm + + + + + + pt + + + + + + + { + + } + + + + + + + + + : + + ; + + diff --git a/src/hwp5/xsl/hwp5css.xsl b/src/hwp5/xsl/hwp5css.xsl new file mode 100644 index 0000000000000000000000000000000000000000..f990c03e4bdccc55371ee40669f5676cc9046884 --- /dev/null +++ b/src/hwp5/xsl/hwp5css.xsl @@ -0,0 +1,60 @@ + + + + + + + body + + + background-color + #eee + + + padding + 4px + + + margin + 0 + + + + + .Paper + + + background-color + #fff + + + border + 1px solid black + + + margin + 1em auto + + + + + .Paper:first-child + + + margin-top + 0 + + + + + .Paper:last-child + + + margin-bottom + 0 + + + + + + diff --git a/src/hwp5/xsl/hwp5fodt.xsl b/src/hwp5/xsl/hwp5fodt.xsl new file mode 100644 index 0000000000000000000000000000000000000000..09ed4c3c191f63a68a93fe237e0a71de7ea898d6 --- /dev/null +++ b/src/hwp5/xsl/hwp5fodt.xsl @@ -0,0 +1,329 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/hwp5/xsl/hwp5html.xsl b/src/hwp5/xsl/hwp5html.xsl new file mode 100644 index 0000000000000000000000000000000000000000..1aa1344719aa9873a3337da2a5c99c54a7230e93 --- /dev/null +++ b/src/hwp5/xsl/hwp5html.xsl @@ -0,0 +1,695 @@ + + + + + + + + + + + + + + + + content-type + text/html; charset=utf-8 + + + + + text/css + + + + + + + + + + + + + + + + + + + + + + + + stylesheet + styles.css + text/css + + + + + + + + text/css + + + + + + + + + + + + + + Section + + Section- + + + Paper + + + HeaderPageFooter + + + Page + + + + + + + + + + HeaderArea + + + + + + + FooterArea + + + + + + + + + + + + + + + + + + + + + parashape- + + + + + + + + + + + + + + + + + + + + + + + lang- + + + + + + charshape- + + + + + + + + + + + + autonumbering + + autonumbering- + + + + + + + + + + + + TableControl + + + + + + borderfill- + + + + + + + + + + + + + + TableControl + + 
borderfill- + + + + + + + + + + + + + + + border-collapse + collapse + + + + + + TableCaption + + + + + caption-side + + + + margin-bottom + + + + + + + + width + + + + + + + + + + caption-side + + + + margin-top + + + + + + + + width + + + + + + + + + /* + not supported @position: + + */ + + + + + + + + + + + + + + + + borderfill- + + + width + + + + + + + + height + + + + + + + + padding + + + + + + + + + + + + + + + + + + + + + + + + + + + + GShapeObjectControl + + + + + + + + + + + GShapeObjectControl + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + . + + + + + + + data:;base64, + + + + + + + width + + + + + + + + + + + height + + + + + + + + + + + display + inline-block + + + + + + width + + + + + + + + + + + + + + + + /* + hrelto: + + halign: + + */ + + + + + + + + margin-left + + + + + + + + + + + margin-left + + + + + + + + + + + + margin-left + + + + + + + + + + + + + + + + + margin-left + + + + + + + + + + + margin-left + + + + + + + + + + + + margin-left + + + + + + + + + + + + + + + + + margin-left + + + + + + + + + + + margin-left + + + + + + + + + + + + margin-left + + + + + + + + + + + + + + + + + margin-left + + + + + + + + + + + margin-left + + + + + + + + + + + + margin-left + + + + + + + + + + + + + diff --git a/src/hwp5/xsl/odt/common.xsl b/src/hwp5/xsl/odt/common.xsl new file mode 100644 index 0000000000000000000000000000000000000000..2e4fe0cb45386bf9570e296b84d9c5d8e276ec51 --- /dev/null +++ b/src/hwp5/xsl/odt/common.xsl @@ -0,0 +1,1630 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + MasterPage- + PageLayout- + + + + + + + + + '' + + + + + + roman + swiss + + + + variable + + + + + + + + + + + + + + + + + + + + + paragraph + text + + + + + + + + + + + + + + + 
!"#$%&'()*+,/:;<=>?@[\]^`{|}~ + ______________________________ + + + + + + + PageLayout- + + + + + + + + + + + + + cm + cm + + + cm + cm + + + cm + cm + cm + cm + + + + + + + + + + + + + + + + + + justify + left + right + center + justify + justify + justify + + + + + pt + + + pt + pt + + + pt + pt + pt + + + + + % + + + + pt + top + + + + pt + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + pt + + + + + + + + pt + + + + + + + + pt + + + + + + italic + + + + + + italic + + + + + + italic + + + + + + bold + + + + + + bold + + + + + + bold + + + + + + solid + dash + dotted + dot-dash + dot-dot-dash + long-dash + dotted + solid + solid + solid + solid + + + + + + single + single + single + single + single + single + single + double + double + double + double + + + + + + + + + auto + + + + + + none + + + + + + + + + + auto + + + + + + none + + + + + + + + + + auto + + + + + + none + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + p + + - + + - + + + + + + + + + + + + + + + + + + text + + + + + + + + + + + + + + + + + + paragraph + text + + Paragraph- + + + + + + + + + + + + + + + + + + + page + + + + + + + + + + + + + MasterPage- + + + + + + + + + + + + + + + + + + + Paragraph- + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + p-- + + + + + + + + + + + Table- + + + + + + + + + + Table--- + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Table--- + table-cell + + + + + + + + + mm + mm + mm + mm + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Table- + table + + + + + + mm + + margins + + + mm + mm + + + + + + + mm + mm + + + + + collapsing + + + separating + + + + + + + + + + mm + mm + + mm + mm + + + + + + + + parallel + + + left + + + right + + + biggest + + + + + none + + + run-through + + + + run-through + + + + + + + page-content + + + page + + + paragraph + + + 
paragraph + + + + + + + + page-content + + + page + + + + page-content + + + paragraph + + + + + + + + + from-left + + + + from-inside + + + + center + + + + from-left + + + + outside + + + + outside + + + + right + + + + right + + + + + + + + + from-top + + + + middle + + + + middle + + + + bottom + + + + bottom + + + + + top + + + + + + baseline + top + + + + + + + + + + + + solid + + + + + none + + + + + + + + + none + + + solid + + + dash + + + dash + + + solid + + + + + + + mm + + + + + + + Shape- + graphic + + + + + + + + + + + + + + + + + + + translate ( + mm + mm) + + + + + + + + + + + + + + translate ( + mm + mm) + + + + + + + matrix ( + + + + + mm + mm) + + + + + + + + + + + + + + Shape- + + + + + + + + + + + + + + + + Shape- + + + + + + + + + + + + + + + + + + + + + + + + + + + onLoad + embed + simple + . + + + + + + + + + + + + + mm + mm + + + + + + + as-char + + + paragraph + + + + + + + + + + + + pt + pt + + + + + + + mm + + + + + + mm + + + + + + + + + + + Shape- + mm + mm + + + mm + mm + + + + + + + + + + + + + + + + + + + + + + + + mm + mm + mm + mm + Shape- + + + + + + + + + + + + + + + + + + - + + + + + + + + footnote + + + + endnote + + + + + + + + + + + + + + + diff --git a/src/hwp5/xsl/odt/content.xsl b/src/hwp5/xsl/odt/content.xsl new file mode 100644 index 0000000000000000000000000000000000000000..a38c883b8ec45b090de382ca0dd9f2c7f050aa34 --- /dev/null +++ b/src/hwp5/xsl/odt/content.xsl @@ -0,0 +1,64 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/hwp5/xsl/odt/document.xsl b/src/hwp5/xsl/odt/document.xsl new file mode 100644 index 0000000000000000000000000000000000000000..955167ebd17afeb9a921feba29995cba4a0c0c54 --- /dev/null +++ b/src/hwp5/xsl/odt/document.xsl @@ -0,0 +1,44 @@ + + + + + + + + + + diff --git a/src/hwp5/xsl/odt/styles.xsl b/src/hwp5/xsl/odt/styles.xsl new file mode 100644 index 0000000000000000000000000000000000000000..100e08bddb82b760e767bf43a045fbc5ae4e6dec --- /dev/null +++ 
b/src/hwp5/xsl/odt/styles.xsl @@ -0,0 +1,52 @@ + + + + + + + + + + + + + + + + + + + + diff --git a/src/hwp5/xsl/plaintext.xsl b/src/hwp5/xsl/plaintext.xsl new file mode 100644 index 0000000000000000000000000000000000000000..d1c997d076cf1c96068bebdad332d4d701c2752e --- /dev/null +++ b/src/hwp5/xsl/plaintext.xsl @@ -0,0 +1,59 @@ + + + + + + + + + + + + + + + + + + + + <표> + + <그림> + + diff --git a/src/hwp5/zlib_raw_codec.py b/src/hwp5/zlib_raw_codec.py new file mode 100644 index 0000000000000000000000000000000000000000..a34b19fa768d893c934ad8f2bb8c165448cc76dc --- /dev/null +++ b/src/hwp5/zlib_raw_codec.py @@ -0,0 +1,145 @@ +# -*- coding: utf-8 -*- +# +# pyhwp : hwp file format parser in python +# Copyright (C) 2010-2023 mete0r +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals +import codecs +import zlib # this codec needs the optional zlib module ! 
+ +_wbits = -15 + + +def zlib_raw_encode(input, errors='strict'): + assert errors == 'strict' + output = zlib.compress(input)[2:-4] + return (output, len(input)) + + +def zlib_raw_decode(input, errors='strict'): + assert errors == 'strict' + output = zlib.decompress(input, _wbits) + return (output, len(input)) + + +class Codec(codecs.Codec): + + def encode(self, input, errors='strict'): + return zlib_raw_encode(input, errors) + + def decode(self, input, errors='strict'): + return zlib_raw_decode(input, errors) + + +class IncrementalEncoder(codecs.IncrementalEncoder): + def __init__(self, errors='strict'): + assert errors == 'strict' + self.errors = errors + self.compressobj = zlib.compressobj() + self.initial = True + + def encode(self, input, final=False): + c = self.compressobj.compress(input) + if self.initial: + c = c[2:] + self.initial = False + if final: + c += self.compressobj.flush()[:-4] + return c + + def reset(self): + self.compressobj = zlib.compressobj() + + +class IncrementalDecoder(codecs.IncrementalDecoder): + def __init__(self, errors='strict'): + assert errors == 'strict' + self.errors = errors + self.decompressobj = zlib.decompressobj(_wbits) + + def decode(self, input, final=False): + if final: + if len(input) > 0: + d = self.decompressobj.decompress(input) + else: + d = b'' + return d + self.decompressobj.flush() + else: + return self.decompressobj.decompress(input) + + def reset(self): + self.decompressobj = zlib.decompressobj(_wbits) + + +class StreamWriter(object): + def __init__(self, stream, errors='strict'): + assert errors == 'strict' + self.stream = stream + self.encoder = IncrementalEncoder(errors) + + def write(self, data): + raise NotImplementedError + + +class StreamReader(object): + def __init__(self, stream, errors='strict'): + assert errors == 'strict' + self.stream = stream + self.decoder = IncrementalDecoder(errors) + self.buffer = b'' + self.offset = 0 + + def read(self, size=-1): + if size < 0: + c = self.stream.read() + d = 
self.buffer + self.decoder.decode(c, True) + self.buffer = b'' + self.offset += len(d) + return d + + final = False + while True: + if size <= len(self.buffer): + d = self.buffer[:size] + self.buffer = self.buffer[size:] + self.offset += size + return d + + if final: + d = self.buffer + self.buffer = b'' + self.offset += len(d) + return d + + c = self.stream.read(8196) + final = len(c) < 8196 or len(c) + self.buffer += self.decoder.decode(c, final) + + def tell(self): + return self.offset + + +_codecinfo = codecs.CodecInfo( + name='zlib_raw', + encode=zlib_raw_encode, + decode=zlib_raw_decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, +) diff --git a/src/pyhwp/__init__.py b/src/pyhwp/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/pyhwp/html_converter.py b/src/pyhwp/html_converter.py new file mode 100644 index 0000000000000000000000000000000000000000..04b8aef6318533309055cc5c24808417a1bf2907 --- /dev/null +++ b/src/pyhwp/html_converter.py @@ -0,0 +1,88 @@ +# -*- coding: utf-8 -*- +""" +HWP to HTML Converter module. +""" +import os + +from hwp5.hwp5html import HTMLTransform +from hwp5.xmlmodel import Hwp5File +from contextlib import closing + +class HwpToHtmlConverter: + def __init__(self, hwp_file): + self.hwp_file = hwp_file + + def convert(self, output_path): + """ + Convert the HWP file to HTML. + :param output_path: Path to save the generated HTML file. 
+ """ + if not os.path.exists(self.hwp_file): + raise FileNotFoundError(f"HWP file not found: {self.hwp_file}") + + # Ensure output directory exists + output_dir = os.path.dirname(output_path) + if output_dir and not os.path.exists(output_dir): + os.makedirs(output_dir) + + # Use HTMLTransform from hwp5 package + # The existing transform_hwp5_to_xhtml takes a file path or file object + # but transform_hwp5_to_xhtml in HTMLTransform returns a transform function + # We need to instantiate HTMLTransform and use its methods correctly. + + # Looking at hwp5html.py: + # transform = html_transform.transform_hwp5_to_xhtml + # transform(hwp5file, dest) + + transformer = HTMLTransform() + output_dir = os.path.dirname(os.path.abspath(output_path)) + + with closing(Hwp5File(self.hwp_file)) as hwp5file: + with transformer.transformed_xhwp5_at_temp(hwp5file) as xhwp5path: + # 1. Generage HTML + with open(output_path, 'wb') as f: + transformer.transform_xhwp5_to_xhtml(xhwp5path, f) + + # 2. Generate CSS + # The XSLT usually expects styles.css + css_path = os.path.join(output_dir, 'styles.css') + with open(css_path, 'wb') as f: + transformer.transform_xhwp5_to_css(xhwp5path, f) + + # 3. 
Extract BinData + bindata_dir = os.path.join(output_dir, 'bindata') + transformer.extract_bindata_dir(hwp5file, bindata_dir) + + +def main(): + import argparse + import sys + + parser = argparse.ArgumentParser(description='Convert HWP file to HTML with CSS and images.') + parser.add_argument('input', help='Input HWP file') + parser.add_argument('--output', '-o', help='Output HTML file path (default: input_filename.html)') + + args = parser.parse_args() + + input_file = args.input + if args.output: + output_file = args.output + else: + base_name = os.path.splitext(input_file)[0] + output_file = base_name + '.html' + + try: + converter = HwpToHtmlConverter(input_file) + print(f"Converting '{input_file}' to '{output_file}'...") + converter.convert(output_file) + print("Conversion successful!") + print(f"Generated files:") + print(f" - HTML: {output_file}") + print(f" - CSS: {os.path.join(os.path.dirname(os.path.abspath(output_file)), 'styles.css')}") + print(f" - Data: {os.path.join(os.path.dirname(os.path.abspath(output_file)), 'bindata')}") + except Exception as e: + print(f"Error: {e}", file=sys.stderr) + sys.exit(1) + +if __name__ == '__main__': + main()