seawolf2357 committed on
Commit
38bc84f
·
verified ·
1 Parent(s): 22cbf6d

Add pyhwp_uno

Browse files
pyhwp_uno/hwp5_uno/__init__.py ADDED
@@ -0,0 +1,289 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # pyhwp : hwp file format parser in python
4
+ # Copyright (C) 2010,2011,2012 https://github.com/mete0r
5
+ #
6
+ # This program is free software: you can redistribute it and/or modify
7
+ # it under the terms of the GNU Affero General Public License as published by
8
+ # the Free Software Foundation, either version 3 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # This program is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU Affero General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Affero General Public License
17
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
+ #
19
+
20
+ import uno
21
+ import unohelper
22
+ from unokit.services import css
23
+ from unokit.adapters import InputStreamFromFileLike
24
+ from unokit.adapters import OutputStreamToFileLike
25
+ from unokit.adapters import FileFromStream
26
+ from com.sun.star.io import XStreamListener
27
+ import logging
28
+
29
+
30
+ logger = logging.getLogger('hwp5.uno')
31
+
32
+
33
class OleStorageAdapter(object):
    ''' Adapter exposing an OLESimpleStorage as a hwp5 storage.

    Iterating yields the element names of the wrapped storage; indexing by
    name yields a nested adapter for a sub-storage or an OleStorageStream
    for anything else.
    '''

    def __init__(self, oless):
        ''' an OLESimpleStorage to hwp5 storage adapter.

        :param oless: an instance of OLESimpleStorage
        '''
        self.oless = oless

    def __iter__(self):
        ''' Iterate over the names of the elements in the storage. '''
        names = self.oless.getElementNames()
        return iter(names)

    def __getitem__(self, name):
        ''' Look up the element called `name`.

        :param name: name of the element in the wrapped storage
        :returns: an OleStorageAdapter when the element supports the
                  OLESimpleStorage service, otherwise an OleStorageStream
        :raises KeyError: when the storage has no element of that name
        '''
        from com.sun.star.container import NoSuchElementException
        try:
            child = self.oless.getByName(name)
        except NoSuchElementException:
            raise KeyError(name)

        if 'com.sun.star.embed.OLESimpleStorage' in child.SupportedServiceNames:
            return OleStorageAdapter(child)

        # Not a storage: close the stream that was just obtained and hand
        # out a wrapper that only remembers the storage and the name.
        child.closeInput()
        return OleStorageStream(self.oless, name)
57
+
58
+
59
class OleStorageStream(object):
    ''' A named element of an OLESimpleStorage that is fetched on open(). '''

    def __init__(self, oless, name):
        ''' Remember the owning storage and the element name.

        :param oless: the storage the element is looked up in
        :param name: name of the element
        '''
        self.name = name
        self.oless = oless

    def open(self):
        ''' Fetch the element by name and wrap it with FileFromStream. '''
        return FileFromStream(self.oless.getByName(self.name))
68
+
69
+
70
def HwpFileFromInputStream(inputstream):
    ''' Hwp5File from com.sun.star.io.InputStream

    The stream is opened as an OLESimpleStorage and wrapped in an
    OleStorageAdapter before it is handed to Hwp5File.
    '''
    adapter = OleStorageAdapter(css.embed.OLESimpleStorage(inputstream))
    from hwp5.xmlmodel import Hwp5File
    return Hwp5File(adapter)
76
+
77
+
78
def StorageFromInputStream(inputstream):
    ''' Create a storage over `inputstream` with css.embed.StorageFactory.

    :param inputstream: the input stream the storage is created from
    :returns: whatever the factory's createInstanceWithArguments() returns
    '''
    element_mode_read = 1  # com.sun.star.embed.ElementModes.READ
    arguments = (inputstream, element_mode_read)
    return css.embed.StorageFactory().createInstanceWithArguments(arguments)
81
+
82
+
83
def XSLTTransformer(stylesheet_url, source_url, source_url_base):
    ''' Instantiate the css.comp.JAXTHelper service.

    The three arguments are passed to the service as NamedValues called
    'StylesheetURL', 'SourceURL' and 'SourceBaseURL', in that order.
    '''
    from com.sun.star.beans import NamedValue
    settings = (('StylesheetURL', stylesheet_url),
                ('SourceURL', source_url),
                ('SourceBaseURL', source_url_base))
    args = tuple(NamedValue(key, value) for key, value in settings)
    # alternative service, currently not used:
    #   css.comp.documentconversion.LibXSLTTransformer(*args)
    return css.comp.JAXTHelper(*args)
90
+
91
+
92
def haveXSLTTransformer():
    ''' Tell whether XSLTTransformer() yields a transformer.

    :returns: True when XSLTTransformer('', '', '') is not None
    '''
    return XSLTTransformer('', '', '') is not None
95
+
96
+
97
class OneshotEvent(object):
    ''' One-shot event built on an OS pipe.

    wait() reads the read end of the pipe until end-of-file, which is
    reached once signal() has closed the write end.
    '''

    def __init__(self):
        import os
        read_fd, write_fd = os.pipe()
        self.pin = os.fdopen(read_fd, 'r')
        self.pout = os.fdopen(write_fd, 'w')

    def signal(self):
        ''' Close the write end of the pipe. '''
        self.pout.close()

    def wait(self):
        ''' Read the pipe to its end, then close the read end. '''
        self.pin.read()
        self.pin.close()
111
+
112
+
113
class XSLTListener(unohelper.Base, XStreamListener):
    ''' Stream listener that signals a OneshotEvent when the XSLT is over.

    started() only logs; closed(), terminated(), error() and disposing()
    each log and then signal `self.event`.
    '''

    def __init__(self):
        self.event = OneshotEvent()

    def started(self):
        ''' Log that the transformation started. '''
        logger.info('XSLT started')

    def terminated(self):
        ''' Log the termination and signal the event. '''
        logger.info('XSLT terminated')
        self.event.signal()

    def closed(self):
        ''' Log the close and signal the event. '''
        logger.info('XSLT closed')
        self.event.signal()

    def error(self, exception):
        ''' Log `exception` at error level and signal the event. '''
        logger.error('XSLT error: %s', exception)
        self.event.signal()

    def disposing(self, source):
        ''' Log the disposing `source` and signal the event. '''
        logger.info('XSLT disposing: %s', source)
        self.event.signal()
135
+
136
+
137
def xslt_with_libreoffice(xsl_path, inp_path, out_path):
    ''' Transform a file with the transformer from XSLTTransformer().

    Blocks until the listener attached to the transformer signals its
    event.

    :param xsl_path: path of the XSLT stylesheet
    :param inp_path: path of the file to transform
    :param out_path: path of the file the result is written to
    :returns: an empty dict
    '''
    import os.path
    xsl_path = os.path.abspath(xsl_path)
    xsl_name = os.path.basename(xsl_path)
    xsl_url = uno.systemPathToFileUrl(xsl_path)

    inp_path = os.path.abspath(inp_path)
    out_path = os.path.abspath(out_path)

    # The stream adapters are created with dontclose=True (presumably so
    # they never close the underlying files), so both files are owned here
    # and are closed -- and the output thereby flushed -- before returning.
    # Binary mode keeps the bytes untouched on platforms that distinguish
    # text from binary files.
    with open(inp_path, 'rb') as inp_file:
        with open(out_path, 'wb') as out_file:
            inp_strm = InputStreamFromFileLike(inp_file, dontclose=True)
            out_strm = OutputStreamToFileLike(out_file, dontclose=True)

            transformer = XSLTTransformer(xsl_url, '', '')
            transformer.InputStream = inp_strm
            transformer.OutputStream = out_strm

            listener = XSLTListener()
            transformer.addListener(listener)
            try:
                transformer.start()
                logger.info('xslt.soffice(%s) start', xsl_name)
                try:
                    listener.event.wait()
                finally:
                    logger.info('xslt.soffice(%s) end', xsl_name)
            finally:
                # detach the listener even when starting or waiting failed
                transformer.removeListener(listener)
    return dict()
167
+
168
+
169
def load_hwp5file_into_doc(hwp5file, doc, statusindicator=None):
    ''' Convert `hwp5file` to an ODT package and load that into `doc`.

    :param hwp5file: passed to convert_hwp5file_into_odtpkg()
    :param doc: the document load_odt_from_storage() imports into
    :param statusindicator: passed through to load_odt_from_storage()
    '''
    package_storage = convert_hwp5file_into_odtpkg(hwp5file)
    logger.debug('hwp to odtpkg completed')

    load_odt_from_storage(doc, package_storage, statusindicator)
174
+
175
+
176
def convert_hwp5file_into_odtpkg(hwp5file):
    ''' Convert `hwp5file` into an ODT package held in a temporary file.

    The package is written as a zip archive into a TemporaryFile and read
    back through a duplicated file descriptor.

    :param hwp5file: the hwp5 file handed to hwp5.hwp5odt.Converter
    :returns: the result of StorageFromInputStream() over the package
    '''
    import os
    from tempfile import TemporaryFile
    from zipfile import ZipFile
    from hwp5.hwp5odt import ODTPackage
    from hwp5.hwp5odt import Converter
    import hwp5.plat

    tmpfile = TemporaryFile()
    # Second handle on the same temporary file, used to read the package
    # back.  The package is a zip archive, so it has to be read in binary
    # mode; text mode would corrupt it on platforms that translate newlines.
    tmpfile2 = os.fdopen(os.dup(tmpfile.fileno()), 'rb')
    try:
        zf = ZipFile(tmpfile, 'w')
        odtpkg = ODTPackage(zf)
        try:
            if haveXSLTTransformer():
                xslt = xslt_with_libreoffice
            else:
                # we use default xslt
                xslt = hwp5.plat.get_xslt()

            # convert without RelaxNG validation
            convert = Converter(xslt)

            # Embed images: see #32 - https://github.com/mete0r/pyhwp/issues/32
            convert(hwp5file, odtpkg, embedimage=True)
        finally:
            odtpkg.close()

        tmpfile2.seek(0)
        odtpkg_stream = InputStreamFromFileLike(tmpfile2)
        odtpkg_storage = StorageFromInputStream(odtpkg_stream)
    except BaseException:
        # nothing is handed to the caller on failure, so do not leave the
        # read-back handle open
        tmpfile2.close()
        raise
    return odtpkg_storage
208
+
209
+
210
def load_odt_from_storage(doc, storage, statusindicator=None):
    ''' Import the ODT parts found in `storage` into `doc`.

    The document metadata is loaded first; then 'meta.xml', 'styles.xml'
    and 'content.xml' are imported in that order.  A part for which
    InputSourceFromStorage() returns nothing is skipped.

    :param doc: the target document
    :param storage: the storage holding the ODT package
    :param statusindicator: passed to every importer; may be None
    '''
    infoset = css.beans.PropertySet()
    uri = css.rdf.URI('/')

    # (>= OOo 3.2) Re-initialize the document metadata: loads the stream
    # named 'manifest.rdf' from the storage, and then all metadata streams
    # mentioned in the manifest.
    #
    # SfxBaseModel::loadMetadataFromStorage
    # -> sfx2::DocumentMetadataAccess::loadMetadataFromStorage
    # ---> initLoading
    # ---> collectFilesFromStorage
    doc.loadMetadataFromStorage(storage, uri, None)

    Writer = css.comp.Writer

    meta_xml = InputSourceFromStorage(storage, 'meta.xml')
    if meta_xml:
        meta_importer = Writer.XMLOasisMetaImporter(infoset, statusindicator)
        let_document_import_xml(doc, meta_importer, meta_xml)

    # new SvXMLGraphicImportExportHelper, see
    #   svx/inc/svx/xmlgrhlp.hxx
    #   svx/source/xml/xmlgrhlp.cxx
    #   svx/util/svx.component
    graphicresolver = css.comp.Svx.GraphicImportHelper(storage)
    objectresolver = None
    lateinitsettings = None
    filterargs = (infoset, statusindicator, graphicresolver, objectresolver,
                  lateinitsettings)

    # styles before content; both importers take the same arguments
    parts = (('styles.xml', 'XMLOasisStylesImporter'),
             ('content.xml', 'XMLOasisContentImporter'))
    for streamname, importer_name in parts:
        inputsource = InputSourceFromStorage(storage, streamname)
        if inputsource:
            importer = getattr(Writer, importer_name)(*filterargs)
            let_document_import_xml(doc, importer, inputsource)
250
+
251
+
252
def InputSourceFromStorage(storage, streamname):
    ''' Open the stream `streamname` of `storage` as an input source.

    :param storage: the storage to look the stream up in
    :param streamname: name of the stream element
    :returns: the result of InputSourceFromStream() for the stream, or
              None when the storage has no element of that name
    '''
    if not storage.hasByName(streamname):
        return None
    element_mode_read = 1  # READ
    stream = storage.openStreamElement(streamname, element_mode_read)
    return InputSourceFromStream(stream)
256
+
257
+
258
def InputSourceFromStream(stream):
    ''' Build a com.sun.star.xml.sax.InputSource around `stream`.

    The source gets an empty system id and the encoding 'utf-8'.
    '''
    from com.sun.star.xml.sax import InputSource
    source = InputSource()
    source.aInputStream = stream
    source.sEncoding = 'utf-8'
    source.sSystemId = ''
    return source
265
+
266
+
267
def let_document_import_xml(doc, filter, inputsource):
    ''' Parse `inputsource` into `doc` through the import `filter`.

    :param doc: set as the target document of `filter`
    :param filter: set as the document handler of a css.xml.sax.Parser
    :param inputsource: the input source the parser reads
    '''
    filter.setTargetDocument(doc)
    sax_parser = css.xml.sax.Parser()
    sax_parser.setDocumentHandler(filter)
    sax_parser.parseStream(inputsource)
272
+
273
+
274
def inputstream_is_hwp5file(inputstream):
    ''' Tell whether `inputstream` holds an hwp5 file.

    The stream is opened as an OLESimpleStorage and checked with
    hwp5.filestructure.storage_is_hwp5file().  Any Exception raised on the
    way is logged and reported as False.
    '''
    try:
        adapter = OleStorageAdapter(css.embed.OLESimpleStorage(inputstream))

        from hwp5.filestructure import storage_is_hwp5file
        return storage_is_hwp5file(adapter)
    except Exception as e:
        logger.exception(e)
    return False
284
+
285
+
286
def typedetect(inputstream):
    ''' Return 'hwp5' when `inputstream` is an hwp5 file, else ''. '''
    return 'hwp5' if inputstream_is_hwp5file(inputstream) else ''
pyhwp_uno/hwp5_uno/tests/__init__.py ADDED
File without changes
pyhwp_uno/hwp5_uno/tests/test_hwp5_uno.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ from unittest import TestCase
3
+
4
+
5
class TestBase(TestCase):
    ''' Base test case giving access to the hwp5_tests fixtures. '''

    def get_fixture_path(self, filename):
        ''' Delegate to hwp5_tests.fixtures.get_fixture_path(). '''
        from hwp5_tests import fixtures
        return fixtures.get_fixture_path(filename)

    def open_fixture(self, filename, *args, **kwargs):
        ''' Delegate to hwp5_tests.fixtures.open_fixture(). '''
        from hwp5_tests import fixtures
        return fixtures.open_fixture(filename, *args, **kwargs)
14
+
15
+
16
class OleStorageAdapterTest(TestBase):
    ''' Tests OleStorageAdapter against the 'sample-5017.hwp' fixture. '''

    def get_adapter(self):
        ''' Return an OleStorageAdapter over the fixture file. '''
        from unokit.services import css
        # import the adapter from the module that defines it, like the
        # other tests do, not through hwp5_uno's incidental re-export
        from unokit.adapters import InputStreamFromFileLike
        from hwp5_uno import OleStorageAdapter
        f = self.open_fixture('sample-5017.hwp', 'rb')
        inputstream = InputStreamFromFileLike(f)
        oless = css.embed.OLESimpleStorage(inputstream)
        return OleStorageAdapter(oless)

    def test_iter(self):
        adapter = self.get_adapter()

        for name in ('FileHeader', 'DocInfo', 'BodyText'):
            self.assertTrue(name in adapter)

    def test_getitem(self):
        adapter = self.get_adapter()

        bodytext = adapter['BodyText']
        self.assertTrue('Section0' in bodytext)

        from hwp5.filestructure import HwpFileHeader
        from hwp5.filestructure import HWP5_SIGNATURE

        # second pass: reopen (just being careful)
        for attempt in range(2):
            fileheader = adapter['FileHeader']
            fileheader = HwpFileHeader(fileheader)
            self.assertEqual((5, 0, 1, 7), fileheader.version)
            self.assertEqual(HWP5_SIGNATURE, fileheader.signature)
53
+
54
+
55
class HwpFileFromInputStreamTest(TestBase):
    ''' Tests HwpFileFromInputStream() with the 'sample-5017.hwp' fixture. '''

    def test_basic(self):
        from unokit.adapters import InputStreamFromFileLike
        from hwp5_uno import HwpFileFromInputStream
        with self.open_fixture('sample-5017.hwp', 'rb') as fixture:
            hwpfile = HwpFileFromInputStream(InputStreamFromFileLike(fixture))
            expected_version = (5, 0, 1, 7)
            self.assertEqual(expected_version, hwpfile.fileheader.version)
64
+
65
+
66
class StorageFromInputStreamTest(TestBase):
    ''' Tests StorageFromInputStream() with a freshly written package. '''

    def test_basic(self):
        import os
        import uno
        from unokit.adapters import InputStreamFromFileLike
        from hwp5_uno import StorageFromInputStream
        from hwp5.hwp5odt import ODTPackage

        zipname = self.id()+'.zip'

        try:
            pkg = ODTPackage(zipname)
            try:
                from StringIO import StringIO
                data = StringIO('hello')
                pkg.insert_stream(data, 'abc.txt', 'text/plain')
            finally:
                pkg.close()

            # open() instead of the file() builtin, which Python 3 lacks
            with open(zipname, 'rb') as f:
                inputstream = InputStreamFromFileLike(f, dontclose=True)
                storage = StorageFromInputStream(inputstream)
                try:
                    self.assertTrue(uno.getTypeByName('com.sun.star.embed.XStorage')
                                    in storage.Types)
                    self.assertEqual(set(['abc.txt']), set(storage.ElementNames))
                finally:
                    storage.dispose()
        finally:
            # do not leave the generated package in the working directory
            if os.path.exists(zipname):
                os.remove(zipname)
93
+
94
+
95
class TypedetectTest(TestBase):
    ''' Tests type detection with the 'sample-5017.hwp' fixture. '''

    def test_basic(self):
        from unokit.adapters import InputStreamFromFileLike
        from hwp5_uno import inputstream_is_hwp5file, typedetect
        with self.open_fixture('sample-5017.hwp', 'rb') as fixture:
            inputstream = InputStreamFromFileLike(fixture, dontclose=True)
            detected = inputstream_is_hwp5file(inputstream)
            self.assertTrue(detected)
            typename = typedetect(inputstream)
            self.assertEqual('hwp5', typename)
104
+
105
+
106
class LoadHwp5FileTest(TestBase):
    ''' Loads 'sample-5017.hwp' into a new Writer document and inspects it. '''

    def get_paragraphs(self, text):
        ''' Enumerate `text` with unokit.util.enumerate(). '''
        import unokit.util
        return unokit.util.enumerate(text)

    def get_text_portions(self, paragraph):
        ''' Enumerate `paragraph` with unokit.util.enumerate(). '''
        import unokit.util
        return unokit.util.enumerate(paragraph)

    def get_text_contents(self, text_portion):
        ''' Yield the TextContent items of `text_portion`.

        Yields nothing when the portion has no createContentEnumeration.
        '''
        import unokit.util
        if hasattr(text_portion, 'createContentEnumeration'):
            xenum = text_portion.createContentEnumeration('com.sun.star.text.TextContent')
            for text_content in unokit.util.iterate(xenum):
                yield text_content

    def assert_graphic_shape(self, shape, mimetype, dib_length):
        ''' Check the graphic properties shared by the fixture's shapes.

        :param shape: a shape taken from the document's draw page
        :param mimetype: expected shape.Graphic.MimeType
        :param dib_length: expected len(shape.Bitmap.DIB)
        '''
        self.assertEqual(1, shape.Graphic.GraphicType)
        self.assertEqual(mimetype, shape.Graphic.MimeType)
        self.assertEqual(2, shape.Bitmap.GraphicType)
        self.assertEqual('image/x-vclgraphic', shape.Bitmap.MimeType)
        self.assertEqual(dib_length, len(shape.Bitmap.DIB))
        self.assertTrue(shape.GraphicURL.startswith('vnd.sun.star.GraphicObject:'))
        # Only the URL prefix is asserted; the full URL is printed for
        # inspection.  print(...) with one argument behaves the same under
        # Python 2 and Python 3.
        print(shape.GraphicURL)
        self.assertEqual(None, shape.GraphicStreamURL)

    def test_basic(self):
        from unokit.services import css
        import unokit.util
        from hwp5.xmlmodel import Hwp5File
        from hwp5_uno import load_hwp5file_into_doc

        desktop = css.frame.Desktop()
        doc = desktop.loadComponentFromURL('private:factory/swriter', '_blank',
                                           0, tuple())
        try:
            hwp5path = self.get_fixture_path('sample-5017.hwp')
            hwp5file = Hwp5File(hwp5path)

            load_hwp5file_into_doc(hwp5file, doc)

            text = doc.getText()

            paragraphs = list(self.get_paragraphs(text))

            p = paragraphs[0]
            text_portions = list(self.get_text_portions(p))
            tp = text_portions[0]
            self.assertEqual('Text', tp.TextPortionType)
            self.assertEqual(u'한글 ', tp.String)

            p = paragraphs[-1]
            tp = list(self.get_text_portions(p))[-1]
            self.assertEqual('Frame', tp.TextPortionType)
            tc = list(self.get_text_contents(tp))[-1]
            self.assertTrue('com.sun.star.drawing.GraphicObjectShape' in
                            tc.SupportedServiceNames)

            table = paragraphs[6]
            self.assertTrue('com.sun.star.text.TextTable' in
                            table.SupportedServiceNames)

            drawpage = doc.getDrawPage()
            shapes = list(unokit.util.enumerate(drawpage))

            self.assertEqual(2, len(shapes))

            # full URL once seen for shapes[0]:
            # vnd.sun.star.GraphicObject:10000000000001F40000012C1F9CCF04
            self.assert_graphic_shape(shapes[0], 'image/jpeg', 28254)

            # full URL once seen for shapes[1]:
            # vnd.sun.star.GraphicObject:1000020100000010000000108F049D12
            self.assert_graphic_shape(shapes[1], 'image/png', 374)
        finally:
            # do not leave the document open in the office instance
            # (com.sun.star.util.XCloseable.close, delivering ownership)
            doc.close(True)
pyhwp_uno/setup.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from setuptools import setup, find_packages
2
# Package metadata for pyhwp_uno; every package found by find_packages()
# is included.
setup(
    name='pyhwp_uno',
    packages=find_packages(),
    install_requires=['unokit', 'pyhwp'],
)