| Home | Trees | Indices | Help |
|
|---|
|
|
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 #
4 # Copyright 2006-2009 Zuza Software Foundation
5 #
6 # This file is part of the Translate Toolkit.
7 #
8 # This program is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the GNU General Public License
19 # along with this program; if not, see <http://www.gnu.org/licenses/>.
20
21 """Parent class for LISA standards (TMX, TBX, XLIFF)"""
22
23 import re
24
25 from translate.storage import base
26 from translate.lang import data
27 try:
28 from lxml import etree
29 from translate.misc.xml_helpers import getText, getXMLlang, setXMLlang, \
30 getXMLspace, setXMLspace, namespaced
31 except ImportError, e:
32 raise ImportError("lxml is not installed. It might be possible to continue without support for XML formats.")
33
34
def _findAllMatches(text, re_obj):
    """Generate match objects for all L{re_obj} matches in L{text}.

    Matches are produced from left to right and never overlap: every search
    resumes at the position where the previous match ended.

    @param text: the string to search
    @param re_obj: a compiled regular expression object
    @return: a generator yielding one match object per match found
    """
    pos = 0
    length = len(text)
    while pos < length:
        match = re_obj.search(text, pos)
        if match is None:
            break
        yield match
        if match.end() > match.start():
            pos = match.end()
        else:
            # A zero-width match consumes no text.  Step past it, otherwise
            # the next search would find the very same match again and the
            # loop would never terminate.
            pos = match.end() + 1
45
#TODO: we can now do better with our proper placeables support
# All patterns are raw strings so that regex escapes reach the re module
# untouched.  "$" is escaped wherever it is the literal dollar sign of a
# positional printf argument (e.g. "%1$lx"); unescaped it would be an
# end-of-string anchor and the pattern could never match.
placeholders = [
    r'(%[diouxXeEfFgGcrs])',
    r'(\\+.?)',
    r'(%[0-9]\$lx)',
    r'(%[0-9]\$[a-z])',
    r'(<.+?>)',
]
re_placeholders = [re.compile(ph) for ph in placeholders]


def _getPhMatches(text):
    """Return a list of regexp match objects for all placeholders in the
    L{text}.

    Every pattern in C{re_placeholders} is applied to the whole text.  The
    matches are returned in order of position and do not overlap: when two
    patterns match overlapping text, the match that starts first is kept, and
    of two matches starting at the same position the longer one wins.

    @param text: the string to search for placeholders
    @return: list of match objects sorted by start position
    """
    candidates = []
    for re_ph in re_placeholders:
        candidates.extend(re_ph.finditer(text))

    # Sort them so they come sequentially; at equal start the longest first.
    candidates.sort(key=lambda m: (m.start(), -m.end()))

    matches = []
    covered_to = 0
    for match in candidates:
        # Skip anything that begins inside a placeholder already accepted,
        # otherwise the overlapped text would be reported twice.
        if match.start() >= covered_to:
            matches.append(match)
            covered_to = match.end()
    return matches
60
61
63 """
64 A single unit in the file. Provisional work is done to make several
65 languages possible.
66 """
67
68 #The name of the root element of this unit type:(termEntry, tu, trans-unit)
69 rootNode = ""
70 # The name of the per language element of this unit type:(termEntry, tu,
71 # trans-unit)
72 languageNode = ""
73 #The name of the innermost element of this unit type:(term, seg)
74 textNode = ""
75
76 namespace = None
77 _default_xml_space = "preserve"
78 """The default handling of spacing in the absence of an xml:space
79 attribute.
80
81 This is mostly for correcting XLIFF behaviour."""
82
84 """Constructs a unit containing the given source string"""
85 self._rich_source = None
86 self._rich_target = None
87 if empty:
88 self._state_n = 0
89 return
90 self.xmlelement = etree.Element(self.namespaced(self.rootNode))
91 #add descrip, note, etc.
92 super(LISAunit, self).__init__(source)
93
def __eq__(self, other):
    """Compares two units.

    Units that are not LISA units are compared by the parent class.  Two
    LISA units are equal when they have the same number of language nodes
    and the text of each pair of corresponding nodes is identical.

    @param other: the unit to compare with
    @return: True if the units are considered equal, False otherwise
    """
    if not isinstance(other, LISAunit):
        return super(LISAunit, self).__eq__(other)
    languageNodes = self.getlanguageNodes()
    otherlanguageNodes = other.getlanguageNodes()
    if len(languageNodes) != len(otherlanguageNodes):
        return False
    # Each unit's text is read with the xml:space handling of its own
    # element, falling back to that unit's own default.
    myspace = getXMLspace(self.xmlelement, self._default_xml_space)
    otherspace = getXMLspace(other.xmlelement, other._default_xml_space)
    for mynode, othernode in zip(languageNodes, otherlanguageNodes):
        mytext = self.getNodeText(mynode, myspace)
        othertext = other.getNodeText(othernode, otherspace)
        if mytext != othertext:
            #TODO:^ maybe we want to take children and notes into account
            return False
    return True
113
def namespaced(self, name):
    """Returns the given name qualified with this unit's namespace, in
    Clark notation.

    For example namespaced("source") in an XLIFF document might return::
        {urn:oasis:names:tc:xliff:document:1.1}source
    lxml needs names in this form throughout.
    """
    unit_namespace = self.namespace
    return namespaced(unit_namespace, name)
122
def set_source_dom(self, dom_node):
    """Installs dom_node as the first language node of this unit.

    If the unit already has language nodes, the first one is replaced by
    dom_node; otherwise dom_node is appended to the unit's XML element.
    """
    existing = self.getlanguageNodes()
    if not existing:
        self.xmlelement.append(dom_node)
        return
    self.xmlelement.replace(existing[0], dom_node)
129
132 source_dom = property(get_source_dom, set_source_dom)
133
135 if self._rich_source is not None:
136 self._rich_source = None
137 text = data.forceunicode(text)
138 self.source_dom = self.createlanguageNode(sourcelang, text, "source")
139
def getsource(self):
    """Returns the text of the unit's source node, read with the xml:space
    handling that applies to this unit's element."""
    source_node = self.source_dom
    xml_space = getXMLspace(self.xmlelement, self._default_xml_space)
    return self.getNodeText(source_node, xml_space)
144 source = property(getsource, setsource)
145
147 languageNodes = self.getlanguageNodes()
148 assert len(languageNodes) > 0
149 if dom_node is not None:
150 if append or len(languageNodes) == 0:
151 self.xmlelement.append(dom_node)
152 else:
153 self.xmlelement.insert(1, dom_node)
154 if not append and len(languageNodes) > 1:
155 self.xmlelement.remove(languageNodes[1])
156
158 if lang:
159 return self.getlanguageNode(lang=lang)
160 else:
161 return self.getlanguageNode(lang=None, index=1)
162 target_dom = property(get_target_dom)
163
165 """Sets the "target" string (second language), or alternatively
166 appends to the list"""
167 #XXX: we really need the language - can't really be optional, and we
168 # need to propagate it
169 if self._rich_target is not None:
170 self._rich_target = None
171 text = data.forceunicode(text)
172 # Firstly deal with reinitialising to None or setting to identical
173 # string
174 if self.gettarget() == text:
175 return
176 languageNode = self.get_target_dom(None)
177 if not text is None:
178 if languageNode is None:
179 languageNode = self.createlanguageNode(lang, text, "target")
180 self.set_target_dom(languageNode, append)
181 else:
182 if self.textNode:
183 terms = languageNode.iter(self.namespaced(self.textNode))
184 try:
185 languageNode = terms.next()
186 except StopIteration, e:
187 pass
188 languageNode.text = text
189 else:
190 self.set_target_dom(None, False)
191
193 """retrieves the "target" text (second entry), or the entry in the
194 specified language, if it exists"""
195 return self.getNodeText(self.get_target_dom(lang),
196 getXMLspace(self.xmlelement,
197 self._default_xml_space))
198 target = property(gettarget, settarget)
199
201 """Returns a xml Element setup with given parameters to represent a
202 single language entry. Has to be overridden."""
203 return None
204
206 """Create the text node in parent containing all the ph tags"""
207 matches = _getPhMatches(text)
208 if not matches:
209 parent.text = text
210 return
211
212 # Now we know there will definitely be some ph tags
213 start = matches[0].start()
214 pretext = text[:start]
215 if pretext:
216 parent.text = pretext
217 lasttag = parent
218 for i, m in enumerate(matches):
219 #pretext
220 pretext = text[start:m.start()]
221 # this will never happen with the first ph tag
222 if pretext:
223 lasttag.tail = pretext
224 #ph node
225 phnode = etree.SubElement(parent, self.namespaced("ph"))
226 phnode.set("id", str(i+1))
227 phnode.text = m.group()
228 lasttag = phnode
229 start = m.end()
230 #post text
231 if text[start:]:
232 lasttag.tail = text[start:]
233
def getlanguageNodes(self):
    """Returns a list of the direct children of this unit's element that
    hold the per language information."""
    element = self.xmlelement
    language_tag = self.namespaced(self.languageNode)
    return [node for node in element.iterchildren(language_tag)]
238
240 """Retrieves a languageNode either by language or by index"""
241 if lang is None and index is None:
242 raise KeyError("No criterea for languageNode given")
243 languageNodes = self.getlanguageNodes()
244 if lang:
245 for set in languageNodes:
246 if getXMLlang(set) == lang:
247 return set
248 else:#have to use index
249 if index >= len(languageNodes):
250 return None
251 else:
252 return languageNodes[index]
253 return None
254
256 """Retrieves the term from the given languageNode"""
257 if languageNode is None:
258 return None
259 if self.textNode:
260 terms = languageNode.iterdescendants(self.namespaced(self.textNode))
261 if terms is None:
262 return None
263 else:
264 return getText(terms.next(), xml_space)
265 else:
266 return getText(languageNode, xml_space)
267
271
274
275 xid = property(lambda self: self.xmlelement.attrib[self.namespaced('xid')],
276 lambda self, value: self._set_property(self.namespaced('xid'), value))
277
278 rid = property(lambda self: self.xmlelement.attrib[self.namespaced('rid')],
279 lambda self, value: self._set_property(self.namespaced('rid'), value))
280
285 createfromxmlElement = classmethod(createfromxmlElement)
286
287
289 """A class representing a file store for one of the LISA file formats."""
290 UnitClass = LISAunit
291 #The root node of the XML document:
292 rootNode = ""
293 #The root node of the content section:
294 bodyNode = ""
295 #The XML skeleton to use for empty construction:
296 XMLskeleton = ""
297
298 namespace = None
299
300 - def __init__(self, inputfile=None, sourcelanguage='en',
301 targetlanguage=None, unitclass=None):
302 super(LISAfile, self).__init__(unitclass=unitclass)
303 if inputfile is not None:
304 self.parse(inputfile)
305 assert self.document.getroot().tag == self.namespaced(self.rootNode)
306 else:
307 # We strip out newlines to ensure that spaces in the skeleton
308 # doesn't interfere with the the pretty printing of lxml
309 self.parse(self.XMLskeleton.replace("\n", ""))
310 self.setsourcelanguage(sourcelanguage)
311 self.settargetlanguage(targetlanguage)
312 self.addheader()
313 self._encoding = "UTF-8"
314
318
def namespaced(self, name):
    """Returns the given name qualified with this store's namespace, in
    Clark notation.

    For example namespaced("source") in an XLIFF document might return::
        {urn:oasis:names:tc:xliff:document:1.1}source
    lxml needs names in this form throughout.
    """
    store_namespace = self.namespace
    return namespaced(store_namespace, name)
327
def initbody(self):
    """Caches the document's default namespace and its body element in
    self.namespace and self.body, so the body never needs to be looked up
    in the XML again."""
    root = self.document.getroot()
    self.namespace = root.nsmap.get(None, None)
    # The namespace must be stored first: namespaced() reads it to build
    # the qualified name of the body element.
    body_path = '//%s' % self.namespaced(self.bodyNode)
    self.body = self.document.find(body_path)
333
335 #TODO: miskien moet hierdie eerder addsourcestring of iets genoem word?
336 """Adds and returns a new unit with the given string as first entry."""
337 newunit = self.UnitClass(source)
338 self.addunit(newunit)
339 return newunit
340
342 unit.namespace = self.namespace
343 super(LISAfile, self).addunit(unit)
344 if new:
345 self.body.append(unit.xmlelement)
346
348 """Converts to a string containing the file's XML"""
349 return etree.tostring(self.document, pretty_print=True,
350 xml_declaration=True, encoding='utf-8')
351
def parse(self, xml):
    """Populates this object from the given xml string.

    The argument may also be an object with a read() method; it is then
    rewound and read completely.  After parsing, one unit is registered
    for every unit element found below the document root.
    """
    if not hasattr(self, 'filename'):
        self.filename = getattr(xml, 'name', '')
    if hasattr(xml, "read"):
        xml.seek(0)
        xml = xml.read()

    parser_options = {}
    if etree.LXML_VERSION >= (2, 1, 0):
        # Since version 2.1.0 we can pass the strip_cdata parameter to
        # indicate that we don't want cdata to be converted to raw XML
        parser_options['strip_cdata'] = False
    parser = etree.XMLParser(**parser_options)

    root = etree.fromstring(xml, parser)
    self.document = root.getroottree()
    self._encoding = self.document.docinfo.encoding
    self.initbody()
    assert self.document.getroot().tag == self.namespaced(self.rootNode)

    unit_tag = self.namespaced(self.UnitClass.rootNode)
    for entry in self.document.getroot().iterdescendants(unit_tag):
        # These elements are already in the tree, hence new=False.
        self.addunit(self.UnitClass.createfromxmlElement(entry), new=False)
373
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Tue Aug 17 15:50:29 2010 | http://epydoc.sourceforge.net |