| Home | Trees | Indices | Help |
|
|---|
|
|
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 #
4 # Copyright 2004-2008 Zuza Software Foundation
5 #
6 # This file is part of translate.
7 #
8 # translate is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
12 #
13 # translate is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the GNU General Public License
19 # along with translate; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
22 """This is a set of validation checks that can be performed on translation
23 units.
24
25 Derivatives of UnitChecker (like StandardUnitChecker) check translation units,
26 and derivatives of TranslationChecker (like StandardChecker) check
27 (source, target) translation pairs.
28
29 When adding a new test here, please document and explain the behaviour on the
30 U{wiki <http://translate.sourceforge.net/wiki/toolkit/pofilter_tests>}.
31 """
32
33 from translate.filters import helpers
34 from translate.filters import decoration
35 from translate.filters import prefilters
36 from translate.filters import spelling
37 from translate.lang import factory
38 from translate.lang import data
# The import of xliff could fail if the user doesn't have lxml installed. For
# now we try to continue gracefully to help users who aren't interested in
# support for XLIFF or other XML formats.
try:
    from translate.storage import xliff
except ImportError:
    # The exception object itself is not needed; only the fallback matters.
    xliff = None
# The import of xliff can fail silently in the absence of lxml if another
# module already tried to import it unsuccessfully, so let's make 100% sure:
if not hasattr(xliff, "xliffunit"):
    xliff = None
50 import re
51
# These are some regular expressions that are compiled for use in some tests

# printf syntax based on http://en.wikipedia.org/wiki/Printf, which doesn't
# cover everything. We leave \w instead of specifying the exact type letters,
# as this should capture printf types defined on other platforms.
# Extended to support Python named format specifiers.
# The length modifiers are a real alternation (the pipes must not be escaped)
# and the longer forms come first so that "%lld" is not cut short at "%ll".
printf_pat = re.compile(
    r'%((?:(?P<ord>\d+)\$|\((?P<key>\w+)\))?'
    r'(?P<fullvar>[+#-]*(?:\d+)?(?:\.\d+)?(hh|h|ll|l)?(?P<type>[\w%])))')

# The name of the XML tag
tagname_re = re.compile(r"<[\s]*([\w\/]*)")

# We allow escaped quotes, probably for old escaping style of OOo helpcontent
#TODO: remove escaped strings once usage is audited
property_re = re.compile(r""" (\w*)=((\\?".*?\\?")|(\\?'.*?\\?'))""")

# The whole tag
tag_re = re.compile(r"<[^>]+>")

gconf_attribute_re = re.compile(r'"[a-z_]+?"')
71
72
def tagname(string):
    """Returns the name of the XML/HTML tag in string

    The string must start with "<"; the name is whatever word characters and
    slashes follow it (so a closing tag yields a name starting with "/").
    """
    # group(1) always takes part in a successful match, so this is the same
    # value the previous ``.groups(1)[0]`` produced.
    return tagname_re.match(string).group(1)
76
77
def intuplelist(pair, list):
    """Tests to see if pair == (a,b,c) is in list, but handles None entries in
    list as wildcards (only allowed in positions "a" and "c"). We take a
    shortcut by only considering "c" if "b" has already matched.

    Returns the matching pattern from list, or pair itself when nothing
    matches (or when pair is a bare tag name, i.e. (tag, None, None)).
    """
    a, b, c = pair
    if (b, c) == (None, None):
        # This is a tagname
        return pair
    for pattern in list:
        x, y, z = pattern
        # The tag must match exactly or be a wildcard; the property must match
        if (x, y) in [(a, b), (None, b)]:
            # The value must match exactly or be a wildcard
            if z in [None, c]:
                return pattern
    return pair
92
93
def tagproperties(strings, ignore):
    """Returns all the properties in the XML/HTML tag string as
    (tagname, propertyname, propertyvalue), but ignore those combinations
    specified in ignore.

    Every tag also contributes a (tagname, None, None) entry for itself.
    """
    properties = []
    for string in strings:
        tag = tagname(string)
        properties.append((tag, None, None))
        # Now we isolate the attribute pairs.
        for property, value, a, b in property_re.findall(string):
            # Strip the quotes:
            value = value[1:-1]
            candidate = (tag, property, value)
            if candidate in ignore or \
                    intuplelist(candidate, ignore) != candidate:
                # Skip only this attribute. Stopping the loop here would
                # silently drop every attribute that follows an ignored one.
                continue
            properties.append(candidate)
    return properties
116
117
class FilterFailure(Exception):
    """This exception signals that a Filter didn't pass, and gives an
    explanation or a comment"""

    def __init__(self, messages):
        """Store one message, or a list of messages joined with ", ".

        @param messages: a unicode string or a non-empty list of them
        """
        if not isinstance(messages, list):
            messages = [messages]
        assert isinstance(messages[0], unicode)  # Assumption: all of same type
        joined = u", ".join(messages)
        Exception.__init__(self, joined)
        # Python 2.3 doesn't have .args
        if not hasattr(self, "args"):
            self.args = joined
131
132
class SeriousFilterFailure(FilterFailure):
    """This exception signals that a Filter didn't pass, and the bad translation
    might break an application (so the string will be marked fuzzy)"""
    pass
137
# (tag, attribute, value) specifies a certain attribute which can be changed/
# ignored if it exists inside tag. In the case where there is a third element
# in the tuple, it indicates a property value that can be ignored if present
# (like defaults, for example).
# If a certain item is None, it indicates that it is relevant for all values of
# the property/tag that is specified as None. A non-None value of "value"
# indicates that the value of the attribute must be taken into account.
common_ignoretags = [(None, "xml-lang", None)]
common_canchangetags = [
    ("img", "alt", None),
    (None, "title", None),
    (None, "dir", None),
    (None, "lang", None),
]
# Actually the title attribute is allowed on many tags in HTML (but probably
# not all)
152
153
class CheckerConfig(object):
    """object representing the configuration of a checker"""

    def __init__(self, targetlanguage=None, accelmarkers=None, varmatches=None,
                 notranslatewords=None, musttranslatewords=None,
                 validchars=None, punctuation=None, endpunctuation=None,
                 ignoretags=None, canchangetags=None, criticaltests=None,
                 credit_sources=None):
        """Build a configuration; every argument left as None gets a default.

        List arguments default to empty lists; punctuation defaults come from
        the target language object; tag lists default to the module-level
        common_ignoretags / common_canchangetags.
        """
        # Init lists
        self.accelmarkers = self._init_list(accelmarkers)
        self.varmatches = self._init_list(varmatches)
        self.criticaltests = self._init_list(criticaltests)
        self.credit_sources = self._init_list(credit_sources)
        # Lang data
        self.targetlanguage = targetlanguage
        self.updatetargetlanguage(targetlanguage)
        self.sourcelang = factory.getlanguage('en')
        # Inits with default values
        self.punctuation = self._init_default(
            data.normalized_unicode(punctuation), self.lang.punctuation)
        self.endpunctuation = self._init_default(
            data.normalized_unicode(endpunctuation), self.lang.sentenceend)
        self.ignoretags = self._init_default(ignoretags, common_ignoretags)
        self.canchangetags = self._init_default(canchangetags,
                                                common_canchangetags)
        # Other data
        # TODO: allow user configuration of untranslatable words
        # Both word collections are dicts used as sets (all values are None).
        self.notranslatewords = dict.fromkeys(
            [data.normalized_unicode(key)
             for key in self._init_list(notranslatewords)])
        self.musttranslatewords = dict.fromkeys(
            [data.normalized_unicode(key)
             for key in self._init_list(musttranslatewords)])
        validchars = data.normalized_unicode(validchars)
        self.validcharsmap = {}
        self.updatevalidchars(validchars)

    def _init_list(self, list):
        """initialise configuration parameters that are lists

        @type list: List
        @param list: None (we'll initialise a blank list) or a list parameter
        @rtype: List
        """
        if list is None:
            list = []
        return list

    def _init_default(self, param, default):
        """initialise parameters that can have default options

        @param param: the user supplied parameter value
        @param default: default values when param is not specified
        @return: the parameter as specified by the user or the default settings
        """
        if param is None:
            return default
        return param

    def update(self, otherconfig):
        """combines the info in otherconfig into this config object"""
        self.targetlanguage = otherconfig.targetlanguage or self.targetlanguage
        self.updatetargetlanguage(self.targetlanguage)
        # Accelerator markers are merged without duplicates
        self.accelmarkers.extend([c for c in otherconfig.accelmarkers
                                  if c not in self.accelmarkers])
        self.varmatches.extend(otherconfig.varmatches)
        self.notranslatewords.update(otherconfig.notranslatewords)
        self.musttranslatewords.update(otherconfig.musttranslatewords)
        self.validcharsmap.update(otherconfig.validcharsmap)
        self.punctuation += otherconfig.punctuation
        self.endpunctuation += otherconfig.endpunctuation
        #TODO: consider also updating in the following cases:
        # (these are replaced by otherconfig's values rather than merged)
        self.ignoretags = otherconfig.ignoretags
        self.canchangetags = otherconfig.canchangetags
        self.criticaltests.extend(otherconfig.criticaltests)
        self.credit_sources = otherconfig.credit_sources

    def updatevalidchars(self, validchars):
        """updates the map that eliminates valid characters

        Returns True without touching the map when validchars is None.
        """
        if validchars is None:
            return True
        # Map every valid character's ordinal to None
        validcharsmap = dict.fromkeys(
            [ord(validchar)
             for validchar in data.normalized_unicode(validchars)])
        self.validcharsmap.update(validcharsmap)

    def updatetargetlanguage(self, langcode):
        """Updates the target language in the config to the given target
        language"""
        self.lang = factory.getlanguage(langcode)
236
237
239
def cache_results(f):
    """Wrap the one-argument method f so its results are memoised.

    Results are stored in the instance's ``results_cache`` dict under the key
    (f.__name__, param1), so the instance must provide that attribute.
    """

    def cached_f(self, param1):
        key = (f.__name__, param1)
        res_cache = self.results_cache
        if key in res_cache:
            return res_cache[key]
        value = f(self, param1)
        res_cache[key] = value
        return value
    return cached_f
250
251
class UnitChecker(object):
    """Parent Checker class which does the checking based on functions available
    in derived classes."""
    # Maps a test name to the names of the tests that are skipped when it fails
    preconditions = {}

    def __init__(self, checkerconfig=None, excludefilters=None,
                 limitfilters=None, errorhandler=None):
        """Set up the configuration and work out which tests are available.

        @param checkerconfig: a CheckerConfig; a default one is made if None
        @param excludefilters: names of tests to leave out
        @param limitfilters: if given, the only test names to consider
        @param errorhandler: callable(name, source, target, exception) used
            when a test raises something other than FilterFailure
        """
        self.errorhandler = errorhandler
        if checkerconfig is None:
            self.setconfig(CheckerConfig())
        else:
            self.setconfig(checkerconfig)
        # exclude functions defined in UnitChecker from being treated as tests.
        self.helperfunctions = {}
        for functionname in dir(UnitChecker):
            function = getattr(self, functionname)
            if callable(function):
                self.helperfunctions[functionname] = function
        self.defaultfilters = self.getfilters(excludefilters, limitfilters)
        self.results_cache = {}

    def getfilters(self, excludefilters=None, limitfilters=None):
        """returns dictionary of available filters, including/excluding those in
        the given lists"""
        filters = {}
        if limitfilters is None:
            # use everything available unless instructed
            limitfilters = dir(self)
        if excludefilters is None:
            excludefilters = {}
        for functionname in limitfilters:
            if functionname in excludefilters:
                continue
            if functionname in self.helperfunctions:
                continue
            if functionname == "errorhandler":
                continue
            filterfunction = getattr(self, functionname, None)
            if not callable(filterfunction):
                continue
            filters[functionname] = filterfunction
        return filters

    def setconfig(self, config):
        """sets the configuration and rebuilds the accelerator and variable
        filters that are derived from it"""
        self.config = config
        self.accfilters = [prefilters.filteraccelerators(accelmarker)
                           for accelmarker in self.config.accelmarkers]
        self.varfilters = [prefilters.filtervariables(startmatch, endmatch,
                                                      prefilters.varname)
                           for startmatch, endmatch in self.config.varmatches]
        self.removevarfilter = [prefilters.filtervariables(startmatch, endmatch,
                                                           prefilters.varnone)
                                for startmatch, endmatch in self.config.varmatches]

    def setsuggestionstore(self, store):
        """Sets the filename that a checker should use for evaluating
        suggestions."""
        self.suggestion_store = store
        if self.suggestion_store:
            self.suggestion_store.require_index()

    # NOTE(review): the body of filtervariables was lost in the extracted
    # listing; reconstructed from the varfilters built in setconfig and the
    # pattern used by filteraccelerators. Confirm against upstream.
    def filtervariables(self, str1):
        """filter out variables from str1"""
        return helpers.multifilter(str1, self.varfilters)
    filtervariables = cache_results(filtervariables)

    # NOTE(review): body lost in the extracted listing; reconstructed from the
    # removevarfilter list built in setconfig. Confirm against upstream.
    def removevariables(self, str1):
        """remove variables from str1"""
        return helpers.multifilter(str1, self.removevarfilter)
    removevariables = cache_results(removevariables)

    def filteraccelerators(self, str1):
        """filter out accelerators from str1"""
        return helpers.multifilter(str1, self.accfilters, None)
    filteraccelerators = cache_results(filteraccelerators)

    # NOTE(review): the method name and the acceptlist default were lost in
    # the extracted listing; both are reconstructed. Confirm against upstream.
    def filterunchangedaccelerators(self, str1, acceptlist=None):
        """filter out accelerators from str1, passing acceptlist on to the
        accelerator filters"""
        return helpers.multifilter(str1, self.accfilters, acceptlist)

    def filterwordswithpunctuation(self, str1):
        """replaces words with punctuation with their unpunctuated
        equivalents"""
        return prefilters.filterwordswithpunctuation(str1)
    filterwordswithpunctuation = cache_results(filterwordswithpunctuation)

    # NOTE(review): body lost in the extracted listing; reconstructed on the
    # assumption that it strips whole tags with tag_re. Confirm against
    # upstream.
    def filterxml(self, str1):
        """filter out XML from the string so only text remains"""
        return tag_re.sub("", str1)
    filterxml = cache_results(filterxml)

    def run_test(self, test, unit):
        """Runs the given test on the given unit.

        Note that this can raise a FilterFailure as part of normal operation"""
        return test(unit)

    def run_filters(self, unit):
        """run all the tests in this suite, return failures as testname,
        message_or_exception"""
        self.results_cache = {}
        failures = {}
        ignores = self.config.lang.ignoretests[:]
        # Precondition tests run first so that they can switch off the tests
        # that depend on them.
        priorityfunctionnames = list(self.preconditions)
        otherfunctionnames = [functionname
                              for functionname in self.defaultfilters
                              if functionname not in self.preconditions]
        for functionname in priorityfunctionnames + otherfunctionnames:
            if functionname in ignores:
                continue
            filterfunction = getattr(self, functionname, None)
            # this filterfunction may only be defined on another checker if
            # using TeeChecker
            if filterfunction is None:
                continue
            # The test's docstring is the default failure message
            filtermessage = filterfunction.__doc__
            try:
                filterresult = self.run_test(filterfunction, unit)
            except FilterFailure as e:
                filterresult = False
                filtermessage = e.args[0]
            except Exception as e:
                if self.errorhandler is None:
                    raise ValueError("error in filter %s: %r, %r, %s" %
                                     (functionname, unit.source, unit.target, e))
                else:
                    filterresult = self.errorhandler(functionname, unit.source,
                                                     unit.target, e)
            if not filterresult:
                # we test some preconditions that aren't actually a cause for
                # failure
                if functionname in self.defaultfilters:
                    failures[functionname] = filtermessage
                if functionname in self.preconditions:
                    for ignoredfunctionname in self.preconditions[functionname]:
                        ignores.append(ignoredfunctionname)
        self.results_cache = {}
        return failures
388
389
class TranslationChecker(UnitChecker):
    """A checker that passes source and target strings to the checks, not the
    whole unit.

    This provides some speedup and simplifies testing."""

    def __init__(self, checkerconfig=None, excludefilters=None,
                 limitfilters=None, errorhandler=None):
        super(TranslationChecker, self).__init__(checkerconfig, excludefilters,
                                                 limitfilters, errorhandler)

    def run_test(self, test, unit):
        """Runs the given test on the given unit.

        For plural units the test is run against every plural form of the
        target and the messages of all failing forms are combined.

        Note that this can raise a FilterFailure as part of normal operation."""
        if not self.hasplural:
            return test(self.str1, self.str2)
        filtermessages = []
        filterresult = True
        for pluralform in unit.target.strings:
            try:
                if not test(self.str1, unicode(pluralform)):
                    filterresult = False
            except FilterFailure as e:
                filterresult = False
                # Keep the message text itself, not the repr of the args tuple
                filtermessages.append(e.args[0])
        if not filterresult and filtermessages:
            raise FilterFailure(filtermessages)
        return filterresult

    def run_filters(self, unit):
        """Do some optimisation by caching some data of the unit for the benefit
        of run_test()."""
        self.str1 = data.normalized_unicode(unit.source) or u""
        self.str2 = data.normalized_unicode(unit.target) or u""
        self.hasplural = unit.hasplural()
        self.locations = unit.getlocations()
        return super(TranslationChecker, self).run_filters(unit)
430
431
class TeeChecker(object):
    """A Checker that controls multiple checkers."""

    def __init__(self, checkerconfig=None, excludefilters=None,
                 limitfilters=None, checkerclasses=None, errorhandler=None,
                 languagecode=None):
        """construct a TeeChecker from the given checkers"""
        self.limitfilters = limitfilters
        if checkerclasses is None:
            checkerclasses = [StandardChecker]
        self.checkers = [checkerclass(checkerconfig=checkerconfig,
                                      excludefilters=excludefilters,
                                      limitfilters=limitfilters,
                                      errorhandler=errorhandler)
                         for checkerclass in checkerclasses]
        # NOTE(review): indentation was lost in the extracted listing; the
        # language-specific hookup is assumed to belong inside this branch.
        if languagecode:
            for checker in self.checkers:
                checker.config.updatetargetlanguage(languagecode)
            # Let's hook up the language specific checker
            lang_checker = self.checkers[0].config.lang.checker
            if lang_checker:
                self.checkers.append(lang_checker)

        self.combinedfilters = self.getfilters(excludefilters, limitfilters)
        self.config = checkerconfig or self.checkers[0].config

    def getfilters(self, excludefilters=None, limitfilters=None):
        """returns dictionary of available filters, including/excluding those in
        the given lists"""
        if excludefilters is None:
            excludefilters = {}
        filterslist = [checker.getfilters(excludefilters, limitfilters)
                       for checker in self.checkers]
        self.combinedfilters = {}
        for filters in filterslist:
            self.combinedfilters.update(filters)
        # TODO: move this somewhere more sensible (a checkfilters method?)
        if limitfilters is not None:
            import sys
            for filtername in limitfilters:
                if filtername not in self.combinedfilters:
                    # write() behaves the same on every Python version
                    sys.stderr.write("warning: could not find filter %s\n"
                                     % filtername)
        return self.combinedfilters

    def run_filters(self, unit):
        """run all the tests in the checker's suites"""
        failures = {}
        for checker in self.checkers:
            failures.update(checker.run_filters(unit))
        return failures

    def setsuggestionstore(self, store):
        """Sets the filename that a checker should use for evaluating
        suggestions."""
        for checker in self.checkers:
            checker.setsuggestionstore(store)
483
484
486 """The basic test suite for source -> target translations."""
487
def untranslated(self, str1, str2):
    """checks whether a string has been translated at all"""
    # KDE-style comments in the target do not count as a translation
    str2 = prefilters.removekdecomments(str2)
    return not (len(str1.strip()) > 0 and len(str2) == 0)
492
def unchanged(self, str1, str2):
    """checks whether a translation is basically identical to the original
    string"""
    str1 = self.filteraccelerators(self.removevariables(str1)).strip()
    str2 = self.filteraccelerators(self.removevariables(str2)).strip()
    if len(str1) < 2:
        return True
    # If the whole string is uppercase, or nothing in the string can go
    # towards uppercase, let's assume there is nothing translatable
    # TODO: reconsider
    if (str1.isupper() or str1.upper() == str1) and str1 == str2:
        return True
    if self.config.notranslatewords:
        words1 = str1.split()
        # why do we only test for one notranslate word?
        if len(words1) == 1 and words1[0] in self.config.notranslatewords:
            return True
    # we could also check for things like str1.isnumeric(), but the test
    # above (str1.upper() == str1) makes this unnecessary
    if str1.lower() == str2.lower():
        raise FilterFailure(u"please translate")
    return True
517
def blank(self, str1, str2):
    """checks whether a translation only contains spaces"""
    len1 = len(str1.strip())
    len2 = len(str2.strip())
    # Fails only for a non-empty target that is nothing but whitespace
    return not (len1 > 0 and len(str2) != 0 and len2 == 0)
523
def short(self, str1, str2):
    """checks whether a translation is much shorter than the original
    string"""
    len1 = len(str1.strip())
    len2 = len(str2.strip())
    # A non-empty translation under a tenth of the source length
    if len1 > 0 and 0 < len2 < len1 * 0.1:
        return False
    # A single character standing in for a longer source
    if len1 > 1 and len2 == 1:
        return False
    return True
530
def long(self, str1, str2):
    """checks whether a translation is much longer than the original
    string"""
    len1 = len(str1.strip())
    len2 = len(str2.strip())
    # A non-empty source under a tenth of the translation length
    if 0 < len1 < len2 * 0.1:
        return False
    # A single-character source that grew into something longer
    if len1 == 1 and len2 > 1:
        return False
    return True
537
def escapes(self, str1, str2):
    """checks whether escaping is consistent between the two strings"""
    if helpers.countsmatch(str1, str2, (u"\\", u"\\\\")):
        return True
    # List the words that contain a backslash on each side for the message
    escapes1 = u", ".join([u"'%s'" % word for word in str1.split()
                           if u"\\" in word])
    escapes2 = u", ".join([u"'%s'" % word for word in str2.split()
                           if u"\\" in word])
    raise SeriousFilterFailure(u"escapes in original (%s) don't match escapes in translation (%s)" % (escapes1, escapes2))
546
def newlines(self, str1, str2):
    """checks whether newlines are consistent between the two strings"""
    if not helpers.countsmatch(str1, str2, (u"\n", u"\r")):
        raise FilterFailure(u"line endings in original don't match line endings in translation")
    return True
553
def tabs(self, str1, str2):
    """checks whether tabs are consistent between the two strings"""
    if not helpers.countmatch(str1, str2, "\t"):
        raise SeriousFilterFailure(u"tabs in original don't match tabs in translation")
    return True
560
def singlequoting(self, str1, str2):
    """checks whether singlequoting is consistent between the two strings"""
    str1 = self.filterwordswithpunctuation(
        self.filteraccelerators(self.filtervariables(str1)))
    # Only the source goes through the target language's punctuation mapping
    str1 = self.config.lang.punctranslate(str1)
    str2 = self.filterwordswithpunctuation(
        self.filteraccelerators(self.filtervariables(str2)))
    return helpers.countsmatch(str1, str2, (u"'", u"''", u"\\'"))
567
def doublequoting(self, str1, str2):
    """checks whether doublequoting is consistent between the two strings"""
    str1 = self.filterxml(self.filteraccelerators(self.filtervariables(str1)))
    # Only the source goes through the target language's punctuation mapping
    str1 = self.config.lang.punctranslate(str1)
    str2 = self.filterxml(self.filteraccelerators(self.filtervariables(str2)))
    return helpers.countsmatch(str1, str2, (u'"', u'""', u'\\"', u"«",
                                            u"»", u"“", u"”"))
577
def doublespacing(self, str1, str2):
    """checks for bad double-spaces by comparing to original"""
    str1 = self.filteraccelerators(str1)
    str2 = self.filteraccelerators(str2)
    return helpers.countmatch(str1, str2, u"  ")
583
def puncspacing(self, str1, str2):
    """checks for bad spacing after punctuation"""
    # Convert all nbsp to space, and just check spaces. Useful intermediate
    # step to stricter nbsp checking?
    str1 = self.filteraccelerators(self.filtervariables(str1))
    str1 = self.config.lang.punctranslate(str1)
    str1 = str1.replace(u"\u00a0", u" ")
    if u" " not in str1:
        # Nothing to compare if the source has no spaces at all
        return True
    str2 = self.filteraccelerators(self.filtervariables(str2))
    str2 = str2.replace(u"\u00a0", u" ")
    for puncchar in self.config.punctuation:
        plaincount1 = str1.count(puncchar)
        plaincount2 = str2.count(puncchar)
        # Only compare spacing when both strings use the mark equally often
        if not plaincount1 or plaincount1 != plaincount2:
            continue
        spacecount1 = str1.count(puncchar + u" ")
        spacecount2 = str2.count(puncchar + u" ")
        if spacecount1 != spacecount2:
            # handle extra spaces that are because of transposed punctuation
            if str1.endswith(puncchar) != str2.endswith(puncchar) and \
                    abs(spacecount1 - spacecount2) == 1:
                continue
            return False
    return True
608
def printf(self, str1, str2):
    """checks whether printf format strings match

    Returns 1 when the format specifiers of both strings agree and 0 when
    they do not.
    """
    count1 = count2 = plural = None
    # self.hasplural only set by run_filters, not always available
    if 'hasplural' in self.__dict__:
        plural = self.hasplural
    # The source specifiers do not change while we walk the target
    matches1 = list(printf_pat.finditer(str1))
    for var_num2, match2 in enumerate(printf_pat.finditer(str2)):
        count2 = var_num2 + 1
        str2key = match2.group('key')
        if match2.group('ord'):
            # Positional specifier (%1$s): compare with that source position
            for var_num1, match1 in enumerate(matches1):
                count1 = var_num1 + 1
                if int(match2.group('ord')) == var_num1 + 1:
                    if match2.group('fullvar') != match1.group('fullvar'):
                        return 0
        elif str2key:
            # Named specifier (%(key)s): the same key must exist in the source
            str1key = None
            for var_num1, match1 in enumerate(matches1):
                count1 = var_num1 + 1
                if match1.group('key') and str2key == match1.group('key'):
                    str1key = match1.group('key')
                    # '%.0s' "placeholder" in plural will match anything
                    if plural and match2.group('fullvar') == '.0s':
                        continue
                    if match1.group('fullvar') != match2.group('fullvar'):
                        return 0
            if str1key is None:
                return 0
        else:
            # Plain specifier: compare with the source one at the same index
            for var_num1, match1 in enumerate(matches1):
                count1 = var_num1 + 1
                # '%.0s' "placeholder" in plural will match anything
                if plural and match2.group('fullvar') == '.0s':
                    continue
                if (var_num1 == var_num2) and \
                        (match1.group('fullvar') != match2.group('fullvar')):
                    return 0

    if count2 is None:
        # No specifiers in the target: the source must not have any either
        if matches1:
            return 0

    if (count1 or count2) and (count1 != count2):
        return 0
    return 1
653
def accelerators(self, str1, str2):
    """checks whether accelerators are consistent between the two strings"""
    str1 = self.filtervariables(str1)
    str2 = self.filtervariables(str2)
    messages = []
    for accelmarker in self.config.accelmarkers:
        counter1 = decoration.countaccelerators(
            accelmarker, self.config.sourcelang.validaccel)
        counter2 = decoration.countaccelerators(
            accelmarker, self.config.lang.validaccel)
        count1, countbad1 = counter1(str1)
        count2, countbad2 = counter2(str2)
        getaccel = decoration.getaccelerators(accelmarker,
                                              self.config.lang.validaccel)
        accel2, bad2 = getaccel(str2)
        if count1 == count2:
            continue
        if count1 == 1 and count2 == 0:
            if countbad2 == 1:
                messages.append(u"accelerator %s appears before an invalid accelerator character '%s' (eg. space)" % (accelmarker, bad2[0]))
            else:
                messages.append(u"accelerator %s is missing from translation" % accelmarker)
        elif count1 == 0:
            messages.append(u"accelerator %s does not occur in original and should not be in translation" % accelmarker)
        elif count1 == 1 and count2 > count1:
            messages.append(u"accelerator %s is repeated in translation" % accelmarker)
        else:
            messages.append(u"accelerator %s occurs %d time(s) in original and %d time(s) in translation" % (accelmarker, count1, count2))
    if messages:
        # The configuration decides whether this is a serious failure
        if "accelerators" in self.config.criticaltests:
            raise SeriousFilterFailure(messages)
        else:
            raise FilterFailure(messages)
    return True
685
686 # def acceleratedvariables(self, str1, str2):
687 # """checks that no variables are accelerated"""
688 # messages = []
689 # for accelerator in self.config.accelmarkers:
690 # for variablestart, variableend in self.config.varmatches:
691 # error = accelerator + variablestart
692 # if str1.find(error) >= 0:
693 # messages.append(u"original has an accelerated variable")
694 # if str2.find(error) >= 0:
695 # messages.append(u"translation has an accelerated variable")
696 # if messages:
697 # raise FilterFailure(messages)
698 # return True
699
def variables(self, str1, str2):
    """checks whether variables of various forms are consistent between the
    two strings"""
    messages = []
    mismatch1, mismatch2 = [], []
    varnames1, varnames2 = [], []
    for startmarker, endmarker in self.config.varmatches:
        varchecker = decoration.getvariables(startmarker, endmarker)
        # Work out how to put the markers back around a bare variable name.
        # An integer endmarker is not a textual marker, so it is never
        # appended.
        prefix = startmarker or u""
        if startmarker and endmarker and not isinstance(endmarker, int):
            suffix = endmarker
        else:
            suffix = u""
        vars1 = varchecker(str1)
        vars2 = varchecker(str2)
        if vars1 != vars2:
            # we use counts to compare so we can handle multiple variables
            vars1, vars2 = \
                [var for var in vars1 if vars1.count(var) > vars2.count(var)], \
                [var for var in vars2 if vars1.count(var) < vars2.count(var)]
            # filter variable names we've already seen, so they aren't
            # matched by more than one filter...
            vars1 = [var for var in vars1 if var not in varnames1]
            vars2 = [var for var in vars2 if var not in varnames2]
            varnames1.extend(vars1)
            varnames2.extend(vars2)
            mismatch1.extend([prefix + var + suffix for var in vars1])
            mismatch2.extend([prefix + var + suffix for var in vars2])
    if mismatch1:
        messages.append(u"do not translate: %s" % u", ".join(mismatch1))
    elif mismatch2:
        messages.append(u"translation contains variables not in original: %s" % u", ".join(mismatch2))
    # Variables missing from the translation are the serious case
    if messages and mismatch1:
        raise SeriousFilterFailure(messages)
    elif messages:
        raise FilterFailure(messages)
    return True
740
def functions(self, str1, str2):
    """checks that function names are not translated"""
    return helpers.funcmatch(str1, str2, decoration.getfunctions,
                             self.config.punctuation)
744
def emails(self, str1, str2):
    """checks that emails are not translated"""
    return helpers.funcmatch(str1, str2, decoration.getemails)
748
def urls(self, str1, str2):
    """checks that URLs are not translated"""
    return helpers.funcmatch(str1, str2, decoration.geturls)
752
def numbers(self, str1, str2):
    """checks whether numbers of various forms are consistent between the
    two strings"""
    # The numbers found in the source are the items whose counts must match
    return helpers.countsmatch(str1, str2, decoration.getnumbers(str1))
757
def startwhitespace(self, str1, str2):
    """checks whether whitespace at the beginning of the strings matches"""
    return helpers.funcmatch(str1, str2, decoration.spacestart)
761
def endwhitespace(self, str1, str2):
    """checks whether whitespace at the end of the strings matches"""
    # Only the source goes through the target language's punctuation mapping
    str1 = self.config.lang.punctranslate(str1)
    return helpers.funcmatch(str1, str2, decoration.spaceend)
766
def startpunc(self, str1, str2):
    """checks whether punctuation at the beginning of the strings match"""
    str1 = self.filterxml(self.filteraccelerators(self.filtervariables(
        self.filterwordswithpunctuation(str1))))
    # Only the source goes through the target language's punctuation mapping
    str1 = self.config.lang.punctranslate(str1)
    str2 = self.filterxml(self.filteraccelerators(self.filtervariables(
        self.filterwordswithpunctuation(str2))))
    return helpers.funcmatch(str1, str2, decoration.puncstart,
                             self.config.punctuation)
773
def endpunc(self, str1, str2):
    """checks whether punctuation at the end of the strings match"""
    str1 = self.filtervariables(str1)
    # Only the source goes through the target language's punctuation mapping
    str1 = self.config.lang.punctranslate(str1)
    str2 = self.filtervariables(str2)
    # Trailing whitespace is not this test's concern
    str1 = str1.rstrip()
    str2 = str2.rstrip()
    # A colon is checked in addition to the configured end punctuation
    return helpers.funcmatch(str1, str2, decoration.puncend,
                             self.config.endpunctuation + u":")
782
def purepunc(self, str1, str2):
    """checks that strings that are purely punctuation are not changed"""
    # this test is a subset of startandend
    if decoration.ispurepunctuation(str1):
        return str1 == str2
    # A non-punctuation source must not become pure punctuation
    return not decoration.ispurepunctuation(str2)
790
def brackets(self, str1, str2):
    """checks that the number of brackets in both strings match"""
    str1 = self.filtervariables(str1)
    str2 = self.filtervariables(str2)
    messages = []
    missing = []
    extra = []
    # Each bracket character is counted on its own; pairing is not checked
    for bracket in (u"[", u"]", u"{", u"}", u"(", u")"):
        count1 = str1.count(bracket)
        count2 = str2.count(bracket)
        if count2 < count1:
            missing.append(u"'%s'" % bracket)
        elif count2 > count1:
            extra.append(u"'%s'" % bracket)
    if missing:
        messages.append(u"translation is missing %s" % u", ".join(missing))
    if extra:
        messages.append(u"translation has extra %s" % u", ".join(extra))
    if messages:
        raise FilterFailure(messages)
    return True
812
def sentencecount(self, str1, str2):
    """checks that the number of sentences in both strings match"""
    str1 = self.filteraccelerators(str1)
    str2 = self.filteraccelerators(str2)
    # Each string is split by the rules of its own language
    sentences1 = len(self.config.sourcelang.sentences(str1))
    sentences2 = len(self.config.lang.sentences(str2))
    if sentences1 != sentences2:
        raise FilterFailure(u"The number of sentences differ: %d versus %d" % (sentences1, sentences2))
    return True
822
def options(self, str1, str2):
    """checks that options are not translated"""
    str1 = self.filtervariables(str1)
    for word1 in str1.split():
        # An option looks like --name or --name=value; a bare "--" is not one
        if word1 != u"--" and word1.startswith(u"--") and word1[-1].isalnum():
            parts = word1.split(u"=")
            if parts[0] not in str2:
                raise FilterFailure(u"The option %s does not occur or is translated in the translation." % parts[0])
            # The value part, on the other hand, is expected to be translated
            if len(parts) > 1 and parts[1] in str2:
                raise FilterFailure(u"The parameter %(param)s in option %(option)s is not translated." % {"param": parts[1], "option": parts[0]})
    return True
834
def startcaps(self, str1, str2):
    """checks that the message starts with the correct capitalisation"""
    str1 = self.filteraccelerators(str1)
    str2 = self.filteraccelerators(str2)
    if len(str1) > 1 and len(str2) > 1:
        # Each language decides for itself what a capitalised start is
        return self.config.sourcelang.capsstart(str1) == \
            self.config.lang.capsstart(str2)
    if len(str1) == 0 and len(str2) == 0:
        return True
    if len(str1) == 0 or len(str2) == 0:
        return False
    # Strings too short to judge are accepted
    return True
846
def simplecaps(self, str1, str2):
    """checks the capitalisation of two strings isn't wildly different"""
    str1 = self.removevariables(str1)
    str2 = self.removevariables(str2)
    # TODO: review this. The 'I' is specific to English, so it probably
    # serves no purpose to get sourcelang.sentenceend
    str1 = re.sub(u"[^%s]( I )" % self.config.sourcelang.sentenceend,
                  u" i ", str1)
    capitals1 = helpers.filtercount(str1, unicode.isupper)
    capitals2 = helpers.filtercount(str2, unicode.isupper)
    alpha1 = helpers.filtercount(str1, unicode.isalpha)
    alpha2 = helpers.filtercount(str2, unicode.isalpha)
    # Capture the all caps case
    if capitals1 == alpha1:
        return capitals2 == alpha2
    # some heuristic tests to try and see that the style of capitals is
    # vaguely the same. The thresholds use explicit integer division so that
    # they do not change if true division is ever in effect.
    if capitals1 == 0 or capitals1 == 1:
        return capitals2 == capitals1
    elif capitals1 < len(str1) // 10:
        return capitals2 <= len(str2) // 8
    elif len(str1) < 10:
        return abs(capitals1 - capitals2) < 3
    elif capitals1 > len(str1) * 6 // 10:
        return capitals2 > len(str2) * 6 // 10
    else:
        return abs(capitals1 - capitals2) < (len(str1) + len(str2)) // 6
873
def acronyms(self, str1, str2):
    """Check that acronyms appearing in the source are unchanged.

    A word counts as an acronym when it is entirely upper case and longer
    than one character.  Variables found in the source and the keys of
    ``config.musttranslatewords`` are exempt.

    Raises FilterFailure listing every acronym of the source that cannot be
    found in the target; returns True otherwise.
    """
    exempt = []
    for startmatch, endmatch in self.config.varmatches:
        exempt.extend(decoration.getvariables(startmatch, endmatch)(str1))
    exempt.extend(self.config.musttranslatewords.keys())
    source = self.filteraccelerators(self.filtervariables(str1))
    target = self.filteraccelerators(self.filtervariables(str2))
    #TODO: strip XML? - should provide better error messages
    # see mail/chrome/messanger/smime.properties.po
    #TODO: consider limiting the word length for recognising acronyms to
    #something like 5/6 characters
    missing = []
    for word in self.config.lang.word_iter(source):
        if not word.isupper() or len(word) <= 1 or word in exempt:
            continue
        if word not in target:
            missing.append(word)
    if missing:
        raise FilterFailure(u"acronyms should not be translated: " + u", ".join(missing))
    return True
895
def doublewords(self, str1, str2):
    """Check for repeated words in the translation.

    XML, variables and accelerators are filtered out of the target, full
    stops are dropped and the text is lower-cased before it is split into
    words.  Two identical consecutive words are reported unless the word
    is listed in the target language's ``validdoublewords``.

    Raises FilterFailure for the first repeated word; returns True
    otherwise.
    """
    # No newline pre-processing is needed: split() without arguments
    # already breaks on any whitespace, newlines included.
    cleaned = self.filteraccelerators(self.removevariables(self.filterxml(str2)))
    previous = ""
    for word in cleaned.replace(u".", u"").lower().split():
        if word == previous and word not in self.config.lang.validdoublewords:
            raise FilterFailure(u"The word '%s' is repeated" % word)
        previous = word
    return True
906
def notranslatewords(self, str1, str2):
    """Check that words configured as untranslatable appear in the
    translation too.

    Passes immediately when ``config.notranslatewords`` is empty.  Raises
    FilterFailure listing the configured words that occur in the source
    but not in the target; returns True otherwise.
    """
    protected = self.config.notranslatewords
    if not protected:
        return True
    source = self.filtervariables(str1)
    target = self.filtervariables(str2)
    # Turn every configured punctuation character into a space so that it
    # acts as a word separator.
    # NOTE(review): the original comment warns that a single apostrophe
    # may be problematic in words like "doesn't".
    for mark in self.config.punctuation:
        source = source.replace(mark, u" ")
        target = target.replace(mark, u" ")
    source_words = self.filteraccelerators(source).split()
    target_words = self.filteraccelerators(target).split()
    translated = [word for word in source_words
                  if word in protected and word not in target_words]
    if translated:
        raise FilterFailure(u"do not translate: %s" % (u", ".join(translated)))
    return True
925
def musttranslatewords(self, str1, str2):
    """Check that words configured as definitely translatable don't appear
    in the translation.

    Passes immediately when ``config.musttranslatewords`` is empty.  Raises
    FilterFailure listing the configured words that occur in both source
    and target; returns True otherwise.
    """
    required = self.config.musttranslatewords
    if not required:
        return True
    source = self.removevariables(str1)
    target = self.removevariables(str2)
    # Turn every configured punctuation character into a space so that it
    # acts as a word separator.
    # NOTE(review): the original comment warns that a single apostrophe
    # may be problematic in words like "doesn't".
    for mark in self.config.punctuation:
        source = source.replace(mark, u" ")
        target = target.replace(mark, u" ")
    source_words = self.filteraccelerators(source).split()
    target_words = self.filteraccelerators(target).split()
    untranslated = [word for word in source_words
                    if word in required and word in target_words]
    if untranslated:
        raise FilterFailure(u"please translate: %s" % (u", ".join(untranslated)))
    return True
944
def validchars(self, str1, str2):
    """Check that only characters specified as valid appear in the
    translation.

    Both strings are passed through ``config.validcharsmap``; whatever is
    left of the target after that translation is treated as invalid unless
    the same character is also left over in the source.

    Passes immediately when no ``validcharsmap`` is configured.  Raises
    FilterFailure listing the offending characters with their code points.
    """
    charmap = self.config.validcharsmap
    if not charmap:
        return True
    source_leftovers = str1.translate(charmap)
    target_leftovers = str2.translate(charmap)
    offenders = []
    for char in target_leftovers:
        if char not in source_leftovers:
            offenders.append(u"'%s' (\\u%04x)" % (char, ord(char)))
    if offenders:
        raise FilterFailure(u"invalid chars: %s" % (u", ".join(offenders)))
    return True
956
def filepaths(self, str1, str2):
    """Check that file paths have not been translated.

    Every word of the accelerator-filtered source that starts with "/" is
    handed to ``helpers.countsmatch`` together with the unfiltered source
    and target; the check fails as soon as one such word does not match.
    """
    for candidate in self.filteraccelerators(str1).split():
        if candidate.startswith(u"/") and not helpers.countsmatch(str1, str2, (candidate,)):
            return False
    return True
964
992
def kdecomments(self, str1, str2):
    """Check that no KDE style comments appear in the translation.

    A KDE comment is recognised by the marker "_:" at the very start of
    the target or directly after a newline.
    """
    if str2.startswith(u"_:"):
        return False
    return u"\n_:" not in str2
997
def compendiumconflicts(self, str1, str2):
    """Check for Gettext compendium conflicts (#-#-#-#-#).

    Fails when the conflict marker occurs anywhere in the target.
    """
    return u"#-#-#-#-#" not in str2
1001
def simpleplurals(self, str1, str2):
    """Check for English style plural(s) for you to review.

    Counts occurrences of the literal text "(s)" in source and target.
    When the target language has exactly one plural form, any "(s)" in the
    target fails the check; otherwise the counts must be equal.
    """

    def numberofpatterns(string, patterns):
        """Return the total number of matches of all patterns in string."""
        return sum(len(re.findall(pattern, string)) for pattern in patterns)

    # Raw strings: "\(" is not a valid string escape and only worked
    # because unknown escapes are passed through unchanged.
    sourcepatterns = [r"\(s\)"]
    targetpatterns = [r"\(s\)"]
    sourcecount = numberofpatterns(str1, sourcepatterns)
    targetcount = numberofpatterns(str2, targetpatterns)
    if self.config.lang.nplurals == 1:
        return not targetcount
    return sourcecount == targetcount
1018
def spellcheck(self, str1, str2):
    """Check for words that don't pass a spell check.

    The check is skipped (passes) when no target language is configured or
    when the spelling module reports itself as unavailable.  Words that
    are also flagged in the source, words listed in
    ``config.notranslatewords`` and words that appear among their own
    suggestions are not reported.

    Raises FilterFailure with a list of messages, one per suspect word.
    """
    if not self.config.targetlanguage or not spelling.available:
        return True
    # TODO: filterxml?
    source = self.filteraccelerators_by_list(self.filtervariables(str1), self.config.sourcelang.validaccel)
    target = self.filteraccelerators_by_list(self.filtervariables(str2), self.config.lang.validaccel)
    # Words the (English) source checker already complains about are not
    # held against the translation.
    source_flagged = [word for word, index, suggestions in spelling.check(source, lang="en")]
    messages = []
    for word, index, suggestions in spelling.check(target, lang=self.config.targetlanguage):
        if word in self.config.notranslatewords or word in source_flagged:
            continue
        # hack to ignore hyphenisation rules
        if word in suggestions:
            continue
        messages.append(u"check spelling of %s (could be %s)" % (word, u" / ".join(suggestions[:5])))
    if messages:
        raise FilterFailure(messages)
    return True
1044
def credits(self, str1, str2):
    """Check for messages containing translation credits instead of normal
    translations.

    Fails when the source string is one of ``config.credit_sources``.
    """
    return str1 not in self.config.credit_sources
1049
# Maps the name of a test to the names of the tests that depend on it.
# If the precondition filter is run and fails then the other tests listed are ignored
preconditions = {"untranslated": ("simplecaps", "variables", "startcaps",
                                  "accelerators", "brackets", "endpunc",
                                  "acronyms", "xmltags", "startpunc",
                                  "endwhitespace", "startwhitespace",
                                  "escapes", "doublequoting", "singlequoting",
                                  "filepaths", "purepunc", "doublespacing",
                                  "sentencecount", "numbers", "isfuzzy",
                                  "isreview", "notranslatewords", "musttranslatewords",
                                  "emails", "simpleplurals", "urls", "printf",
                                  "tabs", "newlines", "functions", "options",
                                  "blank", "nplurals", "gconf"),
                 "blank": ("simplecaps", "variables", "startcaps",
                           "accelerators", "brackets", "endpunc",
                           "acronyms", "xmltags", "startpunc",
                           "endwhitespace", "startwhitespace",
                           "escapes", "doublequoting", "singlequoting",
                           "filepaths", "purepunc", "doublespacing",
                           "sentencecount", "numbers", "isfuzzy",
                           "isreview", "notranslatewords", "musttranslatewords",
                           "emails", "simpleplurals", "urls", "printf",
                           "tabs", "newlines", "functions", "options",
                           "gconf"),
                 "credits": ("simplecaps", "variables", "startcaps",
                             "accelerators", "brackets", "endpunc",
                             "acronyms", "xmltags", "startpunc",
                             "escapes", "doublequoting", "singlequoting",
                             "filepaths", "doublespacing",
                             "sentencecount", "numbers",
                             "emails", "simpleplurals", "urls", "printf",
                             "tabs", "newlines", "functions", "options"),
                 "purepunc": ("startcaps", "options"),
                 # The "startcaps" entry below is disabled: it has caused
                 # problems since Python 2.6, as startcaps is now seen as an
                 # important one to always execute and could now be done
                 # before it is blocked by a failing "untranslated" or
                 # "blank" test. This is probably happening due to a slightly
                 # different implementation of the internal dict handling
                 # since Python 2.6. We should never have relied on this
                 # ordering anyway.
                 #"startcaps": ("simplecaps",),
                 "endwhitespace": ("endpunc",),
                 "startwhitespace": ("startpunc",),
                 "unchanged": ("doublewords",),
                 "compendiumconflicts": ("accelerators", "brackets", "escapes",
                                         "numbers", "startpunc", "long", "variables",
                                         "startcaps", "sentencecount", "simplecaps",
                                         "doublespacing", "endpunc", "xmltags",
                                         "startwhitespace", "endwhitespace",
                                         "singlequoting", "doublequoting",
                                         "filepaths", "purepunc", "doublewords", "printf")}
1100
1101 # code to actually run the tests (use unittest?)
1102
# Accelerator marker, variable delimiters and tag rules applied by
# OpenOfficeChecker on top of whatever configuration it is given.
openofficeconfig = CheckerConfig(
    accelmarkers=["~"],
    varmatches=[("&", ";"), ("%", "%"), ("%", None), ("%", 0), ("$(", ")"),
                ("$", "$"), ("${", "}"), ("#", "#"), ("#", 1), ("#", 0),
                ("($", ")"), ("$[", "]"), ("[", "]"), ("$", None)],
    ignoretags=[("alt", "xml-lang", None), ("ahelp", "visibility", "visible"),
                ("img", "width", None), ("img", "height", None)],
    canchangetags=[("link", "name", None)],
)


class OpenOfficeChecker(StandardChecker):
    """StandardChecker whose configuration is updated with openofficeconfig."""

    def __init__(self, **kwargs):
        """Merge openofficeconfig into the caller's ``checkerconfig``.

        A fresh CheckerConfig is created and stored in ``kwargs`` when the
        caller passed none (or passed None).
        """
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(openofficeconfig)
        StandardChecker.__init__(self, **kwargs)
1122
# Accelerator marker, variable delimiters and critical tests applied by
# MozillaChecker on top of whatever configuration it is given.
mozillaconfig = CheckerConfig(
    accelmarkers=["&"],
    varmatches=[("&", ";"), ("%", "%"), ("%", 1), ("$", "$"), ("$", None),
                ("#", 1), ("${", "}"), ("$(^", ")")],
    criticaltests=["accelerators"],
)


class MozillaChecker(StandardChecker):
    """StandardChecker whose configuration is updated with mozillaconfig."""

    def __init__(self, **kwargs):
        """Merge mozillaconfig into the caller's ``checkerconfig``.

        A fresh CheckerConfig is created and stored in ``kwargs`` when the
        caller passed none (or passed None).
        """
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(mozillaconfig)
        StandardChecker.__init__(self, **kwargs)
1139
1147
# Variable delimiters applied by DrupalChecker on top of whatever
# configuration it is given.
drupalconfig = CheckerConfig(
    varmatches=[("%", None), ("@", None), ("!", None)],
)


class DrupalChecker(StandardChecker):
    """StandardChecker whose configuration is updated with drupalconfig."""

    def __init__(self, **kwargs):
        """Merge drupalconfig into the caller's ``checkerconfig``.

        A fresh CheckerConfig is created and stored in ``kwargs`` when the
        caller passed none (or passed None).
        """
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(drupalconfig)
        StandardChecker.__init__(self, **kwargs)
1161
# Accelerator marker, variable delimiters and credit strings applied by
# GnomeChecker on top of whatever configuration it is given.
gnomeconfig = CheckerConfig(
    accelmarkers=["_"],
    varmatches=[("%", 1), ("$(", ")")],
    credit_sources=[u"translator-credits"],
)


class GnomeChecker(StandardChecker):
    """StandardChecker whose configuration is updated with gnomeconfig."""

    def __init__(self, **kwargs):
        """Merge gnomeconfig into the caller's ``checkerconfig``.

        A fresh CheckerConfig is created and stored in ``kwargs`` when the
        caller passed none (or passed None).
        """
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(gnomeconfig)
        StandardChecker.__init__(self, **kwargs)

    def gconf(self, str1, str2):
        """Checks if we have any gconf config settings translated.

        Only applies when one of ``self.locations`` contains 'schemas.in'.
        Raises FilterFailure listing the attributes found in the source
        whose inner text is missing from the target.
        """
        for location in self.locations:
            if 'schemas.in' not in location:
                continue
            attributes = gconf_attribute_re.findall(str1)
            # Compare without the first and last character of each match
            # (presumably the delimiters matched by gconf_attribute_re,
            # which is defined elsewhere -- TODO confirm).
            changed = [attr for attr in attributes if attr[1:-1] not in str2]
            if changed:
                raise FilterFailure(u"do not translate gconf attribute: %s" % (u", ".join(changed)))
        return True
1188
# Accelerator marker, variable delimiters and credit strings applied by
# KdeChecker on top of whatever configuration it is given.
kdeconfig = CheckerConfig(
    accelmarkers=["&"],
    varmatches=[("%", 1)],
    credit_sources=[u"Your names", u"Your emails", u"ROLES_OF_TRANSLATORS"],
)


class KdeChecker(StandardChecker):
    """StandardChecker whose configuration is updated with kdeconfig."""

    def __init__(self, **kwargs):
        """Merge kdeconfig into the caller's ``checkerconfig``.

        A fresh CheckerConfig is created and stored in ``kwargs`` when the
        caller passed none (or passed None).
        """
        # TODO allow setup of KDE plural and translator comments so that
        # they do not create false positives
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(kdeconfig)
        StandardChecker.__init__(self, **kwargs)
1206
# Variable delimiters applied by CCLicenseChecker on top of whatever
# configuration it is given.
cclicenseconfig = CheckerConfig(varmatches=[("@", "@")])


class CCLicenseChecker(StandardChecker):
    """StandardChecker whose configuration is updated with cclicenseconfig."""

    def __init__(self, **kwargs):
        """Merge cclicenseconfig into the caller's ``checkerconfig``.

        A fresh CheckerConfig is created and stored in ``kwargs`` when the
        caller passed none (or passed None).
        """
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(cclicenseconfig)
        StandardChecker.__init__(self, **kwargs)
1218
# Maps a project name to the checker class carrying that project's
# configuration.  Note that "wx" reuses the KDE checker.
projectcheckers = {
    "openoffice": OpenOfficeChecker,
    "mozilla": MozillaChecker,
    "kde": KdeChecker,
    "wx": KdeChecker,
    "gnome": GnomeChecker,
    "creativecommons": CCLicenseChecker,
    "drupal": DrupalChecker,
}
1228
1229
1231 """The standard checks for common checks on translation units."""
1232
1236
1240
def nplurals(self, unit):
    """Check for the correct number of noun forms for plural translations.

    Units without plurals always pass.  For plural units the number of
    target strings must equal the target language's ``nplurals``.
    """
    if not unit.hasplural():
        return True
    expected = self.config.lang.nplurals
    # if we don't have a valid nplurals value, don't run the test
    if expected > 0:
        return len(unit.target.strings) == expected
    return True
1250
def hassuggestion(self, unit):
    """Check whether there is at least one suggested translation for this
    unit; the check passes (returns True) only when there is none.

    Suggestions come from ``self.suggestion_store`` when one is set,
    otherwise from the alternative translations of an XLIFF unit.
    """
    # Make sure the attribute exists, defaulting to None.
    store = getattr(self, 'suggestion_store', None)
    self.suggestion_store = store
    if store:
        suggestions = store.findunits(unit.source)
    elif xliff and isinstance(unit, xliff.xliffunit):
        # TODO: we probably want to filter them somehow
        suggestions = unit.getalttrans()
    else:
        suggestions = []
    return not suggestions
1262
1263
def runtests(str1, str2, ignorelist=()):
    """Verify that the tests pass for a pair of strings.

    Both strings are normalised, wrapped in a translation unit and run
    through a StandardChecker that excludes the filters named in
    ``ignorelist``.  Each failure is printed; the failures are returned.
    """
    from translate.storage import base
    source = data.normalized_unicode(str1)
    target = data.normalized_unicode(str2)
    unit = base.TranslationUnit(source)
    unit.target = target
    failures = StandardChecker(excludefilters=ignorelist).run_filters(unit)
    for test in failures:
        # Single parenthesised argument: prints the same text whether
        # print is a statement or a function.
        print("failure: %s: %s\n %r\n %r" % (test, failures[test], source, target))
    return failures
1276
1277
def batchruntests(pairs):
    """Run the tests on a batch of string pairs and print a summary.

    ``pairs`` is a sequence of (source, target) tuples.  A pair counts as
    passed when runtests() reports no failures for it.
    """
    numpairs = len(pairs)
    passed = 0
    for str1, str2 in pairs:
        # runtests() returns the failures it found, so an empty (falsy)
        # result is what marks a passing pair.
        if not runtests(str1, str2):
            passed += 1
    # Blank separator line, then the summary.
    print("")
    print("total: %d/%d pairs passed" % (passed, numpairs))
1286
if __name__ == '__main__':
    # Ad-hoc smoke test when the module is run as a script: each entry is
    # a (source, target) pair that is fed through batchruntests().
    testset = [(r"simple", r"somple"),
               (r"\this equals \that", r"does \this equal \that?"),
               (r"this \'equals\' that", r"this 'equals' that"),
               (r" start and end! they must match.", r"start and end! they must match."),
               (r"check for matching %variables marked like %this", r"%this %variable is marked"),
               (r"check for mismatching %variables marked like %this", r"%that %variable is marked"),
               (r"check for mismatching %variables% too", r"how many %variable% are marked"),
               (r"%% %%", r"%%"),
               (r"Row: %1, Column: %2", r"Mothalo: %1, Kholomo: %2"),
               (r"simple lowercase", r"it is all lowercase"),
               (r"simple lowercase", r"It Is All Lowercase"),
               (r"Simple First Letter Capitals", r"First Letters"),
               (r"SIMPLE CAPITALS", r"First Letters"),
               (r"SIMPLE CAPITALS", r"ALL CAPITALS"),
               (r"forgot to translate", r" "),
               ]
    batchruntests(testset)
1305
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Tue Aug 17 15:50:20 2010 | http://epydoc.sourceforge.net |