| Home | Trees | Indices | Help |
|
|---|
|
|
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 #
4 # Copyright 2004-2008 Zuza Software Foundation
5 #
6 # This file is part of translate.
7 #
8 # translate is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
12 #
13 # translate is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the GNU General Public License
19 # along with translate; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
22 """This is a set of validation checks that can be performed on translation
23 units.
24
25 Derivatives of UnitChecker (like StandardUnitChecker) check translation units,
26 and derivatives of TranslationChecker (like StandardChecker) check
27 (source, target) translation pairs.
28
29 When adding a new test here, please document and explain the behaviour on the
30 U{wiki <http://translate.sourceforge.net/wiki/toolkit/pofilter_tests>}.
31 """
32
33 from translate.filters import helpers
34 from translate.filters import decoration
35 from translate.filters import prefilters
36 from translate.filters import spelling
37 from translate.lang import factory
38 from translate.lang import data
39 # The import of xliff could fail if the user doesn't have lxml installed. For
40 # now we try to continue gracefully to help users who aren't interested in
41 # support for XLIFF or other XML formats.
42 try:
43 from translate.storage import xliff
44 except ImportError, e:
45 xliff = None
46 # The import of xliff fails silently in the absence of lxml if another module
47 # has already tried to import it unsuccessfully, so let's make 100% sure:
48 if not hasattr(xliff, "xliffunit"):
49 xliff = None
50 import re
51
# These are some regular expressions that are compiled for use in some tests

# printf syntax based on http://en.wikipedia.org/wiki/Printf, which doesn't
# cover everything; we leave \w instead of specifying the exact letters, as
# this should capture printf types defined on other platforms.
# Extended to support Python named format specifiers.
# The length modifiers form a real alternation; "ll" is listed before "l" so
# that "%lld" is consumed as one specifier instead of stopping at "%ll".
printf_pat = re.compile(r'%((?:(?P<ord>\d+)\$|\((?P<key>\w+)\))?(?P<fullvar>[+#-]*(?:\d+)?(?:\.\d+)?(hh|h|ll|l)?(?P<type>[\w%])))')

# The name of the XML tag
tagname_re = re.compile(r"<[\s]*([\w\/]*)")

# We allow escaped quotes, probably for old escaping style of OOo helpcontent
#TODO: remove escaped strings once usage is audited
property_re = re.compile(r""" (\w*)=((\\?".*?\\?")|(\\?'.*?\\?'))""")

# The whole tag
tag_re = re.compile(r"<[^>]+>")

gconf_attribute_re = re.compile(r'"[a-z_]+?"')
70
def tagname(string):
    """Returns the name of the XML/HTML tag in string.

    The name is whatever tagname_re captures after the opening "<" and any
    whitespace, so a closing tag keeps its leading slash.
    """
    found = tagname_re.match(string)
    return found.group(1)
74
def intuplelist(pair, list):
    """Looks up pair == (a, b, c) in list, treating None entries in list as
    wildcards (only honoured in positions "a" and "c"). Position "c" is only
    considered once "b" has matched.

    @param pair: a (tagname, propertyname, propertyvalue) tuple
    @param list: the patterns to search, each a 3-tuple of the same shape
    @return: the first matching pattern from list, or pair itself when nothing
        matches or when pair describes a bare tag name (b and c both None)
    """
    tag, attribute, value = pair
    if (None, None) == (attribute, value):
        # This is a tagname
        return pair
    for candidate in list:
        cand_tag, cand_attribute, cand_value = candidate
        tag_matches = cand_tag == tag or cand_tag is None
        if not (tag_matches and cand_attribute == attribute):
            continue
        if cand_value is None or cand_value == value:
            return candidate
    return pair
89
def tagproperties(strings, ignore):
    """Returns all the properties in the XML/HTML tag strings as
    (tagname, propertyname, propertyvalue), but ignore those combinations
    specified in ignore.

    @param strings: the tag strings to analyse
    @param ignore: (tagname, propertyname, propertyvalue) patterns to leave
        out; None entries act as wildcards as handled by intuplelist
    @return: a list that holds, for every tag, a (tagname, None, None) entry
        followed by one entry per attribute that is not ignored
    """
    properties = []
    for string in strings:
        tag = tagname(string)
        properties.append((tag, None, None))
        # Now we isolate the attribute pairs.
        for property, value, _doublequoted, _singlequoted in property_re.findall(string):
            # Strip the quotes:
            value = value[1:-1]
            candidate = (tag, property, value)
            # intuplelist hands back the matching pattern, which differs from
            # the candidate whenever a wildcard pattern matched it.
            if candidate in ignore or intuplelist(candidate, ignore) != candidate:
                # Skip only this attribute; the remaining attributes of the
                # tag still have to be reported.
                continue
            properties.append(candidate)
    return properties
112
113
class FilterFailure(Exception):
    """This exception signals that a Filter didn't pass, and gives an
    explanation or a comment.

    @param messages: a single unicode message or a list of unicode messages;
        a list is joined with ", " into one exception message
    """

    def __init__(self, messages):
        if not isinstance(messages, list):
            messages = [messages]
        # Assumption: all of same type, so only the first entry is checked.
        # An empty list has nothing to check.
        if messages:
            assert isinstance(messages[0], unicode)
        joined = u", ".join(messages)
        Exception.__init__(self, joined)
        # Python 2.3 doesn't have .args
        if not hasattr(self, "args"):
            # Keep the tuple shape: handlers read the message as e.args[0].
            self.args = (joined,)
126
class SeriousFilterFailure(FilterFailure):
    """This exception signals that a Filter didn't pass, and the bad
    translation might break an application (so the string will be marked
    fuzzy)"""
131
# Each entry is a (tag, attribute, value) tuple naming an attribute that can be
# changed/ignored if it exists inside tag. A non-None third element restricts
# the entry to that particular attribute value (like defaults, for example).
# A None element is a wildcard: the entry applies to every tag (first position)
# or to every value of the attribute (third position).
common_ignoretags = [(None, "xml-lang", None)]
common_canchangetags = [("img", "alt", None)]
141
143 """object representing the configuration of a checker"""
def __init__(self, targetlanguage=None, accelmarkers=None, varmatches=None,
             notranslatewords=None, musttranslatewords=None, validchars=None,
             punctuation=None, endpunctuation=None, ignoretags=None,
             canchangetags=None, criticaltests=None, credit_sources=None):
    """Builds the configuration; every option left as None gets a default.

    List options default to a new empty list. The punctuation options default
    to the values of the target language object, and the tag options to the
    module-level common_* lists.
    """
    init_list = self._init_list
    init_default = self._init_default
    normalize = data.normalized_unicode

    # List-valued options
    self.accelmarkers = init_list(accelmarkers)
    self.varmatches = init_list(varmatches)
    self.criticaltests = init_list(criticaltests)
    self.credit_sources = init_list(credit_sources)

    # Language data; self.lang must exist before the defaults below are read
    self.targetlanguage = targetlanguage
    self.updatetargetlanguage(targetlanguage)
    self.sourcelang = factory.getlanguage('en')

    # Options that fall back to language or module defaults
    self.punctuation = init_default(normalize(punctuation), self.lang.punctuation)
    self.endpunctuation = init_default(normalize(endpunctuation), self.lang.sentenceend)
    self.ignoretags = init_default(ignoretags, common_ignoretags)
    self.canchangetags = init_default(canchangetags, common_canchangetags)

    # Word lists are kept as dictionaries keyed on the normalized word
    # TODO: allow user configuration of untranslatable words
    self.notranslatewords = dict.fromkeys(
        [normalize(key) for key in init_list(notranslatewords)])
    self.musttranslatewords = dict.fromkeys(
        [normalize(key) for key in init_list(musttranslatewords)])

    validchars = normalize(validchars)
    self.validcharsmap = {}
    self.updatevalidchars(validchars)
169
171 """initialise configuration paramaters that are lists
172
173 @type list: List
174 @param list: None (we'll initialise a blank list) or a list paramater
175 @rtype: List
176 """
177 if list is None:
178 list = []
179 return list
180
182 """initialise parameters that can have default options
183
184 @param param: the user supplied paramater value
185 @param default: default values when param is not specified
186 @return: the paramater as specified by the user of the default settings
187 """
188 if param is None:
189 return default
190 return param
191
193 """combines the info in otherconfig into this config object"""
194 self.targetlanguage = otherconfig.targetlanguage or self.targetlanguage
195 self.updatetargetlanguage(self.targetlanguage)
196 self.accelmarkers.extend([c for c in otherconfig.accelmarkers if not c in self.accelmarkers])
197 self.varmatches.extend(otherconfig.varmatches)
198 self.notranslatewords.update(otherconfig.notranslatewords)
199 self.musttranslatewords.update(otherconfig.musttranslatewords)
200 self.validcharsmap.update(otherconfig.validcharsmap)
201 self.punctuation += otherconfig.punctuation
202 self.endpunctuation += otherconfig.endpunctuation
203 #TODO: consider also updating in the following cases:
204 self.ignoretags = otherconfig.ignoretags
205 self.canchangetags = otherconfig.canchangetags
206 self.criticaltests.extend(otherconfig.criticaltests)
207 self.credit_sources = otherconfig.credit_sources
208
def updatevalidchars(self, validchars):
    """Updates the map that eliminates valid characters.

    Every character of the normalized validchars is added to
    self.validcharsmap as a code point mapped to None.

    @param validchars: the characters to add, or None to change nothing
    @return: True when validchars is None; otherwise nothing is returned
    """
    if validchars is None:
        return True
    normalized = data.normalized_unicode(validchars)
    additions = dict.fromkeys([ord(validchar) for validchar in normalized])
    self.validcharsmap.update(additions)
215
def updatetargetlanguage(self, langcode):
    """Updates the target language in the config to the given target language.

    Only self.lang is replaced, with the language object that
    factory.getlanguage returns for langcode.
    """
    language = factory.getlanguage(langcode)
    self.lang = language
219
def cache_results(f):
    """Wraps the one-argument method f so that results are memoised.

    Results are stored in the instance's results_cache dictionary under the
    key (f.__name__, param1); f is only called when that key is missing.
    """
    def cached_f(self, param1):
        cache = self.results_cache
        key = (f.__name__, param1)
        if key not in cache:
            result = f(self, param1)
            cache[key] = result
            return result
        return cache[key]
    return cached_f
231
233 """Parent Checker class which does the checking based on functions available
234 in derived classes."""
235 preconditions = {}
236
def __init__(self, checkerconfig=None, excludefilters=None, limitfilters=None, errorhandler=None):
    """Sets up the configuration, the helper table and the default filters.

    @param checkerconfig: the configuration to use; a fresh CheckerConfig is
        created when this is None
    @param excludefilters: passed on to getfilters
    @param limitfilters: passed on to getfilters
    @param errorhandler: stored as self.errorhandler
    """
    self.errorhandler = errorhandler
    if checkerconfig is not None:
        self.setconfig(checkerconfig)
    else:
        self.setconfig(CheckerConfig())
    # exclude functions defined in UnitChecker from being treated as tests...
    self.helperfunctions = helpers_by_name = {}
    for name in dir(UnitChecker):
        attribute = getattr(self, name)
        if callable(attribute):
            helpers_by_name[name] = attribute
    self.defaultfilters = self.getfilters(excludefilters, limitfilters)

    self.results_cache = {}
252
def getfilters(self, excludefilters=None, limitfilters=None):
    """Returns a dictionary of the available filters, keyed on name.

    @param excludefilters: names to leave out (None excludes nothing)
    @param limitfilters: the only names to consider (None means every
        attribute name of this checker)
    @return: name -> callable for each considered name that is not excluded,
        is not a helper function, is not "errorhandler" and is callable
    """
    if limitfilters is None:
        # use everything available unless instructed
        limitfilters = dir(self)
    if excludefilters is None:
        excludefilters = {}
    filters = {}
    for name in limitfilters:
        if name in excludefilters or name in self.helperfunctions or name == "errorhandler":
            continue
        candidate = getattr(self, name, None)
        if callable(candidate):
            filters[name] = candidate
    return filters
270
def setconfig(self, config):
    """Stores config and rebuilds the filters derived from it.

    One accelerator filter is built per accelerator marker. For each
    (startmatch, endmatch) pair in varmatches two variable filters are built,
    one with prefilters.varname and one with prefilters.varnone.
    """
    self.config = config
    accelerator_filters = []
    for accelmarker in self.config.accelmarkers:
        accelerator_filters.append(prefilters.filteraccelerators(accelmarker))
    self.accfilters = accelerator_filters
    name_filters = []
    for startmatch, endmatch in self.config.varmatches:
        name_filters.append(prefilters.filtervariables(startmatch, endmatch, prefilters.varname))
    self.varfilters = name_filters
    remove_filters = []
    for startmatch, endmatch in self.config.varmatches:
        remove_filters.append(prefilters.filtervariables(startmatch, endmatch, prefilters.varnone))
    self.removevarfilter = remove_filters
279
def setsuggestionstore(self, store):
    """Sets the store that a checker should use for evaluating suggestions.

    The store is kept as self.suggestion_store; when it is truthy its
    require_index() method is called.
    """
    self.suggestion_store = store
    if not self.suggestion_store:
        return
    self.suggestion_store.require_index()
285
289 filtervariables = cache_results(filtervariables)
290
294 removevariables = cache_results(removevariables)
295
def filteraccelerators(self, str1):
    """filter out accelerators from str1

    Applies every filter in self.accfilters through helpers.multifilter. The
    result is memoised per string by the cache_results wrapper below."""
    return helpers.multifilter(str1, self.accfilters, None)
filteraccelerators = cache_results(filteraccelerators)
300
302 """filter out accelerators from str1"""
303 return helpers.multifilter(str1, self.accfilters, acceptlist)
304
def filterwordswithpunctuation(self, str1):
    """replaces words with punctuation with their unpunctuated equivalents

    Delegates to prefilters.filterwordswithpunctuation. The result is memoised
    per string by the cache_results wrapper below."""
    return prefilters.filterwordswithpunctuation(str1)
filterwordswithpunctuation = cache_results(filterwordswithpunctuation)
309
313 filterxml = cache_results(filterxml)
314
def run_test(self, test, unit):
    """Runs the given test on the given unit.

    Note that this can raise a FilterFailure as part of normal operation

    @param test: the filter function to call; it receives the unit itself
    @param unit: the translation unit to check
    @return: whatever test returns"""
    return test(unit)
320
def run_filters(self, unit):
    """run all the tests in this suite, return failures as testname, message_or_exception

    The tests named in self.preconditions run first, then the remaining
    default filters. Tests listed in the target language's ignoretests are
    skipped, as are the dependants of a precondition that failed.

    @param unit: the translation unit to check
    @return: a dictionary mapping each failed test name to its message: the
        FilterFailure message when one was raised, otherwise the docstring of
        the test"""
    # the result cache is only valid for a single unit
    self.results_cache = {}
    failures = {}
    # copy, because dependants of failed preconditions are appended below
    ignores = self.config.lang.ignoretests[:]
    functionnames = self.defaultfilters.keys()
    priorityfunctionnames = self.preconditions.keys()
    otherfunctionnames = filter(lambda functionname: functionname not in self.preconditions, functionnames)
    for functionname in priorityfunctionnames + otherfunctionnames:
        if functionname in ignores:
            continue
        filterfunction = getattr(self, functionname, None)
        # this filterfunction may only be defined on another checker if using TeeChecker
        if filterfunction is None:
            continue
        # default message; replaced when the test raises a FilterFailure
        filtermessage = filterfunction.__doc__
        try:
            filterresult = self.run_test(filterfunction, unit)
        except FilterFailure, e:
            filterresult = False
            filtermessage = e.args[0]
        except Exception, e:
            # any other error is re-raised as ValueError unless an error
            # handler was supplied, in which case its return value is the result
            if self.errorhandler is None:
                raise ValueError("error in filter %s: %r, %r, %s" % \
                        (functionname, unit.source, unit.target, e))
            else:
                filterresult = self.errorhandler(functionname, unit.source, unit.target, e)
        if not filterresult:
            # we test some preconditions that aren't actually a cause for failure
            if functionname in self.defaultfilters:
                failures[functionname] = filtermessage
            if functionname in self.preconditions:
                for ignoredfunctionname in self.preconditions[functionname]:
                    ignores.append(ignoredfunctionname)
    self.results_cache = {}
    return failures
357
359 """A checker that passes source and target strings to the checks, not the
360 whole unit.
361
362 This provides some speedup and simplifies testing."""
def __init__(self, checkerconfig=None, excludefilters=None, limitfilters=None, errorhandler=None):
    """Passes all arguments unchanged to the parent initialiser."""
    parent = super(TranslationChecker, self)
    parent.__init__(checkerconfig, excludefilters, limitfilters, errorhandler)
365
367 """Runs the given test on the given unit.
368
369 Note that this can raise a FilterFailure as part of normal operation."""
370 if self.hasplural:
371 filtermessages = []
372 filterresult = True
373 for pluralform in unit.target.strings:
374 try:
375 if not test(self.str1, unicode(pluralform)):
376 filterresult = False
377 except FilterFailure, e:
378 filterresult = False
379 filtermessages.append( unicode(e.args) )
380 if not filterresult and filtermessages:
381 raise FilterFailure(filtermessages)
382 else:
383 return filterresult
384 else:
385 return test(self.str1, self.str2)
386
def run_filters(self, unit):
    """Caches some data of the unit for the benefit of run_test(), then runs
    the parent's run_filters on it.

    The cached data is the normalized source and target, whether the unit has
    plurals, and its locations.
    """
    normalize = data.normalized_unicode
    self.str1 = normalize(unit.source)
    self.str2 = normalize(unit.target)
    self.hasplural = unit.hasplural()
    self.locations = unit.getlocations()
    parent = super(TranslationChecker, self)
    return parent.run_filters(unit)
395
397 """A Checker that controls multiple checkers."""
def __init__(self, checkerconfig=None, excludefilters=None, limitfilters=None,
        checkerclasses=None, errorhandler=None, languagecode=None):
    """construct a TeeChecker from the given checkers

    @param checkerconfig: handed to every checker class; also becomes
        self.config when it is truthy
    @param excludefilters: handed to every checker class and to getfilters
    @param limitfilters: handed to every checker class and to getfilters
    @param checkerclasses: the classes to instantiate; None means
        [StandardChecker]
    @param errorhandler: handed to every checker class
    @param languagecode: when given, every checker's config is switched to
        this target language"""
    self.limitfilters = limitfilters
    if checkerclasses is None:
        checkerclasses = [StandardChecker]
    self.checkers = [checkerclass(checkerconfig=checkerconfig, excludefilters=excludefilters, limitfilters=limitfilters, errorhandler=errorhandler) for checkerclass in checkerclasses]
    if languagecode:
        for checker in self.checkers:
            checker.config.updatetargetlanguage(languagecode)
        # Let's hook up the language specific checker
        # NOTE(review): nesting under "if languagecode" is reconstructed, the
        # listing has lost its indentation - confirm against the real file
        lang_checker = self.checkers[0].config.lang.checker
        if lang_checker:
            self.checkers.append(lang_checker)

    self.combinedfilters = self.getfilters(excludefilters, limitfilters)
    # fall back to the first checker's config when none was passed in
    self.config = checkerconfig or self.checkers[0].config
415
def getfilters(self, excludefilters=None, limitfilters=None):
    """Returns a dictionary of the filters of all checkers combined.

    Each checker is asked for its filters with the same arguments. The results
    are merged in checker order, so a later checker wins on a name clash. The
    merged dictionary is also stored as self.combinedfilters. A warning goes
    to stderr for every name in limitfilters that no checker provides.
    """
    if excludefilters is None:
        excludefilters = {}
    per_checker = []
    for checker in self.checkers:
        per_checker.append(checker.getfilters(excludefilters, limitfilters))
    merged = {}
    self.combinedfilters = merged
    for found in per_checker:
        merged.update(found)
    # TODO: move this somewhere more sensible (a checkfilters method?)
    if limitfilters is not None:
        for filtername in limitfilters:
            if filtername in merged:
                continue
            import sys
            sys.stderr.write("warning: could not find filter %s" % filtername + "\n")
    return self.combinedfilters
432
def run_filters(self, unit):
    """Runs the tests of every checker on unit and merges the failures.

    The checkers run in order; when two report the same test name the later
    one's entry is kept.
    """
    combined = {}
    for member in self.checkers:
        member_failures = member.run_filters(unit)
        combined.update(member_failures)
    return combined
439
def setsuggestionstore(self, store):
    """Hands the suggestion store on to every checker in self.checkers."""
    for member in self.checkers:
        member.setsuggestionstore(store)
444
445
447 """The basic test suite for source -> target translations."""
449 """checks whether a string has been translated at all"""
450 str2 = prefilters.removekdecomments(str2)
451 return not (len(str1.strip()) > 0 and len(str2) == 0)
452
454 """checks whether a translation is basically identical to the original string"""
455 str1 = self.filteraccelerators(self.removevariables(str1)).strip()
456 str2 = self.filteraccelerators(self.removevariables(str2)).strip()
457 if len(str1) < 2:
458 return True
459 # If the whole string is uppercase, or nothing in the string can go
460 # towards uppercase, let's assume there is nothing translatable
461 # TODO: reconsider
462 if (str1.isupper() or str1.upper() == str1) and str1 == str2:
463 return True
464 if self.config.notranslatewords:
465 words1 = str1.split()
466 if len(words1) == 1 and [word for word in words1 if word in self.config.notranslatewords]:
467 #currently equivalent to:
468 # if len(words1) == 1 and words1[0] in self.config.notranslatewords:
469 #why do we only test for one notranslate word?
470 return True
471 # we could also check for things like str1.isnumeric(), but the test
472 # above (str1.upper() == str1) makes this unnecessary
473 if str1.lower() == str2.lower():
474 raise FilterFailure(u"please translate")
475 return True
476
478 """checks whether a translation only contains spaces"""
479 len1 = len(str1.strip())
480 len2 = len(str2.strip())
481 return not (len1 > 0 and len(str2) != 0 and len2 == 0)
482
484 """checks whether a translation is much shorter than the original string"""
485 len1 = len(str1.strip())
486 len2 = len(str2.strip())
487 return not ((len1 > 0) and (0 < len2 < (len1 * 0.1)) or ((len1 > 1) and (len2 == 1)))
488
490 """checks whether a translation is much longer than the original string"""
491 len1 = len(str1.strip())
492 len2 = len(str2.strip())
493 return not ((len1 > 0) and (0 < len1 < (len2 * 0.1)) or ((len1 == 1) and (len2 > 1)))
494
496 """checks whether escaping is consistent between the two strings"""
497 if not helpers.countsmatch(str1, str2, (u"\\", u"\\\\")):
498 escapes1 = u", ".join([u"'%s'" % word for word in str1.split() if u"\\" in word])
499 escapes2 = u", ".join([u"'%s'" % word for word in str2.split() if u"\\" in word])
500 raise SeriousFilterFailure(u"escapes in original (%s) don't match escapes in translation (%s)" % (escapes1, escapes2))
501 else:
502 return True
503
505 """checks whether newlines are consistent between the two strings"""
506 if not helpers.countsmatch(str1, str2, (u"\n", u"\r")):
507 raise FilterFailure(u"line endings in original don't match line endings in translation")
508 else:
509 return True
510
512 """checks whether tabs are consistent between the two strings"""
513 if not helpers.countmatch(str1, str2, "\t"):
514 raise SeriousFilterFailure(u"tabs in original don't match tabs in translation")
515 else:
516 return True
517
519 """checks whether singlequoting is consistent between the two strings"""
520 str1 = self.filterwordswithpunctuation(self.filteraccelerators(self.filtervariables(str1)))
521 str2 = self.filterwordswithpunctuation(self.filteraccelerators(self.filtervariables(str2)))
522 return helpers.countsmatch(str1, str2, (u"'", u"''", u"\\'"))
523
525 """checks whether doublequoting is consistent between the two strings"""
526 str1 = self.filteraccelerators(self.filtervariables(str1))
527 str1 = self.filterxml(str1)
528 str1 = self.config.lang.punctranslate(str1)
529 str2 = self.filteraccelerators(self.filtervariables(str2))
530 str2 = self.filterxml(str2)
531 return helpers.countsmatch(str1, str2, (u'"', u'""', u'\\"', u"«", u"»", u"“", u"”"))
532
534 """checks for bad double-spaces by comparing to original"""
535 str1 = self.filteraccelerators(str1)
536 str2 = self.filteraccelerators(str2)
537 return helpers.countmatch(str1, str2, u" ")
538
540 """checks for bad spacing after punctuation"""
541 if str1.find(u" ") == -1:
542 return True
543 str1 = self.filteraccelerators(self.filtervariables(str1))
544 str1 = self.config.lang.punctranslate(str1)
545 str2 = self.filteraccelerators(self.filtervariables(str2))
546 for puncchar in self.config.punctuation:
547 plaincount1 = str1.count(puncchar)
548 plaincount2 = str2.count(puncchar)
549 if not plaincount1 or plaincount1 != plaincount2:
550 continue
551 spacecount1 = str1.count(puncchar + u" ")
552 spacecount2 = str2.count(puncchar + u" ")
553 if spacecount1 != spacecount2:
554 # handle extra spaces that are because of transposed punctuation
555 if str1.endswith(puncchar) != str2.endswith(puncchar) and abs(spacecount1-spacecount2) == 1:
556 continue
557 return False
558 return True
559
561 """checks whether printf format strings match"""
562 count1 = count2 = plural = None
563 # self.hasplural only set by run_filters, not always available
564 if 'hasplural' in self.__dict__:
565 plural = self.hasplural
566 for var_num2, match2 in enumerate(printf_pat.finditer(str2)):
567 count2 = var_num2 + 1
568 str2key = match2.group('key')
569 if match2.group('ord'):
570 for var_num1, match1 in enumerate(printf_pat.finditer(str1)):
571 count1 = var_num1 + 1
572 if int(match2.group('ord')) == var_num1 + 1:
573 if match2.group('fullvar') != match1.group('fullvar'):
574 return 0
575 elif str2key:
576 str1key = None
577 for var_num1, match1 in enumerate(printf_pat.finditer(str1)):
578 count1 = var_num1 + 1
579 if match1.group('key') and str2key == match1.group('key'):
580 str1key = match1.group('key')
581 # '%.0s' "placeholder" in plural will match anything
582 if plural and match2.group('fullvar') == '.0s':
583 continue
584 if match1.group('fullvar') != match2.group('fullvar'):
585 return 0
586 if str1key == None:
587 return 0
588 else:
589 for var_num1, match1 in enumerate(printf_pat.finditer(str1)):
590 count1 = var_num1 + 1
591 # '%.0s' "placeholder" in plural will match anything
592 if plural and match2.group('fullvar') == '.0s':
593 continue
594 if (var_num1 == var_num2) and (match1.group('fullvar') != match2.group('fullvar')):
595 return 0
596
597 if count2 is None:
598 if list(printf_pat.finditer(str1)):
599 return 0
600
601 if (count1 or count2) and (count1 != count2):
602 return 0
603 return 1
604
606 """checks whether accelerators are consistent between the two strings"""
607 str1 = self.filtervariables(str1)
608 str2 = self.filtervariables(str2)
609 messages = []
610 for accelmarker in self.config.accelmarkers:
611 counter1 = decoration.countaccelerators(accelmarker, self.config.sourcelang.validaccel)
612 counter2 = decoration.countaccelerators(accelmarker, self.config.lang.validaccel)
613 count1, countbad1 = counter1(str1)
614 count2, countbad2 = counter2(str2)
615 getaccel = decoration.getaccelerators(accelmarker, self.config.lang.validaccel)
616 accel2, bad2 = getaccel(str2)
617 if count1 == count2:
618 continue
619 if count1 == 1 and count2 == 0:
620 if countbad2 == 1:
621 messages.append(u"accelerator %s appears before an invalid accelerator character '%s' (eg. space)" % (accelmarker, bad2[0]))
622 else:
623 messages.append(u"accelerator %s is missing from translation" % accelmarker)
624 elif count1 == 0:
625 messages.append(u"accelerator %s does not occur in original and should not be in translation" % accelmarker)
626 elif count1 == 1 and count2 > count1:
627 messages.append(u"accelerator %s is repeated in translation" % accelmarker)
628 else:
629 messages.append(u"accelerator %s occurs %d time(s) in original and %d time(s) in translation" % (accelmarker, count1, count2))
630 if messages:
631 if "accelerators" in self.config.criticaltests:
632 raise SeriousFilterFailure(messages)
633 else:
634 raise FilterFailure(messages)
635 return True
636
637 # def acceleratedvariables(self, str1, str2):
638 # """checks that no variables are accelerated"""
639 # messages = []
640 # for accelerator in self.config.accelmarkers:
641 # for variablestart, variableend in self.config.varmatches:
642 # error = accelerator + variablestart
643 # if str1.find(error) >= 0:
644 # messages.append(u"original has an accelerated variable")
645 # if str2.find(error) >= 0:
646 # messages.append(u"translation has an accelerated variable")
647 # if messages:
648 # raise FilterFailure(messages)
649 # return True
650
652 """checks whether variables of various forms are consistent between the two strings"""
653 messages = []
654 mismatch1, mismatch2 = [], []
655 varnames1, varnames2 = [], []
656 for startmarker, endmarker in self.config.varmatches:
657 varchecker = decoration.getvariables(startmarker, endmarker)
658 if startmarker and endmarker:
659 if isinstance(endmarker, int):
660 redecorate = lambda var: startmarker + var
661 else:
662 redecorate = lambda var: startmarker + var + endmarker
663 elif startmarker:
664 redecorate = lambda var: startmarker + var
665 else:
666 redecorate = lambda var: var
667 vars1 = varchecker(str1)
668 vars2 = varchecker(str2)
669 if vars1 != vars2:
670 # we use counts to compare so we can handle multiple variables
671 vars1, vars2 = [var for var in vars1 if vars1.count(var) > vars2.count(var)], [var for var in vars2 if vars1.count(var) < vars2.count(var)]
672 # filter variable names we've already seen, so they aren't matched by more than one filter...
673 vars1, vars2 = [var for var in vars1 if var not in varnames1], [var for var in vars2 if var not in varnames2]
674 varnames1.extend(vars1)
675 varnames2.extend(vars2)
676 vars1 = map(redecorate, vars1)
677 vars2 = map(redecorate, vars2)
678 mismatch1.extend(vars1)
679 mismatch2.extend(vars2)
680 if mismatch1:
681 messages.append(u"do not translate: %s" % u", ".join(mismatch1))
682 elif mismatch2:
683 messages.append(u"translation contains variables not in original: %s" % u", ".join(mismatch2))
684 if messages and mismatch1:
685 raise SeriousFilterFailure(messages)
686 elif messages:
687 raise FilterFailure(messages)
688 return True
689
691 """checks that function names are not translated"""
692 return helpers.funcmatch(str1, str2, decoration.getfunctions, self.config.punctuation)
693
695 """checks that emails are not translated"""
696 return helpers.funcmatch(str1, str2, decoration.getemails)
697
699 """checks that URLs are not translated"""
700 return helpers.funcmatch(str1, str2, decoration.geturls)
701
703 """checks whether numbers of various forms are consistent between the two strings"""
704 return helpers.countsmatch(str1, str2, decoration.getnumbers(str1))
705
707 """checks whether whitespace at the beginning of the strings matches"""
708 return helpers.funcmatch(str1, str2, decoration.spacestart)
709
711 """checks whether whitespace at the end of the strings matches"""
712 str1 = self.config.lang.punctranslate(str1)
713 return helpers.funcmatch(str1, str2, decoration.spaceend)
714
716 """checks whether punctuation at the beginning of the strings match"""
717 str1 = self.filteraccelerators(self.filtervariables(self.filterwordswithpunctuation(str1)))
718 str1 = self.config.lang.punctranslate(str1)
719 str2 = self.filteraccelerators(self.filtervariables(self.filterwordswithpunctuation(str2)))
720 return helpers.funcmatch(str1, str2, decoration.puncstart, self.config.punctuation)
721
723 """checks whether punctuation at the end of the strings match"""
724 str1 = self.filteraccelerators(self.filtervariables(self.filterwordswithpunctuation(str1)))
725 str1 = self.config.lang.punctranslate(str1)
726 str2 = self.filteraccelerators(self.filtervariables(self.filterwordswithpunctuation(str2)))
727 str1 = str1.rstrip()
728 str2 = str2.rstrip()
729 return helpers.funcmatch(str1, str2, decoration.puncend, self.config.endpunctuation + u":")
730
732 """checks that strings that are purely punctuation are not changed"""
733 # this test is a subset of startandend
734 if (decoration.ispurepunctuation(str1)):
735 return str1 == str2
736 else:
737 return not decoration.ispurepunctuation(str2)
738
740 """checks that the number of brackets in both strings match"""
741 str1 = self.filtervariables(str1)
742 str2 = self.filtervariables(str2)
743 messages = []
744 missing = []
745 extra = []
746 for bracket in (u"[", u"]", u"{", u"}", u"(", u")"):
747 count1 = str1.count(bracket)
748 count2 = str2.count(bracket)
749 if count2 < count1:
750 missing.append(u"'%s'" % bracket)
751 elif count2 > count1:
752 extra.append(u"'%s'" % bracket)
753 if missing:
754 messages.append(u"translation is missing %s" % u", ".join(missing))
755 if extra:
756 messages.append(u"translation has extra %s" % u", ".join(extra))
757 if messages:
758 raise FilterFailure(messages)
759 return True
760
762 """checks that the number of sentences in both strings match"""
763 str1 = self.filteraccelerators(str1)
764 str2 = self.filteraccelerators(str2)
765 sentences1 = len(self.config.sourcelang.sentences(str1))
766 sentences2 = len(self.config.lang.sentences(str2))
767 if not sentences1 == sentences2:
768 raise FilterFailure(u"The number of sentences differ: %d versus %d" % (sentences1, sentences2))
769 return True
770
772 """checks that options are not translated"""
773 str1 = self.filtervariables(str1)
774 for word1 in str1.split():
775 if word1 != u"--" and word1.startswith(u"--") and word1[-1].isalnum():
776 parts = word1.split(u"=")
777 if not parts[0] in str2:
778 raise FilterFailure(u"The option %s does not occur or is translated in the translation." % parts[0])
779 if len(parts) > 1 and parts[1] in str2:
780 raise FilterFailure(u"The parameter %(param)s in option %(option)s is not translated." % {"param": parts[1], "option": parts[0]})
781 return True
782
784 """checks that the message starts with the correct capitalisation"""
785 str1 = self.filteraccelerators(str1)
786 str2 = self.filteraccelerators(str2)
787 if len(str1) > 1 and len(str2) > 1:
788 return self.config.sourcelang.capsstart(str1) == self.config.lang.capsstart(str2)
789 if len(str1) == 0 and len(str2) == 0:
790 return True
791 if len(str1) == 0 or len(str2) == 0:
792 return False
793 return True
794
796 """checks the capitalisation of two strings isn't wildly different"""
797 str1 = self.removevariables(str1)
798 str2 = self.removevariables(str2)
799 # TODO: review this. The 'I' is specific to English, so it probably serves
800 # no purpose to get sourcelang.sentenceend
801 str1 = re.sub(u"[^%s]( I )" % self.config.sourcelang.sentenceend, u" i ", str1)
802 capitals1 = helpers.filtercount(str1, unicode.isupper)
803 capitals2 = helpers.filtercount(str2, unicode.isupper)
804 alpha1 = helpers.filtercount(str1, unicode.isalpha)
805 alpha2 = helpers.filtercount(str2, unicode.isalpha)
806 # Capture the all caps case
807 if capitals1 == alpha1:
808 return capitals2 == alpha2
809 # some heuristic tests to try and see that the style of capitals is vaguely the same
810 if capitals1 == 0 or capitals1 == 1:
811 return capitals2 == capitals1
812 elif capitals1 < len(str1) / 10:
813 return capitals2 <= len(str2) / 8
814 elif len(str1) < 10:
815 return abs(capitals1 - capitals2) < 3
816 elif capitals1 > len(str1) * 6 / 10:
817 return capitals2 > len(str2) * 6 / 10
818 else:
819 return abs(capitals1 - capitals2) < (len(str1) + len(str2)) / 6
820
def acronyms(self, str1, str2):
    """Check that acronyms appearing in the source are kept in the target.

    A word counts as an acronym when it is entirely upper case and longer
    than one character.  Variables found in the source and the configured
    must-translate words are exempt.

    Raises FilterFailure listing every acronym missing from the target.
    """
    exempt = []
    for startmatch, endmatch in self.config.varmatches:
        exempt.extend(decoration.getvariables(startmatch, endmatch)(str1))
    exempt.extend(self.config.musttranslatewords.keys())
    source = self.filteraccelerators(self.filtervariables(str1))
    target = self.filteraccelerators(self.filtervariables(str2))
    #TODO: strip XML? - should provide better error messages
    # see mail/chrome/messanger/smime.properties.po
    #TODO: consider limiting the word length for recognising acronyms to
    #something like 5/6 characters
    missing = [
        word for word in self.config.lang.word_iter(source)
        if word.isupper() and len(word) > 1
        and word not in exempt and word not in target
    ]
    if missing:
        raise FilterFailure(u"acronyms should not be translated: " + u", ".join(missing))
    return True
842
def doublewords(self, str1, str2):
    """Check for repeated words in the translation.

    Variables and accelerators are stripped from the target, full stops are
    dropped and the text is lower-cased before it is split on whitespace
    (newlines included).  Two identical consecutive words are reported
    unless the word is listed in the target language's ``validdoublewords``.

    Raises FilterFailure naming the first repeated word.
    """
    cleaned = self.filteraccelerators(self.removevariables(str2))
    words = cleaned.replace(u".", u"").lower().split()
    previous = ""
    for word in words:
        if word == previous and word not in self.config.lang.validdoublewords:
            raise FilterFailure(u"The word '%s' is repeated" % word)
        previous = word
    return True
853
def notranslatewords(self, str1, str2):
    """Check that words configured as untranslatable also occur in the target.

    Returns True immediately when no such words are configured.
    Raises FilterFailure listing the protected source words that are
    missing from the translation.
    """
    protected = self.config.notranslatewords
    if not protected:
        return True
    source = self.filtervariables(str1)
    target = self.filtervariables(str2)
    # Every configured punctuation mark is treated as a word separator.
    # NOTE: a single apostrophe is perhaps problematic in words like "doesn't"
    for mark in self.config.punctuation:
        source = source.replace(mark, u" ")
        target = target.replace(mark, u" ")
    source_words = self.filteraccelerators(source).split()
    target_words = self.filteraccelerators(target).split()
    translated = [word for word in source_words
                  if word in protected and word not in target_words]
    if translated:
        raise FilterFailure(u"do not translate: %s" % (u", ".join(translated)))
    return True
871
def musttranslatewords(self, str1, str2):
    """Check that words configured as definitely translatable are not left
    unchanged in the translation.

    Returns True immediately when no such words are configured.
    Raises FilterFailure listing the source words that still appear in the
    target.
    """
    required = self.config.musttranslatewords
    if not required:
        return True
    source = self.removevariables(str1)
    target = self.removevariables(str2)
    # Every configured punctuation mark is treated as a word separator.
    # NOTE: a single apostrophe is perhaps problematic in words like "doesn't"
    for mark in self.config.punctuation:
        source = source.replace(mark, u" ")
        target = target.replace(mark, u" ")
    source_words = self.filteraccelerators(source).split()
    target_words = self.filteraccelerators(target).split()
    untranslated = [word for word in source_words
                    if word in required and word in target_words]
    if untranslated:
        raise FilterFailure(u"please translate: %s" % (u", ".join(untranslated)))
    return True
890
def validchars(self, str1, str2):
    """Check that only characters specified as valid appear in the translation.

    Returns True immediately when no ``validcharsmap`` is configured.
    Raises FilterFailure listing the offending characters of the target
    (with their code points) that do not also occur in the source.
    """
    charmap = self.config.validcharsmap
    if not charmap:
        return True
    # presumably the map deletes the valid characters so that only invalid
    # ones remain after translate() -- confirm against CheckerConfig
    leftover_source = str1.translate(charmap)
    leftover_target = str2.translate(charmap)
    offending = [u"'%s' (\\u%04x)" % (char, ord(char))
                 for char in leftover_target
                 if char not in leftover_source]
    if offending:
        raise FilterFailure(u"invalid chars: %s" % (u", ".join(offending)))
    return True
901
def filepaths(self, str1, str2):
    """Check that file paths have not been translated.

    Every whitespace-separated word of the source (accelerators removed)
    that starts with "/" is passed to ``helpers.countsmatch`` together with
    both strings; the check fails as soon as one such word does not match.
    """
    for token in self.filteraccelerators(str1).split():
        if token.startswith(u"/") and not helpers.countsmatch(str1, str2, (token,)):
            return False
    return True
909
936
def kdecomments(self, str1, str2):
    """Check that no KDE style comment ("_:") appears in the translation.

    A comment is detected at the very start of the target or directly
    after any newline in it.
    """
    has_comment = str2.startswith(u"_:") or u"\n_:" in str2
    return not has_comment
940
def compendiumconflicts(self, str1, str2):
    """Check the translation for Gettext compendium conflict markers (#-#-#-#-#)."""
    return u"#-#-#-#-#" not in str2
944
def simpleplurals(self, str1, str2):
    """Check for English style plural(s) that need reviewing.

    Counts occurrences of "(s)" in source and target.  For a target
    language with a single plural form any "(s)" in the target fails;
    otherwise the two counts must be equal.
    """
    def count_matches(text, patterns):
        # Total number of matches of all patterns in text.
        return sum(len(re.findall(pattern, text)) for pattern in patterns)

    source_patterns = [r"\(s\)"]
    target_patterns = [r"\(s\)"]
    target_total = count_matches(str2, target_patterns)
    if self.config.lang.nplurals == 1:
        return not target_total
    return count_matches(str1, source_patterns) == target_total
960
def spellcheck(self, str1, str2):
    """Check the translation for words that don't pass a spell check.

    Returns True without checking when no target language is configured or
    when the spelling module reports itself unavailable.  Words that are
    configured as untranslatable, that are also flagged in the source, or
    that appear among their own suggestions are ignored.

    Raises FilterFailure with one message per remaining misspelt word.
    """
    if not self.config.targetlanguage or not spelling.available:
        return True
    # TODO: filterxml?
    source = self.filteraccelerators_by_list(self.filtervariables(str1),
                                             self.config.sourcelang.validaccel)
    target = self.filteraccelerators_by_list(self.filtervariables(str2),
                                             self.config.lang.validaccel)
    # Words already flagged in the source are not reported for the target.
    flagged_in_source = [word for word, index, suggestions
                         in spelling.check(source, lang="en")]
    messages = []
    for word, index, suggestions in spelling.check(target, lang=self.config.targetlanguage):
        if word in self.config.notranslatewords or word in flagged_in_source:
            continue
        # hack to ignore hyphenisation rules
        if word in suggestions:
            continue
        messages.append(u"check spelling of %s (could be %s)" % (word, u" / ".join(suggestions[:5])))
    if messages:
        raise FilterFailure(messages)
    return True
986
def credits(self, str1, str2):
    """Check for messages that hold translation credits rather than a normal
    translation: fails when the source is one of the configured credit sources."""
    return str1 not in self.config.credit_sources
990
# Maps the name of a precondition test to the tests that depend on it: if the
# precondition test is run and fails, the other tests listed for it are
# ignored.
preconditions = {"untranslated": ("simplecaps", "variables", "startcaps",
                                  "accelerators", "brackets", "endpunc",
                                  "acronyms", "xmltags", "startpunc",
                                  "endwhitespace", "startwhitespace",
                                  "escapes", "doublequoting", "singlequoting",
                                  "filepaths", "purepunc", "doublespacing",
                                  "sentencecount", "numbers", "isfuzzy",
                                  "isreview", "notranslatewords", "musttranslatewords",
                                  "emails", "simpleplurals", "urls", "printf",
                                  "tabs", "newlines", "functions", "options",
                                  "blank", "nplurals", "gconf"),
                 # Same list as for "untranslated", minus "blank" and "nplurals".
                 "blank": ("simplecaps", "variables", "startcaps",
                           "accelerators", "brackets", "endpunc",
                           "acronyms", "xmltags", "startpunc",
                           "endwhitespace", "startwhitespace",
                           "escapes", "doublequoting", "singlequoting",
                           "filepaths", "purepunc", "doublespacing",
                           "sentencecount", "numbers", "isfuzzy",
                           "isreview", "notranslatewords", "musttranslatewords",
                           "emails", "simpleplurals", "urls", "printf",
                           "tabs", "newlines", "functions", "options",
                           "gconf"),
                 "credits": ("simplecaps", "variables", "startcaps",
                             "accelerators", "brackets", "endpunc",
                             "acronyms", "xmltags", "startpunc",
                             "escapes", "doublequoting", "singlequoting",
                             "filepaths", "doublespacing",
                             "sentencecount", "numbers",
                             "emails", "simpleplurals", "urls", "printf",
                             "tabs", "newlines", "functions", "options"),
                 "purepunc": ("startcaps", "options"),
                 # The entry below is disabled.  It has been causing problems
                 # since Python 2.6, as startcaps is now seen as an important
                 # one to always execute and could now be done before it is
                 # blocked by a failing "untranslated" or "blank" test.  This
                 # is probably happening due to slightly different
                 # implementation of the internal dict handling since Python
                 # 2.6.  We should never have relied on this ordering anyway.
                 #"startcaps": ("simplecaps",),
                 "endwhitespace": ("endpunc",),
                 "startwhitespace":("startpunc",),
                 "unchanged": ("doublewords",),
                 "compendiumconflicts": ("accelerators", "brackets", "escapes",
                                         "numbers", "startpunc", "long", "variables",
                                         "startcaps", "sentencecount", "simplecaps",
                                         "doublespacing", "endpunc", "xmltags",
                                         "startwhitespace", "endwhitespace",
                                         "singlequoting", "doublequoting",
                                         "filepaths", "purepunc", "doublewords", "printf") }
1041
1042 # code to actually run the tests (use unittest?)
1043
# Checker settings merged into the configuration of OpenOfficeChecker.
# Each varmatches entry is a (start, end) pair describing how a variable is
# delimited; the meaning of a None or integer end value is defined by the
# variable matching code elsewhere -- TODO confirm.
openofficeconfig = CheckerConfig(
    accelmarkers = ["~"],
    varmatches = [("&", ";"), ("%", "%"), ("%", None), ("%", 0), ("$(", ")"), ("$", "$"), ("${", "}"), ("#", "#"), ("#", 1), ("#", 0), ("($", ")"), ("$[", "]"), ("[", "]"), ("$", None)],
    ignoretags = [("alt", "xml-lang", None), ("ahelp", "visibility", "visible"), ("img", "width", None), ("img", "height", None)],
    canchangetags = [("link", "name", None)]
    )
1050
class OpenOfficeChecker(StandardChecker):
    """StandardChecker with the OpenOffice.org settings (openofficeconfig) applied."""

    def __init__(self, **kwargs):
        """Merge openofficeconfig into the caller's checkerconfig (creating a
        fresh CheckerConfig when none was given) and initialise the base class."""
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(openofficeconfig)
        StandardChecker.__init__(self, **kwargs)
1059
# Checker settings merged into the configuration of MozillaChecker: "&" marks
# accelerators and the "accelerators" test is listed as critical.
mozillaconfig = CheckerConfig(
    accelmarkers = ["&"],
    varmatches = [("&", ";"), ("%", "%"), ("%", 1), ("$", "$"), ("$", None), ("#", 1), ("${", "}"), ("$(^", ")")],
    criticaltests = ["accelerators"]
    )
1065
class MozillaChecker(StandardChecker):
    """StandardChecker with the Mozilla settings (mozillaconfig) applied."""

    def __init__(self, **kwargs):
        """Merge mozillaconfig into the caller's checkerconfig (creating a
        fresh CheckerConfig when none was given) and initialise the base class."""
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(mozillaconfig)
        StandardChecker.__init__(self, **kwargs)
1074
1081
# Checker settings merged into the configuration of DrupalChecker: variables
# start with "%", "@" or "!".
drupalconfig = CheckerConfig(
    varmatches = [("%", None), ("@", None), ("!", None)],
    )
1085
class DrupalChecker(StandardChecker):
    """StandardChecker with the Drupal settings (drupalconfig) applied."""

    def __init__(self, **kwargs):
        """Merge drupalconfig into the caller's checkerconfig (creating a
        fresh CheckerConfig when none was given) and initialise the base class."""
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(drupalconfig)
        StandardChecker.__init__(self, **kwargs)
1094
# Checker settings merged into the configuration of GnomeChecker: "_" marks
# accelerators and "translator-credits" is the source string that holds the
# translator credits.
gnomeconfig = CheckerConfig(
    accelmarkers = ["_"],
    varmatches = [("%", 1), ("$(", ")")],
    credit_sources = [u"translator-credits"]
    )
1100
class GnomeChecker(StandardChecker):
    """StandardChecker with the GNOME settings (gnomeconfig) applied."""

    def __init__(self, **kwargs):
        """Merge gnomeconfig into the caller's checkerconfig (creating a
        fresh CheckerConfig when none was given) and initialise the base class."""
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(gnomeconfig)
        StandardChecker.__init__(self, **kwargs)

    def gconf(self, str1, str2):
        """Checks if we have any gconf config settings translated.

        Only applies when one of the unit's locations mentions
        'schemas.in'.  Raises FilterFailure listing the gconf attributes
        found in the source whose text is missing from the target.
        """
        if any('schemas.in' in location for location in self.locations):
            attributes = gconf_attribute_re.findall(str1)
            # The first and last character of each match are dropped before
            # looking for it in the target (presumably surrounding quotes
            # -- confirm against gconf_attribute_re).
            translated = [attribute for attribute in attributes
                          if attribute[1:-1] not in str2]
            if translated:
                raise FilterFailure(u"do not translate gconf attribute: %s" % (u", ".join(translated)))
        return True
1120
# Checker settings merged into the configuration of KdeChecker: "&" marks
# accelerators and the listed source strings hold translator credits.
kdeconfig = CheckerConfig(
    accelmarkers = ["&"],
    varmatches = [("%", 1)],
    credit_sources = [u"Your names", u"Your emails", u"ROLES_OF_TRANSLATORS"]
    )
1126
class KdeChecker(StandardChecker):
    """StandardChecker with the KDE settings (kdeconfig) applied."""

    def __init__(self, **kwargs):
        """Merge kdeconfig into the caller's checkerconfig (creating a fresh
        CheckerConfig when none was given) and initialise the base class."""
        # TODO allow setup of KDE plural and translator comments so that they do
        # not create false positives
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(kdeconfig)
        StandardChecker.__init__(self, **kwargs)
1137
# Checker settings merged into the configuration of CCLicenseChecker:
# variables are delimited by "@" on both sides.
cclicenseconfig = CheckerConfig(varmatches = [("@", "@")])
class CCLicenseChecker(StandardChecker):
    """StandardChecker with the Creative Commons settings (cclicenseconfig) applied."""

    def __init__(self, **kwargs):
        """Merge cclicenseconfig into the caller's checkerconfig (creating a
        fresh CheckerConfig when none was given) and initialise the base class."""
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(cclicenseconfig)
        StandardChecker.__init__(self, **kwargs)
1147
# Maps a project style name to the checker class used for it.  Note that
# "wx" reuses the KDE checker.
projectcheckers = {
    "openoffice": OpenOfficeChecker,
    "mozilla": MozillaChecker,
    "kde": KdeChecker,
    "wx": KdeChecker,
    "gnome": GnomeChecker,
    "creativecommons": CCLicenseChecker,
    "drupal": DrupalChecker,
    }
1157
1158
1160 """The standard checks for common checks on translation units."""
1164
1168
def nplurals(self, unit):
    """Check for the correct number of noun forms for plural translations.

    Units without plurals always pass.  For plural units the number of
    target strings must equal the target language's ``nplurals``.
    """
    if not unit.hasplural():
        return True
    expected = self.config.lang.nplurals
    # if we don't have a valid nplurals value, don't run the test
    if expected > 0:
        return len(unit.target.strings) == expected
    return True
1177
def hassuggestion(self, unit):
    """Check whether there is at least one suggested translation for this unit.

    Suggestions come from ``self.suggestion_store`` when one is set,
    otherwise from the alternative translations of an XLIFF unit.  The
    check passes (returns True) only when no suggestion was found.
    """
    store = getattr(self, 'suggestion_store', None)
    # Make sure the attribute exists on the checker from now on.
    self.suggestion_store = store
    if store:
        suggestions = store.findunits(unit.source)
    elif xliff and isinstance(unit, xliff.xliffunit):
        # TODO: we probably want to filter them somehow
        suggestions = unit.getalttrans()
    else:
        suggestions = []
    return not suggestions
1188
1189
def runtests(str1, str2, ignorelist=()):
    """Run the standard checks on one (source, target) pair.

    Both strings are normalised to unicode, wrapped in a translation unit
    and run through a StandardChecker that excludes the filters named in
    ``ignorelist``.  Every failure is printed; the failures are returned.
    """
    from translate.storage import base
    source = data.normalized_unicode(str1)
    target = data.normalized_unicode(str2)
    unit = base.TranslationUnit(source)
    unit.target = target
    failures = StandardChecker(excludefilters=ignorelist).run_filters(unit)
    for name in failures:
        # A single parenthesised argument prints identically whether print
        # is a statement or a function.
        print("failure: %s: %s\n %r\n %r" % (name, failures[name], source, target))
    return failures
1202
def batchruntests(pairs):
    """Run runtests() on a batch of (source, target) string pairs.

    A blank line is printed after each pair, followed by a final summary of
    how many pairs passed.
    """
    passed = 0
    for str1, str2 in pairs:
        # runtests() returns the failures it found, so a pair has passed
        # only when that result is empty.
        if not runtests(str1, str2):
            passed += 1
        print("")
    print("total: %d/%d pairs passed" % (passed, len(pairs)))
1211
# When run as a script, push a small set of sample (source, target) pairs
# through the standard checks and print the failures and a summary.
if __name__ == '__main__':
    testset = [(r"simple", r"somple"),
            (r"\this equals \that", r"does \this equal \that?"),
            (r"this \'equals\' that", r"this 'equals' that"),
            (r" start and end! they must match.", r"start and end! they must match."),
            (r"check for matching %variables marked like %this", r"%this %variable is marked"),
            (r"check for mismatching %variables marked like %this", r"%that %variable is marked"),
            (r"check for mismatching %variables% too", r"how many %variable% are marked"),
            (r"%% %%", r"%%"),
            (r"Row: %1, Column: %2", r"Mothalo: %1, Kholomo: %2"),
            (r"simple lowercase", r"it is all lowercase"),
            (r"simple lowercase", r"It Is All Lowercase"),
            (r"Simple First Letter Capitals", r"First Letters"),
            (r"SIMPLE CAPITALS", r"First Letters"),
            (r"SIMPLE CAPITALS", r"ALL CAPITALS"),
            (r"forgot to translate", r" ")
            ]
    batchruntests(testset)
1230
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Wed Nov 25 17:19:17 2009 | http://epydoc.sourceforge.net |