1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
"""This module stores information and functionality that relates to languages:
plural forms, language codes and names, and Unicode normalisation."""
23
24 import unicodedata
25
26 from translate.storage.placeables import StringElem
27
28
# Every plural equation is stored WITHOUT a trailing ";" so that all entries
# have the same shape.
languages = {
    'af': ('Afrikaans', 2, '(n != 1)'),
    'ak': ('Akan', 2, 'n > 1'),
    'am': ('Amharic', 2, 'n > 1'),
    'ar': ('Arabic', 6, 'n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 && n%100<=99 ? 4 : 5'),
    'arn': ('Mapudungun; Mapuche', 2, 'n > 1'),
    'az': ('Azerbaijani', 2, '(n != 1)'),
    'be': ('Belarusian', 3, 'n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'),
    'bg': ('Bulgarian', 2, '(n != 1)'),
    'bn': ('Bengali', 2, '(n != 1)'),
    'bn_IN': ('Bengali (India)', 2, '(n != 1)'),
    'bo': ('Tibetan', 1, '0'),
    'bs': ('Bosnian', 3, 'n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'),
    'ca': ('Catalan; Valencian', 2, '(n != 1)'),
    'cs': ('Czech', 3, '(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2'),
    'csb': ('Kashubian', 3, 'n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'),
    'cy': ('Welsh', 2, '(n==2) ? 1 : 0'),
    'da': ('Danish', 2, '(n != 1)'),
    'de': ('German', 2, '(n != 1)'),
    'dz': ('Dzongkha', 1, '0'),
    'el': ('Greek', 2, '(n != 1)'),
    'en': ('English', 2, '(n != 1)'),
    'en_GB': ('English (United Kingdom)', 2, '(n != 1)'),
    'en_ZA': ('English (South Africa)', 2, '(n != 1)'),
    'eo': ('Esperanto', 2, '(n != 1)'),
    'es': ('Spanish; Castilian', 2, '(n != 1)'),
    'et': ('Estonian', 2, '(n != 1)'),
    'eu': ('Basque', 2, '(n != 1)'),
    'fa': ('Persian', 1, '0'),
    'fi': ('Finnish', 2, '(n != 1)'),
    'fil': ('Filipino; Pilipino', 2, '(n > 1)'),
    'fo': ('Faroese', 2, '(n != 1)'),
    'fr': ('French', 2, '(n > 1)'),
    'fur': ('Friulian', 2, '(n != 1)'),
    'fy': ('Frisian', 2, '(n != 1)'),
    'ga': ('Irish', 3, 'n==1 ? 0 : n==2 ? 1 : 2'),
    'gl': ('Galician', 2, '(n != 1)'),
    'gu': ('Gujarati', 2, '(n != 1)'),
    'gun': ('Gun', 2, '(n > 1)'),
    'ha': ('Hausa', 2, '(n != 1)'),
    'he': ('Hebrew', 2, '(n != 1)'),
    'hi': ('Hindi', 2, '(n != 1)'),
    'hy': ('Armenian', 1, '0'),
    'hr': ('Croatian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'hu': ('Hungarian', 2, '(n != 1)'),
    'id': ('Indonesian', 1, '0'),
    'is': ('Icelandic', 2, '(n != 1)'),
    'it': ('Italian', 2, '(n != 1)'),
    'ja': ('Japanese', 1, '0'),
    'jv': ('Javanese', 2, '(n != 1)'),
    'ka': ('Georgian', 1, '0'),
    'km': ('Khmer', 1, '0'),
    'kn': ('Kannada', 2, '(n != 1)'),
    'ko': ('Korean', 1, '0'),
    'ku': ('Kurdish', 2, '(n != 1)'),
    'kw': ('Cornish', 4, '(n==1) ? 0 : (n==2) ? 1 : (n == 3) ? 2 : 3'),
    'ky': ('Kirghiz; Kyrgyz', 1, '0'),
    'lb': ('Luxembourgish; Letzeburgesch', 2, '(n != 1)'),
    'ln': ('Lingala', 2, '(n > 1)'),
    'lo': ('Lao', 1, '0'),
    'lt': ('Lithuanian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'lv': ('Latvian', 3, '(n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2)'),
    'mg': ('Malagasy', 2, '(n > 1)'),
    'mi': ('Maori', 2, '(n > 1)'),
    'mk': ('Macedonian', 2, 'n==1 || n%10==1 ? 0 : 1'),
    'ml': ('Malayalam', 2, '(n != 1)'),
    'mn': ('Mongolian', 2, '(n != 1)'),
    'mr': ('Marathi', 2, '(n != 1)'),
    'ms': ('Malay', 1, '0'),
    'mt': ('Maltese', 4, '(n==1 ? 0 : n==0 || ( n%100>1 && n%100<11) ? 1 : (n%100>10 && n%100<20 ) ? 2 : 3)'),
    'nah': ('Nahuatl languages', 2, '(n != 1)'),
    'nap': ('Neapolitan', 2, '(n != 1)'),
    'nb': ('Norwegian Bokmal', 2, '(n != 1)'),
    'ne': ('Nepali', 2, '(n != 1)'),
    'nl': ('Dutch; Flemish', 2, '(n != 1)'),
    'nn': ('Norwegian Nynorsk', 2, '(n != 1)'),
    'nso': ('Pedi; Sepedi; Northern Sotho', 2, '(n > 1)'),
    'or': ('Oriya', 2, '(n != 1)'),
    'pa': ('Panjabi; Punjabi', 2, '(n != 1)'),
    'pap': ('Papiamento', 2, '(n != 1)'),
    'pl': ('Polish', 3, '(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'pms': ('Piemontese', 2, '(n != 1)'),
    'ps': ('Pushto; Pashto', 2, '(n != 1)'),
    'pt': ('Portuguese', 2, '(n != 1)'),
    'pt_BR': ('Portuguese (Brazil)', 2, '(n > 1)'),
    'ro': ('Romanian', 3, '(n==1 ? 0 : (n==0 || (n%100 > 0 && n%100 < 20)) ? 1 : 2)'),
    'ru': ('Russian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'sco': ('Scots', 2, '(n != 1)'),
    'sk': ('Slovak', 3, '(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2'),
    'sl': ('Slovenian', 4, '(n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3)'),
    'so': ('Somali', 2, '(n != 1)'),
    'sq': ('Albanian', 2, '(n != 1)'),
    'sr': ('Serbian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'su': ('Sundanese', 1, '0'),
    'sv': ('Swedish', 2, '(n != 1)'),
    'ta': ('Tamil', 2, '(n != 1)'),
    'te': ('Telugu', 2, '(n != 1)'),
    'tg': ('Tajik', 2, '(n != 1)'),
    'ti': ('Tigrinya', 2, '(n > 1)'),
    'th': ('Thai', 1, '0'),
    'tk': ('Turkmen', 2, '(n != 1)'),
    'tr': ('Turkish', 1, '0'),
    'uk': ('Ukrainian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'vi': ('Vietnamese', 1, '0'),
    'wa': ('Walloon', 2, '(n > 1)'),
    'zh_CN': ('Chinese (China)', 1, '0'),
    'zh_HK': ('Chinese (Hong Kong)', 1, '0'),
    'zh_TW': ('Chinese (Taiwan)', 1, '0'),
}
"""Dictionary of language data.
The language code is the dictionary key (which may contain country codes and modifiers).
The value is a tuple: (Full name in English, nplurals, plural equation).
The plural equation is a C-style expression in C{n} with no trailing semicolon."""
144
def simplercode(code):
    """This attempts to simplify the given language code by ignoring country
    codes, for example.

    Only the last component of the code is dropped per call.

    @see:
      - U{http://www.rfc-editor.org/rfc/bcp/bcp47.txt}
      - U{http://www.rfc-editor.org/rfc/rfc4646.txt}
      - U{http://www.rfc-editor.org/rfc/rfc4647.txt}
      - U{http://www.w3.org/International/articles/language-tags/}

    @param code: The language code to simplify.
    @return: C{code} itself when it is empty or C{None}; otherwise the part
        of C{code} before its last separator, or C{""} when the code has no
        separator left to strip.
    """
    if not code:
        return code

    # The separator is searched for in the normalized form, but the result is
    # sliced out of the original string so the caller's spelling is kept.
    # NOTE(review): this assumes normalize_code() preserves the string length
    # and turns every separator into "-" -- confirm against its definition.
    cut = normalize_code(code).rfind('-')
    if cut < 0:
        return ""
    return code[:cut]
164
165
expansion_factors = dict(
    af=0.1,
    ar=-0.09,
    es=0.21,
    fr=0.28,
    it=0.2,
)
"""Source to target string length expansion factors, keyed by language code."""
174
175 import gettext
176 import locale
177 import re
178 import os
179
iso639 = dict()
"""Cache of ISO 639 (language name) gettext functions, keyed by language code"""
iso3166 = dict()
"""Cache of ISO 3166 (country name) gettext functions, keyed by language code"""
184
# A full language code: 2-3 lowercase letters, an optional "_XX"/"-XX" region
# (2-3 uppercase letters) and an optional "@modifier".
langcode_re = re.compile(r"^[a-z]{2,3}([_-][A-Z]{2,3}|)(@[a-zA-Z0-9]+|)$")
# Only the tail of a code: a mandatory region followed by an optional modifier.
variant_re = re.compile(r"^[_-][A-Z]{2,3}(@[a-zA-Z0-9]+|)$")


# NOTE(review): the def line of this function was absent from the reviewed
# text; the name is restored and the parameters are taken from the body --
# confirm against callers.
def languagematch(languagecode, otherlanguagecode):
    """matches a languagecode to another, ignoring regions in the second

    @param languagecode: The code to match against, or C{None} to accept any
        well-formed language code.
    @param otherlanguagecode: The code being tested.
    @return: A true value (C{True} or a regex match object) when the codes
        match, a false value (C{False} or C{None}) otherwise.
    """
    if languagecode is None:
        # No reference code: only check that the other code is well formed.
        return langcode_re.match(otherlanguagecode)
    if languagecode == otherlanguagecode:
        return True
    if not otherlanguagecode.startswith(languagecode):
        return False
    # Same prefix: what is left over must be a region (plus optional modifier).
    return variant_re.match(otherlanguagecode[len(languagecode):])
194
# Splits a name of the form "language (country)" into its two parts.
dialect_name_re = re.compile(r"(.+)\s\(([^)]+)\)$")


# NOTE(review): the def line of this function was absent from the reviewed
# text; the name is restored and the signature follows the docstring and the
# gettext_lang()/gettext_country() helpers -- confirm against callers.
def tr_lang(langcode=None):
    """Gives a function that can translate a language name, even in the form C{"language (country)"},
    into the language with iso code langcode, or the system language if no language is specified.

    @param langcode: Code of the language to translate names into.
    @return: A function taking a language name and returning its translation.
    """
    langfunc = gettext_lang(langcode)
    countryfunc = gettext_country(langcode)

    def handlelanguage(name):
        match = dialect_name_re.match(name)
        if match is None:
            # A plain language name without a "(country)" suffix.
            return langfunc(name)
        # The language and the country are translated separately and the
        # "language (country)" layout is rebuilt around the results.
        language, country = match.groups()
        return u"%s (%s)" % (langfunc(language), countryfunc(country))

    return handlelanguage
212
def gettext_lang(langcode=None):
    """Returns a gettext function to translate language names into the given
    language, or the system language if no language is specified.

    The function is looked up in the C{iso_639} gettext domain and cached in
    C{iso639} under the language code (C{""} for the system language).

    @param langcode: Code of the language to translate into; C{None} or
        C{""} selects the system language.
    @return: A callable taking a language name and returning its translation.
    """
    # Normalise before the lookup: the system-language entry is stored under
    # "", so looking it up as None could never hit the cache.
    if not langcode:
        langcode = ""
    if langcode not in iso639:
        if langcode:
            t = gettext.translation('iso_639', languages=[langcode], fallback=True)
        elif os.name == "nt":
            # On Windows the default locale is passed explicitly.
            # NOTE(review): presumably because the environment variables
            # gettext consults are usually unset there -- confirm.
            system_language = locale.getdefaultlocale()[0]
            # getdefaultlocale() can report None, which gettext cannot
            # expand; let gettext choose for itself in that case.
            preferred = [system_language] if system_language else None
            t = gettext.translation('iso_639', languages=preferred, fallback=True)
        else:
            t = gettext.translation('iso_639', fallback=True)
        # ugettext() only exists on Python 2; on Python 3 gettext() already
        # returns text.
        iso639[langcode] = getattr(t, "ugettext", t.gettext)
    return iso639[langcode]
228
def gettext_country(langcode=None):
    """Returns a gettext function to translate country names into the given
    language, or the system language if no language is specified.

    The function is looked up in the C{iso_3166} gettext domain and cached in
    C{iso3166} under the language code (C{""} for the system language).

    @param langcode: Code of the language to translate into; C{None} or
        C{""} selects the system language.
    @return: A callable taking a country name and returning its translation.
    """
    # Normalise before the lookup: the system-language entry is stored under
    # "", so looking it up as None could never hit the cache.
    if not langcode:
        langcode = ""
    if langcode not in iso3166:
        if langcode:
            t = gettext.translation('iso_3166', languages=[langcode], fallback=True)
        elif os.name == "nt":
            # On Windows the default locale is passed explicitly.
            # NOTE(review): presumably because the environment variables
            # gettext consults are usually unset there -- confirm.
            system_language = locale.getdefaultlocale()[0]
            # getdefaultlocale() can report None, which gettext cannot
            # expand; let gettext choose for itself in that case.
            preferred = [system_language] if system_language else None
            t = gettext.translation('iso_3166', languages=preferred, fallback=True)
        else:
            t = gettext.translation('iso_3166', fallback=True)
        # ugettext() only exists on Python 2; on Python 3 gettext() already
        # returns text.
        iso3166[langcode] = getattr(t, "ugettext", t.gettext)
    return iso3166[langcode]
244
def normalize(string, normal_form="NFC"):
    """Return a unicode string in its normalized form

    @param string: The string to be normalized
    @param normal_form: NFC (default), NFD, NFKC, NFKD
    @return: Normalized string, or C{None} when C{string} is C{None}
    """
    if string is None:
        return None
    return unicodedata.normalize(normal_form, string)
256
def forceunicode(string):
    """Ensures that the string is in unicode.

    @param string: A text string
    @type string: Unicode, String
    @return: String converted to Unicode and normalized as needed.
    @rtype: Unicode
    """
    if string is None:
        return None
    # "bytes" is an alias of "str" on Python 2, so this is the same check
    # there, and it still selects byte strings on Python 3 (where "str" is
    # already text and has no decode()).
    if isinstance(string, bytes):
        # Byte strings may carry their own "encoding" attribute; UTF-8 is
        # assumed when they do not.
        encoding = getattr(string, "encoding", "utf-8")
        string = string.decode(encoding)
    elif isinstance(string, StringElem):
        # type(u"") is "unicode" on Python 2 and "str" on Python 3.
        text_type = type(u"")
        string = text_type(string)
    return string
273
# NOTE(review): the def line of this function was absent from the reviewed
# text; the name is restored and the parameter is taken from the body --
# confirm against callers.
def normalized_unicode(string):
    """Forces the string to unicode and does normalization.

    @param string: The string to convert; passed through forceunicode() and
        then normalize() with its default normal form.
    @return: The result of normalize() on the converted string.
    """
    return normalize(forceunicode(string))
277
280
def simplify_to_common(language_code, languages=languages):
    """Simplify language code to the most commonly used form for the
    language, stripping country information for languages that tend
    not to be localized differently for different countries

    @param language_code: The language code to simplify.
    @param languages: Collection of known language codes to match against;
        defaults to the module's C{languages} dictionary.
    @return: C{language_code} when it is a known code (compared in normalized
        form) or cannot be simplified any further; otherwise the result of
        simplifying the shortened code.
    """
    simpler = simplercode(language_code)
    wanted = normalize_code(language_code)
    is_known = any(normalize_code(key) == wanted for key in languages)
    if is_known or simpler == "":
        return language_code
    # Strip one more component and try again with the same set of codes.
    return simplify_to_common(simpler, languages)
290