1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 """classes that hold units of .po files (pounit) or entire files (pofile)
22 gettext-style .po (or .pot) files are used in translations for KDE et al (see kbabel)"""
23
24 from __future__ import generators
25 from translate.misc.multistring import multistring
26 from translate.misc import quote
27 from translate.misc import textwrap
28 from translate.lang import data
29 from translate.storage import pocommon, base
30 import re
31 import copy
32 import cStringIO
33 import poparser
34
lsep = "\n#: "
"""Separator for #: entries"""

# Quoting / unquoting tables for po strings.
# po_unescape_map: two-character escape sequence -> the character it denotes.
po_unescape_map = {
    "\\r": "\r",
    "\\t": "\t",
    '\\"': '"',
    '\\n': '\n',
    '\\\\': '\\',
}
# po_escape_map: the inverse lookup, raw character -> escape sequence.
po_escape_map = dict((char, escape) for (escape, char) in po_unescape_map.items())
42
44 """Escapes a line for po format. assumes no \n occurs in the line.
45
46 @param line: unescaped text
47 """
48 special_locations = []
49 for special_key in po_escape_map:
50 special_locations.extend(quote.find_all(line, special_key))
51 special_locations = dict.fromkeys(special_locations).keys()
52 special_locations.sort()
53 escaped_line = ""
54 last_location = 0
55 for location in special_locations:
56 escaped_line += line[last_location:location]
57 escaped_line += po_escape_map[line[location:location+1]]
58 last_location = location+1
59 escaped_line += line[last_location:]
60 return escaped_line
61
65
67 """Wrap text for po files."""
68 wrappedlines = textwrap.wrap(line, 76, replace_whitespace=False, expand_tabs=False, drop_whitespace=False)
69
70
71 if len(wrappedlines) > 1:
72 for index, line in enumerate(wrappedlines[1:]):
73 if line.startswith(' '):
74
75 wrappedlines[index+1] = line[1:]
76
77
78 wrappedlines[index] += ' '
79 return wrappedlines
80
82 """quotes the given text for a PO file, returning quoted and escaped lines"""
83 polines = []
84 if text is None:
85 return polines
86 lines = text.split("\n")
87 if len(lines) > 1 or (len(lines) == 1 and len(lines[0]) > 71):
88 if len(lines) != 2 or lines[1]:
89 polines.extend(['""'])
90 for line in lines[:-1]:
91
92 lns = wrapline(line)
93 if len(lns) > 0:
94 for ln in lns[:-1]:
95 polines.extend(['"' + escapeforpo(ln) + '"'])
96 if lns[-1]:
97 polines.extend(['"' + escapeforpo(lns[-1]) + '\\n"'])
98 else:
99 polines.extend(['"\\n"'])
100 if lines[-1]:
101 polines.extend(['"' + escapeforpo(line) + '"' for line in wrapline(lines[-1])])
102 return polines
103
105 """Remove quote and unescape line from po file.
106
107 @param line: a quoted line from a po file (msgid or msgstr)
108 """
109 extracted = quote.extractwithoutquotes(line, '"', '"', '\\', includeescapes=unescapehandler)[0]
110 return extracted
111
114
116 """Tests whether the given encoding is known in the python runtime, or returns utf-8.
117 This function is used to ensure that a valid encoding is always used."""
118 if encoding == "CHARSET" or encoding == None: return 'utf-8'
119 return encoding
120
121
122
123
124
125
126
127
129 return lst == [] or len(lst) == 1 and lst[0] == '""'
130
132 left = string.find('"')
133 right = string.rfind('"')
134 if right > -1:
135 return string[left:right+1]
136 else:
137 return string[left:] + '"'
138
139 -class pounit(pocommon.pounit):
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154 __shallow__ = ['_store']
155
def __init__(self, source=None, encoding="UTF-8"):
    """Create an empty unit and pass C{source} on to the base class.

    @param source: handed unchanged to C{pocommon.pounit.__init__}.
    @param encoding: encoding name; it is run through C{encodingToUse}
        before being stored on C{self._encoding}.
    """
    self._encoding = encodingToUse(encoding)
    self.obsolete = False
    self._initallcomments(blankall=True)
    # Every message part starts out as its own fresh empty list: first the
    # "previous" parts, then the live parts, then the obsolete ("#~") ones.
    for part in (
            "prev_msgctxt", "prev_msgid", "prev_msgid_plural",
            "msgctxt", "msgid", "msgid_pluralcomments", "msgid_plural",
            "msgstr",
            "obsoletemsgctxt", "obsoletemsgid",
            "obsoletemsgid_pluralcomments", "obsoletemsgid_plural",
            "obsoletemsgstr"):
        setattr(self, part, [])
    # Kept as the final step, as in the original ordering.
    # NOTE(review): the base initialiser presumably assigns `source`, which
    # would need the attributes above to exist already -- confirm.
    pocommon.pounit.__init__(self, source)
174
184
192
193 allcomments = property(_get_all_comments)
194
203
221
225
227 """Sets the msgid to the given (unescaped) value.
228
229 @param source: an unescaped source string.
230 """
231 self.msgid, self.msgid_plural = self._set_source_vars(source)
232 source = property(getsource, setsource)
233
235 """Returns the unescaped msgid"""
236 return self._get_source_vars(self.prev_msgid, self.prev_msgid_plural)
237
239 """Sets the msgid to the given (unescaped) value.
240
241 @param source: an unescaped source string.
242 """
243 self.prev_msgid, self.prev_msgid_plural = self._set_source_vars(source)
244 prev_source = property(_get_prev_source, _set_prev_source)
245
253
255 """Sets the msgstr to the given (unescaped) value"""
256 self._rich_target = None
257 if isinstance(target, str):
258 target = target.decode(self._encoding)
259 if self.hasplural():
260 if isinstance(target, multistring):
261 target = target.strings
262 elif isinstance(target, basestring):
263 target = [target]
264 elif isinstance(target,(dict, list)):
265 if len(target) == 1:
266 target = target[0]
267 else:
268 raise ValueError("po msgid element has no plural but msgstr has %d elements (%s)" % (len(target), target))
269 templates = self.msgstr
270 if isinstance(templates, list):
271 templates = {0: templates}
272 if isinstance(target, list):
273 self.msgstr = dict([(i, quoteforpo(target[i])) for i in range(len(target))])
274 elif isinstance(target, dict):
275 self.msgstr = dict([(i, quoteforpo(targetstring)) for i, targetstring in target.iteritems()])
276 else:
277 self.msgstr = quoteforpo(target)
278 target = property(gettarget, settarget)
279
281 """Return comments based on origin value (programmer, developer, source code and translator)"""
282 if origin == None:
283 comments = u"".join([comment[2:] for comment in self.othercomments])
284 comments += u"".join([comment[3:] for comment in self.automaticcomments])
285 elif origin == "translator":
286 comments = u"".join ([comment[2:] for comment in self.othercomments])
287 elif origin in ["programmer", "developer", "source code"]:
288 comments = u"".join([comment[3:] for comment in self.automaticcomments])
289 else:
290 raise ValueError("Comment type not valid")
291
292 return comments[:-1]
293
def addnote(self, text, origin=None, position="append"):
    """This is modeled on the XLIFF method. See xliff.py::xliffunit.addnote

    Adds C{text} as comment lines to this unit. Each line of C{text} becomes
    one comment line terminated by a newline.

    @param text: the note to add; empty or whitespace-only text is ignored.
    @param origin: "programmer", "developer" or "source code" stores the
        note in C{self.automaticcomments} with a "#. " prefix; any other
        value stores it in C{self.othercomments} with a "# " prefix.
    @param position: "append" adds the note after the existing comments;
        any other value places it before them.
    """
    # ignore empty strings and strings without non-space characters
    if not (text and text.strip()):
        return
    text = data.forceunicode(text)

    # Always bind the flag. Previously it was only assigned for automatic
    # comments, so prepending a translator note raised UnboundLocalError.
    autocomments = origin in ["programmer", "developer", "source code"]
    if autocomments:
        commentlist = self.automaticcomments
        linestart = "#. "
    else:
        commentlist = self.othercomments
        linestart = "# "

    newcomments = [linestart + line + "\n" for line in text.split("\n")]
    if position == "append":
        # Extend in place so the list already held by the unit is updated.
        commentlist += newcomments
    else:
        # Prepending builds a new list and rebinds the attribute.
        newcomments += commentlist
        if autocomments:
            self.automaticcomments = newcomments
        else:
            self.othercomments = newcomments
316
318 """Remove all the translator's notes (other comments)"""
319 self.othercomments = []
320
322
323 new_unit = self.__class__()
324
325
326 shallow = set(self.__shallow__)
327
328 for key, value in self.__dict__.iteritems():
329 if key not in shallow:
330 setattr(new_unit, key, copy.deepcopy(value))
331
332 for key in set(shallow):
333 setattr(new_unit, key, getattr(self, key))
334
335
336 memo[id(self)] = self
337
338 return new_unit
339
341 return copy.deepcopy(self)
342
348
350 if isinstance(self.msgstr, dict):
351 combinedstr = "\n".join([unquotefrompo(msgstr).strip() for msgstr in self.msgstr.itervalues()])
352 return len(combinedstr.strip())
353 else:
354 return len(unquotefrompo(self.msgstr).strip())
355
def merge(self, otherpo, overwrite=False, comments=True, authoritative=False):
    """Merges the otherpo (with the same msgid) into this one.

    Overwrite non-blank self.msgstr only if overwrite is True
    merge comments only if comments is True

    @param otherpo: the unit to merge from. If it is not a L{pounit}, the
        merge is delegated to the base class and nothing else is done.
        Note that C{otherpo.target} may be rewritten here (a KDE-style
        "_: ...\\n" comment is stripped from it before it is copied).
    @param overwrite: take otherpo's target even if this unit is already
        translated.
    @param comments: also merge the comment lists.
    @param authoritative: when true, the automatic, msgid and source
        comments of otherpo are NOT merged into this unit.
    """

    def mergelists(list1, list2, split=False):
        # Merges list2 into list1 in place; returns nothing.

        # Decode where necessary: if either list holds a unicode item,
        # decode the byte strings of both lists as UTF-8 (in place).
        if unicode in [type(item) for item in list2] + [type(item) for item in list1]:
            for position, item in enumerate(list1):
                if isinstance(item, str):
                    list1[position] = item.decode("utf-8")
            for position, item in enumerate(list2):
                if isinstance(item, str):
                    list2[position] = item.decode("utf-8")

        # Determine the newline style from the first entry of list1; the
        # last matching candidate wins. An empty list1 defaults to "\n".
        lineend = ""
        if list1 and list1[0]:
            for candidate in ["\n", "\r", "\n\r"]:
                if list1[0].endswith(candidate):
                    lineend = candidate
            # No-op: lineend is already "" when no candidate matched.
            if not lineend:
                lineend = ""
        else:
            lineend = "\n"

        # Split if directed to do so: every entry is split on whitespace,
        # its first token is taken as the comment prefix (the last entry
        # seen wins) and the remaining tokens are compared individually.
        if split:
            splitlist1 = []
            splitlist2 = []
            prefix = "#"
            for item in list1:
                splitlist1.extend(item.split()[1:])
                prefix = item.split()[0]
            for item in list2:
                splitlist2.extend(item.split()[1:])
                prefix = item.split()[0]
            # Each token of list2 not already in list1 becomes its own line.
            list1.extend(["%s %s%s" % (prefix, item, lineend) for item in splitlist2 if not item in splitlist1])
        else:
            # Normal merge, but conform to the newline style of list1.
            if list1 != list2:
                for item in list2:
                    if lineend:
                        item = item.rstrip() + lineend
                    # Skip lines already present, except that entries
                    # shorter than 5 characters are always appended.
                    # NOTE(review): presumably so near-empty comment lines
                    # are kept as separators -- confirm.
                    if item not in list1 or len(item) < 5:
                        list1.append(item)
    if not isinstance(otherpo, pounit):
        # Not a pounit: let the base class handle the merge.
        # `authoritative` is not forwarded.
        super(pounit, self).merge(otherpo, overwrite, comments)
        return
    if comments:
        mergelists(self.othercomments, otherpo.othercomments)
        mergelists(self.typecomments, otherpo.typecomments)
        if not authoritative:
            # Only a non-authoritative unit takes over the other unit's
            # automatic comments, msgid comments and locations.
            mergelists(self.automaticcomments, otherpo.automaticcomments)
            mergelists(self.msgidcomments, otherpo.msgidcomments)
            mergelists(self.sourcecomments, otherpo.sourcecomments, split=True)
    if not self.istranslated() or overwrite:
        # Remove kde-style comments from the translation (if any).
        # This assigns to otherpo.target, i.e. it modifies the other unit.
        if self._extract_msgidcomments(otherpo.target):
            otherpo.target = otherpo.target.replace('_: ' + otherpo._extract_msgidcomments()+ '\n', '')
        self.target = otherpo.target
        # A differing source or context makes the copied target fuzzy;
        # otherwise the fuzzy state of otherpo is taken over.
        if self.source != otherpo.source or self.getcontext() != otherpo.getcontext():
            self.markfuzzy()
        else:
            self.markfuzzy(otherpo.isfuzzy())
    elif not otherpo.istranslated():
        # Our translation is kept; flag it if the source text differs.
        if self.source != otherpo.source:
            self.markfuzzy()
    else:
        # Both units are translated and ours is kept; flag a conflict.
        if self.target != otherpo.target:
            self.markfuzzy()
432
434
435
436 return (is_null(self.msgid)
437 and not is_null(self.msgstr)
438 and self.msgidcomments == []
439 and is_null(self.msgctxt)
440 )
441
443 if self.isheader() or len(self.msgidcomments):
444 return False
445 if (self._msgidlen() == 0) and (self._msgstrlen() == 0) and (is_null(self.msgctxt)):
446 return True
447 return False
448
449
450
451
456
464
474
477
480
483
486
489
492
494 """Makes this unit obsolete"""
495 self.obsolete = True
496 if self.msgctxt:
497 self.obsoletemsgctxt = self.msgctxt
498 if self.msgid:
499 self.obsoletemsgid = self.msgid
500 self.msgid = []
501 if self.msgidcomments:
502 self.obsoletemsgidcomments = self.msgidcomments
503 self.msgidcomments = []
504 if self.msgid_plural:
505 self.obsoletemsgid_plural = self.msgid_plural
506 self.msgid_plural = []
507 if self.msgstr:
508 self.obsoletemsgstr = self.msgstr
509 self.msgstr = []
510 self.sourcecomments = []
511 self.automaticcomments = []
512
514 """Makes an obsolete unit normal"""
515 self.obsolete = False
516 if self.obsoletemsgctxt:
517 self.msgid = self.obsoletemsgctxt
518 self.obsoletemsgctxt = []
519 if self.obsoletemsgid:
520 self.msgid = self.obsoletemsgid
521 self.obsoletemsgid = []
522 if self.obsoletemsgidcomments:
523 self.msgidcomments = self.obsoletemsgidcomments
524 self.obsoletemsgidcomments = []
525 if self.obsoletemsgid_plural:
526 self.msgid_plural = self.obsoletemsgid_plural
527 self.obsoletemsgid_plural = []
528 if self.obsoletemsgstr:
529 self.msgstr = self.obsoletemsgstr
530 self.obsoletemgstr = []
531
533 """returns whether this pounit contains plural strings..."""
534 return len(self.msgid_plural) > 0
535
538
540 if isinstance(partlines, dict):
541 partkeys = partlines.keys()
542 partkeys.sort()
543 return "".join([self._getmsgpartstr("%s[%d]" % (partname, partkey), partlines[partkey], partcomments) for partkey in partkeys])
544 partstr = partname + " "
545 partstartline = 0
546 if len(partlines) > 0 and len(partcomments) == 0:
547 partstr += partlines[0]
548 partstartline = 1
549 elif len(partcomments) > 0:
550 if len(partlines) > 0 and len(unquotefrompo(partlines[:1])) == 0:
551
552 partstr += partlines[0] + '\n'
553
554 if len(partlines) > 1:
555 partstartline += 1
556 else:
557
558 partstr += '""\n'
559
560 if len(partcomments) > 1:
561 combinedcomment = []
562 for comment in partcomments:
563 comment = unquotefrompo([comment])
564 if comment.startswith("_:"):
565 comment = comment[len("_:"):]
566 if comment.endswith("\\n"):
567 comment = comment[:-len("\\n")]
568
569 combinedcomment.append(comment)
570 partcomments = quoteforpo("_:%s" % "".join(combinedcomment))
571
572 partstr += "\n".join(partcomments)
573 partstr = quote.rstripeol(partstr)
574 else:
575 partstr += '""'
576 partstr += '\n'
577
578 for partline in partlines[partstartline:]:
579 partstr += partline + '\n'
580 return partstr
581
583 """encodes unicode strings and returns other strings unchanged"""
584 if isinstance(output, unicode):
585 encoding = encodingToUse(getattr(self, "encoding", "UTF-8"))
586 return output.encode(encoding)
587 return output
588
590 """convert to a string. double check that unicode is handled somehow here"""
591 output = self._getoutput()
592 return self._encodeifneccessary(output)
593
595 """return this po element as a string"""
596 def add_prev_msgid_lines(lines, header, var):
597 if len(var) > 0:
598 lines.append("#| %s %s\n" % (header, var[0]))
599 lines.extend("#| %s\n" % line for line in var[1:])
600
601 def add_prev_msgid_info(lines):
602 add_prev_msgid_lines(lines, 'msgctxt', self.prev_msgctxt)
603 add_prev_msgid_lines(lines, 'msgid', self.prev_msgid)
604 add_prev_msgid_lines(lines, 'msgid_plural', self.prev_msgid_plural)
605
606 lines = []
607 lines.extend(self.othercomments)
608 if self.isobsolete():
609 lines.extend(self.typecomments)
610 obsoletelines = []
611 if self.obsoletemsgctxt:
612 obsoletelines.append(self._getmsgpartstr("#~ msgctxt", self.obsoletemsgctxt))
613 obsoletelines.append(self._getmsgpartstr("#~ msgid", self.obsoletemsgid, self.obsoletemsgidcomments))
614 if self.obsoletemsgid_plural or self.obsoletemsgid_pluralcomments:
615 obsoletelines.append(self._getmsgpartstr("#~ msgid_plural", self.obsoletemsgid_plural, self.obsoletemsgid_pluralcomments))
616 obsoletelines.append(self._getmsgpartstr("#~ msgstr", self.obsoletemsgstr))
617 for index, obsoleteline in enumerate(obsoletelines):
618
619 obsoletelines[index] = obsoleteline.replace('\n"', '\n#~ "')
620 lines.extend(obsoletelines)
621 lines = [self._encodeifneccessary(line) for line in lines]
622 return "".join(lines)
623
624
625 if is_null(self.msgid):
626 if not (self.isheader() or self.getcontext() or self.sourcecomments):
627 return "".join(lines)
628 lines.extend(self.automaticcomments)
629 lines.extend(self.sourcecomments)
630 lines.extend(self.typecomments)
631 add_prev_msgid_info(lines)
632 if self.msgctxt:
633 lines.append(self._getmsgpartstr("msgctxt", self.msgctxt))
634 lines.append(self._getmsgpartstr("msgid", self.msgid, self.msgidcomments))
635 if self.msgid_plural or self.msgid_pluralcomments:
636 lines.append(self._getmsgpartstr("msgid_plural", self.msgid_plural, self.msgid_pluralcomments))
637 lines.append(self._getmsgpartstr("msgstr", self.msgstr))
638 lines = [self._encodeifneccessary(line) for line in lines]
639 postr = "".join(lines)
640 return postr
641
643 """Get a list of locations from sourcecomments in the PO unit
644
645 rtype: List
646 return: A list of the locations with '#: ' stripped
647
648 """
649 locations = []
650 for sourcecomment in self.sourcecomments:
651 locations += quote.rstripeol(sourcecomment)[3:].split()
652 return locations
653
655 """Add a location to sourcecomments in the PO unit
656
657 @param location: Text location e.g. 'file.c:23' does not include #:
658 @type location: String
659
660 """
661 self.sourcecomments.append("#: %s\n" % location)
662
673
def getcontext(self):
    """Return the context string of this unit.

    The value is the result of C{unquotefrompo(self.msgctxt)} followed by
    whatever C{self._extract_msgidcomments()} returns.
    """
    msgctxt_text = unquotefrompo(self.msgctxt)
    msgid_comment = self._extract_msgidcomments()
    return msgctxt_text + msgid_comment
677
679 """Returns a unique identifier for this unit."""
680 context = self.getcontext()
681
682
683
684
685
686 id = self.source
687 if self.msgidcomments:
688 id = "_: %s\n%s" % (context, id)
689 elif context:
690 id = "%s\04%s" % (context, id)
691 return id
692
693 -class pofile(pocommon.pofile):
694 """this represents a .po file containing various units"""
695 UnitClass = pounit
697 """construct a pofile, optionally reading in from inputfile.
698 encoding can be specified but otherwise will be read from the PO header"""
699 self.UnitClass = unitclass
700 pocommon.pofile.__init__(self, unitclass=unitclass)
701 self.units = []
702 self.filename = ''
703 self._encoding = encodingToUse(encoding)
704 if inputfile is not None:
705 self.parse(inputfile)
706
708 """Deprecated: changes the encoding on the file."""
709
710
711
712 raise DeprecationWarning
713
714 self._encoding = encodingToUse(newencoding)
715 if not self.units:
716 return
717 header = self.header()
718 if not header or header.isblank():
719 return
720 charsetline = None
721 headerstr = unquotefrompo(header.msgstr)
722 for line in headerstr.split("\n"):
723 if not ":" in line: continue
724 key, value = line.strip().split(":", 1)
725 if key.strip() != "Content-Type": continue
726 charsetline = line
727 if charsetline is None:
728 headerstr += "Content-Type: text/plain; charset=%s" % self._encoding
729 else:
730 charset = re.search("charset=([^ ]*)", charsetline)
731 if charset is None:
732 newcharsetline = charsetline
733 if not newcharsetline.strip().endswith(";"):
734 newcharsetline += ";"
735 newcharsetline += " charset=%s" % self._encoding
736 else:
737 charset = charset.group(1)
738 newcharsetline = charsetline.replace("charset=%s" % charset, "charset=%s" % self._encoding, 1)
739 headerstr = headerstr.replace(charsetline, newcharsetline, 1)
740 header.msgstr = quoteforpo(headerstr)
741
743 """parses the given file or file source string"""
744 try:
745 if hasattr(input, 'name'):
746 self.filename = input.name
747 elif not getattr(self, 'filename', ''):
748 self.filename = ''
749 if isinstance(input, str):
750 input = cStringIO.StringIO(input)
751 poparser.parse_units(poparser.ParseState(input, pounit), self)
752 except Exception, e:
753 raise base.ParseError(e)
754
756 """make sure each msgid is unique ; merge comments etc from duplicates into original"""
757
758
759 id_dict = {}
760 uniqueunits = []
761
762
763 markedpos = []
764 def addcomment(thepo):
765 thepo.msgidcomments.append('"_: %s\\n"' % " ".join(thepo.getlocations()))
766 markedpos.append(thepo)
767 for thepo in self.units:
768 id = thepo.getid()
769 if thepo.isheader() and not thepo.getlocations():
770
771 uniqueunits.append(thepo)
772 elif id in id_dict:
773 if duplicatestyle == "merge":
774 if id:
775 id_dict[id].merge(thepo)
776 else:
777 addcomment(thepo)
778 uniqueunits.append(thepo)
779 elif duplicatestyle == "msgctxt":
780 origpo = id_dict[id]
781 if origpo not in markedpos:
782 origpo.msgctxt.append('"%s"' % escapeforpo(" ".join(origpo.getlocations())))
783 markedpos.append(thepo)
784 thepo.msgctxt.append('"%s"' % escapeforpo(" ".join(thepo.getlocations())))
785 uniqueunits.append(thepo)
786 else:
787 if not id:
788 if duplicatestyle == "merge":
789 addcomment(thepo)
790 else:
791 thepo.msgctxt.append('"%s"' % escapeforpo(" ".join(thepo.getlocations())))
792 id_dict[id] = thepo
793 uniqueunits.append(thepo)
794 self.units = uniqueunits
795
797 """convert to a string. double check that unicode is handled somehow here"""
798 output = self._getoutput()
799 if isinstance(output, unicode):
800 return output.encode(getattr(self, "encoding", "UTF-8"))
801 return output
802
804 """convert the units back to lines"""
805 lines = []
806 for unit in self.units:
807 unitsrc = str(unit) + "\n"
808 lines.append(unitsrc)
809 lines = "".join(self.encode(lines)).rstrip()
810
811 if lines: lines += "\n"
812 return lines
813
815 """encode any unicode strings in lines in self._encoding"""
816 newlines = []
817 encoding = self._encoding
818 if encoding is None or encoding.lower() == "charset":
819 encoding = 'UTF-8'
820 for line in lines:
821 if isinstance(line, unicode):
822 line = line.encode(encoding)
823 newlines.append(line)
824 return newlines
825
827 """decode any non-unicode strings in lines with self._encoding"""
828 newlines = []
829 for line in lines:
830 if isinstance(line, str) and self._encoding is not None and self._encoding.lower() != "charset":
831 try:
832 line = line.decode(self._encoding)
833 except UnicodeError, e:
834 raise UnicodeError("Error decoding line with encoding %r: %s. Line is %r" % (self._encoding, e, line))
835 newlines.append(line)
836 return newlines
837
842