1 # Copyright (C) 2002-2007 Python Software Foundation
   2 # Author: Ben Gertzfield
   3 # Contact: email-sig@python.org
   4 
   5 """Base64 content transfer encoding per RFCs 2045-2047.
   6 
   7 This module handles the content transfer encoding method defined in RFC 2045
   8 to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit
   9 characters encoding known as Base64.
  10 
  11 It is used in the MIME standards for email to attach images, audio, and text
  12 using some 8-bit character sets to messages.
  13 
  14 This module provides an interface to encode and decode both headers and bodies
  15 with Base64 encoding.
  16 
RFC 2047 defines a method for including character set information in an
  18 `encoded-word' in a header.  This method is commonly used for 8-bit real names
  19 in To:, From:, Cc:, etc. fields, as well as Subject: lines.
  20 
  21 This module does not do the line wrapping or end-of-line character conversion
  22 necessary for proper internationalized headers; it only does dumb encoding and
  23 decoding.  To deal with the various line wrapping issues, use the email.header
  24 module.
  25 """
  26 
# Public API of this module: the names exported by a star-import.
# body_decode and decodestring are aliases of decode (bound at the end of
# the module).
__all__ = [
    'body_decode',
    'body_encode',
    'decode',
    'decodestring',
    'header_encode',
    'header_length',
    ]
  35 
  36 
  37 from base64 import b64encode
  38 from binascii import b2a_base64, a2b_base64
  39 
# Line terminators and the empty joiner string.  NL is the default line
# ending for body_encode(), and EMPTYSTRING joins its encoded lines.
# NOTE(review): CRLF is not referenced by any function visible in this
# module -- presumably kept for importers; confirm before removing.
CRLF = '\r\n'
NL = '\n'
EMPTYSTRING = ''

# See also Charset.py
# NOTE(review): MISC_LEN is not used in the code visible here.  It presumably
# counts the 7 fixed characters of an encoded-word wrapper ('=?', '?b?' and
# '?=', as produced by header_encode()) -- confirm against Charset.py.
MISC_LEN = 7
  46 
  47 
  48 
  49 # Helpers
  50 def header_length(bytearray):
  51     """Return the length of s when it is encoded with base64."""
  52     groups_of_3, leftover = divmod(len(bytearray), 3)
  53     # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in.
  54     n = groups_of_3 * 4
  55     if leftover:
  56         n += 4
  57     return n
  58 
  59 
  60 
  61 def header_encode(header_bytes, charset='iso-8859-1'):
  62     """Encode a single header line with Base64 encoding in a given charset.
  63 
  64     charset names the character set to use to encode the header.  It defaults
  65     to iso-8859-1.  Base64 encoding is defined in RFC 2045.
  66     """
  67     if not header_bytes:
  68         return ""
  69     if isinstance(header_bytes, str):
  70         header_bytes = header_bytes.encode(charset)
  71     encoded = b64encode(header_bytes).decode("ascii")
  72     return '=?%s?b?%s?=' % (charset, encoded)
  73 
  74 
  75 
  76 def body_encode(s, maxlinelen=76, eol=NL):
  77     r"""Encode a string with base64.
  78 
  79     Each line will be wrapped at, at most, maxlinelen characters (defaults to
  80     76 characters).
  81 
  82     Each line of encoded text will end with eol, which defaults to "\n".  Set
  83     this to "\r\n" if you will be using the result of this function directly
  84     in an email.
  85     """
  86     if not s:
  87         return s
  88 
  89     encvec = []
  90     max_unencoded = maxlinelen * 3 // 4
  91     for i in range(0, len(s), max_unencoded):
  92         # BAW: should encode() inherit b2a_base64()'s dubious behavior in
  93         # adding a newline to the encoded string?
  94         enc = b2a_base64(s[i:i + max_unencoded]).decode("ascii")
  95         if enc.endswith(NL) and eol != NL:
  96             enc = enc[:-1] + eol
  97         encvec.append(enc)
  98     return EMPTYSTRING.join(encvec)
  99 
 100 
 101 
 102 def decode(string):
 103     """Decode a raw base64 string, returning a bytes object.
 104 
 105     This function does not parse a full MIME header value encoded with
 106     base64 (like =?iso-8895-1?b?bmloISBuaWgh?=) -- please use the high
 107     level email.header class for that functionality.
 108     """
 109     if not string:
 110         return bytes()
 111     elif isinstance(string, str):
 112         return a2b_base64(string.encode('raw-unicode-escape'))
 113     else:
 114         return a2b_base64(string)
 115 
 116 
# For convenience and backwards compatibility w/ standard base64 module
# Both names are plain aliases: they are bound to the very same function
# object as decode(), so they accept the same input and return bytes.
body_decode = decode
decodestring = decode