1 # Copyright (C) 2001-2007 Python Software Foundation
   2 # Author: Barry Warsaw
   3 # Contact: email-sig@python.org
   4 
   5 """Basic message object for the email package object model."""
   6 
   7 __all__ = ['Message']
   8 
   9 import re
  10 import uu
  11 from io import BytesIO, StringIO
  12 
  13 # Intrapackage imports
  14 from email import utils
  15 from email import errors
  16 from email._policybase import compat32
  17 from email import charset as _charset
  18 from email._encoded_words import decode_b
  19 Charset = _charset.Charset
  20 
  21 SEMISPACE = '; '
  22 
  23 # Regular expression that matches `special' characters in parameters, the
  24 # existence of which force quoting of the parameter value.
  25 tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
  26 
  27 
  28 def _splitparam(param):
  29     # Split header parameters.  BAW: this may be too simple.  It isn't
  30     # strictly RFC 2045 (section 5.1) compliant, but it catches most headers
  31     # found in the wild.  We may eventually need a full fledged parser.
  32     # RDM: we might have a Header here; for now just stringify it.
  33     a, sep, b = str(param).partition(';')
  34     if not sep:
  35         return a.strip(), None
  36     return a.strip(), b.strip()
  37 
  38 def _formatparam(param, value=None, quote=True):
  39     """Convenience function to format and return a key=value pair.
  40 
  41     This will quote the value if needed or if quote is true.  If value is a
  42     three tuple (charset, language, value), it will be encoded according
  43     to RFC2231 rules.  If it contains non-ascii characters it will likewise
  44     be encoded according to RFC2231 rules, using the utf-8 charset and
  45     a null language.
  46     """
  47     if value is not None and len(value) > 0:
  48         # A tuple is used for RFC 2231 encoded parameter values where items
  49         # are (charset, language, value).  charset is a string, not a Charset
  50         # instance.  RFC 2231 encoded values are never quoted, per RFC.
  51         if isinstance(value, tuple):
  52             # Encode as per RFC 2231
  53             param += '*'
  54             value = utils.encode_rfc2231(value[2], value[0], value[1])
  55             return '%s=%s' % (param, value)
  56         else:
  57             try:
  58                 value.encode('ascii')
  59             except UnicodeEncodeError:
  60                 param += '*'
  61                 value = utils.encode_rfc2231(value, 'utf-8', '')
  62                 return '%s=%s' % (param, value)
  63         # BAW: Please check this.  I think that if quote is set it should
  64         # force quoting even if not necessary.
  65         if quote or tspecials.search(value):
  66             return '%s="%s"' % (param, utils.quote(value))
  67         else:
  68             return '%s=%s' % (param, value)
  69     else:
  70         return param
  71 
  72 def _parseparam(s):
  73     # RDM This might be a Header, so for now stringify it.
  74     s = ';' + str(s)
  75     plist = []
  76     while s[:1] == ';':
  77         s = s[1:]
  78         end = s.find(';')
  79         while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
  80             end = s.find(';', end + 1)
  81         if end < 0:
  82             end = len(s)
  83         f = s[:end]
  84         if '=' in f:
  85             i = f.index('=')
  86             f = f[:i].strip().lower() + '=' + f[i+1:].strip()
  87         plist.append(f.strip())
  88         s = s[end:]
  89     return plist
  90 
  91 
  92 def _unquotevalue(value):
  93     # This is different than utils.collapse_rfc2231_value() because it doesn't
  94     # try to convert the value to a unicode.  Message.get_param() and
  95     # Message.get_params() are both currently defined to return the tuple in
  96     # the face of RFC 2231 parameters.
  97     if isinstance(value, tuple):
  98         return value[0], value[1], utils.unquote(value[2])
  99     else:
 100         return utils.unquote(value)
 101 
 102 
 103 
 104 class Message:
 105     """Basic message object.
 106 
 107     A message object is defined as something that has a bunch of RFC 2822
 108     headers and a payload.  It may optionally have an envelope header
 109     (a.k.a. Unix-From or From_ header).  If the message is a container (i.e. a
 110     multipart or a message/rfc822), then the payload is a list of Message
 111     objects, otherwise it is a string.
 112 
 113     Message objects implement part of the `mapping' interface, which assumes
 114     there is exactly one occurrence of the header per message.  Some headers
 115     do in fact appear multiple times (e.g. Received) and for those headers,
 116     you must use the explicit API to set or get all the headers.  Not all of
 117     the mapping methods are implemented.
 118     """
 119     def __init__(self, policy=compat32):
 120         self.policy = policy
 121         self._headers = []
 122         self._unixfrom = None
 123         self._payload = None
 124         self._charset = None
 125         # Defaults for multipart messages
 126         self.preamble = self.epilogue = None
 127         self.defects = []
 128         # Default content type
 129         self._default_type = 'text/plain'
 130 
 131     def __str__(self):
 132         """Return the entire formatted message as a string.
 133         """
 134         return self.as_string()
 135 
 136     def as_string(self, unixfrom=False, maxheaderlen=0, policy=None):
 137         """Return the entire formatted message as a string.
 138 
 139         Optional 'unixfrom', when true, means include the Unix From_ envelope
 140         header.  For backward compatibility reasons, if maxheaderlen is
 141         not specified it defaults to 0, so you must override it explicitly
 142         if you want a different maxheaderlen.  'policy' is passed to the
 143         Generator instance used to serialize the mesasge; if it is not
 144         specified the policy associated with the message instance is used.
 145 
 146         If the message object contains binary data that is not encoded
 147         according to RFC standards, the non-compliant data will be replaced by
 148         unicode "unknown character" code points.
 149         """
 150         from email.generator import Generator
 151         policy = self.policy if policy is None else policy
 152         fp = StringIO()
 153         g = Generator(fp,
 154                       mangle_from_=False,
 155                       maxheaderlen=maxheaderlen,
 156                       policy=policy)
 157         g.flatten(self, unixfrom=unixfrom)
 158         return fp.getvalue()
 159 
 160     def __bytes__(self):
 161         """Return the entire formatted message as a bytes object.
 162         """
 163         return self.as_bytes()
 164 
 165     def as_bytes(self, unixfrom=False, policy=None):
 166         """Return the entire formatted message as a bytes object.
 167 
 168         Optional 'unixfrom', when true, means include the Unix From_ envelope
 169         header.  'policy' is passed to the BytesGenerator instance used to
 170         serialize the message; if not specified the policy associated with
 171         the message instance is used.
 172         """
 173         from email.generator import BytesGenerator
 174         policy = self.policy if policy is None else policy
 175         fp = BytesIO()
 176         g = BytesGenerator(fp, mangle_from_=False, policy=policy)
 177         g.flatten(self, unixfrom=unixfrom)
 178         return fp.getvalue()
 179 
 180     def is_multipart(self):
 181         """Return True if the message consists of multiple parts."""
 182         return isinstance(self._payload, list)
 183 
 184     #
 185     # Unix From_ line
 186     #
 187     def set_unixfrom(self, unixfrom):
 188         self._unixfrom = unixfrom
 189 
 190     def get_unixfrom(self):
 191         return self._unixfrom
 192 
 193     #
 194     # Payload manipulation.
 195     #
 196     def attach(self, payload):
 197         """Add the given payload to the current payload.
 198 
 199         The current payload will always be a list of objects after this method
 200         is called.  If you want to set the payload to a scalar object, use
 201         set_payload() instead.
 202         """
 203         if self._payload is None:
 204             self._payload = [payload]
 205         else:
 206             self._payload.append(payload)
 207 
 208     def get_payload(self, i=None, decode=False):
 209         """Return a reference to the payload.
 210 
 211         The payload will either be a list object or a string.  If you mutate
 212         the list object, you modify the message's payload in place.  Optional
 213         i returns that index into the payload.
 214 
 215         Optional decode is a flag indicating whether the payload should be
 216         decoded or not, according to the Content-Transfer-Encoding header
 217         (default is False).
 218 
 219         When True and the message is not a multipart, the payload will be
 220         decoded if this header's value is `quoted-printable' or `base64'.  If
 221         some other encoding is used, or the header is missing, or if the
 222         payload has bogus data (i.e. bogus base64 or uuencoded data), the
 223         payload is returned as-is.
 224 
 225         If the message is a multipart and the decode flag is True, then None
 226         is returned.
 227         """
 228         # Here is the logic table for this code, based on the email5.0.0 code:
 229         #   i     decode  is_multipart  result
 230         # ------  ------  ------------  ------------------------------
 231         #  None   True    True          None
 232         #   i     True    True          None
 233         #  None   False   True          _payload (a list)
 234         #   i     False   True          _payload element i (a Message)
 235         #   i     False   False         error (not a list)
 236         #   i     True    False         error (not a list)
 237         #  None   False   False         _payload
 238         #  None   True    False         _payload decoded (bytes)
 239         # Note that Barry planned to factor out the 'decode' case, but that
 240         # isn't so easy now that we handle the 8 bit data, which needs to be
 241         # converted in both the decode and non-decode path.
 242         if self.is_multipart():
 243             if decode:
 244                 return None
 245             if i is None:
 246                 return self._payload
 247             else:
 248                 return self._payload[i]
 249         # For backward compatibility, Use isinstance and this error message
 250         # instead of the more logical is_multipart test.
 251         if i is not None and not isinstance(self._payload, list):
 252             raise TypeError('Expected list, got %s' % type(self._payload))
 253         payload = self._payload
 254         # cte might be a Header, so for now stringify it.
 255         cte = str(self.get('content-transfer-encoding', '')).lower()
 256         # payload may be bytes here.
 257         if isinstance(payload, str):
 258             if utils._has_surrogates(payload):
 259                 bpayload = payload.encode('ascii', 'surrogateescape')
 260                 if not decode:
 261                     try:
 262                         payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace')
 263                     except LookupError:
 264                         payload = bpayload.decode('ascii', 'replace')
 265             elif decode:
 266                 try:
 267                     bpayload = payload.encode('ascii')
 268                 except UnicodeError:
 269                     # This won't happen for RFC compliant messages (messages
 270                     # containing only ASCII codepoints in the unicode input).
 271                     # If it does happen, turn the string into bytes in a way
 272                     # guaranteed not to fail.
 273                     bpayload = payload.encode('raw-unicode-escape')
 274         if not decode:
 275             return payload
 276         if cte == 'quoted-printable':
 277             return utils._qdecode(bpayload)
 278         elif cte == 'base64':
 279             # XXX: this is a bit of a hack; decode_b should probably be factored
 280             # out somewhere, but I haven't figured out where yet.
 281             value, defects = decode_b(b''.join(bpayload.splitlines()))
 282             for defect in defects:
 283                 self.policy.handle_defect(self, defect)
 284             return value
 285         elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
 286             in_file = BytesIO(bpayload)
 287             out_file = BytesIO()
 288             try:
 289                 uu.decode(in_file, out_file, quiet=True)
 290                 return out_file.getvalue()
 291             except uu.Error:
 292                 # Some decoding problem
 293                 return bpayload
 294         if isinstance(payload, str):
 295             return bpayload
 296         return payload
 297 
 298     def set_payload(self, payload, charset=None):
 299         """Set the payload to the given value.
 300 
 301         Optional charset sets the message's default character set.  See
 302         set_charset() for details.
 303         """
 304         if isinstance(payload, bytes):
 305             payload = payload.decode('ascii', 'surrogateescape')
 306         self._payload = payload
 307         if charset is not None:
 308             self.set_charset(charset)
 309 
 310     def set_charset(self, charset):
 311         """Set the charset of the payload to a given character set.
 312 
 313         charset can be a Charset instance, a string naming a character set, or
 314         None.  If it is a string it will be converted to a Charset instance.
 315         If charset is None, the charset parameter will be removed from the
 316         Content-Type field.  Anything else will generate a TypeError.
 317 
 318         The message will be assumed to be of type text/* encoded with
 319         charset.input_charset.  It will be converted to charset.output_charset
 320         and encoded properly, if needed, when generating the plain text
 321         representation of the message.  MIME headers (MIME-Version,
 322         Content-Type, Content-Transfer-Encoding) will be added as needed.
 323         """
 324         if charset is None:
 325             self.del_param('charset')
 326             self._charset = None
 327             return
 328         if not isinstance(charset, Charset):
 329             charset = Charset(charset)
 330         self._charset = charset
 331         if 'MIME-Version' not in self:
 332             self.add_header('MIME-Version', '1.0')
 333         if 'Content-Type' not in self:
 334             self.add_header('Content-Type', 'text/plain',
 335                             charset=charset.get_output_charset())
 336         else:
 337             self.set_param('charset', charset.get_output_charset())
 338         if charset != charset.get_output_charset():
 339             self._payload = charset.body_encode(self._payload)
 340         if 'Content-Transfer-Encoding' not in self:
 341             cte = charset.get_body_encoding()
 342             try:
 343                 cte(self)
 344             except TypeError:
 345                 self._payload = charset.body_encode(self._payload)
 346                 self.add_header('Content-Transfer-Encoding', cte)
 347 
 348     def get_charset(self):
 349         """Return the Charset instance associated with the message's payload.
 350         """
 351         return self._charset
 352 
 353     #
 354     # MAPPING INTERFACE (partial)
 355     #
 356     def __len__(self):
 357         """Return the total number of headers, including duplicates."""
 358         return len(self._headers)
 359 
 360     def __getitem__(self, name):
 361         """Get a header value.
 362 
 363         Return None if the header is missing instead of raising an exception.
 364 
 365         Note that if the header appeared multiple times, exactly which
 366         occurrence gets returned is undefined.  Use get_all() to get all
 367         the values matching a header field name.
 368         """
 369         return self.get(name)
 370 
 371     def __setitem__(self, name, val):
 372         """Set the value of a header.
 373 
 374         Note: this does not overwrite an existing header with the same field
 375         name.  Use __delitem__() first to delete any existing headers.
 376         """
 377         max_count = self.policy.header_max_count(name)
 378         if max_count:
 379             lname = name.lower()
 380             found = 0
 381             for k, v in self._headers:
 382                 if k.lower() == lname:
 383                     found += 1
 384                     if found >= max_count:
 385                         raise ValueError("There may be at most {} {} headers "
 386                                          "in a message".format(max_count, name))
 387         self._headers.append(self.policy.header_store_parse(name, val))
 388 
 389     def __delitem__(self, name):
 390         """Delete all occurrences of a header, if present.
 391 
 392         Does not raise an exception if the header is missing.
 393         """
 394         name = name.lower()
 395         newheaders = []
 396         for k, v in self._headers:
 397             if k.lower() != name:
 398                 newheaders.append((k, v))
 399         self._headers = newheaders
 400 
 401     def __contains__(self, name):
 402         return name.lower() in [k.lower() for k, v in self._headers]
 403 
 404     def __iter__(self):
 405         for field, value in self._headers:
 406             yield field
 407 
 408     def keys(self):
 409         """Return a list of all the message's header field names.
 410 
 411         These will be sorted in the order they appeared in the original
 412         message, or were added to the message, and may contain duplicates.
 413         Any fields deleted and re-inserted are always appended to the header
 414         list.
 415         """
 416         return [k for k, v in self._headers]
 417 
 418     def values(self):
 419         """Return a list of all the message's header values.
 420 
 421         These will be sorted in the order they appeared in the original
 422         message, or were added to the message, and may contain duplicates.
 423         Any fields deleted and re-inserted are always appended to the header
 424         list.
 425         """
 426         return [self.policy.header_fetch_parse(k, v)
 427                 for k, v in self._headers]
 428 
 429     def items(self):
 430         """Get all the message's header fields and values.
 431 
 432         These will be sorted in the order they appeared in the original
 433         message, or were added to the message, and may contain duplicates.
 434         Any fields deleted and re-inserted are always appended to the header
 435         list.
 436         """
 437         return [(k, self.policy.header_fetch_parse(k, v))
 438                 for k, v in self._headers]
 439 
 440     def get(self, name, failobj=None):
 441         """Get a header value.
 442 
 443         Like __getitem__() but return failobj instead of None when the field
 444         is missing.
 445         """
 446         name = name.lower()
 447         for k, v in self._headers:
 448             if k.lower() == name:
 449                 return self.policy.header_fetch_parse(k, v)
 450         return failobj
 451 
 452     #
    # "Internal" methods (public API, but only intended for use by a parser
    # or generator, not normal application code).
 455     #
 456 
 457     def set_raw(self, name, value):
 458         """Store name and value in the model without modification.
 459 
 460         This is an "internal" API, intended only for use by a parser.
 461         """
 462         self._headers.append((name, value))
 463 
 464     def raw_items(self):
 465         """Return the (name, value) header pairs without modification.
 466 
 467         This is an "internal" API, intended only for use by a generator.
 468         """
 469         return iter(self._headers.copy())
 470 
 471     #
 472     # Additional useful stuff
 473     #
 474 
 475     def get_all(self, name, failobj=None):
 476         """Return a list of all the values for the named field.
 477 
 478         These will be sorted in the order they appeared in the original
 479         message, and may contain duplicates.  Any fields deleted and
 480         re-inserted are always appended to the header list.
 481 
 482         If no such fields exist, failobj is returned (defaults to None).
 483         """
 484         values = []
 485         name = name.lower()
 486         for k, v in self._headers:
 487             if k.lower() == name:
 488                 values.append(self.policy.header_fetch_parse(k, v))
 489         if not values:
 490             return failobj
 491         return values
 492 
 493     def add_header(self, _name, _value, **_params):
 494         """Extended header setting.
 495 
 496         name is the header field to add.  keyword arguments can be used to set
 497         additional parameters for the header field, with underscores converted
 498         to dashes.  Normally the parameter will be added as key="value" unless
 499         value is None, in which case only the key will be added.  If a
 500         parameter value contains non-ASCII characters it can be specified as a
 501         three-tuple of (charset, language, value), in which case it will be
 502         encoded according to RFC2231 rules.  Otherwise it will be encoded using
 503         the utf-8 charset and a language of ''.
 504 
 505         Examples:
 506 
 507         msg.add_header('content-disposition', 'attachment', filename='bud.gif')
 508         msg.add_header('content-disposition', 'attachment',
 509                        filename=('utf-8', '', Fußballer.ppt'))
 510         msg.add_header('content-disposition', 'attachment',
 511                        filename='Fußballer.ppt'))
 512         """
 513         parts = []
 514         for k, v in _params.items():
 515             if v is None:
 516                 parts.append(k.replace('_', '-'))
 517             else:
 518                 parts.append(_formatparam(k.replace('_', '-'), v))
 519         if _value is not None:
 520             parts.insert(0, _value)
 521         self[_name] = SEMISPACE.join(parts)
 522 
 523     def replace_header(self, _name, _value):
 524         """Replace a header.
 525 
 526         Replace the first matching header found in the message, retaining
 527         header order and case.  If no matching header was found, a KeyError is
 528         raised.
 529         """
 530         _name = _name.lower()
 531         for i, (k, v) in zip(range(len(self._headers)), self._headers):
 532             if k.lower() == _name:
 533                 self._headers[i] = self.policy.header_store_parse(k, _value)
 534                 break
 535         else:
 536             raise KeyError(_name)
 537 
 538     #
 539     # Use these three methods instead of the three above.
 540     #
 541 
 542     def get_content_type(self):
 543         """Return the message's content type.
 544 
 545         The returned string is coerced to lower case of the form
 546         `maintype/subtype'.  If there was no Content-Type header in the
 547         message, the default type as given by get_default_type() will be
 548         returned.  Since according to RFC 2045, messages always have a default
 549         type this will always return a value.
 550 
 551         RFC 2045 defines a message's default type to be text/plain unless it
 552         appears inside a multipart/digest container, in which case it would be
 553         message/rfc822.
 554         """
 555         missing = object()
 556         value = self.get('content-type', missing)
 557         if value is missing:
 558             # This should have no parameters
 559             return self.get_default_type()
 560         ctype = _splitparam(value)[0].lower()
 561         # RFC 2045, section 5.2 says if its invalid, use text/plain
 562         if ctype.count('/') != 1:
 563             return 'text/plain'
 564         return ctype
 565 
 566     def get_content_maintype(self):
 567         """Return the message's main content type.
 568 
 569         This is the `maintype' part of the string returned by
 570         get_content_type().
 571         """
 572         ctype = self.get_content_type()
 573         return ctype.split('/')[0]
 574 
 575     def get_content_subtype(self):
 576         """Returns the message's sub-content type.
 577 
 578         This is the `subtype' part of the string returned by
 579         get_content_type().
 580         """
 581         ctype = self.get_content_type()
 582         return ctype.split('/')[1]
 583 
 584     def get_default_type(self):
 585         """Return the `default' content type.
 586 
 587         Most messages have a default content type of text/plain, except for
 588         messages that are subparts of multipart/digest containers.  Such
 589         subparts have a default content type of message/rfc822.
 590         """
 591         return self._default_type
 592 
 593     def set_default_type(self, ctype):
 594         """Set the `default' content type.
 595 
 596         ctype should be either "text/plain" or "message/rfc822", although this
 597         is not enforced.  The default content type is not stored in the
 598         Content-Type header.
 599         """
 600         self._default_type = ctype
 601 
 602     def _get_params_preserve(self, failobj, header):
 603         # Like get_params() but preserves the quoting of values.  BAW:
 604         # should this be part of the public interface?
 605         missing = object()
 606         value = self.get(header, missing)
 607         if value is missing:
 608             return failobj
 609         params = []
 610         for p in _parseparam(value):
 611             try:
 612                 name, val = p.split('=', 1)
 613                 name = name.strip()
 614                 val = val.strip()
 615             except ValueError:
 616                 # Must have been a bare attribute
 617                 name = p.strip()
 618                 val = ''
 619             params.append((name, val))
 620         params = utils.decode_params(params)
 621         return params
 622 
 623     def get_params(self, failobj=None, header='content-type', unquote=True):
 624         """Return the message's Content-Type parameters, as a list.
 625 
 626         The elements of the returned list are 2-tuples of key/value pairs, as
 627         split on the `=' sign.  The left hand side of the `=' is the key,
 628         while the right hand side is the value.  If there is no `=' sign in
 629         the parameter the value is the empty string.  The value is as
 630         described in the get_param() method.
 631 
 632         Optional failobj is the object to return if there is no Content-Type
 633         header.  Optional header is the header to search instead of
 634         Content-Type.  If unquote is True, the value is unquoted.
 635         """
 636         missing = object()
 637         params = self._get_params_preserve(missing, header)
 638         if params is missing:
 639             return failobj
 640         if unquote:
 641             return [(k, _unquotevalue(v)) for k, v in params]
 642         else:
 643             return params
 644 
 645     def get_param(self, param, failobj=None, header='content-type',
 646                   unquote=True):
 647         """Return the parameter value if found in the Content-Type header.
 648 
 649         Optional failobj is the object to return if there is no Content-Type
 650         header, or the Content-Type header has no such parameter.  Optional
 651         header is the header to search instead of Content-Type.
 652 
 653         Parameter keys are always compared case insensitively.  The return
 654         value can either be a string, or a 3-tuple if the parameter was RFC
 655         2231 encoded.  When it's a 3-tuple, the elements of the value are of
 656         the form (CHARSET, LANGUAGE, VALUE).  Note that both CHARSET and
 657         LANGUAGE can be None, in which case you should consider VALUE to be
 658         encoded in the us-ascii charset.  You can usually ignore LANGUAGE.
 659         The parameter value (either the returned string, or the VALUE item in
 660         the 3-tuple) is always unquoted, unless unquote is set to False.
 661 
 662         If your application doesn't care whether the parameter was RFC 2231
 663         encoded, it can turn the return value into a string as follows:
 664 
 665             rawparam = msg.get_param('foo')
 666             param = email.utils.collapse_rfc2231_value(rawparam)
 667 
 668         """
 669         if header not in self:
 670             return failobj
 671         for k, v in self._get_params_preserve(failobj, header):
 672             if k.lower() == param.lower():
 673                 if unquote:
 674                     return _unquotevalue(v)
 675                 else:
 676                     return v
 677         return failobj
 678 
 679     def set_param(self, param, value, header='Content-Type', requote=True,
 680                   charset=None, language='', replace=False):
 681         """Set a parameter in the Content-Type header.
 682 
 683         If the parameter already exists in the header, its value will be
 684         replaced with the new value.
 685 
 686         If header is Content-Type and has not yet been defined for this
 687         message, it will be set to "text/plain" and the new parameter and
 688         value will be appended as per RFC 2045.
 689 
 690         An alternate header can specified in the header argument, and all
 691         parameters will be quoted as necessary unless requote is False.
 692 
 693         If charset is specified, the parameter will be encoded according to RFC
 694         2231.  Optional language specifies the RFC 2231 language, defaulting
 695         to the empty string.  Both charset and language should be strings.
 696         """
 697         if not isinstance(value, tuple) and charset:
 698             value = (charset, language, value)
 699 
 700         if header not in self and header.lower() == 'content-type':
 701             ctype = 'text/plain'
 702         else:
 703             ctype = self.get(header)
 704         if not self.get_param(param, header=header):
 705             if not ctype:
 706                 ctype = _formatparam(param, value, requote)
 707             else:
 708                 ctype = SEMISPACE.join(
 709                     [ctype, _formatparam(param, value, requote)])
 710         else:
 711             ctype = ''
 712             for old_param, old_value in self.get_params(header=header,
 713                                                         unquote=requote):
 714                 append_param = ''
 715                 if old_param.lower() == param.lower():
 716                     append_param = _formatparam(param, value, requote)
 717                 else:
 718                     append_param = _formatparam(old_param, old_value, requote)
 719                 if not ctype:
 720                     ctype = append_param
 721                 else:
 722                     ctype = SEMISPACE.join([ctype, append_param])
 723         if ctype != self.get(header):
 724             if replace:
 725                 self.replace_header(header, ctype)
 726             else:
 727                 del self[header]
 728                 self[header] = ctype
 729 
 730     def del_param(self, param, header='content-type', requote=True):
 731         """Remove the given parameter completely from the Content-Type header.
 732 
 733         The header will be re-written in place without the parameter or its
 734         value. All values will be quoted as necessary unless requote is
 735         False.  Optional header specifies an alternative to the Content-Type
 736         header.
 737         """
 738         if header not in self:
 739             return
 740         new_ctype = ''
 741         for p, v in self.get_params(header=header, unquote=requote):
 742             if p.lower() != param.lower():
 743                 if not new_ctype:
 744                     new_ctype = _formatparam(p, v, requote)
 745                 else:
 746                     new_ctype = SEMISPACE.join([new_ctype,
 747                                                 _formatparam(p, v, requote)])
 748         if new_ctype != self.get(header):
 749             del self[header]
 750             self[header] = new_ctype
 751 
 752     def set_type(self, type, header='Content-Type', requote=True):
 753         """Set the main type and subtype for the Content-Type header.
 754 
 755         type must be a string in the form "maintype/subtype", otherwise a
 756         ValueError is raised.
 757 
 758         This method replaces the Content-Type header, keeping all the
 759         parameters in place.  If requote is False, this leaves the existing
 760         header's quoting as is.  Otherwise, the parameters will be quoted (the
 761         default).
 762 
 763         An alternative header can be specified in the header argument.  When
 764         the Content-Type header is set, we'll always also add a MIME-Version
 765         header.
 766         """
 767         # BAW: should we be strict?
 768         if not type.count('/') == 1:
 769             raise ValueError
 770         # Set the Content-Type, you get a MIME-Version
 771         if header.lower() == 'content-type':
 772             del self['mime-version']
 773             self['MIME-Version'] = '1.0'
 774         if header not in self:
 775             self[header] = type
 776             return
 777         params = self.get_params(header=header, unquote=requote)
 778         del self[header]
 779         self[header] = type
 780         # Skip the first param; it's the old type.
 781         for p, v in params[1:]:
 782             self.set_param(p, v, header, requote)
 783 
 784     def get_filename(self, failobj=None):
 785         """Return the filename associated with the payload if present.
 786 
 787         The filename is extracted from the Content-Disposition header's
 788         `filename' parameter, and it is unquoted.  If that header is missing
 789         the `filename' parameter, this method falls back to looking for the
 790         `name' parameter.
 791         """
 792         missing = object()
 793         filename = self.get_param('filename', missing, 'content-disposition')
 794         if filename is missing:
 795             filename = self.get_param('name', missing, 'content-type')
 796         if filename is missing:
 797             return failobj
 798         return utils.collapse_rfc2231_value(filename).strip()
 799 
 800     def get_boundary(self, failobj=None):
 801         """Return the boundary associated with the payload if present.
 802 
 803         The boundary is extracted from the Content-Type header's `boundary'
 804         parameter, and it is unquoted.
 805         """
 806         missing = object()
 807         boundary = self.get_param('boundary', missing)
 808         if boundary is missing:
 809             return failobj
 810         # RFC 2046 says that boundaries may begin but not end in w/s
 811         return utils.collapse_rfc2231_value(boundary).rstrip()
 812 
 813     def set_boundary(self, boundary):
 814         """Set the boundary parameter in Content-Type to 'boundary'.
 815 
 816         This is subtly different than deleting the Content-Type header and
 817         adding a new one with a new boundary parameter via add_header().  The
 818         main difference is that using the set_boundary() method preserves the
 819         order of the Content-Type header in the original message.
 820 
 821         HeaderParseError is raised if the message has no Content-Type header.
 822         """
 823         missing = object()
 824         params = self._get_params_preserve(missing, 'content-type')
 825         if params is missing:
 826             # There was no Content-Type header, and we don't know what type
 827             # to set it to, so raise an exception.
 828             raise errors.HeaderParseError('No Content-Type header found')
 829         newparams = []
 830         foundp = False
 831         for pk, pv in params:
 832             if pk.lower() == 'boundary':
 833                 newparams.append(('boundary', '"%s"' % boundary))
 834                 foundp = True
 835             else:
 836                 newparams.append((pk, pv))
 837         if not foundp:
 838             # The original Content-Type header had no boundary attribute.
 839             # Tack one on the end.  BAW: should we raise an exception
 840             # instead???
 841             newparams.append(('boundary', '"%s"' % boundary))
 842         # Replace the existing Content-Type header with the new value
 843         newheaders = []
 844         for h, v in self._headers:
 845             if h.lower() == 'content-type':
 846                 parts = []
 847                 for k, v in newparams:
 848                     if v == '':
 849                         parts.append(k)
 850                     else:
 851                         parts.append('%s=%s' % (k, v))
 852                 val = SEMISPACE.join(parts)
 853                 newheaders.append(self.policy.header_store_parse(h, val))
 854 
 855             else:
 856                 newheaders.append((h, v))
 857         self._headers = newheaders
 858 
 859     def get_content_charset(self, failobj=None):
 860         """Return the charset parameter of the Content-Type header.
 861 
 862         The returned string is always coerced to lower case.  If there is no
 863         Content-Type header, or if that header has no charset parameter,
 864         failobj is returned.
 865         """
 866         missing = object()
 867         charset = self.get_param('charset', missing)
 868         if charset is missing:
 869             return failobj
 870         if isinstance(charset, tuple):
 871             # RFC 2231 encoded, so decode it, and it better end up as ascii.
 872             pcharset = charset[0] or 'us-ascii'
 873             try:
 874                 # LookupError will be raised if the charset isn't known to
 875                 # Python.  UnicodeError will be raised if the encoded text
 876                 # contains a character not in the charset.
 877                 as_bytes = charset[2].encode('raw-unicode-escape')
 878                 charset = str(as_bytes, pcharset)
 879             except (LookupError, UnicodeError):
 880                 charset = charset[2]
 881         # charset characters must be in us-ascii range
 882         try:
 883             charset.encode('us-ascii')
 884         except UnicodeError:
 885             return failobj
 886         # RFC 2046, $4.1.2 says charsets are not case sensitive
 887         return charset.lower()
 888 
 889     def get_charsets(self, failobj=None):
 890         """Return a list containing the charset(s) used in this message.
 891 
 892         The returned list of items describes the Content-Type headers'
 893         charset parameter for this message and all the subparts in its
 894         payload.
 895 
 896         Each item will either be a string (the value of the charset parameter
 897         in the Content-Type header of that part) or the value of the
 898         'failobj' parameter (defaults to None), if the part does not have a
 899         main MIME type of "text", or the charset is not defined.
 900 
 901         The list will contain one string for each part of the message, plus
 902         one for the container message (i.e. self), so that a non-multipart
 903         message will still return a list of length 1.
 904         """
 905         return [part.get_content_charset(failobj) for part in self.walk()]
 906 
 907     # I.e. def walk(self): ...
 908     from email.iterators import walk
 909 
 910 
 911 class MIMEPart(Message):
 912 
 913     def __init__(self, policy=None):
 914         if policy is None:
 915             from email.policy import default
 916             policy = default
 917         Message.__init__(self, policy)
 918 
 919     @property
 920     def is_attachment(self):
 921         c_d = self.get('content-disposition')
 922         if c_d is None:
 923             return False
 924         return c_d.lower() == 'attachment'
 925 
 926     def _find_body(self, part, preferencelist):
 927         if part.is_attachment:
 928             return
 929         maintype, subtype = part.get_content_type().split('/')
 930         if maintype == 'text':
 931             if subtype in preferencelist:
 932                 yield (preferencelist.index(subtype), part)
 933             return
 934         if maintype != 'multipart':
 935             return
 936         if subtype != 'related':
 937             for subpart in part.iter_parts():
 938                 yield from self._find_body(subpart, preferencelist)
 939             return
 940         if 'related' in preferencelist:
 941             yield (preferencelist.index('related'), part)
 942         candidate = None
 943         start = part.get_param('start')
 944         if start:
 945             for subpart in part.iter_parts():
 946                 if subpart['content-id'] == start:
 947                     candidate = subpart
 948                     break
 949         if candidate is None:
 950             subparts = part.get_payload()
 951             candidate = subparts[0] if subparts else None
 952         if candidate is not None:
 953             yield from self._find_body(candidate, preferencelist)
 954 
 955     def get_body(self, preferencelist=('related', 'html', 'plain')):
 956         """Return best candidate mime part for display as 'body' of message.
 957 
 958         Do a depth first search, starting with self, looking for the first part
 959         matching each of the items in preferencelist, and return the part
 960         corresponding to the first item that has a match, or None if no items
 961         have a match.  If 'related' is not included in preferencelist, consider
 962         the root part of any multipart/related encountered as a candidate
 963         match.  Ignore parts with 'Content-Disposition: attachment'.
 964         """
 965         best_prio = len(preferencelist)
 966         body = None
 967         for prio, part in self._find_body(self, preferencelist):
 968             if prio < best_prio:
 969                 best_prio = prio
 970                 body = part
 971                 if prio == 0:
 972                     break
 973         return body
 974 
 975     _body_types = {('text', 'plain'),
 976                    ('text', 'html'),
 977                    ('multipart', 'related'),
 978                    ('multipart', 'alternative')}
 979     def iter_attachments(self):
 980         """Return an iterator over the non-main parts of a multipart.
 981 
 982         Skip the first of each occurrence of text/plain, text/html,
 983         multipart/related, or multipart/alternative in the multipart (unless
 984         they have a 'Content-Disposition: attachment' header) and include all
 985         remaining subparts in the returned iterator.  When applied to a
 986         multipart/related, return all parts except the root part.  Return an
 987         empty iterator when applied to a multipart/alternative or a
 988         non-multipart.
 989         """
 990         maintype, subtype = self.get_content_type().split('/')
 991         if maintype != 'multipart' or subtype == 'alternative':
 992             return
 993         parts = self.get_payload()
 994         if maintype == 'multipart' and subtype == 'related':
 995             # For related, we treat everything but the root as an attachment.
 996             # The root may be indicated by 'start'; if there's no start or we
 997             # can't find the named start, treat the first subpart as the root.
 998             start = self.get_param('start')
 999             if start:
1000                 found = False
1001                 attachments = []
1002                 for part in parts:
1003                     if part.get('content-id') == start:
1004                         found = True
1005                     else:
1006                         attachments.append(part)
1007                 if found:
1008                     yield from attachments
1009                     return
1010             parts.pop(0)
1011             yield from parts
1012             return
1013         # Otherwise we more or less invert the remaining logic in get_body.
1014         # This only really works in edge cases (ex: non-text relateds or
1015         # alternatives) if the sending agent sets content-disposition.
1016         seen = []   # Only skip the first example of each candidate type.
1017         for part in parts:
1018             maintype, subtype = part.get_content_type().split('/')
1019             if ((maintype, subtype) in self._body_types and
1020                     not part.is_attachment and subtype not in seen):
1021                 seen.append(subtype)
1022                 continue
1023             yield part
1024 
1025     def iter_parts(self):
1026         """Return an iterator over all immediate subparts of a multipart.
1027 
1028         Return an empty iterator for a non-multipart.
1029         """
1030         if self.get_content_maintype() == 'multipart':
1031             yield from self.get_payload()
1032 
1033     def get_content(self, *args, content_manager=None, **kw):
1034         if content_manager is None:
1035             content_manager = self.policy.content_manager
1036         return content_manager.get_content(self, *args, **kw)
1037 
1038     def set_content(self, *args, content_manager=None, **kw):
1039         if content_manager is None:
1040             content_manager = self.policy.content_manager
1041         content_manager.set_content(self, *args, **kw)
1042 
1043     def _make_multipart(self, subtype, disallowed_subtypes, boundary):
1044         if self.get_content_maintype() == 'multipart':
1045             existing_subtype = self.get_content_subtype()
1046             disallowed_subtypes = disallowed_subtypes + (subtype,)
1047             if existing_subtype in disallowed_subtypes:
1048                 raise ValueError("Cannot convert {} to {}".format(
1049                     existing_subtype, subtype))
1050         keep_headers = []
1051         part_headers = []
1052         for name, value in self._headers:
1053             if name.lower().startswith('content-'):
1054                 part_headers.append((name, value))
1055             else:
1056                 keep_headers.append((name, value))
1057         if part_headers:
1058             # There is existing content, move it to the first subpart.
1059             part = type(self)(policy=self.policy)
1060             part._headers = part_headers
1061             part._payload = self._payload
1062             self._payload = [part]
1063         else:
1064             self._payload = []
1065         self._headers = keep_headers
1066         self['Content-Type'] = 'multipart/' + subtype
1067         if boundary is not None:
1068             self.set_param('boundary', boundary)
1069 
1070     def make_related(self, boundary=None):
1071         self._make_multipart('related', ('alternative', 'mixed'), boundary)
1072 
1073     def make_alternative(self, boundary=None):
1074         self._make_multipart('alternative', ('mixed',), boundary)
1075 
1076     def make_mixed(self, boundary=None):
1077         self._make_multipart('mixed', (), boundary)
1078 
1079     def _add_multipart(self, _subtype, *args, _disp=None, **kw):
1080         if (self.get_content_maintype() != 'multipart' or
1081                 self.get_content_subtype() != _subtype):
1082             getattr(self, 'make_' + _subtype)()
1083         part = type(self)(policy=self.policy)
1084         part.set_content(*args, **kw)
1085         if _disp and 'content-disposition' not in part:
1086             part['Content-Disposition'] = _disp
1087         self.attach(part)
1088 
1089     def add_related(self, *args, **kw):
1090         self._add_multipart('related', *args, _disp='inline', **kw)
1091 
1092     def add_alternative(self, *args, **kw):
1093         self._add_multipart('alternative', *args, **kw)
1094 
1095     def add_attachment(self, *args, **kw):
1096         self._add_multipart('mixed', *args, _disp='attachment', **kw)
1097 
1098     def clear(self):
1099         self._headers = []
1100         self._payload = None
1101 
1102     def clear_content(self):
1103         self._headers = [(n, v) for n, v in self._headers
1104                          if not n.lower().startswith('content-')]
1105         self._payload = None
1106 
1107 
class EmailMessage(MIMEPart):
    """MIMEPart that makes sure a MIME-Version header accompanies content."""

    def set_content(self, *args, **kw):
        """Set the content, then add 'MIME-Version: 1.0' if it is missing.

        All arguments are passed through to MIMEPart.set_content().
        """
        super().set_content(*args, **kw)
        if 'MIME-Version' in self:
            return
        self['MIME-Version'] = '1.0'