1 # Copyright (C) 2001-2007 Python Software Foundation
   2 # Author: Barry Warsaw, Thomas Wouters, Anthony Baxter
   3 # Contact: email-sig@python.org
   4 
   5 """A parser of RFC 2822 and MIME email messages."""
   6 
   7 __all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser',
   8            'FeedParser', 'BytesFeedParser']
   9 
  10 import warnings
  11 from io import StringIO, TextIOWrapper
  12 
  13 from email.feedparser import FeedParser, BytesFeedParser
  14 from email.message import Message
  15 from email._policybase import compat32
  16 
  17 
  18 
  19 class Parser:
  20     def __init__(self, _class=Message, *, policy=compat32):
  21         """Parser of RFC 2822 and MIME email messages.
  22 
  23         Creates an in-memory object tree representing the email message, which
  24         can then be manipulated and turned over to a Generator to return the
  25         textual representation of the message.
  26 
  27         The string must be formatted as a block of RFC 2822 headers and header
  28         continuation lines, optionally preceeded by a `Unix-from' header.  The
  29         header block is terminated either by the end of the string or by a
  30         blank line.
  31 
  32         _class is the class to instantiate for new message objects when they
  33         must be created.  This class must have a constructor that can take
  34         zero arguments.  Default is Message.Message.
  35 
  36         The policy keyword specifies a policy object that controls a number of
  37         aspects of the parser's operation.  The default policy maintains
  38         backward compatibility.
  39 
  40         """
  41         self._class = _class
  42         self.policy = policy
  43 
  44     def parse(self, fp, headersonly=False):
  45         """Create a message structure from the data in a file.
  46 
  47         Reads all the data from the file and returns the root of the message
  48         structure.  Optional headersonly is a flag specifying whether to stop
  49         parsing after reading the headers or not.  The default is False,
  50         meaning it parses the entire contents of the file.
  51         """
  52         feedparser = FeedParser(self._class, policy=self.policy)
  53         if headersonly:
  54             feedparser._set_headersonly()
  55         while True:
  56             data = fp.read(8192)
  57             if not data:
  58                 break
  59             feedparser.feed(data)
  60         return feedparser.close()
  61 
  62     def parsestr(self, text, headersonly=False):
  63         """Create a message structure from a string.
  64 
  65         Returns the root of the message structure.  Optional headersonly is a
  66         flag specifying whether to stop parsing after reading the headers or
  67         not.  The default is False, meaning it parses the entire contents of
  68         the file.
  69         """
  70         return self.parse(StringIO(text), headersonly=headersonly)
  71 
  72 
  73 
  74 class HeaderParser(Parser):
  75     def parse(self, fp, headersonly=True):
  76         return Parser.parse(self, fp, True)
  77 
  78     def parsestr(self, text, headersonly=True):
  79         return Parser.parsestr(self, text, True)
  80 
  81 
  82 class BytesParser:
  83 
  84     def __init__(self, *args, **kw):
  85         """Parser of binary RFC 2822 and MIME email messages.
  86 
  87         Creates an in-memory object tree representing the email message, which
  88         can then be manipulated and turned over to a Generator to return the
  89         textual representation of the message.
  90 
  91         The input must be formatted as a block of RFC 2822 headers and header
  92         continuation lines, optionally preceeded by a `Unix-from' header.  The
  93         header block is terminated either by the end of the input or by a
  94         blank line.
  95 
  96         _class is the class to instantiate for new message objects when they
  97         must be created.  This class must have a constructor that can take
  98         zero arguments.  Default is Message.Message.
  99         """
 100         self.parser = Parser(*args, **kw)
 101 
 102     def parse(self, fp, headersonly=False):
 103         """Create a message structure from the data in a binary file.
 104 
 105         Reads all the data from the file and returns the root of the message
 106         structure.  Optional headersonly is a flag specifying whether to stop
 107         parsing after reading the headers or not.  The default is False,
 108         meaning it parses the entire contents of the file.
 109         """
 110         fp = TextIOWrapper(fp, encoding='ascii', errors='surrogateescape')
 111         with fp:
 112             return self.parser.parse(fp, headersonly)
 113 
 114 
 115     def parsebytes(self, text, headersonly=False):
 116         """Create a message structure from a byte string.
 117 
 118         Returns the root of the message structure.  Optional headersonly is a
 119         flag specifying whether to stop parsing after reading the headers or
 120         not.  The default is False, meaning it parses the entire contents of
 121         the file.
 122         """
 123         text = text.decode('ASCII', errors='surrogateescape')
 124         return self.parser.parsestr(text, headersonly)
 125 
 126 
 127 class BytesHeaderParser(BytesParser):
 128     def parse(self, fp, headersonly=True):
 129         return BytesParser.parse(self, fp, headersonly=True)
 130 
 131     def parsebytes(self, text, headersonly=True):
 132         return BytesParser.parsebytes(self, text, headersonly=True)