# Copyright (C) 2001-2007 Python Software Foundation
# Author: Barry Warsaw, Thomas Wouters, Anthony Baxter
# Contact: email-sig@python.org

"""A parser of RFC 2822 and MIME email messages."""

__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser',
           'FeedParser', 'BytesFeedParser']

import warnings
from io import StringIO, TextIOWrapper

from email.feedparser import FeedParser, BytesFeedParser
from email.message import Message
from email._policybase import compat32



class Parser:
    """Parse text (str) input into a message object tree via FeedParser."""

    def __init__(self, _class=Message, *, policy=compat32):
        """Parser of RFC 2822 and MIME email messages.

        Creates an in-memory object tree representing the email message, which
        can then be manipulated and turned over to a Generator to return the
        textual representation of the message.

        The string must be formatted as a block of RFC 2822 headers and header
        continuation lines, optionally preceded by a `Unix-from' header.  The
        header block is terminated either by the end of the string or by a
        blank line.

        _class is the class to instantiate for new message objects when they
        must be created.  This class must have a constructor that can take
        zero arguments.  Default is email.message.Message.

        The policy keyword specifies a policy object that controls a number of
        aspects of the parser's operation.  The default policy maintains
        backward compatibility.

        """
        self._class = _class
        self.policy = policy

    def parse(self, fp, headersonly=False):
        """Create a message structure from the data in a file.

        Reads all the data from the file and returns the root of the message
        structure.  Optional headersonly is a flag specifying whether to stop
        parsing after reading the headers or not.  The default is False,
        meaning it parses the entire contents of the file.

        fp only needs a read(size) method returning text; it is neither
        closed nor otherwise modified by this method.
        """
        feedparser = FeedParser(self._class, policy=self.policy)
        if headersonly:
            feedparser._set_headersonly()
        # Feed the input in fixed-size chunks so the whole file never has to
        # be held in a single string; a falsy read() result signals EOF.
        while True:
            data = fp.read(8192)
            if not data:
                break
            feedparser.feed(data)
        return feedparser.close()

    def parsestr(self, text, headersonly=False):
        """Create a message structure from a string.

        Returns the root of the message structure.  Optional headersonly is a
        flag specifying whether to stop parsing after reading the headers or
        not.  The default is False, meaning it parses the entire contents of
        the string.
        """
        return self.parse(StringIO(text), headersonly=headersonly)



class HeaderParser(Parser):
    """A Parser that always stops after the headers.

    The headersonly argument is accepted for signature compatibility with
    Parser, but its value is ignored: True is always passed on.
    """

    def parse(self, fp, headersonly=True):
        """Parse only the headers of the message read from fp."""
        return Parser.parse(self, fp, True)

    def parsestr(self, text, headersonly=True):
        """Parse only the headers of the message contained in text."""
        return Parser.parsestr(self, text, True)


class BytesParser:
    """Parse binary (bytes) input by delegating to a text Parser."""

    def __init__(self, *args, **kw):
        """Parser of binary RFC 2822 and MIME email messages.

        Creates an in-memory object tree representing the email message, which
        can then be manipulated and turned over to a Generator to return the
        textual representation of the message.

        The input must be formatted as a block of RFC 2822 headers and header
        continuation lines, optionally preceded by a `Unix-from' header.  The
        header block is terminated either by the end of the input or by a
        blank line.

        _class is the class to instantiate for new message objects when they
        must be created.  This class must have a constructor that can take
        zero arguments.  Default is email.message.Message.

        All positional and keyword arguments are forwarded unchanged to
        Parser, so the policy keyword is accepted here as well.
        """
        self.parser = Parser(*args, **kw)

    def parse(self, fp, headersonly=False):
        """Create a message structure from the data in a binary file.

        Reads all the data from the file and returns the root of the message
        structure.  Optional headersonly is a flag specifying whether to stop
        parsing after reading the headers or not.  The default is False,
        meaning it parses the entire contents of the file.

        The caller's file object is left open; closing it remains the
        caller's responsibility.
        """
        # Decode as ASCII with surrogateescape so that non-ASCII bytes are
        # carried through as lone surrogates instead of raising.
        fp = TextIOWrapper(fp, encoding='ascii', errors='surrogateescape')
        try:
            return self.parser.parse(fp, headersonly)
        finally:
            # Detach rather than close: closing the wrapper (explicitly, via
            # a with-statement, or on garbage collection) would also close
            # the binary file that the caller handed us.
            fp.detach()

    def parsebytes(self, text, headersonly=False):
        """Create a message structure from a byte string.

        Returns the root of the message structure.  Optional headersonly is a
        flag specifying whether to stop parsing after reading the headers or
        not.  The default is False, meaning it parses the entire contents of
        the byte string.
        """
        text = text.decode('ASCII', errors='surrogateescape')
        return self.parser.parsestr(text, headersonly)


class BytesHeaderParser(BytesParser):
    """A BytesParser that always stops after the headers.

    The headersonly argument is accepted for signature compatibility with
    BytesParser, but its value is ignored: True is always passed on.
    """

    def parse(self, fp, headersonly=True):
        """Parse only the headers of the message read from binary file fp."""
        return BytesParser.parse(self, fp, headersonly=True)

    def parsebytes(self, text, headersonly=True):
        """Parse only the headers of the message contained in bytes text."""
        return BytesParser.parsebytes(self, text, headersonly=True)