Write the unique recipient list to OUTPUT_FILE, then delete the analyzed mails.

Review notes on this patch:
  * getpass is imported locally in the script body, because the import block
    of extractor.py is not part of this patch.
  * The message ids handled here are uids (see fetch_message's docstring), so
    the \Deleted flag is set through the UID command, not plain STORE.
  * Only uids that were actually analyzed are deleted, and only after the
    recipient list has been written to disk.
  * get_mails_from_folder() keeps opening folders read-only; the deletion
    pass re-selects each folder read-write itself.
  * Blank context lines and indentation were reconstructed from the hunk
    line counts; re-check them against the real file before applying.

diff --git a/extractor.py b/extractor.py
--- a/extractor.py
+++ b/extractor.py
@@ -26,3 +26,6 @@ from pprint import pprint as pp
 DEFAULT_MAIL_SERVER = 'mail.antipode.net'
 
+# Output file name
+OUTPUT_FILE = "recipient_list.txt"
+
 # No user parameters below this line
@@ -96,32 +99,48 @@ def get_recipients(msg):
 
 if __name__ == "__main__":
+    import getpass  # local import: this patch leaves the import block alone
+
     username = input("Enter username: ")
-    password = input("Enter password: ")
+    password = getpass.getpass("Enter password: ")  # never echo the password
 
     # Connect
     mail_conn = connect(username, password)
 
-    # show folders of mail account
-    #print_folders(mail_conn)
-
-    # Open folders and get list of email message uids
+    # Open folders and get list of email message uids; remember per folder
+    # which uids were analyzed so that exactly those get deleted afterwards
     all_recipients = []
+    analyzed_uids = {}
     for folder in SEARCH_FOLDER:
         # switch to folder
+        analyzed_uids[folder] = []
         for mail_id in get_mails_from_folder(mail_conn, folder):
             data = fetch_message(mail_conn, mail_id)
             recip_list = get_recipients(data)
             all_recipients.extend(recip_list)
+            analyzed_uids[folder].append(mail_id)
 
-    mail_conn.close()
+    # Write the list to disk BEFORE anything is deleted on the server
+    unique_recipients = sorted(set(all_recipients))
+    with open(OUTPUT_FILE, 'w', encoding='utf-8') as out_file:
+        out_file.writelines(addr + '\n' for addr in unique_recipients)
 
+    # Delete the analyzed emails
+    for folder, uids in analyzed_uids.items():
+        # Re-open read-write; the analysis pass above can stay read-only
+        typ, _ = mail_conn.select(mailbox=folder, readonly=False)
+        if typ != 'OK':
+            print("Could not open folder for deletion:", folder)
+            continue
+        for mail_id in uids:
+            # These are uids, so STORE has to go through the UID command
+            mail_conn.uid('STORE', mail_id, '+FLAGS', '(\\Deleted)')
+        mail_conn.expunge()  # only affects the currently selected folder
+
+    # Close the connection
+    try:
+        mail_conn.close()
+    except imaplib.IMAP4.error:
+        pass  # no folder is selected (every select failed): nothing to close
     mail_conn.logout()
 
-    # Very unsophisticated way of showing the recipient list
-    print("List of all recipients:")
-    print("------------")
-    pp(all_recipients)
-
-    print("\n\n List of all UNIQUE recipients:")
-    print("-------------------------------")
-    pp(set(all_recipients))
\ No newline at end of file
+    print("Recipient list generated successfully and analyzed emails erased.")