diff --git a/extractor.py b/extractor.py index 6051173..95d60bf 100644 --- a/extractor.py +++ b/extractor.py @@ -6,6 +6,8 @@ and https://yuji.wordpress.com/2011/06/22/python-imaplib-imap-example-with-gmail/ Initially grabbed from : https://gist.github.com/abought/15a1e08705b121c1b7bd + +Version: 1.0 on 22/06/2023 """ __author__ = 'mj' @@ -21,6 +23,7 @@ from dotenv import load_dotenv from email.message import EmailMessage from pprint import pprint as pp import boto3 +from datetime import date # Load environment variables from .env file load_dotenv() @@ -32,8 +35,9 @@ load_dotenv() SEARCH_FOLDER = ['"Trash"', '"INBOX"'] DEFAULT_MAIL_SERVER = os.getenv("EMAIL_SERVER") -# Output file name -OUTPUT_FILE = "recipient_list.txt" +# Output file name with date timestamp +today = date.today().strftime("%Y-%m-%d") +OUTPUT_FILE = f"recipient_list_{today}.txt" # Email settings SENDER_SMTP = os.getenv("SENDER_SMTP") @@ -45,7 +49,8 @@ RECIPIENT_EMAIL = os.getenv("RECIPIENT_EMAIL") AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID") AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY") S3_BUCKET_NAME = os.getenv("S3_BUCKET_NAME") -S3_OUTPUT_FILE_KEY = "recipient_list.txt" +S3_BUCKET_REGION = os.getenv("S3_BUCKET_REGION") +S3_OUTPUT_FILE_KEY = f"subdirectory/recipient_list_{today}.txt" # Modify the subdirectory path here # No user parameters below this line ADDR_PATTERN = re.compile("<(.+)>") # Finds email as @@ -117,7 +122,12 @@ def get_recipients(msg): def publish_to_s3(bucket_name, file_name, data): - s3 = boto3.client('s3', aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY) + s3 = boto3.client( + 's3', + aws_access_key_id=AWS_ACCESS_KEY_ID, + aws_secret_access_key=AWS_SECRET_ACCESS_KEY, + region_name=S3_BUCKET_REGION + ) s3.put_object(Body=data, Bucket=bucket_name, Key=file_name) @@ -152,31 +162,31 @@ if __name__ == "__main__": for recipient in unique_recipients: file.write(recipient + '\n') - # Delete the analyzed emails - for folder in SEARCH_FOLDER: - for mail_id in get_mails_from_folder(mail_conn, folder): - mail_conn.store(mail_id, '+FLAGS', '\\Deleted') # Mark the email as deleted - mail_conn.expunge() # Permanently remove the deleted emails + # Publish the output file to S3 + with open(OUTPUT_FILE, 'rb') as file: + output_data = file.read() + publish_to_s3(S3_BUCKET_NAME, S3_OUTPUT_FILE_KEY, output_data) - # Close the connection - mail_conn.close() - mail_conn.logout() + # Delete the analyzed emails + for folder in SEARCH_FOLDER: + for mail_id in get_mails_from_folder(mail_conn, folder): + mail_conn.store(mail_id, '+FLAGS', '\\Deleted') # Mark the email as deleted + mail_conn.expunge() # Permanently remove the deleted emails - # Publish the output file to S3 - with open(OUTPUT_FILE, 'rb') as file: - output_data = file.read() - publish_to_s3(S3_BUCKET_NAME, S3_OUTPUT_FILE_KEY, output_data) + # Close the connection + mail_conn.close() + mail_conn.logout() - # Send email with the count of unique addresses - msg = EmailMessage() - msg['Subject'] = 'Mail extractor unique recipient count' - msg['From'] = SENDER_EMAIL - msg['To'] = RECIPIENT_EMAIL - msg.set_content(f"The count of unique recipients is: {unique_recipient_count}") + # Send email with the count of unique addresses + msg = EmailMessage() + msg['Subject'] = 'Mail extractor unique recipient count' + msg['From'] = SENDER_EMAIL + msg['To'] = RECIPIENT_EMAIL + msg.set_content(f"The count of unique recipients is: {unique_recipient_count}") - with smtplib.SMTP(SENDER_SMTP, 587) as server: - server.starttls() - server.login(SENDER_EMAIL, SENDER_PASSWORD) - server.send_message(msg) + with smtplib.SMTP(SENDER_SMTP, 587) as server: + server.starttls() + server.login(SENDER_EMAIL, SENDER_PASSWORD) + server.send_message(msg) - print("Recipient list generated successfully, analyzed emails erased, and email sent.") + print("Recipient list generated successfully, analyzed emails erased, and email sent.")