diff --git a/extractor.py b/extractor.py index 02db35b..6051173 100644 --- a/extractor.py +++ b/extractor.py @@ -1,4 +1,4 @@ -"""Create a connection to IMAPS mail account extract all unique adresses and output a single file with the results. +"""Create a connection to an IMAPS mail account, extract all unique addresses and output a single file with the results to an S3 bucket. References: http://www.voidynullness.net/blog/2013/07/25/gmail-email-with-python-via-imap/ @@ -20,6 +20,7 @@ import smtplib from dotenv import load_dotenv from email.message import EmailMessage from pprint import pprint as pp +import boto3 # Load environment variables from .env file load_dotenv() @@ -40,6 +41,12 @@ SENDER_EMAIL = os.getenv("SENDER_EMAIL") SENDER_PASSWORD = os.getenv("SENDER_PASSWORD") RECIPIENT_EMAIL = os.getenv("RECIPIENT_EMAIL") +# S3 bucket settings +AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID") +AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY") +S3_BUCKET_NAME = os.getenv("S3_BUCKET_NAME") +S3_OUTPUT_FILE_KEY = "recipient_list.txt"  # S3 object key the output file is stored under + # No user parameters below this line ADDR_PATTERN = re.compile("<(.+)>") # Finds email as @@ -109,6 +116,11 @@ def get_recipients(msg): return recipients +def publish_to_s3(bucket_name, file_name, data):  # Upload data to bucket_name under object key file_name using put_object + s3 = boto3.client('s3', aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY) + s3.put_object(Body=data, Bucket=bucket_name, Key=file_name) + + if __name__ == "__main__": # Retrieve username and password from environment variables username = os.getenv("EMAIL_USERNAME") @@ -150,6 +162,11 @@ if __name__ == "__main__": mail_conn.close() mail_conn.logout() + # Publish the output file to S3 + with open(OUTPUT_FILE, 'rb') as file: + output_data = file.read() + publish_to_s3(S3_BUCKET_NAME, S3_OUTPUT_FILE_KEY, output_data) + # Send email with the count of unique addresses msg = EmailMessage() msg['Subject'] = 'Mail extractor unique recipient count'