Three caveats:
1. If you need to download a large number of e-mail headers, it will take some time (maybe several minutes).
2. It sometimes gets the dates wrong. However, this seems to occur only in a statistically insignificant fraction of cases.
3. Fetching headers with a plain IMAP `BODY[...]` request marks every message it accesses as read; requesting `BODY.PEEK[...]` instead (or selecting the mailbox read-only) avoids this.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import dateutil.parser | |
import matplotlib.dates as mdates | |
import imaplib, getpass, re | |
from email.parser import HeaderParser | |
import matplotlib.pyplot as pl | |
import numpy as np | |
import sys | |
def connect(email, host="imap.gmail.com"):
    """Open an SSL IMAP connection and log in interactively.

    Parameters:
        email -- account name passed to the IMAP LOGIN command.
        host  -- IMAP server to connect to (defaults to Gmail's server).

    Returns the logged-in ``imaplib.IMAP4_SSL`` object.

    The password is read from the terminal with ``getpass``.  Any error
    raised while prompting or logging in (for example
    ``imaplib.IMAP4.error`` on bad credentials) is propagated to the caller.
    """
    imap = imaplib.IMAP4_SSL(host)
    try:
        password = getpass.getpass("Enter your password: ")
        imap.login(email, password)
    except BaseException:
        # Do not leave the socket open when the prompt is interrupted or the
        # login is rejected; close it and let the original error propagate.
        imap.shutdown()
        raise
    return imap
def disconnect(imap):
    """End the IMAP session held by *imap* by logging out of the server."""
    imap.logout()
# Compiled once at import time; matches e.g. "12 (UID 3456)".
_UID_PATTERN = re.compile(r'\d+ \(UID (?P<uid>\d+)\)')


def parse_uid(data):
    """Extract the UID from a fetch response of the form ``N (UID M)``.

    Parameters:
        data -- the response text, as ``str`` or as ``bytes``.

    Returns the UID digits as a string.

    Raises ValueError if *data* does not start with the expected pattern.
    """
    # On Python 3 bytes and str are distinct types and a str pattern cannot
    # be applied to bytes, so decode first.  On Python 2 bytes is str and
    # the value is used as-is.
    if isinstance(data, bytes) and not isinstance(data, str):
        data = data.decode('ascii', 'replace')
    match = _UID_PATTERN.match(data)
    if match is None:
        raise ValueError('no UID found in response: %r' % (data,))
    return match.group('uid')
def plotdates(dnums):
    """Plot a histogram, by calendar month, of a set of dates.

    Parameters:
        dnums -- dates as matplotlib date numbers (array or plain sequence).

    Returns ``(counts, months)``: ``months`` holds one datetime for the
    first day of every month from the earliest to the latest date
    (inclusive, including months with no dates) and ``counts`` holds the
    number of dates falling in each of those months.

    Raises ValueError if *dnums* is empty.
    """
    ddates = mdates.num2date(dnums)
    if len(ddates) == 0:
        raise ValueError('plotdates() needs at least one date')

    # Tally every date once, keyed by (year, month), instead of rescanning
    # the full list for each month bucket.
    tally = {}
    for dd in ddates:
        key = (dd.year, dd.month)
        tally[key] = tally.get(key, 0) + 1

    # (year, month) tuples order chronologically, so min/max give the range.
    year, month = min(tally)
    last = max(tally)

    months = []
    counts = []
    while (year, month) <= last:
        # matplotlib.dates imports the datetime module, reachable here as
        # mdates.datetime.
        months.append(mdates.datetime.datetime(year, month, 1))
        counts.append(tally.get((year, month), 0))
        month += 1
        if month > 12:
            year, month = year + 1, 1

    fig = pl.figure()
    ax = fig.add_subplot(111)
    ax.bar(months, counts, width=20.0)
    for label in ax.get_xticklabels() + ax.get_yticklabels():
        label.set_fontsize(30)
    ax.xaxis_date()
    fig.autofmt_xdate()
    pl.draw()
    return counts, months
def getdates(imap, mailboxname):
    """Fetch the Date header of every message in a mailbox.

    Parameters:
        imap        -- a logged-in IMAP connection object.
        mailboxname -- name of the mailbox to select.

    Returns the parsed dates converted with ``matplotlib.dates.date2num``.

    Progress messages are printed to stdout.  The connection is logged out
    (via ``disconnect``) once the headers have been fetched, or if fetching
    fails, so *imap* must not be used after this call.  Messages whose Date
    header is missing or cannot be parsed are skipped and counted.
    """
    try:
        imap.select(mailbox=mailboxname)
        print('searching...'); sys.stdout.flush()
        resp, items = imap.search(None, 'All')
        # The search result is one whitespace-separated string of ids.
        email_ids = (items[0] or b'').split()
        print('%d messages in  %s' % (len(email_ids), mailboxname)); sys.stdout.flush()
        print('fetching headers...'); sys.stdout.flush()
        if email_ids:
            # BODY.PEEK returns the same data as BODY but does not mark the
            # messages as read.
            resp, data = imap.fetch(b','.join(email_ids),
                                    "(BODY.PEEK[HEADER.FIELDS (DATE)])")
        else:
            # An empty message set is not a valid FETCH argument.
            data = []
    finally:
        disconnect(imap)

    # Header payloads arrive as (envelope, payload) tuples; other entries
    # in the response list (such as the closing ')') are skipped rather
    # than assuming a strict tuple/string alternation.
    headers = [part[1] for part in data if isinstance(part, tuple)]
    print('%d  headers fetched from  %s' % (len(headers), mailboxname)); sys.stdout.flush()

    print('reformatting dates...'); sys.stdout.flush()
    dates = []
    for raw in headers:
        if isinstance(raw, bytes) and not isinstance(raw, str):
            # Python 3 imaplib returns bytes.
            raw = raw.decode('ascii', 'replace')
        # Drop the leading "Date:" token and keep the next four tokens
        # (for "Date: Wkd, DD Mon YYYY HH:MM:SS +ZZZZ" that is the
        # weekday, day, month and year).
        dates.append(' '.join(raw.split()[1:5]))

    print('converting dates...'); sys.stdout.flush()
    dtimes = []
    skipped = 0
    for date in dates:
        if not date:
            # No Date header at all; there is nothing to parse.
            skipped += 1
            continue
        try:
            dtimes.append(dateutil.parser.parse(date))
        except (ValueError, OverflowError):
            skipped += 1
    if skipped:
        print('skipped %d messages with missing or unparseable dates' % skipped)
        sys.stdout.flush()
    dnums = mdates.date2num(dtimes)
    return dnums
if __name__ == '__main__':
    # '<your.email.here>' is a placeholder: replace it with the account to
    # log in as before running the script.
    mailboxname = 'everything'
    imap = connect('<your.email.here>')
    dnums = getdates(imap, mailboxname)
    print('plotting...'); sys.stdout.flush()
    plotdates(dnums)
    # plotdates() only calls draw(); show() is what actually opens the
    # figure window when the script is run non-interactively.
    pl.show()