"""An interface for downloading and filtering Matchlight feeds."""
from __future__ import absolute_import, print_function
import datetime
import io
import json
import time
import six
import matchlight.error
import matchlight.utils
if not six.PY3:
from backports import csv
else:
import csv
class Feed(object):
    """Represents a Matchlight Data Feed.

    Examples:
        >>> ml = matchlight.Matchlight()
        >>> feed = ml.feeds.filter()[0]
        >>> feed
        <Feed(name="CompanyEmailAddress", recent_alerts=2)>
        >>> feed.details
        {'description': None, 'name': u'CompanyEmailAddress',
        'recent_alerts_count': 2,
        'start_timestamp': '2016-06-03T00:00:00',
        'stop_timestamp': None}

    Attributes:
        description (:obj:`str`): Description of the feed.
        name (:obj:`str`): Name of the feed.
        recent_alerts_count (int): Number of recent alerts.
        start_timestamp: Start time of the feed, stored exactly as
            passed to the constructor (ISO 8601 string or Unix
            timestamp). Use :attr:`start` for a datetime.
        stop_timestamp: Stop time of the feed, stored exactly as
            passed to the constructor, or :obj:`None`. Use
            :attr:`end` for a datetime.

    """

    # ISO 8601 layouts accepted by ``start``/``end``, most specific first.
    _ISO_8601_FORMATS = (
        '%Y-%m-%dT%H:%M:%S.%f',
        '%Y-%m-%dT%H:%M:%S',
        '%Y-%m-%d',
    )

    def __init__(self, name, description, recent_alerts_count,
                 start_timestamp, stop_timestamp=None):
        """Initializes a new Matchlight feed.

        Args:
            name (:obj:`str`): Name of the feed.
            description (:obj:`str`): Description of the feed.
            recent_alerts_count (int): Number of recent alerts.
            start_timestamp (:obj:`str` or number): ISO 8601 formatted
                timestamp (or Unix timestamp) of when feed collection
                began.
            stop_timestamp (:obj:`str` or number, optional): ISO 8601
                formatted timestamp (or Unix timestamp) of when feed
                collection ended or will end. If not provided, the
                feed is assumed to never expire.

        """
        self.name = name
        self.description = description
        self.recent_alerts_count = recent_alerts_count
        self.start_timestamp = start_timestamp
        self.stop_timestamp = stop_timestamp

    @classmethod
    def _to_datetime(cls, value):
        """Converts a Unix timestamp or ISO 8601 string to a datetime.

        Args:
            value: A number of seconds since the epoch, or a string in
                one of the layouts listed in ``_ISO_8601_FORMATS``.

        Returns:
            :class:`datetime.datetime`: A naive datetime. Numeric input
            is interpreted by ``datetime.fromtimestamp`` (local time).

        Raises:
            ValueError: If a string matches none of the known layouts.
            TypeError: If ``value`` is neither a number nor a string.

        """
        try:
            # Numeric input: the behaviour this class has always had.
            return datetime.datetime.fromtimestamp(value)
        except TypeError:
            # Not a number; fall through and treat it as ISO 8601 text,
            # which is what the constructor documents.
            pass
        for layout in cls._ISO_8601_FORMATS:
            try:
                return datetime.datetime.strptime(value, layout)
            except ValueError:
                continue
        raise ValueError(
            'Unrecognized timestamp: {value!r}'.format(value=value))

    @property
    def details(self):
        """:obj:`dict`: Returns the feed details as a mapping."""
        return {
            'name': self.name,
            'description': self.description,
            'recent_alerts_count': self.recent_alerts_count,
            'start_timestamp': self.start_timestamp,
            'stop_timestamp': self.stop_timestamp,
        }

    @property
    def start(self):
        """:class:`datetime.datetime`: When feed data collection began."""
        return self._to_datetime(self.start_timestamp)

    @property
    def end(self):  # noqa: D205,D400
        """:obj:`NoneType` or :class:`datetime.datetime`: If the feed
        has a ``stop_timestamp``, returns a datetime object. Otherwise,
        returns :obj:`NoneType`.
        """
        if self.stop_timestamp:
            return self._to_datetime(self.stop_timestamp)
        return None

    def __repr__(self):  # pragma: no cover
        return '<Feed(name="{name}", recent_alerts={alerts})>'.format(
            alerts=self.recent_alerts_count, name=self.name)
class FeedMethods(object):
    """Provides methods for interfacing with the feeds API."""

    def __init__(self, ml_connection):  # noqa: D205,D400
        """Initializes a feed interface with the given Matchlight
        connection.

        Args:
            ml_connection (:class:`~.Connection`): A Matchlight
                connection instance.

        """
        self.conn = ml_connection

    def all(self):
        """Returns a list of feeds associated with a Matchlight account.

        Returns:
            :obj:`list` of :class:`matchlight.Feed`: A list of feeds
            associated with an account.

        """
        r = self.conn.request('/feeds')
        return [Feed(**feed) for feed in r.json()['feeds']]

    def counts(self, feed, start_date, end_date):
        """Daily counts for a feed for a given date range.

        Args:
            feed (:class:`~.Feed`): A feed instance or feed name.
            start_date (:class:`datetime.datetime`): Start of date range.
            end_date (:class:`datetime.datetime`): End of date range.

        Returns:
            :obj:`dict`: Mapping of dates (``YYYY-MM-DD``) to alert counts.

        """
        response = self.conn.request(
            '/feeds/{feed_name}'.format(feed_name=self._feed_name(feed)),
            data=json.dumps(self._date_range(start_date, end_date)))
        return self._format_count(response.json())

    def download(self, feed, start_date, end_date, save_path=None):
        """Downloads feed data for the given date range.

        Asks the API to prepare the feed, polls once per second until
        the prepared file is no longer ``pending``, then fetches it.

        Args:
            feed (:class:`~.Feed`): A feed instance or feed name.
            start_date (:class:`datetime.datetime`): Start of date range.
            end_date (:class:`datetime.datetime`): End of date range.
            save_path (:obj:`str`): Path to output file.

        Returns:
            :obj:`list` of :obj:`dict`: All feed hits for the given
            range. When ``save_path`` is given, the raw download is
            written to that path instead and :obj:`None` is returned.

        Raises:
            matchlight.error.SDKError: If the prepare request does not
                return HTTP 200, or if the final status is anything
                other than ``ready``.

        """
        feed_name = self._feed_name(feed)
        response = self.conn.request(
            '/feed/{feed_name}/prepare'.format(feed_name=feed_name),
            data=json.dumps(self._date_range(start_date, end_date)))
        if response.status_code != 200:
            raise matchlight.error.SDKError(
                'Feed failed to be generated. Please try again later.')

        # The endpoint and payload are the same for every poll, so build
        # them once.
        link_endpoint = '/feed/{feed_name}/link'.format(feed_name=feed_name)
        link_request = json.dumps(
            {'feed_response_id': response.json().get('feed_response_id')})

        # TODO: backoff and timeout
        while True:
            link = self.conn.request(link_endpoint, data=link_request).json()
            status = link.get('status', None)
            if status != 'pending':
                break
            # Sleep only between polls, never after a terminal status.
            time.sleep(1)

        if status == 'failed':
            raise matchlight.error.SDKError(
                'Feed failed to be generated. Please try again later.')
        if status != 'ready':
            raise matchlight.error.SDKError('An unknown error occurred.')

        content = self.conn._request('GET', link.get('url'))
        if save_path:
            with io.open(save_path, 'wb') as f:
                f.write(content.content)
            return None

        # 'utf-8-sig' drops a leading byte-order mark if one is present.
        unicode_feed = content.content.decode('utf-8-sig')
        return [
            self._format_feed(row)
            for row in csv.DictReader(unicode_feed.split('\n'))
        ]

    @staticmethod
    def _feed_name(feed):
        """Returns ``feed`` if it is a string, else its ``name``."""
        if isinstance(feed, six.string_types):
            return feed
        return feed.name

    @staticmethod
    def _date_range(start_date, end_date):
        """Builds the start/end payload as integer Unix timestamps."""
        return {
            'start_date': int(matchlight.utils.datetime_to_unix(start_date)),
            'end_date': int(matchlight.utils.datetime_to_unix(end_date)),
        }

    def _format_count(self, counts):
        """Re-keys a timestamp-to-count mapping by ``YYYY-MM-DD`` date.

        NOTE(review): ``fromtimestamp`` without a tz converts to local
        time, so the date bucket depends on the host timezone -- confirm
        that this is intended.
        """
        return {
            datetime.datetime.fromtimestamp(int(k)).strftime('%Y-%m-%d'): v
            for k, v in counts.items()
        }

    def _format_feed(self, feed_row):
        """Replaces the row's ``ts`` value with a datetime, in place."""
        feed_row['ts'] = matchlight.utils.terbium_timestamp_to_datetime(
            feed_row['ts'])
        return feed_row

    def __iter__(self):
        """Iterates over the feeds returned by :meth:`all`."""
        return (item for item in self.all())