Source code for matchlight.record

"""An interface for creating and retrieving PII records in Matchlight."""
from __future__ import absolute_import

import io
import json

import six

import matchlight.error
import matchlight.utils

from pylibfp import (
    fingerprint,
    fingerprints_pii_address_variants,
    fingerprints_pii_city_state_zip_variants,
    fingerprints_pii_credit_card,
    fingerprints_pii_email_address,
    fingerprints_pii_iban,
    fingerprints_pii_name_variants,
    fingerprints_pii_phone_number,
    fingerprints_pii_ssn,
    MODE_CODE,
    OPTIONS_TILED,
)


# Names exported by ``from matchlight.record import *``.
__all__ = (
    'Record',
    'RecordMethods',
)


# Upper bound applied to ``len(content)`` by ``RecordMethods.add_document``
# and ``RecordMethods.add_source_code`` before fingerprinting. Despite the
# name, the comparison is against the number of characters in the text, not
# against a count of generated fingerprints.
MAX_DOCUMENT_FINGERPRINTS = 840


class Record(object):
    """Represents a personal information record.

    Attributes:
        id (:obj:`str`): The record identifier.
        name (:obj:`str`): The name of the record.
        description (:obj:`str`): The description of the record.
        ctime (:obj:`int` or :obj:`NoneType`): Creation timestamp.
        mtime (:obj:`int` or :obj:`NoneType`): Last modification timestamp.
        metadata (:obj:`dict`): Additional record metadata.
    """

    def __init__(self, id, name, description, ctime=None, mtime=None,
                 metadata=None):
        """Initializes a new personal information record.

        Args:
            id (:obj:`str`): A 128-bit UUID.
            name (:obj:`str`): The name of the record.
            description (:obj:`str`): The description of the record.
            ctime (:obj:`int`, optional): A Unix timestamp of the
                record creation timestamp.
            mtime (:obj:`int`, optional): A Unix timestamp of the
                record last modification date timestamp.
            metadata (:obj:`dict`, optional): Additional record
                metadata. Defaults to a new empty dictionary.

        """
        # A fresh dict per instance avoids sharing a mutable default.
        if metadata is None:
            metadata = {}
        self.id = id
        self.name = name
        self.description = description
        self.ctime = ctime
        self.mtime = mtime
        self.metadata = metadata

    @classmethod
    def from_mapping(cls, mapping):
        """Creates a new record instance from the given mapping.

        Args:
            mapping (:obj:`dict`): A mapping with the keys ``id``,
                ``name`` and ``description``, and optionally ``ctime``,
                ``mtime`` and ``metadata``.

        Returns:
            :class:`~.Record`: The record built from the mapping.

        Raises:
            KeyError: If ``id``, ``name`` or ``description`` is missing.

        """
        return cls(
            id=mapping['id'],
            name=mapping['name'],
            description=mapping['description'],
            # These three are optional in ``__init__`` as well, so a
            # mapping that omits them must not raise ``KeyError``.
            ctime=mapping.get('ctime'),
            mtime=mapping.get('mtime'),
            metadata=mapping.get('metadata'),
        )

    @property
    def user_provided_id(self):
        """The user provided record identifier.

        Read from the ``user_record_id`` key of :attr:`metadata`;
        :obj:`None` when that key is absent.
        """
        return self.metadata.get('user_record_id', None)

    @property
    def details(self):
        """:obj:`dict`: Returns the record details as a mapping."""
        return {
            'id': self.id,
            'name': self.name,
            'description': self.description,
            'ctime': self.ctime,
            'mtime': self.mtime,
            'metadata': self.metadata,
        }

    def __repr__(self):  # pragma: no cover
        return '<Record(name="{}", id="{}")>'.format(self.name, self.id)
class RecordMethods(object):
    """Provides methods for interfacing with the records API.

    Examples:

        Get record by record id::

            >>> record = ml.records.get("0760570a2c4a4ea68d526f58bab46cbd")
            >>> record
            <Record(name="pce****@terbiumlabs.com",
            id="0760570a2c4a4ea68d526f58bab46cbd")>

        Add PII records to a project::

            >>> pii_project = ml.projects.add(
            ...     name="Employee Database May 2016",
            ...     project_type="pii")
            >>> record_data = {
            ...     "first_name": "Bird",
            ...     "last_name": "Feather",
            ...     "email": "familybird@terbiumlabs.com",
            ... }
            >>> new_record = ml.records.add_pii(
            ...     pii_project,
            ...     "uploaded on 20160519",
            ...     **record_data)

        Delete a record::

            >>> record
            <Record(name="fam****@terbiumlabs.com",
            id="655a732ad0f243beab1801651c2088a3")>
            >>> ml.records.delete(record)

    """

    def __init__(self, ml_connection):  # noqa: D205,D400
        """Initializes a records interface with the given Matchlight
        connection.

        Args:
            ml_connection (:class:`~.Connection`): A Matchlight
                connection instance.

        """
        self.conn = ml_connection

    def all(self):
        """Returns all records associated with the account."""
        return self.filter()

    def add_document(self, project, name, description, content,
                     user_record_id='-', min_score=None, offline=False):
        """Creates a new document record in the given project.

        Args:
            project (:class:`~.Project`): Project object to associate
                with record.
            name (:obj:`str`): The name of the document (not
                fingerprinted).
            description (:obj:`str`): A description of the record (not
                fingerprinted).
            content (:obj:`str`): The text of the document to be
                fingerprinted. Must be 840 characters or less.
            user_record_id (:obj:`str`, optional): An optional, user
                provided custom record identifier. Defaults to ``'-'``.
            min_score (optional): When not :obj:`None`, stored as a
                string under the ``min_score`` metadata key.
            offline (:obj:`bool`, optional): Run in "offline mode". No
                data is sent to the Matchlight server. Returns a
                dictionary of values instead of a :class:`~.Record`
                instance.

        Returns:
            :class:`~.Record`: Created record with metadata, or the
            fingerprint data :obj:`dict` when ``offline`` is true.

        Raises:
            matchlight.error.SDKError: If ``content`` is longer than
                the maximum document length.

        """
        if len(content) > MAX_DOCUMENT_FINGERPRINTS:
            raise matchlight.error.SDKError(
                'Fingerprinter Failed: the maximum length of a Document record'
                ' is 840 characters.'
            )

        # The fingerprinter returns a JSON document as text.
        result_json = fingerprint(content, flags=OPTIONS_TILED)
        result = json.loads(result_json)
        fingerprints = result['data']['fingerprints']

        data = {
            'name': name,
            'desc': description,
            'user_record_id': user_record_id,
            'fingerprints': fingerprints,
            'metadata': {
                'fingerprinting_tool_name': 'Python SDK',
                'fingerprinting_tool_version': matchlight.__version__
            }
        }
        if min_score is not None:
            data['metadata']['min_score'] = str(min_score)

        if offline:
            return data
        return self.add_document_from_fingerprints(project, data)

    def add_document_from_fingerprints(self, project, fingerprint_data):
        """Add a document record from fingerprints.

        Add a document record from fingerprinted data generated by
        :meth:`add_document` in offline mode.

        Args:
            project (:class:`~.Project`): Project object to associate
                with record.
            fingerprint_data (:obj:`dict`): The output of
                ``add_document(offline=True)``.

        Returns:
            :class:`~.Record`: Record built from the server response.

        """
        response = self.conn.request(
            '/records/upload/document/{upload_token}'.format(
                upload_token=project.upload_token
            ),
            data=json.dumps(fingerprint_data)
        )
        return Record.from_mapping(response.json())

    def add_pii(self, project, description, email, first_name=None,
                middle_name=None, last_name=None, ssn=None, address=None,
                city=None, state=None, zipcode=None, phone=None,
                credit_card=None, iban=None, user_record_id='-',
                offline=False):
        """Creates a new PII record in the given project.

        Args:
            project (:class:`~.Project`): Project object to associate
                with record.
            description (:obj:`str`): A description of the record (not
                fingerprinted).
            email (:obj:`str`): An email address. The argument must be
                passed, but a falsy value skips email fingerprinting
                and leaves the record without a ``name`` entry.
            first_name (:obj:`str`, optional): Defaults to
                :obj:`NoneType`. Requires ``last_name``.
            middle_name (:obj:`str`, optional): Defaults to
                :obj:`NoneType`.
            last_name (:obj:`str`, optional): Defaults to
                :obj:`NoneType`. Requires ``first_name``.
            ssn (:obj:`str`, optional): Defaults to :obj:`NoneType`.
            address (:obj:`str`, optional): Defaults to
                :obj:`NoneType`.
            city (:obj:`str`, optional): Defaults to :obj:`NoneType`.
            state (:obj:`str`, optional): Defaults to :obj:`NoneType`.
            zipcode (int, optional): Defaults to :obj:`NoneType`.
            phone (:obj:`str`, optional): Defaults to :obj:`NoneType`.
            credit_card (:obj:`str`, optional): Defaults to
                :obj:`NoneType`.
            iban (:obj:`str`, optional): Defaults to :obj:`NoneType`.
            user_record_id (:obj:`str`, optional): An optional, user
                provided custom record identifier. Defaults to ``'-'``.
            offline (:obj:`bool`, optional): Run in "offline mode". No
                data is sent to the Matchlight server. Returns a
                dictionary of values instead of a :class:`~.Record`
                instance.

        Returns:
            :class:`~.Record`: Created record with metadata, or the
            fingerprint data :obj:`dict` when ``offline`` is true.

        Raises:
            matchlight.error.SDKError: If only one of ``first_name``
                and ``last_name`` is given.

        """
        # First and last name are only accepted as a pair.
        if first_name is not None and last_name is None:
            raise matchlight.error.SDKError(
                'Fingerprinter Failed: the last_name argument is required '
                'along with the first_name argument.'
            )

        if first_name is None and last_name is not None:
            raise matchlight.error.SDKError(
                'Fingerprinter Failed: the first_name argument is required '
                'along with the last_name argument.'
            )

        data = {
            'desc': description,
            'user_record_id': user_record_id,
            'blinded_first': matchlight.utils.blind_name(first_name),
            'blinded_last': matchlight.utils.blind_name(last_name),
            'blinded_email': matchlight.utils.blind_email(email),
            'metadata': {
                'fingerprinting_tool_name': 'Python SDK',
                'fingerprinting_tool_version': matchlight.__version__
            }
        }

        if any((first_name, middle_name, last_name)):
            data['name_fingerprints'] = fingerprints_pii_name_variants(
                first_name or '', middle_name or None, last_name or '')

        if email:
            data['email_fingerprints'] = fingerprints_pii_email_address(
                email)
            # The record is named after the blinded email computed above;
            # reuse it instead of blinding the same address again.
            data['name'] = data['blinded_email']

        if ssn:
            data['ssn_fingerprints'] = [fingerprints_pii_ssn(ssn)]

        if address:
            data['street_address_fingerprints'] = (
                fingerprints_pii_address_variants(address))

        if any((city, state, zipcode)):
            # Missing parts become empty strings; the rest are coerced to
            # text so that an integer zipcode is accepted.
            csz_parts = [
                six.text_type(text) if text is not None else ''
                for text in (city, state, zipcode)
            ]
            data['city_state_zip_fingerprints'] = (
                fingerprints_pii_city_state_zip_variants(*csz_parts))

        if phone:
            data['phone_fingerprints'] = [
                fingerprints_pii_phone_number(phone)]

        if credit_card:
            data['credit_card_fingerprints'] = [
                fingerprints_pii_credit_card(credit_card)]

        if iban:
            data['iban_fingerprints'] = fingerprints_pii_iban(iban)

        if offline:
            return data
        return self.add_pii_from_fingerprints(project, data)

    def add_pii_from_fingerprints(self, project, fingerprint_data):
        """Add a PII record from fingerprints.

        Add a PII record from fingerprinted data generated by
        :meth:`add_pii` in offline mode.

        Args:
            project (:class:`~.Project`): Project object to associate
                with record.
            fingerprint_data (:obj:`dict`): The output of
                ``add_pii(offline=True)``.

        Returns:
            :class:`~.Record`: Record built from the server response.

        """
        response = self.conn.request(
            '/records/upload/pii/{}'.format(
                project.upload_token
            ),
            data=json.dumps(fingerprint_data)
        )
        return Record.from_mapping(response.json())

    def add_source_code(self, project, name, description, code_path,
                        min_score=None, offline=False):
        """Creates a new source code record in the given project.

        Args:
            project (:class:`~.Project`): Project object to associate
                with record.
            name (:obj:`str`): The name of the file (not
                fingerprinted).
            description (:obj:`str`): A description of the code (not
                fingerprinted).
            code_path (:obj:`str`): The location of the source code.
                The file is read as UTF-8 text. Code must be 840
                characters or less.
            min_score (optional): When not :obj:`None`, stored as a
                string under the ``min_score`` metadata key.
            offline (:obj:`bool`, optional): Run in "offline mode". No
                data is sent to the Matchlight server. Returns a
                dictionary of values instead of a :class:`~.Record`
                instance.

        Returns:
            :class:`~.Record`: Created record with metadata, or the
            fingerprint data :obj:`dict` when ``offline`` is true.

        Raises:
            matchlight.error.SDKError: If the file content is longer
                than the maximum source code length.

        """
        with io.open(code_path, 'r', encoding='utf-8') as document:
            content = document.read()

        if len(content) > MAX_DOCUMENT_FINGERPRINTS:
            raise matchlight.error.SDKError(
                'Fingerprinter Failed: the maximum length of a Source Code '
                'record is 840 characters.'
            )

        # The fingerprinter returns a JSON document as text.
        result_json = fingerprint(content, flags=OPTIONS_TILED,
                                  mode=MODE_CODE)
        result = json.loads(result_json)
        fingerprints = result['data']['fingerprints']

        data = {
            'name': name,
            'desc': description,
            'fingerprints': fingerprints,
            'metadata': {
                'fingerprinting_tool_name': 'Python SDK',
                'fingerprinting_tool_version': matchlight.__version__
            }
        }
        if min_score is not None:
            data['metadata']['min_score'] = str(min_score)

        if offline:
            return data
        return self.add_source_code_from_fingerprints(project, data)

    def add_source_code_from_fingerprints(self, project, fingerprint_data):
        """Add a source code record from fingerprints.

        Add a source code record from fingerprinted data generated by
        :meth:`add_source_code` in offline mode.

        Args:
            project (:class:`~.Project`): Project object to associate
                with record.
            fingerprint_data (:obj:`dict`): The output of
                ``add_source_code(offline=True)``.

        Returns:
            :class:`~.Record`: Record built from the server response.

        """
        # The placeholder is positional, so the token must be passed
        # positionally; a keyword argument here raises IndexError.
        response = self.conn.request(
            '/records/upload/source_code/{}'.format(
                project.upload_token
            ),
            data=json.dumps(fingerprint_data)
        )
        return Record.from_mapping(response.json())

    def delete(self, record_or_id):
        """Delete a fingerprinted record.

        Args:
            record_or_id (:class:`~.Record` or :obj:`str`): The record
                object or identifier to be deleted.

        Returns:
            :obj:`NoneType`

        """
        if isinstance(record_or_id, Record):
            record_upload_token = record_or_id.id
        else:
            record_upload_token = record_or_id
        self.conn.request('/record/{}/delete'.format(record_upload_token),
                          data=json.dumps({}))

    def filter(self, project=None):
        """Returns a list of records.

        Providing an optional **project** keyword argument will only
        return records that are associated with a specific project.

        Example:

            Request all records::

                >>> my_project
                <Project(name="Super Secret Algorithm", type="source_code")>
                >>> ml.records.filter(project=my_project)
                [<Record(name="fam****@fakeemail.com",
                id="625a732ad0f247beab18595z951c2088a3")>,
                <Record(name="pce****@fakeemail.com",
                id="f9427dd5a24d4a98b2069004g04c2977")>]

        Args:
            project (:class:`~.Project`, optional): a project object.
                Defaults to all projects if not specified.

        Returns:
            :obj:`list` of :class:`~.Record`: List of records that
            are associated with a project.

        """
        upload_token = project.upload_token if project is not None else None
        response = self.conn.request(
            '/records', params={'upload_token': upload_token})
        # A response without a ``data`` key yields an empty list.
        return [
            Record(
                id=payload['id'],
                name=payload['name'],
                description=payload['description'],
                ctime=int(payload['ctime']),
                mtime=int(payload['mtime']),
            )
            for payload in response.json().get('data', [])
        ]

    def get(self, record_id):
        """Returns a record by the given record ID.

        Fetches the records via :meth:`filter` and returns the first
        one whose ``id`` matches.

        Args:
            record_id (:obj:`str`): The record identifier.

        Returns:
            :class:`~.Record`: A record instance, or :obj:`None` when
            no record has the given identifier.

        """
        return next((record for record in self.filter()
                     if record.id == record_id), None)

    def __iter__(self):
        return iter(self.filter())