concrete.metadata.ttypes module

class concrete.metadata.ttypes.AnnotationMetadata(tool=None, timestamp=None, digest=None, dependencies=None, kBest=1)

Bases: object

Metadata associated with an annotation or a set of annotations, that identifies where those annotations came from.

- tool

The name of the tool that generated this annotation.

- timestamp

The time at which this annotation was generated (in Unix time UTC – i.e., seconds since January 1, 1970).
  • digest: A Digest, carrying over any information the annotation metadata wishes to carry over.
  • dependencies: The theories that supported this annotation. An empty field indicates that the theory has no dependencies (e.g., an ingester).

  • kBest: An integer that represents a ranking for systems that output k-best lists. For systems that do not output k-best lists, the default value (1) should suffice.

read(iprot)
thrift_spec = (None, (1, 11, 'tool', 'UTF8', None), (2, 10, 'timestamp', None, None), None, (4, 12, 'digest', (<class 'concrete.metadata.ttypes.Digest'>, (None, (1, 11, 'bytesValue', 'BINARY', None), (2, 10, 'int64Value', None, None), (3, 4, 'doubleValue', None, None), (4, 11, 'stringValue', 'UTF8', None), (5, 15, 'int64List', (10, None, False), None), (6, 15, 'doubleList', (4, None, False), None), (7, 15, 'stringList', (11, 'UTF8', False), None))), None), (5, 12, 'dependencies', (<class 'concrete.metadata.ttypes.TheoryDependencies'>, (None, (1, 15, 'sectionTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (2, 15, 'sentenceTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (3, 15, 'tokenizationTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (4, 15, 'posTagTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (5, 15, 'nerTagTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (6, 15, 'lemmaTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (7, 15, 'langIdTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (8, 15, 'parseTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (9, 15, 'dependencyParseTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (10, 15, 'tokenAnnotationTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (11, 15, 'entityMentionSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 
'uuidString', 'UTF8', None))), False), None), (12, 15, 'entitySetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (13, 15, 'situationMentionSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (14, 15, 'situationSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (15, 15, 'communicationsList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None))), None), (6, 8, 'kBest', None, 1))
validate()
write(oprot)
class concrete.metadata.ttypes.CommunicationMetadata(tweetInfo=None, emailInfo=None, nitfInfo=None)

Bases: object

Metadata specific to a particular Communication object. This might include corpus-specific metadata (from the Twitter API), attributes associated with the Communication (the author), or other information about the Communication.

- tweetInfo

Extra information for communications where kind==TWEET:

Information about this tweet that is provided by the Twitter API. For information about the Twitter API, see: https://dev.twitter.com/docs/platform-objects

  • emailInfo: Extra information for communications where kind==EMAIL
  • nitfInfo: Extra information that may come from the NITF (News Industry Text Format) schema. See ‘nitf.thrift’.

read(iprot)
thrift_spec = (None, (1, 12, 'tweetInfo', (<class 'concrete.twitter.ttypes.TweetInfo'>, (None, (1, 10, 'id', None, None), None, (3, 11, 'text', 'UTF8', None), (4, 11, 'createdAt', 'UTF8', None), (5, 12, 'user', (<class 'concrete.twitter.ttypes.TwitterUser'>, (None, (1, 10, 'id', None, None), None, (3, 11, 'name', 'UTF8', None), (4, 11, 'screenName', 'UTF8', None), (5, 11, 'lang', 'UTF8', None), (6, 2, 'geoEnabled', None, None), (7, 11, 'createdAt', 'UTF8', None), (8, 8, 'friendsCount', None, None), (9, 8, 'statusesCount', None, None), (10, 2, 'verified', None, None), (11, 8, 'listedCount', None, None), (12, 8, 'favouritesCount', None, None), (13, 8, 'followersCount', None, None), (14, 11, 'location', 'UTF8', None), (15, 11, 'timeZone', 'UTF8', None), (16, 11, 'description', 'UTF8', None), None, (18, 8, 'utcOffset', None, None), (19, 11, 'url', 'UTF8', None))), None), (6, 2, 'truncated', None, None), (7, 12, 'entities', (<class 'concrete.twitter.ttypes.TwitterEntities'>, (None, (1, 15, 'hashtagList', (12, (<class 'concrete.twitter.ttypes.HashTag'>, (None, (1, 11, 'text', 'UTF8', None), (2, 8, 'startOffset', None, None), (3, 8, 'endOffset', None, None))), False), None), (2, 15, 'urlList', (12, (<class 'concrete.twitter.ttypes.URL'>, (None, (1, 8, 'startOffset', None, None), (2, 8, 'endOffset', None, None), (3, 11, 'expandedUrl', 'UTF8', None), (4, 11, 'url', 'UTF8', None), (5, 11, 'displayUrl', 'UTF8', None))), False), None), (3, 15, 'userMentionList', (12, (<class 'concrete.twitter.ttypes.UserMention'>, (None, (1, 8, 'startOffset', None, None), (2, 8, 'endOffset', None, None), None, (4, 11, 'screenName', 'UTF8', None), (5, 11, 'name', 'UTF8', None), (6, 10, 'id', None, None))), False), None))), None), (8, 11, 'source', 'UTF8', None), (9, 12, 'coordinates', (<class 'concrete.twitter.ttypes.TwitterCoordinates'>, (None, (1, 11, 'type', 'UTF8', None), (2, 12, 'coordinates', (<class 'concrete.twitter.ttypes.TwitterLatLong'>, (None, (1, 4, 'latitude', None, None), (2, 4, 
'longitude', None, None))), None))), None), None, (11, 12, 'place', (<class 'concrete.twitter.ttypes.TwitterPlace'>, (None, (1, 11, 'placeType', 'UTF8', None), (2, 11, 'countryCode', 'UTF8', None), (3, 11, 'country', 'UTF8', None), (4, 11, 'fullName', 'UTF8', None), (5, 11, 'name', 'UTF8', None), (6, 11, 'id', 'UTF8', None), (7, 11, 'url', 'UTF8', None), (8, 12, 'boundingBox', (<class 'concrete.twitter.ttypes.BoundingBox'>, (None, (1, 11, 'type', 'UTF8', None), (2, 15, 'coordinateList', (12, (<class 'concrete.twitter.ttypes.TwitterLatLong'>, (None, (1, 4, 'latitude', None, None), (2, 4, 'longitude', None, None))), False), None))), None), (9, 12, 'attributes', (<class 'concrete.twitter.ttypes.PlaceAttributes'>, (None, (1, 11, 'streetAddress', 'UTF8', None), (2, 11, 'region', 'UTF8', None), (3, 11, 'locality', 'UTF8', None))), None))), None), (12, 2, 'favorited', None, None), (13, 2, 'retweeted', None, None), (14, 8, 'retweetCount', None, None), (15, 11, 'inReplyToScreenName', 'UTF8', None), (16, 10, 'inReplyToStatusId', None, None), None, (18, 10, 'inReplyToUserId', None, None), (19, 11, 'retweetedScreenName', 'UTF8', None), (20, 10, 'retweetedStatusId', None, None), (21, 10, 'retweetedUserId', None, None))), None), (2, 12, 'emailInfo', (<class 'concrete.email.ttypes.EmailCommunicationInfo'>, (None, (1, 11, 'messageId', 'UTF8', None), (2, 11, 'contentType', 'UTF8', None), (3, 11, 'userAgent', 'UTF8', None), (4, 15, 'inReplyToList', (11, 'UTF8', False), None), (5, 15, 'referenceList', (11, 'UTF8', False), None), (6, 12, 'senderAddress', (<class 'concrete.email.ttypes.EmailAddress'>, (None, (1, 11, 'address', 'UTF8', None), (2, 11, 'displayName', 'UTF8', None))), None), (7, 12, 'returnPathAddress', (<class 'concrete.email.ttypes.EmailAddress'>, (None, (1, 11, 'address', 'UTF8', None), (2, 11, 'displayName', 'UTF8', None))), None), (8, 15, 'toAddressList', (12, (<class 'concrete.email.ttypes.EmailAddress'>, (None, (1, 11, 'address', 'UTF8', None), (2, 11, 
'displayName', 'UTF8', None))), False), None), (9, 15, 'ccAddressList', (12, (<class 'concrete.email.ttypes.EmailAddress'>, (None, (1, 11, 'address', 'UTF8', None), (2, 11, 'displayName', 'UTF8', None))), False), None), (10, 15, 'bccAddressList', (12, (<class 'concrete.email.ttypes.EmailAddress'>, (None, (1, 11, 'address', 'UTF8', None), (2, 11, 'displayName', 'UTF8', None))), False), None), (11, 11, 'emailFolder', 'UTF8', None), (12, 11, 'subject', 'UTF8', None), (13, 15, 'quotedAddresses', (11, 'UTF8', False), None), (14, 15, 'attachmentPaths', (11, 'UTF8', False), None), (15, 11, 'salutation', 'UTF8', None), (16, 11, 'signature', 'UTF8', None))), None), (3, 12, 'nitfInfo', (<class 'concrete.nitf.ttypes.NITFInfo'>, (None, (1, 11, 'alternateURL', 'UTF8', None), (2, 11, 'articleAbstract', 'UTF8', None), (3, 11, 'authorBiography', 'UTF8', None), (4, 11, 'banner', 'UTF8', None), (5, 15, 'biographicalCategoryList', (11, 'UTF8', False), None), (6, 11, 'columnName', 'UTF8', None), (7, 8, 'columnNumber', None, None), (8, 10, 'correctionDate', None, None), (9, 11, 'correctionText', 'UTF8', None), (10, 11, 'credit', 'UTF8', None), (11, 11, 'dayOfWeek', 'UTF8', None), (12, 15, 'descriptorList', (11, 'UTF8', False), None), (13, 11, 'featurePage', 'UTF8', None), (14, 15, 'generalOnlineDescriptorList', (11, 'UTF8', False), None), (15, 8, 'guid', None, None), (16, 11, 'kicker', 'UTF8', None), (17, 15, 'leadParagraphList', (11, 'UTF8', False), None), (18, 15, 'locationList', (11, 'UTF8', False), None), (19, 15, 'nameList', (11, 'UTF8', False), None), (20, 11, 'newsDesk', 'UTF8', None), (21, 11, 'normalizedByline', 'UTF8', None), (22, 15, 'onlineDescriptorList', (11, 'UTF8', False), None), (23, 11, 'onlineHeadline', 'UTF8', None), (24, 11, 'onlineLeadParagraph', 'UTF8', None), (25, 15, 'onlineLocationList', (11, 'UTF8', False), None), (26, 15, 'onlineOrganizationList', (11, 'UTF8', False), None), (27, 15, 'onlinePeople', (11, 'UTF8', False), None), (28, 15, 'onlineSectionList', 
(11, 'UTF8', False), None), (29, 15, 'onlineTitleList', (11, 'UTF8', False), None), (30, 15, 'organizationList', (11, 'UTF8', False), None), (31, 8, 'page', None, None), (32, 15, 'peopleList', (11, 'UTF8', False), None), (33, 10, 'publicationDate', None, None), (34, 8, 'publicationDayOfMonth', None, None), (35, 8, 'publicationMonth', None, None), (36, 8, 'publicationYear', None, None), (37, 11, 'section', 'UTF8', None), (38, 11, 'seriesName', 'UTF8', None), (39, 11, 'slug', 'UTF8', None), (40, 15, 'taxonomicClassifierList', (11, 'UTF8', False), None), (41, 15, 'titleList', (11, 'UTF8', False), None), (42, 15, 'typesOfMaterialList', (11, 'UTF8', False), None), (43, 11, 'url', 'UTF8', None), (44, 8, 'wordCount', None, None))), None))
validate()
write(oprot)
class concrete.metadata.ttypes.Digest(bytesValue=None, int64Value=None, doubleValue=None, stringValue=None, int64List=None, doubleList=None, stringList=None)

Bases: object

Analytic-specific information about an attribute or edge. Digests are used to combine information from multiple sources to generate a unified value. The digests generated by an analytic will only ever be used by that same analytic, so analytics can feel free to encode information in whatever way is convenient.

- bytesValue

The following fields define various ways you can store the digest data (for convenience). If none of these meets your needs, then serialize the digest to a byte sequence and store it in bytesValue.

  • int64Value
  • doubleValue
  • stringValue
  • int64List
  • doubleList
  • stringList
read(iprot)
thrift_spec = (None, (1, 11, 'bytesValue', 'BINARY', None), (2, 10, 'int64Value', None, None), (3, 4, 'doubleValue', None, None), (4, 11, 'stringValue', 'UTF8', None), (5, 15, 'int64List', (10, None, False), None), (6, 15, 'doubleList', (4, None, False), None), (7, 15, 'stringList', (11, 'UTF8', False), None))
validate()
write(oprot)
class concrete.metadata.ttypes.TheoryDependencies(sectionTheoryList=None, sentenceTheoryList=None, tokenizationTheoryList=None, posTagTheoryList=None, nerTagTheoryList=None, lemmaTheoryList=None, langIdTheoryList=None, parseTheoryList=None, dependencyParseTheoryList=None, tokenAnnotationTheoryList=None, entityMentionSetTheoryList=None, entitySetTheoryList=None, situationMentionSetTheoryList=None, situationSetTheoryList=None, communicationsList=None)

Bases: object

A struct that holds UUIDs for all theories that a particular annotation was based upon (and presumably requires).

Producers of TheoryDependencies should list all stages that they used in constructing their particular annotation. They do not, however, need to explicitly label each stage; they can label only the immediate stage before them.

Examples:

If you are producing a Tokenization, and only used the SentenceSegmentation in order to produce that Tokenization, list only the single SentenceSegmentation UUID in sentenceTheoryList.

In this example, even though the SentenceSegmentation will have a dependency on some SectionSegmentation, it is not necessary for the Tokenization to list the SectionSegmentation UUID as a dependency.

If you are a producer of EntityMentions, and you use two POSTokenTagging and one NERTokenTagging objects, add the UUIDs for the POSTokenTagging objects to posTagTheoryList, and the UUID of the NER TokenTagging to the nerTagTheoryList.

In this example, because multiple annotations influenced the new annotation, they should all be listed as dependencies.

- sectionTheoryList
- sentenceTheoryList
- tokenizationTheoryList
- posTagTheoryList
- nerTagTheoryList
- lemmaTheoryList
- langIdTheoryList
- parseTheoryList
- dependencyParseTheoryList
- tokenAnnotationTheoryList
- entityMentionSetTheoryList
- entitySetTheoryList
- situationMentionSetTheoryList
- situationSetTheoryList
- communicationsList
read(iprot)
thrift_spec = (None, (1, 15, 'sectionTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (2, 15, 'sentenceTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (3, 15, 'tokenizationTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (4, 15, 'posTagTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (5, 15, 'nerTagTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (6, 15, 'lemmaTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (7, 15, 'langIdTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (8, 15, 'parseTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (9, 15, 'dependencyParseTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (10, 15, 'tokenAnnotationTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (11, 15, 'entityMentionSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (12, 15, 'entitySetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (13, 15, 'situationMentionSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (14, 15, 'situationSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (15, 15, 'communicationsList', (12, (<class 
'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None))
validate()
write(oprot)