concrete.structure.ttypes module

class concrete.structure.ttypes.Arc(src=None, dst=None, token=None, weight=None)

Bases: object

Type for arcs. For epsilon edges, leave ‘token’ blank.

- src
- dst
- token
- weight
read(iprot)
thrift_spec = (None, (1, 8, 'src', None, None), (2, 8, 'dst', None, None), (3, 12, 'token', (<class 'concrete.structure.ttypes.Token'>, (None, (1, 8, 'tokenIndex', None, None), (2, 11, 'text', 'UTF8', None), (3, 12, 'textSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (4, 12, 'rawTextSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (5, 12, 'audioSpan', (<class 'concrete.spans.ttypes.AudioSpan'>, (None, (1, 10, 'start', None, None), (2, 10, 'ending', None, None))), None))), None), (4, 4, 'weight', None, None))
validate()
write(oprot)
class concrete.structure.ttypes.Constituent(id=None, tag=None, childList=None, headChildIndex=-1, start=None, ending=None)

Bases: object

A single parse constituent (or “phrase”).

- id

A parse-relative identifier for this constituent. Together

with the UUID for a Parse, this can be used to define pointers to specific constituents.

  • tag: A description of this constituency node, e.g. the category “NP”.

For leaf nodes, this should be a word and for pre-terminal nodes this should be a POS tag.

  • childList
  • headChildIndex: The index of the head child of this constituent. I.e., the

head child of constituent <tt>c</tt> is <tt>c.childList[c.headChildIndex]</tt>. A value of -1 indicates that no child head was identified.

  • start: The first token (inclusive) of this constituent in the
parent Tokenization. Almost certainly should be populated.
  • ending: The last token (exclusive) of this constituent in the

parent Tokenization. Almost certainly should be populated.

read(iprot)
thrift_spec = (None, (1, 8, 'id', None, None), (2, 11, 'tag', 'UTF8', None), (3, 15, 'childList', (8, None, False), None), (4, 8, 'headChildIndex', None, -1), (5, 8, 'start', None, None), (6, 8, 'ending', None, None))
validate()
write(oprot)
class concrete.structure.ttypes.ConstituentRef(parseId=None, constituentIndex=None)

Bases: object

A reference to a Constituent within a Parse.

- parseId

The UUID of the Parse that this Constituent belongs to.

- constituentIndex

The index in the constituent list of this Constituent.

read(iprot)
thrift_spec = (None, (1, 12, 'parseId', (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), None), (2, 8, 'constituentIndex', None, None))
validate()
write(oprot)
class concrete.structure.ttypes.Dependency(gov=-1, dep=None, edgeType=None)

Bases: object

A syntactic edge between two tokens in a tokenized sentence.

- gov

The governor or the head token. 0 indexed.

- dep

The dependent token. 0 indexed.

- edgeType

The relation that holds between gov and dep.

read(iprot)
thrift_spec = (None, (1, 8, 'gov', None, -1), (2, 8, 'dep', None, None), (3, 11, 'edgeType', 'UTF8', None))
validate()
write(oprot)
class concrete.structure.ttypes.DependencyParse(uuid=None, metadata=None, dependencyList=None, structureInformation=None)

Bases: object

Represents a dependency parse with typed edges.

- uuid
- metadata
- dependencyList
- structureInformation
read(iprot)
thrift_spec = (None, (1, 12, 'uuid', (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), None), (2, 12, 'metadata', (<class 'concrete.metadata.ttypes.AnnotationMetadata'>, (None, (1, 11, 'tool', 'UTF8', None), (2, 10, 'timestamp', None, None), None, (4, 12, 'digest', (<class 'concrete.metadata.ttypes.Digest'>, (None, (1, 11, 'bytesValue', 'BINARY', None), (2, 10, 'int64Value', None, None), (3, 4, 'doubleValue', None, None), (4, 11, 'stringValue', 'UTF8', None), (5, 15, 'int64List', (10, None, False), None), (6, 15, 'doubleList', (4, None, False), None), (7, 15, 'stringList', (11, 'UTF8', False), None))), None), (5, 12, 'dependencies', (<class 'concrete.metadata.ttypes.TheoryDependencies'>, (None, (1, 15, 'sectionTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (2, 15, 'sentenceTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (3, 15, 'tokenizationTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (4, 15, 'posTagTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (5, 15, 'nerTagTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (6, 15, 'lemmaTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (7, 15, 'langIdTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (8, 15, 'parseTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (9, 15, 'dependencyParseTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (10, 15, 'tokenAnnotationTheoryList', (12, (<class 
'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (11, 15, 'entityMentionSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (12, 15, 'entitySetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (13, 15, 'situationMentionSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (14, 15, 'situationSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (15, 15, 'communicationsList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None))), None), (6, 8, 'kBest', None, 1))), None), (3, 15, 'dependencyList', (12, (<class 'concrete.structure.ttypes.Dependency'>, (None, (1, 8, 'gov', None, -1), (2, 8, 'dep', None, None), (3, 11, 'edgeType', 'UTF8', None))), False), None), (4, 12, 'structureInformation', (<class 'concrete.structure.ttypes.DependencyParseStructure'>, (None, (1, 2, 'isAcyclic', None, None), (2, 2, 'isConnected', None, None), (3, 2, 'isSingleHeaded', None, None), (4, 2, 'isProjective', None, None))), None))
validate()
write(oprot)
class concrete.structure.ttypes.DependencyParseStructure(isAcyclic=None, isConnected=None, isSingleHeaded=None, isProjective=None)

Bases: object

Information about the structure of a dependency parse. This information is computable from the list of dependencies, but this allows the consumer to make (verified) assumptions about the dependencies being processed.

- isAcyclic

True iff there are no cycles in the dependency graph.

- isConnected

True iff the dependency graph forms a single connected component.

- isSingleHeaded

True iff every node in the dependency parse has at most

one head/parent/governor.
  • isProjective: True iff there are no crossing edges in the dependency parse.
read(iprot)
thrift_spec = (None, (1, 2, 'isAcyclic', None, None), (2, 2, 'isConnected', None, None), (3, 2, 'isSingleHeaded', None, None), (4, 2, 'isProjective', None, None))
validate()
write(oprot)
class concrete.structure.ttypes.LatticePath(weight=None, tokenList=None)

Bases: object

- weight
- tokenList
read(iprot)
thrift_spec = (None, (1, 4, 'weight', None, None), (2, 15, 'tokenList', (12, (<class 'concrete.structure.ttypes.Token'>, (None, (1, 8, 'tokenIndex', None, None), (2, 11, 'text', 'UTF8', None), (3, 12, 'textSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (4, 12, 'rawTextSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (5, 12, 'audioSpan', (<class 'concrete.spans.ttypes.AudioSpan'>, (None, (1, 10, 'start', None, None), (2, 10, 'ending', None, None))), None))), False), None))
validate()
write(oprot)
class concrete.structure.ttypes.Parse(uuid=None, metadata=None, constituentList=None)

Bases: object

A theory about the syntactic parse of a sentence.
Note: If we add support for parse forests in the future, then it

will most likely be done by adding a new field (e.g. “<tt>forest_root</tt>”) that uses a new struct type to encode the forest. A “<tt>kind</tt>” field might also be added (analogous to <tt>Tokenization.kind</tt>) to indicate whether a parse is encoded using a simple tree or a parse forest.

Attributes:
  • uuid
  • metadata
  • constituentList
read(iprot)
thrift_spec = (None, (1, 12, 'uuid', (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), None), (2, 12, 'metadata', (<class 'concrete.metadata.ttypes.AnnotationMetadata'>, (None, (1, 11, 'tool', 'UTF8', None), (2, 10, 'timestamp', None, None), None, (4, 12, 'digest', (<class 'concrete.metadata.ttypes.Digest'>, (None, (1, 11, 'bytesValue', 'BINARY', None), (2, 10, 'int64Value', None, None), (3, 4, 'doubleValue', None, None), (4, 11, 'stringValue', 'UTF8', None), (5, 15, 'int64List', (10, None, False), None), (6, 15, 'doubleList', (4, None, False), None), (7, 15, 'stringList', (11, 'UTF8', False), None))), None), (5, 12, 'dependencies', (<class 'concrete.metadata.ttypes.TheoryDependencies'>, (None, (1, 15, 'sectionTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (2, 15, 'sentenceTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (3, 15, 'tokenizationTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (4, 15, 'posTagTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (5, 15, 'nerTagTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (6, 15, 'lemmaTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (7, 15, 'langIdTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (8, 15, 'parseTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (9, 15, 'dependencyParseTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (10, 15, 'tokenAnnotationTheoryList', (12, (<class 
'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (11, 15, 'entityMentionSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (12, 15, 'entitySetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (13, 15, 'situationMentionSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (14, 15, 'situationSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (15, 15, 'communicationsList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None))), None), (6, 8, 'kBest', None, 1))), None), (3, 15, 'constituentList', (12, (<class 'concrete.structure.ttypes.Constituent'>, (None, (1, 8, 'id', None, None), (2, 11, 'tag', 'UTF8', None), (3, 15, 'childList', (8, None, False), None), (4, 8, 'headChildIndex', None, -1), (5, 8, 'start', None, None), (6, 8, 'ending', None, None))), False), None))
validate()
write(oprot)
class concrete.structure.ttypes.Section(uuid=None, sentenceList=None, textSpan=None, rawTextSpan=None, audioSpan=None, kind=None, label=None, numberList=None, lidList=None)

Bases: object

A single “section” of a communication, such as a paragraph. Each section is defined using a text or audio span, and can optionally contain a list of sentences.

- uuid

The unique identifier for this section.

- sentenceList

The sentences of this “section.”

- textSpan

Location of this section in the communication text.

NOTE: This text span represents a best guess, or ‘provenance’: it cannot be guaranteed that this text span matches the _exact_ text of the original document, but is the annotation’s best effort at such a representation.

  • rawTextSpan: Location of this section in the raw text.

NOTE: This text span represents a best guess, or ‘provenance’: it cannot be guaranteed that this text span matches the _exact_ text of the original document, but is the annotation’s best effort at such a representation.

  • audioSpan: Location of this section in the original audio.

NOTE: This span represents a best guess, or ‘provenance’: it cannot be guaranteed that this text span matches the _exact_ text of the original document, but is the annotation’s best effort at such a representation.

  • kind: A short, sometimes corpus-specific term characterizing the nature

of the section; may change in a future version of concrete. This often acts as a coarse-grained descriptor that is used for filtering. For example, Gigaword uses the section kind “passage” to distinguish content-bearing paragraphs in the body of an article from other paragraphs, such as the headline and dateline.

  • label: The name of the section. For example, a title of a section on
Wikipedia.
  • numberList: Position within the communication with respect to other Sections:

The section number, e.g., 3, or 3.1, or 3.1.2, etc. Aimed at Communications with content organized in a hierarchy, such as a book with multiple chapters, then sections, then paragraphs, or even a dense Wikipedia page with subsections. Sections should still be arranged linearly, where reading these numbers should not be required to get a start-to-finish enumeration of the Communication’s content.

  • lidList: An optional field to be used for multi-language documents.

This field should be populated when a section is inside of a document that contains multiple languages.

Minimally, each block of text in one language should be its own section. For example, if a paragraph is in English and the paragraph afterwards is in French, these should be separated into two different sections, allowing language-specific analytics to run on appropriate sections.

read(iprot)
thrift_spec = (None, (1, 12, 'uuid', (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), None), (2, 15, 'sentenceList', (12, (<class 'concrete.structure.ttypes.Sentence'>, (None, (1, 12, 'uuid', (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), None), (2, 12, 'tokenization', (<class 'concrete.structure.ttypes.Tokenization'>, (None, (1, 12, 'uuid', (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), None), (2, 12, 'metadata', (<class 'concrete.metadata.ttypes.AnnotationMetadata'>, (None, (1, 11, 'tool', 'UTF8', None), (2, 10, 'timestamp', None, None), None, (4, 12, 'digest', (<class 'concrete.metadata.ttypes.Digest'>, (None, (1, 11, 'bytesValue', 'BINARY', None), (2, 10, 'int64Value', None, None), (3, 4, 'doubleValue', None, None), (4, 11, 'stringValue', 'UTF8', None), (5, 15, 'int64List', (10, None, False), None), (6, 15, 'doubleList', (4, None, False), None), (7, 15, 'stringList', (11, 'UTF8', False), None))), None), (5, 12, 'dependencies', (<class 'concrete.metadata.ttypes.TheoryDependencies'>, (None, (1, 15, 'sectionTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (2, 15, 'sentenceTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (3, 15, 'tokenizationTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (4, 15, 'posTagTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (5, 15, 'nerTagTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (6, 15, 'lemmaTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (7, 15, 'langIdTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 
11, 'uuidString', 'UTF8', None))), False), None), (8, 15, 'parseTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (9, 15, 'dependencyParseTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (10, 15, 'tokenAnnotationTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (11, 15, 'entityMentionSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (12, 15, 'entitySetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (13, 15, 'situationMentionSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (14, 15, 'situationSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (15, 15, 'communicationsList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None))), None), (6, 8, 'kBest', None, 1))), None), (3, 12, 'tokenList', (<class 'concrete.structure.ttypes.TokenList'>, (None, (1, 15, 'tokenList', (12, (<class 'concrete.structure.ttypes.Token'>, (None, (1, 8, 'tokenIndex', None, None), (2, 11, 'text', 'UTF8', None), (3, 12, 'textSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (4, 12, 'rawTextSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (5, 12, 'audioSpan', (<class 'concrete.spans.ttypes.AudioSpan'>, (None, (1, 10, 'start', None, None), (2, 10, 'ending', None, None))), None))), False), None))), None), (4, 12, 'lattice', (<class 'concrete.structure.ttypes.TokenLattice'>, (None, (1, 8, 'startState', None, 0), (2, 8, 'endState', 
None, 0), (3, 15, 'arcList', (12, (<class 'concrete.structure.ttypes.Arc'>, (None, (1, 8, 'src', None, None), (2, 8, 'dst', None, None), (3, 12, 'token', (<class 'concrete.structure.ttypes.Token'>, (None, (1, 8, 'tokenIndex', None, None), (2, 11, 'text', 'UTF8', None), (3, 12, 'textSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (4, 12, 'rawTextSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (5, 12, 'audioSpan', (<class 'concrete.spans.ttypes.AudioSpan'>, (None, (1, 10, 'start', None, None), (2, 10, 'ending', None, None))), None))), None), (4, 4, 'weight', None, None))), False), None), (4, 12, 'cachedBestPath', (<class 'concrete.structure.ttypes.LatticePath'>, (None, (1, 4, 'weight', None, None), (2, 15, 'tokenList', (12, (<class 'concrete.structure.ttypes.Token'>, (None, (1, 8, 'tokenIndex', None, None), (2, 11, 'text', 'UTF8', None), (3, 12, 'textSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (4, 12, 'rawTextSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (5, 12, 'audioSpan', (<class 'concrete.spans.ttypes.AudioSpan'>, (None, (1, 10, 'start', None, None), (2, 10, 'ending', None, None))), None))), False), None))), None))), None), (5, 8, 'kind', None, None), (6, 15, 'tokenTaggingList', (12, (<class 'concrete.structure.ttypes.TokenTagging'>, (None, (1, 12, 'uuid', (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), None), (2, 12, 'metadata', (<class 'concrete.metadata.ttypes.AnnotationMetadata'>, (None, (1, 11, 'tool', 'UTF8', None), (2, 10, 'timestamp', None, None), None, (4, 12, 'digest', (<class 'concrete.metadata.ttypes.Digest'>, (None, (1, 11, 'bytesValue', 'BINARY', None), (2, 10, 'int64Value', None, None), (3, 4, 
'doubleValue', None, None), (4, 11, 'stringValue', 'UTF8', None), (5, 15, 'int64List', (10, None, False), None), (6, 15, 'doubleList', (4, None, False), None), (7, 15, 'stringList', (11, 'UTF8', False), None))), None), (5, 12, 'dependencies', (<class 'concrete.metadata.ttypes.TheoryDependencies'>, (None, (1, 15, 'sectionTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (2, 15, 'sentenceTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (3, 15, 'tokenizationTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (4, 15, 'posTagTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (5, 15, 'nerTagTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (6, 15, 'lemmaTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (7, 15, 'langIdTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (8, 15, 'parseTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (9, 15, 'dependencyParseTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (10, 15, 'tokenAnnotationTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (11, 15, 'entityMentionSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (12, 15, 'entitySetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (13, 15, 'situationMentionSetTheoryList', (12, (<class 
'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (14, 15, 'situationSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (15, 15, 'communicationsList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None))), None), (6, 8, 'kBest', None, 1))), None), (3, 15, 'taggedTokenList', (12, (<class 'concrete.structure.ttypes.TaggedToken'>, (None, (1, 8, 'tokenIndex', None, None), (2, 11, 'tag', 'UTF8', None), (3, 4, 'confidence', None, None), (4, 15, 'tagList', (11, 'UTF8', False), None), (5, 15, 'confidenceList', (4, None, False), None))), False), None), (4, 11, 'taggingType', 'UTF8', None))), False), None), (7, 15, 'parseList', (12, (<class 'concrete.structure.ttypes.Parse'>, (None, (1, 12, 'uuid', (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), None), (2, 12, 'metadata', (<class 'concrete.metadata.ttypes.AnnotationMetadata'>, (None, (1, 11, 'tool', 'UTF8', None), (2, 10, 'timestamp', None, None), None, (4, 12, 'digest', (<class 'concrete.metadata.ttypes.Digest'>, (None, (1, 11, 'bytesValue', 'BINARY', None), (2, 10, 'int64Value', None, None), (3, 4, 'doubleValue', None, None), (4, 11, 'stringValue', 'UTF8', None), (5, 15, 'int64List', (10, None, False), None), (6, 15, 'doubleList', (4, None, False), None), (7, 15, 'stringList', (11, 'UTF8', False), None))), None), (5, 12, 'dependencies', (<class 'concrete.metadata.ttypes.TheoryDependencies'>, (None, (1, 15, 'sectionTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (2, 15, 'sentenceTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (3, 15, 'tokenizationTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (4, 15, 'posTagTheoryList', (12, 
(<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (5, 15, 'nerTagTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (6, 15, 'lemmaTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (7, 15, 'langIdTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (8, 15, 'parseTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (9, 15, 'dependencyParseTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (10, 15, 'tokenAnnotationTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (11, 15, 'entityMentionSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (12, 15, 'entitySetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (13, 15, 'situationMentionSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (14, 15, 'situationSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (15, 15, 'communicationsList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None))), None), (6, 8, 'kBest', None, 1))), None), (3, 15, 'constituentList', (12, (<class 'concrete.structure.ttypes.Constituent'>, (None, (1, 8, 'id', None, None), (2, 11, 'tag', 'UTF8', None), (3, 15, 'childList', (8, None, False), None), (4, 8, 'headChildIndex', None, -1), (5, 8, 'start', None, None), (6, 8, 'ending', None, None))), False), None))), False), None), (8, 15, 
'dependencyParseList', (12, (<class 'concrete.structure.ttypes.DependencyParse'>, (None, (1, 12, 'uuid', (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), None), (2, 12, 'metadata', (<class 'concrete.metadata.ttypes.AnnotationMetadata'>, (None, (1, 11, 'tool', 'UTF8', None), (2, 10, 'timestamp', None, None), None, (4, 12, 'digest', (<class 'concrete.metadata.ttypes.Digest'>, (None, (1, 11, 'bytesValue', 'BINARY', None), (2, 10, 'int64Value', None, None), (3, 4, 'doubleValue', None, None), (4, 11, 'stringValue', 'UTF8', None), (5, 15, 'int64List', (10, None, False), None), (6, 15, 'doubleList', (4, None, False), None), (7, 15, 'stringList', (11, 'UTF8', False), None))), None), (5, 12, 'dependencies', (<class 'concrete.metadata.ttypes.TheoryDependencies'>, (None, (1, 15, 'sectionTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (2, 15, 'sentenceTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (3, 15, 'tokenizationTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (4, 15, 'posTagTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (5, 15, 'nerTagTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (6, 15, 'lemmaTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (7, 15, 'langIdTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (8, 15, 'parseTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (9, 15, 'dependencyParseTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), 
False), None), (10, 15, 'tokenAnnotationTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (11, 15, 'entityMentionSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (12, 15, 'entitySetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (13, 15, 'situationMentionSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (14, 15, 'situationSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (15, 15, 'communicationsList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None))), None), (6, 8, 'kBest', None, 1))), None), (3, 15, 'dependencyList', (12, (<class 'concrete.structure.ttypes.Dependency'>, (None, (1, 8, 'gov', None, -1), (2, 8, 'dep', None, None), (3, 11, 'edgeType', 'UTF8', None))), False), None), (4, 12, 'structureInformation', (<class 'concrete.structure.ttypes.DependencyParseStructure'>, (None, (1, 2, 'isAcyclic', None, None), (2, 2, 'isConnected', None, None), (3, 2, 'isSingleHeaded', None, None), (4, 2, 'isProjective', None, None))), None))), False), None), (9, 15, 'spanLinkList', (12, (<class 'concrete.structure.ttypes.SpanLink'>, (None, (1, 12, 'tokens', (<class 'concrete.structure.ttypes.TokenRefSequence'>, (None, (1, 15, 'tokenIndexList', (8, None, False), None), (2, 8, 'anchorTokenIndex', None, -1), (3, 12, 'tokenizationId', (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), None), (4, 12, 'textSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (5, 12, 'rawTextSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, 
None))), None), (6, 12, 'audioSpan', (<class 'concrete.spans.ttypes.AudioSpan'>, (None, (1, 10, 'start', None, None), (2, 10, 'ending', None, None))), None))), None), (2, 12, 'concreteTarget', (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), None), (3, 11, 'externalTarget', 'UTF8', None), (4, 11, 'linkType', 'UTF8', None))), False), None))), None), (3, 12, 'textSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (4, 12, 'rawTextSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (5, 12, 'audioSpan', (<class 'concrete.spans.ttypes.AudioSpan'>, (None, (1, 10, 'start', None, None), (2, 10, 'ending', None, None))), None))), False), None), (3, 12, 'textSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (4, 12, 'rawTextSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (5, 11, 'kind', 'UTF8', None), (6, 11, 'label', 'UTF8', None), (7, 15, 'numberList', (8, None, False), None), (8, 15, 'lidList', (12, (<class 'concrete.language.ttypes.LanguageIdentification'>, (None, (1, 12, 'uuid', (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), None), (2, 12, 'metadata', (<class 'concrete.metadata.ttypes.AnnotationMetadata'>, (None, (1, 11, 'tool', 'UTF8', None), (2, 10, 'timestamp', None, None), None, (4, 12, 'digest', (<class 'concrete.metadata.ttypes.Digest'>, (None, (1, 11, 'bytesValue', 'BINARY', None), (2, 10, 'int64Value', None, None), (3, 4, 'doubleValue', None, None), (4, 11, 'stringValue', 'UTF8', None), (5, 15, 'int64List', (10, None, False), None), (6, 15, 'doubleList', (4, None, False), None), (7, 15, 'stringList', (11, 'UTF8', False), None))), None), (5, 12, 'dependencies', (<class 
'concrete.metadata.ttypes.TheoryDependencies'>, (None, (1, 15, 'sectionTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (2, 15, 'sentenceTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (3, 15, 'tokenizationTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (4, 15, 'posTagTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (5, 15, 'nerTagTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (6, 15, 'lemmaTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (7, 15, 'langIdTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (8, 15, 'parseTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (9, 15, 'dependencyParseTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (10, 15, 'tokenAnnotationTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (11, 15, 'entityMentionSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (12, 15, 'entitySetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (13, 15, 'situationMentionSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (14, 15, 'situationSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (15, 15, 'communicationsList', 
(12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None))), None), (6, 8, 'kBest', None, 1))), None), (3, 13, 'languageToProbabilityMap', (11, 'UTF8', 4, None, False), None))), False), None), (9, 12, 'audioSpan', (<class 'concrete.spans.ttypes.AudioSpan'>, (None, (1, 10, 'start', None, None), (2, 10, 'ending', None, None))), None))
validate()
write(oprot)
class concrete.structure.ttypes.Sentence(uuid=None, tokenization=None, textSpan=None, rawTextSpan=None, audioSpan=None)

Bases: object

A single sentence or utterance in a communication.

- uuid
- tokenization

Theory about the tokens that make up this sentence. For text communications, these tokenizations will typically be generated by a tokenizer. For audio communications, these tokenizations will typically be generated by an automatic speech recognizer.

The “Tokenization” message type is also used to store the output of machine translation systems and text normalization systems.

  • textSpan: Location of this sentence in the communication text.

NOTE: This span represents a best guess, or ‘provenance’: it cannot be guaranteed that this text span matches the _exact_ text of the original document, but is the annotation’s best effort at such a representation.

  • rawTextSpan: Location of this sentence in the raw text.

NOTE: This span represents a best guess, or ‘provenance’: it cannot be guaranteed that this text span matches the _exact_ text of the original document, but is the annotation’s best effort at such a representation.

  • audioSpan: Location of this sentence in the original audio.

NOTE: This span represents a best guess, or ‘provenance’: it cannot be guaranteed that this text span matches the _exact_ text of the original document, but is the annotation’s best effort at such a representation.

read(iprot)
thrift_spec = (None, (1, 12, 'uuid', (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), None), (2, 12, 'tokenization', (<class 'concrete.structure.ttypes.Tokenization'>, (None, (1, 12, 'uuid', (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), None), (2, 12, 'metadata', (<class 'concrete.metadata.ttypes.AnnotationMetadata'>, (None, (1, 11, 'tool', 'UTF8', None), (2, 10, 'timestamp', None, None), None, (4, 12, 'digest', (<class 'concrete.metadata.ttypes.Digest'>, (None, (1, 11, 'bytesValue', 'BINARY', None), (2, 10, 'int64Value', None, None), (3, 4, 'doubleValue', None, None), (4, 11, 'stringValue', 'UTF8', None), (5, 15, 'int64List', (10, None, False), None), (6, 15, 'doubleList', (4, None, False), None), (7, 15, 'stringList', (11, 'UTF8', False), None))), None), (5, 12, 'dependencies', (<class 'concrete.metadata.ttypes.TheoryDependencies'>, (None, (1, 15, 'sectionTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (2, 15, 'sentenceTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (3, 15, 'tokenizationTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (4, 15, 'posTagTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (5, 15, 'nerTagTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (6, 15, 'lemmaTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (7, 15, 'langIdTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (8, 15, 'parseTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (9, 15, 
'dependencyParseTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (10, 15, 'tokenAnnotationTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (11, 15, 'entityMentionSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (12, 15, 'entitySetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (13, 15, 'situationMentionSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (14, 15, 'situationSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (15, 15, 'communicationsList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None))), None), (6, 8, 'kBest', None, 1))), None), (3, 12, 'tokenList', (<class 'concrete.structure.ttypes.TokenList'>, (None, (1, 15, 'tokenList', (12, (<class 'concrete.structure.ttypes.Token'>, (None, (1, 8, 'tokenIndex', None, None), (2, 11, 'text', 'UTF8', None), (3, 12, 'textSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (4, 12, 'rawTextSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (5, 12, 'audioSpan', (<class 'concrete.spans.ttypes.AudioSpan'>, (None, (1, 10, 'start', None, None), (2, 10, 'ending', None, None))), None))), False), None))), None), (4, 12, 'lattice', (<class 'concrete.structure.ttypes.TokenLattice'>, (None, (1, 8, 'startState', None, 0), (2, 8, 'endState', None, 0), (3, 15, 'arcList', (12, (<class 'concrete.structure.ttypes.Arc'>, (None, (1, 8, 'src', None, None), (2, 8, 'dst', None, None), (3, 12, 'token', (<class 
'concrete.structure.ttypes.Token'>, (None, (1, 8, 'tokenIndex', None, None), (2, 11, 'text', 'UTF8', None), (3, 12, 'textSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (4, 12, 'rawTextSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (5, 12, 'audioSpan', (<class 'concrete.spans.ttypes.AudioSpan'>, (None, (1, 10, 'start', None, None), (2, 10, 'ending', None, None))), None))), None), (4, 4, 'weight', None, None))), False), None), (4, 12, 'cachedBestPath', (<class 'concrete.structure.ttypes.LatticePath'>, (None, (1, 4, 'weight', None, None), (2, 15, 'tokenList', (12, (<class 'concrete.structure.ttypes.Token'>, (None, (1, 8, 'tokenIndex', None, None), (2, 11, 'text', 'UTF8', None), (3, 12, 'textSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (4, 12, 'rawTextSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (5, 12, 'audioSpan', (<class 'concrete.spans.ttypes.AudioSpan'>, (None, (1, 10, 'start', None, None), (2, 10, 'ending', None, None))), None))), False), None))), None))), None), (5, 8, 'kind', None, None), (6, 15, 'tokenTaggingList', (12, (<class 'concrete.structure.ttypes.TokenTagging'>, (None, (1, 12, 'uuid', (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), None), (2, 12, 'metadata', (<class 'concrete.metadata.ttypes.AnnotationMetadata'>, (None, (1, 11, 'tool', 'UTF8', None), (2, 10, 'timestamp', None, None), None, (4, 12, 'digest', (<class 'concrete.metadata.ttypes.Digest'>, (None, (1, 11, 'bytesValue', 'BINARY', None), (2, 10, 'int64Value', None, None), (3, 4, 'doubleValue', None, None), (4, 11, 'stringValue', 'UTF8', None), (5, 15, 'int64List', (10, None, False), None), (6, 15, 'doubleList', (4, None, False), None), (7, 15, 
'stringList', (11, 'UTF8', False), None))), None), (5, 12, 'dependencies', (<class 'concrete.metadata.ttypes.TheoryDependencies'>, (None, (1, 15, 'sectionTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (2, 15, 'sentenceTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (3, 15, 'tokenizationTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (4, 15, 'posTagTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (5, 15, 'nerTagTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (6, 15, 'lemmaTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (7, 15, 'langIdTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (8, 15, 'parseTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (9, 15, 'dependencyParseTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (10, 15, 'tokenAnnotationTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (11, 15, 'entityMentionSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (12, 15, 'entitySetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (13, 15, 'situationMentionSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (14, 15, 'situationSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 
11, 'uuidString', 'UTF8', None))), False), None), (15, 15, 'communicationsList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None))), None), (6, 8, 'kBest', None, 1))), None), (3, 15, 'taggedTokenList', (12, (<class 'concrete.structure.ttypes.TaggedToken'>, (None, (1, 8, 'tokenIndex', None, None), (2, 11, 'tag', 'UTF8', None), (3, 4, 'confidence', None, None), (4, 15, 'tagList', (11, 'UTF8', False), None), (5, 15, 'confidenceList', (4, None, False), None))), False), None), (4, 11, 'taggingType', 'UTF8', None))), False), None), (7, 15, 'parseList', (12, (<class 'concrete.structure.ttypes.Parse'>, (None, (1, 12, 'uuid', (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), None), (2, 12, 'metadata', (<class 'concrete.metadata.ttypes.AnnotationMetadata'>, (None, (1, 11, 'tool', 'UTF8', None), (2, 10, 'timestamp', None, None), None, (4, 12, 'digest', (<class 'concrete.metadata.ttypes.Digest'>, (None, (1, 11, 'bytesValue', 'BINARY', None), (2, 10, 'int64Value', None, None), (3, 4, 'doubleValue', None, None), (4, 11, 'stringValue', 'UTF8', None), (5, 15, 'int64List', (10, None, False), None), (6, 15, 'doubleList', (4, None, False), None), (7, 15, 'stringList', (11, 'UTF8', False), None))), None), (5, 12, 'dependencies', (<class 'concrete.metadata.ttypes.TheoryDependencies'>, (None, (1, 15, 'sectionTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (2, 15, 'sentenceTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (3, 15, 'tokenizationTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (4, 15, 'posTagTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (5, 15, 'nerTagTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 
11, 'uuidString', 'UTF8', None))), False), None), (6, 15, 'lemmaTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (7, 15, 'langIdTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (8, 15, 'parseTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (9, 15, 'dependencyParseTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (10, 15, 'tokenAnnotationTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (11, 15, 'entityMentionSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (12, 15, 'entitySetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (13, 15, 'situationMentionSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (14, 15, 'situationSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (15, 15, 'communicationsList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None))), None), (6, 8, 'kBest', None, 1))), None), (3, 15, 'constituentList', (12, (<class 'concrete.structure.ttypes.Constituent'>, (None, (1, 8, 'id', None, None), (2, 11, 'tag', 'UTF8', None), (3, 15, 'childList', (8, None, False), None), (4, 8, 'headChildIndex', None, -1), (5, 8, 'start', None, None), (6, 8, 'ending', None, None))), False), None))), False), None), (8, 15, 'dependencyParseList', (12, (<class 'concrete.structure.ttypes.DependencyParse'>, (None, (1, 12, 'uuid', (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), None), (2, 
12, 'metadata', (<class 'concrete.metadata.ttypes.AnnotationMetadata'>, (None, (1, 11, 'tool', 'UTF8', None), (2, 10, 'timestamp', None, None), None, (4, 12, 'digest', (<class 'concrete.metadata.ttypes.Digest'>, (None, (1, 11, 'bytesValue', 'BINARY', None), (2, 10, 'int64Value', None, None), (3, 4, 'doubleValue', None, None), (4, 11, 'stringValue', 'UTF8', None), (5, 15, 'int64List', (10, None, False), None), (6, 15, 'doubleList', (4, None, False), None), (7, 15, 'stringList', (11, 'UTF8', False), None))), None), (5, 12, 'dependencies', (<class 'concrete.metadata.ttypes.TheoryDependencies'>, (None, (1, 15, 'sectionTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (2, 15, 'sentenceTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (3, 15, 'tokenizationTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (4, 15, 'posTagTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (5, 15, 'nerTagTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (6, 15, 'lemmaTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (7, 15, 'langIdTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (8, 15, 'parseTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (9, 15, 'dependencyParseTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (10, 15, 'tokenAnnotationTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (11, 15, 'entityMentionSetTheoryList', (12, 
(<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (12, 15, 'entitySetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (13, 15, 'situationMentionSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (14, 15, 'situationSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (15, 15, 'communicationsList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None))), None), (6, 8, 'kBest', None, 1))), None), (3, 15, 'dependencyList', (12, (<class 'concrete.structure.ttypes.Dependency'>, (None, (1, 8, 'gov', None, -1), (2, 8, 'dep', None, None), (3, 11, 'edgeType', 'UTF8', None))), False), None), (4, 12, 'structureInformation', (<class 'concrete.structure.ttypes.DependencyParseStructure'>, (None, (1, 2, 'isAcyclic', None, None), (2, 2, 'isConnected', None, None), (3, 2, 'isSingleHeaded', None, None), (4, 2, 'isProjective', None, None))), None))), False), None), (9, 15, 'spanLinkList', (12, (<class 'concrete.structure.ttypes.SpanLink'>, (None, (1, 12, 'tokens', (<class 'concrete.structure.ttypes.TokenRefSequence'>, (None, (1, 15, 'tokenIndexList', (8, None, False), None), (2, 8, 'anchorTokenIndex', None, -1), (3, 12, 'tokenizationId', (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), None), (4, 12, 'textSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (5, 12, 'rawTextSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (6, 12, 'audioSpan', (<class 'concrete.spans.ttypes.AudioSpan'>, (None, (1, 10, 'start', None, None), (2, 10, 'ending', None, None))), None))), None), (2, 12, 'concreteTarget', (<class 
'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), None), (3, 11, 'externalTarget', 'UTF8', None), (4, 11, 'linkType', 'UTF8', None))), False), None))), None), (3, 12, 'textSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (4, 12, 'rawTextSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (5, 12, 'audioSpan', (<class 'concrete.spans.ttypes.AudioSpan'>, (None, (1, 10, 'start', None, None), (2, 10, 'ending', None, None))), None))
validate()
write(oprot)

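class concrete.structure.ttypes.SpanLink(tokens=None, concreteTarget=None, externalTarget=None, linkType=None)

Bases: object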

A collection of tokens that represent a link to another resource. This resource might be another Concrete object (e.g., another Concrete Communication), represented with the ‘concreteTarget’ field, or it could link to a resource outside of Concrete via the ‘externalTarget’ field.

- tokens

The tokens that make up this SpanLink object.

- concreteTarget
- externalTarget
- linkType
read(iprot)
thrift_spec = (None, (1, 12, 'tokens', (<class 'concrete.structure.ttypes.TokenRefSequence'>, (None, (1, 15, 'tokenIndexList', (8, None, False), None), (2, 8, 'anchorTokenIndex', None, -1), (3, 12, 'tokenizationId', (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), None), (4, 12, 'textSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (5, 12, 'rawTextSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (6, 12, 'audioSpan', (<class 'concrete.spans.ttypes.AudioSpan'>, (None, (1, 10, 'start', None, None), (2, 10, 'ending', None, None))), None))), None), (2, 12, 'concreteTarget', (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), None), (3, 11, 'externalTarget', 'UTF8', None), (4, 11, 'linkType', 'UTF8', None))
validate()
write(oprot)
class concrete.structure.ttypes.TaggedToken(tokenIndex=None, tag=None, confidence=None, tagList=None, confidenceList=None)

Bases: object

- tokenIndex

A pointer to the token being tagged.

Token indices are 0-based, so this pointer is 0-based as well.
  • tag: A string containing the annotation.

If the tag set you are using is not case sensitive, then all part of speech tags should be normalized to upper case.

  • confidence: Confidence of the annotation.
  • tagList: A list of strings that represent a distribution of possible tags for this token.

If populated, the ‘tag’ field should also be populated with the “best” value from this list.

  • confidenceList: A list of doubles that represent confidences associated with the tags in the ‘tagList’ field.

If populated, the ‘confidence’ field should also be populated with the confidence associated with the “best” tag in ‘tagList’.

read(iprot)
thrift_spec = (None, (1, 8, 'tokenIndex', None, None), (2, 11, 'tag', 'UTF8', None), (3, 4, 'confidence', None, None), (4, 15, 'tagList', (11, 'UTF8', False), None), (5, 15, 'confidenceList', (4, None, False), None))
validate()
write(oprot)
class concrete.structure.ttypes.Token(tokenIndex=None, text=None, textSpan=None, rawTextSpan=None, audioSpan=None)

Bases: object

A single token (typically a word) in a communication. The exact definition of what counts as a token is left up to the tools that generate token sequences.

Usually, each token will include at least a text string.

- tokenIndex

A 0-based tokenization-relative identifier for this token that represents the order in which this token appears in the sentence. Together with the UUID for a Tokenization, this can be used to define pointers to specific tokens. If a Tokenization object contains multiple Token objects with the same id (e.g., in different n-best lists), then all of their other fields must be identical as well.

  • text: The text associated with this token.

Note - we may have a destructive tokenizer (e.g., Stanford rewriting) and as a result, we want to maintain this field.

  • textSpan: Location of this token in this perspective’s text (.text field).

In cases where this token does not correspond directly with any text span in the text (such as word insertion during MT), this field may be given a value indicating “approximately” where the token comes from. A span covering the entire sentence may be used if no more precise value seems appropriate.

NOTE: This span represents a best guess, or ‘provenance’: it cannot be guaranteed that this text span matches the _exact_ text of the document, but is the annotation’s best effort at such a representation.

  • rawTextSpan: Location of this token in the original, raw text (.originalText field). In cases where this token does not correspond directly with any text span in the original text (such as word insertion during MT), this field may be given a value indicating “approximately” where the token comes from. A span covering the entire sentence may be used if no more precise value seems appropriate.

NOTE: This span represents a best guess, or ‘provenance’: it cannot be guaranteed that this text span matches the _exact_ text of the original raw document, but is the annotation’s best effort at such a representation.

  • audioSpan: Location of this token in the original audio.

NOTE: This span represents a best guess, or ‘provenance’: it cannot be guaranteed that this text span matches the _exact_ text of the original document, but is the annotation’s best effort at such a representation.

read(iprot)
thrift_spec = (None, (1, 8, 'tokenIndex', None, None), (2, 11, 'text', 'UTF8', None), (3, 12, 'textSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (4, 12, 'rawTextSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (5, 12, 'audioSpan', (<class 'concrete.spans.ttypes.AudioSpan'>, (None, (1, 10, 'start', None, None), (2, 10, 'ending', None, None))), None))
validate()
write(oprot)
class concrete.structure.ttypes.TokenLattice(startState=0, endState=0, arcList=None, cachedBestPath=None)

Bases: object

A lattice structure that assigns scores to a set of token sequences. The lattice is encoded as an FSA, where states are identified by integers, and each arc is annotated with an optional token and a weight. (Arcs with no token are “epsilon” arcs.) The lattice has a single start state and a single end state. (You can use epsilon edges to simulate multiple start states or multiple end states, if desired.)

The score of a path through the lattice is the sum of the weights of the arcs that make up that path. A path with a lower score is considered “better” than a path with a higher score.

If possible, path scores should be negative log likelihoods (with base e – e.g. if P=1, then weight=0; and if P=0.5, then weight=0.693). Furthermore, if possible, the path scores should be globally normalized (i.e., they should encode probabilities). This will allow for them to be combined with other information in a reasonable way when determining confidences for system outputs.

TokenLattices should never contain any paths with cycles. Every arc in the lattice should be included in some path from the start state to the end state.

- startState
- endState
- arcList
- cachedBestPath
read(iprot)
thrift_spec = (None, (1, 8, 'startState', None, 0), (2, 8, 'endState', None, 0), (3, 15, 'arcList', (12, (<class 'concrete.structure.ttypes.Arc'>, (None, (1, 8, 'src', None, None), (2, 8, 'dst', None, None), (3, 12, 'token', (<class 'concrete.structure.ttypes.Token'>, (None, (1, 8, 'tokenIndex', None, None), (2, 11, 'text', 'UTF8', None), (3, 12, 'textSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (4, 12, 'rawTextSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (5, 12, 'audioSpan', (<class 'concrete.spans.ttypes.AudioSpan'>, (None, (1, 10, 'start', None, None), (2, 10, 'ending', None, None))), None))), None), (4, 4, 'weight', None, None))), False), None), (4, 12, 'cachedBestPath', (<class 'concrete.structure.ttypes.LatticePath'>, (None, (1, 4, 'weight', None, None), (2, 15, 'tokenList', (12, (<class 'concrete.structure.ttypes.Token'>, (None, (1, 8, 'tokenIndex', None, None), (2, 11, 'text', 'UTF8', None), (3, 12, 'textSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (4, 12, 'rawTextSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (5, 12, 'audioSpan', (<class 'concrete.spans.ttypes.AudioSpan'>, (None, (1, 10, 'start', None, None), (2, 10, 'ending', None, None))), None))), False), None))), None))
validate()
write(oprot)
class concrete.structure.ttypes.TokenList(tokenList=None)

Bases: object

A wrapper around a list of tokens.

- tokenList
read(iprot)
thrift_spec = (None, (1, 15, 'tokenList', (12, (<class 'concrete.structure.ttypes.Token'>, (None, (1, 8, 'tokenIndex', None, None), (2, 11, 'text', 'UTF8', None), (3, 12, 'textSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (4, 12, 'rawTextSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (5, 12, 'audioSpan', (<class 'concrete.spans.ttypes.AudioSpan'>, (None, (1, 10, 'start', None, None), (2, 10, 'ending', None, None))), None))), False), None))
validate()
write(oprot)
class concrete.structure.ttypes.TokenRefSequence(tokenIndexList=None, anchorTokenIndex=-1, tokenizationId=None, textSpan=None, rawTextSpan=None, audioSpan=None)

Bases: object

A list of pointers to tokens that all belong to the same tokenization.

- tokenIndexList

The tokenization-relative identifiers for each token that is included in this sequence.
  • anchorTokenIndex: An optional field that can be used to describe the root of a sentence (if this sequence is a full sentence), the head of a constituent (if this sequence is a constituent), or some other form of “canonical” token in this sequence if, for instance, it is not easy to map this sequence to another annotation that has a head.

This field is defined with respect to the Tokenization given by tokenizationId, and not to this object’s tokenIndexList.

  • tokenizationId: The UUID of the tokenization that contains the tokens.
  • textSpan: The text span in the main text (.text field) associated with this TokenRefSequence.

NOTE: This span represents a best guess, or ‘provenance’: it cannot be guaranteed that this text span matches the _exact_ text of the original document, but is the annotation’s best effort at such a representation.

  • rawTextSpan: The text span in the original text (.originalText field) associated with this TokenRefSequence.

NOTE: This span represents a best guess, or ‘provenance’: it cannot be guaranteed that this text span matches the _exact_ text of the original raw document, but is the annotation’s best effort at such a representation.

  • audioSpan: The audio span associated with this TokenRefSequence.

NOTE: This span represents a best guess, or ‘provenance’: it cannot be guaranteed that this text span matches the _exact_ text of the original document, but is the annotation’s best effort at such a representation.

read(iprot)
thrift_spec = (None, (1, 15, 'tokenIndexList', (8, None, False), None), (2, 8, 'anchorTokenIndex', None, -1), (3, 12, 'tokenizationId', (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), None), (4, 12, 'textSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (5, 12, 'rawTextSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (6, 12, 'audioSpan', (<class 'concrete.spans.ttypes.AudioSpan'>, (None, (1, 10, 'start', None, None), (2, 10, 'ending', None, None))), None))
validate()
write(oprot)
class concrete.structure.ttypes.TokenTagging(uuid=None, metadata=None, taggedTokenList=None, taggingType=None)

Bases: object

A theory about some token-level annotation. The TokenTagging consists of a mapping from tokens (using token ids) to string tags (e.g. part-of-speech tags or lemmas).

The mapping defined by a TokenTagging may be partial – i.e., some tokens may not be assigned any part of speech tags.

For lattice tokenizations, you may need to create multiple part-of-speech taggings (for different paths through the lattice), since the appropriate tag for a given token may depend on the path taken. For example, you might define a separate TokenTagging for each of the top K paths, which leaves all tokens that are not part of the path unlabeled.

Currently, we use strings to encode annotations. In the future, we may add fields for encoding specific tag sets (e.g., treebank tags), or for adding compound tags.

- uuid

The UUID of this TokenTagging object.

- metadata

Information about where the annotation came from.

This should be used to distinguish between gold-standard annotations and automatically-generated theories about the data.

  • taggedTokenList: The mapping from tokens to annotations.
This may be a partial mapping.
  • taggingType: An ontology-backed string that represents the type of token taggings this TokenTagging object produces.

read(iprot)
thrift_spec = (None, (1, 12, 'uuid', (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), None), (2, 12, 'metadata', (<class 'concrete.metadata.ttypes.AnnotationMetadata'>, (None, (1, 11, 'tool', 'UTF8', None), (2, 10, 'timestamp', None, None), None, (4, 12, 'digest', (<class 'concrete.metadata.ttypes.Digest'>, (None, (1, 11, 'bytesValue', 'BINARY', None), (2, 10, 'int64Value', None, None), (3, 4, 'doubleValue', None, None), (4, 11, 'stringValue', 'UTF8', None), (5, 15, 'int64List', (10, None, False), None), (6, 15, 'doubleList', (4, None, False), None), (7, 15, 'stringList', (11, 'UTF8', False), None))), None), (5, 12, 'dependencies', (<class 'concrete.metadata.ttypes.TheoryDependencies'>, (None, (1, 15, 'sectionTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (2, 15, 'sentenceTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (3, 15, 'tokenizationTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (4, 15, 'posTagTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (5, 15, 'nerTagTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (6, 15, 'lemmaTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (7, 15, 'langIdTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (8, 15, 'parseTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (9, 15, 'dependencyParseTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (10, 15, 'tokenAnnotationTheoryList', (12, (<class 
'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (11, 15, 'entityMentionSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (12, 15, 'entitySetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (13, 15, 'situationMentionSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (14, 15, 'situationSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (15, 15, 'communicationsList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None))), None), (6, 8, 'kBest', None, 1))), None), (3, 15, 'taggedTokenList', (12, (<class 'concrete.structure.ttypes.TaggedToken'>, (None, (1, 8, 'tokenIndex', None, None), (2, 11, 'tag', 'UTF8', None), (3, 4, 'confidence', None, None), (4, 15, 'tagList', (11, 'UTF8', False), None), (5, 15, 'confidenceList', (4, None, False), None))), False), None), (4, 11, 'taggingType', 'UTF8', None))
validate()
write(oprot)
class concrete.structure.ttypes.Tokenization(uuid=None, metadata=None, tokenList=None, lattice=None, kind=None, tokenTaggingList=None, parseList=None, dependencyParseList=None, spanLinkList=None)

Bases: object

A theory (or set of alternative theories) about the sequence of tokens that make up a sentence.

This message type is used to record the output not just of tokenizers, but also of a wide variety of other tools, including machine translation systems, text normalizers, part-of-speech taggers, and stemmers.

Each Tokenization is encoded using either a TokenList or a TokenLattice. (If you want to encode an n-best list, then you should store it as n separate Tokenization objects.) The “kind” field is used to indicate whether this Tokenization contains a list of tokens or a TokenLattice.

The confidence value for each sequence is determined by combining the confidence from the “metadata” field with confidence information from individual token sequences as follows:

  • For n-best lists: metadata.confidence
  • For lattices: metadata.confidence * exp(-sum(arc.weight))

Note: in some cases (such as the output of a machine translation tool), the order of the tokens in a token sequence may not correspond with the order of their original text span offsets.

- uuid
- metadata

Information about where this tokenization came from.

- tokenList

A wrapper around an ordered list of the tokens in this tokenization.

This may also give easy access to the “reconstructed text” associated with this tokenization. This field should only have a value if kind==TOKEN_LIST.

  • lattice: A lattice that compactly describes a set of token sequences that might make up this tokenization. This field should only have a value if kind==TOKEN_LATTICE.

  • kind: Enumerated value (see TokenizationKind) indicating whether this tokenization is implemented using a token list (TOKEN_LIST) or a lattice (TOKEN_LATTICE).
  • tokenTaggingList
  • parseList
  • dependencyParseList
  • spanLinkList
read(iprot)
thrift_spec = (None, (1, 12, 'uuid', (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), None), (2, 12, 'metadata', (<class 'concrete.metadata.ttypes.AnnotationMetadata'>, (None, (1, 11, 'tool', 'UTF8', None), (2, 10, 'timestamp', None, None), None, (4, 12, 'digest', (<class 'concrete.metadata.ttypes.Digest'>, (None, (1, 11, 'bytesValue', 'BINARY', None), (2, 10, 'int64Value', None, None), (3, 4, 'doubleValue', None, None), (4, 11, 'stringValue', 'UTF8', None), (5, 15, 'int64List', (10, None, False), None), (6, 15, 'doubleList', (4, None, False), None), (7, 15, 'stringList', (11, 'UTF8', False), None))), None), (5, 12, 'dependencies', (<class 'concrete.metadata.ttypes.TheoryDependencies'>, (None, (1, 15, 'sectionTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (2, 15, 'sentenceTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (3, 15, 'tokenizationTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (4, 15, 'posTagTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (5, 15, 'nerTagTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (6, 15, 'lemmaTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (7, 15, 'langIdTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (8, 15, 'parseTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (9, 15, 'dependencyParseTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (10, 15, 'tokenAnnotationTheoryList', (12, (<class 
'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (11, 15, 'entityMentionSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (12, 15, 'entitySetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (13, 15, 'situationMentionSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (14, 15, 'situationSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (15, 15, 'communicationsList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None))), None), (6, 8, 'kBest', None, 1))), None), (3, 12, 'tokenList', (<class 'concrete.structure.ttypes.TokenList'>, (None, (1, 15, 'tokenList', (12, (<class 'concrete.structure.ttypes.Token'>, (None, (1, 8, 'tokenIndex', None, None), (2, 11, 'text', 'UTF8', None), (3, 12, 'textSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (4, 12, 'rawTextSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (5, 12, 'audioSpan', (<class 'concrete.spans.ttypes.AudioSpan'>, (None, (1, 10, 'start', None, None), (2, 10, 'ending', None, None))), None))), False), None))), None), (4, 12, 'lattice', (<class 'concrete.structure.ttypes.TokenLattice'>, (None, (1, 8, 'startState', None, 0), (2, 8, 'endState', None, 0), (3, 15, 'arcList', (12, (<class 'concrete.structure.ttypes.Arc'>, (None, (1, 8, 'src', None, None), (2, 8, 'dst', None, None), (3, 12, 'token', (<class 'concrete.structure.ttypes.Token'>, (None, (1, 8, 'tokenIndex', None, None), (2, 11, 'text', 'UTF8', None), (3, 12, 'textSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 
8, 'ending', None, None))), None), (4, 12, 'rawTextSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (5, 12, 'audioSpan', (<class 'concrete.spans.ttypes.AudioSpan'>, (None, (1, 10, 'start', None, None), (2, 10, 'ending', None, None))), None))), None), (4, 4, 'weight', None, None))), False), None), (4, 12, 'cachedBestPath', (<class 'concrete.structure.ttypes.LatticePath'>, (None, (1, 4, 'weight', None, None), (2, 15, 'tokenList', (12, (<class 'concrete.structure.ttypes.Token'>, (None, (1, 8, 'tokenIndex', None, None), (2, 11, 'text', 'UTF8', None), (3, 12, 'textSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (4, 12, 'rawTextSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (5, 12, 'audioSpan', (<class 'concrete.spans.ttypes.AudioSpan'>, (None, (1, 10, 'start', None, None), (2, 10, 'ending', None, None))), None))), False), None))), None))), None), (5, 8, 'kind', None, None), (6, 15, 'tokenTaggingList', (12, (<class 'concrete.structure.ttypes.TokenTagging'>, (None, (1, 12, 'uuid', (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), None), (2, 12, 'metadata', (<class 'concrete.metadata.ttypes.AnnotationMetadata'>, (None, (1, 11, 'tool', 'UTF8', None), (2, 10, 'timestamp', None, None), None, (4, 12, 'digest', (<class 'concrete.metadata.ttypes.Digest'>, (None, (1, 11, 'bytesValue', 'BINARY', None), (2, 10, 'int64Value', None, None), (3, 4, 'doubleValue', None, None), (4, 11, 'stringValue', 'UTF8', None), (5, 15, 'int64List', (10, None, False), None), (6, 15, 'doubleList', (4, None, False), None), (7, 15, 'stringList', (11, 'UTF8', False), None))), None), (5, 12, 'dependencies', (<class 'concrete.metadata.ttypes.TheoryDependencies'>, (None, (1, 15, 'sectionTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, 
(1, 11, 'uuidString', 'UTF8', None))), False), None), (2, 15, 'sentenceTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (3, 15, 'tokenizationTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (4, 15, 'posTagTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (5, 15, 'nerTagTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (6, 15, 'lemmaTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (7, 15, 'langIdTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (8, 15, 'parseTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (9, 15, 'dependencyParseTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (10, 15, 'tokenAnnotationTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (11, 15, 'entityMentionSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (12, 15, 'entitySetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (13, 15, 'situationMentionSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (14, 15, 'situationSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (15, 15, 'communicationsList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None))), None), (6, 8, 'kBest', None, 
1))), None), (3, 15, 'taggedTokenList', (12, (<class 'concrete.structure.ttypes.TaggedToken'>, (None, (1, 8, 'tokenIndex', None, None), (2, 11, 'tag', 'UTF8', None), (3, 4, 'confidence', None, None), (4, 15, 'tagList', (11, 'UTF8', False), None), (5, 15, 'confidenceList', (4, None, False), None))), False), None), (4, 11, 'taggingType', 'UTF8', None))), False), None), (7, 15, 'parseList', (12, (<class 'concrete.structure.ttypes.Parse'>, (None, (1, 12, 'uuid', (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), None), (2, 12, 'metadata', (<class 'concrete.metadata.ttypes.AnnotationMetadata'>, (None, (1, 11, 'tool', 'UTF8', None), (2, 10, 'timestamp', None, None), None, (4, 12, 'digest', (<class 'concrete.metadata.ttypes.Digest'>, (None, (1, 11, 'bytesValue', 'BINARY', None), (2, 10, 'int64Value', None, None), (3, 4, 'doubleValue', None, None), (4, 11, 'stringValue', 'UTF8', None), (5, 15, 'int64List', (10, None, False), None), (6, 15, 'doubleList', (4, None, False), None), (7, 15, 'stringList', (11, 'UTF8', False), None))), None), (5, 12, 'dependencies', (<class 'concrete.metadata.ttypes.TheoryDependencies'>, (None, (1, 15, 'sectionTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (2, 15, 'sentenceTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (3, 15, 'tokenizationTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (4, 15, 'posTagTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (5, 15, 'nerTagTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (6, 15, 'lemmaTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (7, 15, 'langIdTheoryList', (12, 
(<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (8, 15, 'parseTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (9, 15, 'dependencyParseTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (10, 15, 'tokenAnnotationTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (11, 15, 'entityMentionSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (12, 15, 'entitySetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (13, 15, 'situationMentionSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (14, 15, 'situationSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (15, 15, 'communicationsList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None))), None), (6, 8, 'kBest', None, 1))), None), (3, 15, 'constituentList', (12, (<class 'concrete.structure.ttypes.Constituent'>, (None, (1, 8, 'id', None, None), (2, 11, 'tag', 'UTF8', None), (3, 15, 'childList', (8, None, False), None), (4, 8, 'headChildIndex', None, -1), (5, 8, 'start', None, None), (6, 8, 'ending', None, None))), False), None))), False), None), (8, 15, 'dependencyParseList', (12, (<class 'concrete.structure.ttypes.DependencyParse'>, (None, (1, 12, 'uuid', (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), None), (2, 12, 'metadata', (<class 'concrete.metadata.ttypes.AnnotationMetadata'>, (None, (1, 11, 'tool', 'UTF8', None), (2, 10, 'timestamp', None, None), None, (4, 12, 'digest', (<class 'concrete.metadata.ttypes.Digest'>, 
(None, (1, 11, 'bytesValue', 'BINARY', None), (2, 10, 'int64Value', None, None), (3, 4, 'doubleValue', None, None), (4, 11, 'stringValue', 'UTF8', None), (5, 15, 'int64List', (10, None, False), None), (6, 15, 'doubleList', (4, None, False), None), (7, 15, 'stringList', (11, 'UTF8', False), None))), None), (5, 12, 'dependencies', (<class 'concrete.metadata.ttypes.TheoryDependencies'>, (None, (1, 15, 'sectionTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (2, 15, 'sentenceTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (3, 15, 'tokenizationTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (4, 15, 'posTagTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (5, 15, 'nerTagTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (6, 15, 'lemmaTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (7, 15, 'langIdTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (8, 15, 'parseTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (9, 15, 'dependencyParseTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (10, 15, 'tokenAnnotationTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (11, 15, 'entityMentionSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (12, 15, 'entitySetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', 
None))), False), None), (13, 15, 'situationMentionSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (14, 15, 'situationSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (15, 15, 'communicationsList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None))), None), (6, 8, 'kBest', None, 1))), None), (3, 15, 'dependencyList', (12, (<class 'concrete.structure.ttypes.Dependency'>, (None, (1, 8, 'gov', None, -1), (2, 8, 'dep', None, None), (3, 11, 'edgeType', 'UTF8', None))), False), None), (4, 12, 'structureInformation', (<class 'concrete.structure.ttypes.DependencyParseStructure'>, (None, (1, 2, 'isAcyclic', None, None), (2, 2, 'isConnected', None, None), (3, 2, 'isSingleHeaded', None, None), (4, 2, 'isProjective', None, None))), None))), False), None), (9, 15, 'spanLinkList', (12, (<class 'concrete.structure.ttypes.SpanLink'>, (None, (1, 12, 'tokens', (<class 'concrete.structure.ttypes.TokenRefSequence'>, (None, (1, 15, 'tokenIndexList', (8, None, False), None), (2, 8, 'anchorTokenIndex', None, -1), (3, 12, 'tokenizationId', (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), None), (4, 12, 'textSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (5, 12, 'rawTextSpan', (<class 'concrete.spans.ttypes.TextSpan'>, (None, (1, 8, 'start', None, None), (2, 8, 'ending', None, None))), None), (6, 12, 'audioSpan', (<class 'concrete.spans.ttypes.AudioSpan'>, (None, (1, 10, 'start', None, None), (2, 10, 'ending', None, None))), None))), None), (2, 12, 'concreteTarget', (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), None), (3, 11, 'externalTarget', 'UTF8', None), (4, 11, 'linkType', 'UTF8', None))), False), None))
validate()
write(oprot)
class concrete.structure.ttypes.TokenizationKind

Bases: object

Enumerated types of Tokenizations

TOKEN_LATTICE = 2
TOKEN_LIST = 1