concrete.language.ttypes module

class concrete.language.ttypes.LanguageIdentification(uuid=None, metadata=None, languageToProbabilityMap=None)

Bases: object

A theory about what languages are present in a given communication or piece of communication. Note that it is possible to have more than one language present in a given communication.

- uuid

Unique identifier for this language identification.

- metadata

Information about where this language identification came from.

- languageToProbabilityMap

A list mapping from a language to the probability that that language occurs in a given communication. Each language code should occur at most once in this list. The probabilities do *not* need to sum to one – for example, if a single communication is known to contain both English and French, then it would be appropriate to assign a probability of 1 to both languages. (Manually annotated LanguageProb objects should always have probabilities of either zero or one; machine-generated LanguageProbs may have intermediate probabilities.)

Note: The string key should represent the ISO 639-3 three-letter code.

read(iprot)
thrift_spec = (None, (1, 12, 'uuid', (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), None), (2, 12, 'metadata', (<class 'concrete.metadata.ttypes.AnnotationMetadata'>, (None, (1, 11, 'tool', 'UTF8', None), (2, 10, 'timestamp', None, None), None, (4, 12, 'digest', (<class 'concrete.metadata.ttypes.Digest'>, (None, (1, 11, 'bytesValue', 'BINARY', None), (2, 10, 'int64Value', None, None), (3, 4, 'doubleValue', None, None), (4, 11, 'stringValue', 'UTF8', None), (5, 15, 'int64List', (10, None, False), None), (6, 15, 'doubleList', (4, None, False), None), (7, 15, 'stringList', (11, 'UTF8', False), None))), None), (5, 12, 'dependencies', (<class 'concrete.metadata.ttypes.TheoryDependencies'>, (None, (1, 15, 'sectionTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (2, 15, 'sentenceTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (3, 15, 'tokenizationTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (4, 15, 'posTagTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (5, 15, 'nerTagTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (6, 15, 'lemmaTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (7, 15, 'langIdTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (8, 15, 'parseTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (9, 15, 'dependencyParseTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (10, 15, 'tokenAnnotationTheoryList', (12, (<class 
'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (11, 15, 'entityMentionSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (12, 15, 'entitySetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (13, 15, 'situationMentionSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (14, 15, 'situationSetTheoryList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None), (15, 15, 'communicationsList', (12, (<class 'concrete.uuid.ttypes.UUID'>, (None, (1, 11, 'uuidString', 'UTF8', None))), False), None))), None), (6, 8, 'kBest', None, 1))), None), (3, 13, 'languageToProbabilityMap', (11, 'UTF8', 4, None, False), None))
validate()
write(oprot)