As of January 1, 2020, this library no longer supports Python 2 in its latest released version.
Library versions released prior to that date will continue to be available. For more information, please
visit the "Python 2 support on Google Cloud" page.
Source code for google.cloud.automl_v1beta1.types.data_stats
# -*- coding: utf-8 -*-
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import proto # type: ignore
# Module descriptor: names the proto package and lists the message types
# that this module defines.
__protobuf__ = proto.module(
    package="google.cloud.automl.v1beta1",
    manifest={
        "DataStats",
        "Float64Stats",
        "StringStats",
        "TimestampStats",
        "ArrayStats",
        "StructStats",
        "CategoryStats",
        "CorrelationStats",
    },
)
class DataStats(proto.Message):
    r"""The data statistics of a series of values that share the same
    DataType.

    Attributes:
        float64_stats (google.cloud.automl_v1beta1.types.Float64Stats):
            The statistics for FLOAT64 DataType.
        string_stats (google.cloud.automl_v1beta1.types.StringStats):
            The statistics for STRING DataType.
        timestamp_stats (google.cloud.automl_v1beta1.types.TimestampStats):
            The statistics for TIMESTAMP DataType.
        array_stats (google.cloud.automl_v1beta1.types.ArrayStats):
            The statistics for ARRAY DataType.
        struct_stats (google.cloud.automl_v1beta1.types.StructStats):
            The statistics for STRUCT DataType.
        category_stats (google.cloud.automl_v1beta1.types.CategoryStats):
            The statistics for CATEGORY DataType.
        distinct_value_count (int):
            The number of distinct values.
        null_value_count (int):
            The number of values that are null.
        valid_value_count (int):
            The number of values that are valid.
    """

    # Type-specific statistics. All six fields below are declared as
    # members of the ``stats`` oneof. The message types are referenced by
    # name because they are defined later in this module.
    float64_stats = proto.Field(
        proto.MESSAGE,
        number=3,
        oneof="stats",
        message="Float64Stats",
    )
    string_stats = proto.Field(
        proto.MESSAGE,
        number=4,
        oneof="stats",
        message="StringStats",
    )
    timestamp_stats = proto.Field(
        proto.MESSAGE,
        number=5,
        oneof="stats",
        message="TimestampStats",
    )
    array_stats = proto.Field(
        proto.MESSAGE,
        number=6,
        oneof="stats",
        message="ArrayStats",
    )
    struct_stats = proto.Field(
        proto.MESSAGE,
        number=7,
        oneof="stats",
        message="StructStats",
    )
    category_stats = proto.Field(
        proto.MESSAGE,
        number=8,
        oneof="stats",
        message="CategoryStats",
    )

    # Value counts; these are plain fields outside the ``stats`` oneof.
    distinct_value_count = proto.Field(proto.INT64, number=1)
    null_value_count = proto.Field(proto.INT64, number=2)
    valid_value_count = proto.Field(proto.INT64, number=9)
class Float64Stats(proto.Message):
    r"""The data statistics of a series of FLOAT64 values.

    Attributes:
        mean (float):
            The mean of the series.
        standard_deviation (float):
            The standard deviation of the series.
        quantiles (Sequence[float]):
            Ordered from 0 to k k-quantile values of the data series of
            n values. The value at index i is, approximately, the
            i*n/k-th smallest value in the series; for i = 0 and i = k
            these are, respectively, the min and max values.
        histogram_buckets (Sequence[google.cloud.automl_v1beta1.types.Float64Stats.HistogramBucket]):
            Histogram buckets of the data series. Sorted by the min
            value of the bucket, ascendingly, and the number of the
            buckets is dynamically generated. The buckets are
            non-overlapping and completely cover whole FLOAT64 range
            with min of first bucket being ``"-Infinity"``, and max of
            the last one being ``"Infinity"``.
    """

    class HistogramBucket(proto.Message):
        r"""A bucket of a histogram.

        Attributes:
            min_ (float):
                The minimum value of the bucket, inclusive.
            max_ (float):
                The maximum value of the bucket, exclusive unless max =
                ``"Infinity"``, in which case it's inclusive.
            count (int):
                The number of data values that are in the
                bucket, i.e. are between min and max values.
        """

        min_ = proto.Field(proto.DOUBLE, number=1)
        max_ = proto.Field(proto.DOUBLE, number=2)
        count = proto.Field(proto.INT64, number=3)

    mean = proto.Field(proto.DOUBLE, number=1)
    standard_deviation = proto.Field(proto.DOUBLE, number=2)
    quantiles = proto.RepeatedField(proto.DOUBLE, number=3)
    # HistogramBucket is passed as a class object, so the nested class must
    # be defined above this line.
    histogram_buckets = proto.RepeatedField(
        proto.MESSAGE,
        number=4,
        message=HistogramBucket,
    )
class StringStats(proto.Message):
    r"""The data statistics of a series of STRING values.

    Attributes:
        top_unigram_stats (Sequence[google.cloud.automl_v1beta1.types.StringStats.UnigramStats]):
            The statistics of the top 20 unigrams, ordered by
            [count][google.cloud.automl.v1beta1.StringStats.UnigramStats.count].
    """

    class UnigramStats(proto.Message):
        r"""The statistics of a unigram.

        Attributes:
            value (str):
                The unigram.
            count (int):
                The number of occurrences of this unigram in
                the series.
        """

        value = proto.Field(proto.STRING, number=1)
        count = proto.Field(proto.INT64, number=2)

    # UnigramStats is passed as a class object, so the nested class must be
    # defined above this line.
    top_unigram_stats = proto.RepeatedField(
        proto.MESSAGE,
        number=1,
        message=UnigramStats,
    )
class TimestampStats(proto.Message):
    r"""The data statistics of a series of TIMESTAMP values.

    Attributes:
        granular_stats (Mapping[str, google.cloud.automl_v1beta1.types.TimestampStats.GranularStats]):
            The string key is the pre-defined granularity. Currently
            supported: hour_of_day, day_of_week, month_of_year.
            Granularities finer than the granularity of timestamp data
            are not populated (e.g. if timestamps are at day
            granularity, then hour_of_day is not populated).
    """

    class GranularStats(proto.Message):
        r"""Stats split by a defined in context granularity.

        Attributes:
            buckets (Mapping[int, int]):
                A map from granularity key to example count for that key
                (e.g. for hour_of_day ``13`` means 1pm, or for
                month_of_year ``5`` means May).
        """

        # Map field with INT32 keys and INT64 values.
        buckets = proto.MapField(proto.INT32, proto.INT64, number=1)

    # Map field with string keys and GranularStats message values; the
    # nested class is passed as an object, so it must be defined above.
    granular_stats = proto.MapField(
        proto.STRING,
        proto.MESSAGE,
        number=1,
        message=GranularStats,
    )
class ArrayStats(proto.Message):
    r"""The data statistics of a series of ARRAY values.

    Attributes:
        member_stats (google.cloud.automl_v1beta1.types.DataStats):
            Stats of all the values of all arrays, as if
            they were a single long series of data. The type
            depends on the element type of the array.
    """

    # The message type is referenced by name rather than as a class object.
    member_stats = proto.Field(
        proto.MESSAGE,
        number=2,
        message="DataStats",
    )
class StructStats(proto.Message):
    r"""The data statistics of a series of STRUCT values.

    Attributes:
        field_stats (Mapping[str, google.cloud.automl_v1beta1.types.DataStats]):
            Map from a field name of the struct to data
            stats aggregated over series of all data in that
            field across all the structs.
    """

    # Map field with string keys and DataStats message values; the message
    # type is referenced by name rather than as a class object.
    field_stats = proto.MapField(
        proto.STRING,
        proto.MESSAGE,
        number=1,
        message="DataStats",
    )
class CategoryStats(proto.Message):
    r"""The data statistics of a series of CATEGORY values.

    Attributes:
        top_category_stats (Sequence[google.cloud.automl_v1beta1.types.CategoryStats.SingleCategoryStats]):
            The statistics of the top 20 CATEGORY values, ordered by
            [count][google.cloud.automl.v1beta1.CategoryStats.SingleCategoryStats.count].
    """

    class SingleCategoryStats(proto.Message):
        r"""The statistics of a single CATEGORY value.

        Attributes:
            value (str):
                The CATEGORY value.
            count (int):
                The number of occurrences of this value in
                the series.
        """

        value = proto.Field(proto.STRING, number=1)
        count = proto.Field(proto.INT64, number=2)

    # SingleCategoryStats is passed as a class object, so the nested class
    # must be defined above this line.
    top_category_stats = proto.RepeatedField(
        proto.MESSAGE,
        number=1,
        message=SingleCategoryStats,
    )
class CorrelationStats(proto.Message):
    r"""Correlation statistics between two series of DataType
    values. The series may have differing DataType-s, but within a
    single series the DataType must be the same.

    Attributes:
        cramers_v (float):
            The correlation value using the Cramer's V
            measure.
    """

    cramers_v = proto.Field(proto.DOUBLE, number=1)
# Export exactly the names listed in the module manifest, in sorted order.
__all__ = (*sorted(__protobuf__.manifest),)