As of January 1, 2020, this library no longer supports Python 2 on the latest released version. Library versions released prior to that date will continue to be available. For more information, please visit Python 2 support on Google Cloud.

Source code for google.cloud.automl_v1beta1.types.tables

# -*- coding: utf-8 -*-
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import proto  # type: ignore

from google.cloud.automl_v1beta1.types import column_spec
from google.cloud.automl_v1beta1.types import data_stats
from google.cloud.automl_v1beta1.types import ranges
from google.protobuf import struct_pb2  # type: ignore
from google.protobuf import timestamp_pb2  # type: ignore


# Module descriptor handed to proto-plus: it names the protobuf package the
# messages in this file belong to and lists the message classes the file
# defines (kept in alphabetical order; the manifest is an unordered set).
__protobuf__ = proto.module(
    manifest={
        "TablesAnnotation",
        "TablesDatasetMetadata",
        "TablesModelColumnInfo",
        "TablesModelMetadata",
    },
    package="google.cloud.automl.v1beta1",
)


class TablesDatasetMetadata(proto.Message):
    r"""Metadata for a dataset used for AutoML Tables.

    Attributes:
        primary_table_spec_id (str):
            Output only. The table_spec_id of the primary table of this
            dataset.
        target_column_spec_id (str):
            column_spec_id of the primary table's column that should be
            used as the training & prediction target. This column must
            be non-nullable and have one of the following data types
            (otherwise model creation will error):

            -  CATEGORY

            -  FLOAT64

            If the type is CATEGORY, only up to 100 unique values may
            exist in that column across all rows.

            NOTE: Updates of this field will instantly affect any other
            users concurrently working with the dataset.
        weight_column_spec_id (str):
            column_spec_id of the primary table's column that should be
            used as the weight column, i.e. the higher the value the
            more important the row will be during model training.
            Required type: FLOAT64. Allowed values: 0 to 10000,
            inclusive on both ends; 0 means the row is ignored for
            training. If not set all rows are assumed to have equal
            weight of 1.

            NOTE: Updates of this field will instantly affect any other
            users concurrently working with the dataset.
        ml_use_column_spec_id (str):
            column_spec_id of the primary table column which specifies a
            possible ML use of the row, i.e. the column will be used to
            split the rows into TRAIN, VALIDATE and TEST sets. Required
            type: STRING. This column, if set, must either have all of
            ``TRAIN``, ``VALIDATE``, ``TEST`` among its values, or only
            have ``TEST``, ``UNASSIGNED`` values. In the latter case the
            rows with ``UNASSIGNED`` value will be assigned by AutoML.
            Note that if a given ml use distribution makes it impossible
            to create a "good" model, that call will error describing
            the issue. If both this column_spec_id and primary table's
            time_column_spec_id are not set, then all rows are treated
            as ``UNASSIGNED``.

            NOTE: Updates of this field will instantly affect any other
            users concurrently working with the dataset.
        target_column_correlations (Sequence[google.cloud.automl_v1beta1.types.TablesDatasetMetadata.TargetColumnCorrelationsEntry]):
            Output only. Correlations between
            [TablesDatasetMetadata.target_column_spec_id][google.cloud.automl.v1beta1.TablesDatasetMetadata.target_column_spec_id],
            and other columns of the
            [TablesDatasetMetadata.primary_table][google.cloud.automl.v1beta1.TablesDatasetMetadata.primary_table_spec_id].
            Only set if the target column is set. Mapping from other
            column spec id to its CorrelationStats with the target
            column. This field may be stale, see the stats_update_time
            field for the timestamp at which these stats were last
            updated.
        stats_update_time (google.protobuf.timestamp_pb2.Timestamp):
            Output only. The most recent timestamp when
            target_column_correlations field and all descendant
            ColumnSpec.data_stats and ColumnSpec.top_correlated_columns
            fields were last (re-)generated. Any changes that happened
            to the dataset afterwards are not reflected in these fields
            values. The regeneration happens in the background on a best
            effort basis.
    """

    # ``number`` is the protobuf field number of each attribute. The
    # numbering is deliberately not contiguous here (5 is unused), so do not
    # renumber the fields.
    primary_table_spec_id = proto.Field(proto.STRING, number=1)
    target_column_spec_id = proto.Field(proto.STRING, number=2)
    weight_column_spec_id = proto.Field(proto.STRING, number=3)
    ml_use_column_spec_id = proto.Field(proto.STRING, number=4)
    # Map keyed by column spec id (string) with CorrelationStats values.
    target_column_correlations = proto.MapField(
        proto.STRING,
        proto.MESSAGE,
        number=6,
        message=data_stats.CorrelationStats,
    )
    stats_update_time = proto.Field(
        proto.MESSAGE,
        number=7,
        message=timestamp_pb2.Timestamp,
    )
class TablesModelMetadata(proto.Message):
    r"""Model metadata specific to AutoML Tables.

    Attributes:
        optimization_objective_recall_value (float):
            Required when optimization_objective is
            "MAXIMIZE_PRECISION_AT_RECALL". Must be between 0 and 1,
            inclusive.
        optimization_objective_precision_value (float):
            Required when optimization_objective is
            "MAXIMIZE_RECALL_AT_PRECISION". Must be between 0 and 1,
            inclusive.
        target_column_spec (google.cloud.automl_v1beta1.types.ColumnSpec):
            Column spec of the dataset's primary table's column the
            model is predicting. Snapshotted when model creation
            started. Only 3 fields are used:

            -  name - May be set on CreateModel, if it's not then the
               ColumnSpec corresponding to the current
               target_column_spec_id of the dataset the model is trained
               from is used. If neither is set, CreateModel will error.

            -  display_name - Output only.

            -  data_type - Output only.
        input_feature_column_specs (Sequence[google.cloud.automl_v1beta1.types.ColumnSpec]):
            Column specs of the dataset's primary table's columns, on
            which the model is trained and which are used as the input
            for predictions. The
            [target_column][google.cloud.automl.v1beta1.TablesModelMetadata.target_column_spec]
            as well as, according to dataset's state upon model
            creation,
            [weight_column][google.cloud.automl.v1beta1.TablesDatasetMetadata.weight_column_spec_id],
            and
            [ml_use_column][google.cloud.automl.v1beta1.TablesDatasetMetadata.ml_use_column_spec_id]
            must never be included here.

            Only 3 fields are used:

            -  name - May be set on CreateModel, if set only the columns
               specified are used, otherwise all primary table's columns
               (except the ones listed above) are used for the training
               and prediction input.

            -  display_name - Output only.

            -  data_type - Output only.
        optimization_objective (str):
            Objective function the model is optimizing towards. The
            training process creates a model that maximizes/minimizes
            the value of the objective function over the validation set.

            The supported optimization objectives depend on the
            prediction type. If the field is not set, a default
            objective function is used.

            CLASSIFICATION_BINARY:

            -  "MAXIMIZE_AU_ROC" (default) - Maximize the area under the
               receiver operating characteristic (ROC) curve.
            -  "MINIMIZE_LOG_LOSS" - Minimize log loss.
            -  "MAXIMIZE_AU_PRC" - Maximize the area under the
               precision-recall curve.
            -  "MAXIMIZE_PRECISION_AT_RECALL" - Maximize precision for a
               specified recall value.
            -  "MAXIMIZE_RECALL_AT_PRECISION" - Maximize recall for a
               specified precision value.

            CLASSIFICATION_MULTI_CLASS:

            -  "MINIMIZE_LOG_LOSS" (default) - Minimize log loss.

            REGRESSION:

            -  "MINIMIZE_RMSE" (default) - Minimize root-mean-squared
               error (RMSE).
            -  "MINIMIZE_MAE" - Minimize mean-absolute error (MAE).
            -  "MINIMIZE_RMSLE" - Minimize root-mean-squared log error
               (RMSLE).
        tables_model_column_info (Sequence[google.cloud.automl_v1beta1.types.TablesModelColumnInfo]):
            Output only. Auxiliary information for each of the
            input_feature_column_specs with respect to this particular
            model.
        train_budget_milli_node_hours (int):
            Required. The train budget of creating this model, expressed
            in milli node hours i.e. 1,000 value in this field means 1
            node hour.

            The training cost of the model will not exceed this budget.
            The final cost will be attempted to be close to the budget,
            though may end up being (even) noticeably smaller - at the
            backend's discretion. This especially may happen when
            further model training ceases to provide any improvements.

            If the budget is set to a value known to be insufficient to
            train a model for the given dataset, the training won't be
            attempted and will error.

            The train budget must be between 1,000 and 72,000 milli node
            hours, inclusive.
        train_cost_milli_node_hours (int):
            Output only. The actual training cost of the model,
            expressed in milli node hours, i.e. 1,000 value in this
            field means 1 node hour. Guaranteed to not exceed the train
            budget.
        disable_early_stopping (bool):
            Use the entire training budget. This disables the early
            stopping feature. By default, the early stopping feature is
            enabled, which means that AutoML Tables might stop training
            before the entire training budget has been used.
    """

    # The recall and precision values are members of the same oneof group,
    # so they are alternatives to each other rather than independent fields.
    optimization_objective_recall_value = proto.Field(
        proto.FLOAT,
        number=17,
        oneof="additional_optimization_objective_config",
    )
    optimization_objective_precision_value = proto.Field(
        proto.FLOAT,
        number=18,
        oneof="additional_optimization_objective_config",
    )
    target_column_spec = proto.Field(
        proto.MESSAGE,
        number=2,
        message=column_spec.ColumnSpec,
    )
    input_feature_column_specs = proto.RepeatedField(
        proto.MESSAGE,
        number=3,
        message=column_spec.ColumnSpec,
    )
    optimization_objective = proto.Field(proto.STRING, number=4)
    # The message type is given by name (a string) rather than as a class
    # object, as in the original definition.
    tables_model_column_info = proto.RepeatedField(
        proto.MESSAGE,
        number=5,
        message="TablesModelColumnInfo",
    )
    train_budget_milli_node_hours = proto.Field(proto.INT64, number=6)
    train_cost_milli_node_hours = proto.Field(proto.INT64, number=7)
    disable_early_stopping = proto.Field(proto.BOOL, number=12)
class TablesAnnotation(proto.Message):
    r"""Contains annotation details specific to Tables.

    Attributes:
        score (float):
            Output only. A confidence estimate between 0.0 and 1.0,
            inclusive. A higher value means greater confidence in the
            returned value. For
            [target_column_spec][google.cloud.automl.v1beta1.TablesModelMetadata.target_column_spec]
            of FLOAT64 data type the score is not populated.
        prediction_interval (google.cloud.automl_v1beta1.types.DoubleRange):
            Output only. Only populated when
            [target_column_spec][google.cloud.automl.v1beta1.TablesModelMetadata.target_column_spec]
            has FLOAT64 data type. An interval that has a 95% chance of
            containing the exactly correct target value.
        value (google.protobuf.struct_pb2.Value):
            The predicted value of the row's
            [target_column][google.cloud.automl.v1beta1.TablesModelMetadata.target_column_spec].
            The value depends on the column's DataType:

            -  CATEGORY - the predicted (with the above confidence
               ``score``) CATEGORY value.

            -  FLOAT64 - the predicted (with above
               ``prediction_interval``) FLOAT64 value.
        tables_model_column_info (Sequence[google.cloud.automl_v1beta1.types.TablesModelColumnInfo]):
            Output only. Auxiliary information for each of the model's
            [input_feature_column_specs][google.cloud.automl.v1beta1.TablesModelMetadata.input_feature_column_specs]
            with respect to this particular prediction. If no other
            fields than
            [column_spec_name][google.cloud.automl.v1beta1.TablesModelColumnInfo.column_spec_name]
            and
            [column_display_name][google.cloud.automl.v1beta1.TablesModelColumnInfo.column_display_name]
            would be populated, then this whole field is not.
        baseline_score (float):
            Output only. Stores the prediction score for the baseline
            example, which is defined as the example with all values set
            to their baseline values. This is used as part of the
            Sampled Shapley explanation of the model's prediction. This
            field is populated only when feature importance is
            requested. For regression models, this holds the baseline
            prediction for the baseline example. For classification
            models, this holds the baseline prediction for the baseline
            example for the argmax class.
    """

    # Declaration order follows the original definition; the protobuf field
    # numbers (1, 4, 2, 3, 5) are not in ascending order and must be kept.
    score = proto.Field(proto.FLOAT, number=1)
    prediction_interval = proto.Field(
        proto.MESSAGE,
        number=4,
        message=ranges.DoubleRange,
    )
    value = proto.Field(proto.MESSAGE, number=2, message=struct_pb2.Value)
    # The message type is given by name (a string) rather than as a class
    # object, as in the original definition.
    tables_model_column_info = proto.RepeatedField(
        proto.MESSAGE,
        number=3,
        message="TablesModelColumnInfo",
    )
    baseline_score = proto.Field(proto.FLOAT, number=5)
class TablesModelColumnInfo(proto.Message):
    r"""Information specific to a given column and Tables Model, in the
    context of the Model and the predictions created by it.

    Attributes:
        column_spec_name (str):
            Output only. The name of the ColumnSpec describing the
            column. Not populated when this proto is outputted to
            BigQuery.
        column_display_name (str):
            Output only. The display name of the column (same as the
            display_name of its ColumnSpec).
        feature_importance (float):
            Output only. When given as part of a Model (always
            populated): Measurement of how much the correctness of the
            model's predictions on the TEST data depends on values in
            this column. A value between 0 and 1, higher means higher
            influence. These values are normalized - for all input
            feature columns of a given model they add to 1.

            When given back by Predict (populated iff [feature_importance
            param][google.cloud.automl.v1beta1.PredictRequest.params] is
            set) or Batch Predict (populated iff
            [feature_importance][google.cloud.automl.v1beta1.PredictRequest.params]
            param is set): Measurement of how impactful for the
            prediction returned for the given row the value in this
            column was. Specifically, the feature importance specifies
            the marginal contribution that the feature made to the
            prediction score compared to the baseline score. These
            values are computed using the Sampled Shapley method.
    """

    # ``number`` is the protobuf field number of each attribute.
    column_spec_name = proto.Field(proto.STRING, number=1)
    column_display_name = proto.Field(proto.STRING, number=2)
    feature_importance = proto.Field(proto.FLOAT, number=3)
# Public names of this module: the manifest entries of ``__protobuf__`` in
# sorted order, stored as a tuple.
__all__ = tuple(sorted(__protobuf__.manifest))