great_expectations

import warnings

import great_expectations as ge
import numpy as np
import pandas as pd
from great_expectations.checkpoint import SimpleCheckpoint
from great_expectations.core.batch import RuntimeBatchRequest
from great_expectations.data_context import BaseDataContext
from great_expectations.data_context.types.base import (
    DataContextConfig,
    DatasourceConfig,
    FilesystemStoreBackendDefaults,
)

# Suppress DeprecationWarning messages for the rest of the session.
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Project initialisation via the CLI (shell magic, left disabled here):
#!great_expectations --yes --v3-api init

!great_expectations --yes --v3-api init

Using v3 (Batch Request) API

  ___              _     ___                  _        _   _
 / __|_ _ ___ __ _| |_  | __|_ ___ __  ___ __| |_ __ _| |_(_)___ _ _  ___
| (_ | '_/ -_) _` |  _| | _|\ \ / '_ \/ -_) _|  _/ _` |  _| / _ \ ' \(_-<
 \___|_| \___\__,_|\__| |___/_\_\ .__/\___\__|\__\__,_|\__|_\___/_||_/__/
                                |_|
             ~ Always know what to expect from your data ~

This looks like an existing project that appears complete! You are ready to roll.

# Build a 5-row sample table from random draws (with replacement), so the
# contents -- and therefore which expectations pass -- differ on every run.
products = np.random.choice(
    ['camera', 'phone', 'computer', 'speaker', 'TV', 'cable', 'movie', 'guitar', 'printer'],
    size=5,
)

# None is added to the candidates 0..9 so that some draws are missing values.
quantities = np.random.choice(list(range(10)) + [None], size=5)

# Candidate dates: every day from 2020-12-30 through 2021-01-08 (10 days).
dates = np.random.choice(
    pd.date_range(start="2020-12-30", end="2021-01-8"),
    size=5,
)

df = pd.DataFrame({'products': products, 'quantities': quantities, 'dates': dates})
df  # last expression of the notebook cell: displays the frame

products (object)

quantities (object)

speaker

guitar

camera

computer

# Wrap the frame with Great Expectations so the expect_* methods below can be
# called on it directly.
df = ge.from_pandas(df)

# Each expectation must hold for all 5 randomly drawn rows, so the pass rates
# below follow from the candidate pools used to generate the data.
# The trailing semicolons suppress the notebook's echo of each result.

# 5 distinct draws out of 9 products: 9*8*7*6*5 / 9**5, about 26%.
df.expect_column_values_to_be_unique('products');  # ~26% chance of passing

# No None among 5 draws from 11 candidates: (10/11)**5, about 62%.
df.expect_column_values_to_not_be_null('quantities');  # ~60% chance of passing

# 8 of the 10 candidate dates lie in [2021-01-01, 2021-01-08] (assuming the
# default inclusive bounds): (8/10)**5, about 33%.
df.expect_column_values_to_be_between(
    'dates',
    '2021-01-01',
    '2021-01-8',
    parse_strings_as_datetimes=True,
);  # ~33% chance of passing

# In-code project configuration: one Pandas-backed datasource whose runtime
# data connector accepts in-memory batches, with filesystem stores rooted at
# /work/great_expectations.
data_context_config = DataContextConfig(
    datasources={
        "my_datasource": DatasourceConfig(
            class_name="Datasource",
            module_name="great_expectations.datasource",
            execution_engine={
                "class_name": "PandasExecutionEngine",
                "module_name": "great_expectations.execution_engine",
            },
            data_connectors={
                "default_runtime_data_connector_name": {
                    "class_name": "RuntimeDataConnector",
                    # Identifier key that batch requests must supply.
                    "batch_identifiers": ["default_identifier_name"],
                }
            },
        )
    },
    store_backend_defaults=FilesystemStoreBackendDefaults(
        root_directory="/work/great_expectations"
    ),
)
context = BaseDataContext(project_config=data_context_config)

# Persist the expectations declared on df as a named suite.
# discard_failed_expectations=False keeps expectations that did not pass on
# this particular random sample.
context.save_expectation_suite(
    expectation_suite_name='my_expectation_suite',
    expectation_suite=df.get_expectation_suite(discard_failed_expectations=False),
);

# Describe the in-memory frame as a batch for the runtime data connector
# configured on "my_datasource".
batch_request = RuntimeBatchRequest(
    datasource_name="my_datasource",
    data_connector_name="default_runtime_data_connector_name",
    data_asset_name="df",
    runtime_parameters={"batch_data": df},
    # Key must match the connector's declared batch_identifiers.
    batch_identifiers={"default_identifier_name": "df"},
)

# Checkpoint bound to the saved suite; the batch is supplied at run time.
checkpoint_config = {
    "name": "my_checkpoint",
    "config_version": 1,
    "class_name": "SimpleCheckpoint",
    "expectation_suite_name": "my_expectation_suite",
}
context.add_checkpoint(**checkpoint_config);

# Validate the batch against the suite and keep the outcome in `results`.
results = context.run_checkpoint(
    checkpoint_name="my_checkpoint",
    validations=[
        {"batch_request": batch_request},
    ],
    run_id="my_run_id",
)

{
  "name": "my_checkpoint",
  "config_version": 1.0,
  "template_name": null,
  "module_name": "great_expectations.checkpoint",
  "class_name": "Checkpoint",
  "run_name_template": null,
  "expectation_suite_name": "my_expectation_suite",
  "batch_request": null,
  "action_list": [
    {
      "name": "store_validation_result",
      "action": {
        "class_name": "StoreValidationResultAction"
      }
    },
    {
      "name": "store_evaluation_params",
      "action": {
        "class_name": "StoreEvaluationParametersAction"
      }
    },
    {
      "name": "update_data_docs",
      "action": {
        "class_name": "UpdateDataDocsAction",
        "site_names": []
      }
    }
  ],
  "evaluation_parameters": {},
  "runtime_configuration": {},
  "validations": [],
  "profilers": [],
  "ge_cloud_id": null,
  "expectation_suite_ge_cloud_id": null
}

# Build the Data Docs for this context (semicolon suppresses the cell echo).
context.build_data_docs();
# Uncomment the line below to serve up the documentation at
# https://5119d502-592c-43bd-b99d-244e09f7080a.deepnoteproject.com
#!python -m http.server 8080 --directory great_expectations/uncommitted/data_docs/local_site