import pandas as pd
import numpy as np
import great_expectations as ge
from great_expectations.data_context.types.base import DataContextConfig, DatasourceConfig, FilesystemStoreBackendDefaults
from great_expectations.data_context import BaseDataContext
from great_expectations.checkpoint import SimpleCheckpoint
from great_expectations.core.batch import RuntimeBatchRequest
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
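# Initialise the Great Expectations project scaffold. The leading "!" on the next line is an IPython shell escape, so it only runs inside a notebook.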
!great_expectations --yes --v3-api init
Using v3 (Batch Request) API
___ _ ___ _ _ _
/ __|_ _ ___ __ _| |_ | __|_ ___ __ ___ __| |_ __ _| |_(_)___ _ _ ___
| (_ | '_/ -_) _` | _| | _|\ \ / '_ \/ -_) _| _/ _` | _| / _ \ ' \(_-<
\___|_| \___\__,_|\__| |___/_\_\ .__/\___\__|\__\__,_|\__|_\___/_||_/__/
|_|
~ Always know what to expect from your data ~
This looks like an existing project that appears complete! You are ready to roll.
# Build a tiny random "sales" table (5 rows) to validate later.
# The candidate pools are defined first; the three draws then happen in the
# same order as before (products, quantities, dates).
product_catalog = ['camera', 'phone', 'computer', 'speaker', 'TV',
                   'cable', 'movie', 'guitar', 'printer']
quantity_pool = [*range(10), None]  # None stands in for a missing quantity
date_pool = pd.date_range(start="2020-12-30", end="2021-01-8")

products = np.random.choice(product_catalog, size=5)
quantities = np.random.choice(quantity_pool, size=5)
dates = np.random.choice(date_pool, size=5)

df = pd.DataFrame(
    {'products': products, 'quantities': quantities, 'dates': dates}
)
df
productsobject
quantitiesobject
0
speaker
3
1
guitar
0
2
guitar
3
3
camera
4
4
computer
3
# Wrap the DataFrame so expectation methods can be called on it directly.
df = ge.from_pandas(df)

# Each expectation is evaluated immediately against the 5 random rows, so the
# outcomes vary from run to run. The pass rates below follow from the sampling
# pools used to build the data (trailing ";" suppresses notebook output).

# 5 draws from 9 products are all distinct with probability 9*8*7*6*5 / 9**5 ~ 26%.
df.expect_column_values_to_be_unique(column='products');

# 10 of the 11 candidate quantities are non-null: (10/11)**5 ~ 62%.
df.expect_column_values_to_not_be_null(column='quantities');

# 8 of the 10 candidate dates fall inside the bounds: (8/10)**5 ~ 33%.
df.expect_column_values_to_be_between(
    column='dates',
    min_value='2021-01-01',
    max_value='2021-01-8',
    parse_strings_as_datetimes=True,
);
# Assemble the project configuration in code: one datasource named
# "my_datasource" that uses a pandas execution engine and a runtime data
# connector, plus filesystem-backed stores under /work/great_expectations.
pandas_engine = {
    "class_name": "PandasExecutionEngine",
    "module_name": "great_expectations.execution_engine",
}
runtime_connector = {
    "class_name": "RuntimeDataConnector",
    # Keys a RuntimeBatchRequest must supply in its batch_identifiers.
    "batch_identifiers": ["default_identifier_name"],
}
my_datasource = DatasourceConfig(
    class_name="Datasource",
    module_name="great_expectations.datasource",
    execution_engine=pandas_engine,
    data_connectors={"default_runtime_data_connector_name": runtime_connector},
)
store_defaults = FilesystemStoreBackendDefaults(
    root_directory="/work/great_expectations"
)

data_context_config = DataContextConfig(
    datasources={"my_datasource": my_datasource},
    store_backend_defaults=store_defaults,
)

# Instantiate the context from the in-code configuration above.
context = BaseDataContext(project_config=data_context_config)
# Collect every expectation recorded on `df` (failed ones are kept, not
# discarded) and store the suite in the context under 'my_expectation_suite'.
recorded_suite = df.get_expectation_suite(discard_failed_expectations=False)
context.save_expectation_suite(
    expectation_suite=recorded_suite,
    expectation_suite_name='my_expectation_suite',
);
# Describe the in-memory batch to validate: `df` itself is handed over as the
# batch data, tagged with the identifier key the runtime connector declares.
runtime_parameters = {"batch_data": df}
batch_identifiers = {"default_identifier_name": "df"}

batch_request = RuntimeBatchRequest(
    datasource_name="my_datasource",
    data_connector_name="default_runtime_data_connector_name",
    data_asset_name="df",
    runtime_parameters=runtime_parameters,
    batch_identifiers=batch_identifiers,
)
# Register a checkpoint named "my_checkpoint" that validates against the
# suite saved as "my_expectation_suite".
checkpoint_config = dict(
    name="my_checkpoint",
    config_version=1,
    class_name="SimpleCheckpoint",
    expectation_suite_name="my_expectation_suite",
)
context.add_checkpoint(**checkpoint_config);
# Run the registered checkpoint against the runtime batch and keep the
# returned result object in `results`.
checkpoint_validations = [{"batch_request": batch_request}]
results = context.run_checkpoint(
    checkpoint_name="my_checkpoint",
    validations=checkpoint_validations,
    run_id="my_run_id",
)
{
"name": "my_checkpoint",
"config_version": 1.0,
"template_name": null,
"module_name": "great_expectations.checkpoint",
"class_name": "Checkpoint",
"run_name_template": null,
"expectation_suite_name": "my_expectation_suite",
"batch_request": null,
"action_list": [
{
"name": "store_validation_result",
"action": {
"class_name": "StoreValidationResultAction"
}
},
{
"name": "store_evaluation_params",
"action": {
"class_name": "StoreEvaluationParametersAction"
}
},
{
"name": "update_data_docs",
"action": {
"class_name": "UpdateDataDocsAction",
"site_names": []
}
}
],
"evaluation_parameters": {},
"runtime_configuration": {},
"validations": [],
"profilers": [],
"ge_cloud_id": null,
"expectation_suite_ge_cloud_id": null
}
# Build the Data Docs for this context; the trailing ";" keeps the notebook
# from echoing the call's return value.
context.build_data_docs();
# Uncomment this line to serve up the documentation at https://5119d502-592c-43bd-b99d-244e09f7080a.deepnoteproject.com
#!python -m http.server 8080 --directory great_expectations/uncommitted/data_docs/local_site