Module markov.api.data.data_set

Functions

def compare_datasets(to_compare: str, compare_with: List[str], force: bool = False)
def get_dataset_info(ds_id: str) ‑> DataSet
def get_dataset_preview(ds_id: str) ‑> DatasetPreviewData
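
A minimal usage sketch (the dataset ids below are hypothetical placeholders):

>>> from markov.api.data.data_set import compare_datasets, get_dataset_info, get_dataset_preview
>>> ds = get_dataset_info(ds_id="ds-123")          # fetch a registered dataset by id
>>> preview = get_dataset_preview(ds_id="ds-123")  # sample rows from the dataset
>>> compare_datasets(to_compare="ds-123", compare_with=["ds-456"], force=False)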

Classes

class ArtifactExistsRequest (artifact_type: MKVArtifactType, artifact_id: str, artifact_str: str = '')

ArtifactExistsRequest(artifact_type: 'MKVArtifactType', artifact_id: 'str', artifact_str: 'str' = '')

Class variables

var artifact_id : str
var artifact_str : str
var artifact_type : MKVArtifactType

Static methods

def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]

Methods

def get_dict(self) ‑> str
def get_json(self) ‑> str
def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) ‑> str
class BaseApiResponse (return_code: str = 'COMMAND_FAILED', message: Optional[str] = '')

BaseApiResponse(return_code: 'str' = 'COMMAND_FAILED', message: 'Optional[str]' = '')

Subclasses

  • DatasetQualityResponse

Class variables

var message : Optional[str]
var return_code : str

Static methods

def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]

Methods

def get_dict(self) ‑> Dict
def get_json(self) ‑> str
def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) ‑> str
class DataAnalysisRunRequest (ds_id: str, force: bool = False)

DataAnalysisRunRequest(ds_id: 'str', force: 'bool' = False)

Class variables

var ds_id : str
var force : bool

Static methods

def create_from_dict(value: Dict) ‑> DataAnalysisRunRequest
def create_from_json(value: str) ‑> DataAnalysisRunRequest
def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]

Methods

def get_dict(self) ‑> Dict
def get_json(self) ‑> str
def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) ‑> str
class DataSet (ds_prop: DataSetProperties, ds_paths: List[DataSegmentPath], ds_id: Optional[str] = '', analysis_status: str = '', cred_id: str = '', data_segments: dict = <factory>, datasource_type: DataSourceType = DataSourceType.S3)

DataSet(ds_prop: 'DataSetProperties', ds_paths: 'List[DataSegmentPath]', ds_id: 'Optional[str]' = '', analysis_status: 'str' = '', cred_id: 'str' = '', _credentials: 'Optional[GenericCredential]' = None, _quality: 'Optional[DatasetQuality]' = None, data_segments: 'dict' = <factory>, datasource_type: 'DataSourceType' = DataSourceType.S3)

Class variables

var analysis_status : str
var cred_id : str
var data_segments : dict
var datasource_type : DataSourceType
var ds_id : Optional[str]
var ds_paths : List[DataSegmentPath]
var ds_prop : DataSetProperties

Static methods

def create_dataset_from_response(response: Dict) ‑> DataSet

This method is for internal use only. It deserializes the response received from the MarkovML backend into a DataSet object.

Args

response : dict
The response received from MarkovML as JSON

Returns

DataSet object

def create_from_dict(json_dict: dict) ‑> DataSet
def create_from_json(json_str: str) ‑> DataSet
def from_cloud(df_id: str, x_col_names: List[str], delimiter: str, name: str, data_category: DataCategory, credential_id: str, y_name: str = None, train_source: str = '', test_source: str = '', validate_source: str = '', unsplit_source: str = '')

Creates a DataSet instance from cloud storage paths.

This method initializes a DataSet with properties and data sources specified as cloud storage paths (e.g., S3 paths). It allows the data to be separated into training, testing, and validation segments, or supplied as a single unsplit source.

Parameters:

df_id (str): Unique identifier for the dataset.
x_col_names (List[str]): List of column names to be used as features.
delimiter (str): Delimiter used in the data files.
name (str): Human-readable name for the dataset.
data_category (DataCategory): Category or type of the data.
credential_id (str): Identifier for the credentials used to access cloud storage.
y_name (str, optional): Name of the column to be used as the target variable. If y_name is None, the dataset is considered unlabelled.
train_source (str, optional): Cloud storage path for the training data.
test_source (str, optional): Cloud storage path for the testing data.
validate_source (str, optional): Cloud storage path for the validation data.
unsplit_source (str, optional): Cloud storage path for unsplit data. If provided, train_source, test_source, and validate_source should be empty strings.

Returns: DataSet: An instance of DataSet configured with the provided cloud storage paths as data sources.

Raises: IOError: If both unsplit_source and any of the split sources (train, test, validate) are provided.
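
Example (a minimal sketch; the ids, bucket paths, and column names are hypothetical, and the DataCategory member name is an assumption):

>>> ds = DataSet.from_cloud(
...     df_id="df-123",                    # hypothetical data family id
...     x_col_names=["review_text"],
...     delimiter=",",
...     name="product-reviews",
...     data_category=DataCategory.TEXT,   # assumed enum member name
...     credential_id="cred-abc",          # credentials registered with MarkovML
...     y_name="sentiment",
...     train_source="s3://my-bucket/train.csv",
...     test_source="s3://my-bucket/test.csv",
... )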

def from_dataframe(df_id: str, x_col_names: List[str], delimiter: str, name: str, data_category: DataCategory, y_name: str = None, train_source: pd.DataFrame = None, test_source: pd.DataFrame = None, validate_source: pd.DataFrame = None, unsplit_source: pd.DataFrame = None)

Creates a DataSet instance from provided DataFrame sources.

This method initializes a DataSet with properties and data sources specified as DataFrame objects. It allows for the separation of data into training, testing, and validation segments, or the use of an unsplit data source.

Parameters:

df_id (str): Unique identifier for the dataset.
x_col_names (List[str]): List of column names to be used as features.
delimiter (str): Delimiter used in the data files.
name (str): Human-readable name for the dataset.
data_category (DataCategory): Category or type of the data.
y_name (str, optional): Name of the column to be used as the target variable. If y_name is None, the dataset is considered unlabelled.
train_source (pd.DataFrame, optional): DataFrame source for the training data.
test_source (pd.DataFrame, optional): DataFrame source for the testing data.
validate_source (pd.DataFrame, optional): DataFrame source for the validation data.
unsplit_source (pd.DataFrame, optional): DataFrame source for unsplit data. If provided, train_source, test_source, and validate_source should be None.

Returns: DataSet: An instance of DataSet configured with the provided DataFrame sources.

Raises: IOError: If both unsplit_source and any of the split sources (train, test, validate) are provided.
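
Example (a minimal sketch; the ids and column names are hypothetical, and the DataCategory member name is an assumption):

>>> import pandas as pd
>>> df = pd.DataFrame({"review_text": ["great", "poor"], "sentiment": [1, 0]})
>>> ds = DataSet.from_dataframe(
...     df_id="df-123",                    # hypothetical data family id
...     x_col_names=["review_text"],
...     delimiter=",",
...     name="product-reviews",
...     data_category=DataCategory.TEXT,   # assumed enum member name
...     y_name="sentiment",
...     unsplit_source=df,                 # unsplit data; leave the split sources as None
... )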

def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
def from_filepath(df_id: str, x_col_names: List[str], delimiter: str, name: str, data_category: DataCategory, y_name: str = None, train_source: str = '', test_source: str = '', validate_source: str = '', unsplit_source: str = '')

Creates a DataSet instance from file paths pointing to data sources.

This method initializes a DataSet with properties and data sources specified as file paths. It allows for the separation of data into training, testing, and validation segments, or the use of an unsplit data source.

Parameters:

df_id (str): Unique identifier for the dataset.
x_col_names (List[str]): List of column names to be used as features.
delimiter (str): Delimiter used in the data files.
name (str): Human-readable name for the dataset.
data_category (DataCategory): Category or type of the data.
y_name (str, optional): Name of the column to be used as the target variable. If y_name is None, the dataset is considered unlabelled.
train_source (str, optional): File path for the training data.
test_source (str, optional): File path for the testing data.
validate_source (str, optional): File path for the validation data.
unsplit_source (str, optional): File path for unsplit data. If provided, train_source, test_source, and validate_source should be empty strings.

Returns: DataSet: An instance of DataSet configured with the provided file paths as data sources.

Raises: IOError: If both unsplit_source and any of the split sources (train, test, validate) are provided.
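
Example (a minimal sketch; the ids, file paths, and column names are hypothetical, and the DataCategory member name is an assumption):

>>> ds = DataSet.from_filepath(
...     df_id="df-123",                    # hypothetical data family id
...     x_col_names=["review_text"],
...     delimiter=",",
...     name="product-reviews",
...     data_category=DataCategory.TEXT,   # assumed enum member name
...     y_name="sentiment",
...     train_source="data/train.csv",     # local file paths
...     test_source="data/test.csv",
... )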

def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
def get_by_id(dataset_id: str) ‑> DataSet

Get a dataset object by the id of a dataset registered with MarkovML

def get_by_name(dataset_name: str) ‑> DataSet

Get a dataset object by the name of a dataset registered with MarkovML

def get_datasets()

Fetch all datasets from the logged-in workspace
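
For example (the id and name below are hypothetical placeholders):

>>> ds = DataSet.get_by_id("ds-123")
>>> ds = DataSet.get_by_name("product-reviews")
>>> all_datasets = DataSet.get_datasets()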

def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]

Instance variables

prop data_family : DataFamily

Returns: The data family this dataset belongs to

prop delimiter : str

Returns: The delimiter that was used when registering this dataset

prop description : str
prop df_id

Data family id that uniquely identifies the data family with the MarkovML backend

prop features : List[str]

Returns the list of features chosen by the user while registering the dataset

prop get_analysis_status

Returns: The analysis status of this dataset, i.e. whether the dataset has been analyzed

prop name

Returns: Name assigned to this dataset during registration

prop quality : DatasetQuality

Returns: The DatasetQuality object containing details about the data quality of this dataset and how to get the quality score for each record of the dataset

prop segments : List[str]

Returns: The available segments from this dataset

prop storage : str
prop target : str

Returns the target column name chosen by the user while registering the dataset

prop test : DataSegment

Returns: The test segment of this dataset if available otherwise None

prop train : DataSegment

Returns: The train segment of this dataset if available otherwise None

prop unknown : DataSegment

Returns: The unknown segment of this dataset if available otherwise None

prop unsplit : DataSegment

Returns: The unsplit segment of this dataset if available otherwise None

prop validate : DataSegment

Returns: The validate segment of this dataset if available otherwise None

prop x_col_names : List[str]

Returns: Name of the feature columns for this dataset

prop x_indexes : List[int]
prop y_col_index : int

Returns: The target column index for this dataset

prop y_col_name : str

Returns: Name of the target column for this dataset

Methods

def compare(self, compare_input, force: bool = False) ‑> DataSetComparisonResult

Trigger comparison of this dataset with another dataset

Args

compare_input
Generic compare_input containing the args for triggering the dataset comparison. The input is validated and processed internally.
force : bool
Bypass any cached past comparison and run a fresh comparison.

Returns

DataSetComparisonResult
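
For example (a sketch; the dataset id passed as compare_input is a hypothetical placeholder):

>>> result = ds.compare("ds-456", force=False)
>>> print(result.msg)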

def create_vector_store(self, timeout=None)

Create a vector store for this dataset, enabling actions such as similarity search.

Args

timeout
Timeout in seconds for the vector store creation

Returns: VectorStore Resource object
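
For example (a sketch; the timeout value is illustrative):

>>> store = ds.create_vector_store(timeout=600)   # wait up to 10 minutes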

def data_category(self)
def download_as_csv(self)

Downloads all segments of the dataset as individual CSV files.

For example:

>>> ds.download_as_csv()
File train.csv downloaded successfully
File test.csv downloaded successfully

def download_segment(self, segment_type: Union[SegmentType, str]) ‑> str

Downloads a dataset segment to the folder this script is run from, for local processing.

For example, with SEGMENT_TYPE one of TRAIN|TEST|VALIDATE|UNSPLIT:

>>> dataset.download_segment(segment_type="SEGMENT_TYPE")
File filename.csv downloaded successfully

def get_credentials(self) ‑> GenericCredential

Return the original credentials registered with MarkovML for reading this dataset

Returns

GenericCredential that contains the registered credentials

def get_dict(self) ‑> Dict
def get_json(self) ‑> str
def get_preview(self) ‑> DatasetPreviewData

Get a dataset preview. This returns a sample from the registered dataset, displayed on the CLI terminal

>>> dataset.get_preview()
def get_segment(self, seg_type: SegmentType) ‑> DataSegment

Returns the DataSegment of type seg_type for this Dataset

Args

seg_type : SegmentType
Segment type, one of Train, Test, Validate, or Unknown (unsplit data).

Returns

DataSegment if it is available for the given dataset, otherwise None is returned
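
For example (a sketch; the SegmentType member name is an assumption):

>>> train_segment = ds.get_segment(SegmentType.TRAIN)
>>> if train_segment is not None:
...     print("train segment is available")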

def get_train_test_split(self, **kwargs) ‑> Tuple[pandas.core.frame.DataFrame, pandas.core.frame.DataFrame]

This is a wrapper over sklearn.model_selection.train_test_split (https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html). All arguments are passed through to the wrapped sklearn method.

Args

**kwargs : all kwargs that are applicable for sklearn.model_selection.train_test_split

Returns

Tuple containing the train and test splits
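
For example (a sketch; test_size and random_state are standard sklearn kwargs):

>>> train_df, test_df = ds.get_train_test_split(test_size=0.2, random_state=42)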

def get_url(self) ‑> str

Gives the URL of the dataset based on the environment.

For example:

>>> ds.get_url()
"{base_url}/{workspace_id}/datasets/{dataset_id}"

def register(self, cred: Union[str, GenericCredential], analyze: bool = True)

Register the dataset with a new credential and/or an already registered credential id

Args

cred : Union[str, GenericCredential]
The credentials (or the id of already registered credentials) used to read this dataset from cloud storage
analyze : bool
Set to True to analyze the dataset on successful registration

Returns

DataSetRegistrationResponse containing the results of the registration
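
For example (a sketch; the credential id is a hypothetical placeholder):

>>> response = ds.register(cred="cred-abc", analyze=True)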

def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) ‑> str
def update_datafamily(self, updated_df_id: str)

Update the datafamily for this dataset.

Args

updated_df_id : str
unique id of the new datafamily to assign this dataset to

Returns

None

def upload(self)

Validates that all data sources are of the same type [DataFrame, S3, or Filepath], uploads the data source to the corresponding S3 storage (hybrid deployment), and validates the dataset. Finally, registers the dataset.
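
For example (a sketch, assuming ds was built with from_dataframe or from_filepath):

>>> ds.upload()   # uploads, validates, and registers the dataset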

def view_details(self)

Opens the dataset details page for this dataset in the browser.

For example:

>>> ds.view_details()

class DataSetCompareRequest (primary_id: str, compare_ids: List[str], notes: str = '', force: bool = False)

DataSetCompareRequest(primary_id: 'str', compare_ids: 'List[str]', notes: 'str' = '', force: 'bool' = False)

Class variables

var compare_ids : List[str]
var force : bool
var notes : str
var primary_id : str

Static methods

def create_from_dict(value: Dict) ‑> DataSetCompareRequest
def create_from_json(value: str) ‑> DataSetCompareRequest
def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]

Methods

def get_dict(self) ‑> Dict
def get_json(self) ‑> str
def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) ‑> str
class DataSetComparisonResult (msg: str = '')

DataSetComparisonResult(msg: 'str' = '')

Class variables

var msg : str

Static methods

def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]

Methods

def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) ‑> str
class DataSetProperties (name: str, data_category: Optional[str], delimiter: str, df_id: str, storage_type: str, notes: str = '', x_indexes: Optional[List] = <factory>, y_index: int = -1, x_col_names: List[str] = <factory>, y_name: str = '', storage_format: str = StorageFormatType.CSV, info: dict = <factory>, source: str = '')

DataSetProperties(name: 'str', data_category: 'Optional[str]', delimiter: 'str', df_id: 'str', storage_type: 'str', notes: 'str' = '', x_indexes: 'Optional[List]' = <factory>, y_index: 'int' = -1, x_col_names: 'List[str]' = <factory>, y_name: 'str' = '', storage_format: 'str' = StorageFormatType.CSV, info: 'dict' = <factory>, source: 'str' = '')

Class variables

var data_category : Optional[str]
var delimiter : str
var df_id : str
var info : dict
var name : str
var notes : str
var source : str
var storage_format : str
var storage_type : str
var x_col_names : List[str]
var x_indexes : Optional[List]
var y_index : int
var y_name : str

Static methods

def create_dataset_properties_from_response(response: Dict)
def create_from_dict(json_dict: dict) ‑> DataSetProperties
def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]

Methods

def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) ‑> str
class DataSetRegistrationRequest (data_set: DataSet, credential: GenericCredential, should_analyze: bool = True, upload_mode: WorkflowMode = WorkflowMode.EXTERNAL_CLOUD_STORAGE)

DataSetRegistrationRequest(data_set: 'DataSet', credential: 'GenericCredential', should_analyze: 'bool' = True, upload_mode: 'WorkflowMode' = WorkflowMode.EXTERNAL_CLOUD_STORAGE)

Class variables

var credential : GenericCredential
var data_set : DataSet
var should_analyze : bool
var upload_mode : WorkflowMode

Static methods

def create_from_dict(value: str) ‑> DataSetRegistrationRequest
def create_from_json(value: str) ‑> DataSetRegistrationRequest
def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]

Methods

def get_dict(self) ‑> Dict
def get_json(self) ‑> str
def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) ‑> str
class DataSetRegistrationResponse (ds_id: str, run_details: Optional[Dict], return_code: Optional[str], create_time: str, message: str)

DataSetRegistrationResponse(ds_id: 'str', run_details: 'Optional[Dict]', return_code: 'Optional[str]', create_time: 'str', message: 'str')

Class variables

var create_time : str
var ds_id : str
var message : str
var return_code : Optional[str]
var run_details : Optional[Dict]

Static methods

def create_from_dict(value: Dict) ‑> DataSetRegistrationResponse
def create_from_json(value: str) ‑> DataSetRegistrationResponse
def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]

Methods

def get_dict(self) ‑> str
def get_json(self) ‑> str
def is_being_analyzed(self) ‑> bool
def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) ‑> str
class DatasetDataFamilyUpdate (ds_id: str, updated_df_id: str)

DatasetDataFamilyUpdate(ds_id: 'str', updated_df_id: 'str')

Class variables

var ds_id : str
var updated_df_id : str

Static methods

def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]

Methods

def get_dict(self) ‑> Dict
def get_json(self) ‑> str
def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) ‑> str
class DatasetPreviewData (segments: List[SegmentType], preview: Dict[SegmentType, FilePreview], delimiter: Optional[str])

DatasetPreviewData(segments: 'List[SegmentType]', preview: 'Dict[SegmentType, FilePreview]', delimiter: 'Optional[str]')

Class variables

var delimiter : Optional[str]
var preview : Dict[SegmentType, FilePreview]
var segments : List[SegmentType]

Static methods

def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]

Methods

def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) ‑> str
class DatasetQuality (url: str, storage_format: StorageFormat = 'csv')

DatasetQuality(url: 'str', storage_format: 'StorageFormat' = 'csv', _df: 'Optional[pandas.DataFrame]' = None)

Class variables

var storage_format : StorageFormat
var url : str

Static methods

def create_from_dict(value: Dict) ‑> DatasetQuality
def create_from_json(value: str) ‑> DatasetQuality
def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]

Instance variables

prop df : pandas.DataFrame

Pandas DataFrame that contains the record-wise quality scores of the dataset.

If the DataFrame has not been loaded yet, it will be loaded from the URL specified by the url attribute.

Raises

IOError
If the url attribute is not set.

Returns

pandas.DataFrame
A DataFrame with the record-wise quality scores of the dataset.
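
For example (a sketch, assuming the dataset has been analyzed and ds.quality is populated):

>>> quality_df = ds.quality.df   # loads the scores from the url attribute on first access
>>> quality_df.head()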

Methods

def get_dict(self) ‑> Dict
def get_json(self) ‑> str
def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) ‑> str
class DatasetQualityResponse (return_code: str = 'COMMAND_FAILED', message: Optional[str] = '', dataset_quality: Optional[DatasetQuality] = None)

DatasetQualityResponse(return_code: 'str' = 'COMMAND_FAILED', message: 'Optional[str]' = '', dataset_quality: 'Optional[DatasetQuality]' = None)

Ancestors

  • BaseApiResponse

Class variables

var dataset_quality : Optional[DatasetQuality]

Static methods

def create_from_dict(value: Dict) ‑> DatasetQuality
def create_from_json(value: str) ‑> DatasetQuality
def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]

Methods

def get_dict(self) ‑> Dict
def get_json(self) ‑> str
def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) ‑> str
class DatasetValidationResponse (return_code: Optional[str], message: Optional[str] = '')

DatasetValidationResponse(return_code: 'Optional[str]', message: 'Optional[str]' = '')

Class variables

var message : Optional[str]
var return_code : Optional[str]

Static methods

def create_from_dict(value: Dict) ‑> DatasetValidationResponse
def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]

Methods

def get_dict(self) ‑> Dict
def get_json(self) ‑> str
def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) ‑> str
class DownloadDatasetResponse (response: DownloadDatasetInfo, return_code: ReturnCode, message: str)

DownloadDatasetResponse(response: 'DownloadDatasetInfo', return_code: 'ReturnCode', message: 'str')

Class variables

var message : str
var response : DownloadDatasetInfo
var return_code : ReturnCode

Static methods

def create_from_dict(dict_value: Dict) ‑> DownloadDatasetResponse
def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]

Methods

def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) ‑> str
class FilePreview (*args, **kwargs)

dict() -> new empty dictionary
dict(mapping) -> new dictionary initialized from a mapping object's (key, value) pairs
dict(iterable) -> new dictionary initialized as if via:
    d = {}
    for k, v in iterable:
        d[k] = v
dict(**kwargs) -> new dictionary initialized with the name=value pairs in the keyword argument list. For example: dict(one=1, two=2)

Ancestors

  • builtins.dict

Class variables

var data : List[str]
var metadata : FilePreviewMetaData
class FilePreviewMetaData (*args, **kwargs)

dict() -> new empty dictionary
dict(mapping) -> new dictionary initialized from a mapping object's (key, value) pairs
dict(iterable) -> new dictionary initialized as if via:
    d = {}
    for k, v in iterable:
        d[k] = v
dict(**kwargs) -> new dictionary initialized with the name=value pairs in the keyword argument list. For example: dict(one=1, two=2)

Ancestors

  • builtins.dict

Class variables

var line_separator : str
class GetDatasetInfoResponse (response: dict, return_code: ReturnCode, message: str)

GetDatasetInfoResponse(response: 'dict', return_code: 'ReturnCode', message: 'str')

Class variables

var message : str
var response : dict
var return_code : ReturnCode

Static methods

def create_from_dict(dict_value: Dict) ‑> GetDatasetInfoResponse
def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]

Methods

def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) ‑> str
class GetDatasetPreviewResponse (response: Dict, return_code: ReturnCode, message: str)

GetDatasetPreviewResponse(response: 'Dict', return_code: 'ReturnCode', message: 'str')

Class variables

var message : str
var response : Dict
var return_code : ReturnCode

Static methods

def create_from_dict(dict_value: Dict) ‑> GetDatasetPreviewResponse
def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]

Methods

def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) ‑> str
class GetDatasetsResponse (response: List[Dict], return_code: ReturnCode, message: str)

GetDatasetsResponse(response: 'List[Dict]', return_code: 'ReturnCode', message: 'str')

Class variables

var message : str
var response : List[Dict]
var return_code : ReturnCode

Static methods

def create_from_dict(dict_value: Dict) ‑> GetDatasetsResponse
def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]

Methods

def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) ‑> str
class RunDetails (run_id: str, run_tasks: List[str], run_type: str, notes: str, create_time: str)

RunDetails(run_id: 'str', run_tasks: 'List[str]', run_type: 'str', notes: 'str', create_time: 'str')

Class variables

var create_time : str
var notes : str
var run_id : str
var run_tasks : List[str]
var run_type : str

Static methods

def create_from_dict(dict_value: Dict) ‑> RunDetails
def create_from_json(json_str: str) ‑> RunDetails
def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]

Methods

def get_dict(self) ‑> Dict
def get_json(self) ‑> str
def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) ‑> str
class StorageFormat (...)

Supported Data Storage Formats

Ancestors

  • builtins.str

Class variables

var CSV : Final
var PARQUET : Final