Module markov.api.data.data_set

Functions

def compare_datasets(to_compare: str, compare_with: List[str], force: bool = False)
def get_dataset_info(ds_id: str) ‑> DataSet
def get_dataset_preview(ds_id: str) ‑> DatasetPreviewData
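
A minimal usage sketch (the dataset ids below are hypothetical placeholders):

>>> from markov.api.data.data_set import compare_datasets, get_dataset_info, get_dataset_preview
>>> ds = get_dataset_info(ds_id="ds-123")          # fetch a registered dataset by id
>>> preview = get_dataset_preview(ds_id="ds-123")  # sample rows from the dataset
>>> compare_datasets(to_compare="ds-123", compare_with=["ds-456"], force=False)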

Classes

class ArtifactExistsRequest (artifact_type: MKVArtifactType, artifact_id: str, artifact_str: str = '')

ArtifactExistsRequest(artifact_type: 'MKVArtifactType', artifact_id: 'str', artifact_str: 'str' = '')

Class variables

var artifact_id : str
var artifact_str : str
var artifact_type : MKVArtifactType

Static methods

def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]

Methods

def get_dict(self) ‑> str
def get_json(self) ‑> str
def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) ‑> str
class BaseApiResponse (return_code: str = 'COMMAND_FAILED', message: Optional[str] = '')

BaseApiResponse(return_code: 'str' = 'COMMAND_FAILED', message: 'Optional[str]' = '')

Subclasses

  • DatasetQualityResponse

Class variables

var message : Optional[str]
var return_code : str

Static methods

def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]

Methods

def get_dict(self) ‑> Dict
def get_json(self) ‑> str
def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) ‑> str
class DataAnalysisRunRequest (ds_id: str, force: bool = False)

DataAnalysisRunRequest(ds_id: 'str', force: 'bool' = False)

Class variables

var ds_id : str
var force : bool

Static methods

def create_from_dict(value: Dict) ‑> DataAnalysisRunRequest
def create_from_json(value: str) ‑> DataAnalysisRunRequest
def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]

Methods

def get_dict(self) ‑> Dict
def get_json(self) ‑> str
def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) ‑> str
class DataSet (ds_prop: DataSetProperties, ds_paths: List[DataSegmentPath], ds_id: Optional[str] = '', analysis_status: str = '', cred_id: str = '', data_segments: dict = <factory>, datasource_type: DataSourceType = DataSourceType.S3)

DataSet(ds_prop: 'DataSetProperties', ds_paths: 'List[DataSegmentPath]', ds_id: 'Optional[str]' = '', analysis_status: 'str' = '', cred_id: 'str' = '', _credentials: 'Optional[GenericCredential]' = None, _quality: 'Optional[DatasetQuality]' = None, data_segments: 'dict' = <factory>, datasource_type: 'DataSourceType' = DataSourceType.S3)

Class variables

var analysis_status : str
var cred_id : str
var data_segments : dict
var datasource_type : DataSourceType
var ds_id : Optional[str]
var ds_paths : List[DataSegmentPath]
var ds_prop : DataSetProperties

Static methods

def create_dataset_from_response(response: Dict) ‑> DataSet

This method is for internal use only. It deserializes the response received from the MarkovML backend into a DataSet object.

Args

response : dict
The response received from MarkovML as JSON

Returns

DataSet object

def create_from_dict(json_dict: dict) ‑> DataSet
def create_from_json(json_str: str) ‑> DataSet
def from_cloud(df_id: str, x_col_names: List[str], delimiter: str, name: str, data_category: DataCategory, credential_id: str, y_name: str = None, train_source: str = '', test_source: str = '', validate_source: str = '', unsplit_source: str = '')

Creates a DataSet instance from cloud storage paths.

This method initializes a DataSet with properties and data sources specified as cloud storage paths (e.g., S3 paths). It allows the data to be separated into training, testing, and validation segments, or supplied as a single unsplit source.

Parameters:

df_id (str): Unique identifier for the dataset.
x_col_names (List[str]): List of column names to be used as features.
delimiter (str): Delimiter used in the data files.
name (str): Human-readable name for the dataset.
data_category (DataCategory): Category or type of the data.
credential_id (str): Identifier for the credentials used to access cloud storage.
y_name (str, optional): Name of the column to be used as the target variable. If y_name is None, the dataset is considered unlabelled.
train_source (str, optional): Cloud storage path for the training data.
test_source (str, optional): Cloud storage path for the testing data.
validate_source (str, optional): Cloud storage path for the validation data.
unsplit_source (str, optional): Cloud storage path for unsplit data. If provided, train_source, test_source, and validate_source should be empty strings.

Returns: DataSet: An instance of DataSet configured with the provided cloud storage paths as data sources.

Raises: IOError: If both unsplit_source and any of the split sources (train, test, validate) are provided.
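
Example (a minimal sketch; the ids, bucket paths, and column names are hypothetical, and the DataCategory member name is an assumption):

>>> ds = DataSet.from_cloud(
...     df_id="df-123",                    # hypothetical data family id
...     x_col_names=["review_text"],
...     delimiter=",",
...     name="product-reviews",
...     data_category=DataCategory.TEXT,   # assumed enum member name
...     credential_id="cred-abc",          # credentials registered with MarkovML
...     y_name="sentiment",
...     train_source="s3://my-bucket/train.csv",
...     test_source="s3://my-bucket/test.csv",
... )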

def from_dataframe(df_id: str, x_col_names: List[str], delimiter: str, name: str, data_category: DataCategory, y_name: str = None, train_source: pd.DataFrame = None, test_source: pd.DataFrame = None, validate_source: pd.DataFrame = None, unsplit_source: pd.DataFrame = None)

Creates a DataSet instance from provided DataFrame sources.

This method initializes a DataSet with properties and data sources specified as DataFrame objects. It allows for the separation of data into training, testing, and validation segments, or the use of an unsplit data source.

Parameters:

df_id (str): Unique identifier for the dataset.
x_col_names (List[str]): List of column names to be used as features.
delimiter (str): Delimiter used in the data files.
name (str): Human-readable name for the dataset.
data_category (DataCategory): Category or type of the data.
y_name (str, optional): Name of the column to be used as the target variable. If y_name is None, the dataset is considered unlabelled.
train_source (pd.DataFrame, optional): DataFrame source for the training data.
test_source (pd.DataFrame, optional): DataFrame source for the testing data.
validate_source (pd.DataFrame, optional): DataFrame source for the validation data.
unsplit_source (pd.DataFrame, optional): DataFrame source for unsplit data. If provided, train_source, test_source, and validate_source should be None.

Returns: DataSet: An instance of DataSet configured with the provided DataFrame sources.

Raises: IOError: If both unsplit_source and any of the split sources (train, test, validate) are provided.
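
Example (a minimal sketch; the ids and column names are hypothetical, and the DataCategory member name is an assumption):

>>> import pandas as pd
>>> df = pd.DataFrame({"review_text": ["great", "poor"], "sentiment": [1, 0]})
>>> ds = DataSet.from_dataframe(
...     df_id="df-123",                    # hypothetical data family id
...     x_col_names=["review_text"],
...     delimiter=",",
...     name="product-reviews",
...     data_category=DataCategory.TEXT,   # assumed enum member name
...     y_name="sentiment",
...     unsplit_source=df,                 # unsplit data; leave the split sources as None
... )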

def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
def from_filepath(df_id: str, x_col_names: List[str], delimiter: str, name: str, data_category: DataCategory, y_name: str = None, train_source: str = '', test_source: str = '', validate_source: str = '', unsplit_source: str = '')

Creates a DataSet instance from file paths pointing to data sources.

This method initializes a DataSet with properties and data sources specified as file paths. It allows for the separation of data into training, testing, and validation segments, or the use of an unsplit data source.

Parameters:

df_id (str): Unique identifier for the dataset.
x_col_names (List[str]): List of column names to be used as features.
delimiter (str): Delimiter used in the data files.
name (str): Human-readable name for the dataset.
data_category (DataCategory): Category or type of the data.
y_name (str, optional): Name of the column to be used as the target variable. If y_name is None, the dataset is considered unlabelled.
train_source (str, optional): File path for the training data.
test_source (str, optional): File path for the testing data.
validate_source (str, optional): File path for the validation data.
unsplit_source (str, optional): File path for unsplit data. If provided, train_source, test_source, and validate_source should be empty strings.

Returns: DataSet: An instance of DataSet configured with the provided file paths as data sources.

Raises: IOError: If both unsplit_source and any of the split sources (train, test, validate) are provided.
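
Example (a minimal sketch; the ids, file paths, and column names are hypothetical, and the DataCategory member name is an assumption):

>>> ds = DataSet.from_filepath(
...     df_id="df-123",                    # hypothetical data family id
...     x_col_names=["review_text"],
...     delimiter=",",
...     name="product-reviews",
...     data_category=DataCategory.TEXT,   # assumed enum member name
...     y_name="sentiment",
...     train_source="data/train.csv",     # local file paths
...     test_source="data/test.csv",
... )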

def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
def get_by_id(dataset_id: str) ‑> DataSet

Get a dataset object by the id of a dataset registered with MarkovML

def get_by_name(dataset_name: str) ‑> DataSet

Get a dataset object by the name of a dataset registered with MarkovML

def get_datasets()

Fetch all datasets from the logged-in workspace
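
For example (the id and name below are hypothetical placeholders):

>>> ds = DataSet.get_by_id("ds-123")
>>> ds = DataSet.get_by_name("product-reviews")
>>> all_datasets = DataSet.get_datasets()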

def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]

Instance variables

prop data_family : DataFamily

Returns: The data family this dataset belongs to

prop delimiter : str

Returns: The delimiter that was used when registering this dataset

prop description : str
prop df_id

Data family id that uniquely identifies the data family with the MarkovML backend

prop features : List[str]

Returns the list of features chosen by the user while registering the dataset

prop get_analysis_status

Returns: The analysis status of this dataset, i.e. whether the dataset has been analyzed

prop name

Returns: Name assigned to this dataset during registration

prop quality : DatasetQuality

Returns: The DatasetQuality object containing details about the data quality of this dataset and how to get the quality score for each record of the dataset

prop segments : List[str]

Returns: The available segments from this dataset

prop storage : str
prop target : str

Returns the target column name chosen by the user while registering the dataset

prop test : DataSegment

Returns: The test segment of this dataset if available otherwise None

prop train : DataSegment

Returns: The train segment of this dataset if available otherwise None

prop unknown : DataSegment

Returns: The unknown segment of this dataset if available otherwise None

prop unsplit : DataSegment

Returns: The unsplit segment of this dataset if available otherwise None

prop validate : DataSegment

Returns: The validate segment of this dataset if available otherwise None

prop x_col_names : List[str]

Returns: Name of the feature columns for this dataset

prop x_indexes : List[int]
prop y_col_index : int

Returns: The target column index for this dataset

prop y_col_name : str

Returns: Name of the target column for this dataset

Methods

def compare(self, compare_input, force: bool = False) ‑> DataSetComparisonResult

Trigger comparison of this dataset with another dataset

Args

compare_input
Generic compare_input containing the args for triggering the dataset comparison. The input is validated and processed internally.
force : bool
Bypass any cached past comparison and run a fresh comparison.

Returns

DataSetComparisonResult
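
For example (a sketch; the dataset id passed as compare_input is a hypothetical placeholder):

>>> result = ds.compare("ds-456", force=False)
>>> print(result.msg)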

def create_vector_store(self, timeout=None)

Create a vector store for this dataset, enabling actions such as similarity search.

Args

timeout
Timeout in seconds for the vector store creation

Returns: VectorStore Resource object
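
For example (a sketch; the timeout value is illustrative):

>>> store = ds.create_vector_store(timeout=600)   # wait up to 10 minutes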

def data_category(self)
def download_as_csv(self)

Downloads all segments of the dataset as individual CSV files.

For example:

>>> ds.download_as_csv()
File train.csv downloaded successfully
File test.csv downloaded successfully

def download_segment(self, segment_type: Union[SegmentType, str]) ‑> str

Downloads a dataset segment to the folder this script is run from, for local processing.

For example, with SEGMENT_TYPE one of TRAIN|TEST|VALIDATE|UNSPLIT:

>>> dataset.download_segment(segment_type="SEGMENT_TYPE")
File filename.csv downloaded successfully

def get_credentials(self) ‑> GenericCredential

Return the original credentials registered with MarkovML for reading this dataset

Returns

GenericCredential that contains the registered credentials

def get_dict(self) ‑> Dict
def get_json(self) ‑> str
def get_preview(self) ‑> DatasetPreviewData

Get a dataset preview. This returns a sample from the registered dataset, displayed on the CLI terminal

>>> dataset.get_preview()
def get_segment(self, seg_type: SegmentType) ‑> DataSegment

Returns the DataSegment of type seg_type for this Dataset

Args

seg_type : SegmentType
Segment type, one of Train, Test, Validate, or Unknown (unsplit data).

Returns

DataSegment if it is available for the given dataset, otherwise None is returned
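
For example (a sketch; the SegmentType member name is an assumption):

>>> train_segment = ds.get_segment(SegmentType.TRAIN)
>>> if train_segment is not None:
...     print("train segment is available")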

def get_train_test_split(self, **kwargs) ‑> Tuple[pandas.core.frame.DataFrame, pandas.core.frame.DataFrame]

This is a wrapper over sklearn.model_selection.train_test_split (https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html). All arguments are passed through to the wrapped sklearn method.

Args

**kwargs : all kwargs that are applicable for sklearn.model_selection.train_test_split

Returns

Tuple containing the train and test splits
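
For example (a sketch; test_size and random_state are standard sklearn kwargs):

>>> train_df, test_df = ds.get_train_test_split(test_size=0.2, random_state=42)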

def get_url(self) ‑> str

Gives the URL of the dataset based on the environment.

For example:

>>> ds.get_url()
"{base_url}/{workspace_id}/datasets/{dataset_id}"

def register(self, cred: Union[str, GenericCredential], analyze: bool = True)

Register the dataset with a new credential and/or an already registered credential id

Args

cred : Union[str, GenericCredential]
The credentials (or the id of already registered credentials) used to read this dataset from cloud storage
analyze : bool
Set to True to analyze the dataset on successful registration

Returns

DataSetRegistrationResponse containing the results of the registration
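
For example (a sketch; the credential id is a hypothetical placeholder):

>>> response = ds.register(cred="cred-abc", analyze=True)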

def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) ‑> str
def update_datafamily(self, updated_df_id: str)

Update the datafamily for this dataset.

Args

updated_df_id : str
unique id of the new datafamily to assign this dataset to

Returns

None

def upload(self)

Validates that all data sources are of the same type [DataFrame, S3, or Filepath], uploads the data source to the corresponding S3 storage (hybrid deployment), and validates the dataset. Finally, registers the dataset.
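
For example (a sketch, assuming ds was built with from_dataframe or from_filepath):

>>> ds.upload()   # uploads, validates, and registers the dataset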

def view_details(self)

Opens the dataset details page for this dataset in the browser.

For example:

>>> ds.view_details()

class DataSetCompareRequest (primary_id: str, compare_ids: List[str], notes: str = '', force: bool = False)

DataSetCompareRequest(primary_id: 'str', compare_ids: 'List[str]', notes: 'str' = '', force: 'bool' = False)

Class variables

var compare_ids : List[str]
var force : bool
var notes : str
var primary_id : str

Static methods

def create_from_dict(value: Dict) ‑> DataSetCompareRequest
def create_from_json(value: str) ‑> DataSetCompareRequest
def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]

Methods

def get_dict(self) ‑> Dict
def get_json(self) ‑> str
def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) ‑> str
class DataSetComparisonResult (msg: str = '')

DataSetComparisonResult(msg: 'str' = '')

Class variables

var msg : str

Static methods

def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]

Methods

def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) ‑> str
class DataSetProperties (name: str, data_category: Optional[str], delimiter: str, df_id: str, storage_type: str, notes: str = '', x_indexes: Optional[List] = <factory>, y_index: int = -1, x_col_names: List[str] = <factory>, y_name: str = '', storage_format: str = StorageFormatType.CSV, info: dict = <factory>, source: str = '')

DataSetProperties(name: 'str', data_category: 'Optional[str]', delimiter: 'str', df_id: 'str', storage_type: 'str', notes: 'str' = '', x_indexes: 'Optional[List]' = <factory>, y_index: 'int' = -1, x_col_names: 'List[str]' = <factory>, y_name: 'str' = '', storage_format: 'str' = StorageFormatType.CSV, info: 'dict' = <factory>, source: 'str' = '')

Class variables

var data_category : Optional[str]
var delimiter : str
var df_id : str
var info : dict
var name : str
var notes : str
var source : str
var storage_format : str
var storage_type : str
var x_col_names : List[str]
var x_indexes : Optional[List]
var y_index : int
var y_name : str

Static methods

def create_dataset_properties_from_response(response: Dict)
def create_from_dict(json_dict: dict) ‑> DataSetProperties
def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]

Methods

def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) ‑> str
class DataSetRegistrationRequest (data_set: DataSet, credential: GenericCredential, should_analyze: bool = True, upload_mode: WorkflowMode = WorkflowMode.EXTERNAL_CLOUD_STORAGE)

DataSetRegistrationRequest(data_set: 'DataSet', credential: 'GenericCredential', should_analyze: 'bool' = True, upload_mode: 'WorkflowMode' = WorkflowMode.EXTERNAL_CLOUD_STORAGE)

Class variables

var credential : GenericCredential
var data_set : DataSet
var should_analyze : bool
var upload_mode : WorkflowMode

Static methods

def create_from_dict(value: str) ‑> DataSetRegistrationRequest
def create_from_json(value: str) ‑> DataSetRegistrationRequest
def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]

Methods

def get_dict(self) ‑> Dict
def get_json(self) ‑> str
def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) ‑> str
class DataSetRegistrationResponse (ds_id: str, run_details: Optional[Dict], return_code: Optional[str], create_time: str, message: str)

DataSetRegistrationResponse(ds_id: 'str', run_details: 'Optional[Dict]', return_code: 'Optional[str]', create_time: 'str', message: 'str')

Class variables

var create_time : str
var ds_id : str
var message : str
var return_code : Optional[str]
var run_details : Optional[Dict]

Static methods

def create_from_dict(value: Dict) ‑> DataSetRegistrationResponse
def create_from_json(value: str) ‑> DataSetRegistrationResponse
def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]

Methods

def get_dict(self) ‑> str
def get_json(self) ‑> str
def is_being_analyzed(self) ‑> bool
def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) ‑> str
class DatasetDataFamilyUpdate (ds_id: str, updated_df_id: str)

DatasetDataFamilyUpdate(ds_id: 'str', updated_df_id: 'str')

Class variables

var ds_id : str
var updated_df_id : str

Static methods

def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]

Methods

def get_dict(self) ‑> Dict
def get_json(self) ‑> str
def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) ‑> str
class DatasetPreviewData (segments: List[SegmentType], preview: Dict[SegmentType, FilePreview], delimiter: Optional[str])

DatasetPreviewData(segments: 'List[SegmentType]', preview: 'Dict[SegmentType, FilePreview]', delimiter: 'Optional[str]')

Class variables

var delimiter : Optional[str]
var preview : Dict[SegmentType, FilePreview]
var segments : List[SegmentType]

Static methods

def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]

Methods

def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) ‑> str
class DatasetQuality (url: str, storage_format: StorageFormat = 'csv')

DatasetQuality(url: 'str', storage_format: 'StorageFormat' = 'csv', _df: 'Optional[pandas.DataFrame]' = None)

Class variables

var storage_format : StorageFormat
var url : str

Static methods

def create_from_dict(value: Dict) ‑> DatasetQuality
def create_from_json(value: str) ‑> DatasetQuality
def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]

Instance variables

prop df : pandas.DataFrame

Pandas DataFrame that contains the record-wise quality scores of the dataset.

If the DataFrame has not been loaded yet, it will be loaded from the URL specified by the url attribute.

Raises

IOError
If the url attribute is not set.

Returns

pandas.DataFrame
A DataFrame with the record-wise quality scores of the dataset.
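
For example (a sketch, assuming the dataset has been analyzed and ds.quality is populated):

>>> quality_df = ds.quality.df   # loads the scores from the url attribute on first access
>>> quality_df.head()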

Methods

def get_dict(self) ‑> Dict
def get_json(self) ‑> str
def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) ‑> str
class DatasetQualityResponse (return_code: str = 'COMMAND_FAILED', message: Optional[str] = '', dataset_quality: Optional[DatasetQuality] = None)

DatasetQualityResponse(return_code: 'str' = 'COMMAND_FAILED', message: 'Optional[str]' = '', dataset_quality: 'Optional[DatasetQuality]' = None)

Ancestors

  • BaseApiResponse

Class variables

var dataset_quality : Optional[DatasetQuality]

Static methods

def create_from_dict(value: Dict) ‑> DatasetQuality
def create_from_json(value: str) ‑> DatasetQuality
def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]

Methods

def get_dict(self) ‑> Dict
def get_json(self) ‑> str
def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) ‑> str
class DatasetValidationResponse (return_code: Optional[str], message: Optional[str] = '')

DatasetValidationResponse(return_code: 'Optional[str]', message: 'Optional[str]' = '')

Class variables

var message : Optional[str]
var return_code : Optional[str]

Static methods

def create_from_dict(value: Dict) ‑> DatasetValidationResponse
def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]

Methods

def get_dict(self) ‑> Dict
def get_json(self) ‑> str
def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) ‑> str
class DownloadDatasetResponse (response: DownloadDatasetInfo, return_code: ReturnCode, message: str)

DownloadDatasetResponse(response: 'DownloadDatasetInfo', return_code: 'ReturnCode', message: 'str')

Class variables

var message : str
var response : DownloadDatasetInfo
var return_code : ReturnCode

Static methods

def create_from_dict(dict_value: Dict) ‑> DownloadDatasetResponse
def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]

Methods

def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) ‑> str
class FilePreview (*args, **kwargs)

dict() -> new empty dictionary
dict(mapping) -> new dictionary initialized from a mapping object's (key, value) pairs
dict(iterable) -> new dictionary initialized as if via:
    d = {}
    for k, v in iterable:
        d[k] = v
dict(**kwargs) -> new dictionary initialized with the name=value pairs in the keyword argument list. For example: dict(one=1, two=2)

Ancestors

  • builtins.dict

Class variables

var data : List[str]
var metadata : FilePreviewMetaData
class FilePreviewMetaData (*args, **kwargs)

dict() -> new empty dictionary
dict(mapping) -> new dictionary initialized from a mapping object's (key, value) pairs
dict(iterable) -> new dictionary initialized as if via:
    d = {}
    for k, v in iterable:
        d[k] = v
dict(**kwargs) -> new dictionary initialized with the name=value pairs in the keyword argument list. For example: dict(one=1, two=2)

Ancestors

  • builtins.dict

Class variables

var line_separator : str
class GetDatasetInfoResponse (response: dict, return_code: ReturnCode, message: str)

GetDatasetInfoResponse(response: 'dict', return_code: 'ReturnCode', message: 'str')

Class variables

var message : str
var response : dict
var return_code : ReturnCode

Static methods

def create_from_dict(dict_value: Dict) ‑> GetDatasetInfoResponse
def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]

Methods

def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) ‑> str
class GetDatasetPreviewResponse (response: Dict, return_code: ReturnCode, message: str)

GetDatasetPreviewResponse(response: 'Dict', return_code: 'ReturnCode', message: 'str')

Class variables

var message : str
var response : Dict
var return_code : ReturnCode

Static methods

def create_from_dict(dict_value: Dict) ‑> GetDatasetPreviewResponse
def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]

Methods

def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) ‑> str
class GetDatasetsResponse (response: List[Dict], return_code: ReturnCode, message: str)

GetDatasetsResponse(response: 'List[Dict]', return_code: 'ReturnCode', message: 'str')

Class variables

var message : str
var response : List[Dict]
var return_code : ReturnCode

Static methods

def create_from_dict(dict_value: Dict) ‑> GetDatasetsResponse
def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]

Methods

def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) ‑> str
class RunDetails (run_id: str, run_tasks: List[str], run_type: str, notes: str, create_time: str)

RunDetails(run_id: 'str', run_tasks: 'List[str]', run_type: 'str', notes: 'str', create_time: 'str')

Class variables

var create_time : str
var notes : str
var run_id : str
var run_tasks : List[str]
var run_type : str

Static methods

def create_from_dict(dict_value: Dict) ‑> RunDetails
def create_from_json(json_str: str) ‑> RunDetails
def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]

Methods

def get_dict(self) ‑> Dict
def get_json(self) ‑> str
def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) ‑> str
class StorageFormat (...)

Supported Data Storage Formats

Ancestors

  • builtins.str

Class variables

var CSV : Final
var PARQUET : Final