Skip to main content

easyfabric.data.tableconfig

logging

re

dataclass

field

Any

Optional

BooleanType

DateType

DoubleType

FloatType

IntegerType

LongType

StringType

StructField

StructType

TimestampType

ConfigManager

get_config

initialize_config

DataClassFromDictMixin

check_format

load_yaml

read_file_as_string

yaml_to_json

to_snake_case

def to_snake_case(string: str) -> str

Connection Objects

@dataclass
class Connection(DataClassFromDictMixin)

Data source connection details.

Defines how to connect to and read from an external data source, including file type, delimiter, credentials, and load ordering.

connectionname

connectionprefix

connectiontype

bronzefolder

bronzekeyviolationaction

ignorenullkey

keyvaultsecretconnectionstring

container

sourcefolder

filetype

processtype

fileextension

isbronzestringtypes

istriggerable

delimiter

bronzenotebookconcurrency

silvernotebookconcurrency

bronzeloadorder

silverloadorder

bronzeloadskip

multiline

inferschema

mode

escape

maxfileagehours

quote

quotemode

dateformat

charset

jsonpath

notebooktimeout

bronzecleanupactive

bronzemaxretentionhours

json_to_spark_path

def json_to_spark_path()

from_yaml_file

@classmethod
def from_yaml_file(cls, file_path: str)

Create a Connection instance from a YAML file.

from_yaml_files

@classmethod
def from_yaml_files(cls, file_paths: list[str])

Create a list of Connection instances from multiple YAML files.

NotebookConfig Objects

@dataclass
class NotebookConfig(DataClassFromDictMixin)

Configuration for pre/post notebook execution hooks.

Specifies a notebook to run at a particular stage of the load pipeline, along with an optional timeout and up to three parameters.

notebook

timeout

param001

param002

param003

Trigger Objects

@dataclass
class Trigger(DataClassFromDictMixin)

File trigger configuration for automatic loading.

Defines the notebook to execute and optional webhook to notify when a file event triggers a table load.

notebook

notifywebhookurl

notifywebhooktype

HistorySettings Objects

@dataclass
class HistorySettings(DataClassFromDictMixin)

SCD Type 2 history tracking configuration.

Controls how historical changes are tracked at the bronze and silver layers, including filter queries for deletes and source rows.

bronzedeletefilterquery

bronzeskipdelete

sourcefilterquery

silverfilterquery

is_silver

is_bronze

__post_init__

def __post_init__()

Column Objects

@dataclass
class Column(DataClassFromDictMixin)

Column definition with source mapping and data type.

Maps a source column to its destination name and data type, including optional silver-layer type conversion expressions and primary key / z-order flags.

sourcecolumn

sourcedatatype

destinationcolumn

sourceexpression

silverexpression

conversionexpression

dateformat

silverdatatype

silverconvertedcolumn

defaultvalue

dataplatformname

isnullable

isprimarykey

issilveronly

iszorder

isdeletedidentifier

__post_init__

def __post_init__()

TableConfig Objects

@dataclass
class TableConfig(DataClassFromDictMixin)

Per-table configuration loaded from a YAML definition file.

Defines the source connection, column mappings, load settings, and data quality rules for a single table in the data pipeline.

connection

dataplatformobjectname

sourcetable

sourcefilter

sourcefolder

sourceorder

bronzefolder

bronzekeyviolationaction

bronzeloadviolationaction

Allowed values: `continue` or `stop`.

ignorenullkey

filetype

fileextension

sheetname

prebronzenotebook

midbronzenotebook

postbronzenotebook

presilvernotebook

midsilvernotebook

postsilvernotebook

bronzenotebook

silvernotebook

bronzemaxexpectedduration

silvermaxexpectedduration

bronzeloadorder

silverloadorder

silverloadtype

bronzeloadskip

keephistory

skipifsourceunchanged

keeponedayrecords

historysettings

isactive

columns

connectiontype

xmlrowpath

dateformat

connectioninfo

trigger

layers

meta_yamlfile

bronzecleanupactive

bronzemaxretentionhours

__post_init__

def __post_init__()

set_date_format_columns

def set_date_format_columns()

validate_layer

def validate_layer(layer) -> bool

Validates if the provided layer is supported for this table.

Arguments:

  • layer str - The layer to validate

Raises:

  • ValueError - If the layer is not supported for this table

get_object_name

def get_object_name()

Get the logical object name for this table.

Returns dataplatformobjectname if set, otherwise derives the name from sourcetable.

get_conn_config

def get_conn_config(config_manager: ConfigManager = None)

set_conn_config

def set_conn_config(config_manager: ConfigManager = None)

get_bronze_tablename

def get_bronze_tablename(config_manager: ConfigManager = None)

get_bronze_history_tablename

def get_bronze_history_tablename(config_manager: ConfigManager = None)

get_deleted_identifier_column

def get_deleted_identifier_column() -> Optional[Column]

has_deleted_column

def has_deleted_column() -> bool

get_silver_tablename

def get_silver_tablename(config_manager: ConfigManager = None)

get_history_settings

def get_history_settings(layer: str) -> HistorySettings | None

get_abfs_table_path

def get_abfs_table_path(config_manager: ConfigManager = None,
layer: str = "bronze",
history: bool = False) -> str

Get the full ABFS path for the table based on the layer. This includes the lakehouse and schema.

get_full_table_name

def get_full_table_name(config_manager: ConfigManager = None,
layer: str = "bronze",
history: bool = False) -> str

Get the full table name based on the layer. This includes the lakehouse and schema.

Examples:

History = false: `Bronze.dbo.my_tablename`, `Silver.dbo.my_tablename`.

History = true: `Bronze.his.my_tablename`, `Silver.his.my_tablename`.

get_table_name

def get_table_name(config_manager: ConfigManager = None,
layer: str = "bronze") -> str

Get the table name based on the layer.

get_column_objects

def get_column_objects()

get_column_array

def get_column_array()

get_keycolumn_array

def get_keycolumn_array(
config_manager: ConfigManager = None) -> list[dict[str, Any]]

get_zorder_column_names

def get_zorder_column_names(config_manager: ConfigManager = None) -> list[str]

Returns list of ACTUAL column names (as they exist in the Delta table) that should be used in ZORDER BY, based on iszorder=True flag.

get_key_columns

def get_key_columns(
config_manager: ConfigManager = None
) -> tuple[list[str], dict[str, Any]]

get_bronzekeyviolationaction

def get_bronzekeyviolationaction()

get_ignorenullkey

def get_ignorenullkey() -> bool

get_structtype_schema

def get_structtype_schema() -> tuple[StructType, dict[str, Any]]

Convert column definitions to a PySpark StructType schema.

Returns:

A tuple of (StructType schema, dict mapping column names to their PySpark data types).

to_param_map_prebronze

def to_param_map_prebronze() -> dict

to_param_map_midbronze

def to_param_map_midbronze() -> dict

to_param_map_postbronze

def to_param_map_postbronze() -> dict

to_param_map_presilver

def to_param_map_presilver() -> dict

to_param_map_midsilver

def to_param_map_midsilver() -> dict

to_param_map_postsilver

def to_param_map_postsilver() -> dict

get_notebook_timeout

def get_notebook_timeout(nb_config: NotebookConfig = None) -> int

from_yaml

@classmethod
def from_yaml(cls, yaml_string: str)

Create a TableConfig instance from a YAML string.

from_yaml_file

@classmethod
def from_yaml_file(cls, file_path: str)

Create a TableConfig instance from a YAML file.

from_yaml_files

@classmethod
def from_yaml_files(cls, file_paths: list[str])

Create a list of TableConfig instances from multiple YAML files.