Source code for pyspark_pipeline_framework.core.config.loader
"""HOCON configuration loader using dataconf.

This module provides functions for loading configuration from HOCON files,
HOCON strings, and environment variables using dataconf.
"""
from typing import TypeVar, cast
import dataconf
T = TypeVar("T")
[docs]
def load_from_file(path: str, config_class: type[T]) -> T:
    """Read a HOCON file and materialise it as ``config_class``.

    Thin wrapper around ``dataconf.file``; any exception raised by
    dataconf propagates to the caller unchanged.

    Args:
        path: Location of the HOCON configuration file.
        config_class: Configuration dataclass type to populate.

    Returns:
        The object produced by ``dataconf.file`` for ``config_class``.

    Example:
        >>> config = load_from_file("pipeline.conf", PipelineConfig)
    """
    loaded = dataconf.file(path, config_class)
    # cast() is a no-op at runtime; it only tells the type checker the
    # result is the requested configuration type.
    return cast(T, loaded)
[docs]
def load_from_string(hocon_str: str, config_class: type[T]) -> T:
    """Parse HOCON text and materialise it as ``config_class``.

    Thin wrapper around ``dataconf.string``; any exception raised by
    dataconf propagates to the caller unchanged.

    Args:
        hocon_str: HOCON configuration text.
        config_class: Configuration dataclass type to populate.

    Returns:
        The object produced by ``dataconf.string`` for ``config_class``.

    Example:
        >>> hocon = '''
        ... {
        ...     name: "my-pipeline"
        ...     version: "1.0.0"
        ... }
        ... '''
        >>> config = load_from_string(hocon, PipelineConfig)
    """
    parsed = dataconf.string(hocon_str, config_class)
    # cast() is a no-op at runtime; it only tells the type checker the
    # result is the requested configuration type.
    return cast(T, parsed)
[docs]
def load_from_env(prefix: str, config_class: type[T]) -> T:
    """Build a ``config_class`` instance from prefixed environment variables.

    Thin wrapper around ``dataconf.env``; any exception raised by
    dataconf propagates to the caller unchanged.

    Args:
        prefix: Prefix selecting the environment variables to read
            (e.g., ``PPF_``).
        config_class: Configuration dataclass type to populate.

    Returns:
        The object produced by ``dataconf.env`` for ``config_class``.

    Example:
        >>> # With PPF_NAME=my-pipeline PPF_VERSION=1.0.0
        >>> config = load_from_env("PPF_", PipelineConfig)

    Note:
        Variables are expected in the form ``<prefix><FIELD>=value``; the
        mapping from variable name to field is done entirely by dataconf.

        NOTE(review): dataconf's documentation appears to treat a single
        underscore as a nesting separator and a double underscore as a
        literal underscore in a key, so a nested ``spark.app_name`` field
        would presumably be ``PPF_SPARK_APP__NAME`` rather than
        ``PPF_SPARK_APP_NAME`` -- confirm against the installed dataconf
        version.
    """
    from_env = dataconf.env(prefix, config_class)
    # cast() is a no-op at runtime; it only tells the type checker the
    # result is the requested configuration type.
    return cast(T, from_env)