Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions docs/content/en/latest/pipelines/ldm_extension/_index.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ The custom dataset represents a new dataset appended to the child LDM. It is def
| dataset_reference_source_column_data_type | [ColumnDataType](#columndatatype) | Column data type. |
| workspace_data_filter_id | string | ID of the workspace data filter to use. |
| workspace_data_filter_column_name | string | Name of the column in custom dataset used for filtering. |
| dataset_description | string \| None | Optional declarative description of the custom dataset. |
| dataset_tags | string[] \| None | Optional tag list; when omitted, defaults to a single tag derived from the dataset display name. |

#### Validity constraints

Expand All @@ -63,6 +65,8 @@ The custom fields define the individual fields in the custom datasets defined ab
| custom_field_type | [CustomFieldType](#customfieldtype) | Indicates whether the field represents an attribute, a date, or a fact. |
| custom_field_source_column | string | Name of the column in the physical data model. |
| custom_field_source_column_data_type | [ColumnDataType](#columndatatype) | Data type of the field. |
| description | string \| None | Optional declarative description of the attribute, fact, or date dataset. |
| tags | string[] \| None | Optional tag list; when omitted, defaults to a single tag derived from the dataset display name. |

#### Validity constraints

Expand Down Expand Up @@ -128,6 +132,25 @@ ldm_extension_manager.process(

```

### Merging into an existing child workspace LDM

By default, `process` **replaces** the child workspace LDM with the declarative fragment built from your inputs. Any prior custom datasets or date instances that aren't in the current call are lost.

Set `merge_into_existing_ldm=True` to switch to an **append / update** behaviour: `process` loads the current workspace LDM first, replaces any dataset or date instance whose `id` matches one in your input, and keeps the rest of the model as is (including previously uploaded custom extensions).

Optional cleanup: when `remove_managed_datasets_missing_from_input=True` and `management_tag` is set, datasets that carry that tag but are **not** in the current `process` call are removed from the merged LDM before the upload. This lets tools such as BCA reliably delete their own obsolete custom datasets without touching anything else.

```python
ldm_extension_manager.process(
custom_datasets=custom_dataset_definitions,
custom_fields=custom_field_definitions,
check_relations=False,
merge_into_existing_ldm=True,
remove_managed_datasets_missing_from_input=True,
management_tag="bca_tooling_managed",
)
```

## Example

Here is a complete example of extending a child workspace's LDM:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
into objects defined in the GoodData Python SDK.
"""

import copy

from gooddata_sdk.catalog.identifier import (
CatalogDatasetWorkspaceDataFilterIdentifier,
CatalogGrainIdentifier,
Expand Down Expand Up @@ -36,11 +38,26 @@
from gooddata_pipelines.ldm_extension.models.custom_data_object import (
ColumnDataType,
CustomDataset,
CustomDatasetDefinition,
CustomFieldDefinition,
CustomFieldType,
)


def _effective_field_tags(
    dataset_name: str, custom_field: CustomFieldDefinition
) -> list[str]:
    """Resolve the tag list to attach to a generated field object.

    Explicit tags on the field definition take precedence (copied to a new
    list so callers cannot mutate the definition); otherwise the dataset
    display name serves as the single default tag.
    """
    explicit = custom_field.tags
    return [dataset_name] if explicit is None else list(explicit)


def _effective_dataset_tags(definition: CustomDatasetDefinition) -> list[str]:
    """Resolve the tag list to attach to a generated custom dataset.

    Explicit ``dataset_tags`` on the definition take precedence (copied to a
    new list); otherwise the dataset display name serves as the single
    default tag.
    """
    explicit = definition.dataset_tags
    return [definition.dataset_name] if explicit is None else list(explicit)


class LdmExtensionDataProcessor:
"""Create GoodData LDM from validated custom datasets and fields."""

Expand Down Expand Up @@ -77,7 +94,8 @@ def _attribute_from_field(
source_column=custom_field.custom_field_source_column,
labels=[],
source_column_data_type=custom_field.custom_field_source_column_data_type.value,
tags=[dataset_name],
description=custom_field.description,
tags=_effective_field_tags(dataset_name, custom_field),
)

@staticmethod
Expand All @@ -91,7 +109,8 @@ def _fact_from_field(
title=custom_field.custom_field_name,
source_column=custom_field.custom_field_source_column,
source_column_data_type=custom_field.custom_field_source_column_data_type.value,
tags=[dataset_name],
description=custom_field.description,
tags=_effective_field_tags(dataset_name, custom_field),
)

def _date_from_field(
Expand All @@ -109,7 +128,8 @@ def _date_from_field(
title_pattern="%titleBase - %granularityTitle",
),
granularities=self.DATE_GRANULARITIES,
tags=[dataset_name],
description=custom_field.description,
tags=_effective_field_tags(dataset_name, custom_field),
)

@staticmethod
Expand Down Expand Up @@ -258,7 +278,7 @@ def datasets_to_ldm(
),
]
+ date_references,
description=None,
description=dataset.definition.dataset_description,
attributes=attributes,
facts=facts,
data_source_table_id=dataset_source_table_id,
Expand All @@ -278,7 +298,7 @@ def datasets_to_ldm(
filter_column_data_type=ColumnDataType.STRING.value,
)
],
tags=[dataset.definition.dataset_name],
tags=_effective_dataset_tags(dataset.definition),
)
)

Expand All @@ -287,3 +307,60 @@ def datasets_to_ldm(
datasets=declarative_datasets, date_instances=date_instances
)
return CatalogDeclarativeModel(ldm=ldm)

def merge_custom_ldm_into_existing(
    self,
    existing: CatalogDeclarativeModel,
    custom_datasets: dict[DatasetId, CustomDataset],
    *,
    remove_managed_datasets_missing_from_input: bool = False,
    management_tag: str | None = None,
) -> CatalogDeclarativeModel:
    """Merge datasets built from ``custom_datasets`` into an existing declarative LDM.

    Datasets and date instances whose ``id`` matches one in the generated
    fragment replace their previous definitions. With
    ``remove_managed_datasets_missing_from_input`` set, datasets carrying
    ``management_tag`` that are absent from the incoming fragment are dropped
    as well (cleanup of tooling-owned extension datasets). Every other
    pre-existing LDM object is kept untouched. ``existing`` itself is not
    mutated; a deep copy is returned.
    """
    fragment = self.datasets_to_ldm(custom_datasets).ldm or CatalogDeclarativeLdm(
        datasets=[], date_instances=[]
    )

    merged = copy.deepcopy(existing)
    if merged.ldm is None:
        merged.ldm = CatalogDeclarativeLdm(datasets=[], date_instances=[])
    target = merged.ldm

    new_dataset_ids = {ds.id for ds in fragment.datasets}
    new_date_ids = {inst.id for inst in fragment.date_instances}

    def _stale_managed(ds) -> bool:
        # Tooling-owned dataset (carries the management tag) that the
        # current input no longer provides -> eligible for removal.
        return bool(
            remove_managed_datasets_missing_from_input
            and management_tag
            and ds.tags
            and management_tag in ds.tags
            and ds.id not in new_dataset_ids
        )

    # Keep existing datasets that are neither replaced by the fragment nor
    # stale tooling-managed leftovers, then append the fragment's datasets.
    target.datasets = [
        ds
        for ds in target.datasets
        if ds.id not in new_dataset_ids and not _stale_managed(ds)
    ] + list(fragment.datasets)

    # Same replace-by-id semantics for date instances (no managed cleanup).
    target.date_instances = [
        inst for inst in target.date_instances if inst.id not in new_date_ids
    ] + list(fragment.date_instances)

    return merged
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@

from pathlib import Path

from gooddata_sdk.catalog.workspace.declarative_model.workspace.logical_model.ldm import (
CatalogDeclarativeModel,
)
from gooddata_sdk.sdk import GoodDataSdk
from gooddata_sdk.utils import PROFILES_FILE_PATH, profile_content

Expand Down Expand Up @@ -147,9 +150,35 @@ def _new_ldm_does_not_invalidate_relations(
# If the set of new invalid relations is a subset of the current one,
return set_new_invalid_relations.issubset(set_current_invalid_relations)

def _ldm_payload_for_workspace(
    self,
    workspace_id: str,
    datasets: dict[DatasetId, CustomDataset],
    *,
    merge_into_existing_ldm: bool,
    remove_managed_datasets_missing_from_input: bool,
    management_tag: str | None,
) -> CatalogDeclarativeModel:
    """Assemble the declarative LDM to upload for a single workspace.

    In replace mode the payload is just the fragment generated from
    ``datasets``; in merge mode the current workspace LDM is fetched first
    and the fragment is merged into it.
    """
    fragment = self._processor.datasets_to_ldm(datasets)
    if not merge_into_existing_ldm:
        return fragment

    current_ldm = self._sdk.catalog_workspace_content.get_declarative_ldm(
        workspace_id
    )
    return self._processor.merge_custom_ldm_into_existing(
        current_ldm,
        datasets,
        remove_managed_datasets_missing_from_input=remove_managed_datasets_missing_from_input,
        management_tag=management_tag,
    )

def _process_with_relations_check(
self,
validated_data: dict[WorkspaceId, dict[DatasetId, CustomDataset]],
*,
merge_into_existing_ldm: bool = False,
remove_managed_datasets_missing_from_input: bool = False,
management_tag: str | None = None,
) -> None:
"""Check whether relations of analytical objects are valid before and after
updating the LDM in the GoodData workspace.
Expand All @@ -173,7 +202,13 @@ def _process_with_relations_check(
# Put the LDM with custom datasets into the GoodData workspace.
self._sdk.catalog_workspace_content.put_declarative_ldm(
workspace_id=workspace_id,
ldm=self._processor.datasets_to_ldm(datasets),
ldm=self._ldm_payload_for_workspace(
workspace_id,
datasets,
merge_into_existing_ldm=merge_into_existing_ldm,
remove_managed_datasets_missing_from_input=remove_managed_datasets_missing_from_input,
management_tag=management_tag,
),
)

# Get a set of objects with invalid relations from the new workspace state
Expand Down Expand Up @@ -232,13 +267,23 @@ def _log_diff_invalid_relations(
def _process_without_relations_check(
    self,
    validated_data: dict[WorkspaceId, dict[DatasetId, CustomDataset]],
    *,
    merge_into_existing_ldm: bool = False,
    remove_managed_datasets_missing_from_input: bool = False,
    management_tag: str | None = None,
) -> None:
    """Upload the LDM to each workspace directly, without validating relations.

    Args:
        validated_data: Mapping of workspace id to its validated custom datasets.
        merge_into_existing_ldm: When True, merge the generated datasets into
            the current workspace LDM instead of replacing it.
        remove_managed_datasets_missing_from_input: In merge mode, drop datasets
            tagged with ``management_tag`` that are absent from this call.
        management_tag: Tag identifying tooling-managed datasets.
    """
    for workspace_id, datasets in validated_data.items():
        # Build either the plain fragment or the merged model for this
        # workspace, then push it.
        payload = self._ldm_payload_for_workspace(
            workspace_id,
            datasets,
            merge_into_existing_ldm=merge_into_existing_ldm,
            remove_managed_datasets_missing_from_input=remove_managed_datasets_missing_from_input,
            management_tag=management_tag,
        )
        self._sdk.catalog_workspace_content.put_declarative_ldm(
            workspace_id=workspace_id,
            ldm=payload,
        )
        self._log_success_message(workspace_id)

Expand All @@ -251,6 +296,9 @@ def process(
custom_datasets: list[CustomDatasetDefinition],
custom_fields: list[CustomFieldDefinition],
check_relations: bool = True,
merge_into_existing_ldm: bool = False,
remove_managed_datasets_missing_from_input: bool = False,
management_tag: str | None = None,
) -> None:
"""Create custom datasets and fields in GoodData workspaces.

Expand All @@ -266,6 +314,14 @@ def process(
after updating the LDM. If the number of invalid relations increases,
the LDM will be reverted to its previous state. If False, the check
is skipped and the LDM is updated directly. Defaults to True.
merge_into_existing_ldm (bool): When True, load the workspace LDM first and merge
the generated custom datasets and date instances into it instead of uploading
only the extension fragment. Defaults to False for backward compatibility.
remove_managed_datasets_missing_from_input (bool): When ``merge_into_existing_ldm``
is True, remove existing datasets that contain ``management_tag`` but whose
dataset id is not present in this ``process`` call (tooling cleanup).
management_tag (str | None): Tag value used with
``remove_managed_datasets_missing_from_input``.

Raises:
ValueError: If there are validation errors in the dataset or field definitions.
Expand All @@ -278,6 +334,16 @@ def process(

if check_relations:
# Process the validated data with relations check.
self._process_with_relations_check(validated_data)
self._process_with_relations_check(
validated_data,
merge_into_existing_ldm=merge_into_existing_ldm,
remove_managed_datasets_missing_from_input=remove_managed_datasets_missing_from_input,
management_tag=management_tag,
)
else:
self._process_without_relations_check(validated_data)
self._process_without_relations_check(
validated_data,
merge_into_existing_ldm=merge_into_existing_ldm,
remove_managed_datasets_missing_from_input=remove_managed_datasets_missing_from_input,
management_tag=management_tag,
)
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from enum import Enum

from pydantic import BaseModel, model_validator
from pydantic import BaseModel, Field, model_validator


class CustomFieldType(str, Enum):
Expand Down Expand Up @@ -42,6 +42,14 @@ class CustomFieldDefinition(BaseModel):
custom_field_type: CustomFieldType
custom_field_source_column: str
custom_field_source_column_data_type: ColumnDataType
description: str | None = Field(
default=None,
description="Declarative description on the attribute, fact, or date dataset.",
)
tags: list[str] | None = Field(
default=None,
description="If set, replaces the default tag list (dataset display name only).",
)

@model_validator(mode="after")
def check_ids_not_equal(self) -> "CustomFieldDefinition":
Expand All @@ -68,6 +76,14 @@ class CustomDatasetDefinition(BaseModel):
dataset_reference_source_column_data_type: ColumnDataType
workspace_data_filter_id: str
workspace_data_filter_column_name: str
dataset_description: str | None = Field(
default=None,
description="Declarative description on the custom dataset.",
)
dataset_tags: list[str] | None = Field(
default=None,
description="If set, replaces the default tag list (dataset display name only).",
)

@model_validator(mode="after")
def check_source(self) -> "CustomDatasetDefinition":
Expand Down
Loading
Loading