Skip to content

factories

carbon.node.factories ¤

Factories for creating compute Node objects.

Classes¤

DummyNodeFactory(cpu_type, gpu_type, mem_type, node, component_powers) dataclass ¤

Bases: NodeFactory

Dummy factory for creating Node objects with hardcoded values.

Functions¤
create(node_labels) ¤

Create a dummy Node object.

Parameters:

Name Type Description Default
node_labels list[str]

The labels of the nodes to create.

required

Returns:

Name Type Description
Node list[Node]

A list of Node instances with dummy hardware and power info, one per label.

Source code in carbon/node/factories.py
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
def create(self, node_labels: list[str]) -> list[Node]:
    """Create dummy Node objects, one per requested label.

    Every node gets the factory's configured CPU, GPU and memory types and the
    matching per-unit power figures looked up in ``self.component_powers``.

    Args:
        node_labels (list[str]): The labels of the nodes to create.

    Returns:
        list[Node]: Node instances with dummy hardware and power info, in the
            same order as ``node_labels``.
    """
    powers = self.component_powers

    # The power figures depend only on the factory settings, so resolve them
    # once up front instead of once per node.
    core_watts = powers["cpus"][self.cpu_type]["per_core_power_watts"]
    if self.gpu_type:
        gpu_watts = powers["gpus"][self.gpu_type]["per_gpu_power_watts"]
    else:
        # No GPU type configured: GPUs contribute no power.
        gpu_watts = 0.0
    gb_watts = powers["memory"][self.mem_type]["per_gb_power_watts"]

    nodes: list[Node] = []
    for label in node_labels:
        nodes.append(
            Node(
                name=label,
                cpu_type=self.cpu_type,
                gpu_type=self.gpu_type,
                mem_type=self.mem_type,
                per_core_power_watts=core_watts,
                per_gpu_power_watts=gpu_watts,
                per_gb_power_watts=gb_watts,
            )
        )
    return nodes
from_config(config, component_powers) classmethod ¤

Initialize the DummyNodeFactory with a config.

Parameters:

Name Type Description Default
config dict

Configuration dictionary; it is validated with DummySchedulerConfig and supplies the cpu_type, gpu_type, mem_type and node values.

required
component_powers dict

Dictionary of power usages for components.

required
Source code in carbon/node/factories.py
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
@classmethod
def from_config(  # type: ignore[explicit-any]
    cls,
    config: dict[str, Any],
    component_powers: ComponentPower,
) -> Self:
    """Build a DummyNodeFactory from a configuration dictionary.

    Args:
        config (dict): Keyword arguments for ``DummySchedulerConfig``; the
            resulting object supplies cpu_type, gpu_type, mem_type and node.
        component_powers (dict): Dictionary of power usages for components.

    Returns:
        Self: A factory populated from the validated config.
    """
    # Route the raw dict through DummySchedulerConfig before reading any field.
    settings = DummySchedulerConfig(**config)
    return cls(
        component_powers=component_powers,
        cpu_type=settings.cpu_type,
        gpu_type=settings.gpu_type,
        mem_type=settings.mem_type,
        node=settings.node,
    )

NodeFactory ¤

Bases: Protocol

Abstract base class for Node factories.

Functions¤
create(node_labels) abstractmethod ¤

Abstract method to create multiple Node objects.

Source code in carbon/node/factories.py
31
32
33
@abstractmethod
def create(self, node_labels: list[str]) -> list[Node]:
    """Abstract method to create multiple Node objects.

    Args:
        node_labels (list[str]): The labels of the nodes to create.

    Returns:
        list[Node]: The created Node objects.
    """
from_config(config, component_powers) abstractmethod classmethod ¤

Initialize the NodeFactory with a config.

Parameters:

Name Type Description Default
config dict

Configuration dictionary.

required
component_powers dict

Dictionary of power usages for components.

required
Source code in carbon/node/factories.py
17
18
19
20
21
22
23
24
25
26
27
28
29
@classmethod
@abstractmethod
def from_config(  # type: ignore[explicit-any]
    cls,
    config: dict[str, Any],
    component_powers: ComponentPower,
) -> Self:
    """Initialize the NodeFactory with a config.

    Args:
        config (dict): Configuration dictionary.
        component_powers (dict): Dictionary of power usages for components.

    Returns:
        Self: A factory instance built from the given arguments.
    """

PBSNodeFactory(component_powers) dataclass ¤

Bases: NodeFactory

Factory for creating Node objects by querying PBS.

Functions¤
create(node_labels) ¤

Create Node objects by fetching info from PBS and cluster config.

Parameters:

Name Type Description Default
node_labels list[str]

The labels of the nodes to query.

required

Returns:

Type Description
list[Node]

list[Node]: A list of Node instances with hardware and power info.

Source code in carbon/node/factories.py
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
def create(self, node_labels: list[str]) -> list[Node]:
    """Create Node objects by fetching info from PBS and cluster config.

    Runs ``qmgr -c "list node <labels>"`` once for all requested labels, reads
    the ``resources_available.host``, ``.cpu_type`` and ``.gpu_type`` attributes
    of every node block in the output, and pairs them with the per-unit power
    figures in ``self.component_powers``.

    Args:
        node_labels (list[str]): The labels of the nodes to query.

    Returns:
        list[Node]: A list of Node instances with hardware and power info. The
            list is empty when ``node_labels`` is empty (qmgr is not invoked).

    Raises:
        ValueError: If a node's cpu_type cannot be determined, or if its CPU,
            GPU or memory type has no entry in the cluster config.
        subprocess.CalledProcessError: If qmgr exits with a non-zero status.
        subprocess.TimeoutExpired: If qmgr does not finish within 20 seconds.
        FileNotFoundError: If the qmgr executable cannot be found.
    """
    if not node_labels:
        # Nothing to look up; do not invoke qmgr with an empty node list.
        return []

    # qmgr takes the object names as one comma-separated list. The command is
    # passed as an argument vector (no shell), so characters in the labels are
    # never interpreted by a shell.
    cmd = ["qmgr", "-c", "list node " + ",".join(node_labels)]
    result = subprocess.run(
        cmd, timeout=20, capture_output=True, text=True, check=True
    )

    node_list: list[Node] = []

    # The output is treated as blank-line separated blocks, one per node.
    node_info_list = [lines for lines in result.stdout.split("\n\n") if lines]
    for node_info in node_info_list:
        node_label: str = ""
        cpu_type: str = ""
        gpu_type: str | None = None
        mem_type: str = "common"  # Memory hardcoded to common type
        for line in node_info.splitlines():
            attribute = line.lstrip()
            if attribute.startswith("resources_available.host"):
                node_label = line.split("=")[-1].strip()
            if attribute.startswith("resources_available.cpu_type"):
                cpu_type = line.split("=")[-1].strip()
            if attribute.startswith("resources_available.gpu_type"):
                val = line.split("=")[-1].strip()
                # A literal "None" value is treated as "no GPU".
                gpu_type = val if val != "None" else None

        # Check for a missing cpu_type before the lookup, so the caller gets
        # this message rather than a misleading "CPU type '' not found".
        if not cpu_type:
            raise ValueError(f"Could not determine cpu_type for node {node_label}")

        # Look up power usage for cpu/gpu/memory
        try:
            per_core_power_watts = self.component_powers["cpus"][cpu_type][
                "per_core_power_watts"
            ]
        except KeyError as err:
            raise ValueError(
                f"CPU type '{cpu_type}' not found in cluster config."
            ) from err

        if gpu_type:
            try:
                per_gpu_power_watts = self.component_powers["gpus"][gpu_type][
                    "per_gpu_power_watts"
                ]
            except KeyError as err:
                raise ValueError(
                    f"GPU type '{gpu_type}' not found in cluster config."
                ) from err
        else:
            per_gpu_power_watts = 0.0

        try:
            per_gb_power_watts = self.component_powers["memory"][mem_type][
                "per_gb_power_watts"
            ]
        except KeyError as err:
            raise ValueError(
                f"Memory type '{mem_type}' not found in cluster config."
            ) from err

        node_list.append(
            Node(
                name=node_label,
                cpu_type=cpu_type,
                gpu_type=gpu_type,
                mem_type=mem_type,
                per_core_power_watts=per_core_power_watts,
                per_gpu_power_watts=per_gpu_power_watts,
                per_gb_power_watts=per_gb_power_watts,
            )
        )
    return node_list
from_config(config, component_powers) classmethod ¤

Initialize the PBSNodeFactory with a config.

Parameters:

Name Type Description Default
config dict

Configuration dictionary (currently not used by the PBS factory).

required
component_powers dict

Dictionary of power usages for components.

required
Source code in carbon/node/factories.py
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
@classmethod
def from_config(  # type: ignore[explicit-any]
    cls,
    config: dict[str, Any],
    component_powers: ComponentPower,
) -> Self:
    """Build a PBSNodeFactory from a config and component power table.

    Args:
        config (dict): Configuration dictionary (currently not used by this
            factory).
        component_powers (dict): Dictionary of power usages for components.

    Returns:
        Self: A factory holding ``component_powers``.
    """
    # ``config`` is accepted but not read; it leaves room for site-specific
    # settings to be passed in later.
    factory = cls(component_powers=component_powers)
    return factory