add dataclass PowerMetrics

This commit is contained in:
2026-01-30 15:11:01 +01:00
parent f3c4bc1953
commit 9bedf0c799

View File

@@ -392,34 +392,25 @@ async def get_power_data(session, host: HostConfig):
# Start time measurement
start = time.monotonic()
# Root ressource abfragen
# Get root resources
resources = await discover_redfish_resources(session, host)
if not resources:
if not resources or not resources.chassis:
logging.error("Could not discover any resources for %s", host.fqdn)
host.mark_failure()
UP_GAUGE.labels(host=host.fqdn).set(0)
return
chassis_url = resources.get("Chassis")
if not chassis_url:
logging.error("No valid Chassis URL found for %s", host.fqdn)
host.mark_failure()
UP_GAUGE.labels(host=host.fqdn).set(0)
return
# Mark host as up
host.mark_success()
UP_GAUGE.labels(host=host.fqdn).set(1)
# Get chassis resource
chassis_url = f"https://{host.fqdn}{chassis_url}"
chassis_url = resources.get("Chassis")
chassis_data = await fetch_with_retry(session, host, chassis_url)
if not chassis_data:
host.mark_failure()
UP_GAUGE.labels(host=host.fqdn).set(0)
return
# Loop over each member in the chassis resource
for chassis_member in chassis_data.get("Members", []):
chassis_member_url = chassis_member.get("@odata.id")
if not chassis_member_url:
@@ -427,7 +418,6 @@ async def get_power_data(session, host: HostConfig):
# Normalize URL... I needed this for really old Redfish versions :S (<1.6.0)
chassis_member_url = normalize_url(chassis_member_url)
# Get chassis id from url ("/redfish/v1/Chassis/1" -> 1)
chassis_member_id = chassis_member_url.split("/")[-1]
# Check if the chassis id is in config (had problem with chassis "NVMe")
@@ -441,9 +431,7 @@ async def get_power_data(session, host: HostConfig):
continue
# Get Power resource (fallback to "Power")
power_resource_url, power_resource_type = get_power_resource_info(
member_data, host.fqdn
)
power_resource_url, power_resource_type = get_power_resource_info(member_data, host.fqdn)
if not power_resource_url:
continue
@@ -454,16 +442,14 @@ async def get_power_data(session, host: HostConfig):
# Get PowerSupplies, depending on resource type ("Power" or "PowerSubsystem")
if power_resource_type == "PowerSubsystem":
# PowerSupplies-URL abfragen (für PowerSubsystem)
# Request PowerSupplies url (for PowerSubsystem)
power_supplies_url = power_data.get("PowerSupplies", {}).get("@odata.id")
if not power_supplies_url:
logging.warning("No PowerSupplies found for %s", host.fqdn)
continue
power_supplies_url = f"https://{host.fqdn}{power_supplies_url}"
power_supplies_data = await fetch_with_retry(
session, host, power_supplies_url
)
power_supplies_data = await fetch_with_retry(session, host, power_supplies_url)
if not power_supplies_data:
continue
@@ -479,13 +465,17 @@ async def get_power_data(session, host: HostConfig):
continue
# Process PowerSupplies object
await process_power_supply(session, host, psu_data, "PowerSubsystem")
metrics = await process_power_supply(session, host, psu_data, "PowerSubsystem")
if metrics:
update_prometheus_metrics(host, metrics)
elif power_resource_type == "Power":
# Loop over PowerSupplies for older Redfish versions
for psu in power_data.get("PowerSupplies", []):
# Process PowerSupplies object
await process_power_supply(session, host, psu, "Power")
metrics = await process_power_supply(session, host, psu, "Power")
if metrics:
update_prometheus_metrics(host, metrics)
else:
logging.error("Unknown power resource type for host %s", host.fqdn)
@@ -494,6 +484,15 @@ async def get_power_data(session, host: HostConfig):
# Measure request and process latency
REQUEST_LATENCY.labels(host=host.fqdn).observe(time.monotonic() - start)
def update_prometheus_metrics(host: HostConfig, metrics: PowerMetrics):
    """Publish the readings of one power supply to the Prometheus gauges.

    Each of ``metrics.voltage``, ``metrics.watts`` and ``metrics.amps`` is
    written to its gauge, labelled with ``host.fqdn`` and ``metrics.serial``.
    A reading that is ``None`` is skipped. Nothing is written at all when
    ``metrics.serial`` is falsy, because the serial is used as a label.
    """
    psu_serial = metrics.serial
    if not psu_serial:
        # Without a serial there is no label value to attach the readings to.
        return

    readings = (
        (VOLTAGE_GAUGE, metrics.voltage),
        (WATTS_GAUGE, metrics.watts),
        (AMPS_GAUGE, metrics.amps),
    )
    for gauge, value in readings:
        # A value of 0 is a valid reading; only a missing value is skipped.
        if value is None:
            continue
        gauge.labels(host=host.fqdn, psu_serial=psu_serial).set(value)
async def get_system_info(session, host: HostConfig):
"""Query Redfish for system data and update Prometheus metrics"""