cover two use cases

This commit is contained in:
2026-01-30 10:25:31 +01:00
parent a899d43f8d
commit 08f03d2cc3

View File

@@ -9,7 +9,14 @@ import asyncio
import aiohttp import aiohttp
import urllib3 import urllib3
import yaml import yaml
from prometheus_client import Gauge, start_http_server, Summary, Counter, Histogram, Info from prometheus_client import (
Gauge,
start_http_server,
Summary,
Counter,
Histogram,
Info,
)
@dataclass @dataclass
@@ -78,7 +85,10 @@ amps_gauge = Gauge(
"redfish_psu_input_amps", "Current draw in Amps per PSU", ["host", "psu_serial"] "redfish_psu_input_amps", "Current draw in Amps per PSU", ["host", "psu_serial"]
) )
# set info metric # set info metric
system_info = Info("redfish_system_info", "System information (vendor, model, serial, etc.)", ["host"]) system_info = Info(
"redfish_system_info", "System information (vendor, model, serial, etc.)", ["host"]
)
@REQUEST_TIME.time() @REQUEST_TIME.time()
async def process_request(t): async def process_request(t):
@@ -205,6 +215,119 @@ async def fetch_with_retry(session, host: HostConfig, url: str) -> dict | None:
return None return None
def get_power_resource_info(
member_data: dict, host_fqdn: str
) -> tuple[str | None, str | None]:
"""Get the URL and type of Power resource (PowerSubsystem or Power)."""
# Try PowerSubsystem (new Redfish versions)
power_url = member_data.get("PowerSubsystem", {}).get("@odata.id")
if power_url:
return f"https://{host_fqdn}{power_url}", "PowerSubsystem"
# Try Power for older Redfish versions
power_url = member_data.get("Power", {}).get("@odata.id")
if power_url:
logging.warning(
"DEPRECATED: Host %s uses old Redfish API (Power instead of PowerSubsystem). "
"Consider updating the firmware for full compatibility.",
host_fqdn,
)
return f"https://{host_fqdn}{power_url}", "Power"
# Nothing found -> Error
logging.error("No Power or PowerSubsystem found for host %s", host_fqdn)
return None, None
def get_power_supplies_url(
power_data: dict, power_resource_type: str, host_fqdn: str
) -> str | None:
"""Get the URL for PowerSupplies based on the Power resource type."""
if power_resource_type == "PowerSubsystem":
# Bei PowerSubsystem: PowerSupplies ist ein separates Objekt
power_supplies_url = power_data.get("PowerSupplies", {}).get("@odata.id")
if power_supplies_url:
return f"https://{host_fqdn}{power_supplies_url}"
elif power_resource_type == "Power":
# Bei Power: PowerSupplies ist direkt im Power-Objekt enthalten
if "PowerSupplies" in power_data:
return f"https://{host_fqdn}/redfish/v1/Chassis/1/Power"
logging.error("No PowerSupplies found in Power resource for host %s", host_fqdn)
return None
def get_power_supplies(
power_data: dict, power_resource_type: str, host_fqdn: str
) -> list[dict] | None:
"""Get PowerSupplies data based on the Power resource type."""
if power_resource_type == "PowerSubsystem":
# PowerSubsystem: PowerSupplies is a ressource with Members
power_supplies_url = power_data.get("PowerSupplies", {}).get("@odata.id")
if not power_supplies_url:
logging.error("No PowerSupplies URL found for PowerSubsystem")
return None
return None # If none, then use the PowerSubsystem member url
elif power_resource_type == "Power":
# Power: PowerSupplies is an array!
return power_data.get("PowerSupplies", [])
logging.error("Unknown power resource type")
return None
async def process_power_supply(
    session, host: HostConfig, psu_data: dict, power_resource_type: str
):
    """Extract the input metrics of one PowerSupply and update the gauges.

    Args:
        session: HTTP session handed through to ``fetch_with_retry``.
        host: Host the power supply belongs to; ``host.fqdn`` is used for
            URLs and as the ``host`` label.
        psu_data: Decoded JSON of a single power supply.
        power_resource_type: ``"PowerSubsystem"`` (readings come from the
            linked "Metrics" resource) or ``"Power"`` (readings are fields
            of ``psu_data`` itself).

    Returns:
        None. The function returns early without touching any gauge when
        the Metrics link or its data is missing, or when the resource type
        is unknown.
    """
    serial = psu_data.get("SerialNumber")
    # Diagnostic output goes through logging (not stdout) so it follows the
    # configured log level and format.
    logging.debug(
        "Processing PowerSupply %s (serial %s) on host %s",
        psu_data.get("Id"),
        serial,
        host.fqdn,
    )

    if power_resource_type == "PowerSubsystem":
        # Newer Redfish API: metrics live in a separate "Metrics" resource.
        metrics_url = psu_data.get("Metrics", {}).get("@odata.id")
        if not metrics_url:
            logging.warning("No Metrics found for PowerSupply %s", psu_data.get("Id"))
            return

        metrics_url = f"https://{host.fqdn}{metrics_url}"
        metrics_data = await fetch_with_retry(session, host, metrics_url)
        if not metrics_data:
            return

        # Readings are nested objects with a "Reading" field.
        line_input_v = metrics_data.get("InputVoltage", {}).get("Reading")
        watts_input = metrics_data.get("InputPowerWatts", {}).get("Reading")
        amps_input = metrics_data.get("InputCurrentAmps", {}).get("Reading")
    elif power_resource_type == "Power":
        # Older Redfish API: metrics are plain fields of the PowerSupply entry.
        line_input_v = psu_data.get("LineInputVoltage")
        watts_input = psu_data.get("PowerInputWatts")
        amps_input = psu_data.get("InputCurrentAmps")
    else:
        logging.error(
            "Unknown power resource type for PowerSupply %s", psu_data.get("Id")
        )
        return

    # Derive the current from power and voltage when it is not reported.
    # The voltage must be truthy (non-zero divisor); a reported 0 W is a
    # valid reading and yields 0.0 A instead of being skipped.
    if amps_input is None and line_input_v and watts_input is not None:
        amps_input = round(watts_input / line_input_v, 2)

    # Update Prometheus metrics; readings that are absent are left untouched.
    if line_input_v is not None:
        voltage_gauge.labels(host=host.fqdn, psu_serial=serial).set(line_input_v)
    if watts_input is not None:
        watts_gauge.labels(host=host.fqdn, psu_serial=serial).set(watts_input)
    if amps_input is not None:
        amps_gauge.labels(host=host.fqdn, psu_serial=serial).set(amps_input)
async def get_power_data(session, host: HostConfig): async def get_power_data(session, host: HostConfig):
"""Query Redfish for power data and update Prometheus metrics""" """Query Redfish for power data and update Prometheus metrics"""
if host.should_skip(): if host.should_skip():
@@ -254,75 +377,61 @@ async def get_power_data(session, host: HostConfig):
if not member_data: if not member_data:
continue continue
# PowerSubsystem url # Get Power ressource (fallback to "Power")
power_subsystem_url = member_data.get("PowerSubsystem", {}).get("@odata.id") power_resource_url, power_resource_type = get_power_resource_info(
if not power_subsystem_url: member_data, host.fqdn
logging.warning("No PowerSubsystem found for %s", host.fqdn)
continue
# Get PowerSubsystem collection
power_subsystem_url = f"https://{host.fqdn}{power_subsystem_url}"
power_subsystem_data = await fetch_with_retry(
session, host, power_subsystem_url
) )
if not power_subsystem_data: if not power_resource_url:
logging.warning("No PowerSubsystem data found for %s", host.fqdn)
continue continue
# Get PowerSupplies url # Get Power Data
power_supplies_url = power_subsystem_data.get("PowerSupplies", {}).get( power_data = await fetch_with_retry(session, host, power_resource_url)
"@odata.id" if not power_data:
)
if not power_supplies_url:
logging.warning("No PowerSupplies found for %s", host.fqdn)
continue continue
# List PowerSupplies members # Get PowerSupplies, depend on ressource type ("Power" or "PowerSubsystem")
power_supplies_url = f"https://{host.fqdn}{power_supplies_url}" if power_resource_type == "PowerSubsystem":
power_supplies_data = await fetch_with_retry(session, host, power_supplies_url) # PowerSupplies-URL abfragen (für PowerSubsystem)
if not power_supplies_data: power_supplies_url = power_data.get("PowerSupplies", {}).get("@odata.id")
if not power_supplies_url:
logging.warning("No PowerSupplies found for %s", host.fqdn)
continue
power_supplies_url = f"https://{host.fqdn}{power_supplies_url}"
power_supplies_data = await fetch_with_retry(
session, host, power_supplies_url
)
if not power_supplies_data:
continue
# loop over Members for "PowerSubsystem"
for psu_member in power_supplies_data.get("Members", []):
psu_url = psu_member.get("@odata.id")
if not psu_url:
continue
psu_url = f"https://{host.fqdn}{psu_url}"
psu_data = await fetch_with_retry(session, host, psu_url)
if not psu_data:
continue
# Process PowerSupplies object
await process_power_supply(session, host, psu_data, "PowerSubsystem")
elif power_resource_type == "Power":
# Loop over PowerSupplies for older Redfish versions
for psu in power_data.get("PowerSupplies", []):
# Process PowerSupplies object
await process_power_supply(session, host, psu, "Power")
else:
logging.error("Unknown power resource type for host %s", host.fqdn)
continue continue
# Loop over PowerSupply members # Measure request and process latency
for psu_member in power_supplies_data.get("Members", []):
psu_url = psu_member.get("@odata.id")
if not psu_url:
continue
psu_url = f"https://{host.fqdn}{psu_url}"
psu_data = await fetch_with_retry(session, host, psu_url)
if not psu_data:
continue
# Get Metrics URL
metrics_url = psu_data.get("Metrics", {}).get("@odata.id")
if not metrics_url:
logging.warning(
"No Metrics found for PowerSupply %s", psu_data.get("Id")
)
continue
metrics_url = f"https://{host.fqdn}{metrics_url}"
metrics_data = await fetch_with_retry(session, host, metrics_url)
if not metrics_data:
continue
# Get Metrics from data
line_input_v = metrics_data.get("InputVoltage", {}).get("Reading")
watts_input = metrics_data.get("InputPowerWatts", {}).get("Reading")
amps_input = metrics_data.get("InputCurrentAmps", {}).get("Reading")
serial = psu_data.get("SerialNumber")
if line_input_v is not None:
voltage_gauge.labels(host=host.fqdn, psu_serial=serial).set(
line_input_v
)
if watts_input is not None:
watts_gauge.labels(host=host.fqdn, psu_serial=serial).set(watts_input)
if amps_input is not None:
amps_gauge.labels(host=host.fqdn, psu_serial=serial).set(amps_input)
REQUEST_LATENCY.labels(host=host.fqdn).observe(time.monotonic() - start) REQUEST_LATENCY.labels(host=host.fqdn).observe(time.monotonic() - start)
async def get_system_info(session, host: HostConfig): async def get_system_info(session, host: HostConfig):
"""Query Redfish for system data and update Prometheus metrics""" """Query Redfish for system data and update Prometheus metrics"""
if host.should_skip(): if host.should_skip():
@@ -339,7 +448,12 @@ async def get_system_info(session, host: HostConfig):
return return
redfish_version = root_data.get("RedfishVersion") redfish_version = root_data.get("RedfishVersion")
if not redfish_version:
print("Marco Lucarelli: INFO! redfish_version")
vendor = root_data.get("Vendor") vendor = root_data.get("Vendor")
if not vendor:
print("Marco Lucarelli: INFO! vendor")
# Get Manufacturer, Serial and Model # Get Manufacturer, Serial and Model
systems_url = f"https://{host.fqdn}/redfish/v1/Systems/" systems_url = f"https://{host.fqdn}/redfish/v1/Systems/"
@@ -354,7 +468,9 @@ async def get_system_info(session, host: HostConfig):
if not system_url: if not system_url:
continue continue
system_data = await fetch_with_retry(session, host, f"https://{host.fqdn}{system_url}") system_data = await fetch_with_retry(
session, host, f"https://{host.fqdn}{system_url}"
)
if not system_data: if not system_data:
continue continue
@@ -374,7 +490,6 @@ async def get_system_info(session, host: HostConfig):
) )
async def logout_host(session, host): async def logout_host(session, host):
"""Clean logout for Redfish with session tokens""" """Clean logout for Redfish with session tokens"""
if not host.session_token: if not host.session_token:
@@ -438,7 +553,7 @@ async def run_exporter(config, stop_event):
tasks = [] tasks = []
for hc in host_objs: for hc in host_objs:
tasks.append(get_power_data(session, hc)) tasks.append(get_power_data(session, hc))
tasks.append(get_system_info(session, hc)) # tasks.append(get_system_info(session, hc))
await asyncio.gather(*tasks) await asyncio.gather(*tasks)
await process_request(interval) await process_request(interval)
finally: finally: