diff --git a/python/redfish-api/redfish_exporter_v9000.py b/python/redfish-api/redfish_exporter_v9000.py
index 457394b..d4bca77 100644
--- a/python/redfish-api/redfish_exporter_v9000.py
+++ b/python/redfish-api/redfish_exporter_v9000.py
@@ -9,7 +9,14 @@ import asyncio
 import aiohttp
 import urllib3
 import yaml
-from prometheus_client import Gauge, start_http_server, Summary, Counter, Histogram, Info
+from prometheus_client import (
+    Gauge,
+    start_http_server,
+    Summary,
+    Counter,
+    Histogram,
+    Info,
+)
 
 
 @dataclass
@@ -78,7 +85,10 @@ amps_gauge = Gauge(
     "redfish_psu_input_amps", "Current draw in Amps per PSU", ["host", "psu_serial"]
 )
 # set info metric
-system_info = Info("redfish_system_info", "System information (vendor, model, serial, etc.)", ["host"])
+system_info = Info(
+    "redfish_system_info", "System information (vendor, model, serial, etc.)", ["host"]
+)
+
 
 @REQUEST_TIME.time()
 async def process_request(t):
@@ -205,6 +215,122 @@ async def fetch_with_retry(session, host: HostConfig, url: str) -> dict | None:
     return None
 
 
+def get_power_resource_info(
+    member_data: dict, host_fqdn: str
+) -> tuple[str | None, str | None]:
+    """Get the URL and type of Power resource (PowerSubsystem or Power)."""
+    # Try PowerSubsystem (new Redfish versions)
+    power_url = member_data.get("PowerSubsystem", {}).get("@odata.id")
+    if power_url:
+        return f"https://{host_fqdn}{power_url}", "PowerSubsystem"
+
+    # Try Power for older Redfish versions
+    power_url = member_data.get("Power", {}).get("@odata.id")
+    if power_url:
+        logging.warning(
+            "DEPRECATED: Host %s uses old Redfish API (Power instead of PowerSubsystem). "
+            "Consider updating the firmware for full compatibility.",
+            host_fqdn,
+        )
+        return f"https://{host_fqdn}{power_url}", "Power"
+
+    # Nothing found -> Error
+    logging.error("No Power or PowerSubsystem found for host %s", host_fqdn)
+    return None, None
+
+
+def get_power_supplies_url(
+    power_data: dict, power_resource_type: str, host_fqdn: str
+) -> str | None:
+    """Get the URL for PowerSupplies based on the Power resource type."""
+    if power_resource_type == "PowerSubsystem":
+        # PowerSubsystem: PowerSupplies is a separate, linked collection
+        power_supplies_url = power_data.get("PowerSupplies", {}).get("@odata.id")
+        if power_supplies_url:
+            return f"https://{host_fqdn}{power_supplies_url}"
+
+    elif power_resource_type == "Power":
+        # Power: PowerSupplies is embedded in the Power resource itself, so
+        # point at that resource instead of assuming the chassis id is "1"
+        if "PowerSupplies" in power_data:
+            power_url = power_data.get("@odata.id", "/redfish/v1/Chassis/1/Power")
+            return f"https://{host_fqdn}{power_url}"
+
+    logging.error("No PowerSupplies found in Power resource for host %s", host_fqdn)
+    return None
+
+
+def get_power_supplies(
+    power_data: dict, power_resource_type: str, host_fqdn: str
+) -> list[dict] | None:
+    """Get inline PowerSupplies data based on the Power resource type.
+
+    Only the legacy Power resource embeds its PowerSupplies. For a
+    PowerSubsystem the collection must be fetched separately: None is returned.
+    """
+    if power_resource_type == "PowerSubsystem":
+        # PowerSubsystem: PowerSupplies is a linked resource with Members
+        if not power_data.get("PowerSupplies", {}).get("@odata.id"):
+            logging.error(
+                "No PowerSupplies URL found for PowerSubsystem on host %s", host_fqdn
+            )
+        return None
+
+    elif power_resource_type == "Power":
+        # Power: PowerSupplies is an inline array
+        return power_data.get("PowerSupplies", [])
+
+    logging.error("Unknown power resource type for host %s", host_fqdn)
+    return None
+
+
+async def process_power_supply(
+    session, host: HostConfig, psu_data: dict, power_resource_type: str
+):
+    """Extract metrics from PowerSupply"""
+    serial = psu_data.get("SerialNumber")
+
+    if power_resource_type == "PowerSubsystem":
+        # Newer Redfish API: metrics live in a separate "Metrics" resource
+        metrics_url = psu_data.get("Metrics", {}).get("@odata.id")
+        if not metrics_url:
+            logging.warning("No Metrics found for PowerSupply %s", psu_data.get("Id"))
+            return
+
+        metrics_url = f"https://{host.fqdn}{metrics_url}"
+        metrics_data = await fetch_with_retry(session, host, metrics_url)
+        if not metrics_data:
+            return
+
+        # Get metrics from the Metrics resource
+        line_input_v = metrics_data.get("InputVoltage", {}).get("Reading")
+        watts_input = metrics_data.get("InputPowerWatts", {}).get("Reading")
+        amps_input = metrics_data.get("InputCurrentAmps", {}).get("Reading")
+
+    elif power_resource_type == "Power":
+        # Older Redfish API: metrics are inline properties of the PowerSupplies entry
+        line_input_v = psu_data.get("LineInputVoltage")
+        watts_input = psu_data.get("PowerInputWatts")
+        amps_input = psu_data.get("InputCurrentAmps")
+
+    else:
+        logging.error(
+            "Unknown power resource type for PowerSupply %s", psu_data.get("Id")
+        )
+        return
+
+    if amps_input is None and line_input_v and watts_input:
+        amps_input = round(watts_input / line_input_v, 2)
+
+    # Update Prometheus metrics
+    if line_input_v is not None:
+        voltage_gauge.labels(host=host.fqdn, psu_serial=serial).set(line_input_v)
+    if watts_input is not None:
+        watts_gauge.labels(host=host.fqdn, psu_serial=serial).set(watts_input)
+    if amps_input is not None:
+        amps_gauge.labels(host=host.fqdn, psu_serial=serial).set(amps_input)
+
+
 async def get_power_data(session, host: HostConfig):
     """Query Redfish for power data and update Prometheus metrics"""
     if host.should_skip():
@@ -254,75 +380,61 @@ async def get_power_data(session, host: HostConfig):
         if not member_data:
             continue
 
-        # PowerSubsystem url
-        power_subsystem_url = member_data.get("PowerSubsystem", {}).get("@odata.id")
-        if not power_subsystem_url:
-            logging.warning("No PowerSubsystem found for %s", host.fqdn)
-            continue
-
-        # Get PowerSubsystem collection
-        power_subsystem_url = f"https://{host.fqdn}{power_subsystem_url}"
-        power_subsystem_data = await fetch_with_retry(
-            session, host, power_subsystem_url
+        # Get Power resource (fallback to "Power")
+        power_resource_url, power_resource_type = get_power_resource_info(
+            member_data, host.fqdn
         )
-        if not power_subsystem_data:
-            logging.warning("No PowerSubsystem data found for %s", host.fqdn)
+        if not power_resource_url:
             continue
 
-        # Get PowerSupplies url
-        power_supplies_url = power_subsystem_data.get("PowerSupplies", {}).get(
-            "@odata.id"
-        )
-        if not power_supplies_url:
-            logging.warning("No PowerSupplies found for %s", host.fqdn)
+        # Get Power Data
+        power_data = await fetch_with_retry(session, host, power_resource_url)
+        if not power_data:
             continue
 
-        # List PowerSupplies members
-        power_supplies_url = f"https://{host.fqdn}{power_supplies_url}"
-        power_supplies_data = await fetch_with_retry(session, host, power_supplies_url)
-        if not power_supplies_data:
+        # Get PowerSupplies, depending on resource type ("Power" or "PowerSubsystem")
+        if power_resource_type == "PowerSubsystem":
+            # Look up the PowerSupplies URL (PowerSubsystem only)
+            power_supplies_url = power_data.get("PowerSupplies", {}).get("@odata.id")
+            if not power_supplies_url:
+                logging.warning("No PowerSupplies found for %s", host.fqdn)
+                continue
+
+            power_supplies_url = f"https://{host.fqdn}{power_supplies_url}"
+            power_supplies_data = await fetch_with_retry(
+                session, host, power_supplies_url
+            )
+            if not power_supplies_data:
+                continue
+
+            # Loop over Members for "PowerSubsystem"
+            for psu_member in power_supplies_data.get("Members", []):
+                psu_url = psu_member.get("@odata.id")
+                if not psu_url:
+                    continue
+
+                psu_url = f"https://{host.fqdn}{psu_url}"
+                psu_data = await fetch_with_retry(session, host, psu_url)
+                if not psu_data:
+                    continue
+
+                # Process PowerSupplies object
+                await process_power_supply(session, host, psu_data, "PowerSubsystem")
+
+        elif power_resource_type == "Power":
+            # Loop over PowerSupplies for older Redfish versions
+            for psu in power_data.get("PowerSupplies", []):
+                # Process PowerSupplies object
+                await process_power_supply(session, host, psu, "Power")
+
+        else:
+            logging.error("Unknown power resource type for host %s", host.fqdn)
             continue
 
-        # Loop over PowerSupply members
-        for psu_member in power_supplies_data.get("Members", []):
-            psu_url = psu_member.get("@odata.id")
-            if not psu_url:
-                continue
-
-            psu_url = f"https://{host.fqdn}{psu_url}"
-            psu_data = await fetch_with_retry(session, host, psu_url)
-            if not psu_data:
-                continue
-
-            # Get Metrics URL
-            metrics_url = psu_data.get("Metrics", {}).get("@odata.id")
-            if not metrics_url:
-                logging.warning(
-                    "No Metrics found for PowerSupply %s", psu_data.get("Id")
-                )
-                continue
-
-            metrics_url = f"https://{host.fqdn}{metrics_url}"
-            metrics_data = await fetch_with_retry(session, host, metrics_url)
-            if not metrics_data:
-                continue
-
-            # Get Metrics from data
-            line_input_v = metrics_data.get("InputVoltage", {}).get("Reading")
-            watts_input = metrics_data.get("InputPowerWatts", {}).get("Reading")
-            amps_input = metrics_data.get("InputCurrentAmps", {}).get("Reading")
-            serial = psu_data.get("SerialNumber")
-            if line_input_v is not None:
-                voltage_gauge.labels(host=host.fqdn, psu_serial=serial).set(
-                    line_input_v
-                )
-            if watts_input is not None:
-                watts_gauge.labels(host=host.fqdn, psu_serial=serial).set(watts_input)
-            if amps_input is not None:
-                amps_gauge.labels(host=host.fqdn, psu_serial=serial).set(amps_input)
-
+    # Measure request and process latency
     REQUEST_LATENCY.labels(host=host.fqdn).observe(time.monotonic() - start)
 
+
 async def get_system_info(session, host: HostConfig):
     """Query Redfish for system data and update Prometheus metrics"""
     if host.should_skip():
@@ -330,7 +442,7 @@ async def get_system_info(session, host: HostConfig):
             "Skipping %s (in cool-down until %.1f)", host.fqdn, host.next_retry_time
         )
         return
-    
+
     # Get Vendor and Redfish Version
     root_url = f"https://{host.fqdn}/redfish/v1/"
     root_data = await fetch_with_retry(session, host, root_url)
@@ -339,7 +451,12 @@ async def get_system_info(session, host: HostConfig):
         return
 
     redfish_version = root_data.get("RedfishVersion")
+    if not redfish_version:
+        logging.info("No RedfishVersion reported by %s", host.fqdn)
+
     vendor = root_data.get("Vendor")
+    if not vendor:
+        logging.info("No Vendor reported by %s", host.fqdn)
 
     # Get Manufacturer, Serial and Model
     systems_url = f"https://{host.fqdn}/redfish/v1/Systems/"
@@ -354,7 +471,9 @@ async def get_system_info(session, host: HostConfig):
         if not system_url:
             continue
 
-        system_data = await fetch_with_retry(session, host, f"https://{host.fqdn}{system_url}")
+        system_data = await fetch_with_retry(
+            session, host, f"https://{host.fqdn}{system_url}"
+        )
         if not system_data:
             continue
 
@@ -374,7 +493,6 @@ async def get_system_info(session, host: HostConfig):
         )
 
 
-
 async def logout_host(session, host):
     """Clean logout for Redfish with session tokens"""
     if not host.session_token: