cover two use cases
This commit is contained in:
@@ -9,7 +9,14 @@ import asyncio
|
|||||||
import aiohttp
|
import aiohttp
|
||||||
import urllib3
|
import urllib3
|
||||||
import yaml
|
import yaml
|
||||||
from prometheus_client import Gauge, start_http_server, Summary, Counter, Histogram, Info
|
from prometheus_client import (
|
||||||
|
Gauge,
|
||||||
|
start_http_server,
|
||||||
|
Summary,
|
||||||
|
Counter,
|
||||||
|
Histogram,
|
||||||
|
Info,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -78,7 +85,10 @@ amps_gauge = Gauge(
|
|||||||
"redfish_psu_input_amps", "Current draw in Amps per PSU", ["host", "psu_serial"]
|
"redfish_psu_input_amps", "Current draw in Amps per PSU", ["host", "psu_serial"]
|
||||||
)
|
)
|
||||||
# set info metric
|
# set info metric
|
||||||
system_info = Info("redfish_system_info", "System information (vendor, model, serial, etc.)", ["host"])
|
system_info = Info(
|
||||||
|
"redfish_system_info", "System information (vendor, model, serial, etc.)", ["host"]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@REQUEST_TIME.time()
|
@REQUEST_TIME.time()
|
||||||
async def process_request(t):
|
async def process_request(t):
|
||||||
@@ -205,6 +215,119 @@ async def fetch_with_retry(session, host: HostConfig, url: str) -> dict | None:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def get_power_resource_info(
|
||||||
|
member_data: dict, host_fqdn: str
|
||||||
|
) -> tuple[str | None, str | None]:
|
||||||
|
"""Get the URL and type of Power resource (PowerSubsystem or Power)."""
|
||||||
|
# Try PowerSubsystem (new Redfish versions)
|
||||||
|
power_url = member_data.get("PowerSubsystem", {}).get("@odata.id")
|
||||||
|
if power_url:
|
||||||
|
return f"https://{host_fqdn}{power_url}", "PowerSubsystem"
|
||||||
|
|
||||||
|
# Try Power for older Redfish versions
|
||||||
|
power_url = member_data.get("Power", {}).get("@odata.id")
|
||||||
|
if power_url:
|
||||||
|
logging.warning(
|
||||||
|
"DEPRECATED: Host %s uses old Redfish API (Power instead of PowerSubsystem). "
|
||||||
|
"Consider updating the firmware for full compatibility.",
|
||||||
|
host_fqdn,
|
||||||
|
)
|
||||||
|
return f"https://{host_fqdn}{power_url}", "Power"
|
||||||
|
|
||||||
|
# Nothing found -> Error
|
||||||
|
logging.error("No Power or PowerSubsystem found for host %s", host_fqdn)
|
||||||
|
return None, None
|
||||||
|
|
||||||
|
|
||||||
|
def get_power_supplies_url(
|
||||||
|
power_data: dict, power_resource_type: str, host_fqdn: str
|
||||||
|
) -> str | None:
|
||||||
|
"""Get the URL for PowerSupplies based on the Power resource type."""
|
||||||
|
if power_resource_type == "PowerSubsystem":
|
||||||
|
# Bei PowerSubsystem: PowerSupplies ist ein separates Objekt
|
||||||
|
power_supplies_url = power_data.get("PowerSupplies", {}).get("@odata.id")
|
||||||
|
if power_supplies_url:
|
||||||
|
return f"https://{host_fqdn}{power_supplies_url}"
|
||||||
|
|
||||||
|
elif power_resource_type == "Power":
|
||||||
|
# Bei Power: PowerSupplies ist direkt im Power-Objekt enthalten
|
||||||
|
if "PowerSupplies" in power_data:
|
||||||
|
return f"https://{host_fqdn}/redfish/v1/Chassis/1/Power"
|
||||||
|
|
||||||
|
logging.error("No PowerSupplies found in Power resource for host %s", host_fqdn)
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def get_power_supplies(
|
||||||
|
power_data: dict, power_resource_type: str, host_fqdn: str
|
||||||
|
) -> list[dict] | None:
|
||||||
|
"""Get PowerSupplies data based on the Power resource type."""
|
||||||
|
if power_resource_type == "PowerSubsystem":
|
||||||
|
# PowerSubsystem: PowerSupplies is a ressource with Members
|
||||||
|
power_supplies_url = power_data.get("PowerSupplies", {}).get("@odata.id")
|
||||||
|
if not power_supplies_url:
|
||||||
|
logging.error("No PowerSupplies URL found for PowerSubsystem")
|
||||||
|
return None
|
||||||
|
return None # If none, then use the PowerSubsystem member url
|
||||||
|
|
||||||
|
elif power_resource_type == "Power":
|
||||||
|
# Power: PowerSupplies is an array!
|
||||||
|
return power_data.get("PowerSupplies", [])
|
||||||
|
|
||||||
|
logging.error("Unknown power resource type")
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
async def process_power_supply(
    session, host: HostConfig, psu_data: dict, power_resource_type: str
):
    """Extract the input readings of one power supply and update the gauges.

    For ``PowerSubsystem`` the readings are fetched from the power supply's
    linked ``Metrics`` resource; for the legacy ``Power`` resource they are
    read directly from ``psu_data``. If no current reading is available but
    voltage and power are, the current is derived as ``watts / volts``.

    Args:
        session: HTTP session passed through to ``fetch_with_retry``.
        host: Host being queried; ``host.fqdn`` is used for URLs and labels.
        psu_data: Dict describing a single power supply.
        power_resource_type: ``"PowerSubsystem"`` or ``"Power"``.

    Returns:
        None. Returns early without touching any gauge when the metrics are
        unavailable or the resource type is unknown.
    """
    serial = psu_data.get("SerialNumber")
    # Replaces leftover debug print() calls that wrote to stdout for every
    # power supply on every scrape.
    logging.debug(
        "Processing PowerSupply %s (serial %s) on %s",
        psu_data.get("Id"),
        serial,
        host.fqdn,
    )

    if power_resource_type == "PowerSubsystem":
        # Newer Redfish API: metrics live in a separate "Metrics" resource.
        # ``or {}`` guards against an explicit JSON null.
        metrics_url = (psu_data.get("Metrics") or {}).get("@odata.id")
        if not metrics_url:
            logging.warning("No Metrics found for PowerSupply %s", psu_data.get("Id"))
            return

        metrics_url = f"https://{host.fqdn}{metrics_url}"
        metrics_data = await fetch_with_retry(session, host, metrics_url)
        if not metrics_data:
            return

        # Each sensor is an object with a "Reading"; a missing or null
        # sensor yields None instead of raising.
        line_input_v = (metrics_data.get("InputVoltage") or {}).get("Reading")
        watts_input = (metrics_data.get("InputPowerWatts") or {}).get("Reading")
        amps_input = (metrics_data.get("InputCurrentAmps") or {}).get("Reading")

    elif power_resource_type == "Power":
        # Older Redfish API: readings are plain fields of the power supply.
        line_input_v = psu_data.get("LineInputVoltage")
        watts_input = psu_data.get("PowerInputWatts")
        amps_input = psu_data.get("InputCurrentAmps")

    else:
        logging.error(
            "Unknown power resource type for PowerSupply %s", psu_data.get("Id")
        )
        return

    # Derive the current when it is not reported; the truthiness test on the
    # voltage also prevents a division by zero.
    if amps_input is None and line_input_v and watts_input:
        amps_input = round(watts_input / line_input_v, 2)

    # Update Prometheus metrics, skipping readings that are unavailable.
    readings = (
        (voltage_gauge, line_input_v),
        (watts_gauge, watts_input),
        (amps_gauge, amps_input),
    )
    for gauge, value in readings:
        if value is not None:
            gauge.labels(host=host.fqdn, psu_serial=serial).set(value)
|
||||||
|
|
||||||
|
|
||||||
async def get_power_data(session, host: HostConfig):
|
async def get_power_data(session, host: HostConfig):
|
||||||
"""Query Redfish for power data and update Prometheus metrics"""
|
"""Query Redfish for power data and update Prometheus metrics"""
|
||||||
if host.should_skip():
|
if host.should_skip():
|
||||||
@@ -254,75 +377,61 @@ async def get_power_data(session, host: HostConfig):
|
|||||||
if not member_data:
|
if not member_data:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# PowerSubsystem url
|
# Get Power resource (PowerSubsystem, falling back to legacy "Power")
|
||||||
power_subsystem_url = member_data.get("PowerSubsystem", {}).get("@odata.id")
|
power_resource_url, power_resource_type = get_power_resource_info(
|
||||||
if not power_subsystem_url:
|
member_data, host.fqdn
|
||||||
logging.warning("No PowerSubsystem found for %s", host.fqdn)
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Get PowerSubsystem collection
|
|
||||||
power_subsystem_url = f"https://{host.fqdn}{power_subsystem_url}"
|
|
||||||
power_subsystem_data = await fetch_with_retry(
|
|
||||||
session, host, power_subsystem_url
|
|
||||||
)
|
)
|
||||||
if not power_subsystem_data:
|
if not power_resource_url:
|
||||||
logging.warning("No PowerSubsystem data found for %s", host.fqdn)
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Get PowerSupplies url
|
# Get Power Data
|
||||||
power_supplies_url = power_subsystem_data.get("PowerSupplies", {}).get(
|
power_data = await fetch_with_retry(session, host, power_resource_url)
|
||||||
"@odata.id"
|
if not power_data:
|
||||||
)
|
|
||||||
if not power_supplies_url:
|
|
||||||
logging.warning("No PowerSupplies found for %s", host.fqdn)
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# List PowerSupplies members
|
# Get PowerSupplies, depending on the resource type ("Power" or "PowerSubsystem")
|
||||||
power_supplies_url = f"https://{host.fqdn}{power_supplies_url}"
|
if power_resource_type == "PowerSubsystem":
|
||||||
power_supplies_data = await fetch_with_retry(session, host, power_supplies_url)
|
# Query the PowerSupplies URL (for PowerSubsystem)
|
||||||
if not power_supplies_data:
|
power_supplies_url = power_data.get("PowerSupplies", {}).get("@odata.id")
|
||||||
|
if not power_supplies_url:
|
||||||
|
logging.warning("No PowerSupplies found for %s", host.fqdn)
|
||||||
|
continue
|
||||||
|
|
||||||
|
power_supplies_url = f"https://{host.fqdn}{power_supplies_url}"
|
||||||
|
power_supplies_data = await fetch_with_retry(
|
||||||
|
session, host, power_supplies_url
|
||||||
|
)
|
||||||
|
if not power_supplies_data:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# loop over Members for "PowerSubsystem"
|
||||||
|
for psu_member in power_supplies_data.get("Members", []):
|
||||||
|
psu_url = psu_member.get("@odata.id")
|
||||||
|
if not psu_url:
|
||||||
|
continue
|
||||||
|
|
||||||
|
psu_url = f"https://{host.fqdn}{psu_url}"
|
||||||
|
psu_data = await fetch_with_retry(session, host, psu_url)
|
||||||
|
if not psu_data:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Process PowerSupplies object
|
||||||
|
await process_power_supply(session, host, psu_data, "PowerSubsystem")
|
||||||
|
|
||||||
|
elif power_resource_type == "Power":
|
||||||
|
# Loop over PowerSupplies for older Redfish versions
|
||||||
|
for psu in power_data.get("PowerSupplies", []):
|
||||||
|
# Process PowerSupplies object
|
||||||
|
await process_power_supply(session, host, psu, "Power")
|
||||||
|
|
||||||
|
else:
|
||||||
|
logging.error("Unknown power resource type for host %s", host.fqdn)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Loop over PowerSupply members
|
# Measure request and process latency
|
||||||
for psu_member in power_supplies_data.get("Members", []):
|
|
||||||
psu_url = psu_member.get("@odata.id")
|
|
||||||
if not psu_url:
|
|
||||||
continue
|
|
||||||
|
|
||||||
psu_url = f"https://{host.fqdn}{psu_url}"
|
|
||||||
psu_data = await fetch_with_retry(session, host, psu_url)
|
|
||||||
if not psu_data:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Get Metrics URL
|
|
||||||
metrics_url = psu_data.get("Metrics", {}).get("@odata.id")
|
|
||||||
if not metrics_url:
|
|
||||||
logging.warning(
|
|
||||||
"No Metrics found for PowerSupply %s", psu_data.get("Id")
|
|
||||||
)
|
|
||||||
continue
|
|
||||||
|
|
||||||
metrics_url = f"https://{host.fqdn}{metrics_url}"
|
|
||||||
metrics_data = await fetch_with_retry(session, host, metrics_url)
|
|
||||||
if not metrics_data:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Get Metrics from data
|
|
||||||
line_input_v = metrics_data.get("InputVoltage", {}).get("Reading")
|
|
||||||
watts_input = metrics_data.get("InputPowerWatts", {}).get("Reading")
|
|
||||||
amps_input = metrics_data.get("InputCurrentAmps", {}).get("Reading")
|
|
||||||
serial = psu_data.get("SerialNumber")
|
|
||||||
if line_input_v is not None:
|
|
||||||
voltage_gauge.labels(host=host.fqdn, psu_serial=serial).set(
|
|
||||||
line_input_v
|
|
||||||
)
|
|
||||||
if watts_input is not None:
|
|
||||||
watts_gauge.labels(host=host.fqdn, psu_serial=serial).set(watts_input)
|
|
||||||
if amps_input is not None:
|
|
||||||
amps_gauge.labels(host=host.fqdn, psu_serial=serial).set(amps_input)
|
|
||||||
|
|
||||||
REQUEST_LATENCY.labels(host=host.fqdn).observe(time.monotonic() - start)
|
REQUEST_LATENCY.labels(host=host.fqdn).observe(time.monotonic() - start)
|
||||||
|
|
||||||
|
|
||||||
async def get_system_info(session, host: HostConfig):
|
async def get_system_info(session, host: HostConfig):
|
||||||
"""Query Redfish for system data and update Prometheus metrics"""
|
"""Query Redfish for system data and update Prometheus metrics"""
|
||||||
if host.should_skip():
|
if host.should_skip():
|
||||||
@@ -330,7 +439,7 @@ async def get_system_info(session, host: HostConfig):
|
|||||||
"Skipping %s (in cool-down until %.1f)", host.fqdn, host.next_retry_time
|
"Skipping %s (in cool-down until %.1f)", host.fqdn, host.next_retry_time
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
# Get Vendor and Redfish Version
|
# Get Vendor and Redfish Version
|
||||||
root_url = f"https://{host.fqdn}/redfish/v1/"
|
root_url = f"https://{host.fqdn}/redfish/v1/"
|
||||||
root_data = await fetch_with_retry(session, host, root_url)
|
root_data = await fetch_with_retry(session, host, root_url)
|
||||||
@@ -339,7 +448,12 @@ async def get_system_info(session, host: HostConfig):
|
|||||||
return
|
return
|
||||||
|
|
||||||
redfish_version = root_data.get("RedfishVersion")
|
redfish_version = root_data.get("RedfishVersion")
|
||||||
|
if not redfish_version:
|
||||||
|
print("Marco Lucarelli: INFO! redfish_version")
|
||||||
|
|
||||||
vendor = root_data.get("Vendor")
|
vendor = root_data.get("Vendor")
|
||||||
|
if not vendor:
|
||||||
|
print("Marco Lucarelli: INFO! vendor")
|
||||||
|
|
||||||
# Get Manufacturer, Serial and Model
|
# Get Manufacturer, Serial and Model
|
||||||
systems_url = f"https://{host.fqdn}/redfish/v1/Systems/"
|
systems_url = f"https://{host.fqdn}/redfish/v1/Systems/"
|
||||||
@@ -354,7 +468,9 @@ async def get_system_info(session, host: HostConfig):
|
|||||||
if not system_url:
|
if not system_url:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
system_data = await fetch_with_retry(session, host, f"https://{host.fqdn}{system_url}")
|
system_data = await fetch_with_retry(
|
||||||
|
session, host, f"https://{host.fqdn}{system_url}"
|
||||||
|
)
|
||||||
if not system_data:
|
if not system_data:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@@ -374,7 +490,6 @@ async def get_system_info(session, host: HostConfig):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
async def logout_host(session, host):
|
async def logout_host(session, host):
|
||||||
"""Clean logout for Redfish with session tokens"""
|
"""Clean logout for Redfish with session tokens"""
|
||||||
if not host.session_token:
|
if not host.session_token:
|
||||||
@@ -438,7 +553,7 @@ async def run_exporter(config, stop_event):
|
|||||||
tasks = []
|
tasks = []
|
||||||
for hc in host_objs:
|
for hc in host_objs:
|
||||||
tasks.append(get_power_data(session, hc))
|
tasks.append(get_power_data(session, hc))
|
||||||
tasks.append(get_system_info(session, hc))
|
# tasks.append(get_system_info(session, hc))
|
||||||
await asyncio.gather(*tasks)
|
await asyncio.gather(*tasks)
|
||||||
await process_request(interval)
|
await process_request(interval)
|
||||||
finally:
|
finally:
|
||||||
|
|||||||
Reference in New Issue
Block a user