Compare commits
2 Commits
381d06ae58
...
9bedf0c799
| Author | SHA1 | Date | |
|---|---|---|---|
|
9bedf0c799
|
|||
|
f3c4bc1953
|
@@ -19,6 +19,22 @@ from prometheus_client import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Groups the Redfish resource URLs (the "@odata.id" links) discovered for one host.
@dataclass
|
||||||
|
class RedfishResource:
|
||||||
|
"""Container for Redfish resource URLs."""
|
||||||
|
# "@odata.id" of the service root's "Chassis" entry (set by discover_redfish_resources).
chassis: str | None = None
|
||||||
|
# "@odata.id" of the service root's "Systems" entry.
systems: str | None = None
|
||||||
|
# NOTE(review): not populated by discover_redfish_resources in the visible code;
# presumably reserved for a Power resource URL - confirm against callers.
power: str | None = None
|
||||||
|
# "@odata.id" of the service root's "SessionService" entry.
session_service: str | None = None
|
||||||
|
|
||||||
|
# Readings extracted from a single PowerSupply entry by process_power_supply.
@dataclass
|
||||||
|
class PowerMetrics:
|
||||||
|
"""Container for power metrics."""
|
||||||
|
# Input voltage: "InputVoltage"/"Reading" (PowerSubsystem) or "LineInputVoltage" (Power).
voltage: float | None = None
|
||||||
|
# Input power: "InputPowerWatts"/"Reading" (PowerSubsystem) or "PowerInputWatts",
# falling back to "LastPowerOutputWatts" (Power).
watts: float | None = None
|
||||||
|
# Input current: "InputCurrentAmps"; may instead be derived as watts / voltage
# (rounded to 2 decimals) when the value is not reported.
amps: float | None = None
|
||||||
|
# "SerialNumber" of the power supply; used as the psu_serial label of the gauges.
serial: str | None = None
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class RedfishSession:
|
class RedfishSession:
|
||||||
"""Container for Redfish session data."""
|
"""Container for Redfish session data."""
|
||||||
@@ -132,8 +148,8 @@ async def login_hpe(session, host: HostConfig) -> bool:
|
|||||||
try:
|
try:
|
||||||
async with session.post(login_url, json=payload, ssl=False, timeout=10) as login_resp:
|
async with session.post(login_url, json=payload, ssl=False, timeout=10) as login_resp:
|
||||||
if login_resp.status == 201:
|
if login_resp.status == 201:
|
||||||
host.session_token = login_resp.headers.get("X-Auth-Token")
|
host.session.token = login_resp.headers.get("X-Auth-Token")
|
||||||
host.session_logout = login_resp.headers.get("Location")
|
host.session.logout_url = login_resp.headers.get("Location")
|
||||||
|
|
||||||
if not host.session.token or not host.session.logout_url:
|
if not host.session.token or not host.session.logout_url:
|
||||||
raise RuntimeError("Invalid login response")
|
raise RuntimeError("Invalid login response")
|
||||||
@@ -228,23 +244,25 @@ async def fetch_with_retry(session, host: HostConfig, url: str) -> dict | None:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
async def discover_redfish_resources(session, host: HostConfig) -> RedfishResource | None:
    """Discover available Redfish resources and return the relevant URLs.

    Fetches the service root (``/redfish/v1/``) of ``host`` and reads the
    ``@odata.id`` links of its ``Chassis``, ``Systems`` and ``SessionService``
    entries.

    Args:
        session: Client session object, forwarded to ``fetch_with_retry``.
        host: Host configuration; ``host.fqdn`` is used to build the root URL.

    Returns:
        A ``RedfishResource`` holding the discovered links, or ``None`` when
        the service root could not be fetched or exposes no Chassis link.
    """
    root_url = f"https://{host.fqdn}/redfish/v1/"
    data = await fetch_with_retry(session, host, root_url)
    if not data:
        # Return None (not an empty dict) so every failure path matches the
        # annotated return type; callers testing ``if not resources`` still work.
        return None

    # Create the RedfishResource object from the links in the root response
    resources = RedfishResource(
        chassis=data.get("Chassis", {}).get("@odata.id"),
        systems=data.get("Systems", {}).get("@odata.id"),
        session_service=data.get("SessionService", {}).get("@odata.id"),
    )

    # Without a Chassis collection there is nothing to query power data from
    if not resources.chassis:
        logging.error("No valid Chassis URL found for host %s", host.fqdn)
        return None

    return resources
|
||||||
|
|
||||||
|
|
||||||
def get_power_resource_info(
|
def get_power_resource_info(
|
||||||
@@ -313,54 +331,46 @@ def process_power_supplies(
|
|||||||
|
|
||||||
async def process_power_supply(
    session, host: HostConfig, psu_data: dict, power_resource_type: str
) -> PowerMetrics | None:
    """Extract voltage, power and current readings from one PowerSupply entry.

    Args:
        session: Client session object, forwarded to ``fetch_with_retry``.
        host: Host configuration; ``host.fqdn`` is used to build the Metrics URL.
        psu_data: The PowerSupply object as returned by the Redfish API.
        power_resource_type: ``"PowerSubsystem"`` (readings live in a linked
            Metrics resource) or ``"Power"`` (readings are inline in
            ``psu_data``).

    Returns:
        A ``PowerMetrics`` instance, or ``None`` when the Metrics link is
        missing, the Metrics resource could not be fetched, or the resource
        type is unknown.
    """
    metrics = PowerMetrics(serial=psu_data.get("SerialNumber"))

    if power_resource_type == "PowerSubsystem":
        # Newer Redfish API: metrics live in a separate "Metrics" resource
        metrics_url = psu_data.get("Metrics", {}).get("@odata.id")
        if not metrics_url:
            logging.warning("No Metrics found for PowerSupply %s", psu_data.get("Id"))
            return None

        metrics_url = f"https://{host.fqdn}{metrics_url}"
        metrics_data = await fetch_with_retry(session, host, metrics_url)
        if not metrics_data:
            return None

        # Get metrics from the Metrics resource
        metrics.voltage = metrics_data.get("InputVoltage", {}).get("Reading")
        metrics.watts = metrics_data.get("InputPowerWatts", {}).get("Reading")
        metrics.amps = metrics_data.get("InputCurrentAmps", {}).get("Reading")

    elif power_resource_type == "Power":
        # Older Redfish API: metrics are stored directly in the PowerSupply object
        metrics.voltage = psu_data.get("LineInputVoltage")
        metrics.watts = psu_data.get("PowerInputWatts")
        if metrics.watts is None:
            metrics.watts = psu_data.get("LastPowerOutputWatts")
        metrics.amps = psu_data.get("InputCurrentAmps")

    else:
        logging.error(
            "Unknown power resource type for PowerSupply %s", psu_data.get("Id")
        )
        return None

    # Derive the current from power and voltage when it is not reported.
    # This applies to both resource types; the truthiness check on the
    # voltage also guards against a division by zero.
    if metrics.amps is None and metrics.voltage and metrics.watts:
        metrics.amps = round(metrics.watts / metrics.voltage, 2)

    return metrics
|
|
||||||
|
|
||||||
|
|
||||||
def normalize_url(url: str) -> str:
|
def normalize_url(url: str) -> str:
|
||||||
@@ -382,34 +392,25 @@ async def get_power_data(session, host: HostConfig):
|
|||||||
|
|
||||||
# Start time measurement
|
# Start time measurement
|
||||||
start = time.monotonic()
|
start = time.monotonic()
|
||||||
# Root ressource abfragen
|
# Get root resources
|
||||||
resources = await discover_redfish_resources(session, host)
|
resources = await discover_redfish_resources(session, host)
|
||||||
if not resources:
|
if not resources or not resources.chassis:
|
||||||
logging.error("Could not discover any resources for %s", host.fqdn)
|
logging.error("Could not discover any resources for %s", host.fqdn)
|
||||||
host.mark_failure()
|
host.mark_failure()
|
||||||
UP_GAUGE.labels(host=host.fqdn).set(0)
|
UP_GAUGE.labels(host=host.fqdn).set(0)
|
||||||
return
|
return
|
||||||
|
|
||||||
chassis_url = resources.get("Chassis")
|
|
||||||
if not chassis_url:
|
|
||||||
logging.error("No valid Chassis URL found for %s", host.fqdn)
|
|
||||||
host.mark_failure()
|
|
||||||
UP_GAUGE.labels(host=host.fqdn).set(0)
|
|
||||||
return
|
|
||||||
|
|
||||||
# Mark host as up
|
|
||||||
host.mark_success()
|
host.mark_success()
|
||||||
UP_GAUGE.labels(host=host.fqdn).set(1)
|
UP_GAUGE.labels(host=host.fqdn).set(1)
|
||||||
|
|
||||||
# Get chassis ressource
|
chassis_url = resources.get("Chassis")
|
||||||
chassis_url = f"https://{host.fqdn}{chassis_url}"
|
|
||||||
chassis_data = await fetch_with_retry(session, host, chassis_url)
|
chassis_data = await fetch_with_retry(session, host, chassis_url)
|
||||||
if not chassis_data:
|
if not chassis_data:
|
||||||
host.mark_failure()
|
host.mark_failure()
|
||||||
UP_GAUGE.labels(host=host.fqdn).set(0)
|
UP_GAUGE.labels(host=host.fqdn).set(0)
|
||||||
return
|
return
|
||||||
|
|
||||||
# loop over each member in chassis ressource
|
|
||||||
for chassis_member in chassis_data.get("Members", []):
|
for chassis_member in chassis_data.get("Members", []):
|
||||||
chassis_member_url = chassis_member.get("@odata.id")
|
chassis_member_url = chassis_member.get("@odata.id")
|
||||||
if not chassis_member_url:
|
if not chassis_member_url:
|
||||||
@@ -417,7 +418,6 @@ async def get_power_data(session, host: HostConfig):
|
|||||||
|
|
||||||
# Normalize URL... I needed this for really old Redfish versions :S (<1.6.0)
|
# Normalize URL... I needed this for really old Redfish versions :S (<1.6.0)
|
||||||
chassis_member_url = normalize_url(chassis_member_url)
|
chassis_member_url = normalize_url(chassis_member_url)
|
||||||
|
|
||||||
# Get chassis id from url ("/redfish/v1/Chassis/1" -> 1)
|
# Get chassis id from url ("/redfish/v1/Chassis/1" -> 1)
|
||||||
chassis_member_id = chassis_member_url.split("/")[-1]
|
chassis_member_id = chassis_member_url.split("/")[-1]
|
||||||
# Check if the chassis id is in config (had problem with chassis "NVMe")
|
# Check if the chassis id is in config (had problem with chassis "NVMe")
|
||||||
@@ -431,9 +431,7 @@ async def get_power_data(session, host: HostConfig):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
# Get Power ressource (fallback to "Power")
|
# Get Power ressource (fallback to "Power")
|
||||||
power_resource_url, power_resource_type = get_power_resource_info(
|
power_resource_url, power_resource_type = get_power_resource_info(member_data, host.fqdn)
|
||||||
member_data, host.fqdn
|
|
||||||
)
|
|
||||||
if not power_resource_url:
|
if not power_resource_url:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@@ -444,16 +442,14 @@ async def get_power_data(session, host: HostConfig):
|
|||||||
|
|
||||||
# Get PowerSupplies, depend on ressource type ("Power" or "PowerSubsystem")
|
# Get PowerSupplies, depend on ressource type ("Power" or "PowerSubsystem")
|
||||||
if power_resource_type == "PowerSubsystem":
|
if power_resource_type == "PowerSubsystem":
|
||||||
# PowerSupplies-URL abfragen (für PowerSubsystem)
|
# Request PowerSupplies url (for PowerSubsystem)
|
||||||
power_supplies_url = power_data.get("PowerSupplies", {}).get("@odata.id")
|
power_supplies_url = power_data.get("PowerSupplies", {}).get("@odata.id")
|
||||||
if not power_supplies_url:
|
if not power_supplies_url:
|
||||||
logging.warning("No PowerSupplies found for %s", host.fqdn)
|
logging.warning("No PowerSupplies found for %s", host.fqdn)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
power_supplies_url = f"https://{host.fqdn}{power_supplies_url}"
|
power_supplies_url = f"https://{host.fqdn}{power_supplies_url}"
|
||||||
power_supplies_data = await fetch_with_retry(
|
power_supplies_data = await fetch_with_retry(session, host, power_supplies_url)
|
||||||
session, host, power_supplies_url
|
|
||||||
)
|
|
||||||
if not power_supplies_data:
|
if not power_supplies_data:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@@ -469,13 +465,17 @@ async def get_power_data(session, host: HostConfig):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
# Process PowerSupplies object
|
# Process PowerSupplies object
|
||||||
await process_power_supply(session, host, psu_data, "PowerSubsystem")
|
metrics = await process_power_supply(session, host, psu_data, "PowerSubsystem")
|
||||||
|
if metrics:
|
||||||
|
update_prometheus_metrics(host, metrics)
|
||||||
|
|
||||||
elif power_resource_type == "Power":
|
elif power_resource_type == "Power":
|
||||||
# Loop over PowerSupplies for older Redfish versions
|
# Loop over PowerSupplies for older Redfish versions
|
||||||
for psu in power_data.get("PowerSupplies", []):
|
for psu in power_data.get("PowerSupplies", []):
|
||||||
# Process PowerSupplies object
|
# Process PowerSupplies object
|
||||||
await process_power_supply(session, host, psu, "Power")
|
metrics = await process_power_supply(session, host, psu, "Power")
|
||||||
|
if metrics:
|
||||||
|
update_prometheus_metrics(host, metrics)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
logging.error("Unknown power resource type for host %s", host.fqdn)
|
logging.error("Unknown power resource type for host %s", host.fqdn)
|
||||||
@@ -484,6 +484,15 @@ async def get_power_data(session, host: HostConfig):
|
|||||||
# Measure request and process latency
|
# Measure request and process latency
|
||||||
REQUEST_LATENCY.labels(host=host.fqdn).observe(time.monotonic() - start)
|
REQUEST_LATENCY.labels(host=host.fqdn).observe(time.monotonic() - start)
|
||||||
|
|
||||||
|
def update_prometheus_metrics(host: HostConfig, metrics: PowerMetrics):
    """Publish the readings of one power supply to the Prometheus gauges.

    Nothing is exported when the power supply has no serial number, because
    the serial is used as the ``psu_serial`` label. Readings that are ``None``
    are skipped individually.
    """
    # Guard clause: every gauge needs the serial as a label value
    if not metrics.serial:
        return

    # Pair each gauge with its reading, in the order voltage, watts, amps
    readings = (
        (VOLTAGE_GAUGE, metrics.voltage),
        (WATTS_GAUGE, metrics.watts),
        (AMPS_GAUGE, metrics.amps),
    )
    for gauge, value in readings:
        if value is not None:
            gauge.labels(host=host.fqdn, psu_serial=metrics.serial).set(value)
|
||||||
|
|
||||||
|
|
||||||
async def get_system_info(session, host: HostConfig):
|
async def get_system_info(session, host: HostConfig):
|
||||||
"""Query Redfish for system data and update Prometheus metrics"""
|
"""Query Redfish for system data and update Prometheus metrics"""
|
||||||
|
|||||||
Reference in New Issue
Block a user