This commit is contained in:
2026-01-30 14:07:36 +01:00
parent 0f9c92763c
commit 5f860ada6a
3 changed files with 39 additions and 36 deletions

View File

@@ -9,7 +9,6 @@ import asyncio
import aiohttp
import urllib3
import yaml
import json
from prometheus_client import (
Gauge,
start_http_server,
@@ -70,23 +69,23 @@ REQUEST_TIME = Summary("request_processing_seconds", "Time spent processing requ
REQUEST_LATENCY = Histogram(
"redfish_request_latency_seconds", "Time for Redfish request", ["host"]
)
up_gauge = Gauge("redfish_up", "Host up/down", ["host"])
error_counter = Counter(
UP_GAUGE = Gauge("redfish_up", "Host up/down", ["host"])
ERROR_COUNTER = Counter(
"redfish_errors_total", "Total Redfish errors", ["host", "error"]
)
voltage_gauge = Gauge(
VOLTAGE_GAUGE = Gauge(
"redfish_psu_line_input_voltage_volts",
"Line Input Voltage per PSU",
["host", "psu_serial"],
)
watts_gauge = Gauge(
WATTS_GAUGE = Gauge(
"redfish_psu_power_input_watts", "Power Input Watts per PSU", ["host", "psu_serial"]
)
amps_gauge = Gauge(
AMPS_GAUGE = Gauge(
"redfish_psu_input_amps", "Current draw in Amps per PSU", ["host", "psu_serial"]
)
# set info metric
system_info = Info(
SYSTEM_INFO = Info(
"redfish_system_info", "System information (model, serial, etc.)", ["host"]
)
@@ -103,7 +102,7 @@ async def fetch_with_retry(session, host: HostConfig, url: str) -> dict | None:
logging.warning(
"Skipping %s (in cool-down until %.1f)", host.fqdn, host.next_retry_time
)
up_gauge.labels(host=host.fqdn).set(0)
UP_GAUGE.labels(host=host.fqdn).set(0)
return None
if not host.vendor:
@@ -182,7 +181,7 @@ async def fetch_with_retry(session, host: HostConfig, url: str) -> dict | None:
)
else:
# Default: BasicAuth, like Supermicro and so
# Default: BasicAuth
async with session.get(
url,
auth=aiohttp.BasicAuth(host.username, host.password),
@@ -259,7 +258,7 @@ def get_power_resource_info(
return None, None
def get_power_supplies_url(
def process_power_supplies_url(
power_data: dict, power_resource_type: str, host_fqdn: str
) -> str | None:
"""Get the URL for PowerSupplies based on the Power resource type."""
@@ -278,8 +277,9 @@ def get_power_supplies_url(
return None
def get_power_supplies(
power_data: dict, power_resource_type: str, host_fqdn: str
def process_power_supplies(
power_data: dict,
power_resource_type: str,
) -> list[dict] | None:
"""Get PowerSupplies data based on the Power resource type."""
if power_resource_type == "PowerSubsystem":
@@ -325,7 +325,12 @@ async def process_power_supply(
# Older Redfish API: Metrics are direct in PowerSupply as an array
line_input_v = psu_data.get("LineInputVoltage")
watts_input = psu_data.get("PowerInputWatts")
if watts_input is None:
watts_input = psu_data.get("LastPowerOutputWatts")
amps_input = psu_data.get("InputCurrentAmps")
if amps_input is None:
if line_input_v and watts_input:
amps_input = round(watts_input / line_input_v, 2)
else:
logging.error(
@@ -338,11 +343,19 @@ async def process_power_supply(
# Update Prometheus metrics
if line_input_v is not None:
voltage_gauge.labels(host=host.fqdn, psu_serial=serial).set(line_input_v)
VOLTAGE_GAUGE.labels(host=host.fqdn, psu_serial=serial).set(line_input_v)
if watts_input is not None:
watts_gauge.labels(host=host.fqdn, psu_serial=serial).set(watts_input)
WATTS_GAUGE.labels(host=host.fqdn, psu_serial=serial).set(watts_input)
if amps_input is not None:
amps_gauge.labels(host=host.fqdn, psu_serial=serial).set(amps_input)
AMPS_GAUGE.labels(host=host.fqdn, psu_serial=serial).set(amps_input)
def normalize_url(url: str) -> str:
    """Ensure the URL does not end with a trailing slash.

    Every trailing "/" is removed, so the last path segment is never
    empty when the result is split on "/". URLs without a trailing
    slash, and the empty string, are returned unchanged.

    Args:
        url: URL or URL path, e.g. "/redfish/v1/Chassis/1/".

    Returns:
        The same string with all trailing "/" characters stripped.
    """
    # Per the original author's note, really old Redfish versions (<1.6.0)
    # made this necessary. rstrip also covers doubled slashes ("//"), which
    # stripping a single character would leave behind.
    return url.rstrip("/")
async def get_power_data(session, host: HostConfig):
@@ -351,7 +364,7 @@ async def get_power_data(session, host: HostConfig):
logging.warning(
"Skipping %s (in cool-down until %.1f)", host.fqdn, host.next_retry_time
)
up_gauge.labels(host=host.fqdn).set(0)
UP_GAUGE.labels(host=host.fqdn).set(0)
return
# Start time measurement
@@ -361,26 +374,26 @@ async def get_power_data(session, host: HostConfig):
if not resources:
logging.error("Could not discover any resources for %s", host.fqdn)
host.mark_failure()
up_gauge.labels(host=host.fqdn).set(0)
UP_GAUGE.labels(host=host.fqdn).set(0)
return
chassis_url = resources.get("Chassis")
if not chassis_url:
logging.error("No valid Chassis URL found for %s", host.fqdn)
host.mark_failure()
up_gauge.labels(host=host.fqdn).set(0)
UP_GAUGE.labels(host=host.fqdn).set(0)
return
# Mark host as up
host.mark_success()
up_gauge.labels(host=host.fqdn).set(1)
UP_GAUGE.labels(host=host.fqdn).set(1)
# Get chassis resource
chassis_url = f"https://{host.fqdn}{chassis_url}"
chassis_data = await fetch_with_retry(session, host, chassis_url)
if not chassis_data:
host.mark_failure()
up_gauge.labels(host=host.fqdn).set(0)
UP_GAUGE.labels(host=host.fqdn).set(0)
return
# Loop over each member in the chassis resource
@@ -389,8 +402,8 @@ async def get_power_data(session, host: HostConfig):
if not chassis_member_url:
continue
# Debug output for the chassis member URL
print(f"Chassis Member URL: {chassis_member_url}")
# Normalize URL: this was needed for really old Redfish versions (<1.6.0)
chassis_member_url = normalize_url(chassis_member_url)
# Get chassis id from url ("/redfish/v1/Chassis/1" -> 1)
chassis_member_id = chassis_member_url.split("/")[-1]
@@ -401,13 +414,9 @@ async def get_power_data(session, host: HostConfig):
member_url = f"https://{host.fqdn}{chassis_member_url}"
member_data = await fetch_with_retry(session, host, member_url)
# Debug output for the chassis member data
print(f"Chassis Member Data: {json.dumps(member_data, indent=4)}")
if not member_data:
continue
# Get Power resource (fallback to "Power")
power_resource_url, power_resource_type = get_power_resource_info(
member_data, host.fqdn
@@ -503,7 +512,7 @@ async def get_system_info(session, host: HostConfig):
serial_number = system_data.get("SerialNumber")
# Placeholder comment (German idiom: "your advertisement could go here")
system_info.labels(host=host.fqdn).info(
SYSTEM_INFO.labels(host=host.fqdn).info(
{
"manufacturer": manufacturer,
"model": model,