This commit is contained in:
2026-01-30 14:07:36 +01:00
parent 0f9c92763c
commit 5f860ada6a
3 changed files with 39 additions and 36 deletions

View File

@@ -1,6 +1,6 @@
[project] [project]
name = "redfish-api" name = "redfish-exporter"
version = "0.1.0" version = "1.0.0"
description = "Read redfish api powerdata" description = "Read redfish api powerdata"
readme = "README.md" readme = "README.md"
requires-python = ">=3.13" requires-python = ">=3.13"

View File

@@ -9,7 +9,6 @@ import asyncio
import aiohttp import aiohttp
import urllib3 import urllib3
import yaml import yaml
import json
from prometheus_client import ( from prometheus_client import (
Gauge, Gauge,
start_http_server, start_http_server,
@@ -70,23 +69,23 @@ REQUEST_TIME = Summary("request_processing_seconds", "Time spent processing requ
REQUEST_LATENCY = Histogram( REQUEST_LATENCY = Histogram(
"redfish_request_latency_seconds", "Time for Redfish request", ["host"] "redfish_request_latency_seconds", "Time for Redfish request", ["host"]
) )
up_gauge = Gauge("redfish_up", "Host up/down", ["host"]) UP_GAUGE = Gauge("redfish_up", "Host up/down", ["host"])
error_counter = Counter( ERROR_COUNTER = Counter(
"redfish_errors_total", "Total Redfish errors", ["host", "error"] "redfish_errors_total", "Total Redfish errors", ["host", "error"]
) )
voltage_gauge = Gauge( VOLTAGE_GAUGE = Gauge(
"redfish_psu_line_input_voltage_volts", "redfish_psu_line_input_voltage_volts",
"Line Input Voltage per PSU", "Line Input Voltage per PSU",
["host", "psu_serial"], ["host", "psu_serial"],
) )
watts_gauge = Gauge( WATTS_GAUGE = Gauge(
"redfish_psu_power_input_watts", "Power Input Watts per PSU", ["host", "psu_serial"] "redfish_psu_power_input_watts", "Power Input Watts per PSU", ["host", "psu_serial"]
) )
amps_gauge = Gauge( AMPS_GAUGE = Gauge(
"redfish_psu_input_amps", "Current draw in Amps per PSU", ["host", "psu_serial"] "redfish_psu_input_amps", "Current draw in Amps per PSU", ["host", "psu_serial"]
) )
# set info metric # set info metric
system_info = Info( SYSTEM_INFO = Info(
"redfish_system_info", "System information (model, serial, etc.)", ["host"] "redfish_system_info", "System information (model, serial, etc.)", ["host"]
) )
@@ -103,7 +102,7 @@ async def fetch_with_retry(session, host: HostConfig, url: str) -> dict | None:
logging.warning( logging.warning(
"Skipping %s (in cool-down until %.1f)", host.fqdn, host.next_retry_time "Skipping %s (in cool-down until %.1f)", host.fqdn, host.next_retry_time
) )
up_gauge.labels(host=host.fqdn).set(0) UP_GAUGE.labels(host=host.fqdn).set(0)
return None return None
if not host.vendor: if not host.vendor:
@@ -182,7 +181,7 @@ async def fetch_with_retry(session, host: HostConfig, url: str) -> dict | None:
) )
else: else:
# Default: BasicAuth, like Supermicro and so # Default: BasicAuth
async with session.get( async with session.get(
url, url,
auth=aiohttp.BasicAuth(host.username, host.password), auth=aiohttp.BasicAuth(host.username, host.password),
@@ -259,7 +258,7 @@ def get_power_resource_info(
return None, None return None, None
def get_power_supplies_url( def process_power_supplies_url(
power_data: dict, power_resource_type: str, host_fqdn: str power_data: dict, power_resource_type: str, host_fqdn: str
) -> str | None: ) -> str | None:
"""Get the URL for PowerSupplies based on the Power resource type.""" """Get the URL for PowerSupplies based on the Power resource type."""
@@ -278,8 +277,9 @@ def get_power_supplies_url(
return None return None
def get_power_supplies( def process_power_supplies(
power_data: dict, power_resource_type: str, host_fqdn: str power_data: dict,
power_resource_type: str,
) -> list[dict] | None: ) -> list[dict] | None:
"""Get PowerSupplies data based on the Power resource type.""" """Get PowerSupplies data based on the Power resource type."""
if power_resource_type == "PowerSubsystem": if power_resource_type == "PowerSubsystem":
@@ -325,7 +325,12 @@ async def process_power_supply(
# Older Redfish API: Metrics are direct in PowerSupply as an array # Older Redfish API: Metrics are direct in PowerSupply as an array
line_input_v = psu_data.get("LineInputVoltage") line_input_v = psu_data.get("LineInputVoltage")
watts_input = psu_data.get("PowerInputWatts") watts_input = psu_data.get("PowerInputWatts")
if watts_input is None:
watts_input = psu_data.get("LastPowerOutputWatts")
amps_input = psu_data.get("InputCurrentAmps") amps_input = psu_data.get("InputCurrentAmps")
if amps_input is None:
if line_input_v and watts_input:
amps_input = round(watts_input / line_input_v, 2)
else: else:
logging.error( logging.error(
@@ -338,11 +343,19 @@ async def process_power_supply(
# Update Prometheus metrics # Update Prometheus metrics
if line_input_v is not None: if line_input_v is not None:
voltage_gauge.labels(host=host.fqdn, psu_serial=serial).set(line_input_v) VOLTAGE_GAUGE.labels(host=host.fqdn, psu_serial=serial).set(line_input_v)
if watts_input is not None: if watts_input is not None:
watts_gauge.labels(host=host.fqdn, psu_serial=serial).set(watts_input) WATTS_GAUGE.labels(host=host.fqdn, psu_serial=serial).set(watts_input)
if amps_input is not None: if amps_input is not None:
amps_gauge.labels(host=host.fqdn, psu_serial=serial).set(amps_input) AMPS_GAUGE.labels(host=host.fqdn, psu_serial=serial).set(amps_input)
def normalize_url(url: str) -> str:
    """Return *url* with every trailing slash removed.

    Very old Redfish implementations (<1.6.0) report member URLs with a
    trailing slash. Callers that take the last path segment of the URL
    (e.g. to derive a chassis id) would otherwise get an empty string.

    Args:
        url: URL or URL path, with or without trailing slashes.

    Returns:
        The same string without trailing ``/`` characters. A string made up
        only of slashes becomes the empty string.
    """
    # rstrip rather than url[:-1]: a doubled trailing slash ("…/1//") must
    # not leave one slash behind, or the "no trailing slash" guarantee breaks.
    return url.rstrip("/")
async def get_power_data(session, host: HostConfig): async def get_power_data(session, host: HostConfig):
@@ -351,7 +364,7 @@ async def get_power_data(session, host: HostConfig):
logging.warning( logging.warning(
"Skipping %s (in cool-down until %.1f)", host.fqdn, host.next_retry_time "Skipping %s (in cool-down until %.1f)", host.fqdn, host.next_retry_time
) )
up_gauge.labels(host=host.fqdn).set(0) UP_GAUGE.labels(host=host.fqdn).set(0)
return return
# Start time measurement # Start time measurement
@@ -361,26 +374,26 @@ async def get_power_data(session, host: HostConfig):
if not resources: if not resources:
logging.error("Could not discover any resources for %s", host.fqdn) logging.error("Could not discover any resources for %s", host.fqdn)
host.mark_failure() host.mark_failure()
up_gauge.labels(host=host.fqdn).set(0) UP_GAUGE.labels(host=host.fqdn).set(0)
return return
chassis_url = resources.get("Chassis") chassis_url = resources.get("Chassis")
if not chassis_url: if not chassis_url:
logging.error("No valid Chassis URL found for %s", host.fqdn) logging.error("No valid Chassis URL found for %s", host.fqdn)
host.mark_failure() host.mark_failure()
up_gauge.labels(host=host.fqdn).set(0) UP_GAUGE.labels(host=host.fqdn).set(0)
return return
# Mark host as up # Mark host as up
host.mark_success() host.mark_success()
up_gauge.labels(host=host.fqdn).set(1) UP_GAUGE.labels(host=host.fqdn).set(1)
# Get chassis ressource # Get chassis ressource
chassis_url = f"https://{host.fqdn}{chassis_url}" chassis_url = f"https://{host.fqdn}{chassis_url}"
chassis_data = await fetch_with_retry(session, host, chassis_url) chassis_data = await fetch_with_retry(session, host, chassis_url)
if not chassis_data: if not chassis_data:
host.mark_failure() host.mark_failure()
up_gauge.labels(host=host.fqdn).set(0) UP_GAUGE.labels(host=host.fqdn).set(0)
return return
# loop over each member in chassis ressource # loop over each member in chassis ressource
@@ -389,8 +402,8 @@ async def get_power_data(session, host: HostConfig):
if not chassis_member_url: if not chassis_member_url:
continue continue
# Debug-Ausgabe für Chassis-Member-URL # Normalize URL... I needed this for realy old Redfish versions :S (<1.6.0)
print(f"Chassis Member URL: {chassis_member_url}") chassis_member_url = normalize_url(chassis_member_url)
# Get chassis id from url ("/redfish/v1/Chassis/1" -> 1) # Get chassis id from url ("/redfish/v1/Chassis/1" -> 1)
chassis_member_id = chassis_member_url.split("/")[-1] chassis_member_id = chassis_member_url.split("/")[-1]
@@ -401,13 +414,9 @@ async def get_power_data(session, host: HostConfig):
member_url = f"https://{host.fqdn}{chassis_member_url}" member_url = f"https://{host.fqdn}{chassis_member_url}"
member_data = await fetch_with_retry(session, host, member_url) member_data = await fetch_with_retry(session, host, member_url)
# Debug-Ausgabe für Chassis-Member-Daten
print(f"Chassis Member Data: {json.dumps(member_data, indent=4)}")
if not member_data: if not member_data:
continue continue
# Get Power ressource (fallback to "Power") # Get Power ressource (fallback to "Power")
power_resource_url, power_resource_type = get_power_resource_info( power_resource_url, power_resource_type = get_power_resource_info(
member_data, host.fqdn member_data, host.fqdn
@@ -503,7 +512,7 @@ async def get_system_info(session, host: HostConfig):
serial_number = system_data.get("SerialNumber") serial_number = system_data.get("SerialNumber")
# Hier könnte ihre Werbung stehen # Hier könnte ihre Werbung stehen
system_info.labels(host=host.fqdn).info( SYSTEM_INFO.labels(host=host.fqdn).info(
{ {
"manufacturer": manufacturer, "manufacturer": manufacturer,
"model": model, "model": model,

View File

@@ -1,6 +0,0 @@
prometheus-client==0.23.1
requests==2.32.5
urllib3==2.5.0
aiohttp==3.12.15
asyncio==4.0.0
PyYAML==6.0.2