Compare commits

...

26 Commits

Author SHA1 Message Date
991f031ce5 rename python scripts 2026-02-02 15:40:14 +01:00
8b19633a84 rename labels 2026-02-02 15:28:54 +01:00
b68889e869 hide deprecated message 2026-02-02 13:57:23 +01:00
5c777483fc feature: grouping 2026-02-02 12:55:32 +01:00
bdac561e86 update README.md 2026-01-30 15:37:09 +01:00
50e8376937 fix typo 2026-01-30 15:26:13 +01:00
361e75e4f3 remove unused attributes 2026-01-30 15:17:45 +01:00
b1db6212a0 apply ruff format 2026-01-30 15:15:42 +01:00
d8fc5cd8b8 fix mixed up dataclass 2026-01-30 15:15:01 +01:00
9bedf0c799 add dataclass PowerMetrics 2026-01-30 15:11:01 +01:00
f3c4bc1953 add dataclass RedfishResource, update discover_redfish_resources, process_power_supply 2026-01-30 15:02:33 +01:00
381d06ae58 play with dataclass 2026-01-30 14:52:23 +01:00
ac73bcde03 format 2026-01-30 14:10:19 +01:00
ca0763ee6e update uv.lock 2026-01-30 14:09:58 +01:00
ce6e2e7f26 simple script for testing 2026-01-30 14:09:19 +01:00
cee64faaa8 rewrite Dockerfile 2026-01-30 14:08:31 +01:00
5f860ada6a reformat 2026-01-30 14:07:36 +01:00
0f9c92763c WIP find errors in session/login 2026-01-30 11:11:53 +01:00
b90b67475c delete vendor in system info 2026-01-30 10:34:18 +01:00
08f03d2cc3 cover two use cases 2026-01-30 10:25:31 +01:00
a899d43f8d simplify info metric 2026-01-30 08:12:27 +01:00
ef10e739b2 set vendor, manufacturer, serial 2026-01-30 08:05:11 +01:00
ed3948c72f WIP 2026-01-29 16:12:49 +01:00
5a66ca82cb update some comments 2026-01-29 16:01:46 +01:00
0b8e183f98 changed systemids to chassis 2026-01-29 15:39:58 +01:00
42efa7c1c8 changed systemids to chassis 2026-01-29 15:39:44 +01:00
10 changed files with 624 additions and 643 deletions

View File

@@ -1,13 +1,35 @@
FROM python:3
# syntax=docker/dockerfile:1.7
FROM python:3.12-slim-trixie
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /usr/local/bin/
# Setup a non-root user
RUN groupadd --system --gid 999 nonroot \
&& useradd --system --gid 999 --uid 999 --create-home nonroot
WORKDIR /app
RUN chown nonroot:nonroot /app
# Enable bytecode compilation
ENV UV_COMPILE_BYTECODE=1
# Copy from the cache instead of linking since it's a mounted volume
ENV UV_LINK_MODE=copy
# Copy the project source code and install dependencies from the lockfile
# Use the non-root user to run our application
USER nonroot
COPY --chown=nonroot:nonroot . /app/
RUN uv sync --locked
# Place executables in the environment at the front of the path
ENV PATH="/app/.venv/bin:$PATH"
# Reset the entrypoint, don't invoke `uv`
ENTRYPOINT []
EXPOSE 8000
WORKDIR /usr/src/app
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt
COPY redfish_exporter.py .
COPY config.yaml .
CMD [ "python", "./redfish_exporter.py" ]
CMD [ "python", "main.py" ]

View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2026 dasBaum
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -1,117 +1,169 @@
# Description
# Redfish-Exporter
A Python-based Prometheus exporter for collecting power data (Watts, Volts, Amperes) from bare metal servers using the Redfish API. This tool supports multiple vendors (e.g., HPE, Supermicro) and is designed to run cross-platform on Linux and Windows.
I've createtd this python script to collect Power data to analyse Watts, Volts and Amperes. If there is a better solution, feel free to replace me.
I've created this Python script to collect power data to analyse watts, volts and amperes. If there is a better solution or you want more features, feel free to replace it or extend this Prometheus exporter.
Usage:
## Features
- Collects power metrics: Watts, Volts, and Amperes.
- Supports multiple vendors (HPE, Supermicro, etc.).
- Supports grouping hosts via a configurable `group` label.
- Cross-platform compatibility (Linux and Windows).
- Graceful error handling and retry logic.
- Configurable via YAML.
- Docker support.
## Metrics Overview
| Metric | Type | Description |
|---------------------------------|-----------|--------------------------------------------------------------------------|
| redfish_up | Gauge | Host status (1 = reachable, 0 = not reachable; labels: host, group). |
| redfish_psu_input_voltage_volts | Gauge | Input voltage per power supply (labels: host, psu_serial, group). |
| redfish_psu_input_watts | Gauge | Input watts per power supply (labels: host, psu_serial, group). |
| redfish_psu_input_amps | Gauge | Input amperes per power supply (labels: host, psu_serial, group). |
| redfish_system_info | Info | System information (manufacturer, model, serial, Redfish version; labels: host, group). |
| redfish_request_latency_seconds | Histogram | Request latency (label: host). |
| redfish_errors_total | Counter | Number of errors per host and error type (labels: host, error). |
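A scrape of the exporter's `/metrics` endpoint might then look like this (host, serial and values are illustrative):
```
redfish_up{host="host1.example.net",group="production"} 1.0
redfish_psu_input_voltage_volts{host="host1.example.net",psu_serial="S0123",group="production"} 230.0
redfish_psu_input_watts{host="host1.example.net",psu_serial="S0123",group="production"} 161.0
redfish_psu_input_amps{host="host1.example.net",psu_serial="S0123",group="production"} 0.7
```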
## Usage
```
usage: redfish_exporter.py [-h] [--config CONFIG] [--port PORT]
usage: python main.py [-h] [--config CONFIG] [--port PORT]
Redfish Prometheus Exporter
options:
-h, --help show this help message and exit
--config CONFIG Path to config file
--port PORT Override port from config file
-h, --help show this help message and exit
--config CONFIG Path to config file
--port PORT Override port from config file
--interval INTERVAL Override interval from config file
--show-deprecated Enable deprecated warnings in log
```
# Install
## Requirements
* just (optional)
* Python 3.13+ (see `requires-python` in `pyproject.toml`)
* uv
* dependencies: see `pyproject.toml`
Dependencies:
Install the dependencies using `uv`:
* see requirements.txt
```bash
uv sync
source .venv/bin/activate
uv lock --upgrade --refresh
```
## Configuration
Create `config.yaml` with following structure:
Create `config.yaml`:
### Basic Configuration
```yaml
---
interval: 5
port: 8000
username: user1
username: user
password: secret
chassis: ["1"]
hosts:
- srv1-112.mgmt.wtb1.ch.abainfra.net
- srv2-112.mgmt.wtb1.ch.abainfra.net
- srv3-112.mgmt.wtb1.ch.abainfra.net
- srv4-112.mgmt.wtb1.ch.abainfra.net
- host1.example.net
- host2.example.net
- host3.example.net
- host4.example.net
```
or:
### Advanced Configuration
```yaml
---
interval: 5
port: 8000
username: user1
password: secret1
chassis: ["1"]
group: development # set default group for all hosts
hosts:
- fqdn: srv1-112.mgmt.wtb1.ch.abainfra.net
- fqdn: host1.example.net
username: user2
password: secret2
- fqdn: srv2-112.mgmt.wtb1.ch.abainfra.net
chassis: ["0"]
group: production # use group for specific host
- fqdn: host2.example.net
username: user3
password: secret3
- fqdn: srv3-112.mgmt.wtb1.ch.abainfra.net
chassis: ["1"]
group: stage
- fqdn: host3.example.net
username: user4
password: secret4
- fqdn: srv4-112.mgmt.wtb1.ch.abainfra.net
chassis: ["example"]
- fqdn: host4.example.net
username: user5
password: secret5
```
The `port`, `interval` are optional and can be overwritten by argument. Save default values are hardcoded.
The `port` and `interval` settings are optional and can be overridden by command-line arguments. Default values are hardcoded.
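A minimal sketch of the effective precedence (flag beats file, file beats hardcoded default) — this mirrors `main()` in `main.py` but is illustrative, not the literal implementation:
```python
import argparse
import yaml

parser = argparse.ArgumentParser()
parser.add_argument("--config", default="config.yaml")
parser.add_argument("--port", type=int)
parser.add_argument("--interval", type=int)
args = parser.parse_args()

config = {"port": 8000, "interval": 10}      # hardcoded defaults
with open(args.config, encoding="utf-8") as f:
    config.update(yaml.safe_load(f) or {})   # config.yaml overrides defaults
if args.port is not None:
    config["port"] = args.port               # --port overrides config.yaml
if args.interval is not None:
    config["interval"] = args.interval       # --interval overrides config.yaml
```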
### Prometheus Configuration
```
global:
scrape_interval: 15s
evaluation_interval: 15s
# Use as Container
scrape_configs:
- job_name: "prometheus"
static_configs:
- targets: ["localhost:9090"]
- job_name: "redfish_exporter"
static_configs:
- targets: ["localhost:8000"] # Adjust to your config
metrics_path: /metrics
scrape_interval: 15s
```
# Docker / Container
To run the Redfish Exporter in a Docker container:
```
docker build -t redfish_exporter .
docker buildx build -t redfish_exporter .
docker run -it --rm --name redfish_exporter_app -p 8000:8000 redfish_exporter:latest
```
# Legacy way
# Legacy Installation
## Python Dependencies
```bash
mkdir /srv/redfish-exporter
```
## Python dependencies
```bash
# or
git clone https://github.com/dasbaum-ch/redfish-exporter.git /srv/redfish-exporter
cd /srv/redfish-exporter
python3 -m venv venv
source venv/bin/activate
pip install -r requirements.txt
uv sync --locked
```
## Create user
```bash
sudo useradd -r -s /bin/false redfish
```
## Install systemd unit file
## Systemd Service
1. Copy the systemd unit file:
```bash
sudo cp redfish-exporter.service /etc/systemd/system/redfish-exporter.service
```
2. Reload and start the service:
```bash
sudo systemctl daemon-reload
sudo systemctl enable --now redfish-exporter.service
```
# Usefull oneliners
# License
This project is licensed under the **MIT License**. See the [LICENSE](LICENSE) file for details.
## public IP with curl
# Tested on Hardware
Here are some servers I have successfully tested:
```bash
curl icanhazip.com
curl -4 icanhazip.com
curl -6 icanhazip.com
curl 'https://api.ipify.org?format=json'
curl 'https://api64.ipify.org?format=json'
```
| Vendor | Model | Redfish Version |
|------------|----------------------|-----------------|
| Supermicro | AS-5126GS-TNRT2 | 1.21.0 |
| | AS-1124US-TNRP | 1.8.0 |
| HPE | ProLiant DL380 Gen10 | 1.6.0 |

View File

@@ -3,7 +3,8 @@ interval: 10
port: 8000
username: global-user
password: global-password
systemid: ["1"] # Strings, not integers!
chassis: ["1"] # Strings, not integers!
group: production
hosts:
- fqdn: host1.example.com
username: user1
@@ -12,5 +13,5 @@ hosts:
username: user2
password: secret2
- fqdn: host3.example.com
systemid: ["0"] # Strings, not integers!
chassis: ["0"] # Strings, not integers!
- fqdn: host4.example.com

View File

@@ -4,14 +4,14 @@ import urllib3
urllib3.disable_warnings()
username = "marco.lucarelli@abacus.ch"
password = "secret"
username = "admin"
password = "admin"
# host = sys.argv[1]
def get_power_data(host):
"""Redfish API Chassis Power"""
url = f"https://{host}.mgmt.wtb1.ch.abainfra.net/redfish/v1/"
url = f"https://{host}/redfish/v1/"
response = requests.get(url, verify=False)
response.raise_for_status()
data = response.json()
@@ -19,7 +19,7 @@ def get_power_data(host):
is_hpe = vendor.strip().upper().startswith("HPE")
if is_hpe:
response = "" # just to be sure
login_url = f"https://{host}.mgmt.wtb1.ch.abainfra.net/redfish/v1/SessionService/Sessions"
login_url = f"https://{host}/redfish/v1/SessionService/Sessions"
payload = {"UserName": username, "Password": password}
response = requests.post(login_url, json=payload, verify=False, timeout=10)
print(response)
@@ -29,7 +29,7 @@ def get_power_data(host):
if not token:
raise RuntimeError("No X-Auth-Token in login response")
headers = {"X-Auth-Token": token}
url = f"https://{host}.mgmt.wtb1.ch.abainfra.net/redfish/v1/Chassis/1/Power"
url = f"https://{host}/redfish/v1/Chassis/1/Power"
response = requests.get(url, verify=False, headers=headers)
if response.status_code == 200:
data = response.json()
@@ -37,19 +37,20 @@ def get_power_data(host):
print(response)
print(vendor)
print(is_hpe)
quit()
url = f"https://{host}.mgmt.wtb1.ch.abainfra.net/redfish/v1/Chassis/1/Power"
url = f"https://{host}/redfish/v1/Chassis/1/Power"
response = requests.get(url, auth=(username, password), verify=False)
if response.status_code == 200:
data = response.json()
for idx, psu in enumerate(data.get("PowerSupplies", [])):
print(idx)
line_input_v = psu.get("LineInputVoltage")
watts_input = psu.get("PowerInputWatts")
serial = psu.get("SerialNumber")
print(
f"PSU {idx}, {serial}: {host}, {line_input_v} V, {watts_input} W, {round(watts_input / line_input_v, 2)} A"
f"PSU {idx}, {serial}: {host}, {line_input_v} V, {watts_input} W"
)
else:
print(f"Error {response.status_code}: {response.text}")
@@ -57,7 +58,7 @@ def get_power_data(host):
# loop over all hosts
hosts = [
"srv1-110",
"22-kvm.abao.ch",
]
for host in hosts:
get_power_data(host)

View File

@@ -1,6 +1,6 @@
[project]
name = "redfish-api"
version = "0.1.0"
name = "redfish-exporter"
version = "1.0.0"
description = "Read Redfish API power data"
readme = "README.md"
requires-python = ">=3.13"

View File

@@ -9,7 +9,43 @@ import asyncio
import aiohttp
import urllib3
import yaml
from prometheus_client import Gauge, start_http_server, Summary, Counter, Histogram
from prometheus_client import (
Gauge,
start_http_server,
Summary,
Counter,
Histogram,
Info,
)
@dataclass
class RedfishResource:
"""Container for Redfish resource URLs."""
chassis: str | None = None
systems: str | None = None
power: str | None = None
session_service: str | None = None
@dataclass
class PowerMetrics:
"""Container for power metrics."""
voltage: float | None = None
watts: float | None = None
amps: float | None = None
serial: str | None = None
@dataclass
class RedfishSession:
"""Container for Redfish session data."""
token: str | None = None
logout_url: str | None = None
vendor: str | None = None
@dataclass
@@ -19,18 +55,14 @@ class HostConfig:
fqdn: str
username: str
password: str
max_retries: int = 1
backoff: int = 2
chassis: list[str] | None = None
group: str = "none"
max_retries: int = 3 # 3 retries
backoff: int = 2 # wait 2 seconds
cool_down: int = 120 # seconds to wait after too many failures
failures: int = 0
next_retry_time: float = field(default=0.0, init=False)
# New attributes for Redfish stuff
vendor: str | None = None
session_token: str | None = None
session_logout: str | None = (
None # SessionLocation like /redfish/v1/SessionService/Sessions/marco.lucarelli%40abacus.ch00000000xxx/
)
session: RedfishSession = field(default_factory=RedfishSession)
def should_skip(self) -> bool:
"""Check if host is still in cool-down window"""
@@ -61,20 +93,28 @@ REQUEST_TIME = Summary("request_processing_seconds", "Time spent processing requ
REQUEST_LATENCY = Histogram(
"redfish_request_latency_seconds", "Time for Redfish request", ["host"]
)
up_gauge = Gauge("redfish_up", "Host up/down", ["host"])
error_counter = Counter(
UP_GAUGE = Gauge("redfish_up", "Host up/down", ["host", "group"])
ERROR_COUNTER = Counter(
"redfish_errors_total", "Total Redfish errors", ["host", "error"]
)
voltage_gauge = Gauge(
"redfish_psu_line_input_voltage_volts",
VOLTAGE_GAUGE = Gauge(
"redfish_psu_input_voltage_volts",
"Line Input Voltage per PSU",
["host", "psu_serial"],
["host", "psu_serial", "group"],
)
watts_gauge = Gauge(
"redfish_psu_power_input_watts", "Power Input Watts per PSU", ["host", "psu_serial"]
WATTS_GAUGE = Gauge(
"redfish_psu_input_watts",
"Power Input Watts per PSU",
["host", "psu_serial", "group"],
)
amps_gauge = Gauge(
"redfish_psu_input_amps", "Current draw in Amps per PSU", ["host", "psu_serial"]
AMPS_GAUGE = Gauge(
"redfish_psu_input_amps",
"Current draw in Amps per PSU",
["host", "psu_serial", "group"],
)
# set info metric
SYSTEM_INFO = Info(
"redfish_system", "System information (model, serial, etc.)", ["host", "group"]
)
@@ -84,72 +124,80 @@ async def process_request(t):
await asyncio.sleep(t)
async def probe_vendor(session, host: HostConfig) -> str | None:
"""Probe the vendor of the Redfish host."""
try:
async with session.get(
f"https://{host.fqdn}/redfish/v1/", ssl=False, timeout=10
) as resp:
if resp.status == 200:
data = await resp.json()
vendor = data.get("Vendor", "")
logging.debug("Detected vendor for %s: %s", host.fqdn, vendor)
return vendor
logging.warning(
"Vendor probe failed on %s: HTTP %s", host.fqdn, resp.status
)
except Exception as e:
logging.warning("Vendor probe failed for %s: %s", host.fqdn, e)
return None
async def login_hpe(session, host: HostConfig) -> bool:
"""Login to HPE Redfish API and set session token."""
login_url = f"https://{host.fqdn}/redfish/v1/SessionService/Sessions"
payload = {"UserName": host.username, "Password": host.password}
try:
async with session.post(
login_url, json=payload, ssl=False, timeout=10
) as login_resp:
if login_resp.status == 201:
host.session.token = login_resp.headers.get("X-Auth-Token")
host.session.logout_url = login_resp.headers.get("Location")
if not host.session.token or not host.session.logout_url:
raise RuntimeError("Invalid login response")
logging.info("New session token obtained for %s", host.fqdn)
return True
logging.warning(
"Login failed for %s: HTTP %s", host.fqdn, login_resp.status
)
except Exception as e:
logging.warning("Login failed for %s: %s", host.fqdn, e)
return False
async def fetch_with_retry(session, host: HostConfig, url: str) -> dict | None:
"""Fetch JSON from Redfish with retry/backoff"""
"""Fetch JSON from Redfish with retry/backoff."""
if host.should_skip():
logging.warning(
"Skipping %s (in cool-down until %.1f)", host.fqdn, host.next_retry_time
)
up_gauge.labels(host=host.fqdn).set(0)
UP_GAUGE.labels(host=host.fqdn, group=host.group).set(0)
return None
if not host.vendor:
try:
async with session.get(
f"https://{host.fqdn}/redfish/v1/", ssl=False, timeout=10
) as resp:
if resp.status == 200:
data = await resp.json()
host.vendor = data.get("Vendor", "")
logging.debug("Detected vendor for %s: %s", host.fqdn, host.vendor)
else:
logging.warning(
"Vendor probe failed on %s: HTTP %s", host.fqdn, resp.status
)
except Exception as e:
logging.warning("Vendor probe failed for %s: %s", host.fqdn, e)
# Probe vendor if not already known
if not host.session.vendor:
host.session.vendor = await probe_vendor(session, host)
is_hpe = host.vendor and host.vendor.strip().upper().startswith("HPE")
is_hpe = host.session.vendor and host.session.vendor.strip().upper().startswith(
"HPE"
)
for attempt in range(1, host.max_retries + 1):
try:
headers = {}
if is_hpe:
# Try to reuse existing session token
if host.session_token:
headers["X-Auth-Token"] = host.session_token
logging.debug("Reusing cached session token for %s", host.fqdn)
else:
# Need to login and store new session token
# HPE Redfish login
login_url = (
f"https://{host.fqdn}/redfish/v1/SessionService/Sessions"
)
payload = {"UserName": host.username, "Password": host.password}
async with session.post(
login_url, json=payload, ssl=False, timeout=10
) as login_resp:
if login_resp.status == 201:
host.session_token = login_resp.headers.get(
"X-Auth-Token"
) # as response in header
if not host.session_token:
raise RuntimeError("No X-Auth-Token in login response")
host.session_logout = login_resp.headers.get(
"Location"
) # as response in header
if not host.session_logout:
raise RuntimeError("No Location in login response")
headers["X-Auth-Token"] = host.session_token
logging.info("New session token obtained for %s", host.fqdn)
else:
logging.warning(
"Login failed for %s: HTTP %s",
host.fqdn,
login_resp.status,
)
continue # retry login next attempt
# Handle HPE session token
if not host.session.token:
if not await login_hpe(session, host):
# Retry login next attempt
continue
headers["X-Auth-Token"] = host.session.token
async with session.get(
url, headers=headers, ssl=False, timeout=10
@@ -162,14 +210,14 @@ async def fetch_with_retry(session, host: HostConfig, url: str) -> dict | None:
logging.warning(
"Invalid token for %s, reauthenticating...", host.fqdn
)
host.session_token = None
host.session.token = None
continue
logging.warning(
"HTTP %s from %s (attempt %d)", resp.status, host.fqdn, attempt
)
else:
# Default: BasicAuth, like Supermicro and so
# Default: BasicAuth
async with session.get(
url,
auth=aiohttp.BasicAuth(host.username, host.password),
@@ -203,64 +251,337 @@ async def fetch_with_retry(session, host: HostConfig, url: str) -> dict | None:
return None
async def get_power_data(session, host: HostConfig):
"""Query Redfish and update Prometheus metrics"""
async def discover_redfish_resources(
session, host: HostConfig
) -> RedfishResource | None:
"""Discover available Redfish resources and return relevant URLs"""
root_url = f"https://{host.fqdn}/redfish/v1/"
data = await fetch_with_retry(session, host, root_url)
if not data:
return None
# Create RedfishResource object
resources = RedfishResource(
chassis=data.get("Chassis", {}).get("@odata.id"),
systems=data.get("Systems", {}).get("@odata.id"),
session_service=data.get("SessionService", {}).get("@odata.id"),
)
if not resources.chassis:
logging.error("No valid Chassis URL found for host %s", host.fqdn)
return None
return resources
def get_power_resource_info(
member_data: dict, host_fqdn: str, show_deprecated_warnings
) -> tuple[str | None, str | None]:
"""Get the URL and type of Power resource (PowerSubsystem or Power)."""
# Try PowerSubsystem (new Redfish versions)
power_url = member_data.get("PowerSubsystem", {}).get("@odata.id")
if power_url:
return f"https://{host_fqdn}{power_url}", "PowerSubsystem"
# Try Power for older Redfish versions
power_url = member_data.get("Power", {}).get("@odata.id")
if power_url:
if show_deprecated_warnings:
logging.warning(
"DEPRECATED: Host %s uses old Redfish API (Power instead of PowerSubsystem). "
"Consider updating the firmware for full compatibility.",
host_fqdn,
)
return f"https://{host_fqdn}{power_url}", "Power"
# Nothing found -> Error
logging.error("No Power or PowerSubsystem found for host %s", host_fqdn)
return None, None
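# Illustrative member payloads distinguished above (shapes assumed from the
# Redfish schema; only the keys this helper reads are shown):
#   newer firmware: {"PowerSubsystem": {"@odata.id": "/redfish/v1/Chassis/1/PowerSubsystem"}}
#   older firmware: {"Power": {"@odata.id": "/redfish/v1/Chassis/1/Power"}}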
def process_power_supplies_url(
power_data: dict, power_resource_type: str, host_fqdn: str
) -> str | None:
"""Get the URL for PowerSupplies based on the Power resource type."""
if power_resource_type == "PowerSubsystem":
# With PowerSubsystem, PowerSupplies is a separate resource
power_supplies_url = power_data.get("PowerSupplies", {}).get("@odata.id")
if power_supplies_url:
return f"https://{host_fqdn}{power_supplies_url}"
elif power_resource_type == "Power":
# With Power, PowerSupplies is embedded directly in the Power object
if "PowerSupplies" in power_data:
return f"https://{host_fqdn}/redfish/v1/Chassis/1/Power"
logging.error("No PowerSupplies found in Power resource for host %s", host_fqdn)
return None
def process_power_supplies(
power_data: dict,
power_resource_type: str,
) -> list[dict] | None:
"""Get PowerSupplies data based on the Power resource type."""
if power_resource_type == "PowerSubsystem":
# PowerSubsystem: PowerSupplies is a resource with Members
power_supplies_url = power_data.get("PowerSupplies", {}).get("@odata.id")
if not power_supplies_url:
logging.error("No PowerSupplies URL found for PowerSubsystem")
return None
return None  # caller follows the PowerSupplies members URL instead
elif power_resource_type == "Power":
# Power: PowerSupplies is an array!
return power_data.get("PowerSupplies", [])
logging.error("Unknown power resource type")
return None
async def process_power_supply(
session, host: HostConfig, psu_data: dict, power_resource_type: str
) -> PowerMetrics | None:
"""Extract metrics from PowerSupply"""
serial = psu_data.get("SerialNumber")
metrics = PowerMetrics(serial=serial)
if power_resource_type == "PowerSubsystem":
# Newer Redfish API: metrics live in their own "Metrics" resource
metrics_url = psu_data.get("Metrics", {}).get("@odata.id")
if not metrics_url:
logging.warning("No Metrics found for PowerSupply %s", psu_data.get("Id"))
return None
metrics_url = f"https://{host.fqdn}{metrics_url}"
metrics_data = await fetch_with_retry(session, host, metrics_url)
if not metrics_data:
return None
# Get metrics from the Metrics resource
metrics.voltage = metrics_data.get("InputVoltage", {}).get("Reading")
metrics.watts = metrics_data.get("InputPowerWatts", {}).get("Reading")
metrics.amps = metrics_data.get("InputCurrentAmps", {}).get("Reading")
elif power_resource_type == "Power":
# Older Redfish API: metrics are fields directly on the PowerSupply entry
metrics.voltage = psu_data.get("LineInputVoltage")
metrics.watts = psu_data.get("PowerInputWatts")
if metrics.watts is None:
metrics.watts = psu_data.get("LastPowerOutputWatts")
metrics.amps = psu_data.get("InputCurrentAmps")
if metrics.amps is None and metrics.voltage and metrics.watts:
metrics.amps = round(metrics.watts / metrics.voltage, 2)
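# e.g. 460 W drawn at 230 V -> round(460 / 230, 2) == 2.0 A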
else:
logging.error(
"Unknown power resource type for PowerSupply %s", psu_data.get("Id")
)
return None
return metrics
def normalize_url(url: str) -> str:
"""Ensure URL does not end with a trailing slash."""
# I needed this for really old Redfish versions :S (<1.6.0)
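# e.g. "/redfish/v1/Chassis/1/" -> "/redfish/v1/Chassis/1"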
if url.endswith("/"):
return url[:-1] # Remove trailing slash
return url
async def get_power_data(session, host: HostConfig, show_deprecated_warnings):
"""Query Redfish for power data and update Prometheus metrics"""
if host.should_skip():
logging.warning(
"Skipping %s (in cool-down until %.1f)", host.fqdn, host.next_retry_time
)
up_gauge.labels(host=host.fqdn).set(0)
UP_GAUGE.labels(host=host.fqdn, group=host.group).set(0)
return
url = f"https://{host.fqdn}/redfish/v1/Chassis/1/Power"
# Start time measurement
start = time.monotonic()
data = await fetch_with_retry(session, host, url)
if not data:
# Get root resources
resources = await discover_redfish_resources(session, host)
if not resources or not resources.chassis:
logging.error("Could not discover any resources for %s", host.fqdn)
host.mark_failure()
up_gauge.labels(host=host.fqdn).set(0)
UP_GAUGE.labels(host=host.fqdn, group=host.group).set(0)
return
host.mark_success()
up_gauge.labels(host=host.fqdn).set(1)
UP_GAUGE.labels(host=host.fqdn, group=host.group).set(1)
for psu in data.get("PowerSupplies", []):
line_input_v = psu.get("LineInputVoltage")
# HPE Redfish uses LastPowerOutputWatts for Watts
if host.vendor.strip().upper().startswith("HPE"):
watts_input = psu.get("LastPowerOutputWatts")
else:
# Supermicro uses PowerInputWatts
watts_input = psu.get("PowerInputWatts")
serial = psu.get("SerialNumber")
chassis_url = f"https://{host.fqdn}{resources.chassis}"
chassis_data = await fetch_with_retry(session, host, chassis_url)
if not chassis_data:
host.mark_failure()
UP_GAUGE.labels(host=host.fqdn, group=host.group).set(0)
return
amps = (
round(watts_input / line_input_v, 2)
if line_input_v and watts_input
else None
for chassis_member in chassis_data.get("Members", []):
chassis_member_url = chassis_member.get("@odata.id")
if not chassis_member_url:
continue
# Normalize URL... I needed this for really old Redfish versions :S (<1.6.0)
chassis_member_url = normalize_url(chassis_member_url)
# Get chassis id from url ("/redfish/v1/Chassis/1" -> 1)
chassis_member_id = chassis_member_url.split("/")[-1]
# Check if the chassis id is in the config (had problems with the "NVMe" chassis)
if hasattr(host, "chassis") and host.chassis:
if chassis_member_id not in host.chassis:
continue
member_url = f"https://{host.fqdn}{chassis_member_url}"
member_data = await fetch_with_retry(session, host, member_url)
if not member_data:
continue
# Get Power resource (fallback to "Power")
power_resource_url, power_resource_type = get_power_resource_info(
member_data, host.fqdn, show_deprecated_warnings
)
if not power_resource_url:
continue
if line_input_v is not None:
voltage_gauge.labels(host=host.fqdn, psu_serial=serial).set(line_input_v)
if watts_input is not None:
watts_gauge.labels(host=host.fqdn, psu_serial=serial).set(watts_input)
if amps is not None:
amps_gauge.labels(host=host.fqdn, psu_serial=serial).set(amps)
# Get Power Data
power_data = await fetch_with_retry(session, host, power_resource_url)
if not power_data:
continue
# Get PowerSupplies, depending on resource type ("Power" or "PowerSubsystem")
if power_resource_type == "PowerSubsystem":
# Request PowerSupplies url (for PowerSubsystem)
power_supplies_url = power_data.get("PowerSupplies", {}).get("@odata.id")
if not power_supplies_url:
logging.warning("No PowerSupplies found for %s", host.fqdn)
continue
power_supplies_url = f"https://{host.fqdn}{power_supplies_url}"
power_supplies_data = await fetch_with_retry(
session, host, power_supplies_url
)
if not power_supplies_data:
continue
# loop over Members for "PowerSubsystem"
for psu_member in power_supplies_data.get("Members", []):
psu_url = psu_member.get("@odata.id")
if not psu_url:
continue
psu_url = f"https://{host.fqdn}{psu_url}"
psu_data = await fetch_with_retry(session, host, psu_url)
if not psu_data:
continue
# Process PowerSupplies object
metrics = await process_power_supply(
session, host, psu_data, "PowerSubsystem"
)
if metrics:
update_prometheus_metrics(host, metrics)
elif power_resource_type == "Power":
# Loop over PowerSupplies for older Redfish versions
for psu in power_data.get("PowerSupplies", []):
# Process PowerSupplies object
metrics = await process_power_supply(session, host, psu, "Power")
if metrics:
update_prometheus_metrics(host, metrics)
else:
logging.error("Unknown power resource type for host %s", host.fqdn)
continue
# Measure request and process latency
REQUEST_LATENCY.labels(host=host.fqdn).observe(time.monotonic() - start)
def update_prometheus_metrics(host: HostConfig, metrics: PowerMetrics):
"""Update Prometheus metrics with PowerMetrics data."""
if metrics.voltage is not None and metrics.serial:
VOLTAGE_GAUGE.labels(
host=host.fqdn, psu_serial=metrics.serial, group=host.group
).set(metrics.voltage)
if metrics.watts is not None and metrics.serial:
WATTS_GAUGE.labels(
host=host.fqdn, psu_serial=metrics.serial, group=host.group
).set(metrics.watts)
if metrics.amps is not None and metrics.serial:
AMPS_GAUGE.labels(
host=host.fqdn, psu_serial=metrics.serial, group=host.group
).set(metrics.amps)
async def get_system_info(session, host: HostConfig):
"""Query Redfish for system data and update Prometheus metrics"""
if host.should_skip():
logging.warning(
"Skipping %s (in cool-down until %.1f)", host.fqdn, host.next_retry_time
)
return
# Get Redfish Version
root_url = f"https://{host.fqdn}/redfish/v1/"
root_data = await fetch_with_retry(session, host, root_url)
if not root_data:
host.mark_failure()
return
redfish_version = root_data.get("RedfishVersion")
# Get Manufacturer, Serial and Model
systems_url = f"https://{host.fqdn}/redfish/v1/Systems/"
systems_data = await fetch_with_retry(session, host, systems_url)
if not systems_data:
host.mark_failure()
return
# loop over each system member
for system_member in systems_data.get("Members", []):
system_url = system_member.get("@odata.id")
if not system_url:
continue
system_data = await fetch_with_retry(
session, host, f"https://{host.fqdn}{system_url}"
)
if not system_data:
continue
manufacturer = system_data.get("Manufacturer")
if manufacturer is None:
manufacturer = "<no data>"
model = system_data.get("Model")
if model is None:
model = "<no data>"
serial_number = system_data.get("SerialNumber")
if serial_number is None:
serial_number = "<no data>"
# Your ad could go here
SYSTEM_INFO.labels(host=host.fqdn, group=host.group).info(
{
"manufacturer": manufacturer,
"model": model,
"serial_number": serial_number,
"redfish_version": redfish_version,
}
)
async def logout_host(session, host):
"""Clean logout for Redfish with session tokens"""
if not host.session_token:
return
if not host.session_logout:
if not host.session.token or not host.session.logout_url:
return
try:
logout_url = f"{host.session_logout}" # the full URL is here!
logout_url = host.session.logout_url
async with session.delete(
logout_url,
headers={"X-Auth-Token": host.session_token},
headers={"X-Auth-Token": host.session.token},
ssl=False,
timeout=5,
) as resp:
@@ -273,14 +594,17 @@ async def logout_host(session, host):
except Exception as e:
logging.warning("Error during logout for %s: %s", host.fqdn, e)
finally:
host.session_token = None
host.session.token = None
host.session.logout_url = None
async def run_exporter(config, stop_event):
async def run_exporter(config, stop_event, show_deprecated_warnings):
"""Main loop"""
port = config.get("port", 8000)
default_username = config.get("username")
default_password = config.get("password")
default_chassis = config.get("chassis", "1")
default_group = config.get("group", "none")
hosts = config["hosts"]
interval = config.get("interval", 10)
@@ -296,10 +620,16 @@ async def run_exporter(config, stop_event):
fqdn=host_entry["fqdn"],
username=host_entry.get("username", default_username),
password=host_entry.get("password", default_password),
chassis=host_entry.get("chassis", default_chassis),
group=host_entry.get("group", default_group),
)
else:
hc = HostConfig(
fqdn=host_entry, username=default_username, password=default_password
fqdn=host_entry,
username=default_username,
password=default_password,
chassis=default_chassis,
group=default_group,
)
host_objs.append(hc)
@@ -308,14 +638,17 @@ async def run_exporter(config, stop_event):
async with aiohttp.ClientSession(connector=connector) as session:
try:
while not stop_event.is_set():
tasks = [get_power_data(session, hc) for hc in host_objs]
tasks = []
for hc in host_objs:
tasks.append(get_power_data(session, hc, show_deprecated_warnings))
tasks.append(get_system_info(session, hc))
await asyncio.gather(*tasks)
await process_request(interval)
finally:
# Graceful shutdown: logout from Redfish sessions
logging.info("Exporter stopping, logging out from Redfish sessions...")
await asyncio.gather(
*(logout_host(session, h) for h in host_objs if h.session_token)
*(logout_host(session, h) for h in host_objs if h.session.token)
)
logging.info("All sessions logged out.")
logging.info("Exporter stopped cleanly.")
@@ -323,14 +656,19 @@ async def run_exporter(config, stop_event):
async def main():
"""Modern asyncio entry point"""
parser = argparse.ArgumentParser(description="Redfish Prometheus Exporter")
parser.add_argument("--config", default="config.yaml", help="Path to config file")
parser.add_argument("--port", type=int, help="Override port from config file")
parser = argparse.ArgumentParser(description="Redfish Prometheus Exporter.")
parser.add_argument("--config", default="config.yaml", help="Path to config file.")
parser.add_argument("--port", type=int, help="Override port from config file.")
parser.add_argument(
"--interval", type=int, help="Override interval from config file"
"--interval", type=int, help="Override interval from config file."
)
parser.add_argument("--show-deprecated", action="store_true", help="Enable deprecated warnings in log.")
args = parser.parse_args()
show_deprecated_warnings = args.show_deprecated
if show_deprecated_warnings:
logging.warning("Deprecated warnings are enabled.")
# Load YAML config
with open(args.config, "r", encoding="utf-8") as file:
config = yaml.safe_load(file)
@@ -341,13 +679,14 @@ async def main():
if args.interval is not None:
config["interval"] = args.interval
stop_event = asyncio.Event()
loop = asyncio.get_running_loop()
# Handle SIGINT (Ctrl+C) and SIGTERM
for sig in (signal.SIGINT, signal.SIGTERM):
loop.add_signal_handler(sig, stop_event.set)
await run_exporter(config, stop_event)
await run_exporter(config, stop_event, show_deprecated_warnings)
if __name__ == "__main__":

View File

@@ -1,449 +0,0 @@
"""Simple Redfish exporter to collect Power data from bare matel server"""
import argparse
import signal
import time
import logging
from dataclasses import dataclass, field
import asyncio
import aiohttp
import urllib3
import yaml
import json
from prometheus_client import Gauge, start_http_server, Summary, Counter, Histogram
@dataclass
class HostConfig:
"""Solve too many arguments"""
fqdn: str
username: str
password: str
systemid: list[str] | None = None
max_retries: int = 1
backoff: int = 2
cool_down: int = 120 # seconds to wait after too many failures
failures: int = 0
next_retry_time: float = field(default=0.0, init=False)
# New attributes for Redfish stuff
vendor: str | None = None
session_token: str | None = None
session_logout: str | None = (
None # SessionLocation like /redfish/v1/SessionService/Sessions/marco.lucarelli%40abacus.ch00000000xxx/
)
def should_skip(self) -> bool:
"""Check if host is still in cool-down window"""
return time.monotonic() < self.next_retry_time
def mark_failure(self):
"""Increase failure counter and maybe trigger cool-down"""
self.failures += 1
if self.failures >= self.max_retries:
self.next_retry_time = time.monotonic() + self.cool_down
self.failures = 0 # reset after triggering cool-down
def mark_success(self):
"""Reset failure counter after a successful request"""
self.failures = 0
self.next_retry_time = 0.0
# Disable certificate warnings
urllib3.disable_warnings()
# set log config
logging.basicConfig(
level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s"
)
# Prometheus metrics
REQUEST_TIME = Summary("request_processing_seconds", "Time spent processing request")
REQUEST_LATENCY = Histogram(
"redfish_request_latency_seconds", "Time for Redfish request", ["host"]
)
up_gauge = Gauge("redfish_up", "Host up/down", ["host"])
error_counter = Counter(
"redfish_errors_total", "Total Redfish errors", ["host", "error"]
)
voltage_gauge = Gauge(
"redfish_psu_line_input_voltage_volts",
"Line Input Voltage per PSU",
["host", "psu_serial"],
)
watts_gauge = Gauge(
"redfish_psu_power_input_watts", "Power Input Watts per PSU", ["host", "psu_serial"]
)
amps_gauge = Gauge(
"redfish_psu_input_amps", "Current draw in Amps per PSU", ["host", "psu_serial"]
)
@REQUEST_TIME.time()
async def process_request(t):
"""Simulate request time"""
await asyncio.sleep(t)
async def fetch_with_retry(session, host: HostConfig, url: str) -> dict | None:
"""Fetch JSON from Redfish with retry/backoff"""
if host.should_skip():
logging.warning(
"Skipping %s (in cool-down until %.1f)", host.fqdn, host.next_retry_time
)
up_gauge.labels(host=host.fqdn).set(0)
return None
if not host.vendor:
try:
async with session.get(
f"https://{host.fqdn}/redfish/v1/", ssl=False, timeout=10
) as resp:
if resp.status == 200:
data = await resp.json()
host.vendor = data.get("Vendor", "")
logging.debug("Detected vendor for %s: %s", host.fqdn, host.vendor)
else:
logging.warning(
"Vendor probe failed on %s: HTTP %s", host.fqdn, resp.status
)
except Exception as e:
logging.warning("Vendor probe failed for %s: %s", host.fqdn, e)
is_hpe = host.vendor and host.vendor.strip().upper().startswith("HPE")
for attempt in range(1, host.max_retries + 1):
try:
headers = {}
if is_hpe:
# Try to reuse existing session token
if host.session_token:
headers["X-Auth-Token"] = host.session_token
logging.debug("Reusing cached session token for %s", host.fqdn)
else:
# Need to login and store new session token
# HPE Redfish login
login_url = (
f"https://{host.fqdn}/redfish/v1/SessionService/Sessions"
)
payload = {"UserName": host.username, "Password": host.password}
async with session.post(
login_url, json=payload, ssl=False, timeout=10
) as login_resp:
if login_resp.status == 201:
host.session_token = login_resp.headers.get(
"X-Auth-Token"
) # as response in header
if not host.session_token:
raise RuntimeError("No X-Auth-Token in login response")
host.session_logout = login_resp.headers.get(
"Location"
) # as response in header
if not host.session_logout:
raise RuntimeError("No Location in login response")
headers["X-Auth-Token"] = host.session_token
logging.info("New session token obtained for %s", host.fqdn)
else:
logging.warning(
"Login failed for %s: HTTP %s",
host.fqdn,
login_resp.status,
)
continue # retry login next attempt
async with session.get(
url, headers=headers, ssl=False, timeout=10
) as resp:
if resp.status == 200:
host.mark_success()
return await resp.json()
elif resp.status in (401, 403):
# Token expired or invalid, clear it and retry
logging.warning(
"Invalid token for %s, reauthenticating...", host.fqdn
)
host.session_token = None
continue
logging.warning(
"HTTP %s from %s (attempt %d)", resp.status, host.fqdn, attempt
)
else:
# Default: BasicAuth, like Supermicro and so
async with session.get(
url,
auth=aiohttp.BasicAuth(host.username, host.password),
ssl=False,
timeout=10,
) as resp:
if resp.status == 200:
host.mark_success()
return await resp.json()
logging.warning(
"HTTP %s from %s (attempt %d)", resp.status, host.fqdn, attempt
)
except asyncio.TimeoutError:
logging.warning("Timeout on %s (attempt %d)", host.fqdn, attempt)
except aiohttp.ClientError as e:
logging.warning(
"Client error on %s (attempt %d): %s", host.fqdn, attempt, e
)
except Exception as e:
logging.exception(
"Unexpected error on %s (attempt %d): %s", host.fqdn, attempt, e
)
if attempt < host.max_retries:
await asyncio.sleep(host.backoff * attempt)
else:
host.mark_failure()
logging.error("All retries failed for %s", host.fqdn)
return None
async def get_power_data(session, host: HostConfig):
"""Query Redfish and update Prometheus metrics"""
if host.should_skip():
logging.warning(
"Skipping %s (in cool-down until %.1f)", host.fqdn, host.next_retry_time
)
up_gauge.labels(host=host.fqdn).set(0)
return
# start time measurement
start = time.monotonic()
# Query the root resource
resources = await discover_redfish_resources(session, host)
if not resources or "Chassis" not in resources:
logging.error("Could not discover Chassis resource for %s", host.fqdn)
host.mark_failure()
up_gauge.labels(host=host.fqdn).set(0)
return
# Mark host as up
host.mark_success()
up_gauge.labels(host=host.fqdn).set(1)
# Query the Chassis resource
chassis_url = f"https://{host.fqdn}{resources['Chassis']}"
chassis_data = await fetch_with_retry(session, host, chassis_url)
if not chassis_data:
host.mark_failure()
up_gauge.labels(host=host.fqdn).set(0)
return
# 3. Extract power data from the chassis members
for chassis_member in chassis_data.get("Members", []):
chassis_member_url = chassis_member.get("@odata.id")
if not chassis_member_url:
continue
# Get Chassis ID from url ("/redfish/v1/Chassis/1" -> 1)
chassis_id = chassis_member_url.split("/")[-1]
# Check if the chassis id is in config (had problem with chassis "NVMe")
if hasattr(host, 'systemid') and host.systemid:
if chassis_id not in host.systemid:
continue
member_url = f"https://{host.fqdn}{chassis_member_url}"
member_data = await fetch_with_retry(session, host, member_url)
if not member_data:
continue
# PowerSubsystem url
power_subsystem_url = member_data.get("PowerSubsystem", {}).get("@odata.id")
if not power_subsystem_url:
logging.warning("No PowerSubsystem found for %s", host.fqdn)
continue
# Query the PowerSubsystem collection
power_subsystem_url = f"https://{host.fqdn}{power_subsystem_url}"
power_subsystem_data = await fetch_with_retry(
session, host, power_subsystem_url
)
if not power_subsystem_data:
logging.warning("No PowerSubsystem data found for %s", host.fqdn)
continue
# List PowerSupplies
power_supplies_url = power_subsystem_data.get("PowerSupplies", {}).get(
"@odata.id"
)
if not power_supplies_url:
logging.warning("No PowerSupplies found for %s", host.fqdn)
continue
# List PowerSupplies members
power_supplies_url = f"https://{host.fqdn}{power_supplies_url}"
power_supplies_data = await fetch_with_retry(session, host, power_supplies_url)
if not power_supplies_data:
continue
# Loop over PowerSupply Members
for psu_member in power_supplies_data.get("Members", []):
psu_url = psu_member.get("@odata.id")
if not psu_url:
continue
psu_url = f"https://{host.fqdn}{psu_url}"
psu_data = await fetch_with_retry(session, host, psu_url)
if not psu_data:
continue
# Get Metrics URL
metrics_url = psu_data.get("Metrics", {}).get("@odata.id")
if not metrics_url:
logging.warning(
"No Metrics found for PowerSupply %s", psu_data.get("Id")
)
continue
metrics_url = f"https://{host.fqdn}{metrics_url}"
metrics_data = await fetch_with_retry(session, host, metrics_url)
if not metrics_data:
continue
# Get Metrics from data
line_input_v = metrics_data.get("InputVoltage", {}).get("Reading")
watts_input = metrics_data.get("InputPowerWatts", {}).get("Reading")
amps_input = metrics_data.get("InputCurrentAmps", {}).get("Reading")
serial = psu_data.get("SerialNumber")
# Calculate Amps
if line_input_v is not None:
voltage_gauge.labels(host=host.fqdn, psu_serial=serial).set(
line_input_v
)
if watts_input is not None:
watts_gauge.labels(host=host.fqdn, psu_serial=serial).set(watts_input)
if amps_input is not None:
amps_gauge.labels(host=host.fqdn, psu_serial=serial).set(amps_input)
REQUEST_LATENCY.labels(host=host.fqdn).observe(time.monotonic() - start)
async def logout_host(session, host):
"""Clean logout for Redfish with session tokens"""
if not host.session_token:
return
if not host.session_logout:
return
try:
logout_url = f"{host.session_logout}" # the full URL is here!
async with session.delete(
logout_url,
headers={"X-Auth-Token": host.session_token},
ssl=False,
timeout=5,
) as resp:
if resp.status in (200, 204):
logging.info("Logged out from %s", host.fqdn)
else:
logging.warning(
"Logout failed for %s (HTTP %s)", host.fqdn, resp.status
)
except Exception as e:
logging.warning("Error during logout for %s: %s", host.fqdn, e)
finally:
host.session_token = None
async def run_exporter(config, stop_event):
"""Main loop"""
port = config.get("port", 8000)
default_username = config.get("username")
default_password = config.get("password")
default_systemid = config.get("systemid")
hosts = config["hosts"]
interval = config.get("interval", 10)
# Start Prometheus metrics server
start_http_server(port)
logging.info("Prometheus metrics server running on port %s", port)
# create persistent HostConfig objects
host_objs = []
for host_entry in hosts:
if isinstance(host_entry, dict):
hc = HostConfig(
fqdn=host_entry["fqdn"],
username=host_entry.get("username", default_username),
password=host_entry.get("password", default_password),
systemid=host_entry.get("systemid", default_systemid),
)
else:
hc = HostConfig(
fqdn=host_entry, username=default_username, password=default_password
)
host_objs.append(hc)
# Connection pooling with aiohttp
connector = aiohttp.TCPConnector(limit_per_host=5, limit=50, ttl_dns_cache=300)
async with aiohttp.ClientSession(connector=connector) as session:
try:
while not stop_event.is_set():
tasks = [get_power_data(session, hc) for hc in host_objs]
await asyncio.gather(*tasks)
await process_request(interval)
finally:
# Graceful shutdown: logout from Redfish sessions
logging.info("Exporter stopping, logging out from Redfish sessions...")
await asyncio.gather(
*(logout_host(session, h) for h in host_objs if h.session_token)
)
logging.info("All sessions logged out.")
logging.info("Exporter stopped cleanly.")
# new from here
# Marco Lucarelli 2026-01-29
async def discover_redfish_resources(session, host: HostConfig) -> dict:
"""Discover available Redfish resources and return relevant URLs"""
root_url = f"https://{host.fqdn}/redfish/v1/"
data = await fetch_with_retry(session, host, root_url)
if not data:
return {}
# Extract links from the root response
links = {
"Chassis": data.get("Chassis", {}).get("@odata.id"),
"Systems": data.get("Systems", {}).get("@odata.id"),
"SessionService": data.get("SessionService", {}).get("@odata.id"),
}
return links
async def main():
"""Modern asyncio entry point"""
parser = argparse.ArgumentParser(description="Redfish Prometheus Exporter")
parser.add_argument("--config", default="config.yaml", help="Path to config file")
parser.add_argument("--port", type=int, help="Override port from config file")
parser.add_argument(
"--interval", type=int, help="Override interval from config file"
)
args = parser.parse_args()
# Load YAML config
with open(args.config, "r", encoding="utf-8") as file:
config = yaml.safe_load(file)
# Override port if argument is provided
if args.port is not None:
config["port"] = args.port
if args.interval is not None:
config["interval"] = args.interval
stop_event = asyncio.Event()
loop = asyncio.get_running_loop()
# Handle SIGINT (Ctrl+C) and SIGTERM
for sig in (signal.SIGINT, signal.SIGTERM):
loop.add_signal_handler(sig, stop_event.set)
await run_exporter(config, stop_event)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,6 +0,0 @@
prometheus-client==0.23.1
requests==2.32.5
urllib3==2.5.0
aiohttp==3.12.15
asyncio==4.0.0
PyYAML==6.0.2

View File

@@ -507,8 +507,8 @@ wheels = [
]
[[package]]
name = "redfish-api"
version = "0.1.0"
name = "redfish-exporter"
version = "1.0.0"
source = { virtual = "." }
dependencies = [
{ name = "aiohttp" },