From 8f52afc1748a90896bf7747d7730b4754579bddd Mon Sep 17 00:00:00 2001
From: Marco Lucarelli
Date: Thu, 13 Nov 2025 12:43:55 +0100
Subject: [PATCH] detect vendor (hpe), cache token per host, reuse token

---
 python/redfish-api/redfish_exporter.py | 79 ++++++++++++++++++++++----
 1 file changed, 67 insertions(+), 12 deletions(-)

diff --git a/python/redfish-api/redfish_exporter.py b/python/redfish-api/redfish_exporter.py
index 89a6430..c710494 100644
--- a/python/redfish-api/redfish_exporter.py
+++ b/python/redfish-api/redfish_exporter.py
@@ -25,6 +25,11 @@ class HostConfig:
     failures: int = 0
     next_retry_time: float = field(default=0.0, init=False)
 
+    # Per-host Redfish state: detected vendor and cached session token (token_expiry is not set anywhere yet)
+    vendor: str | None = None
+    session_token: str | None = None
+    token_expiry: float | None = None
+
     def should_skip(self) -> bool:
         """Check if host is still in cool-down window"""
         return time.monotonic() < self.next_retry_time
@@ -86,20 +91,70 @@ async def fetch_with_retry(session, host: HostConfig, url: str) -> dict | None:
         up_gauge.labels(host=host.fqdn).set(0)
         return None
 
+    if host.vendor is None:  # None = not probed yet; "" = probed, vendor unknown (do not probe again)
+        try:
+            async with session.get(f"https://{host.fqdn}/redfish/v1/", ssl=False, timeout=10) as resp:
+                if resp.status == 200:
+                    data = await resp.json()
+                    host.vendor = data.get("Vendor", "")
+                    logging.debug("Detected vendor for %s: %s", host.fqdn, host.vendor)
+                else:
+                    logging.warning("Vendor probe failed on %s: HTTP %s", host.fqdn, resp.status)
+        except Exception as e:
+            logging.warning("Vendor probe failed for %s: %s", host.fqdn, e)
+
+    is_hpe = host.vendor and host.vendor.strip().upper().startswith("HPE")
+
     for attempt in range(1, host.max_retries + 1):
         try:
-            async with session.get(
-                url,
-                auth=aiohttp.BasicAuth(host.username, host.password),
-                ssl=False,
-                timeout=10,
-            ) as resp:
-                if resp.status == 200:
-                    host.mark_success()
-                    return await resp.json()
-                logging.warning(
-                    "HTTP %s from %s (attempt %d)", resp.status, host.fqdn, attempt
-                )
+            headers = {}
+
+            if is_hpe:
+                # Try to reuse existing session token
+                if host.session_token:
+                    headers["X-Auth-Token"] = host.session_token
+                    logging.debug("Reusing cached session token for %s", host.fqdn)
+                else:
+                    # Need to login and store new session token
+                    # HPE Redfish login
+                    login_url = f"https://{host.fqdn}/redfish/v1/SessionService/Sessions"
+                    payload = {"UserName": host.username, "Password": host.password}
+                    async with session.post(login_url, json=payload, ssl=False, timeout=10) as login_resp:
+                        if login_resp.status == 201:
+                            host.session_token = login_resp.headers.get("X-Auth-Token")
+                            if not host.session_token:
+                                raise RuntimeError("No X-Auth-Token in login response")
+                            headers["X-Auth-Token"] = host.session_token
+                            logging.info("New session token obtained for %s", host.fqdn)
+                        else:
+                            logging.warning("Login failed for %s: HTTP %s", host.fqdn, login_resp.status)
+                            continue  # retry login next attempt
+
+                async with session.get(url, headers=headers, ssl=False, timeout=10) as resp:
+                    if resp.status == 200:
+                        host.mark_success()
+                        return await resp.json()
+                    elif resp.status in (401, 403):
+                        # Token expired or invalid, clear it and retry
+                        logging.warning("Invalid token for %s, reauthenticating...", host.fqdn)
+                        host.session_token = None
+                        continue
+                    logging.warning("HTTP %s from %s (attempt %d)", resp.status, host.fqdn, attempt)
+
+            else:
+                # Default: HTTP Basic auth on every request (e.g. Supermicro and other non-HPE vendors)
+                async with session.get(
+                    url,
+                    auth=aiohttp.BasicAuth(host.username, host.password),
+                    ssl=False,
+                    timeout=10,
+                ) as resp:
+                    if resp.status == 200:
+                        host.mark_success()
+                        return await resp.json()
+                    logging.warning(
+                        "HTTP %s from %s (attempt %d)", resp.status, host.fqdn, attempt
+                    )
 
         except asyncio.TimeoutError:
             logging.warning("Timeout on %s (attempt %d)", host.fqdn, attempt)