Detect vendor (HPE), cache the session token per host, and reuse the cached token

This commit is contained in:
2025-11-13 12:43:55 +01:00
parent 5e68842356
commit 8f52afc174

View File

@@ -25,6 +25,11 @@ class HostConfig:
failures: int = 0
next_retry_time: float = field(default=0.0, init=False)
# Redfish session state; every field stays None until it is first filled in.
# "Vendor" value read from the /redfish/v1/ service-root probe.
vendor: str | None = None
# X-Auth-Token returned by a SessionService login, cached for reuse.
session_token: str | None = None
# NOTE(review): not read anywhere in the visible code; presumably the time at
# which session_token stops being valid — confirm clock/units before using it.
token_expiry: float | None = None
def should_skip(self) -> bool:
    """Return True while the host's cool-down window has not yet elapsed.

    The window is considered open as long as the monotonic clock reads
    strictly less than ``self.next_retry_time``.
    """
    now = time.monotonic()
    return now < self.next_retry_time
@@ -86,20 +91,70 @@ async def fetch_with_retry(session, host: HostConfig, url: str) -> dict | None:
up_gauge.labels(host=host.fqdn).set(0)
return None
if not host.vendor:
try:
async with session.get(f"https://{host.fqdn}/redfish/v1/", ssl=False, timeout=10) as resp:
if resp.status == 200:
data = await resp.json()
host.vendor = data.get("Vendor", "")
logging.debug("Detected vendor for %s: %s", host.fqdn, host.vendor)
else:
logging.warning("Vendor probe failed on %s: HTTP %s", host.fqdn, resp.status)
except Exception as e:
logging.warning("Vendor probe failed for %s: %s", host.fqdn, e)
is_hpe = host.vendor and host.vendor.strip().upper().startswith("HPE")
for attempt in range(1, host.max_retries + 1):
try:
async with session.get(
url,
auth=aiohttp.BasicAuth(host.username, host.password),
ssl=False,
timeout=10,
) as resp:
if resp.status == 200:
host.mark_success()
return await resp.json()
logging.warning(
"HTTP %s from %s (attempt %d)", resp.status, host.fqdn, attempt
)
headers = {}
if is_hpe:
# Try to reuse existing session token
if host.session_token:
headers["X-Auth-Token"] = host.session_token
logging.debug("Reusing cached session token for %s", host.fqdn)
else:
# Need to login and store new session token
# HPE Redfish login
login_url = f"https://{host.fqdn}/redfish/v1/SessionService/Sessions"
payload = {"UserName": host.username, "Password": host.password}
async with session.post(login_url, json=payload, ssl=False, timeout=10) as login_resp:
if login_resp.status == 201:
host.session_token = login_resp.headers.get("X-Auth-Token")
if not host.session_token:
raise RuntimeError("No X-Auth-Token in login response")
headers["X-Auth-Token"] = host.session_token
logging.info("New session token obtained for %s", host.fqdn)
else:
logging.warning("Login failed for %s: HTTP %s", host.fqdn, login_resp.status)
continue # retry login next attempt
async with session.get(url, headers=headers, ssl=False, timeout=10) as resp:
if resp.status == 200:
host.mark_success()
return await resp.json()
elif resp.status in (401, 403):
# Token expired or invalid, clear it and retry
logging.warning("Invalid token for %s, reauthenticating...", host.fqdn)
host.session_token = None
continue
logging.warning("HTTP %s from %s (attempt %d)", resp.status, host.fqdn, attempt)
else:
# Default: HTTP Basic auth (e.g. Supermicro and other non-HPE vendors)
async with session.get(
url,
auth=aiohttp.BasicAuth(host.username, host.password),
ssl=False,
timeout=10,
) as resp:
if resp.status == 200:
host.mark_success()
return await resp.json()
logging.warning(
"HTTP %s from %s (attempt %d)", resp.status, host.fqdn, attempt
)
except asyncio.TimeoutError:
logging.warning("Timeout on %s (attempt %d)", host.fqdn, attempt)