129 lines
3.6 KiB
Python
129 lines
3.6 KiB
Python
"""
|
|
Health check module
|
|
|
|
HTTP health checking with retry logic and progress indicators
|
|
"""
|
|
|
|
import logging
|
|
import time
|
|
|
|
import requests
|
|
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class HealthCheckError(Exception):
    """Raised when health check fails"""
|
|
|
|
|
|
class HealthChecker:
    """HTTP health check with retry logic

    Repeatedly issues GET requests against a URL until one of them answers
    with a 2xx/3xx status code or the configured total timeout runs out.
    """

    def __init__(
        self,
        timeout: int,
        interval: int,
        verify_ssl: bool,
        request_timeout: float = 5.0,
    ):
        """
        Initialize health checker

        Args:
            timeout: Total timeout in seconds
            interval: Check interval in seconds
            verify_ssl: Whether to verify SSL certificates
            request_timeout: Upper bound in seconds for a single HTTP
                request. Defaults to 5 seconds, the value that used to be
                hard-coded.
        """
        self._timeout = timeout
        self._interval = interval
        self._verify_ssl = verify_ssl
        self._request_timeout = request_timeout
        self._logger = logging.getLogger(f"{__name__}.HealthChecker")

    def check_health(self, url: str, dry_run: bool = False) -> bool:
        """
        Perform health check with retries

        Args:
            url: URL to check (e.g., https://example.com)
            dry_run: If True, only log what would be done

        Returns:
            True if health check passed, False otherwise
        """
        if dry_run:
            self._logger.info("[DRY-RUN] Would check health of %s", url)
            return True

        self._logger.info(
            "Checking health of %s for up to %s seconds", url, self._timeout
        )

        # Monotonic clock: the deadline must not move when the wall clock is
        # adjusted (NTP sync, manual change) while we are polling.
        start_time = time.monotonic()
        attempt = 0

        while True:
            elapsed = time.monotonic() - start_time
            remaining = self._timeout - elapsed

            if remaining <= 0:
                # `attempt` counts only requests that were actually made.
                self._logger.error(
                    "Health check timed out after %.1f seconds (%d attempts)",
                    elapsed,
                    attempt,
                )
                return False

            attempt += 1

            # Never let one request run past the overall deadline.
            request_timeout = min(self._request_timeout, remaining)
            if self._single_check(url, timeout=request_timeout):
                # Re-measure so the reported duration includes the request.
                elapsed = time.monotonic() - start_time
                self._logger.info(
                    "Health check passed after %.1f seconds (%d attempts)",
                    elapsed,
                    attempt,
                )
                return True

            # The request itself consumed time; recompute before sleeping so
            # the wait never overshoots the total timeout.
            elapsed = time.monotonic() - start_time
            remaining = self._timeout - elapsed
            if remaining > 0:
                wait_time = min(self._interval, remaining)
                self._logger.debug(
                    "Attempt %d failed, retrying in %.1fs "
                    "(elapsed: %.1fs, timeout: %ss)",
                    attempt,
                    wait_time,
                    elapsed,
                    self._timeout,
                )
                time.sleep(wait_time)
            # When no time is left, the check at the top of the loop logs
            # the timeout and returns False.

    def _single_check(self, url: str, timeout: "float | None" = None) -> bool:
        """
        Single health check attempt

        Args:
            url: URL to check
            timeout: Timeout in seconds for this request; when None, the
                request timeout configured on the instance is used

        Returns:
            True if valid HTTP response (2xx or 3xx) received, False otherwise
        """
        if timeout is None:
            timeout = self._request_timeout

        # Keep the try body to the network call only, so unrelated errors
        # are not mistaken for a failed health check.
        try:
            response = requests.get(
                url,
                timeout=timeout,
                verify=self._verify_ssl,
                allow_redirects=True,
            )
        except requests.RequestException as e:
            self._logger.debug(
                "Health check failed: %s: %s", type(e).__name__, e
            )
            return False

        # Accept any 2xx or 3xx status code as valid
        status = response.status_code
        if 200 <= status < 400:
            self._logger.debug("Health check successful: HTTP %s", status)
            return True

        self._logger.debug("Health check failed: HTTP %s", status)
        return False
|