Merakit-Deploy/wordpress/wordpress_deployer/orchestrator.py

627 lines
20 KiB
Python

"""
Deployment orchestration module
Main deployment workflow with rollback tracking and execution
"""
import logging
import shutil
import time
from dataclasses import asdict, dataclass
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List
from .config import DeploymentConfig
from .deployment_config_manager import DeploymentConfigManager, DeploymentMetadata
from .deployment_logger import DeploymentFileLogger
from .dns_manager import DNSError, DNSManager, DNSRecord
from .docker_manager import DockerError, DockerManager
from .env_generator import EnvFileGenerator, EnvValues, PasswordGenerator, WordGenerator
from .health import HealthCheckError, HealthChecker
from .webhooks import WebhookNotifier
logger = logging.getLogger(__name__)
class DeploymentError(Exception):
    """Root exception type for failures raised by the deployment workflow."""
class ValidationError(DeploymentError):
    """Raised when a pre-deployment validation check does not pass."""
@dataclass
class DeploymentAction:
    """
    Record of one step performed during a deployment

    Attributes:
        action_type: Identifier of the step that was performed
        timestamp: Moment the step was recorded
        details: Descriptive information about the step
        rollback_data: Values needed to undo the step
    """

    # One of 'dns_added', 'containers_started', 'env_updated'
    action_type: str
    timestamp: datetime
    details: Dict[str, Any]
    rollback_data: Dict[str, Any]
class DeploymentTracker:
    """Keeps an ordered history of deployment actions so they can be rolled back"""

    def __init__(self):
        """Create a tracker with an empty history and its own child logger"""
        self._logger = logging.getLogger(f"{__name__}.DeploymentTracker")
        self._actions: List[DeploymentAction] = []

    def record_action(self, action: DeploymentAction) -> None:
        """
        Append an action to the end of the history

        Args:
            action: DeploymentAction to remember
        """
        self._actions.append(action)
        self._logger.debug(f"Recorded action: {action.action_type}")

    def get_actions(self) -> List[DeploymentAction]:
        """
        Return the recorded actions in the order they were recorded

        Returns:
            A new list, so callers cannot mutate the tracker's own history
        """
        return list(self._actions)

    def clear(self) -> None:
        """Forget every recorded action"""
        del self._actions[:]
        self._logger.debug("Cleared action history")
class DeploymentOrchestrator:
    """Main orchestrator coordinating all deployment steps"""

    def __init__(self, config: DeploymentConfig):
        """
        Build every collaborator the deployment workflow uses

        Args:
            config: DeploymentConfig instance supplying file paths, API
                credentials, webhook and health-check settings
        """
        self._config = config
        self._logger = logging.getLogger(f"{__name__}.DeploymentOrchestrator")

        # The word generator feeds both the password generator and the
        # env-file generator, so build it once and share it.
        words = WordGenerator(config.dict_file)
        passwords = PasswordGenerator(words)
        self._word_generator = words
        self._password_generator = passwords
        self._env_generator = EnvFileGenerator(
            config.env_file,
            words,
            passwords,
            config.base_domain,
            config.app_name,
        )

        # External services
        self._dns_manager = DNSManager(
            config.cloudflare_api_token,
            config.cloudflare_zone_id,
        )
        self._docker_manager = DockerManager(
            config.docker_compose_file,
            config.env_file,
        )
        self._webhook_notifier = WebhookNotifier(
            config.webhook_url,
            config.webhook_timeout,
            config.webhook_retries,
        )
        self._health_checker = HealthChecker(
            config.healthcheck_timeout,
            config.healthcheck_interval,
            config.verify_ssl,
        )

        # Bookkeeping: rollback history, file log and saved deployment configs
        self._tracker = DeploymentTracker()
        self._deployment_logger = DeploymentFileLogger()
        self._config_manager = DeploymentConfigManager()
def deploy(self) -> None:
    """
    Main deployment workflow

    Runs the phases in order: validation, environment generation, DNS
    setup, container deployment and health check. On success the result is
    reported via webhook, written to the deployment log and saved as a
    deployment config. On any exception the failure is reported, every
    action tracked during this run is rolled back, and the error is
    re-raised wrapped in DeploymentError.

    Raises:
        DeploymentError: If deployment fails
    """
    start_time = time.time()
    env_values = None

    # Start from an empty history: if this orchestrator is reused, a failure
    # in this run must not roll back actions recorded by an earlier run.
    self._tracker.clear()

    try:
        # Phase 1: Validation
        self._phase_validate()

        # Phase 2: Environment Generation (with retry on DNS conflicts)
        env_values = self._phase_generate_env_with_retries()

        # Send deployment_started webhook
        self._webhook_notifier.deployment_started(
            env_values.subdomain,
            env_values.url
        )

        # Phase 3: DNS Setup
        dns_record_id, dns_ip = self._phase_setup_dns(env_values)

        # Phase 4: Container Deployment
        containers = self._phase_deploy_containers()

        # Phase 5: Health Check
        self._phase_health_check(env_values.url)

        # Success
        duration = time.time() - start_time
        self._webhook_notifier.deployment_success(
            env_values.subdomain,
            env_values.url,
            duration
        )
        self._logger.info(
            f"✓ Deployment successful! URL: https://{env_values.url} "
            f"(took {duration:.1f}s)"
        )

        # Log success to file
        self._deployment_logger.log_success(
            env_values.url,
            env_values.subdomain,
            duration
        )

        # Save deployment configuration
        self._save_deployment_config(
            env_values,
            dns_record_id,
            dns_ip,
            containers
        )
    except Exception as e:
        self._logger.error(f"✗ Deployment failed: {e}")

        # Before environment generation succeeds there is no subdomain/URL
        # to report, so empty strings are sent instead.
        subdomain = env_values.subdomain if env_values else ""
        url = env_values.url if env_values else ""

        # Reporting is best-effort: a broken webhook or log file must never
        # prevent the rollback below from running.
        try:
            self._webhook_notifier.deployment_failed(subdomain, str(e), url)
        except Exception as notify_error:
            self._logger.error(f"Failed to send failure webhook: {notify_error}")

        try:
            self._deployment_logger.log_failure(url, subdomain, str(e))
        except Exception as log_error:
            self._logger.error(f"Failed to log deployment failure: {log_error}")

        # Rollback
        self._logger.info("Starting rollback...")
        self._rollback_all()
        raise DeploymentError(f"Deployment failed: {e}") from e
def _phase_validate(self) -> None:
"""
Phase 1: Pre-deployment validation
Raises:
ValidationError: If validation fails
"""
self._logger.info("═══ Phase 1: Validation ═══")
# Check system dependencies
self._validate_dependencies()
# Validate environment file
if not self._config.env_file.exists():
raise ValidationError(f"Env file not found: {self._config.env_file}")
# Validate Docker Compose file
try:
self._docker_manager.validate_compose_file()
except DockerError as e:
raise ValidationError(f"Invalid docker-compose.yml: {e}") from e
# Check external Docker network exists
self._validate_docker_network("proxy")
self._logger.info("✓ Validation complete")
def _validate_dependencies(self) -> None:
    """
    Validate system dependencies

    Confirms that the `docker` and `curl` executables can be found and
    that `docker info` exits successfully within 5 seconds.

    Raises:
        ValidationError: If dependencies are missing
    """
    import subprocess

    for cmd in ("docker", "curl"):
        if shutil.which(cmd):
            continue
        raise ValidationError(
            f"Required command not found: {cmd}. "
            f"Please install {cmd} and try again."
        )

    # Check Docker daemon is running
    try:
        daemon_probe = subprocess.run(
            ["docker", "info"],
            capture_output=True,
            timeout=5
        )
    except (subprocess.TimeoutExpired, FileNotFoundError) as e:
        raise ValidationError(f"Failed to check Docker daemon: {e}") from e

    if daemon_probe.returncode != 0:
        raise ValidationError(
            "Docker daemon is not running. Please start Docker."
        )
def _validate_docker_network(self, network_name: str) -> None:
    """
    Check external Docker network exists

    Runs `docker network inspect <network_name>` with a 5 second timeout
    and treats a non-zero exit status as "network missing".

    Args:
        network_name: Network name to check

    Raises:
        ValidationError: If network doesn't exist
    """
    import subprocess

    inspect_cmd = ["docker", "network", "inspect", network_name]
    try:
        inspection = subprocess.run(
            inspect_cmd,
            capture_output=True,
            timeout=5
        )
    except (subprocess.TimeoutExpired, FileNotFoundError) as e:
        raise ValidationError(
            f"Failed to check Docker network: {e}"
        ) from e

    if inspection.returncode != 0:
        raise ValidationError(
            f"Docker network '{network_name}' not found. "
            f"Please create it with: docker network create {network_name}"
        )
def _phase_generate_env_with_retries(self) -> EnvValues:
    """
    Phase 2: Generate environment with DNS conflict retry

    Generates candidate values up to `max_retries` times. A candidate is
    accepted when no DNS record exists for its URL, or when the DNS lookup
    itself fails (fail open). The accepted values are written to the .env
    file and the change is tracked for rollback.

    Returns:
        EnvValues with generated values

    Raises:
        DeploymentError: If unable to generate unique subdomain
    """
    self._logger.info("═══ Phase 2: Environment Generation ═══")

    for attempt in range(1, self._config.max_retries + 1):
        # Generate new values
        env_values = self._env_generator.generate_values()
        self._logger.info(f"Generated subdomain: {env_values.subdomain}")

        # Only the DNS lookup is guarded, so an error raised while writing
        # the .env file is never mistaken for a failed DNS check.
        try:
            conflict = self._dns_manager.check_record_exists(env_values.url)
        except DNSError as e:
            # If DNS check fails, proceed anyway (fail open)
            self._logger.warning(
                f"DNS check failed: {e}. "
                f"Assuming no conflict and proceeding..."
            )
            conflict = False
        else:
            if not conflict:
                self._logger.info(
                    f"✓ Subdomain available: {env_values.subdomain}"
                )

        if conflict:
            self._logger.warning(
                f"✗ DNS conflict for {env_values.url}, "
                f"regenerating... (attempt {attempt}/{self._config.max_retries})"
            )
            continue

        self._apply_env_values(env_values)
        return env_values

    raise DeploymentError(
        f"Failed to generate unique subdomain after {self._config.max_retries} attempts"
    )


def _apply_env_values(self, env_values: EnvValues) -> None:
    """Back up the .env file, track the change for rollback, then write the new values."""
    backup_path = self._env_generator.backup_env_file()

    # Track before writing: if the update fails part-way, the rollback can
    # still restore the .env file from the backup that already exists.
    self._tracker.record_action(DeploymentAction(
        action_type="env_updated",
        timestamp=datetime.now(),
        details={"env_values": asdict(env_values)},
        rollback_data={"backup_path": str(backup_path)}
    ))

    self._env_generator.update_env_file(
        env_values,
        dry_run=self._config.dry_run
    )
def _phase_setup_dns(self, env_values: EnvValues) -> tuple:
    """
    Phase 3: Add DNS record

    Looks up the public IP, adds a DNS record pointing the deployment URL
    at it, tracks the record for rollback and sends the dns_added webhook.

    Args:
        env_values: EnvValues with subdomain and URL

    Returns:
        Tuple of (record_id, ip)

    Raises:
        DNSError: If DNS setup fails
    """
    log = self._logger
    log.info("═══ Phase 3: DNS Setup ═══")

    # Get public IP
    public_ip = self._dns_manager.get_public_ip()
    log.info(f"Public IP: {public_ip}")

    # Add DNS record
    hostname = env_values.url
    record = self._dns_manager.add_record(
        hostname,
        public_ip,
        dry_run=self._config.dry_run
    )
    log.info(f"✓ DNS record added: {hostname} -> {public_ip}")

    # The record id is all the rollback needs to delete the record again.
    record_id = record.record_id
    tracked = DeploymentAction(
        action_type="dns_added",
        timestamp=datetime.now(),
        details={"hostname": hostname, "ip": public_ip},
        rollback_data={"record_id": record_id}
    )
    self._tracker.record_action(tracked)

    # Send webhook notification
    self._webhook_notifier.dns_added(hostname, public_ip)
    return record_id, public_ip
def _phase_deploy_containers(self) -> List:
    """
    Phase 4: Start Docker containers

    Pulls the images, starts the services and tracks the started
    containers for rollback.

    Returns:
        List of container information

    Raises:
        DockerError: If container deployment fails
    """
    log = self._logger
    dry_run = self._config.dry_run
    log.info("═══ Phase 4: Container Deployment ═══")

    # Pull images
    log.info("Pulling Docker images...")
    self._docker_manager.pull_images(dry_run=dry_run)

    # Start services
    log.info("Starting Docker services...")
    containers = self._docker_manager.start_services(dry_run=dry_run)
    log.info(
        f"✓ Docker services started: {len(containers)} containers"
    )

    # Rollback stops the whole service set, so no rollback_data is needed.
    container_details = [asdict(container) for container in containers]
    self._tracker.record_action(DeploymentAction(
        action_type="containers_started",
        timestamp=datetime.now(),
        details={"containers": container_details},
        rollback_data={}
    ))
    return containers
def _phase_health_check(self, url: str) -> None:
"""
Phase 5: Health check
Args:
url: URL to check (without https://)
Raises:
HealthCheckError: If health check fails
"""
self._logger.info("═══ Phase 5: Health Check ═══")
health_url = f"https://{url}"
start_time = time.time()
if not self._health_checker.check_health(
health_url,
dry_run=self._config.dry_run
):
raise HealthCheckError(f"Health check failed for {health_url}")
duration = time.time() - start_time
self._logger.info(f"✓ Health check passed (took {duration:.1f}s)")
# Send webhook notification
self._webhook_notifier.health_check_passed(url, duration)
def _rollback_all(self) -> None:
"""Rollback all tracked actions in reverse order"""
actions = list(reversed(self._tracker.get_actions()))
if not actions:
self._logger.info("No actions to rollback")
return
self._logger.info(f"Rolling back {len(actions)} actions...")
for action in actions:
try:
self._rollback_action(action)
except Exception as e:
# Log but don't fail rollback
self._logger.error(
f"Failed to rollback action {action.action_type}: {e}"
)
self._logger.info("Rollback complete")
def _rollback_action(self, action: DeploymentAction) -> None:
    """
    Dispatch one action to the rollback handler for its type

    Unknown action types are logged as a warning and otherwise ignored.

    Args:
        action: DeploymentAction to rollback
    """
    # Handlers are looked up by name so only the selected one is resolved.
    handler_names = {
        "dns_added": "_rollback_dns",
        "containers_started": "_rollback_containers",
        "env_updated": "_rollback_env",
    }
    handler_name = handler_names.get(action.action_type)
    if handler_name is None:
        self._logger.warning(f"Unknown action type: {action.action_type}")
        return
    getattr(self, handler_name)(action)
def _rollback_dns(self, action: DeploymentAction) -> None:
    """
    Rollback DNS changes

    Removes the DNS record whose id is stored under "record_id" in the
    action's rollback data. Does nothing when no id was stored; a DNSError
    during removal is logged rather than raised.

    Args:
        action: DeploymentAction with DNS details
    """
    record_id = action.rollback_data.get("record_id")
    if not record_id:
        return

    self._logger.info(f"Rolling back DNS record: {record_id}")
    try:
        self._dns_manager.remove_record_by_id(
            record_id,
            dry_run=self._config.dry_run
        )
        self._logger.info("✓ DNS record removed")
    except DNSError as e:
        self._logger.error(f"Failed to remove DNS record: {e}")
def _rollback_containers(self, action: DeploymentAction) -> None:
    """
    Stop the Docker services started by the deployment

    A DockerError while stopping is logged rather than raised.

    Args:
        action: DeploymentAction with container details (not consulted;
            the docker manager stops the whole service set)
    """
    self._logger.info("Rolling back Docker containers")
    try:
        self._docker_manager.stop_services(dry_run=self._config.dry_run)
    except DockerError as e:
        self._logger.error(f"Failed to stop Docker services: {e}")
    else:
        self._logger.info("✓ Docker services stopped")
def _rollback_env(self, action: DeploymentAction) -> None:
    """
    Restore .env file from backup

    Reads the backup location from "backup_path" in the action's rollback
    data. Does nothing when no path was stored, warns when the backup file
    no longer exists, and logs (rather than raises) any restore failure.

    Args:
        action: DeploymentAction with backup path
    """
    stored_path = action.rollback_data.get("backup_path")
    if not stored_path:
        return

    backup_path = Path(stored_path)
    if not backup_path.exists():
        self._logger.warning(f"Backup file not found: {backup_path}")
        return

    self._logger.info(f"Rolling back .env file from {backup_path}")
    try:
        self._env_generator.restore_env_file(backup_path)
        self._logger.info("✓ .env file restored")
    except Exception as e:
        self._logger.error(f"Failed to restore .env file: {e}")
def _save_deployment_config(
    self,
    env_values: EnvValues,
    dns_record_id: str,
    dns_ip: str,
    containers: List
) -> None:
    """
    Save deployment configuration for later cleanup

    Best-effort: any exception raised while building or saving the
    metadata is logged as a warning and not propagated.

    Args:
        env_values: EnvValues with deployment info
        dns_record_id: Cloudflare DNS record ID
        dns_ip: IP address used in DNS
        containers: List of container information
    """
    try:
        # Entries without a `name` attribute are skipped.
        container_names = [
            container.name
            for container in containers
            if hasattr(container, 'name')
        ]

        # Volume and network names are derived from the compose project
        # name plus fixed suffixes.
        project = env_values.compose_project_name
        volumes = [f"{project}_db_data", f"{project}_wp_data"]
        networks = [f"{project}_internal"]

        metadata = DeploymentMetadata(
            subdomain=env_values.subdomain,
            url=env_values.url,
            domain=env_values.domain,
            compose_project_name=env_values.compose_project_name,
            db_name=env_values.db_name,
            db_user=env_values.db_user,
            deployment_timestamp=datetime.now().isoformat(),
            dns_record_id=dns_record_id,
            dns_ip=dns_ip,
            containers=container_names,
            volumes=volumes,
            networks=networks,
            env_file_path=str(self._config.env_file.absolute())
        )

        saved_to = self._config_manager.save_deployment(metadata)
        self._logger.info(f"✓ Deployment config saved: {saved_to}")
    except Exception as e:
        self._logger.warning(f"Failed to save deployment config: {e}")