Keyboard shortcuts

Press ← or → to navigate between chapters

Press S or / to search in the book

Press ? to show this help

Press Esc to hide this help

brokkr-agent::deployment_health Rust

Deployment Health Checker Module

Monitors the health of deployed Kubernetes resources and reports status to the broker. Detects common issues like ImagePullBackOff, CrashLoopBackOff, OOMKilled, and other problematic conditions.

Structs

brokkr-agent::deployment_health::DeploymentHealthStatus

pub

Derives: Debug, Clone, Serialize, Deserialize

Health status for a deployment object

Fields

| Name | Type | Description |
| --- | --- | --- |
| id | Uuid | The deployment object ID |
| status | String | Overall health status: healthy, degraded, failing, unknown |
| summary | HealthSummary | Structured health summary |
| checked_at | DateTime<Utc> | When the health was checked |

brokkr-agent::deployment_health::HealthSummary

pub

Derives: Debug, Clone, Default, Serialize, Deserialize

Summary of health information for a deployment

Fields

| Name | Type | Description |
| --- | --- | --- |
| pods_ready | usize | Number of pods in ready state |
| pods_total | usize | Total number of pods |
| conditions | Vec<String> | List of detected problematic conditions |
| resources | Vec<ResourceHealth> | Per-resource health details |

brokkr-agent::deployment_health::ResourceHealth

pub

Derives: Debug, Clone, Serialize, Deserialize

Health status of an individual resource

Fields

| Name | Type | Description |
| --- | --- | --- |
| kind | String | Kind of the resource (e.g., “Pod”, “Deployment”) |
| name | String | Name of the resource |
| namespace | String | Namespace of the resource |
| ready | bool | Whether the resource is ready |
| message | Option<String> | Human-readable status message |

brokkr-agent::deployment_health::HealthChecker

pub

Checks deployment health for Kubernetes resources

Fields

| Name | Type | Description |
| --- | --- | --- |
| k8s_client | Client | Kubernetes API client used to list pods |

Methods

new pub
#![allow(unused)]
fn main() {
fn new (k8s_client : Client) -> Self
}

Creates a new HealthChecker instance

Source
#![allow(unused)]
fn main() {
    /// Creates a new `HealthChecker` that stores the given Kubernetes client.
    ///
    /// The client is kept in the `k8s_client` field for use by later health checks.
    pub fn new(k8s_client: Client) -> Self {
        Self { k8s_client }
    }
}
check_deployment_object pub

async

#![allow(unused)]
fn main() {
async fn check_deployment_object (& self , deployment_object_id : Uuid ,) -> Result < DeploymentHealthStatus , Box < dyn std :: error :: Error + Send + Sync > >
}

Checks the health of a specific deployment object by ID

Finds all pods labeled with the deployment object ID and analyzes their status to determine overall health.

Source
#![allow(unused)]
fn main() {
    pub async fn check_deployment_object(
        &self,
        deployment_object_id: Uuid,
    ) -> Result<DeploymentHealthStatus, Box<dyn std::error::Error + Send + Sync>> {
        let checked_at = Utc::now();

        // Find pods matching this deployment object
        let pods = self.find_pods_for_deployment(deployment_object_id).await?;

        let mut summary = HealthSummary::default();
        let mut overall_status = "healthy";
        let mut conditions_set: std::collections::HashSet<String> =
            std::collections::HashSet::new();

        summary.pods_total = pods.len();

        for pod in &pods {
            let pod_name = pod.metadata.name.clone().unwrap_or_default();
            let pod_namespace = pod.metadata.namespace.clone().unwrap_or_default();

            // Check if pod is ready
            let pod_ready = is_pod_ready(pod);
            if pod_ready {
                summary.pods_ready += 1;
            }

            // Analyze pod status for issues
            if let Some(pod_status) = &pod.status {
                // Check container statuses for waiting/terminated issues
                if let Some(container_statuses) = &pod_status.container_statuses {
                    for cs in container_statuses {
                        if let Some(state) = &cs.state {
                            // Check waiting state
                            if let Some(waiting) = &state.waiting {
                                if let Some(reason) = &waiting.reason {
                                    if DEGRADED_CONDITIONS.contains(&reason.as_str()) {
                                        conditions_set.insert(reason.clone());
                                        overall_status = "degraded";

                                        summary.resources.push(ResourceHealth {
                                            kind: "Pod".to_string(),
                                            name: pod_name.clone(),
                                            namespace: pod_namespace.clone(),
                                            ready: false,
                                            message: waiting.message.clone(),
                                        });
                                    }
                                }
                            }

                            // Check terminated state for issues
                            if let Some(terminated) = &state.terminated {
                                if let Some(reason) = &terminated.reason {
                                    if TERMINATED_ISSUES.contains(&reason.as_str()) {
                                        conditions_set.insert(reason.clone());
                                        overall_status = "degraded";
                                    }
                                }
                            }
                        }

                        // Check last terminated state for recent crashes
                        if let Some(last_state) = &cs.last_state {
                            if let Some(terminated) = &last_state.terminated {
                                if let Some(reason) = &terminated.reason {
                                    if reason == "OOMKilled" {
                                        conditions_set.insert("OOMKilled".to_string());
                                        overall_status = "degraded";
                                    }
                                }
                            }
                        }
                    }
                }

                // Check init container statuses
                if let Some(init_statuses) = &pod_status.init_container_statuses {
                    for cs in init_statuses {
                        if let Some(state) = &cs.state {
                            if let Some(waiting) = &state.waiting {
                                if let Some(reason) = &waiting.reason {
                                    if DEGRADED_CONDITIONS.contains(&reason.as_str()) {
                                        conditions_set.insert(format!("InitContainer:{}", reason));
                                        overall_status = "degraded";
                                    }
                                }
                            }
                        }
                    }
                }

                // Check pod phase
                if let Some(phase) = &pod_status.phase {
                    match phase.as_str() {
                        "Failed" => {
                            overall_status = "failing";
                            conditions_set.insert("PodFailed".to_string());
                        }
                        "Unknown" => {
                            if overall_status != "failing" && overall_status != "degraded" {
                                overall_status = "unknown";
                            }
                        }
                        "Pending" => {
                            // Check if pending for too long might indicate an issue
                            // For now, we just note it's pending
                            if overall_status == "healthy" {
                                // Could add logic to check if pending too long
                            }
                        }
                        _ => {}
                    }
                }
            }
        }

        summary.conditions = conditions_set.into_iter().collect();

        // If no pods found and we expected some, mark as unknown
        if summary.pods_total == 0 {
            overall_status = "unknown";
        }

        Ok(DeploymentHealthStatus {
            id: deployment_object_id,
            status: overall_status.to_string(),
            summary,
            checked_at,
        })
    }
}
find_pods_for_deployment private

async

#![allow(unused)]
fn main() {
async fn find_pods_for_deployment (& self , deployment_object_id : Uuid ,) -> Result < Vec < Pod > , Box < dyn std :: error :: Error + Send + Sync > >
}

Finds all pods labeled with the given deployment object ID

Source
#![allow(unused)]
fn main() {
    /// Finds all pods labeled with the given deployment object ID.
    ///
    /// Builds a `DEPLOYMENT_OBJECT_ID_LABEL=<id>` label selector and lists
    /// matching pods through an all-namespaces pod API.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the list request.
    async fn find_pods_for_deployment(
        &self,
        deployment_object_id: Uuid,
    ) -> Result<Vec<Pod>, Box<dyn std::error::Error + Send + Sync>> {
        // Select only the pods that carry this deployment object's label.
        let selector = format!("{}={}", DEPLOYMENT_OBJECT_ID_LABEL, deployment_object_id);
        let params = ListParams::default().labels(&selector);

        // The pod API is not scoped to a single namespace.
        let all_pods: Api<Pod> = Api::all(self.k8s_client.clone());
        let matching = all_pods.list(&params).await?;

        Ok(matching.items)
    }
}
check_deployment_objects pub

async

#![allow(unused)]
fn main() {
async fn check_deployment_objects (& self , deployment_object_ids : & [Uuid] ,) -> Vec < DeploymentHealthStatus >
}

Checks health for multiple deployment objects

Source
#![allow(unused)]
fn main() {
    /// Checks health for multiple deployment objects.
    ///
    /// Each ID is checked in turn with `check_deployment_object`. A failed
    /// check does not abort the batch: the failure is logged with `warn!` and
    /// an `unknown` status with a default summary is reported for that ID.
    ///
    /// Returns one status per input ID, in input order.
    pub async fn check_deployment_objects(
        &self,
        deployment_object_ids: &[Uuid],
    ) -> Vec<DeploymentHealthStatus> {
        let mut statuses = Vec::with_capacity(deployment_object_ids.len());

        for id in deployment_object_ids.iter().copied() {
            let status = self
                .check_deployment_object(id)
                .await
                .unwrap_or_else(|e| {
                    warn!("Failed to check health for deployment object {}: {}", id, e);
                    // Fall back to an "unknown" placeholder so every ID gets an entry.
                    DeploymentHealthStatus {
                        id,
                        status: "unknown".to_string(),
                        summary: HealthSummary::default(),
                        checked_at: Utc::now(),
                    }
                });
            statuses.push(status);
        }

        statuses
    }
}

brokkr-agent::deployment_health::HealthStatusUpdate

pub

Derives: Debug, Clone, Serialize, Deserialize

Request body for sending health status updates to the broker

Fields

| Name | Type | Description |
| --- | --- | --- |
| deployment_objects | Vec<DeploymentObjectHealthUpdate> | List of deployment object health updates |

brokkr-agent::deployment_health::DeploymentObjectHealthUpdate

pub

Derives: Debug, Clone, Serialize, Deserialize

Health update for a single deployment object (matches broker API)

Fields

| Name | Type | Description |
| --- | --- | --- |
| id | Uuid | The deployment object ID |
| status | String | Health status: healthy, degraded, failing, or unknown |
| summary | Option<HealthSummary> | Structured health summary |
| checked_at | DateTime<Utc> | When the health was checked |

Functions

brokkr-agent::deployment_health::is_pod_ready

private

#![allow(unused)]
fn main() {
fn is_pod_ready (pod : & Pod) -> bool
}

Checks if a pod is in ready state

Source
#![allow(unused)]
fn main() {
/// Checks if a pod is in ready state.
///
/// A pod counts as ready when its status lists a condition whose type is
/// `"Ready"` and whose status is `"True"`. A pod with no status or no
/// conditions is reported as not ready.
fn is_pod_ready(pod: &Pod) -> bool {
    let conditions = pod
        .status
        .as_ref()
        .and_then(|status| status.conditions.as_ref());

    match conditions {
        Some(list) => list
            .iter()
            .any(|cond| cond.type_ == "Ready" && cond.status == "True"),
        None => false,
    }
}
}