brokkr-agent::deployment_health Rust

Deployment Health Checker Module

Monitors the health of deployed Kubernetes resources and reports status to the broker. Detects common issues like ImagePullBackOff, CrashLoopBackOff, OOMKilled, and other problematic conditions.

Structs

`brokkr-agent::deployment_health::DeploymentHealthStatus`

pub

Derives: Debug, Clone, Serialize, Deserialize

Health status for a deployment object

Fields

Name	Type	Description
`id`	`Uuid`	The deployment object ID
`status`	`String`	Overall health status: healthy, degraded, failing, or unknown
`summary`	`HealthSummary`	Structured health summary
`checked_at`	`DateTime < Utc >`	When the health was checked

`brokkr-agent::deployment_health::HealthSummary`

pub

Derives: Debug, Clone, Default, Serialize, Deserialize

Summary of health information for a deployment

Fields

Name	Type	Description
`pods_ready`	`usize`	Number of pods in ready state
`pods_total`	`usize`	Total number of pods
`conditions`	`Vec < String >`	List of detected problematic conditions
`resources`	`Vec < ResourceHealth >`	Per-resource health details

`brokkr-agent::deployment_health::ResourceHealth`

pub

Derives: Debug, Clone, Serialize, Deserialize

Health status of an individual resource

Fields

Name	Type	Description
`kind`	`String`	Kind of the resource (e.g., “Pod”, “Deployment”)
`name`	`String`	Name of the resource
`namespace`	`String`	Namespace of the resource
`ready`	`bool`	Whether the resource is ready
`message`	`Option < String >`	Human-readable status message

`brokkr-agent::deployment_health::HealthChecker`

pub

Checks deployment health for Kubernetes resources

Fields

Name	Type	Description
`k8s_client`	`Client`	Kubernetes API client used to list pods

Methods

`new` pub

#![allow(unused)]
fn main() {
fn new (k8s_client : Client) -> Self
}

Creates a new HealthChecker instance

Source

#![allow(unused)]
fn main() {
    /// Creates a new `HealthChecker` that stores the given Kubernetes client.
    ///
    /// The client is kept in the `k8s_client` field and is not used or
    /// validated at construction time.
    pub fn new(k8s_client: Client) -> Self {
        Self { k8s_client }
    }
}

`check_deployment_object` pub

async

#![allow(unused)]
fn main() {
async fn check_deployment_object (& self , deployment_object_id : Uuid ,) -> Result < DeploymentHealthStatus , Box < dyn std :: error :: Error + Send + Sync > >
}

Checks the health of a specific deployment object by ID

Finds all pods labeled with the deployment object ID and analyzes their status to determine overall health.

Source

#![allow(unused)]
fn main() {
    /// Checks the health of a specific deployment object by ID.
    ///
    /// Lists every pod labeled with `deployment_object_id`, counts how many are
    /// ready, and inspects container state, init-container state, and pod phase
    /// to derive one overall status string: `"healthy"`, `"unknown"`,
    /// `"degraded"`, or `"failing"`.
    ///
    /// The overall status only ever escalates while pods are scanned, so a
    /// `"failing"` verdict caused by one pod is never replaced by `"degraded"`
    /// from a pod examined later. Detected condition names are returned in
    /// sorted order, so repeated checks of an unchanged deployment produce
    /// identical summaries.
    ///
    /// If no pods match the label, the status is `"unknown"`.
    ///
    /// # Errors
    ///
    /// Returns the error from `find_pods_for_deployment` if the pod lookup fails.
    pub async fn check_deployment_object(
        &self,
        deployment_object_id: Uuid,
    ) -> Result<DeploymentHealthStatus, Box<dyn std::error::Error + Send + Sync>> {
        // Ranks a status string so that statuses can be compared by severity.
        fn severity(status: &str) -> u8 {
            match status {
                "failing" => 3,
                "degraded" => 2,
                "unknown" => 1,
                _ => 0,
            }
        }

        // Returns the more severe of the two statuses; ties keep `current`.
        fn escalate(current: &'static str, candidate: &'static str) -> &'static str {
            if severity(candidate) > severity(current) {
                candidate
            } else {
                current
            }
        }

        let checked_at = Utc::now();

        // Find pods matching this deployment object
        let pods = self.find_pods_for_deployment(deployment_object_id).await?;

        let mut summary = HealthSummary::default();
        let mut overall_status: &'static str = "healthy";
        // A BTreeSet de-duplicates the conditions and keeps them in a stable,
        // sorted order for reporting.
        let mut conditions_set: std::collections::BTreeSet<String> =
            std::collections::BTreeSet::new();

        summary.pods_total = pods.len();

        for pod in &pods {
            let pod_name = pod.metadata.name.clone().unwrap_or_default();
            let pod_namespace = pod.metadata.namespace.clone().unwrap_or_default();

            if is_pod_ready(pod) {
                summary.pods_ready += 1;
            }

            // Without a status block there is nothing further to analyze.
            let pod_status = match &pod.status {
                Some(status) => status,
                None => continue,
            };

            // Regular containers: current waiting/terminated state plus the
            // previous termination reason.
            for cs in pod_status.container_statuses.iter().flatten() {
                if let Some(state) = &cs.state {
                    if let Some(waiting) = &state.waiting {
                        if let Some(reason) = &waiting.reason {
                            if DEGRADED_CONDITIONS.contains(&reason.as_str()) {
                                conditions_set.insert(reason.clone());
                                overall_status = escalate(overall_status, "degraded");

                                summary.resources.push(ResourceHealth {
                                    kind: "Pod".to_string(),
                                    name: pod_name.clone(),
                                    namespace: pod_namespace.clone(),
                                    ready: false,
                                    message: waiting.message.clone(),
                                });
                            }
                        }
                    }

                    if let Some(terminated) = &state.terminated {
                        if let Some(reason) = &terminated.reason {
                            if TERMINATED_ISSUES.contains(&reason.as_str()) {
                                conditions_set.insert(reason.clone());
                                overall_status = escalate(overall_status, "degraded");
                            }
                        }
                    }
                }

                // A container that is running again may still have been
                // OOM-killed on its previous run.
                let last_reason = cs
                    .last_state
                    .as_ref()
                    .and_then(|last| last.terminated.as_ref())
                    .and_then(|terminated| terminated.reason.as_deref());
                if last_reason == Some("OOMKilled") {
                    conditions_set.insert("OOMKilled".to_string());
                    overall_status = escalate(overall_status, "degraded");
                }
            }

            // Init containers: only the waiting reason is inspected, and the
            // condition is prefixed to distinguish it from regular containers.
            for cs in pod_status.init_container_statuses.iter().flatten() {
                let waiting_reason = cs
                    .state
                    .as_ref()
                    .and_then(|state| state.waiting.as_ref())
                    .and_then(|waiting| waiting.reason.as_ref());
                if let Some(reason) = waiting_reason {
                    if DEGRADED_CONDITIONS.contains(&reason.as_str()) {
                        conditions_set.insert(format!("InitContainer:{}", reason));
                        overall_status = escalate(overall_status, "degraded");
                    }
                }
            }

            // Pod phase
            match pod_status.phase.as_deref() {
                Some("Failed") => {
                    overall_status = escalate(overall_status, "failing");
                    conditions_set.insert("PodFailed".to_string());
                }
                Some("Unknown") => {
                    overall_status = escalate(overall_status, "unknown");
                }
                // "Pending" is not treated as a problem for now; a
                // pending-too-long check could be added here later.
                _ => {}
            }
        }

        summary.conditions = conditions_set.into_iter().collect();

        // If no pods found and we expected some, mark as unknown
        if summary.pods_total == 0 {
            overall_status = "unknown";
        }

        Ok(DeploymentHealthStatus {
            id: deployment_object_id,
            status: overall_status.to_string(),
            summary,
            checked_at,
        })
    }
}

`find_pods_for_deployment` private

async

#![allow(unused)]
fn main() {
async fn find_pods_for_deployment (& self , deployment_object_id : Uuid ,) -> Result < Vec < Pod > , Box < dyn std :: error :: Error + Send + Sync > >
}

Finds all pods labeled with the given deployment object ID

Source

#![allow(unused)]
fn main() {
    /// Finds all pods labeled with the given deployment object ID.
    ///
    /// Builds a `<DEPLOYMENT_OBJECT_ID_LABEL>=<id>` label selector and lists
    /// matching pods across all namespaces.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the pod list request.
    async fn find_pods_for_deployment(
        &self,
        deployment_object_id: Uuid,
    ) -> Result<Vec<Pod>, Box<dyn std::error::Error + Send + Sync>> {
        // Selector of the form `<label-key>=<uuid>`.
        let selector = format!("{}={}", DEPLOYMENT_OBJECT_ID_LABEL, deployment_object_id);
        let params = ListParams::default().labels(&selector);

        // `Api::all` is not scoped to a single namespace.
        let cluster_pods: Api<Pod> = Api::all(self.k8s_client.clone());
        let matching = cluster_pods.list(&params).await?;

        Ok(matching.items)
    }
}

`check_deployment_objects` pub

async

#![allow(unused)]
fn main() {
async fn check_deployment_objects (& self , deployment_object_ids : & [Uuid] ,) -> Vec < DeploymentHealthStatus >
}

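Checks health for multiple deployment objects. If the check for an object fails, the failure is logged and that object is reported with status unknown and an empty summary.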

Source

#![allow(unused)]
fn main() {
    /// Checks health for multiple deployment objects.
    ///
    /// The IDs are checked one after another, and the returned vector holds
    /// exactly one entry per input ID, in input order. A failed check does not
    /// abort the batch: the error is logged with `warn!` and that object is
    /// reported as `"unknown"` with a default (empty) summary.
    pub async fn check_deployment_objects(
        &self,
        deployment_object_ids: &[Uuid],
    ) -> Vec<DeploymentHealthStatus> {
        let mut statuses = Vec::with_capacity(deployment_object_ids.len());

        for &id in deployment_object_ids {
            let status = self
                .check_deployment_object(id)
                .await
                .unwrap_or_else(|e| {
                    warn!("Failed to check health for deployment object {}: {}", id, e);
                    // Fall back to an "unknown" report so the caller still
                    // gets an entry for this ID.
                    DeploymentHealthStatus {
                        id,
                        status: "unknown".to_string(),
                        summary: HealthSummary::default(),
                        checked_at: Utc::now(),
                    }
                });
            statuses.push(status);
        }

        statuses
    }
}

`brokkr-agent::deployment_health::HealthStatusUpdate`

pub

Derives: Debug, Clone, Serialize, Deserialize

Request body for sending health status updates to the broker

Fields

Name	Type	Description
`deployment_objects`	`Vec < DeploymentObjectHealthUpdate >`	List of deployment object health updates

`brokkr-agent::deployment_health::DeploymentObjectHealthUpdate`

pub

Derives: Debug, Clone, Serialize, Deserialize

Health update for a single deployment object (matches broker API)

Fields

Name	Type	Description
`id`	`Uuid`	The deployment object ID
`status`	`String`	Health status: healthy, degraded, failing, or unknown
`summary`	`Option < HealthSummary >`	Structured health summary
`checked_at`	`DateTime < Utc >`	When the health was checked

Functions

`brokkr-agent::deployment_health::is_pod_ready`

private

#![allow(unused)]
fn main() {
fn is_pod_ready (pod : & Pod) -> bool
}

Checks if a pod is in ready state

Source

#![allow(unused)]
fn main() {
/// Checks if a pod is in ready state.
///
/// Returns `true` only when the pod has a status with a condition list and
/// that list contains a condition of type `"Ready"` whose status is `"True"`.
/// A missing status or a missing condition list yields `false`.
fn is_pod_ready(pod: &Pod) -> bool {
    let conditions = pod
        .status
        .as_ref()
        .and_then(|status| status.conditions.as_ref());

    match conditions {
        Some(list) => list
            .iter()
            .any(|cond| cond.type_ == "Ready" && cond.status == "True"),
        None => false,
    }
}
}

Keyboard shortcuts

Brokkr