brokkr-agent::deployment_health Rust
Deployment Health Checker Module
Monitors the health of deployed Kubernetes resources and reports status to the broker. Detects common issues like ImagePullBackOff, CrashLoopBackOff, OOMKilled, and other problematic conditions.
Structs
brokkr-agent::deployment_health::DeploymentHealthStatus
pub
Derives: Debug, Clone, Serialize, Deserialize
Health status for a deployment object
Fields
| Name | Type | Description |
|---|---|---|
id | Uuid | The deployment object ID |
status | String | Overall health status: healthy, degraded, failing, unknown |
summary | HealthSummary | Structured health summary |
checked_at | DateTime < Utc > | When the health was checked |
brokkr-agent::deployment_health::HealthSummary
pub
Derives: Debug, Clone, Default, Serialize, Deserialize
Summary of health information for a deployment
Fields
| Name | Type | Description |
|---|---|---|
pods_ready | usize | Number of pods in ready state |
pods_total | usize | Total number of pods |
conditions | Vec < String > | List of detected problematic conditions |
resources | Vec < ResourceHealth > | Per-resource health details |
brokkr-agent::deployment_health::ResourceHealth
pub
Derives: Debug, Clone, Serialize, Deserialize
Health status of an individual resource
Fields
| Name | Type | Description |
|---|---|---|
kind | String | Kind of the resource (e.g., “Pod”, “Deployment”) |
name | String | Name of the resource |
namespace | String | Namespace of the resource |
ready | bool | Whether the resource is ready |
message | Option < String > | Human-readable status message |
brokkr-agent::deployment_health::HealthChecker
pub
Checks deployment health for Kubernetes resources
Fields
| Name | Type | Description |
|---|---|---|
k8s_client | Client | Kubernetes client used to query pods |
Methods
new pub
#![allow(unused)]
fn main() {
fn new (k8s_client : Client) -> Self
}
Creates a new HealthChecker instance
Source
#![allow(unused)]
fn main() {
/// Creates a new `HealthChecker` instance.
///
/// The given `k8s_client` is stored on the checker and is cloned whenever
/// the checker lists pods.
pub fn new(k8s_client: Client) -> Self {
Self { k8s_client }
}
}
check_deployment_object pub
async
#![allow(unused)]
fn main() {
async fn check_deployment_object (& self , deployment_object_id : Uuid ,) -> Result < DeploymentHealthStatus , Box < dyn std :: error :: Error + Send + Sync > >
}
Checks the health of a specific deployment object by ID
Finds all pods labeled with the deployment object ID and analyzes their status to determine overall health.
Source
#![allow(unused)]
fn main() {
/// Checks the health of a specific deployment object by ID.
///
/// Finds all pods labeled with the deployment object ID and inspects their
/// container states, init container states, and phase to derive an overall
/// status plus a structured summary.
///
/// The overall status only ever escalates while pods are scanned, using the
/// order `healthy` < `unknown` < `degraded` < `failing`, so the result does
/// not depend on the order in which pods are listed. When no pods match the
/// label, the status is reported as `unknown`.
///
/// Detected conditions are de-duplicated and returned in sorted order.
///
/// # Errors
///
/// Returns an error if listing the pods for the deployment object fails.
pub async fn check_deployment_object(
    &self,
    deployment_object_id: Uuid,
) -> Result<DeploymentHealthStatus, Box<dyn std::error::Error + Send + Sync>> {
    // Severity rank of a status string; unrecognized values rank lowest.
    fn severity(status: &str) -> u8 {
        match status {
            "failing" => 3,
            "degraded" => 2,
            "unknown" => 1,
            _ => 0,
        }
    }

    // Raises `current` to `candidate` only when `candidate` is strictly more
    // severe, so a later pod can never downgrade an earlier, worse finding.
    fn escalate(current: &mut &'static str, candidate: &'static str) {
        if severity(candidate) > severity(*current) {
            *current = candidate;
        }
    }

    let checked_at = Utc::now();

    // Find pods matching this deployment object
    let pods = self.find_pods_for_deployment(deployment_object_id).await?;

    let mut summary = HealthSummary::default();
    let mut overall_status: &'static str = "healthy";
    // A BTreeSet de-duplicates conditions and yields them in a stable order.
    let mut conditions_set: std::collections::BTreeSet<String> =
        std::collections::BTreeSet::new();

    summary.pods_total = pods.len();

    for pod in &pods {
        let pod_name = pod.metadata.name.clone().unwrap_or_default();
        let pod_namespace = pod.metadata.namespace.clone().unwrap_or_default();

        // Check if pod is ready
        if is_pod_ready(pod) {
            summary.pods_ready += 1;
        }

        // Without a status there is nothing further to analyze for this pod.
        let pod_status = match &pod.status {
            Some(status) => status,
            None => continue,
        };

        // Check container statuses for waiting/terminated issues
        for cs in pod_status.container_statuses.iter().flatten() {
            if let Some(state) = &cs.state {
                // Check waiting state
                if let Some(waiting) = &state.waiting {
                    if let Some(reason) = &waiting.reason {
                        if DEGRADED_CONDITIONS.contains(&reason.as_str()) {
                            conditions_set.insert(reason.clone());
                            escalate(&mut overall_status, "degraded");
                            summary.resources.push(ResourceHealth {
                                kind: "Pod".to_string(),
                                name: pod_name.clone(),
                                namespace: pod_namespace.clone(),
                                ready: false,
                                message: waiting.message.clone(),
                            });
                        }
                    }
                }

                // Check terminated state for issues
                if let Some(terminated) = &state.terminated {
                    if let Some(reason) = &terminated.reason {
                        if TERMINATED_ISSUES.contains(&reason.as_str()) {
                            conditions_set.insert(reason.clone());
                            escalate(&mut overall_status, "degraded");
                        }
                    }
                }
            }

            // Check last terminated state for recent crashes
            let last_terminated_reason = cs
                .last_state
                .as_ref()
                .and_then(|last_state| last_state.terminated.as_ref())
                .and_then(|terminated| terminated.reason.as_ref());
            if let Some(reason) = last_terminated_reason {
                if reason == "OOMKilled" {
                    conditions_set.insert("OOMKilled".to_string());
                    escalate(&mut overall_status, "degraded");
                }
            }
        }

        // Check init container statuses
        for cs in pod_status.init_container_statuses.iter().flatten() {
            let waiting_reason = cs
                .state
                .as_ref()
                .and_then(|state| state.waiting.as_ref())
                .and_then(|waiting| waiting.reason.as_ref());
            if let Some(reason) = waiting_reason {
                if DEGRADED_CONDITIONS.contains(&reason.as_str()) {
                    conditions_set.insert(format!("InitContainer:{}", reason));
                    escalate(&mut overall_status, "degraded");
                }
            }
        }

        // Check pod phase
        match pod_status.phase.as_deref() {
            Some("Failed") => {
                escalate(&mut overall_status, "failing");
                conditions_set.insert("PodFailed".to_string());
            }
            Some("Unknown") => escalate(&mut overall_status, "unknown"),
            // "Pending" is currently not treated as a problem; logic to flag
            // pods that stay pending for too long could be added here.
            _ => {}
        }
    }

    summary.conditions = conditions_set.into_iter().collect();

    // If no pods were found, health cannot be determined: mark as unknown
    if summary.pods_total == 0 {
        overall_status = "unknown";
    }

    Ok(DeploymentHealthStatus {
        id: deployment_object_id,
        status: overall_status.to_string(),
        summary,
        checked_at,
    })
}
}
find_pods_for_deployment private
async
#![allow(unused)]
fn main() {
async fn find_pods_for_deployment (& self , deployment_object_id : Uuid ,) -> Result < Vec < Pod > , Box < dyn std :: error :: Error + Send + Sync > >
}
Finds all pods labeled with the given deployment object ID
Source
#![allow(unused)]
fn main() {
/// Lists every pod, in any namespace, that carries the deployment object
/// label set to `deployment_object_id`.
///
/// # Errors
///
/// Returns an error if the pod list request fails.
async fn find_pods_for_deployment(
    &self,
    deployment_object_id: Uuid,
) -> Result<Vec<Pod>, Box<dyn std::error::Error + Send + Sync>> {
    // Selector of the form `<label>=<deployment object id>`
    let selector = format!("{}={}", DEPLOYMENT_OBJECT_ID_LABEL, deployment_object_id);
    let params = ListParams::default().labels(&selector);

    // Cluster-wide pod API: pods are not restricted to a single namespace
    let cluster_pods: Api<Pod> = Api::all(self.k8s_client.clone());
    let matched = cluster_pods.list(&params).await?;

    Ok(matched.items)
}
}
check_deployment_objects pub
async
#![allow(unused)]
fn main() {
async fn check_deployment_objects (& self , deployment_object_ids : & [Uuid] ,) -> Vec < DeploymentHealthStatus >
}
Checks health for multiple deployment objects
Source
#![allow(unused)]
fn main() {
/// Checks health for multiple deployment objects, one after another.
///
/// Returns one status per input ID, in input order. A failed check is
/// logged as a warning and reported as `unknown` with an empty summary
/// instead of aborting the remaining checks.
pub async fn check_deployment_objects(
    &self,
    deployment_object_ids: &[Uuid],
) -> Vec<DeploymentHealthStatus> {
    let mut statuses = Vec::new();

    for &object_id in deployment_object_ids {
        let outcome = self.check_deployment_object(object_id).await;

        // Fall back to an "unknown" placeholder when the check itself failed
        let status = outcome.unwrap_or_else(|err| {
            warn!("Failed to check health for deployment object {}: {}", object_id, err);
            DeploymentHealthStatus {
                id: object_id,
                status: "unknown".to_string(),
                summary: HealthSummary::default(),
                checked_at: Utc::now(),
            }
        });

        statuses.push(status);
    }

    statuses
}
}
brokkr-agent::deployment_health::HealthStatusUpdate
pub
Derives: Debug, Clone, Serialize, Deserialize
Request body for sending health status updates to the broker
Fields
| Name | Type | Description |
|---|---|---|
deployment_objects | Vec < DeploymentObjectHealthUpdate > | List of deployment object health updates |
brokkr-agent::deployment_health::DeploymentObjectHealthUpdate
pub
Derives: Debug, Clone, Serialize, Deserialize
Health update for a single deployment object (matches broker API)
Fields
| Name | Type | Description |
|---|---|---|
id | Uuid | The deployment object ID |
status | String | Health status: healthy, degraded, failing, or unknown |
summary | Option < HealthSummary > | Structured health summary |
checked_at | DateTime < Utc > | When the health was checked |
Functions
brokkr-agent::deployment_health::is_pod_ready
private
#![allow(unused)]
fn main() {
fn is_pod_ready (pod : & Pod) -> bool
}
Checks if a pod is in ready state
Source
#![allow(unused)]
fn main() {
/// Reports whether the pod has a `Ready` condition whose status is `True`.
///
/// Returns `false` when the pod has no status or no conditions.
fn is_pod_ready(pod: &Pod) -> bool {
    if let Some(status) = &pod.status {
        if let Some(conditions) = &status.conditions {
            return conditions
                .iter()
                .any(|cond| cond.type_ == "Ready" && cond.status == "True");
        }
    }
    false
}
}