Skip to content

Commit b87564c

Browse files
committed
[core] Implement neighbor validation through LLDP data
In commit c3ccc6d we extended the `interface` type to include neighbor information (LLDP adjacencies). Here we instruct the interface controller to retrieve the LLDP adjacency information via the provider and populate the status accordingly. The ExpirationTime is computed only once from the TTL and is preserved across reconciliations. We do so to avoid the continuous status updates observed in the lab. In that environment there is clock drift between the device and the operator, meaning that the value of the ExpirationTime kept changing and was triggering unnecessary reconciliations in the process. For readability reasons we truncate the ExpirationTime to second granularity. Note that expired entries are automatically removed from the device. Hence, in the reconciliation we only need to fetch the entries. As for the neighbor validation, we implement it as a non-blocking operation. This means that errors are logged but they don't prevent the reconciliation of the interface. The validation first checks whether the resource has a label. The label is used to perform a validation against an interface resource. If the label is absent, the controller falls back to checking the annotation. The annotation is used to validate neighbors that are not a Kubernetes resource (see 52eae24). In this commit we also add a missing watch for the LLDP controller. Now, if the LLDP resource references an interface resource that has not been created, the LLDP resource will be reconciled once the missing dependency is created. This was problematic during bootstrap, as the LLDP feature was not installed if a single interface was missing. We also add tests for LLDP operational status degradation, verifying that the controller correctly sets OperationalCondition to False when the device reports LLDP is down, and recovers when it comes back up.
1 parent 5ce7c31 commit b87564c

6 files changed

Lines changed: 724 additions & 25 deletions

File tree

internal/controller/core/interface_controller.go

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"fmt"
1010
"net/netip"
1111
"slices"
12+
"strings"
1213
"time"
1314

1415
"k8s.io/apimachinery/pkg/api/equality"
@@ -506,6 +507,11 @@ func (r *InterfaceReconciler) reconcile(ctx context.Context, s *scope) (_ ctrl.R
506507
return ctrl.Result{}, fmt.Errorf("failed to get interface status: %w", err)
507508
}
508509

510+
// Neighbor adjacencies are only metadata and should not prevent reconciliation
511+
if err := r.updateNeighborAdjacenciesStatus(ctx, s, &status); err != nil {
512+
ctrl.LoggerFrom(ctx).V(3).Info("failed to update neighbor adjacency status", "error", err, "interface", klog.KObj(s.Interface))
513+
}
514+
509515
cond = metav1.Condition{
510516
Type: v1alpha1.OperationalCondition,
511517
Status: metav1.ConditionTrue,
@@ -525,6 +531,149 @@ func (r *InterfaceReconciler) reconcile(ctx context.Context, s *scope) (_ ctrl.R
525531
return ctrl.Result{RequeueAfter: Jitter(r.RequeueInterval)}, nil
526532
}
527533

534+
// updateNeighborAdjacenciesStatus updates the Interface status with the LLDP neighbor adjacencies returned by the provider.
535+
// It validates the adjacencies by looking for at the corresponding label/annotation.
536+
// It first attempts to validate through label (neighbor is managed by the operator and exists as a kubernetes resource).
537+
// If that fails, it attempts to validate through annotation (neighbor is not managed by the operator).
538+
// Only returns an error if there is an issue during the validation process, but does not return an error if the validation fails (i.e. the adjacency is marked as invalid).
539+
func (r *InterfaceReconciler) updateNeighborAdjacenciesStatus(ctx context.Context, s *scope, status *provider.InterfaceStatus) error {
540+
if s.Interface.Spec.Type != v1alpha1.InterfaceTypePhysical || len(status.LLDPAdjacencies) == 0 {
541+
s.Interface.Status.Neighbors = nil
542+
return nil
543+
}
544+
545+
type neighborKey struct{ ChassisID, PortID string }
546+
547+
existingNeighbors := make(map[neighborKey]v1alpha1.Neighbor)
548+
for _, n := range s.Interface.Status.Neighbors {
549+
existingNeighbors[neighborKey{n.ChassisID, n.PortID}] = n
550+
}
551+
552+
var errs []error
553+
neighbors := make([]v1alpha1.Neighbor, 0, len(status.LLDPAdjacencies))
554+
for _, adj := range status.LLDPAdjacencies {
555+
key := neighborKey{adj.ChassisID, adj.PortID}
556+
557+
adjacency := v1alpha1.Neighbor{
558+
NeighborInfo: v1alpha1.NeighborInfo{
559+
SystemName: adj.SysName,
560+
SystemDescription: adj.SysDescription,
561+
ChassisID: adj.ChassisID,
562+
ChassisIDType: adj.ChassisIDType,
563+
PortID: adj.PortID,
564+
PortIDType: adj.PortIDType,
565+
PortDescription: adj.PortDescription,
566+
},
567+
Validation: v1alpha1.NeighborValidationUndefined,
568+
}
569+
570+
// The TTL reported by the device and the operator clock can drift one from each other for more than one second.
571+
// If we use the TTL to compute ExpirationTime on each reconciliation, then ExpirationTime will keep changing, triggering unnecessary updates to the Interface status.
572+
// Thus, we preserve the existing ExpirationTime as long as the neighbor is still valid, at the cost of some inaccuracy.
573+
if existing, found := existingNeighbors[key]; found && existing.ExpirationTime != nil && existing.ExpirationTime.After(time.Now()) {
574+
adjacency.ExpirationTime = existing.ExpirationTime
575+
} else {
576+
t := metav1.NewTime(time.Now().Add(time.Duration(adj.TTL) * time.Second).Truncate(time.Second))
577+
adjacency.ExpirationTime = &t
578+
}
579+
580+
if err := r.validateLLDPAdjacencyThroughLabel(ctx, s.Interface, &adjacency); err != nil {
581+
errs = append(errs, fmt.Errorf("failed to validate LLDP adjacency %q/%q through label: %w", adj.ChassisID, adj.PortID, err))
582+
}
583+
if adjacency.Validation == v1alpha1.NeighborValidationUndefined {
584+
if err := r.validateLLDPAdjacencyThroughAnnotation(ctx, s.Interface, &adjacency); err != nil {
585+
errs = append(errs, fmt.Errorf("failed to validate LLDP adjacency %q/%q through annotation: %w", adj.ChassisID, adj.PortID, err))
586+
}
587+
}
588+
neighbors = append(neighbors, adjacency)
589+
}
590+
591+
s.Interface.Status.Neighbors = neighbors
592+
593+
return kerrors.NewAggregate(errs)
594+
}
595+
596+
func (r *InterfaceReconciler) validateLLDPAdjacencyThroughLabel(ctx context.Context, intf *v1alpha1.Interface, n *v1alpha1.Neighbor) error {
597+
neighborLabelValue, ok := intf.Labels[v1alpha1.PhysicalInterfaceNeighborLabel]
598+
if !ok {
599+
n.Validation = v1alpha1.NeighborValidationUndefined
600+
return nil
601+
}
602+
603+
key := client.ObjectKey{
604+
Name: neighborLabelValue,
605+
Namespace: intf.Namespace,
606+
}
607+
608+
remoteIntf := new(v1alpha1.Interface)
609+
if err := r.Get(ctx, key, remoteIntf); err != nil {
610+
n.Validation = v1alpha1.NeighborValidationError
611+
if !apierrors.IsNotFound(err) {
612+
return fmt.Errorf("failed to get neighbor interface %q: %w", neighborLabelValue, err)
613+
}
614+
return fmt.Errorf("could not verify neighbor adjacency: neighbor interface %q not found: %w", neighborLabelValue, err)
615+
}
616+
617+
log := ctrl.LoggerFrom(ctx, "LLDP validation", klog.KObj(intf))
618+
619+
remoteDevice, err := deviceutil.GetOwnerDevice(ctx, r, remoteIntf)
620+
if err != nil {
621+
n.Validation = v1alpha1.NeighborValidationError
622+
return fmt.Errorf("could not find the device owning interface %q: %w", remoteIntf.Name, err)
623+
}
624+
625+
if remoteDevice.Status.Hostname == "" {
626+
n.Validation = v1alpha1.NeighborValidationError
627+
return fmt.Errorf("the neighbor device does not have a hostname yet, cannot validate adjacency: neighborInterface=%q", remoteIntf.Name)
628+
}
629+
630+
if remoteDevice.Status.Hostname != n.SystemName {
631+
n.Validation = v1alpha1.NeighborDeviceMismatch
632+
log.V(0).Info("the neighbor device hostname does not match", "expected", n.SystemName, "actual", remoteDevice.Status.Hostname)
633+
return nil
634+
}
635+
636+
if remoteIntf.Spec.Name != n.PortID {
637+
n.Validation = v1alpha1.NeighborInterfaceMismatch
638+
log.V(0).Info("the neighbor interface name does not match", "expected", n.PortID, "actual", remoteIntf.Spec.Name)
639+
return nil
640+
}
641+
642+
n.Validation = v1alpha1.NeighborVerified
643+
644+
return nil
645+
}
646+
647+
func (r *InterfaceReconciler) validateLLDPAdjacencyThroughAnnotation(ctx context.Context, intf *v1alpha1.Interface, n *v1alpha1.Neighbor) error {
648+
neighborAnnotationValue, ok := intf.Annotations[v1alpha1.PhysicalInterfaceNeighborRawAnnotation]
649+
if !ok {
650+
n.Validation = v1alpha1.NeighborValidationUndefined
651+
return nil
652+
}
653+
654+
remoteDeviceID, remotePortID, ok := strings.Cut(neighborAnnotationValue, "::")
655+
if !ok || remoteDeviceID == "" || remotePortID == "" {
656+
n.Validation = v1alpha1.NeighborValidationError
657+
return fmt.Errorf("invalid neighbor annotation value %q, expected format is <deviceIdentifier>::<portID>", neighborAnnotationValue)
658+
}
659+
660+
log := ctrl.LoggerFrom(ctx, "LLDP validation", klog.KObj(intf))
661+
if remoteDeviceID != n.ChassisID && remoteDeviceID != n.SystemName {
662+
n.Validation = v1alpha1.NeighborDeviceMismatch
663+
log.V(0).Info("the neighbor device identifier does not match", "annotationValue", remoteDeviceID, "chassisID", n.ChassisID, "systemName", n.SystemName)
664+
return nil
665+
}
666+
667+
if remotePortID != n.PortID {
668+
n.Validation = v1alpha1.NeighborInterfaceMismatch
669+
log.V(0).Info("the neighbor port identifier does not match", "annotationValue", remotePortID, "portID", n.PortID)
670+
return nil
671+
}
672+
673+
n.Validation = v1alpha1.NeighborVerified
674+
return nil
675+
}
676+
528677
func (r *InterfaceReconciler) reconcileIPv4(ctx context.Context, s *scope) (provider.IPv4, error) {
529678
switch {
530679
case len(s.Interface.Spec.IPv4.Addresses) > 0:
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
// SPDX-FileCopyrightText: 2025 SAP SE or an SAP affiliate company and IronCore contributors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package core
5+
6+
import (
7+
. "github.com/onsi/ginkgo/v2"
8+
. "github.com/onsi/gomega"
9+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
10+
"sigs.k8s.io/controller-runtime/pkg/client"
11+
12+
"github.com/ironcore-dev/network-operator/api/core/v1alpha1"
13+
)
14+
15+
var _ = Describe("Interface Controller - LLDP Neighbor", func() {
16+
Context("When reconciling an interface with LLDP neighbor information", func() {
17+
var (
18+
name string
19+
key client.ObjectKey
20+
)
21+
22+
BeforeEach(func() {
23+
By("Creating a Device resource for testing")
24+
device := &v1alpha1.Device{
25+
ObjectMeta: metav1.ObjectMeta{
26+
GenerateName: "test-interface-lldp-",
27+
Namespace: metav1.NamespaceDefault,
28+
},
29+
Spec: v1alpha1.DeviceSpec{
30+
Endpoint: v1alpha1.Endpoint{
31+
Address: "192.168.10.2:9339",
32+
},
33+
},
34+
}
35+
Expect(k8sClient.Create(ctx, device)).To(Succeed())
36+
name = device.Name
37+
key = client.ObjectKey{Name: name, Namespace: metav1.NamespaceDefault}
38+
})
39+
40+
AfterEach(func() {
41+
By("Cleaning up all Interface resources")
42+
Expect(k8sClient.DeleteAllOf(ctx, &v1alpha1.Interface{}, client.InNamespace(metav1.NamespaceDefault))).To(Succeed())
43+
44+
device := &v1alpha1.Device{}
45+
err := k8sClient.Get(ctx, key, device)
46+
Expect(err).NotTo(HaveOccurred())
47+
48+
By("Cleaning up the test Device resource")
49+
Expect(k8sClient.Delete(ctx, device, client.PropagationPolicy(metav1.DeletePropagationForeground))).To(Succeed())
50+
})
51+
52+
It("Should populate LLDP neighbor information in interface status", func() {
53+
By("Configuring the test provider to return LLDP neighbor information")
54+
testProvider.SetLLDPNeighbor("Ethernet1/1", "neighbor-switch", "00:11:22:33:44:55", "Ethernet1/1", 120)
55+
56+
By("Creating a Physical Interface resource")
57+
intf := &v1alpha1.Interface{
58+
ObjectMeta: metav1.ObjectMeta{
59+
Name: name,
60+
Namespace: metav1.NamespaceDefault,
61+
},
62+
Spec: v1alpha1.InterfaceSpec{
63+
DeviceRef: v1alpha1.LocalObjectReference{Name: name},
64+
Name: "Ethernet1/1",
65+
AdminState: v1alpha1.AdminStateUp,
66+
Type: v1alpha1.InterfaceTypePhysical,
67+
},
68+
}
69+
Expect(k8sClient.Create(ctx, intf)).To(Succeed())
70+
71+
By("Verifying the controller populates the LLDP neighbor information in status")
72+
Eventually(func(g Gomega) {
73+
resource := &v1alpha1.Interface{}
74+
g.Expect(k8sClient.Get(ctx, key, resource)).To(Succeed())
75+
g.Expect(resource.Status.Neighbors).NotTo(BeEmpty())
76+
g.Expect(resource.Status.Neighbors[0].SystemName).To(Equal("neighbor-switch"))
77+
g.Expect(resource.Status.Neighbors[0].ChassisID).To(Equal("00:11:22:33:44:55"))
78+
g.Expect(resource.Status.Neighbors[0].PortID).To(Equal("Ethernet1/1"))
79+
g.Expect(resource.Status.Neighbors[0].ExpirationTime).NotTo(BeNil())
80+
}).Should(Succeed())
81+
})
82+
83+
It("Should handle interface without LLDP neighbor", func() {
84+
By("Creating a Physical Interface resource without LLDP neighbor configured")
85+
intf := &v1alpha1.Interface{
86+
ObjectMeta: metav1.ObjectMeta{
87+
Name: name,
88+
Namespace: metav1.NamespaceDefault,
89+
},
90+
Spec: v1alpha1.InterfaceSpec{
91+
DeviceRef: v1alpha1.LocalObjectReference{Name: name},
92+
Name: "Ethernet1/2",
93+
AdminState: v1alpha1.AdminStateUp,
94+
Type: v1alpha1.InterfaceTypePhysical,
95+
},
96+
}
97+
Expect(k8sClient.Create(ctx, intf)).To(Succeed())
98+
99+
By("Verifying the neighbor field is nil when no LLDP neighbor exists")
100+
Eventually(func(g Gomega) {
101+
resource := &v1alpha1.Interface{}
102+
g.Expect(k8sClient.Get(ctx, key, resource)).To(Succeed())
103+
g.Expect(resource.Status.Conditions).NotTo(BeEmpty())
104+
// Neighbors should be empty when no LLDP neighbor exists
105+
if len(resource.Status.Neighbors) > 0 {
106+
g.Expect(resource.Status.Neighbors[0].SystemName).To(BeEmpty())
107+
}
108+
}).Should(Succeed())
109+
})
110+
})
111+
})

internal/controller/core/lldp_controller.go

Lines changed: 67 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -422,40 +422,48 @@ func (r *LLDPReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager)
422422
return fmt.Errorf("failed to create label selector predicate: %w", err)
423423
}
424424

425-
c := ctrl.NewControllerManagedBy(mgr).
425+
bldr := ctrl.NewControllerManagedBy(mgr).
426426
For(&v1alpha1.LLDP{}).
427427
Named("lldp").
428428
WithEventFilter(filter)
429429

430430
for _, gvk := range v1alpha1.LLDPDependencies {
431431
obj := &unstructured.Unstructured{}
432432
obj.SetGroupVersionKind(gvk)
433-
c = c.Watches(
433+
434+
bldr = bldr.Watches(
434435
obj,
435436
handler.EnqueueRequestsFromMapFunc(r.mapProviderConfigToLLDP),
436437
builder.WithPredicates(predicate.ResourceVersionChangedPredicate{}),
437438
)
438439
}
439440

440-
// Watches enqueues LLDPs for updates in referenced Device resources.
441-
// Triggers on update events when the Paused spec field changes.
442-
c = c.Watches(
443-
&v1alpha1.Device{},
444-
handler.EnqueueRequestsFromMapFunc(r.deviceToLLDPs),
445-
builder.WithPredicates(predicate.Funcs{
446-
UpdateFunc: func(e event.UpdateEvent) bool {
447-
oldDevice := e.ObjectOld.(*v1alpha1.Device)
448-
newDevice := e.ObjectNew.(*v1alpha1.Device)
449-
// Only trigger when Paused spec field changes.
450-
return oldDevice.Spec.Paused != newDevice.Spec.Paused
451-
},
452-
GenericFunc: func(e event.GenericEvent) bool {
453-
return false
454-
},
455-
}),
456-
)
457-
458-
return c.Complete(r)
441+
return bldr.
442+
// Watches enqueues LLDPs for updates in referenced Device resources.
443+
// Triggers on update events when the Paused spec field changes.
444+
Watches(
445+
&v1alpha1.Device{},
446+
handler.EnqueueRequestsFromMapFunc(r.deviceToLLDPs),
447+
builder.WithPredicates(predicate.Funcs{
448+
UpdateFunc: func(e event.UpdateEvent) bool {
449+
oldDevice := e.ObjectOld.(*v1alpha1.Device)
450+
newDevice := e.ObjectNew.(*v1alpha1.Device)
451+
// Only trigger when Paused spec field changes.
452+
return oldDevice.Spec.Paused != newDevice.Spec.Paused
453+
},
454+
GenericFunc: func(e event.GenericEvent) bool {
455+
return false
456+
},
457+
}),
458+
).
459+
// Watches enqueues LLDPs for updates in referenced Interface resources.
460+
// This ensures LLDP reconciles when a referenced Interface is created or updated.
461+
Watches(
462+
&v1alpha1.Interface{},
463+
handler.EnqueueRequestsFromMapFunc(r.interfaceToLLDPs),
464+
builder.WithPredicates(predicate.ResourceVersionChangedPredicate{}),
465+
).
466+
Complete(r)
459467
}
460468

461469
func (r *LLDPReconciler) mapProviderConfigToLLDP(ctx context.Context, obj client.Object) []reconcile.Request {
@@ -534,3 +542,41 @@ func (r *LLDPReconciler) deviceToLLDPs(ctx context.Context, obj client.Object) [
534542

535543
return requests
536544
}
545+
546+
// interfaceToLLDPs is a [handler.MapFunc] to be used to enqueue requests for reconciliation
547+
// for LLDPs when a referenced Interface is created or updated.
548+
func (r *LLDPReconciler) interfaceToLLDPs(ctx context.Context, obj client.Object) []ctrl.Request {
549+
intf, ok := obj.(*v1alpha1.Interface)
550+
if !ok {
551+
panic(fmt.Sprintf("Expected an Interface but got a %T", obj))
552+
}
553+
554+
log := ctrl.LoggerFrom(ctx, "Interface", klog.KObj(intf))
555+
556+
list := new(v1alpha1.LLDPList)
557+
if err := r.List(ctx, list,
558+
client.InNamespace(intf.Namespace),
559+
client.MatchingLabels{v1alpha1.DeviceLabel: intf.Spec.DeviceRef.Name},
560+
); err != nil {
561+
log.Error(err, "Failed to list LLDPs")
562+
return nil
563+
}
564+
565+
var requests []ctrl.Request
566+
for _, lldp := range list.Items {
567+
for _, ifRef := range lldp.Spec.InterfaceRefs {
568+
if ifRef.Name == intf.Name {
569+
log.V(2).Info("Enqueuing LLDP for reconciliation", "LLDP", klog.KObj(&lldp))
570+
requests = append(requests, ctrl.Request{
571+
NamespacedName: client.ObjectKey{
572+
Name: lldp.Name,
573+
Namespace: lldp.Namespace,
574+
},
575+
})
576+
break
577+
}
578+
}
579+
}
580+
581+
return requests
582+
}

0 commit comments

Comments
 (0)