Skip to content

Commit ac24a40

Browse files
authored
feat(disk-fill): Adding the disk-fill experiment (#47)
Signed-off-by: shubhamchaudhary <shubham.chaudhary@mayadata.io>
1 parent 35d5829 commit ac24a40

File tree

9 files changed

+760
-2
lines changed

9 files changed

+760
-2
lines changed

build/generate_go_binary

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,4 +21,6 @@ go build -o build/_output/kubelet-service-kill ./experiments/generic/kubelet-ser
2121
# Building go binaries for node_memory_hog experiment
2222
go build -o build/_output/node-memory-hog ./experiments/generic/node-memory-hog
2323
# Building go binaries for node_cpu_hog experiment
24-
go build -o build/_output/node-cpu-hog ./experiments/generic/node-cpu-hog
24+
go build -o build/_output/node-cpu-hog ./experiments/generic/node-cpu-hog
25+
# Building go binaries for disk_fill experiment
26+
go build -o build/_output/disk-fill ./experiments/generic/disk-fill
Lines changed: 374 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,374 @@
1+
package disk_fill
2+
3+
import (
4+
"fmt"
5+
"math/rand"
6+
"strconv"
7+
"strings"
8+
"time"
9+
10+
clients "github.com/litmuschaos/litmus-go/pkg/clients"
11+
"github.com/litmuschaos/litmus-go/pkg/events"
12+
experimentTypes "github.com/litmuschaos/litmus-go/pkg/generic/disk-fill/types"
13+
"github.com/litmuschaos/litmus-go/pkg/log"
14+
"github.com/litmuschaos/litmus-go/pkg/status"
15+
"github.com/litmuschaos/litmus-go/pkg/types"
16+
"github.com/litmuschaos/litmus-go/pkg/utils/exec"
17+
"github.com/openebs/maya/pkg/util/retry"
18+
"github.com/pkg/errors"
19+
"github.com/sirupsen/logrus"
20+
apiv1 "k8s.io/api/core/v1"
21+
"k8s.io/apimachinery/pkg/api/resource"
22+
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
23+
)
24+
25+
//PrepareDiskFill contains the prepration steps before chaos injection
26+
func PrepareDiskFill(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error {
27+
28+
// It will contains all the pod & container details required for exec command
29+
execCommandDetails := exec.PodDetails{}
30+
31+
//Select application pod & node for the disk fill chaos
32+
appName, appNodeName, err := GetApplicationPod(experimentsDetails, clients)
33+
if err != nil {
34+
return errors.Errorf("Unable to get the application name and application nodename due to, err: %v", err)
35+
}
36+
37+
//Get the target container name of the application pod
38+
if experimentsDetails.TargetContainer == "" {
39+
experimentsDetails.TargetContainer, err = GetTargetContainer(experimentsDetails, appName, clients)
40+
if err != nil {
41+
return errors.Errorf("Unable to get the target container name due to, err: %v", err)
42+
}
43+
}
44+
45+
// GetEphemeralStorageAttributes derive the ephemeral storage attributes from the target container
46+
ephemeralStorageLimit, ephemeralStorageRequest, err := GetEphemeralStorageAttributes(experimentsDetails, clients, appName)
47+
if err != nil {
48+
return err
49+
}
50+
51+
// Derive the container id of the target container
52+
containerID, err := GetContainerID(experimentsDetails, clients, appName)
53+
if err != nil {
54+
return err
55+
}
56+
57+
log.InfoWithValues("[Info]: Details of application under chaos injection", logrus.Fields{
58+
"PodName": appName,
59+
"NodeName": appNodeName,
60+
"ContainerName": experimentsDetails.TargetContainer,
61+
"ephemeralStorageLimit": ephemeralStorageLimit,
62+
"ephemeralStorageRequest": ephemeralStorageRequest,
63+
"ContainerID": containerID,
64+
})
65+
66+
// generating a unique string which can be appended with the helper pod name & labels for the uniquely identification
67+
experimentsDetails.RunID = GetRunID()
68+
69+
//Waiting for the ramp time before chaos injection
70+
if experimentsDetails.RampTime != 0 {
71+
log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", strconv.Itoa(experimentsDetails.RampTime))
72+
waitForDuration(experimentsDetails.RampTime)
73+
}
74+
75+
// generating the chaos inject event in the chaosengine
76+
if experimentsDetails.EngineName != "" {
77+
msg := "Injecting " + experimentsDetails.ExperimentName + " chaos on " + appName + " pod"
78+
types.SetEngineEventAttributes(eventsDetails, types.ChaosInject, msg, chaosDetails)
79+
events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosEngine")
80+
}
81+
82+
// creating the helper pod to perform disk fill chaos
83+
err = CreateHelperPod(experimentsDetails, clients, appName, appNodeName)
84+
if err != nil {
85+
errors.Errorf("Unable to create the helper pod, err: %v", err)
86+
}
87+
88+
//checking the status of the helper pod, wait till the helper pod comes to running state else fail the experiment
89+
log.Info("[Status]: Checking the status of the helper pod")
90+
err = status.CheckApplicationStatus(experimentsDetails.ChaosNamespace, "name=disk-fill-"+experimentsDetails.RunID, clients)
91+
if err != nil {
92+
return errors.Errorf("helper pod is not in running state, err: %v", err)
93+
}
94+
95+
// Derive the used ephemeral storage size from the target container
96+
// It will exec inside disk-fill helper pod & derive the used ephemeral storage space
97+
command := "du /diskfill/" + containerID
98+
exec.SetExecCommandAttributes(&execCommandDetails, "disk-fill-"+experimentsDetails.RunID, "disk-fill", experimentsDetails.ChaosNamespace)
99+
ephemeralStorageDetails, err := exec.Exec(&execCommandDetails, clients, command)
100+
if err != nil {
101+
return errors.Errorf("Unable to get ephemeral storage details due to err: %v", err)
102+
}
103+
// filtering out the used ephemeral storage from the output of du command
104+
usedEphemeralStorageSize, err := FilterUsedEphemeralStorage(ephemeralStorageDetails)
105+
if err != nil {
106+
return errors.Errorf("Unable to filter used ephemeral storage size due to err: %v", err)
107+
}
108+
log.Infof("used ephemeral storage space: %v", strconv.Itoa(usedEphemeralStorageSize))
109+
110+
// deriving the ephemeral storage size to be filled
111+
sizeTobeFilled := GetSizeToBeFilled(experimentsDetails, usedEphemeralStorageSize, int(ephemeralStorageLimit))
112+
113+
log.Infof("ephemeral storage size to be filled: %v", strconv.Itoa(sizeTobeFilled))
114+
115+
if sizeTobeFilled > 0 {
116+
// Creating files to fill the required ephemeral storage size of block size of 4K
117+
command := "dd if=/dev/urandom of=/diskfill/" + containerID + "/diskfill bs=4K count=" + strconv.Itoa(sizeTobeFilled/4)
118+
_, err = exec.Exec(&execCommandDetails, clients, command)
119+
if err != nil {
120+
return errors.Errorf("Unable to to create the files to fill the ephemeral storage due to err: %v", err)
121+
}
122+
} else {
123+
log.Warn("No required free space found!, It's Housefull")
124+
}
125+
126+
// waiting for the chaos duration
127+
log.Infof("[Wait]: Waiting for the %vs after injecting chaos", strconv.Itoa(experimentsDetails.ChaosDuration))
128+
waitForDuration(experimentsDetails.ChaosDuration)
129+
130+
// It will delete the target pod if target pod is evicted
131+
// if target pod is still running then it will delete all the files, which was created earlier during chaos execution
132+
err = Remedy(experimentsDetails, clients, containerID, appName, &execCommandDetails)
133+
if err != nil {
134+
return errors.Errorf("Unable to perform remedy operation due to err: %v", err)
135+
}
136+
137+
//Deleting the helper pod
138+
log.Info("[Cleanup]: Deleting the helper pod")
139+
err = DeleteHelperPod(experimentsDetails, clients, experimentsDetails.RunID)
140+
if err != nil {
141+
errors.Errorf("Unable to delete the helper pod, err: %v", err)
142+
}
143+
144+
//Waiting for the ramp time after chaos injection
145+
if experimentsDetails.RampTime != 0 {
146+
log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", strconv.Itoa(experimentsDetails.RampTime))
147+
waitForDuration(experimentsDetails.RampTime)
148+
}
149+
return nil
150+
}
151+
152+
//GetApplicationPod will select a random replica of application pod for chaos
153+
//It will also get the node name of the application pod
154+
func GetApplicationPod(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets) (string, string, error) {
155+
podList, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.AppNS).List(v1.ListOptions{LabelSelector: experimentsDetails.AppLabel})
156+
if err != nil || len(podList.Items) == 0 {
157+
return "", "", errors.Wrapf(err, "Fail to get the application pod in %v namespace", experimentsDetails.AppNS)
158+
}
159+
160+
rand.Seed(time.Now().Unix())
161+
randomIndex := rand.Intn(len(podList.Items))
162+
applicationName := podList.Items[randomIndex].Name
163+
nodeName := podList.Items[randomIndex].Spec.NodeName
164+
165+
return applicationName, nodeName, nil
166+
}
167+
168+
//GetTargetContainer will fetch the container name from application pod
169+
// It will return the first container name from the application pod
170+
func GetTargetContainer(experimentsDetails *experimentTypes.ExperimentDetails, appName string, clients clients.ClientSets) (string, error) {
171+
pod, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.AppNS).Get(appName, v1.GetOptions{})
172+
if err != nil {
173+
return "", errors.Wrapf(err, "Fail to get the application pod status, due to:%v", err)
174+
}
175+
176+
return pod.Spec.Containers[0].Name, nil
177+
}
178+
179+
//waitForDuration waits for the given time duration (in seconds)
180+
func waitForDuration(duration int) {
181+
time.Sleep(time.Duration(duration) * time.Second)
182+
}
183+
184+
// GetRunID generates a random 6-character lowercase string used to uniquely
// identify the helper pod name & labels of a single chaos run.
func GetRunID() string {
	const letters = "abcdefghijklmnopqrstuvwxyz"
	var b strings.Builder
	b.Grow(6)
	for i := 0; i < 6; i++ {
		b.WriteByte(letters[rand.Intn(len(letters))])
	}
	return b.String()
}
193+
194+
// CreateHelperPod derive the attributes for helper pod and create the helper pod
195+
func CreateHelperPod(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, appName, appNodeName string) error {
196+
197+
mountPropagationMode := apiv1.MountPropagationHostToContainer
198+
privileged := true
199+
200+
helperPod := &apiv1.Pod{
201+
ObjectMeta: v1.ObjectMeta{
202+
Name: "disk-fill-" + experimentsDetails.RunID,
203+
Namespace: experimentsDetails.ChaosNamespace,
204+
Labels: map[string]string{
205+
"app": "disk-fill",
206+
"name": "disk-fill-" + experimentsDetails.RunID,
207+
"chaosUID": string(experimentsDetails.ChaosUID),
208+
},
209+
},
210+
Spec: apiv1.PodSpec{
211+
RestartPolicy: apiv1.RestartPolicyNever,
212+
NodeName: appNodeName,
213+
Volumes: []apiv1.Volume{
214+
{
215+
Name: "udev",
216+
VolumeSource: apiv1.VolumeSource{
217+
HostPath: &apiv1.HostPathVolumeSource{
218+
Path: experimentsDetails.ContainerPath,
219+
},
220+
},
221+
},
222+
},
223+
Containers: []apiv1.Container{
224+
{
225+
Name: "disk-fill",
226+
Image: "alpine",
227+
ImagePullPolicy: apiv1.PullAlways,
228+
Args: []string{
229+
"sleep",
230+
"10000",
231+
},
232+
VolumeMounts: []apiv1.VolumeMount{
233+
{
234+
Name: "udev",
235+
MountPath: "/diskfill",
236+
MountPropagation: &mountPropagationMode,
237+
},
238+
},
239+
SecurityContext: &apiv1.SecurityContext{
240+
Privileged: &privileged,
241+
},
242+
},
243+
},
244+
},
245+
}
246+
247+
_, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(helperPod)
248+
return err
249+
}
250+
251+
//DeleteHelperPod deletes the helper pod and wait until it got terminated
252+
func DeleteHelperPod(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, runID string) error {
253+
254+
err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Delete("disk-fill-"+runID, &v1.DeleteOptions{})
255+
256+
if err != nil {
257+
return err
258+
}
259+
260+
// waiting for the termination of the pod
261+
err = retry.
262+
Times(90).
263+
Wait(1 * time.Second).
264+
Try(func(attempt uint) error {
265+
podSpec, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).List(v1.ListOptions{LabelSelector: "name=disk-fill-" + runID})
266+
if err != nil || len(podSpec.Items) != 0 {
267+
return errors.Errorf("Helper Pod is not deleted yet, err: %v", err)
268+
}
269+
return nil
270+
})
271+
272+
return err
273+
}
274+
275+
// GetEphemeralStorageAttributes derive the ephemeral storage attributes from the target pod
276+
func GetEphemeralStorageAttributes(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, podName string) (int64, int64, error) {
277+
278+
pod, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.AppNS).Get(podName, v1.GetOptions{})
279+
280+
if err != nil {
281+
return 0, 0, err
282+
}
283+
284+
var ephemeralStorageLimit, ephemeralStorageRequest int64
285+
containers := pod.Spec.Containers
286+
287+
// Extracting ephemeral storage limit & requested value from the target container
288+
// It will be in the form of Kb
289+
for _, container := range containers {
290+
if container.Name == experimentsDetails.TargetContainer {
291+
ephemeralStorageLimit = container.Resources.Limits.StorageEphemeral().ToDec().ScaledValue(resource.Kilo)
292+
ephemeralStorageRequest = container.Resources.Requests.StorageEphemeral().ToDec().ScaledValue(resource.Kilo)
293+
break
294+
}
295+
}
296+
297+
if ephemeralStorageRequest == 0 || ephemeralStorageLimit == 0 {
298+
return 0, 0, fmt.Errorf("No Ephemeral storage details found inside %v container", experimentsDetails.TargetContainer)
299+
}
300+
301+
return ephemeralStorageLimit, ephemeralStorageRequest, nil
302+
}
303+
304+
// GetContainerID derive the container id of the target container
305+
func GetContainerID(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, podName string) (string, error) {
306+
pod, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.AppNS).Get(podName, v1.GetOptions{})
307+
308+
if err != nil {
309+
return "", err
310+
}
311+
312+
var containerID string
313+
containers := pod.Status.ContainerStatuses
314+
315+
// filtering out the container id from the details of containers inside containerStatuses of the given pod
316+
// container id is present in the form of <runtime>://<container-id>
317+
for _, container := range containers {
318+
if container.Name == experimentsDetails.TargetContainer {
319+
containerID = strings.Split(container.ContainerID, "//")[1]
320+
break
321+
}
322+
}
323+
324+
return containerID, nil
325+
326+
}
327+
328+
// FilterUsedEphemeralStorage extracts the used ephemeral-storage size (in KB)
// from the raw output of a `du` command run inside the helper pod. The last
// non-empty line of `du` output summarizes the main directory in the form
// "<size>\t<path>"; its size column is returned as an int.
func FilterUsedEphemeralStorage(ephemeralStorageDetails string) (int, error) {

	// du prints one line per subdirectory of the target container,
	// terminated by a trailing newline
	lines := strings.Split(ephemeralStorageDetails, "\n")
	// BUGFIX: guard against too-short output; indexing len-2 previously
	// panicked on empty or single-line input
	if len(lines) < 2 {
		return 0, fmt.Errorf("Unable to parse du output: %q", ephemeralStorageDetails)
	}
	// second-to-last entry holds the main-directory summary (the final entry
	// is the empty string after the trailing newline)
	mainDirSize := strings.Split(lines[len(lines)-2], "\t")[0]
	// convert the size column from string to integer
	return strconv.Atoi(mainDirSize)
}
341+
342+
// GetSizeToBeFilled generate the ephemeral storage size need to be filled
343+
func GetSizeToBeFilled(experimentsDetails *experimentTypes.ExperimentDetails, usedEphemeralStorageSize int, ephemeralStorageLimit int) int {
344+
345+
// deriving size need to be filled from the used size & requirement size to fill
346+
requirementToBeFill := (ephemeralStorageLimit * experimentsDetails.FillPercentage) / 100
347+
needToBeFilled := requirementToBeFill - usedEphemeralStorageSize
348+
return needToBeFilled
349+
}
350+
351+
// Remedy will delete the target pod if target pod is evicted
352+
// if target pod is still running then it will delete the files, which was created during chaos execution
353+
func Remedy(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, containerID string, podName string, execCommandDetails *exec.PodDetails) error {
354+
pod, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.AppNS).Get(podName, v1.GetOptions{})
355+
if err != nil {
356+
return err
357+
}
358+
// Deleting the pod as pod is already evicted
359+
podReason := pod.Status.Reason
360+
if podReason == "Evicted" {
361+
if err := clients.KubeClient.CoreV1().Pods(experimentsDetails.AppNS).Delete(podName, &v1.DeleteOptions{}); err != nil {
362+
return err
363+
}
364+
} else {
365+
366+
// deleting the files after chaos execution
367+
command := "rm -rf /diskfill/" + containerID + "/diskfill"
368+
_, err = exec.Exec(execCommandDetails, clients, command)
369+
if err != nil {
370+
errors.Errorf("Unable to delete files to clean ephemeral storage due to err: %v", err)
371+
}
372+
}
373+
return nil
374+
}

0 commit comments

Comments
 (0)