Unverified Commit b562ec77 authored by Steve Azzopardi's avatar Steve Azzopardi
Browse files

Fix TestBuildCancel from timing out

In https://gitlab.com/gitlab-org/gitlab-runner/-/issues/27077 we are
seeing timeouts of the `TestBuildCancel` test because when the job in
canceled we send `SIGTERM` to process that is running. After a certain
amount of time we send `SIGKILL` if that process hasn't terminated yet,
the default is [10
minutes](https://gitlab.com/gitlab-org/gitlab-runner/-/blob/56fbbb1150681915e4de33551a01cb3d4defa91c/helpers/process/killer.go#L17).
Since the default timeout of go tests is 10 minutes our test ends up
timeing out because `SIGKILL` hasn't been sent yet.

Have the GracefulKillTimeout and ForceKillTimeout configurable and set
it at a low amount for tests so we don't waste time waiting for a
process to terminate since the processes don't usually have any cleanup
to do.

closes https://gitlab.com/gitlab-org/gitlab-runner/-/issues/27077
parent 71874248
......@@ -19,6 +19,7 @@ import (
"gitlab.com/gitlab-org/gitlab-runner/helpers"
"gitlab.com/gitlab-org/gitlab-runner/helpers/docker"
"gitlab.com/gitlab-org/gitlab-runner/helpers/process"
"gitlab.com/gitlab-org/gitlab-runner/helpers/ssh"
"gitlab.com/gitlab-org/gitlab-runner/helpers/timeperiod"
"gitlab.com/gitlab-org/gitlab-runner/referees"
......@@ -438,6 +439,14 @@ type RunnerSettings struct {
Referees *referees.Config `toml:"referees,omitempty" json:"referees" group:"referees configuration" namespace:"referees"`
Cache *CacheConfig `toml:"cache,omitempty" json:"cache" group:"cache configuration" namespace:"cache"`
// GracefulKillTimeout and ForceKillTimeout aren't exposed to the users yet
// because not every executor supports it. We also have to keep in mind that
// the CustomConfig has its configuration fields for termination so when
// every executor supports graceful termination we should expose this single
// configuration for all executors.
GracefulKillTimeout *int `toml:"-"`
ForceKillTimeout *int `toml:"-"`
SSH *ssh.Config `toml:"ssh,omitempty" json:"ssh" group:"ssh executor" namespace:"ssh"`
Docker *DockerConfig `toml:"docker,omitempty" json:"docker" group:"docker executor" namespace:"docker"`
Parallels *ParallelsConfig `toml:"parallels,omitempty" json:"parallels" group:"parallels executor" namespace:"parallels"`
......@@ -498,6 +507,27 @@ func (c *CacheConfig) GetShared() bool {
return c.Shared
}
func (r *RunnerSettings) GetGracefulKillTimeout() time.Duration {
return getDuration(r.GracefulKillTimeout, process.GracefulTimeout)
}
func (r *RunnerSettings) GetForceKillTimeout() time.Duration {
return getDuration(r.ForceKillTimeout, process.KillTimeout)
}
func getDuration(source *int, defaultValue time.Duration) time.Duration {
if source == nil {
return defaultValue
}
timeout := *source
if timeout <= 0 {
return defaultValue
}
return time.Duration(timeout) * time.Second
}
func (c *SessionServer) GetSessionTimeout() time.Duration {
if c.SessionTimeout > 0 {
return time.Duration(c.SessionTimeout) * time.Second
......
......@@ -9,6 +9,8 @@ import (
"github.com/BurntSushi/toml"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"gitlab.com/gitlab-org/gitlab-runner/helpers/process"
)
func TestCacheS3Config_ShouldUseIAMCredentials(t *testing.T) {
......@@ -292,6 +294,17 @@ func TestConfigParse(t *testing.T) {
assert.Equal(t, []string{"e2e-az1"}, nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms[1].MatchFields[0].Values)
},
},
"check that GracefulKillTimeout and ForceKillTimeout can't be set": {
config: `
[[runners]]
GracefulKillTimeout = 30
ForceKillTimeout = 10
`,
validateConfig: func(t *testing.T, config *Config) {
assert.Nil(t, config.Runners[0].GracefulKillTimeout)
assert.Nil(t, config.Runners[0].ForceKillTimeout)
},
},
}
for tn, tt := range tests {
......@@ -509,3 +522,40 @@ func TestDockerMachine(t *testing.T) {
})
}
}
func TestRunnerSettings_GetGracefulKillTimeout_GetForceKillTimeout(t *testing.T) {
tests := map[string]struct {
config RunnerSettings
expectedGracefulKillTimeout time.Duration
expectedForceKillTimeout time.Duration
}{
"undefined": {
config: RunnerSettings{},
expectedGracefulKillTimeout: process.GracefulTimeout,
expectedForceKillTimeout: process.KillTimeout,
},
"timeouts lower than 0": {
config: RunnerSettings{
GracefulKillTimeout: func(i int) *int { return &i }(-10),
ForceKillTimeout: func(i int) *int { return &i }(-10),
},
expectedGracefulKillTimeout: process.GracefulTimeout,
expectedForceKillTimeout: process.KillTimeout,
},
"timeouts greater than 0": {
config: RunnerSettings{
GracefulKillTimeout: func(i int) *int { return &i }(30),
ForceKillTimeout: func(i int) *int { return &i }(15),
},
expectedGracefulKillTimeout: 30 * time.Second,
expectedForceKillTimeout: 15 * time.Second,
},
}
for tn, tt := range tests {
t.Run(tn, func(t *testing.T) {
assert.Equal(t, tt.expectedGracefulKillTimeout, tt.config.GetGracefulKillTimeout())
assert.Equal(t, tt.expectedForceKillTimeout, tt.config.GetForceKillTimeout())
})
}
}
......@@ -204,7 +204,7 @@ func (s *executor) run(cmd common.ExecutorCommand) error {
return err
case <-cmd.Context.Done():
logger := common.NewProcessLoggerAdapter(s.BuildLogger)
return newProcessKillWaiter(logger, process.GracefulTimeout, process.KillTimeout).
return newProcessKillWaiter(logger, s.Config.GetGracefulKillTimeout(), s.Config.GetForceKillTimeout()).
KillAndWait(c, waitCh)
}
}
......
......@@ -94,9 +94,11 @@ func newBuild(t *testing.T, getBuildResponse common.JobResponse, shell string) (
JobResponse: getBuildResponse,
Runner: &common.RunnerConfig{
RunnerSettings: common.RunnerSettings{
BuildsDir: dir,
Executor: "shell",
Shell: shell,
BuildsDir: dir,
Executor: "shell",
Shell: shell,
GracefulKillTimeout: func(i int) *int { return &i }(5),
ForceKillTimeout: func(i int) *int { return &i }(1),
},
},
SystemInterrupt: make(chan os.Signal, 1),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment