Commit 285ea21c authored by gngeorgiev's avatar gngeorgiev
Browse files

Retry kubernetes commands when "error dialing backend: EOF" error is hit

parent 9a053dd6
......@@ -6,23 +6,31 @@ import (
"io"
"os"
"path/filepath"
"time"
"github.com/sirupsen/logrus"
"github.com/urfave/cli"
"gitlab.com/gitlab-org/gitlab-runner/common"
"gitlab.com/gitlab-org/gitlab-runner/helpers/archives"
"gitlab.com/gitlab-org/gitlab-runner/helpers/retry"
"gitlab.com/gitlab-org/gitlab-runner/log"
"gitlab.com/gitlab-org/gitlab-runner/network"
)
const DefaultUploadName = "default"
const (
DefaultUploadName = "default"
defaultTries = 3
serviceUnavailableTries = 6
)
var (
errServiceUnavailable = errors.New("service unavailable")
errTooLarge = errors.New("too large")
)
type ArtifactsUploaderCommand struct {
common.JobCredentials
fileArchiver
retryHelper
network common.Network
Name string `long:"name" description:"The name of the archive"`
......@@ -83,7 +91,7 @@ func (c *ArtifactsUploaderCommand) createReadStream() (string, io.ReadCloser, er
}
}
func (c *ArtifactsUploaderCommand) createAndUpload() error {
func (c *ArtifactsUploaderCommand) Run() error {
artifactsName, stream, err := c.createReadStream()
if err != nil {
return err
......@@ -110,14 +118,34 @@ func (c *ArtifactsUploaderCommand) createAndUpload() error {
case common.UploadForbidden:
return os.ErrPermission
case common.UploadTooLarge:
return errors.New("too large")
return errTooLarge
case common.UploadFailed:
return retryableErr{err: os.ErrInvalid}
case common.UploadServiceUnavailable:
return retryableErr{err: errServiceUnavailable}
default:
return os.ErrInvalid
}
}
func (c *ArtifactsUploaderCommand) ShouldRetry(tries int, err error) bool {
var retryableErr retryableErr
if !errors.As(err, &retryableErr) {
return false
}
maxTries := defaultTries
if errors.Is(retryableErr, errServiceUnavailable) {
maxTries = serviceUnavailableTries
}
if tries >= maxTries {
return false
}
return true
}
func (c *ArtifactsUploaderCommand) Execute(*cli.Context) {
log.SetRunnerFormatter()
......@@ -135,7 +163,9 @@ func (c *ArtifactsUploaderCommand) Execute(*cli.Context) {
}
// If the upload fails, exit with a non-zero exit code to indicate an issue?
err = c.doRetry(c.createAndUpload)
logger := logrus.WithField("context", "artifacts-uploader")
retryable := retry.New(retry.WithLogrus(c, logger))
err = retryable.Run()
if err != nil {
logrus.Fatalln(err)
}
......@@ -144,10 +174,6 @@ func (c *ArtifactsUploaderCommand) Execute(*cli.Context) {
func init() {
common.RegisterCommand2("artifacts-uploader", "create and upload build artifacts (internal)", &ArtifactsUploaderCommand{
network: network.NewGitLabClient(),
retryHelper: retryHelper{
Retry: 2,
RetryTime: time.Second,
},
Name: "artifacts",
Name: "artifacts",
})
}
package helpers
import (
"errors"
"os"
"testing"
......@@ -83,9 +84,6 @@ func TestArtifactsUploaderRetry(t *testing.T) {
cmd := ArtifactsUploaderCommand{
JobCredentials: UploaderCredentials,
network: network,
retryHelper: retryHelper{
Retry: 2,
},
fileArchiver: fileArchiver{
Paths: []string{artifactsTestArchivedFile},
},
......@@ -101,7 +99,7 @@ func TestArtifactsUploaderRetry(t *testing.T) {
cmd.Execute(nil)
})
assert.Equal(t, 3, network.uploadCalled)
assert.Equal(t, defaultTries, network.uploadCalled)
}
func TestArtifactsUploaderDefaultSucceeded(t *testing.T) {
......@@ -254,3 +252,93 @@ func TestArtifactsUploaderNoFilesDoNotGenerateError(t *testing.T) {
cmd.Execute(nil)
})
}
func TestArtifactsUploaderServiceUnavailable(t *testing.T) {
network := &testNetwork{
uploadState: common.UploadServiceUnavailable,
}
cmd := ArtifactsUploaderCommand{
JobCredentials: UploaderCredentials,
network: network,
fileArchiver: fileArchiver{
Paths: []string{artifactsTestArchivedFile},
},
}
writeTestFile(t, artifactsTestArchivedFile)
defer os.Remove(artifactsTestArchivedFile)
removeHook := helpers.MakeFatalToPanic()
defer removeHook()
assert.Panics(t, func() {
cmd.Execute(nil)
})
assert.Equal(t, serviceUnavailableTries, network.uploadCalled)
}
func TestArtifactUploaderCommandShouldRetry(t *testing.T) {
tests := map[string]struct {
err error
tries int
expectedShouldRetry bool
}{
"no error, first try": {
err: nil,
tries: 1,
expectedShouldRetry: false,
},
"random error, first try": {
err: errors.New("err"),
tries: 1,
expectedShouldRetry: false,
},
"retryable error, first try": {
err: retryableErr{},
tries: 1,
expectedShouldRetry: true,
},
"retryable error, max tries": {
err: retryableErr{},
tries: defaultTries,
expectedShouldRetry: false,
},
"retryable error, over max tries limit": {
err: retryableErr{},
tries: defaultTries + 10,
expectedShouldRetry: false,
},
"retryable error, before reaching service unavailable tries": {
err: retryableErr{err: errServiceUnavailable},
tries: serviceUnavailableTries - 1,
expectedShouldRetry: true,
},
"retryable error service unavailable, max tries": {
err: retryableErr{err: errServiceUnavailable},
tries: serviceUnavailableTries,
expectedShouldRetry: false,
},
"retryable error service unavailable, over max errors limit": {
err: retryableErr{err: errServiceUnavailable},
tries: serviceUnavailableTries + 10,
expectedShouldRetry: false,
},
}
for tn, tt := range tests {
t.Run(tn, func(t *testing.T) {
r := ArtifactsUploaderCommand{}
assert.Equal(t, tt.expectedShouldRetry, r.ShouldRetry(tt.tries, tt.err))
})
}
}
......@@ -21,6 +21,10 @@ type retryableErr struct {
err error
}
func (e retryableErr) Unwrap() error {
return e.err
}
func (e retryableErr) Error() string {
return e.err.Error()
}
......
......@@ -42,6 +42,7 @@ const (
UploadTooLarge
UploadForbidden
UploadFailed
UploadServiceUnavailable
)
const (
......
......@@ -517,6 +517,9 @@ func (n *GitLabClient) UploadRawArtifacts(config common.JobCredentials, reader i
case http.StatusRequestEntityTooLarge:
log.WithField("status", res.Status).Errorln("Uploading artifacts to coordinator...", "too large archive")
return common.UploadTooLarge
case http.StatusServiceUnavailable:
log.WithField("status", res.Status).Errorln("Uploading artifacts to coordinator...", "service unavailable")
return common.UploadServiceUnavailable
default:
log.WithField("status", res.Status).Warningln("Uploading artifacts to coordinator...", "failed")
return common.UploadFailed
......
......@@ -1035,6 +1035,9 @@ func checkTestArtifactsUploadHandlerContent(w http.ResponseWriter, r *http.Reque
statusCode: http.StatusCreated,
formValueKey: "artifact_type",
},
"service-unavailable": {
statusCode: http.StatusServiceUnavailable,
},
}
testCase, ok := cases[body]
......@@ -1153,6 +1156,10 @@ func TestArtifactsUpload(t *testing.T) {
state = uploadArtifacts(c, invalidToken, tempFile.Name(), "", ArtifactFormatDefault)
assert.Equal(t, UploadForbidden, state, "Artifacts should be rejected if invalid token")
ioutil.WriteFile(tempFile.Name(), []byte("service-unavailable"), 0600)
state = uploadArtifacts(c, config, tempFile.Name(), "", ArtifactFormatDefault)
assert.Equal(t, UploadServiceUnavailable, state, "Artifacts should get service unavailable")
}
func testArtifactsDownloadHandler(w http.ResponseWriter, r *http.Request, t *testing.T) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment