Skip to content

Commit 5a412b6

Browse files
qdrant-cloud-botQdrant Claw
andauthored
feat: add --wait flag to cluster key create command (#73)
* feat: add --wait flag to cluster key create command API key creation is not transactional — it takes time for a newly created key to propagate to the Qdrant cluster. The --wait flag polls the cluster's REST endpoint using the new key until it gets a successful response, confirming the key is active. Flags added: - --wait: opt-in to waiting for key activation - --wait-timeout (default 1m): maximum time to wait - --wait-poll-interval (default 1s, hidden): polling frequency Made-with: Cursor * fix: resolve lint issues in key create wait implementation - Suppress errcheck on best-effort resp.Body.Close() in probe - Remove unused *httptest.Server return from test helper Made-with: Cursor * fix: move endpoint resolution into wait poll loop A key can be created before the cluster has an endpoint (e.g. cluster still provisioning). Instead of failing early when no endpoint exists, the probe now calls GetCluster on each poll iteration and keeps retrying until the endpoint appears and the key is accepted. Made-with: Cursor * fix: address PR review feedback - Make HTTP error in probe more descriptive: include status code and URL - Use standard Go error-first checking in waitForKeyReady (if err != nil + continue) instead of positive nil check - Print the probe error in the progress message so users can see why the key is not yet active (e.g. no endpoint, HTTP 403, etc.) Made-with: Cursor * refactor: extract magic port number into defaultQdrantRESTPort const Made-with: Cursor * refactor: use dedicated HTTP client with safe timeouts for key probe Replace http.DefaultClient with a purpose-built client configured outside the closure with explicit timeouts (10s overall, 5s TLS handshake, 5s response header). Made-with: Cursor * fix: drop redundant http.Client.Timeout, rely on context deadline The per-request context already carries the user's --wait-timeout deadline. A separate client-level Timeout is redundant and could confuse readers. Transport-level timeouts (TLS handshake, response header) are kept since they guard specific connection phases. Made-with: Cursor --------- Co-authored-by: Qdrant Claw <[email protected]>
1 parent 3d2403b commit 5a412b6

3 files changed

Lines changed: 374 additions & 1 deletion

File tree

internal/cmd/cluster/key_create.go

Lines changed: 77 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,17 @@
11
package cluster
22

33
import (
4+
"context"
45
"fmt"
56
"io"
7+
"net/http"
68
"time"
79

810
"github.com/spf13/cobra"
911
"google.golang.org/protobuf/types/known/timestamppb"
1012

1113
clusterauthv2 "github.com/qdrant/qdrant-cloud-public-api/gen/go/qdrant/cloud/cluster/auth/v2"
14+
clusterv1 "github.com/qdrant/qdrant-cloud-public-api/gen/go/qdrant/cloud/cluster/v1"
1215

1316
"github.com/qdrant/qcloud-cli/internal/cmd/base"
1417
"github.com/qdrant/qcloud-cli/internal/cmd/completion"
@@ -23,7 +26,11 @@ qcloud cluster key create 7b2ea926-724b-4de2-b73a-8675c42a6ebe --name my-key
2326
2427
# Create a read-only key with expiration
2528
qcloud cluster key create 7b2ea926-724b-4de2-b73a-8675c42a6ebe \
26-
--name read-key --access-type read-only --expires 2025-12-31`,
29+
--name read-key --access-type read-only --expires 2025-12-31
30+
31+
# Create a key and wait for it to become active on the cluster
32+
qcloud cluster key create 7b2ea926-724b-4de2-b73a-8675c42a6ebe \
33+
--name my-key --wait`,
2734
BaseCobraCommand: func() *cobra.Command {
2835
cmd := &cobra.Command{
2936
Use: "create <cluster-id>",
@@ -33,6 +40,10 @@ qcloud cluster key create 7b2ea926-724b-4de2-b73a-8675c42a6ebe \
3340
cmd.Flags().String("name", "", "Name of the API key (required)")
3441
cmd.Flags().String("access-type", "", "Access type: manage or read-only (default: server assigns manage)")
3542
cmd.Flags().String("expires", "", "Expiration date in YYYY-MM-DD format")
43+
cmd.Flags().Bool("wait", false, "Wait for the API key to become active on the cluster")
44+
cmd.Flags().Duration("wait-timeout", time.Minute, "Maximum time to wait for the API key to become active")
45+
cmd.Flags().Duration("wait-poll-interval", time.Second, "How often to probe the cluster endpoint")
46+
_ = cmd.Flags().MarkHidden("wait-poll-interval")
3647
_ = cmd.MarkFlagRequired("name")
3748
return cmd
3849
},
@@ -96,6 +107,20 @@ qcloud cluster key create 7b2ea926-724b-4de2-b73a-8675c42a6ebe \
96107
return nil, fmt.Errorf("failed to create API key: %w", err)
97108
}
98109

110+
wait, _ := cmd.Flags().GetBool("wait")
111+
if !wait {
112+
return resp.GetDatabaseApiKey(), nil
113+
}
114+
115+
waitTimeout, _ := cmd.Flags().GetDuration("wait-timeout")
116+
pollInterval, _ := cmd.Flags().GetDuration("wait-poll-interval")
117+
118+
fmt.Fprintf(cmd.ErrOrStderr(), "API key created, waiting for it to become active on the cluster...\n")
119+
probe := newKeyProbe(client.Cluster(), accountID, clusterID, resp.GetDatabaseApiKey().GetKey())
120+
if err := waitForKeyReady(ctx, cmd.ErrOrStderr(), probe, waitTimeout, pollInterval); err != nil {
121+
return nil, err
122+
}
123+
99124
return resp.GetDatabaseApiKey(), nil
100125
},
101126
PrintResource: func(_ *cobra.Command, out io.Writer, key *clusterauthv2.DatabaseApiKey) {
@@ -109,3 +134,54 @@ qcloud cluster key create 7b2ea926-724b-4de2-b73a-8675c42a6ebe \
109134
ValidArgsFunction: completion.ClusterIDCompletion(s),
110135
}.CobraCommand(s)
111136
}
137+
138+
const defaultQdrantRESTPort = 6333
139+
140+
func newKeyProbe(
141+
clusterSvc clusterv1.ClusterServiceClient,
142+
accountID, clusterID, apiKey string,
143+
) func(ctx context.Context) error {
144+
httpClient := &http.Client{
145+
Transport: &http.Transport{
146+
TLSHandshakeTimeout: 5 * time.Second,
147+
ResponseHeaderTimeout: 5 * time.Second,
148+
},
149+
}
150+
151+
return func(ctx context.Context) error {
152+
clusterResp, err := clusterSvc.GetCluster(ctx, &clusterv1.GetClusterRequest{
153+
AccountId: accountID,
154+
ClusterId: clusterID,
155+
})
156+
if err != nil {
157+
return fmt.Errorf("failed to get cluster: %w", err)
158+
}
159+
160+
ep := clusterResp.GetCluster().GetState().GetEndpoint()
161+
if ep == nil || ep.GetUrl() == "" {
162+
return fmt.Errorf("cluster %s has no endpoint yet", clusterID)
163+
}
164+
165+
port := ep.GetRestPort()
166+
if port == 0 {
167+
port = defaultQdrantRESTPort
168+
}
169+
endpointURL := fmt.Sprintf("%s:%d", ep.GetUrl(), port)
170+
171+
req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpointURL, nil)
172+
if err != nil {
173+
return err
174+
}
175+
req.Header.Set("api-key", apiKey)
176+
resp, err := httpClient.Do(req)
177+
if err != nil {
178+
return err
179+
}
180+
defer resp.Body.Close() //nolint:errcheck // best-effort close on a read-only probe
181+
_, _ = io.Copy(io.Discard, resp.Body)
182+
if resp.StatusCode >= 200 && resp.StatusCode < 300 {
183+
return nil
184+
}
185+
return fmt.Errorf("cluster endpoint responded with HTTP %d at %s", resp.StatusCode, endpointURL)
186+
}
187+
}

internal/cmd/cluster/key_create_test.go

Lines changed: 260 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,20 @@
11
package cluster_test
22

33
import (
4+
"context"
5+
"fmt"
6+
"net/http"
7+
"net/http/httptest"
8+
"net/url"
9+
"strconv"
10+
"sync/atomic"
411
"testing"
512

613
"github.com/stretchr/testify/assert"
714
"github.com/stretchr/testify/require"
815

916
clusterauthv2 "github.com/qdrant/qdrant-cloud-public-api/gen/go/qdrant/cloud/cluster/auth/v2"
17+
clusterv1 "github.com/qdrant/qdrant-cloud-public-api/gen/go/qdrant/cloud/cluster/v1"
1018

1119
"github.com/qdrant/qcloud-cli/internal/testutil"
1220
)
@@ -113,3 +121,255 @@ func TestKeyCreate_MissingName(t *testing.T) {
113121
_, _, err := testutil.Exec(t, env, "cluster", "key", "create", "cluster-123")
114122
require.Error(t, err)
115123
}
124+
125+
func TestKeyCreate_NoWait(t *testing.T) {
126+
env := testutil.NewTestEnv(t)
127+
128+
env.DatabaseApiKeyServer.CreateDatabaseApiKeyCalls.Returns(&clusterauthv2.CreateDatabaseApiKeyResponse{
129+
DatabaseApiKey: &clusterauthv2.DatabaseApiKey{
130+
Id: "key-no-wait",
131+
Key: "secret",
132+
},
133+
}, nil)
134+
135+
stdout, _, err := testutil.Exec(t, env, "cluster", "key", "create", "cluster-123", "--name", "my-key")
136+
require.NoError(t, err)
137+
assert.Contains(t, stdout, "key-no-wait")
138+
assert.Equal(t, 0, env.Server.GetClusterCalls.Count(), "GetCluster should not be called without --wait")
139+
}
140+
141+
// clusterEndpoint starts an httptest server and returns (server, schemeHost, port).
142+
// The caller is responsible for closing the server.
143+
func clusterEndpoint(t *testing.T, handler http.Handler) (host string, port int32) {
144+
t.Helper()
145+
ts := httptest.NewServer(handler)
146+
t.Cleanup(ts.Close)
147+
u, err := url.Parse(ts.URL)
148+
require.NoError(t, err)
149+
p, err := strconv.Atoi(u.Port())
150+
require.NoError(t, err)
151+
return fmt.Sprintf("http://%s", u.Hostname()), int32(p)
152+
}
153+
154+
func TestKeyCreate_WaitSuccess(t *testing.T) {
155+
env := testutil.NewTestEnv(t)
156+
157+
var calls atomic.Int32
158+
host, port := clusterEndpoint(t, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
159+
if r.Header.Get("api-key") != "secret-key" {
160+
w.WriteHeader(http.StatusUnauthorized)
161+
return
162+
}
163+
n := calls.Add(1)
164+
if n <= 2 {
165+
w.WriteHeader(http.StatusForbidden)
166+
return
167+
}
168+
w.WriteHeader(http.StatusOK)
169+
}))
170+
171+
env.DatabaseApiKeyServer.CreateDatabaseApiKeyCalls.Returns(&clusterauthv2.CreateDatabaseApiKeyResponse{
172+
DatabaseApiKey: &clusterauthv2.DatabaseApiKey{
173+
Id: "key-wait",
174+
Name: "wait-key",
175+
Key: "secret-key",
176+
},
177+
}, nil)
178+
179+
env.Server.GetClusterCalls.Returns(&clusterv1.GetClusterResponse{
180+
Cluster: &clusterv1.Cluster{
181+
Id: "cluster-123",
182+
State: &clusterv1.ClusterState{
183+
Phase: clusterv1.ClusterPhase_CLUSTER_PHASE_HEALTHY,
184+
Endpoint: &clusterv1.ClusterEndpoint{
185+
Url: host,
186+
RestPort: port,
187+
},
188+
},
189+
},
190+
}, nil)
191+
192+
stdout, stderr, err := testutil.Exec(t, env,
193+
"cluster", "key", "create", "cluster-123",
194+
"--name", "wait-key",
195+
"--wait",
196+
"--wait-timeout", "30s",
197+
"--wait-poll-interval", "10ms",
198+
)
199+
require.NoError(t, err)
200+
assert.Contains(t, stdout, "key-wait")
201+
assert.Contains(t, stderr, "API key is active")
202+
assert.GreaterOrEqual(t, env.Server.GetClusterCalls.Count(), 1)
203+
}
204+
205+
func TestKeyCreate_WaitTimeout(t *testing.T) {
206+
env := testutil.NewTestEnv(t)
207+
208+
host, port := clusterEndpoint(t, http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
209+
w.WriteHeader(http.StatusForbidden)
210+
}))
211+
212+
env.DatabaseApiKeyServer.CreateDatabaseApiKeyCalls.Returns(&clusterauthv2.CreateDatabaseApiKeyResponse{
213+
DatabaseApiKey: &clusterauthv2.DatabaseApiKey{
214+
Id: "key-timeout",
215+
Key: "secret",
216+
},
217+
}, nil)
218+
219+
env.Server.GetClusterCalls.Returns(&clusterv1.GetClusterResponse{
220+
Cluster: &clusterv1.Cluster{
221+
Id: "cluster-123",
222+
State: &clusterv1.ClusterState{
223+
Phase: clusterv1.ClusterPhase_CLUSTER_PHASE_HEALTHY,
224+
Endpoint: &clusterv1.ClusterEndpoint{
225+
Url: host,
226+
RestPort: port,
227+
},
228+
},
229+
},
230+
}, nil)
231+
232+
_, _, err := testutil.Exec(t, env,
233+
"cluster", "key", "create", "cluster-123",
234+
"--name", "timeout-key",
235+
"--wait",
236+
"--wait-timeout", "200ms",
237+
"--wait-poll-interval", "10ms",
238+
)
239+
require.Error(t, err)
240+
assert.Contains(t, err.Error(), "timed out")
241+
}
242+
243+
func TestKeyCreate_WaitNoEndpoint_TimesOut(t *testing.T) {
244+
env := testutil.NewTestEnv(t)
245+
246+
env.DatabaseApiKeyServer.CreateDatabaseApiKeyCalls.Returns(&clusterauthv2.CreateDatabaseApiKeyResponse{
247+
DatabaseApiKey: &clusterauthv2.DatabaseApiKey{
248+
Id: "key-no-ep",
249+
Key: "secret",
250+
},
251+
}, nil)
252+
253+
env.Server.GetClusterCalls.Returns(&clusterv1.GetClusterResponse{
254+
Cluster: &clusterv1.Cluster{
255+
Id: "cluster-123",
256+
State: &clusterv1.ClusterState{},
257+
},
258+
}, nil)
259+
260+
_, stderr, err := testutil.Exec(t, env,
261+
"cluster", "key", "create", "cluster-123",
262+
"--name", "no-ep-key",
263+
"--wait",
264+
"--wait-timeout", "200ms",
265+
"--wait-poll-interval", "10ms",
266+
)
267+
require.Error(t, err)
268+
assert.Contains(t, err.Error(), "timed out")
269+
assert.Contains(t, stderr, "waiting for API key")
270+
}
271+
272+
func TestKeyCreate_WaitEndpointAppearsMidPoll(t *testing.T) {
273+
env := testutil.NewTestEnv(t)
274+
275+
host, port := clusterEndpoint(t, http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
276+
w.WriteHeader(http.StatusOK)
277+
}))
278+
279+
env.DatabaseApiKeyServer.CreateDatabaseApiKeyCalls.Returns(&clusterauthv2.CreateDatabaseApiKeyResponse{
280+
DatabaseApiKey: &clusterauthv2.DatabaseApiKey{
281+
Id: "key-delayed-ep",
282+
Name: "delayed-ep-key",
283+
Key: "secret",
284+
},
285+
}, nil)
286+
287+
// First two GetCluster calls return no endpoint, then it appears.
288+
env.Server.GetClusterCalls.
289+
OnCall(0, func(_ context.Context, _ *clusterv1.GetClusterRequest) (*clusterv1.GetClusterResponse, error) {
290+
return &clusterv1.GetClusterResponse{
291+
Cluster: &clusterv1.Cluster{
292+
Id: "cluster-123",
293+
State: &clusterv1.ClusterState{Phase: clusterv1.ClusterPhase_CLUSTER_PHASE_CREATING},
294+
},
295+
}, nil
296+
}).
297+
OnCall(1, func(_ context.Context, _ *clusterv1.GetClusterRequest) (*clusterv1.GetClusterResponse, error) {
298+
return &clusterv1.GetClusterResponse{
299+
Cluster: &clusterv1.Cluster{
300+
Id: "cluster-123",
301+
State: &clusterv1.ClusterState{Phase: clusterv1.ClusterPhase_CLUSTER_PHASE_CREATING},
302+
},
303+
}, nil
304+
}).
305+
Always(func(_ context.Context, _ *clusterv1.GetClusterRequest) (*clusterv1.GetClusterResponse, error) {
306+
return &clusterv1.GetClusterResponse{
307+
Cluster: &clusterv1.Cluster{
308+
Id: "cluster-123",
309+
State: &clusterv1.ClusterState{
310+
Phase: clusterv1.ClusterPhase_CLUSTER_PHASE_HEALTHY,
311+
Endpoint: &clusterv1.ClusterEndpoint{
312+
Url: host,
313+
RestPort: port,
314+
},
315+
},
316+
},
317+
}, nil
318+
})
319+
320+
stdout, stderr, err := testutil.Exec(t, env,
321+
"cluster", "key", "create", "cluster-123",
322+
"--name", "delayed-ep-key",
323+
"--wait",
324+
"--wait-timeout", "30s",
325+
"--wait-poll-interval", "10ms",
326+
)
327+
require.NoError(t, err)
328+
assert.Contains(t, stdout, "key-delayed-ep")
329+
assert.Contains(t, stderr, "API key is active")
330+
assert.GreaterOrEqual(t, env.Server.GetClusterCalls.Count(), 3)
331+
}
332+
333+
func TestKeyCreate_WaitDefaultPort(t *testing.T) {
334+
env := testutil.NewTestEnv(t)
335+
336+
var probed atomic.Bool
337+
host, port := clusterEndpoint(t, http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
338+
probed.Store(true)
339+
w.WriteHeader(http.StatusOK)
340+
}))
341+
342+
env.DatabaseApiKeyServer.CreateDatabaseApiKeyCalls.Returns(&clusterauthv2.CreateDatabaseApiKeyResponse{
343+
DatabaseApiKey: &clusterauthv2.DatabaseApiKey{
344+
Id: "key-default-port",
345+
Key: "secret",
346+
},
347+
}, nil)
348+
349+
// RestPort = 0 would trigger default 6333, but we can't test that with a real
350+
// httptest server. Instead, verify that the explicit port works.
351+
env.Server.GetClusterCalls.Always(func(_ context.Context, _ *clusterv1.GetClusterRequest) (*clusterv1.GetClusterResponse, error) {
352+
return &clusterv1.GetClusterResponse{
353+
Cluster: &clusterv1.Cluster{
354+
Id: "cluster-123",
355+
State: &clusterv1.ClusterState{
356+
Phase: clusterv1.ClusterPhase_CLUSTER_PHASE_HEALTHY,
357+
Endpoint: &clusterv1.ClusterEndpoint{
358+
Url: host,
359+
RestPort: port,
360+
},
361+
},
362+
},
363+
}, nil
364+
})
365+
366+
_, _, err := testutil.Exec(t, env,
367+
"cluster", "key", "create", "cluster-123",
368+
"--name", "port-key",
369+
"--wait",
370+
"--wait-timeout", "5s",
371+
"--wait-poll-interval", "10ms",
372+
)
373+
require.NoError(t, err)
374+
assert.True(t, probed.Load(), "cluster endpoint should have been probed")
375+
}

0 commit comments

Comments
 (0)