Skip to content

Commit ba43656

Browse files
authored
Merge branch 'main' into script/CM-774
2 parents 223a96b + 2938f91 commit ba43656

8 files changed

Lines changed: 167 additions & 85 deletions

File tree

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
ALTER TABLE public.repositories DROP COLUMN enabled;
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
ALTER TABLE public.repositories ADD COLUMN enabled BOOLEAN NOT NULL DEFAULT TRUE;
2+
COMMENT ON COLUMN public.repositories.enabled IS 'Used to enable/disable repository on insights';

backend/src/services/collectionService.ts

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -384,16 +384,6 @@ export class CollectionService extends LoggerBase {
384384
}
385385
}
386386

387-
static normalizeRepositories(
388-
repositories?: string[] | { platform: string; url: string }[],
389-
): string[] {
390-
if (!repositories || repositories.length === 0) return []
391-
392-
return typeof repositories[0] === 'string'
393-
? (repositories as string[])
394-
: (repositories as { platform: string; url: string }[]).map((r) => r.url)
395-
}
396-
397387
async updateInsightsProject(insightsProjectId: string, project: Partial<ICreateInsightsProject>) {
398388
return SequelizeRepository.withTx(this.options, async (tx) => {
399389
const qx = SequelizeRepository.getQueryExecutor({ ...this.options, transaction: tx })

services/libs/data-access-layer/src/collections/index.ts

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { QueryFilter } from '../query'
22
import { QueryExecutor } from '../queryExecutor'
3+
import { syncRepositoriesEnabledStatus } from '../repositories'
34
import { ICreateRepositoryGroup } from '../repositoryGroups'
45
import {
56
QueryResult,
@@ -310,9 +311,28 @@ export async function updateInsightsProject(
310311
throw new Error(`Update failed or project with id ${id} not found`)
311312
}
312313

314+
// Sync repositories.enabled status when repositories field is updated
315+
// Disables repos not in the new list (new repos are enabled by default on insert)
316+
if (project.repositories !== undefined) {
317+
const enabledUrls = normalizeRepositoriesToUrls(project.repositories)
318+
await syncRepositoriesEnabledStatus(qx, id, enabledUrls)
319+
}
320+
313321
return updated as IInsightsProject
314322
}
315323

324+
/**
 * Flattens the `repositories` field of an insights project into a plain list of URLs.
 *
 * The field may hold either bare URL strings or `{ platform, url }` objects. Each
 * entry is inspected on its own, so a list that mixes both shapes is handled, and
 * entries that do not yield a string URL are dropped instead of leaking
 * `undefined` or objects to callers that call string methods on the result.
 *
 * @param repositories - Repository list in either supported shape, or undefined.
 * @returns The repository URLs in input order; an empty array for missing or empty input.
 */
function normalizeRepositoriesToUrls(
  repositories: string[] | { platform: string; url: string }[] | undefined,
): string[] {
  if (!repositories || repositories.length === 0) return []

  // Widen to a per-element union so each entry is classified individually
  // rather than trusting the shape of the first element.
  const entries: ReadonlyArray<string | { platform: string; url: string } | null | undefined> =
    repositories

  const urls: string[] = []
  for (const entry of entries) {
    const url = typeof entry === 'string' ? entry : entry?.url
    if (typeof url === 'string') {
      urls.push(url)
    }
  }

  return urls
}
335+
316336
function prepareProject(project: Partial<ICreateInsightsProject>) {
317337
const toUpdate: Record<string, unknown> = {
318338
...project,

services/libs/data-access-layer/src/integrations/index.ts

Lines changed: 62 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,62 @@ import { getServiceChildLogger } from '@crowd/logging'
22
import { IIntegration, PlatformType } from '@crowd/types'
33

44
import { QueryExecutor } from '../queryExecutor'
5-
import { getMappedRepos } from '../segments'
5+
import { getReposBySegmentGroupedByPlatform } from '../segments'
66

77
const log = getServiceChildLogger('db.integrations')
88

9+
/**
 * Produces a canonical form of a repository URL so that different spellings of
 * the same repository compare equal.
 *
 * Normalization steps: force the https scheme, drop embedded credentials, strip
 * a leading `www.`, lowercase the hostname, lowercase the path on github.com and
 * gitlab.com, strip trailing slashes and a `.git` suffix, and drop the query
 * string and fragment.
 *
 * @param url - Repository URL in any spelling.
 * @returns The canonical URL, or the input unchanged when it cannot be parsed as
 *   a URL (for example the scp-style `git@host:owner/repo.git`).
 */
export function normalizeRepoUrl(url: string): string {
  try {
    let parsed = new URL(url)

    // Normalize protocol to https
    parsed.protocol = 'https:'

    // The URL protocol setter silently ignores a switch from a non-special scheme
    // (git:, ssh:, ...) to https:. When that happens and the URL has a host,
    // rebuild it on https from host + path; the scheme-specific port and any
    // userinfo are intentionally not carried over.
    if (parsed.protocol !== 'https:' && parsed.hostname) {
      parsed = new URL(`https://${parsed.hostname}${parsed.pathname}`)
    }

    // Credentials are not part of a repository's identity and must not leak
    // into stored or displayed URLs
    parsed.username = ''
    parsed.password = ''

    // Remove www. prefix and lowercase hostname
    parsed.hostname = parsed.hostname.replace(/^www\./, '').toLowerCase()

    // Lowercase path for GitHub/GitLab (case-insensitive platforms)
    if (parsed.hostname === 'github.com' || parsed.hostname === 'gitlab.com') {
      parsed.pathname = parsed.pathname.toLowerCase()
    }

    // Remove trailing slashes first, then the .git suffix, so "repo.git/" is handled
    parsed.pathname = parsed.pathname.replace(/\/+$/, '').replace(/\.git$/, '')

    // Remove query string and hash
    parsed.search = ''
    parsed.hash = ''

    return parsed.toString()
  } catch {
    // Not parseable as a URL: return the input untouched rather than guessing
    return url
  }
}
36+
37+
/**
 * Extracts a human-readable label from a repository URL.
 * For GitHub/GitLab (including a `www.` prefix and gitlab.com subdomains):
 * returns the path without its leading slash, trailing slashes or `.git`
 * suffix, e.g. "owner/repo".
 * For others, and for input that cannot be parsed as a URL: returns the input unchanged.
 *
 * @param url - Repository URL.
 * @returns The label to display for the repository.
 */
export function extractLabelFromUrl(url: string): string {
  try {
    const parsed = new URL(url)

    // Match the host without a leading "www." so the label agrees with the
    // www-stripped URL produced by URL normalization
    const hostname = parsed.hostname.toLowerCase().replace(/^www\./, '')
    const isGitHubOrGitLab =
      hostname === 'github.com' || hostname === 'gitlab.com' || hostname.endsWith('.gitlab.com')

    if (!isGitHubOrGitLab) {
      return url
    }

    // Trailing slashes must go before the .git suffix, otherwise "repo.git/"
    // would keep its ".git"
    return parsed.pathname
      .slice(1)
      .replace(/\/+$/, '')
      .replace(/\.git$/, '')
  } catch {
    return url
  }
}
60+
961
/* eslint-disable @typescript-eslint/no-explicit-any */
1062

1163
/**
@@ -609,82 +661,17 @@ export async function findRepositoriesForSegment(
609661
qx: QueryExecutor,
610662
segmentId: string,
611663
): Promise<Record<string, Array<{ url: string; label: string }>>> {
612-
const integrations = await fetchIntegrationsForSegment(qx, segmentId)
613-
614-
// Initialize result with platform arrays
615-
const result: Record<string, Array<{ url: string; label: string }>> = {
616-
git: [],
617-
github: [],
618-
gitlab: [],
619-
gerrit: [],
620-
}
621-
622-
const addToResult = (platform: PlatformType, fullUrl: string, label: string) => {
623-
const platformKey = platform.toLowerCase()
624-
if (!result[platformKey].some((item) => item.url === fullUrl)) {
625-
result[platformKey].push({ url: fullUrl, label })
626-
}
627-
}
628-
629-
// Add mapped repositories from public.repositories (GitHub and GitLab platforms)
630-
const [githubMappedRepos, githubNangoMappedRepos, gitlabMappedRepos] = await Promise.all([
631-
getMappedRepos(qx, segmentId, PlatformType.GITHUB),
632-
getMappedRepos(qx, segmentId, PlatformType.GITHUB_NANGO),
633-
getMappedRepos(qx, segmentId, PlatformType.GITLAB),
634-
])
635-
636-
for (const repo of [...githubMappedRepos, ...githubNangoMappedRepos, ...gitlabMappedRepos]) {
637-
const url = repo.url
638-
try {
639-
const parsedUrl = new URL(url)
640-
if (parsedUrl.hostname === 'github.com') {
641-
const label = parsedUrl.pathname.slice(1) // removes leading '/'
642-
addToResult(PlatformType.GITHUB, url, label)
643-
}
644-
if (parsedUrl.hostname === 'gitlab.com') {
645-
const label = parsedUrl.pathname.slice(1) // removes leading '/'
646-
addToResult(PlatformType.GITLAB, url, label)
647-
}
648-
} catch (err) {
649-
log.error({ err, repo }, 'Error parsing URL for repository!')
650-
}
651-
}
652-
653-
for (const i of integrations) {
654-
if (i.platform === PlatformType.GIT) {
655-
for (const r of (i.settings as any).remotes) {
656-
try {
657-
const url = new URL(r)
658-
let label = r
659-
660-
if (url.hostname === 'gitlab.com') {
661-
label = url.pathname.slice(1)
662-
} else if (url.hostname === 'github.com') {
663-
label = url.pathname.slice(1)
664-
}
665-
666-
addToResult(i.platform, r, label)
667-
} catch {
668-
// Invalid URL, skip
669-
}
670-
}
671-
}
664+
// Get all repos grouped by platform (github-nango merged into github)
665+
const reposByPlatform = await getReposBySegmentGroupedByPlatform(qx, segmentId, true)
672666

673-
if (i.platform === PlatformType.GITLAB) {
674-
for (const group of Object.values((i.settings as any).groupProjects) as any[]) {
675-
for (const r of group) {
676-
const label = r.path_with_namespace
677-
const fullUrl = `https://gitlab.com/${label}`
678-
addToResult(i.platform, fullUrl, label)
679-
}
680-
}
681-
}
667+
// Transform to include normalized URLs and labels
668+
const result: Record<string, Array<{ url: string; label: string }>> = {}
682669

683-
if (i.platform === PlatformType.GERRIT) {
684-
for (const r of (i.settings as any).remote.repoNames) {
685-
addToResult(i.platform, `${(i.settings as any).remote.orgURL}/q/project:${r}`, r)
686-
}
687-
}
670+
for (const [platform, urls] of Object.entries(reposByPlatform)) {
671+
result[platform] = urls.map((url) => ({
672+
url: normalizeRepoUrl(url),
673+
label: extractLabelFromUrl(url),
674+
}))
688675
}
689676

690677
return result

services/libs/data-access-layer/src/repositories/index.ts

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ export interface IRepository {
1616
archived: boolean
1717
forkedFrom: string | null
1818
excluded: boolean
19+
enabled: boolean
1920
createdAt: string
2021
updatedAt: string
2122
deletedAt: string | null
@@ -268,6 +269,7 @@ export async function restoreRepositories(
268269
archived = COALESCE(v.archived::boolean, r.archived),
269270
"forkedFrom" = COALESCE(v."forkedFrom", r."forkedFrom"),
270271
excluded = COALESCE(v.excluded::boolean, r.excluded),
272+
enabled = true,
271273
"deletedAt" = NULL,
272274
"updatedAt" = NOW()
273275
FROM jsonb_to_recordset($(values)::jsonb) AS v(
@@ -551,3 +553,27 @@ export async function findSegmentsForRepos(
551553

552554
return results
553555
}
556+
557+
/**
 * Aligns `repositories.enabled` with the repository list of an insights project.
 *
 * A repository of the project is enabled exactly when its URL, compared
 * case-insensitively, is present in `enabledUrls`; every other repository of the
 * project is disabled. Soft-deleted rows are left alone, and rows whose flag
 * already has the right value are not rewritten.
 *
 * @param qx - Query executor used to run the update.
 * @param insightsProjectId - Insights project whose repositories are synced.
 * @param enabledUrls - URLs of the repositories that should end up enabled.
 */
export async function syncRepositoriesEnabledStatus(
  qx: QueryExecutor,
  insightsProjectId: string,
  enabledUrls: string[],
): Promise<void> {
  // Lowercased here and compared against LOWER(url) in SQL
  const params = {
    insightsProjectId,
    normalizedUrls: enabledUrls.map((url) => url.toLowerCase()),
  }

  const query = `
    UPDATE public.repositories
    SET
      enabled = LOWER(url) = ANY($(normalizedUrls)::text[]),
      "updatedAt" = NOW()
    WHERE "insightsProjectId" = $(insightsProjectId)
      AND "deletedAt" IS NULL
      AND enabled <> (LOWER(url) = ANY($(normalizedUrls)::text[]))
    `

  await qx.result(query, params)
}

services/libs/data-access-layer/src/segments/index.ts

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,60 @@ export async function getMappedRepos(
209209
)
210210
}
211211

212+
/**
 * A repository URL paired with a platform name.
 */
export interface IRepoByPlatform {
  /** Repository URL. */
  url: string
  /** Platform name associated with the repository. */
  platform: string
}
216+
217+
/**
 * Get all repositories for a segment, grouped by platform.
 * Joins with the integrations table to determine the platform for each repo.
 * Soft-deleted repositories and integrations are ignored.
 *
 * Each URL appears at most once per platform in the result, including when the
 * same URL is attached to both a 'github' and a 'github-nango' integration and
 * the two platforms are merged.
 *
 * @param qx - Query executor
 * @param segmentId - The segment ID to get repos for
 * @param mergeGithubNango - If true, merges 'github-nango' platform into 'github' (default: true)
 * @returns Record of platform -> array of repo URLs
 */
export async function getReposBySegmentGroupedByPlatform(
  qx: QueryExecutor,
  segmentId: string,
  mergeGithubNango = true,
): Promise<Record<string, string[]>> {
  const rows: IRepoByPlatform[] = await qx.select(
    `
    SELECT DISTINCT
      r.url,
      i.platform
    FROM public.repositories r
    JOIN integrations i ON r."sourceIntegrationId" = i.id
    WHERE r."segmentId" = $(segmentId)
      AND r."deletedAt" IS NULL
      AND i."deletedAt" IS NULL
    ORDER BY i.platform, r.url
    `,
    { segmentId },
  )

  // DISTINCT only dedupes per (url, platform) pair. Once github-nango is folded
  // into github the same URL can show up twice, so dedupe per output platform.
  // Sets keep insertion order, so the query ordering is preserved.
  const urlsByPlatform = new Map<string, Set<string>>()

  for (const row of rows) {
    // Merge github-nango into github if requested
    const platform: string =
      mergeGithubNango && row.platform === PlatformType.GITHUB_NANGO
        ? PlatformType.GITHUB
        : row.platform

    let urls = urlsByPlatform.get(platform)
    if (!urls) {
      urls = new Set<string>()
      urlsByPlatform.set(platform, urls)
    }

    urls.add(row.url)
  }

  const result: Record<string, string[]> = {}
  for (const [platform, urls] of urlsByPlatform) {
    result[platform] = [...urls]
  }

  return result
}
265+
212266
export async function getRepoUrlsMappedToOtherSegments(
213267
qx: QueryExecutor,
214268
urls: string[],

services/libs/tinybird/datasources/repositories.datasource

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ DESCRIPTION >
1010
- `archived` indicates whether the repository has been archived.
1111
- `forkedFrom` contains the URL of the parent repository if this is a fork (empty string if not a fork).
1212
- `excluded` indicates whether the repository is excluded from processing.
13+
- `enabled` indicates whether the repository is enabled on insights.
1314
- `createdAt` and `updatedAt` are standard timestamp fields for record lifecycle tracking.
1415
- `deletedAt` is set when the repository is soft-deleted.
1516
- `lastArchivedCheckAt` is the timestamp of the last check for repository archived status.
@@ -26,6 +27,7 @@ SCHEMA >
2627
`archived` Bool `json:$.record.archived` DEFAULT false,
2728
`forkedFrom` String `json:$.record.forkedFrom` DEFAULT '',
2829
`excluded` Bool `json:$.record.excluded` DEFAULT false,
30+
`enabled` Bool `json:$.record.enabled` DEFAULT true,
2931
`createdAt` DateTime64(3) `json:$.record.createdAt`,
3032
`updatedAt` DateTime64(3) `json:$.record.updatedAt`,
3133
`deletedAt` Nullable(DateTime64(3)) `json:$.record.deletedAt`,

0 commit comments

Comments
 (0)