Skip to content

Commit beb0f25

Browse files
committed
fix: use no-large param
Signed-off-by: Mouad BANI <mouad-mb@outlook.com>
1 parent d849c6c commit beb0f25

2 files changed

Lines changed: 18 additions & 22 deletions

File tree

services/apps/git_integration/src/crowdgit/services/software_value/main.go

Lines changed: 16 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -14,17 +14,17 @@ import (
1414
)
1515

1616
func main() {
17-
numProcessors := flag.Int("num-processors", 0, "Number of parallel scc workers (0 = scc default, 1 = minimum for large repos)")
17+
noLarge := flag.Bool("no-large", false, "Skip files larger than 100MB to avoid OOM on large repos")
1818
flag.Parse()
1919

20-
response := processRepository(*numProcessors)
20+
response := processRepository(*noLarge)
2121
outputJSON(response)
2222

2323
// Always exit with code 0 - status details are in JSON response
2424
}
2525

2626
// processRepository handles the main logic and returns a StandardResponse
27-
func processRepository(numProcessors int) StandardResponse {
27+
func processRepository(noLarge bool) StandardResponse {
2828
ctx := context.Background()
2929

3030
// Get target path from remaining non-flag arguments
@@ -34,7 +34,7 @@ func processRepository(numProcessors int) StandardResponse {
3434
targetPath = args[0]
3535
} else {
3636
errorCode := ErrorCodeInvalidArguments
37-
errorMessage := fmt.Sprintf("Usage: %s [--num-processors N] <target-path>", os.Args[0])
37+
errorMessage := fmt.Sprintf("Usage: %s [--no-large] <target-path>", os.Args[0])
3838
return StandardResponse{
3939
Status: StatusFailure,
4040
ErrorCode: &errorCode,
@@ -89,7 +89,7 @@ func processRepository(numProcessors int) StandardResponse {
8989
}
9090

9191
// Process the repository with SCC
92-
report, err := getSCCReport(config.SCCPath, repoDir, numProcessors)
92+
report, err := getSCCReport(config.SCCPath, repoDir, noLarge)
9393
if err != nil {
9494
errorCode := getErrorCodeFromSCCError(err)
9595
errorMessage := fmt.Sprintf("Error processing repository '%s': %v", repoDir, err)
@@ -141,8 +141,8 @@ func processRepository(numProcessors int) StandardResponse {
141141

142142

143143
// getSCCReport analyzes a directory with scc and returns a report containing the estimated cost and language statistics.
144-
func getSCCReport(sccPath, dirPath string, numProcessors int) (SCCReport, error) {
145-
cost, err := getCost(sccPath, dirPath, numProcessors)
144+
func getSCCReport(sccPath, dirPath string, noLarge bool) (SCCReport, error) {
145+
cost, err := getCost(sccPath, dirPath, noLarge)
146146
if err != nil {
147147
return SCCReport{}, fmt.Errorf("error getting SCC report for '%s': %v", dirPath, err)
148148
}
@@ -154,7 +154,7 @@ func getSCCReport(sccPath, dirPath string, numProcessors int) (SCCReport, error)
154154

155155
projectPath := filepath.Base(dirPath)
156156

157-
langStats, err := getLanguageStats(sccPath, dirPath, numProcessors)
157+
langStats, err := getLanguageStats(sccPath, dirPath, noLarge)
158158
if err != nil {
159159
return SCCReport{}, fmt.Errorf("error getting language stats for '%s': %v", dirPath, err)
160160
}
@@ -198,8 +198,8 @@ func getGitRepositoryURL(dirPath string) (string, error) {
198198
}
199199

200200
// getCost runs the scc command and parses the output to get the estimated cost.
201-
func getCost(sccPathPath, repoPath string, numProcessors int) (float64, error) {
202-
output, err := runSCC(sccPathPath, numProcessors, "--format=short", repoPath)
201+
func getCost(sccPathPath, repoPath string, noLarge bool) (float64, error) {
202+
output, err := runSCC(sccPathPath, noLarge, "--format=short", repoPath)
203203
if err != nil {
204204
return 0, fmt.Errorf("failed to run scc command: %w", err)
205205
}
@@ -213,8 +213,8 @@ func getCost(sccPathPath, repoPath string, numProcessors int) (float64, error) {
213213
}
214214

215215
// getLanguageStats runs the scc command and parses the output to get language statistics.
216-
func getLanguageStats(sccPathPath, repoPath string, numProcessors int) ([]LanguageStats, error) {
217-
output, err := runSCC(sccPathPath, numProcessors, "--format=json", repoPath)
216+
func getLanguageStats(sccPathPath, repoPath string, noLarge bool) ([]LanguageStats, error) {
217+
output, err := runSCC(sccPathPath, noLarge, "--format=json", repoPath)
218218
if err != nil {
219219
return nil, fmt.Errorf("failed to run scc command: %w", err)
220220
}
@@ -228,15 +228,11 @@ func getLanguageStats(sccPathPath, repoPath string, numProcessors int) ([]Langua
228228
}
229229

230230
// runSCC executes the scc command with the given arguments and returns the output.
231-
// When numProcessors > 0, scc is run with reduced parallelism to limit memory usage on large repos.
232-
func runSCC(sccPathPath string, numProcessors int, args ...string) (string, error) {
231+
// When noLarge is true, files larger than 100MB are skipped to avoid OOM on large repos.
232+
func runSCC(sccPathPath string, noLarge bool, args ...string) (string, error) {
233233
var cmdArgs []string
234-
if numProcessors > 0 {
235-
n := strconv.Itoa(numProcessors)
236-
cmdArgs = append(cmdArgs,
237-
"--directory-walker-job-workers", n,
238-
"--file-process-job-workers", n,
239-
)
234+
if noLarge {
235+
cmdArgs = append(cmdArgs, "--no-large", "--large-file-limit", "100000000")
240236
}
241237
cmdArgs = append(cmdArgs, args...)
242238
cmd := exec.Command(sccPathPath, cmdArgs...)

services/apps/git_integration/src/crowdgit/services/software_value/software_value_service.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -52,9 +52,9 @@ async def run(self, repo_id: str, repo_path: str) -> None:
5252
if repo_size >= _LARGE_REPO_THRESHOLD_BYTES:
5353
self.logger.info(
5454
f"Repo size {repo_size / (1024**3):.1f} GB exceeds threshold — "
55-
"running scc with num-processors=1"
55+
"running scc with no-large (skipping files >100MB)"
5656
)
57-
cmd += ["--num-processors", "1"]
57+
cmd += ["--no-large"]
5858

5959
cmd.append(repo_path)
6060

0 commit comments

Comments
 (0)