@@ -14,17 +14,17 @@ import (
1414)
1515
1616func main () {
17- numProcessors := flag .Int ( "num-processors " , 0 , "Number of parallel scc workers (0 = scc default, 1 = minimum for large repos) " )
17+ noLarge := flag .Bool ( "no-large " , false , "Skip files larger than 100MB to avoid OOM on large repos" )
1818 flag .Parse ()
1919
20- response := processRepository (* numProcessors )
20+ response := processRepository (* noLarge )
2121 outputJSON (response )
2222
2323 // Always exit with code 0 - status details are in JSON response
2424}
2525
2626// processRepository handles the main logic and returns a StandardResponse
27- func processRepository (numProcessors int ) StandardResponse {
27+ func processRepository (noLarge bool ) StandardResponse {
2828 ctx := context .Background ()
2929
3030 // Get target path from remaining non-flag arguments
@@ -34,7 +34,7 @@ func processRepository(numProcessors int) StandardResponse {
3434 targetPath = args [0 ]
3535 } else {
3636 errorCode := ErrorCodeInvalidArguments
37- errorMessage := fmt .Sprintf ("Usage: %s [--num-processors N ] <target-path>" , os .Args [0 ])
37+ errorMessage := fmt .Sprintf ("Usage: %s [--no-large ] <target-path>" , os .Args [0 ])
3838 return StandardResponse {
3939 Status : StatusFailure ,
4040 ErrorCode : & errorCode ,
@@ -89,7 +89,7 @@ func processRepository(numProcessors int) StandardResponse {
8989 }
9090
9191 // Process the repository with SCC
92- report , err := getSCCReport (config .SCCPath , repoDir , numProcessors )
92+ report , err := getSCCReport (config .SCCPath , repoDir , noLarge )
9393 if err != nil {
9494 errorCode := getErrorCodeFromSCCError (err )
9595 errorMessage := fmt .Sprintf ("Error processing repository '%s': %v" , repoDir , err )
@@ -141,8 +141,8 @@ func processRepository(numProcessors int) StandardResponse {
141141
142142
143143// getSCCReport analyzes a directory with scc and returns a report containing the estimated cost and language statistics.
144- func getSCCReport (sccPath , dirPath string , numProcessors int ) (SCCReport , error ) {
145- cost , err := getCost (sccPath , dirPath , numProcessors )
144+ func getSCCReport (sccPath , dirPath string , noLarge bool ) (SCCReport , error ) {
145+ cost , err := getCost (sccPath , dirPath , noLarge )
146146 if err != nil {
147147 return SCCReport {}, fmt .Errorf ("error getting SCC report for '%s': %v" , dirPath , err )
148148 }
@@ -154,7 +154,7 @@ func getSCCReport(sccPath, dirPath string, numProcessors int) (SCCReport, error)
154154
155155 projectPath := filepath .Base (dirPath )
156156
157- langStats , err := getLanguageStats (sccPath , dirPath , numProcessors )
157+ langStats , err := getLanguageStats (sccPath , dirPath , noLarge )
158158 if err != nil {
159159 return SCCReport {}, fmt .Errorf ("error getting language stats for '%s': %v" , dirPath , err )
160160 }
@@ -198,8 +198,8 @@ func getGitRepositoryURL(dirPath string) (string, error) {
198198}
199199
200200// getCost runs the scc command and parses the output to get the estimated cost.
201- func getCost (sccPathPath , repoPath string , numProcessors int ) (float64 , error ) {
202- output , err := runSCC (sccPathPath , numProcessors , "--format=short" , repoPath )
201+ func getCost (sccPathPath , repoPath string , noLarge bool ) (float64 , error ) {
202+ output , err := runSCC (sccPathPath , noLarge , "--format=short" , repoPath )
203203 if err != nil {
204204 return 0 , fmt .Errorf ("failed to run scc command: %w" , err )
205205 }
@@ -213,8 +213,8 @@ func getCost(sccPathPath, repoPath string, numProcessors int) (float64, error) {
213213}
214214
215215// getLanguageStats runs the scc command and parses the output to get language statistics.
216- func getLanguageStats (sccPathPath , repoPath string , numProcessors int ) ([]LanguageStats , error ) {
217- output , err := runSCC (sccPathPath , numProcessors , "--format=json" , repoPath )
216+ func getLanguageStats (sccPathPath , repoPath string , noLarge bool ) ([]LanguageStats , error ) {
217+ output , err := runSCC (sccPathPath , noLarge , "--format=json" , repoPath )
218218 if err != nil {
219219 return nil , fmt .Errorf ("failed to run scc command: %w" , err )
220220 }
@@ -228,15 +228,11 @@ func getLanguageStats(sccPathPath, repoPath string, numProcessors int) ([]Langua
228228}
229229
230230// runSCC executes the scc command with the given arguments and returns the output.
231- // When numProcessors > 0, scc is run with reduced parallelism to limit memory usage on large repos.
232- func runSCC (sccPathPath string , numProcessors int , args ... string ) (string , error ) {
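231+ // When noLarge is true, scc is invoked with --no-large and a 100000000-byte (100MB) limit so that oversized files are skipped, avoiding OOM on large repos. NOTE(review): scc documents --large-byte-count / --large-line-count as the thresholds used by --no-large; confirm that --large-file-limit is a flag the installed scc accepts.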
232+ func runSCC (sccPathPath string , noLarge bool , args ... string ) (string , error ) {
233233 var cmdArgs []string
234- if numProcessors > 0 {
235- n := strconv .Itoa (numProcessors )
236- cmdArgs = append (cmdArgs ,
237- "--directory-walker-job-workers" , n ,
238- "--file-process-job-workers" , n ,
239- )
234+ if noLarge {
235+ cmdArgs = append (cmdArgs , "--no-large" , "--large-file-limit" , "100000000" )
240236 }
241237 cmdArgs = append (cmdArgs , args ... )
242238 cmd := exec .Command (sccPathPath , cmdArgs ... )
0 commit comments