@@ -878,6 +878,45 @@ def triggerJob(jobName, parameters, jenkinsUrl = "", credentials = "")
878
878
return status
879
879
}
880
880
881
/**
 * Trigger a downstream Jenkins job with the parameter set shared by all launch
 * sites, and fail the calling stage unless the job reports "SUCCESS".
 *
 * The parameter map is assembled in this order (later entries win on key
 * collisions): getCommonParameters(), then 'enableFailFast' and the
 * JSON-serialized 'globalVars', then additionalParameters, then the
 * environment-driven overrides ('alternativeTRT', 'testPhase2StageName'),
 * and finally 'reuseArtifactPath'.
 *
 * @param jobName              name/path of the downstream job, passed to triggerJob
 * @param reuseBuild           when truthy, used as the last path component of
 *                             'reuseArtifactPath'; pass false to omit the parameter
 * @param enableFailFast       forwarded as the 'enableFailFast' parameter
 * @param globalVars           serialized with writeJSON and forwarded as 'globalVars'
 * @param platform             "x86_64" or "SBSA"; selects which env variable
 *                             (env.alternativeTRT or env.alternativeTrtSBSA) feeds
 *                             the 'alternativeTRT' parameter. Any other value adds none.
 * @param additionalParameters extra job-specific parameters; null is treated as empty
 * @param jenkinsUrl           forwarded to triggerJob; defaults to "" like triggerJob itself
 * @param credentials          forwarded to triggerJob; defaults to "" like triggerJob itself
 * @return the status returned by triggerJob (always "SUCCESS" when this returns)
 */
def launchJob(jobName, reuseBuild, enableFailFast, globalVars, platform = "x86_64", additionalParameters = [:], jenkinsUrl = "", credentials = "") {
    def parameters = getCommonParameters()
    String globalVarsJson = writeJSON returnText: true, json: globalVars
    // A null additionalParameters would make the map addition throw, so fall back to an empty map.
    parameters += [
        'enableFailFast': enableFailFast,
        'globalVars': globalVarsJson,
    ] + (additionalParameters ?: [:])

    // Normalize before comparing so a GString or a value with stray whitespace
    // still selects the intended override.
    def targetPlatform = platform?.toString()?.trim()
    def alternativeTrt = null
    if (targetPlatform == "x86_64") {
        alternativeTrt = env.alternativeTRT
    } else if (targetPlatform == "SBSA") {
        alternativeTrt = env.alternativeTrtSBSA
    }
    if (alternativeTrt) {
        parameters['alternativeTRT'] = alternativeTrt
    }

    if (env.testPhase2StageName) {
        parameters['testPhase2StageName'] = env.testPhase2StageName
    }

    if (reuseBuild) {
        parameters['reuseArtifactPath'] = "sw-tensorrt-generic/llm-artifacts/${JOB_NAME}/${reuseBuild}"
    }

    echo "Trigger ${jobName} job, params: ${parameters}"

    // With the default "" values this is equivalent to triggerJob(jobName, parameters).
    def status = triggerJob(jobName, parameters, jenkinsUrl, credentials)
    if (status != "SUCCESS") {
        // Several downstream jobs are launched from parallel branches; record which one failed.
        echo "Downstream job ${jobName} finished with status: ${status}"
        error "Downstream job did not succeed"
    }
    return status
}
919
+
881
920
def launchStages (pipeline , reuseBuild , testFilter , enableFailFast , globalVars )
882
921
{
883
922
stages = [
@@ -889,78 +928,88 @@ def launchStages(pipeline, reuseBuild, testFilter, enableFailFast, globalVars)
889
928
" x86_64-linux" : {
890
929
script {
891
930
stage(" Build" ) {
892
- def parameters = getCommonParameters()
893
- String globalVarsJson = writeJSON returnText : true , json : globalVars
894
- parameters + = [
895
- ' enableFailFast' : enableFailFast,
931
+ def additionalParameters = [
896
932
' dockerImage' : globalVars[" LLM_DOCKER_IMAGE" ],
897
933
' wheelDockerImagePy310' : globalVars[" LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE" ],
898
934
' wheelDockerImagePy312' : globalVars[" LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE" ],
899
- ' globalVars' : globalVarsJson,
900
935
]
901
-
902
- if (env. alternativeTRT) {
903
- parameters + = [
904
- ' alternativeTRT' : env. alternativeTRT,
905
- ]
906
- }
907
-
908
- if (reuseBuild) {
909
- parameters[' reuseArtifactPath' ] = " sw-tensorrt-generic/llm-artifacts/${ JOB_NAME} /${ reuseBuild} "
910
- }
911
-
912
- echo " trigger x86_64 build job, params: ${ parameters} "
913
-
914
- def status = triggerJob(" /LLM/helpers/Build-x86_64" , parameters)
915
- if (status != " SUCCESS" ) {
916
- error " Downstream job did not succeed"
917
- }
918
-
919
- }
920
- def testStageName = " [Test-x86_64] Run"
921
- if (env. localJobCredentials) {
922
- testStageName = " [Test-x86_64] Remote Run"
936
+ launchJob(" /LLM/helpers/Build-x86_64" , reuseBuild, enableFailFast, globalVars, " x86_64" , additionalParameters)
923
937
}
938
+ def testStageName = " [Test-x86_64-Single-GPU] ${ env.localJobCredentials ? "Remote Run" : "Run"} "
939
+ def singleGpuTestFailed = false
924
940
stage(testStageName) {
925
941
if (X86_TEST_CHOICE == STAGE_CHOICE_SKIP ) {
926
942
echo " x86_64 test job is skipped due to Jenkins configuration"
927
943
return
928
944
}
929
945
try {
930
- parameters = getCommonParameters()
931
946
String testFilterJson = writeJSON returnText : true , json : testFilter
932
- String globalVarsJson = writeJSON returnText : true , json : globalVars
933
- parameters + = [
934
- ' enableFailFast' : enableFailFast,
947
+ def additionalParameters = [
935
948
' testFilter' : testFilterJson,
936
949
' dockerImage' : globalVars[" LLM_DOCKER_IMAGE" ],
937
950
' wheelDockerImagePy310' : globalVars[" LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE" ],
938
951
' wheelDockerImagePy312' : globalVars[" LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE" ],
939
- ' globalVars' : globalVarsJson,
940
952
]
941
953
942
- if (env. alternativeTRT) {
943
- parameters + = [
944
- ' alternativeTRT' : env. alternativeTRT,
945
- ]
954
+ launchJob(" L0_Test-x86_64-Single-GPU" , false , enableFailFast, globalVars, " x86_64" , additionalParameters)
955
+ } catch (InterruptedException e) {
956
+ throw e
957
+ } catch (Exception e) {
958
+ if (X86_TEST_CHOICE == STAGE_CHOICE_IGNORE ) {
959
+ catchError(
960
+ buildResult : ' SUCCESS' ,
961
+ stageResult : ' FAILURE' ) {
962
+ error " x86_64 test failed but ignored due to Jenkins configuration"
963
+ }
964
+ } else {
965
+ catchError(
966
+ buildResult : ' FAILURE' ,
967
+ stageResult : ' FAILURE' ) {
968
+ error " x86_64 single-GPU test failed"
969
+ }
970
+ singleGpuTestFailed = true
946
971
}
972
+ }
973
+ }
947
974
948
- if (env. testPhase2StageName) {
949
- parameters + = [
950
- ' testPhase2StageName' : env. testPhase2StageName,
951
- ]
975
+ def requireMultiGpuTesting = currentBuild. description?. contains(" Require Multi-GPU Testing" ) ?: false
976
+ echo " requireMultiGpuTesting: ${ requireMultiGpuTesting} "
977
+ if (! requireMultiGpuTesting) {
978
+ return
979
+ }
980
+
981
+ if (singleGpuTestFailed) {
982
+ if (env. JOB_NAME ==~ / .*PostMerge.*/ ) {
983
+ echo " In the official post-merge pipeline, single-GPU test failed, whereas multi-GPU test is still kept running."
984
+ } else {
985
+ stage(" [Test-x86_64-Multi-GPU] Blocked" ) {
986
+ catchError(
987
+ buildResult : ' FAILURE' ,
988
+ stageResult : ' FAILURE' ) {
989
+ error " This pipeline requires running multi-GPU test, but single-GPU test has failed."
990
+ }
952
991
}
992
+ return
993
+ }
994
+ }
953
995
954
- echo " trigger x86_64 test job, params: ${ parameters} "
996
+ testStageName = " [Test-x86_64-Multi-GPU] ${ env.localJobCredentials ? "Remote Run" : "Run"} "
997
+ stage(testStageName) {
998
+ if (X86_TEST_CHOICE == STAGE_CHOICE_SKIP ) {
999
+ echo " x86_64 test job is skipped due to Jenkins configuration"
1000
+ return
1001
+ }
1002
+ try {
1003
+ def testFilterJson = writeJSON returnText : true , json : testFilter
1004
+ def additionalParameters = [
1005
+ ' testFilter' : testFilterJson,
1006
+ ' dockerImage' : globalVars[" LLM_DOCKER_IMAGE" ],
1007
+ ' wheelDockerImagePy310' : globalVars[" LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE" ],
1008
+ ' wheelDockerImagePy312' : globalVars[" LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE" ],
1009
+ ]
955
1010
956
- def status = triggerJob(
957
- " L0_Test-x86_64" ,
958
- parameters,
959
- )
1011
+ launchJob(" L0_Test-x86_64-Multi-GPU" , false , enableFailFast, globalVars, " x86_64" , additionalParameters)
960
1012
961
- if (status != " SUCCESS" ) {
962
- error " Downstream job did not succeed"
963
- }
964
1013
} catch (InterruptedException e) {
965
1014
throw e
966
1015
} catch (Exception e) {
@@ -991,79 +1040,26 @@ def launchStages(pipeline, reuseBuild, testFilter, enableFailFast, globalVars)
991
1040
return
992
1041
}
993
1042
994
- def stageName = " Build"
995
- stage(stageName) {
996
- def parameters = getCommonParameters()
997
- String globalVarsJson = writeJSON returnText : true , json : globalVars
998
- parameters + = [
999
- ' enableFailFast' : enableFailFast,
1043
+ stage(" Build" ) {
1044
+ def additionalParameters = [
1000
1045
" dockerImage" : globalVars[" LLM_SBSA_DOCKER_IMAGE" ],
1001
- ' globalVars' : globalVarsJson,
1002
1046
]
1003
-
1004
- if (env. alternativeTrtSBSA) {
1005
- parameters + = [
1006
- " alternativeTRT" : env. alternativeTrtSBSA,
1007
- ]
1008
- }
1009
-
1010
- if (reuseBuild) {
1011
- parameters[' reuseArtifactPath' ] = " sw-tensorrt-generic/llm-artifacts/${ JOB_NAME} /${ reuseBuild} "
1012
- }
1013
-
1014
- echo " trigger SBSA build job, params: ${ parameters} "
1015
-
1016
- def status = triggerJob(
1017
- " /LLM/helpers/Build-SBSA" ,
1018
- parameters,
1019
- jenkinsUrl,
1020
- credentials,
1021
- )
1022
-
1023
- if (status != " SUCCESS" ) {
1024
- error " Downstream job did not succeed"
1025
- }
1047
+ launchJob(" /LLM/helpers/Build-SBSA" , reuseBuild, enableFailFast, globalVars, " SBSA" , additionalParameters)
1026
1048
}
1027
1049
stage(testStageName) {
1028
1050
if (SBSA_TEST_CHOICE == STAGE_CHOICE_SKIP ) {
1029
1051
echo " SBSA test job is skipped due to Jenkins configuration"
1030
1052
return
1031
1053
}
1032
1054
try {
1033
- def parameters = getCommonParameters()
1034
- String testFilterJson = writeJSON returnText : true , json : testFilter
1035
- String globalVarsJson = writeJSON returnText : true , json : globalVars
1036
- parameters + = [
1037
- ' enableFailFast' : enableFailFast,
1055
+ def testFilterJson = writeJSON returnText : true , json : testFilter
1056
+ def additionalParameters = [
1038
1057
' testFilter' : testFilterJson,
1039
1058
" dockerImage" : globalVars[" LLM_SBSA_DOCKER_IMAGE" ],
1040
- ' globalVars' : globalVarsJson,
1041
1059
]
1042
1060
1043
- if (env. alternativeTrtSBSA) {
1044
- parameters + = [
1045
- " alternativeTRT" : env. alternativeTrtSBSA,
1046
- ]
1047
- }
1048
-
1049
- if (env. testPhase2StageName) {
1050
- parameters + = [
1051
- ' testPhase2StageName' : env. testPhase2StageName,
1052
- ]
1053
- }
1054
-
1055
- echo " trigger SBSA test job, params: ${ parameters} "
1061
+ launchJob(" L0_Test-SBSA" , false , enableFailFast, globalVars, " SBSA" , additionalParameters)
1056
1062
1057
- def status = triggerJob(
1058
- " L0_Test-SBSA" ,
1059
- parameters,
1060
- jenkinsUrl,
1061
- credentials,
1062
- )
1063
-
1064
- if (status != " SUCCESS" ) {
1065
- error " Downstream job did not succeed"
1066
- }
1067
1063
} catch (InterruptedException e) {
1068
1064
throw e
1069
1065
} catch (Exception e) {
@@ -1085,31 +1081,23 @@ def launchStages(pipeline, reuseBuild, testFilter, enableFailFast, globalVars)
1085
1081
" Build-Docker-Images" : {
1086
1082
script {
1087
1083
stage(" [Build-Docker-Images] Remote Run" ) {
1088
- def parameters = getCommonParameters()
1089
- String globalVarsJson = writeJSON returnText : true , json : globalVars
1090
1084
def branch = env. gitlabBranch ? env. gitlabBranch : " main"
1091
1085
if (globalVars[GITHUB_PR_API_URL ]) {
1092
1086
branch = " github-pr-" + globalVars[GITHUB_PR_API_URL ]. split(' /' ). last()
1093
1087
}
1094
1088
1095
- parameters + = [
1096
- ' enableFailFast' : enableFailFast,
1089
+ def additionalParameters = [
1097
1090
' branch' : branch,
1098
1091
' action' : " push" ,
1099
1092
' triggerType' : env. JOB_NAME ==~ / .*PostMerge.*/ ? " post-merge" : " pre-merge" ,
1100
- ' globalVars' : globalVarsJson,
1101
1093
]
1102
1094
1103
- echo " trigger BuildDockerImages job, params: ${ parameters} "
1104
-
1105
- def status = triggerJob(" /LLM/helpers/BuildDockerImages" , parameters)
1106
- if (status != " SUCCESS" ) {
1107
- error " Downstream job did not succeed"
1108
- }
1095
+ launchJob(" /LLM/helpers/BuildDockerImages" , false , enableFailFast, globalVars, " x86_64" , additionalParameters)
1109
1096
}
1110
1097
}
1111
1098
}
1112
1099
]
1100
+
1113
1101
if (env. JOB_NAME ==~ / .*PostMerge.*/ ) {
1114
1102
stages + = dockerBuildJob
1115
1103
}
0 commit comments