SlideShare a Scribd company logo
用 Go 語言
打造多台機器 Scale 架構
Bo-Yi Wu
2020/09/08
About me
• Software Engineer in Mediatek
• Member of Drone CI/CD Platform
• Member of Gitea Platform
• Member of Gin Golang Framework
• Maintain Some GitHub Actions Plugins.
• Teacher of Udemy Platform: Golang + Drone
NeuroPilot
MediaTek Ecosystem for AI Development
https://neuropilot.mediatek.com/
專案需求
• 客戶單機版 (Docker 版本)
• 內建簡易的 Queue 機制
• 公司內部架構 (軟體 + 硬體)
• 多台 Queue 機制 + 硬體模擬
每個 Job 吃 2core 8GB 記憶體
為什麼選 Go 語言
• 公司環境限制
• 保護程式邏輯
• 跨平台編譯 (Windows, Linux)
• 強大 Concurrency
客戶單機版
導入 Queue 機制
RabbitMQ
NSQ
Service 部分元件
• Database: SQLite (不需要 MySQL, Postgres)
• Cache: Memory (不需要 Redis)
• Queue: 自行開發
客戶 IT 環境
如何實作簡易的
Queue 機制
每個 Job 吃 2core 8GB 記憶體
先了解
Channel Blocking
https://utcc.utoronto.ca/~cks/space/blog/programming/GoConcurrencyStillNotEasy
Limit Concurrency Issue
// Limit-concurrency demo. limitCh acts as a counting semaphore with
// concurrencyProcesses slots; the loop takes a slot before starting each
// goroutine. found is unbuffered, so each goroutine parks on its send until
// something receives from found. If nothing is receiving yet, the loop itself
// blocks on limitCh once every slot is held by a parked goroutine.
found := make(chan int)
limitCh := make(chan struct{}, concurrencyProcesses)
for n := 0; n < jobCount; n++ {
	limitCh <- struct{}{} // take a slot; blocks while all slots are in use
	go func(job int) {
		// Deferred calls run last-in-first-out: wg.Done() first, then the
		// slot is handed back.
		defer func() { <-limitCh }()
		defer wg.Done()
		found <- job
	}(n)
}
jobCount = 100
concurrencyProcesses = 10
// Limit-concurrency demo. limitCh acts as a counting semaphore with
// concurrencyProcesses slots; the loop takes a slot before starting each
// goroutine. found is unbuffered, so each goroutine parks on its send until
// something receives from found. If nothing is receiving yet, the loop itself
// blocks on limitCh once every slot is held by a parked goroutine.
found := make(chan int)
limitCh := make(chan struct{}, concurrencyProcesses)
for n := 0; n < jobCount; n++ {
	limitCh <- struct{}{} // take a slot; blocks while all slots are in use
	go func(job int) {
		// Deferred calls run last-in-first-out: wg.Done() first, then the
		// slot is handed back.
		defer func() { <-limitCh }()
		defer wg.Done()
		found <- job
	}(n)
}
jobCount = 100
concurrencyProcesses = 10
解決方案
將 limitCh 丟到背景處理?
// Attempted fix: the slot is taken from a separate goroutine so the loop
// never blocks on limitCh. The job goroutine is started unconditionally and
// does not wait for that slot, so the loop launches all jobCount job
// goroutines without blocking.
found := make(chan int)
limitCh := make(chan struct{}, concurrencyProcesses)
for n := 0; n < jobCount; n++ {
	go func() {
		limitCh <- struct{}{}
	}()
	go func(job int) {
		// Deferred calls run last-in-first-out: a slot is drained from
		// limitCh first, then wg.Done() runs.
		defer wg.Done()
		defer func() { <-limitCh }()
		found <- job
	}(n)
}
jobCount = 100
concurrencyProcesses = 10
// Attempted fix: the slot is taken from a separate goroutine so the loop
// never blocks on limitCh. The job goroutine is started unconditionally and
// does not wait for that slot, so the loop launches all jobCount job
// goroutines without blocking.
found := make(chan int)
limitCh := make(chan struct{}, concurrencyProcesses)
for n := 0; n < jobCount; n++ {
	go func() {
		limitCh <- struct{}{}
	}()
	go func(job int) {
		// Deferred calls run last-in-first-out: a slot is drained from
		// limitCh first, then wg.Done() runs.
		defer wg.Done()
		defer func() { <-limitCh }()
		found <- job
	}(n)
}
無法解決 Limit Concurrency
jobCount = 100
concurrencyProcesses = 10
解決方案
重新改寫架構
// Reworked design: one producer feeds job ids into queue and a fixed pool of
// concurrencyProcesses workers drains it, so each worker handles one job at
// a time and no more than that many jobs are in flight.
found := make(chan int)
queue := make(chan int)
go func(queue chan<- int) {
	for i := 0; i < jobCount; i++ {
		queue <- i
	}
	// Closing queue ends every worker's range loop once it has been drained.
	close(queue)
}(queue)
for i := 0; i < concurrencyProcesses; i++ {
	go func(queue <-chan int, found chan<- int) {
		for val := range queue {
			found <- val
			// Mark the job done as soon as it has been handed over. A defer
			// placed in this loop would not run until the worker goroutine
			// returns, i.e. only after queue is closed and drained.
			wg.Done()
		}
	}(queue, found)
}
jobCount = 100
concurrencyProcesses = 10
Internal Queue
單機版
用 Go 語言打造多台機器 Scale 架構
Setup Consumer
// Consumer bundles the two integer channels of the single-machine queue:
// inputChan is the intake side and jobsChan carries the queued jobs.
type Consumer struct {
	inputChan, jobsChan chan int
}
// PoolSize is the buffer capacity given to the consumer's jobs channel.
const PoolSize = 200

// main builds the Consumer used by the single-machine queue.
func main() {
	// create the consumer
	consumer := Consumer{
		inputChan: make(chan int, 1),
		jobsChan:  make(chan int, PoolSize),
	}
	// Nothing reads the consumer in this step yet; the blank assignment keeps
	// the snippet compiling (an unused local is a compile error in Go).
	_ = consumer
}
用 Go 語言打造多台機器 Scale 架構
// queue logs the incoming value and then sends it on jobsChan, blocking
// until the channel can take it.
func (c *Consumer) queue(input int) {
	const label = "send input value:"
	fmt.Println(label, input)
	jobs := c.jobsChan
	jobs <- input
}
// worker drains jobsChan, printing every job together with the worker
// number, and returns once the channel has been closed and emptied.
func (c *Consumer) worker(num int) {
	jobs := c.jobsChan
	for {
		job, open := <-jobs
		if !open {
			return
		}
		fmt.Println("worker:", num, " job value:", job)
	}
}
// Launch WorkerSize workers, each in its own goroutine, numbered from 0.
for id := 0; id < WorkerSize; id++ {
	go consumer.worker(id)
}
rewrite queue func
// queue logs the incoming value and tries to place it on jobsChan without
// blocking. It reports true when the value was accepted and false when the
// channel could not take it right away.
func (c *Consumer) queue(input int) bool {
	fmt.Println("send input value:", input)
	accepted := false
	select {
	case c.jobsChan <- input:
		accepted = true
	default:
		// Channel not ready: reject instead of waiting.
	}
	return accepted
}
避免使用者大量送資料進來
Shutdown with
Sigterm Handling
// WithContextFunc returns a context derived from ctx that is cancelled when
// the process receives SIGINT or SIGTERM. On a signal, f is called first and
// the returned context is cancelled after f returns. If ctx is done before a
// signal arrives, the watcher goroutine exits without calling f.
func WithContextFunc(ctx context.Context, f func()) context.Context {
	ctx, cancel := context.WithCancel(ctx)
	go func() {
		// signal.Notify does not block when delivering, so with an unbuffered
		// channel a signal that arrives while this goroutine is not yet
		// parked in the select below is dropped. One slot is enough for a
		// single notification.
		c := make(chan os.Signal, 1)
		signal.Notify(c, syscall.SIGINT, syscall.SIGTERM)
		defer signal.Stop(c)

		select {
		case <-ctx.Done():
		case <-c:
			f()
			cancel()
		}
	}()
	return ctx
}
// startConsumer forwards jobs from inputChan to jobsChan until ctx is
// cancelled, then closes jobsChan and returns. A job received after
// cancellation is dropped rather than forwarded.
func (c Consumer) startConsumer(ctx context.Context) {
	for {
		select {
		case job := <-c.inputChan:
			// select picks among ready cases at random, so a job can win
			// even though ctx is already done; forward only while ctx is live.
			if ctx.Err() == nil {
				c.jobsChan <- job
				continue
			}
		case <-ctx.Done():
		}
		// Reached only once ctx has been cancelled.
		close(c.jobsChan)
		return
	}
}
select 不保證讀取 Channel 的順序性
Cancel by ctx.Done() event
// worker drains jobsChan, printing every job together with the worker
// number. Receiving keeps working after the channel is closed until the
// buffered jobs are gone; only then does worker return.
func (c *Consumer) worker(num int) {
	jobs := c.jobsChan
	for {
		job, open := <-jobs
		if !open {
			return
		}
		fmt.Println("worker:", num, " job value:", job)
	}
}
Channel 關閉後,還是可以讀取資料到結束
Graceful shutdown
with worker
sync.WaitGroup
用 Go 語言打造多台機器 Scale 架構
// One WaitGroup slot per worker; each worker releases its slot when it returns.
wg := &sync.WaitGroup{}
wg.Add(WorkerSize)
// Start WorkerSize workers
for i := 0; i < WorkerSize; i++ {
	// Hand the WaitGroup to the worker so it can call wg.Done() on exit;
	// otherwise wg.Wait() would never return.
	go consumer.worker(wg)
}
WaitGroup
WaitGroup
WaitGroup
WaitGroup
// worker consumes jobs from jobsChan until the channel is closed and
// drained, then signals wg that this worker has finished.
func (c Consumer) worker(wg *sync.WaitGroup) {
	defer wg.Done()
	for job := range c.jobsChan {
		// handle the job event
		_ = job // placeholder use: an unused range variable does not compile
	}
}
Add WaitGroup
after Cancel Function
// WithContextFunc returns a context derived from ctx that is cancelled when
// the process receives SIGINT or SIGTERM. On a signal the returned context is
// cancelled first and f is called afterwards, which lets f wait for work that
// stops on cancellation. If ctx is done before a signal arrives, the watcher
// goroutine exits without calling f.
func WithContextFunc(ctx context.Context, f func()) context.Context {
	ctx, cancel := context.WithCancel(ctx)
	go func() {
		// signal.Notify does not block when delivering, so with an unbuffered
		// channel a signal that arrives while this goroutine is not yet
		// parked in the select below is dropped. One slot is enough for a
		// single notification.
		c := make(chan os.Signal, 1)
		signal.Notify(c, syscall.SIGINT, syscall.SIGTERM)
		defer signal.Stop(c)

		select {
		case <-ctx.Done():
		case <-c:
			cancel()
			f()
		}
	}()
	return ctx
}
Add WaitGroup after Cancel Function
// Shutdown wiring: the callback handed to signal.WithContextFunc waits for
// every worker registered on wg and then closes finishChan.
wg := new(sync.WaitGroup)
wg.Add(numberOfWorkers)
onShutdown := func() {
	wg.Wait()
	close(finishChan)
}
ctx := signal.WithContextFunc(context.Background(), onShutdown)
// The consumer runs in the background and is driven by ctx.
go consumer.startConsumer(ctx)
End of Program
// Block until finished is ready or errChannel delivers a value.
select {
// finished became ready: nothing more to do, fall out of the select.
case <-finished:
// A value arrived on errChannel; only a non-nil error is returned to the caller.
case err := <-errChannel:
if err != nil {
return err
}
}
單機版限制
系統資源不足
系統架構
Server - Agent
Server 跟 Agent 溝通方式
https://github.com/hashicorp/go-retryablehttp
// RPC endpoints for server/agent communication, all registered under the /rpc prefix.
r := e.Group("/rpc")
// rpc.Check() is installed as middleware for every route in this group
// (the deck describes it as checking the RPC secret).
r.Use(rpc.Check())
{
r.POST("/v1/healthz", web.RPCHeartbeat)
// NOTE(review): "RPCRquest" looks like a misspelling of "RPCRequest"; the
// handler is declared elsewhere, so confirm before renaming.
r.POST("/v1/request", web.RPCRquest)
r.POST("/v1/accept", web.RPCAccept)
r.POST("/v1/details", web.RPCDetails)
r.POST("/v1/updateStatus", web.RPCUpdateStatus)
r.POST("/v1/upload", web.RPCUploadBytes)
r.POST("/v1/reset", web.RPCResetStatus)
}
Check RPC Secret
/rpc/v1/accept
Update jobs set version = (oldVersion + 1)
where machine = "fooBar" and version = oldVersion
Create multiple worker
// With a non-zero Capacity, run that many start loops concurrently and wait for all of them.
if r.Capacity != 0 {
var g errgroup.Group
for i := 0; i < r.Capacity; i++ {
g.Go(func() error {
// Every worker is started with id 0.
return r.start(ctx, 0)
})
// Stagger worker start-up by one second.
time.Sleep(1 * time.Second)
}
// Wait blocks until every worker has returned and yields the first non-nil error, if any.
return g.Wait()
}
單機版設定多個 Worker
// Poll for work forever: request a job id and run each job in its own goroutine.
for {
var (
id int64
err error
)
// On a failed request, back off for one second and try again.
// NOTE(review): the loop never checks ctx itself, so it keeps retrying
// after cancellation unless r.request blocks; confirm this is intended.
if id, err = r.request(ctx); err != nil {
time.Sleep(1 * time.Second)
continue
}
// One goroutine per job; nothing in this snippet bounds how many run at once.
go func() {
// A failed job is only logged; the loop keeps requesting more work.
if err := r.start(ctx, id); err != nil {
log.Error().Err(err).Msg("runner: cannot start the job")
}
}()
}
公司內部 + Submit Job
Break for and select loop
// start repeatedly calls r.poll for id until ctx is cancelled.
//
// When r.Capacity is 0, a single poll is made and nil is returned. Otherwise
// the runner polls again after a one-second pause and returns ctx.Err() as
// soon as ctx is cancelled, including while it is pausing.
func (r *Runner) start(ctx context.Context, id int64) error {
LOOP:
	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
			r.poll(ctx, id)
			if r.Capacity == 0 {
				// A bare break would only leave the select; the label exits
				// the enclosing for loop.
				break LOOP
			}
		}

		// Pause between polls, but wake immediately on cancellation instead
		// of sleeping through it.
		pause := time.NewTimer(1 * time.Second)
		select {
		case <-ctx.Done():
			pause.Stop()
			return ctx.Err()
		case <-pause.C:
		}
	}
	return nil
}
即時取消正在執行的任務?
用 Go 語言打造多台機器 Scale 架構
用 Go 語言打造多台機器 Scale 架構
用 Go 語言打造多台機器 Scale 架構
Context with Cancel or Timeout
// Cancellable root context; cancelling it also cancels the timeout context derived from it below.
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
// Child context that additionally expires after 60 minutes.
// cancel is reassigned here; the defer above already captured the first
// cancel function, so both cancel functions run on return.
timeout, cancel := context.WithTimeout(ctx, 60*time.Minute)
defer cancel()
Job03 context
Context with Cancel or Timeout
// Cancellable root context; cancelling it also cancels the timeout context derived from it below.
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
// Child context that additionally expires after 60 minutes.
// cancel is reassigned here; the defer above already captured the first
// cancel function, so both cancel functions run on return.
timeout, cancel := context.WithTimeout(ctx, 60*time.Minute)
defer cancel()
Job03 context
Job05 context
Watch the Cancel event (Agent)
// Watch for a cancel event for this id in the background and cancel the
// local context when one is reported.
go func() {
// The error result is discarded: only done == true triggers cancel().
done, _ := r.Manager.Watch(ctx, id)
if done {
cancel()
}
}()
Handle cancel event on Server
subscribers: make(map[chan struct{}]int64),
cancelled: make(map[int64]time.Time),
User cancel running job
// Record the cancellation and notify every subscriber registered for this id, all under the lock.
c.Lock()
// The map entry holds a timestamp five minutes in the future
// (presumably an expiry for the record; TODO confirm).
c.cancelled[id] = time.Now().Add(time.Minute * 5)
for subscriber, build := range c.subscribers {
if id == build {
// Closing the channel unblocks any receive on it.
// NOTE(review): closing the same channel twice panics; verify that
// subscribers are removed elsewhere before a repeated cancel.
close(subscriber)
}
}
c.Unlock()
Agent subscribe the cancel event
// Wait until the context ends, the subscriber channel fires, or a periodic
// check finds id in c.cancelled.
for {
select {
// Context finished first: report "not cancelled" together with the context error.
case <-ctx.Done():
return false, ctx.Err()
// After a minute without any other event, look id up in the cancelled map under the lock.
case <-time.After(time.Minute):
c.Lock()
_, ok := c.cancelled[id]
c.Unlock()
if ok {
return true, nil
}
// The subscriber channel became ready: report the cancel.
case <-subscriber:
return true, nil
}
}
case <-time.After(time.Minute):
c.Lock()
_, ok := c.cancelled[id]
c.Unlock()
if ok {
return true, nil
}
case <-time.After(time.Minute):
c.Lock()
_, ok := c.cancelled[id]
c.Unlock()
if ok {
return true, nil
}
1 Cancel
case <-time.After(time.Minute):
c.Lock()
_, ok := c.cancelled[id]
c.Unlock()
if ok {
return true, nil
}
1
2 Reconnect Server
Cancel
感謝參與

More Related Content

PDF
Job Queue in Golang
PDF
Build microservice with gRPC in golang
PDF
Hiveminder - Everything but the Secret Sauce
PDF
Global Interpreter Lock: Episode III - cat < /dev/zero > GIL;
PDF
How to make a large C++-code base manageable
PDF
GDG Devfest 2019 - Build go kit microservices at kubernetes with ease
PDF
Global Interpreter Lock: Episode I - Break the Seal
PPTX
C++17 now
Job Queue in Golang
Build microservice with gRPC in golang
Hiveminder - Everything but the Secret Sauce
Global Interpreter Lock: Episode III - cat < /dev/zero > GIL;
How to make a large C++-code base manageable
GDG Devfest 2019 - Build go kit microservices at kubernetes with ease
Global Interpreter Lock: Episode I - Break the Seal
C++17 now

What's hot (20)

KEY
Streams are Awesome - (Node.js) TimesOpen Sep 2012
PDF
Using Node.js to Build Great Streaming Services - HTML5 Dev Conf
PDF
Profiling and optimizing go programs
PDF
RestMQ - HTTP/Redis based Message Queue
PDF
How to Write Node.js Module
PDF
Bridge TensorFlow to run on Intel nGraph backends (v0.4)
PDF
PyCon KR 2019 sprint - RustPython by example
PDF
Puppet and Openshift
PDF
GraphQL IN Golang
PPTX
Node.js/io.js Native C++ Addons
PPTX
HHVM: Efficient and Scalable PHP/Hack Execution / Guilherme Ottoni (Facebook)
PDF
Node.js streaming csv downloads proxy
PPTX
Taking Jenkins Pipeline to the Extreme
PPTX
node.js and native code extensions by example
PDF
KubeCon EU 2016: Getting the Jobs Done With Kubernetes
PDF
High Performance tDiary
PPTX
Streams for the Web
PDF
Source Plugins
PDF
Nodejs Explained with Examples
Streams are Awesome - (Node.js) TimesOpen Sep 2012
Using Node.js to Build Great Streaming Services - HTML5 Dev Conf
Profiling and optimizing go programs
RestMQ - HTTP/Redis based Message Queue
How to Write Node.js Module
Bridge TensorFlow to run on Intel nGraph backends (v0.4)
PyCon KR 2019 sprint - RustPython by example
Puppet and Openshift
GraphQL IN Golang
Node.js/io.js Native C++ Addons
HHVM: Efficient and Scalable PHP/Hack Execution / Guilherme Ottoni (Facebook)
Node.js streaming csv downloads proxy
Taking Jenkins Pipeline to the Extreme
node.js and native code extensions by example
KubeCon EU 2016: Getting the Jobs Done With Kubernetes
High Performance tDiary
Streams for the Web
Source Plugins
Nodejs Explained with Examples
Ad

More from Bo-Yi Wu (20)

PDF
Drone CI/CD 自動化測試及部署
PDF
Golang Project Layout and Practice
PDF
Introduction to GitHub Actions
PDF
Drone 1.0 Feature
PDF
Drone CI/CD Platform
PPTX
Go 語言基礎簡介
PPTX
drone continuous Integration
PPTX
Gorush: A push notification server written in Go
PPTX
用 Drone 打造 輕量級容器持續交付平台
PPTX
用 Go 語言 打造微服務架構
PPTX
Introduction to Gitea with Drone
PDF
運用 Docker 整合 Laravel 提升團隊開發效率
PDF
用 Go 語言實戰 Push Notification 服務
PPTX
用 Go 語言打造 DevOps Bot
PPTX
A painless self-hosted Git service: Gitea
PPTX
Write microservice in golang
PPTX
用 Docker 改善團隊合作模式
PPTX
Git flow 與團隊合作
PPTX
PHP & JavaScript & CSS Coding style
PPTX
Docker 基礎介紹與實戰
Drone CI/CD 自動化測試及部署
Golang Project Layout and Practice
Introduction to GitHub Actions
Drone 1.0 Feature
Drone CI/CD Platform
Go 語言基礎簡介
drone continuous Integration
Gorush: A push notification server written in Go
用 Drone 打造 輕量級容器持續交付平台
用 Go 語言 打造微服務架構
Introduction to Gitea with Drone
運用 Docker 整合 Laravel 提升團隊開發效率
用 Go 語言實戰 Push Notification 服務
用 Go 語言打造 DevOps Bot
A painless self-hosted Git service: Gitea
Write microservice in golang
用 Docker 改善團隊合作模式
Git flow 與團隊合作
PHP & JavaScript & CSS Coding style
Docker 基礎介紹與實戰
Ad

Recently uploaded (20)

PDF
Diabetes mellitus diagnosis method based random forest with bat algorithm
PDF
Build a system with the filesystem maintained by OSTree @ COSCUP 2025
PDF
gpt5_lecture_notes_comprehensive_20250812015547.pdf
PDF
Blue Purple Modern Animated Computer Science Presentation.pdf.pdf
PDF
TokAI - TikTok AI Agent : The First AI Application That Analyzes 10,000+ Vira...
PDF
Getting Started with Data Integration: FME Form 101
PDF
Electronic commerce courselecture one. Pdf
PDF
The Rise and Fall of 3GPP – Time for a Sabbatical?
PDF
Profit Center Accounting in SAP S/4HANA, S4F28 Col11
PPT
“AI and Expert System Decision Support & Business Intelligence Systems”
PDF
Building Integrated photovoltaic BIPV_UPV.pdf
PDF
Agricultural_Statistics_at_a_Glance_2022_0.pdf
PDF
7 ChatGPT Prompts to Help You Define Your Ideal Customer Profile.pdf
PDF
Unlocking AI with Model Context Protocol (MCP)
PPTX
Digital-Transformation-Roadmap-for-Companies.pptx
PPTX
Tartificialntelligence_presentation.pptx
PDF
Network Security Unit 5.pdf for BCA BBA.
PDF
cuic standard and advanced reporting.pdf
PPTX
1. Introduction to Computer Programming.pptx
PPTX
Big Data Technologies - Introduction.pptx
Diabetes mellitus diagnosis method based random forest with bat algorithm
Build a system with the filesystem maintained by OSTree @ COSCUP 2025
gpt5_lecture_notes_comprehensive_20250812015547.pdf
Blue Purple Modern Animated Computer Science Presentation.pdf.pdf
TokAI - TikTok AI Agent : The First AI Application That Analyzes 10,000+ Vira...
Getting Started with Data Integration: FME Form 101
Electronic commerce courselecture one. Pdf
The Rise and Fall of 3GPP – Time for a Sabbatical?
Profit Center Accounting in SAP S/4HANA, S4F28 Col11
“AI and Expert System Decision Support & Business Intelligence Systems”
Building Integrated photovoltaic BIPV_UPV.pdf
Agricultural_Statistics_at_a_Glance_2022_0.pdf
7 ChatGPT Prompts to Help You Define Your Ideal Customer Profile.pdf
Unlocking AI with Model Context Protocol (MCP)
Digital-Transformation-Roadmap-for-Companies.pptx
Tartificialntelligence_presentation.pptx
Network Security Unit 5.pdf for BCA BBA.
cuic standard and advanced reporting.pdf
1. Introduction to Computer Programming.pptx
Big Data Technologies - Introduction.pptx

用 Go 語言打造多台機器 Scale 架構