SlideShare a Scribd company logo
PRACTICAL 
CELERY
CAMERON MASKE 
twitter: @cameronmaske 
email: cam@trackmaven.com 
web: http://cameronmaske.com
WHAT WE'LL 
COVER...
WHAT IS CELERY? 
HOW DOES IT WORK?
USING CELERY, BEST 
PRACTICES AND SCALING.
SURVEY
CELERY 
ASYNCHRONOUS 
DISTRIBUTED 
TASK QUEUE
OUT OF THE 
REQUEST/RESPONSE 
CYCLE. 
Example: Sending emails asynchronously.
TASKS IN THE 
BACKGROUND. 
Example: Computationally heavy jobs. 
Example: Interacting with external APIs.
PERIODIC JOBS.
HISTORY 
Python. 
Released (0.1) in 2009. 
Currently on 3.1, with 3.2 in alpha. 
Developed by Ask Solem (@asksol)
ARCHITECTURE
PRODUCER 
Produces a task for the queue.
BROKER 
Stores the task backlog 
Answers, what work remains to be done? 
RabbitMQ, Redis, SQLAlchemy, Django's ORM, MongoDB...
WORKER 
Executes and consumes tasks. 
Distributed.
RESULTS BACKEND. 
Stores the results from our tasks. 
RabbitMQ, Redis, SQLAlchemy, Django's ORM, MongoDB... 
Optional!
EXAMPLE
from celery import Celery 
app = Celery('tasks', backend='amqp', broker='amqp://guest@localhost//') 
@app.task 
def add(x, y): 
return x + y
>>> result = add.delay(4, 4) 
>>> result.state 
'SUCCESS' 
>>> result.id 
'4cc7438e-afd4-4f8f-a2f3-f46567e7ca77' 
>>> result.get() 
8 
http://celery.readthedocs.org/en/latest/reference/celery.result.html
PICK YOUR FLAVOR. 
@app.task 
def add(x, y): 
return x + y 
add(2, 4) 
class AddTask(app.Task): 
def run(self, x, y): 
return x + y 
AddTask().run(2, 4)
# Async 
add.delay(2, 4) 
add.apply_aync(args=(2, 4), expires=30) 
# Eager! 
result = add.apply(args=(2, 4)) # Executes locally. 
# Or... 
add(2, 4) # Does not return a celery result!
INTEGRATING WITH 
DJANGO.
BEWARE OF DJANGO-CELERY.
http://docs.celeryproject.org/en/master/django/first-steps-with-django.html 
- project/ 
- config/__init__.py 
- config/settings.py 
- config/urls.py 
- manage.py
# project/config/celery.py 
from __future__ import absolute_import 
import os 
from celery import Celery 
from django.conf import settings 
# Set the default Django settings module for the 'celery' program. 
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings') 
app = Celery('app') 
# Using a string here means the worker will not have to 
# pickle the object when using Windows. 
app.config_from_object('django.conf:settings') 
app.autodiscover_tasks(lambda: settings.INSTALLED_APPS) 
@app.task(bind=True) 
def debug_task(self): 
print('Request: {0!r}'.format(self.request))
# project/config/__init__.py 
from __future__ import absolute_import 
# This will make sure the app is always imported when 
# Django starts so that shared_task will use this app. 
from .celery import app as celery_app 
__all__ = ['celery_app']
celery -A project worker -l info
TESTING 
# settings.py 
import sys 
if 'test' in sys.argv: 
CELERY_EAGER_PROPAGATES_EXCEPTIONS=True, 
CELERY_ALWAYS_EAGER=True, 
BROKER_BACKEND='memory'
PATTERNS 
AND BEST 
PRACTICES.
NEVER PASS OBJECTS AS 
ARGUMENTS.
# Bad 
@app.task() 
def send_reminder(reminder): 
reminder.send_email() 
# Good 
@app.task() 
def send_reminder(pk): 
try: 
reminder = Reminder.objects.get(pk=pk) 
except Reminder.DoesNotExist: 
return 
reminder.send_email()
KEEP TASKS GRANULAR. 
CAN PROCESS MORE IN 
PARALLEL.
AVOID LAUNCHING 
SYNCHRONOUS 
SUBTASKS
# Bad 
@app.task 
def update_page_info(url): 
page = fetch_page.delay(url).get() 
info = parse_page.delay(url, page).get() 
store_page_info.delay(url, info) 
@app.task 
def fetch_page(url): 
return myhttplib.get(url) 
@app.task 
def parse_page(url, page): 
return myparser.parse_document(page) 
@app.task 
def store_page_info(url, info): 
return PageInfo.objects.create(url, info)
# Good 
def update_page_info(url): 
chain = fetch_page.s() | parse_page.s() | store_page_info.s(url) 
chain() 
@app.task() 
def fetch_page(url): 
return myhttplib.get(url) 
@app.task() 
def parse_page(page): 
return myparser.parse_document(page) 
@app.task(ignore_result=True) 
def store_page_info(info, url): 
PageInfo.objects.create(url=url, info=info) 
http://celery.readthedocs.org/en/latest/userguide/canvas.html
PERIODIC TASKS. 
http://celery.readthedocs.org/en/latest/userguide/periodic-tasks.html
from datetime import timedelta 
@app.periodic_task(run_every=timedelta(minutes=5)): 
def run_every_five(): 
pass
from datetime import timedelta 
class RunEveryFive(app.PeriodicTask): 
run_every = timedelta(minutes=5) 
def run(self): 
pass
from datetime import timedelta 
@app.task(): 
def run_every_five(): 
pass 
CELERYBEAT_SCHEDULE = { 
'run-every-five': { 
'task': 'tasks.run_every_five', 
'schedule': timedelta(seconds=30) 
}, 
}
CRON STYLE. 
from celery.schedules import crontab 
crontab(minute=0, hour='*/3') # Every 3 hours. 
crontab(day_of_week='sunday') # Every minute on Sundays. 
crontab(0, 0, 0, month_of_year='*/3') # First month of every quarter.
@app.periodic_task(run_every=crontab(minute=0, hour=1)) 
def schedule_emails(): 
user_ids = User.objects.values_list('id', flat=True) 
for user_id in user_ids: 
send_daily_email.delay(user_id) 
@app.task() 
def send_daily_email(user_id): 
user = User.objects.get(id=user_id) 
try: 
today = datetime.now() 
Email.objects.get( 
user=user, date__year=today.year, date__month=today.month, date__day=today.day) 
except Email.DoesNotExist: 
email = Email(user=user, body="Hey, don't forget to LOGIN PLEASE!") 
email.send() 
email.save()
CELERY BEAT A.K.A THE 
SCHEDULER. 
celery -A project beat
NEVER RUN A BEAT + 
WORKER ON A SINGLE 
CELERY PROCESS. 
# Really bad idea.... 
celery -A project worker -B
FREQUENTLY RUNNING 
PERIODIC TASKS. 
BEWARE OF "TASK STACKING"
Schedule task runs every 5 minutes. 
Tasks take 30 minutes. 
Schedule task stacks. 
Bad stuff.
EXPIRES! 
from time import sleep 
@app.periodic_task(expires=5*60, run_every=timedelta(minutes=5)) 
def schedule_task(): 
for _ in range(30): 
one_minute_task.delay() 
@app.task(expires=5*60) 
def one_minute_task(): 
sleep(60)
THINGS GO WRONG IN 
TASKS!
RETRY
from celery.exceptions import Retry 
@app.task(max_retries=10) 
def gather_data(): 
try: 
data = api.get_data() 
# etc, etc, ... 
except api.RateLimited as e: 
raise Retry(exc=e, when=e.cooldown) 
except api.IsDown: 
return
ERROR INSIGHT.
SENTRY.
STAGES
class DebugTask(app.Task): 
def after_return(self, status, retval, task_id, args, kwargs, einfo): 
print("I'm done!") 
def on_failure(self, exc, task_id, args, kwargs, einfo): 
print("I failed :(") 
def on_retry(self, exc, task_id, args, kwargs, einfo): 
print("I'll try again!") 
def on_success(self, retval, task_id, args, kwargs): 
print("I did it!")
ABSTRACT 
class AbstractTask(app.Task): 
abstract = True 
def after_return(self, *args, **kwargs): 
print("All done!") 
@app.task(base=AbstractTask) 
def add(x, y): 
return x + y
INSTANTIATION 
class DatabaseTask(app.Task): 
abstract = True 
_db = None 
@property 
def db(self): 
if self._db is None: 
self._db = Database.connect() 
return self._db
ENSURE A TASK IS 
EXECUTED ONE AT A TIME
from celery import task 
from celery.utils.log import get_task_logger 
from django.core.cache import cache 
from django.utils.hashcompat import md5_constructor as md5 
from djangofeeds.models import Feed 
logger = get_task_logger(__name__) 
LOCK_EXPIRE = 60 * 5 # Lock expires in 5 minutes 
@task 
def import_feed(feed_url): 
# The cache key consists of the task name and the MD5 digest 
# of the feed URL. 
feed_url_digest = md5(feed_url).hexdigest() 
lock_id = '{0}-lock-{1}'.format(self.name, feed_url_hexdigest) 
# cache.add fails if if the key already exists 
acquire_lock = lambda: cache.add(lock_id, 'true', LOCK_EXPIRE) 
# memcache delete is very slow, but we have to use it to take 
# advantage of using add() for atomic locking 
release_lock = lambda: cache.delete(lock_id) 
logger.debug('Importing feed: %s', feed_url) 
if acquire_lock(): 
try: 
feed = Feed.objects.import_feed(feed_url) 
finally: 
release_lock() 
return feed.url 
logger.debug( 
'Feed %s is already being imported by another worker', feed_url)
IMPORTANT SETTINGS
# settings.py 
CELERY_IGNORE_RESULT = True 
CELERYD_TASK_SOFT_TIME_LIMIT = 500 
CELERYD_TASK_TIME_LIMIT = 1000
# tasks.py 
app.task(ignore_result=True, soft_time_limit=60, time_limit=120) 
def add(x, y): 
pass
# settings.py 
CELERYD_MAX_TASKS_PER_CHILD = 500 
CELERYD_PREFETCH_MULTIPLIER = 4
BROKER
SO MANY 
CHOICES! 
RabbitMQ 
Redis 
SQLAlchemy 
Django's ORM 
MongoDB 
Amazon SQS 
CouchDB 
Beanstalk 
IronMQ
DJANGO ORM. 
# settings.py 
BROKER_URL = 'django://' 
INSTALLED_APPS = ( 
'kombu.transport.django', 
) CELERY_RESULT_BACKEND='djcelery.backends.database:DatabaseBackend' 
python manage.py syncdb
DON'T DO THIS FOR 
ANYTHING SERIOUS.
USE RABBITMQ
C OPTIMIZED LIBRARY 
$ pip install librabbitmq
WORKERS
CONCURRENCY 
celery -A project worker -C 10 
celery -A project worker --autoscale=10,1
INCREASED CONCURRENCY CAN 
QUICKLY DRAIN CONNECTIONS ON 
YOUR DATABASE 
Use a connection pooler (pgbouncer).
ROUTING
CELERY_ROUTES = { 
'email.tasks.send_mail': { 
'queue': 'priority', 
}, 
} 
# or 
send_mail.apply_async(queue="priority") 
celery -A project worker -Q email
DEDICATED WORKERS.
BOTTLENECKS
Identify 
Fix 
Repeat
Make tasks faster. 
Reduce volume of tasks.
NEWRELIC
Practical Celery
MONITORING IS VITAL.
RABBITMQ MANAGEMENT 
PLUGIN
RABBITMQ MANAGEMENT PLUGIN 
HAS A GREAT HTTP API!
LIBRATO 
Gist!
CELERY FLOWER
QUESTIONS?

More Related Content

PDF
Celery: The Distributed Task Queue
PDF
Advanced task management with Celery
PDF
Data processing with celery and rabbit mq
PDF
An Introduction to Celery
PDF
Scaling up task processing with Celery
PDF
Why Task Queues - ComoRichWeb
KEY
Django Celery
ODP
Introduction to Python Celery
Celery: The Distributed Task Queue
Advanced task management with Celery
Data processing with celery and rabbit mq
An Introduction to Celery
Scaling up task processing with Celery
Why Task Queues - ComoRichWeb
Django Celery
Introduction to Python Celery

What's hot (20)

PPT
Asynchronous JavaScript & XML (AJAX)
PPTX
Asynchronous programming
PPTX
Javascript operators
PDF
JavaScript Fetch API
PPTX
PDF
ZIO-Direct - Functional Scala 2022
PDF
Kotlin Coroutines in Practice @ KotlinConf 2018
KEY
JavaOne 2011 - JVM Bytecode for Dummies
PPT
SQLITE Android
PDF
Functional Programming Patterns (NDC London 2014)
PPTX
Component lifecycle hooks in Angular 2.0
PDF
Mastering message queues | Tobias Nyholm | CODEiD
PPT
Collections Framework
PPTX
Joshua bloch effect java chapter 3
PDF
confirm & alert
PPTX
Dts x dicoding #2 memulai pemrograman kotlin
PDF
Introduction à ASP.NET
PDF
Javascript essentials
PDF
Introduction to RxJS
Asynchronous JavaScript & XML (AJAX)
Asynchronous programming
Javascript operators
JavaScript Fetch API
ZIO-Direct - Functional Scala 2022
Kotlin Coroutines in Practice @ KotlinConf 2018
JavaOne 2011 - JVM Bytecode for Dummies
SQLITE Android
Functional Programming Patterns (NDC London 2014)
Component lifecycle hooks in Angular 2.0
Mastering message queues | Tobias Nyholm | CODEiD
Collections Framework
Joshua bloch effect java chapter 3
confirm & alert
Dts x dicoding #2 memulai pemrograman kotlin
Introduction à ASP.NET
Javascript essentials
Introduction to RxJS
Ad

Similar to Practical Celery (20)

PDF
PDF
Celery
PDF
Celery with python
PPT
Introduction to Django-Celery and Supervisor
ODP
Europython 2011 - Playing tasks with Django & Celery
PDF
Celery by dummy
PDF
Tasks: you gotta know how to run them
PDF
Deixa para depois, Procrastinando com Celery em Python
PDF
Django Celery - A distributed task queue
PDF
PyCon India 2012: Celery Talk
PDF
[Quase] Tudo que você precisa saber sobre tarefas assíncronas
PDF
Advanced workflows
PDF
Celery - A Distributed Task Queue
PPTX
Celery in the Django
PDF
Django for IoT: From hackathon to production (DjangoCon US)
PDF
Django at Scale
PPTX
Celery workshop
PPTX
python_development.pptx
PDF
MinbilDinbil Django Speed Tricks
PDF
Django productivity tips and tricks
Celery
Celery with python
Introduction to Django-Celery and Supervisor
Europython 2011 - Playing tasks with Django & Celery
Celery by dummy
Tasks: you gotta know how to run them
Deixa para depois, Procrastinando com Celery em Python
Django Celery - A distributed task queue
PyCon India 2012: Celery Talk
[Quase] Tudo que você precisa saber sobre tarefas assíncronas
Advanced workflows
Celery - A Distributed Task Queue
Celery in the Django
Django for IoT: From hackathon to production (DjangoCon US)
Django at Scale
Celery workshop
python_development.pptx
MinbilDinbil Django Speed Tricks
Django productivity tips and tricks
Ad

Recently uploaded (20)

PDF
Chapter 3 Spatial Domain Image Processing.pdf
PDF
Dropbox Q2 2025 Financial Results & Investor Presentation
PPTX
sap open course for s4hana steps from ECC to s4
PPTX
20250228 LYD VKU AI Blended-Learning.pptx
PDF
Machine learning based COVID-19 study performance prediction
PDF
Agricultural_Statistics_at_a_Glance_2022_0.pdf
PPTX
Effective Security Operations Center (SOC) A Modern, Strategic, and Threat-In...
PDF
Blue Purple Modern Animated Computer Science Presentation.pdf.pdf
PDF
Profit Center Accounting in SAP S/4HANA, S4F28 Col11
PPTX
KOM of Painting work and Equipment Insulation REV00 update 25-dec.pptx
PDF
Architecting across the Boundaries of two Complex Domains - Healthcare & Tech...
PPTX
Digital-Transformation-Roadmap-for-Companies.pptx
PPTX
Big Data Technologies - Introduction.pptx
PDF
Approach and Philosophy of On baking technology
PDF
Optimiser vos workloads AI/ML sur Amazon EC2 et AWS Graviton
PDF
Peak of Data & AI Encore- AI for Metadata and Smarter Workflows
PDF
TokAI - TikTok AI Agent : The First AI Application That Analyzes 10,000+ Vira...
PDF
Empathic Computing: Creating Shared Understanding
PPTX
ACSFv1EN-58255 AWS Academy Cloud Security Foundations.pptx
PDF
Build a system with the filesystem maintained by OSTree @ COSCUP 2025
Chapter 3 Spatial Domain Image Processing.pdf
Dropbox Q2 2025 Financial Results & Investor Presentation
sap open course for s4hana steps from ECC to s4
20250228 LYD VKU AI Blended-Learning.pptx
Machine learning based COVID-19 study performance prediction
Agricultural_Statistics_at_a_Glance_2022_0.pdf
Effective Security Operations Center (SOC) A Modern, Strategic, and Threat-In...
Blue Purple Modern Animated Computer Science Presentation.pdf.pdf
Profit Center Accounting in SAP S/4HANA, S4F28 Col11
KOM of Painting work and Equipment Insulation REV00 update 25-dec.pptx
Architecting across the Boundaries of two Complex Domains - Healthcare & Tech...
Digital-Transformation-Roadmap-for-Companies.pptx
Big Data Technologies - Introduction.pptx
Approach and Philosophy of On baking technology
Optimiser vos workloads AI/ML sur Amazon EC2 et AWS Graviton
Peak of Data & AI Encore- AI for Metadata and Smarter Workflows
TokAI - TikTok AI Agent : The First AI Application That Analyzes 10,000+ Vira...
Empathic Computing: Creating Shared Understanding
ACSFv1EN-58255 AWS Academy Cloud Security Foundations.pptx
Build a system with the filesystem maintained by OSTree @ COSCUP 2025

Practical Celery

  • 2. CAMERON MASKE twitter: @cameronmaske email: cam@trackmaven.com web: http://cameronmaske.com
  • 4. WHAT IS CELERY? HOW DOES IT WORK?
  • 5. USING CELERY, BEST PRACTICES AND SCALING.
  • 8. OUT OF THE REQUEST/RESPONSE CYCLE. Example: Sending emails asynchronously.
  • 9. TASKS IN THE BACKGROUND. Example: Computationally heavy jobs. Example: Interacting with external APIs.
  • 11. HISTORY Python. Released (0.1) in 2009. Currently on 3.1, with 3.2 in alpha. Developed by Ask Solem (@asksol)
  • 13. PRODUCER Produces a task for the queue.
  • 14. BROKER Stores the task backlog Answers, what work remains to be done? RabbitMQ, Redis, SQLAlchemy, Django's ORM, MongoDB...
  • 15. WORKER Executes and consumes tasks. Distributed.
  • 16. RESULTS BACKEND. Stores the results from our tasks. RabbitMQ, Redis, SQLAlchemy, Django's ORM, MongoDB... Optional!
  • 18. from celery import Celery app = Celery('tasks', backend='amqp', broker='amqp://guest@localhost//') @app.task def add(x, y): return x + y
  • 19. >>> result = add.delay(4, 4) >>> result.state 'SUCCESS' >>> result.id '4cc7438e-afd4-4f8f-a2f3-f46567e7ca77' >>> result.get() 8 http://celery.readthedocs.org/en/latest/reference/celery.result.html
  • 20. PICK YOUR FLAVOR. @app.task def add(x, y): return x + y add(2, 4) class AddTask(app.Task): def run(self, x, y): return x + y AddTask().run(2, 4)
  • 21. # Async add.delay(2, 4) add.apply_aync(args=(2, 4), expires=30) # Eager! result = add.apply(args=(2, 4)) # Executes locally. # Or... add(2, 4) # Does not return a celery result!
  • 24. http://docs.celeryproject.org/en/master/django/first-steps-with-django. html - project/ - config/__init__.py - config/settings.py - config/urls.py - manage.py
  • 25. # project/config/celery.py from __future__ import absolute_import import os from celery import Celery from django.conf import settings # Set the default Django settings module for the 'celery' program. os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings') app = Celery('app') # Using a string here means the worker will not have to # pickle the object when using Windows. app.config_from_object('django.conf:settings') app.autodiscover_tasks(lambda: settings.INSTALLED_APPS) @app.task(bind=True) def debug_task(self): print('Request: {0!r}'.format(self.request))
  • 26. # project/config/__init__.py from __future__ import absolute_import # This will make sure the app is always imported when # Django starts so that shared_task will use this app. from .celery import app as celery_app __all__ = ['celery_app']
  • 27. celery -A project worker -l info
  • 28. TESTING # settings.py import sys if 'test' in sys.argv: CELERY_EAGER_PROPAGATES_EXCEPTIONS=True, CELERY_ALWAYS_EAGER=True, BROKER_BACKEND='memory'
  • 29. PATTERNS AND BEST PRACTICES.
  • 30. NEVER PASS OBJECTS AS ARGUMENTS.
  • 31. # Bad @app.task() def send_reminder(reminder): reminder.send_email() # Good @app.task() def send_reminder(pk): try: reminder = Reminder.objects.get(pk=pk) except Reminder.DoesNotExist: return reminder.send_email()
  • 32. KEEP TASKS GRANULAR. CAN PROCESS MORE IN PARALLEL.
  • 34. # Bad @app.task def update_page_info(url): page = fetch_page.delay(url).get() info = parse_page.delay(url, page).get() store_page_info.delay(url, info) @app.task def fetch_page(url): return myhttplib.get(url) @app.task def parse_page(url, page): return myparser.parse_document(page) @app.task def store_page_info(url, info): return PageInfo.objects.create(url, info)
  • 35. # Good def update_page_info(url): chain = fetch_page.s() | parse_page.s() | store_page_info.s(url) chain() @app.task() def fetch_page(url): return myhttplib.get(url) @app.task() def parse_page(page): return myparser.parse_document(page) @app.task(ignore_result=True) def store_page_info(info, url): PageInfo.objects.create(url=url, info=info) http://celery.readthedocs.org/en/latest/userguide/canvas.html
  • 37. from datetime import timedelta @app.periodic_task(run_every=timedelta(minutes=5)): def run_every_five(): pass
  • 38. from datetime import timedelta class RunEveryFive(app.PeriodicTask): run_every = timedelta(minutes=5) def run(self): pass
  • 39. from datetime import timedelta @app.task(): def run_every_five(): pass CELERYBEAT_SCHEDULE = { 'run-every-five': { 'task': 'tasks.run_every_five', 'schedule': timedelta(seconds=30) }, }
  • 40. CRON STYLE. from celery.schedules import crontab crontab(minute=0, hour='*/3') # Every 3 hours. crontab(day_of_week='sunday') # Every minute on Sundays. crontab(0, 0, 0, month_of_year='*/3') # First month of every quarter.
  • 41. @app.periodic_task(run_every=crontab(minute=0, hour=1)) def schedule_emails(): user_ids = User.objects.values_list('id', flat=True) for user_id in user_ids: send_daily_email.delay(user_id) @app.task() def send_daily_email(user_id): user = User.objects.get(id=user_id) try: today = datetime.now() Email.objects.get( user=user, date__year=today.year, date__month=today.month, date__day=today.day) except Email.DoesNotExist: email = Email(user=user, body="Hey, don't forget to LOGIN PLEASE!") email.send() email.save()
  • 42. CELERY BEAT A.K.A THE SCHEDULER. celery -A project beat
  • 43. NEVER RUN A BEAT + WORKER ON A SINGLE CELERY PROCESS. # Really bad idea.... celery -A project worker -B
  • 44. FREQUENTLY RUNNING PERIODIC TASKS. BEWARE OF "TASK STACKING"
  • 45. Schedule task runs every 5 minutes. Tasks take 30 minutes. Schedule task stacks. Bad stuff.
  • 46. EXPIRES! from time import sleep @app.periodic_task(expires=5*60, run_every=timedelta(minutes=5)) def schedule_task(): for _ in range(30): one_minute_task.delay() @app.task(expires=5*60) def one_minute_task(): sleep(60)
  • 47. THINGS GO WRONG IN TASKS!
  • 48. RETRY
  • 49. from celery.exceptions import Retry @app.task(max_retries=10) def gather_data(): try: data = api.get_data() # etc, etc, ... except api.RateLimited as e: raise Retry(exc=e, when=e.cooldown) except api.IsDown: return
  • 53. class DebugTask(app.Task): def after_return(self, status, retval, task_id, args, kwargs, einfo): print("I'm done!") def on_failure(self, exc, task_id, args, kwargs, einfo): print("I failed :(") def on_retry(self, exc, task_id, args, kwargs, einfo): print("I'll try again!") def on_success(self, retval, task_id, args, kwargs): print("I did it!")
  • 54. ABSTRACT class AbstractTask(app.Task): abstract = True def after_return(self, *args, **kwargs): print("All done!") @app.task(base=AbstractTask) def add(x, y): return x + y
  • 55. INSTANTIATION class DatabaseTask(app.Task): abstract = True _db = None @property def db(self): if self._db is None: self._db = Database.connect() return self._db
  • 56. ENSURE A TASK IS EXECUTED ONE AT A TIME
  • 57. from celery import task from celery.utils.log import get_task_logger from django.core.cache import cache from django.utils.hashcompat import md5_constructor as md5 from djangofeeds.models import Feed logger = get_task_logger(__name__) LOCK_EXPIRE = 60 * 5 # Lock expires in 5 minutes @task def import_feed(feed_url): # The cache key consists of the task name and the MD5 digest # of the feed URL. feed_url_digest = md5(feed_url).hexdigest() lock_id = '{0}-lock-{1}'.format(self.name, feed_url_hexdigest) # cache.add fails if if the key already exists acquire_lock = lambda: cache.add(lock_id, 'true', LOCK_EXPIRE) # memcache delete is very slow, but we have to use it to take # advantage of using add() for atomic locking release_lock = lambda: cache.delete(lock_id) logger.debug('Importing feed: %s', feed_url) if acquire_lock(): try: feed = Feed.objects.import_feed(feed_url) finally: release_lock() return feed.url logger.debug( 'Feed %s is already being imported by another worker', feed_url)
  • 59. # settings.py CELERY_IGNORE_RESULT = True CELERYD_TASK_SOFT_TIME_LIMIT = 500 CELERYD_TASK_TIME_LIMIT = 1000
  • 60. # tasks.py app.task(ignore_result=True, soft_time_limit=60, time_limit=120) def add(x, y): pass
  • 61. # settings.py CELERYD_MAX_TASKS_PER_CHILD = 500 CELERYD_PREFETCH_MULTIPLIER = 4
  • 63. SO MANY CHOICES! RabbitMQ Redis SQLAlchemy Django's ORM MongoDB Amazon SQS CouchDB Beanstalk IronMQ
  • 64. DJANGO ORM. # settings.py BROKER_URL = 'django://' INSTALLED_APPS = ( 'kombu.transport.django', ) CELERY_RESULT_BACKEND='djcelery.backends.database:DatabaseBackend' python manage.py syncdb
  • 65. DON'T DO THIS FOR ANYTHING SERIOUS.
  • 67. C OPTIMIZED LIBRARY $ pip install librabbitmq
  • 69. CONCURRENCY celery -A project worker -C 10 celery -A project worker --autoscale=10,1
  • 70. INCREASED CONCURRENCY CAN QUICKLY DRAIN CONNECTIONS ON YOUR DATABASE Use a connection pooler (pgbouncer).
  • 72. CELERY_ROUTES = { 'email.tasks.send_mail': { 'queue': 'priority', }, } # or send_mail.apply_async(queue="priority") celery -A project worker -Q email
  • 76. Make tasks faster. Reduce volume of tasks.
  • 81. RABBITMQ MANAGEMENT PLUGIN HAS A GREAT HTTP API!