Ansible playbooks enable you to automate complex multi-server deployments, configuration management, and maintenance tasks across your entire infrastructure. This guide covers writing production-grade playbooks that manage multiple servers simultaneously with role-based organization, error handling, and idempotent operations.
Multi-Server Inventory Structure
# inventory/production/hosts.yml
# Production inventory: three host groups (web, database, load balancer)
# nested under `all`, with per-host, per-group and global variables.
---
all:
  children:
    # Web tier. Each host sets its own nginx worker count.
    webservers:
      hosts:
        web1.example.com:
          ansible_host: 198.51.100.10
          nginx_worker_processes: 4
        web2.example.com:
          ansible_host: 198.51.100.11
          nginx_worker_processes: 8
      vars:
        ansible_user: deploy
        ansible_python_interpreter: /usr/bin/python3

    # Database tier. `postgres_role` distinguishes primary from replica.
    databases:
      hosts:
        db-primary.example.com:
          ansible_host: 198.51.100.20
          postgres_role: primary
        db-replica.example.com:
          ansible_host: 198.51.100.21
          postgres_role: replica
      vars:
        postgres_version: 16

    loadbalancers:
      hosts:
        lb1.example.com:
          ansible_host: 198.51.100.5

  # Variables applied to every host in the inventory.
  # NOTE(review): the flattened source does not show whether this block
  # belonged to `loadbalancers` or to `all`; it is placed under `all` because
  # the SSH key, NTP server and timezone read as global settings — confirm.
  vars:
    ansible_ssh_private_key_file: ~/.ssh/deploy_key
    ntp_server: pool.ntp.org
    timezone: America/New_York
Multi-Server Deployment Playbook
# playbooks/deploy-webapp.yml
# Rolling deployment of the web application, one web server at a time:
# disk-space guard -> code checkout -> dependencies -> migrations ->
# static files -> service restart (handler) -> health check.
---
- name: Deploy Web Application
  # Target the web tier only. With `hosts: all` the checkout, service restart
  # and localhost:8000 health check would also run on the database and
  # load-balancer hosts.
  hosts: webservers
  become: true
  serial: 1  # Rolling deployment — one server at a time

  pre_tasks:
    # Abort the host early if the root filesystem has less than 1 GiB free.
    # Relies on gathered facts (ansible_mounts).
    - name: Check disk space
      ansible.builtin.assert:
        that:
          - >-
            ansible_mounts | selectattr('mount', 'equalto', '/')
            | map(attribute='size_available') | first > 1073741824
        fail_msg: "Less than 1GB disk space available"

  tasks:
    - name: Pull latest application code
      ansible.builtin.git:
        repo: "https://github.com/yourorg/webapp.git"
        dest: /opt/webapp
        version: "{{ app_version | default('main') }}"
        force: true  # discard local modifications in the checkout
      notify: restart app

    - name: Install dependencies
      ansible.builtin.pip:
        requirements: /opt/webapp/requirements.txt
        virtualenv: /opt/webapp/venv
        virtualenv_python: python3.11

    # `run_once` is evaluated per serial batch, so with `serial: 1` it would
    # run the migration on every host. Pin it to the first host targeted by
    # the play instead.
    - name: Run database migrations (only on first webserver)
      ansible.builtin.command:
        cmd: /opt/webapp/venv/bin/python manage.py migrate --noinput
        chdir: /opt/webapp
      when: inventory_hostname == ansible_play_hosts_all[0]

    # Runs on every web server. The original `run_once: true` had no effect
    # here because each serial batch contains a single host.
    - name: Collect static files
      ansible.builtin.command:
        cmd: /opt/webapp/venv/bin/python manage.py collectstatic --noinput
        chdir: /opt/webapp

  handlers:
    # Notified by the checkout task; flushed at the end of `tasks`, i.e.
    # before the health check in `post_tasks`.
    - name: restart app
      ansible.builtin.systemd:
        name: webapp
        state: restarted
        daemon_reload: true

  post_tasks:
    # Poll the local health endpoint up to 5 times, 3 seconds apart.
    - name: Verify application health
      ansible.builtin.uri:
        url: "http://localhost:8000/health/"
        status_code: 200
      retries: 5
      delay: 3
      register: health_check
      until: health_check.status == 200
Rolling Updates with Zero Downtime
# playbooks/rolling-update.yml
# Zero-downtime rolling update: each web server is taken out of the HAProxy
# pool, updated via the deploy-app role, health-checked, then put back.
---
- name: Rolling Update Behind Load Balancer
  hosts: webservers
  become: true
  serial: 1
  max_fail_percentage: 0  # any failed host aborts the remaining batches

  pre_tasks:
    # Runs on the first load balancer, acting on the current web server.
    - name: Disable server in load balancer
      community.general.haproxy:
        state: disabled
        host: "{{ inventory_hostname }}"
        socket: /var/run/haproxy/admin.sock
      delegate_to: "{{ groups['loadbalancers'][0] }}"

    # With only `timeout` set, wait_for simply sleeps for that many seconds.
    - name: Wait for connections to drain
      ansible.builtin.wait_for:
        timeout: 30

  roles:
    - role: deploy-app
      vars:
        app_version: "{{ deploy_version }}"

  post_tasks:
    # `retries`/`delay` need an explicit `until` to retry reliably across
    # Ansible versions; older releases ignore `retries` without it.
    - name: Verify app health
      ansible.builtin.uri:
        url: "http://localhost:8000/health/"
        status_code: 200
      register: rolling_health_check
      until: rolling_health_check.status == 200
      retries: 10
      delay: 5

    - name: Re-enable in load balancer
      community.general.haproxy:
        state: enabled
        host: "{{ inventory_hostname }}"
        socket: /var/run/haproxy/admin.sock
      delegate_to: "{{ groups['loadbalancers'][0] }}"

    - name: Wait for traffic to stabilize
      ansible.builtin.pause:
        seconds: 10
Error Handling and Recovery
---
# Deploys `new_version` to each web server. If any step inside the block
# fails, the rescue section restores the pre-deployment copy, restarts the
# service and e-mails ops. The backup directory is removed in every case.
- name: Deploy with rollback capability
  hosts: webservers
  become: true

  tasks:
    # Replaces a blanket `ignore_errors` on the backup: a first-time deploy
    # (no /opt/webapp yet) is still allowed, but a genuine backup failure now
    # stops the host before anything is changed.
    - name: Check for an existing installation
      ansible.builtin.stat:
        path: /opt/webapp
      register: webapp_dir

    - name: Backup current version
      ansible.builtin.copy:
        src: /opt/webapp/
        dest: /opt/webapp-backup/
        remote_src: true
      when: webapp_dir.stat.exists

    - name: Deploy new version
      block:
        - name: Pull new code
          ansible.builtin.git:
            repo: "https://github.com/yourorg/webapp.git"
            dest: /opt/webapp
            version: "{{ new_version }}"

        - name: Install dependencies
          ansible.builtin.pip:
            requirements: /opt/webapp/requirements.txt
            virtualenv: /opt/webapp/venv

        - name: Run migrations
          ansible.builtin.command:
            cmd: /opt/webapp/venv/bin/python manage.py migrate
            chdir: /opt/webapp

        - name: Restart application
          ansible.builtin.systemd:
            name: webapp
            state: restarted

        # `until` makes the retries effective on all Ansible versions; a
        # final failure here triggers the rescue section.
        - name: Health check
          ansible.builtin.uri:
            url: "http://localhost:8000/health/"
            status_code: 200
          register: deploy_health_check
          until: deploy_health_check.status == 200
          retries: 5
          delay: 5

      rescue:
        # Only the files are restored; database migrations applied above are
        # not reverted by this play.
        - name: Rollback — restore backup
          ansible.builtin.copy:
            src: /opt/webapp-backup/
            dest: /opt/webapp/
            remote_src: true
          when: webapp_dir.stat.exists

        - name: Rollback — restart with old version
          ansible.builtin.systemd:
            name: webapp
            state: restarted

        # Sent from the control node, not from the web server.
        - name: Notify about failed deployment
          community.general.mail:
            to: ops@example.com
            subject: "FAILED: Deployment to {{ inventory_hostname }}"
            body: "Deployment of {{ new_version }} failed. Rolled back."
          delegate_to: localhost

      always:
        - name: Clean up backup
          ansible.builtin.file:
            path: /opt/webapp-backup
            state: absent
Useful Patterns
# Run tasks on specific host groups
---
- name: Configure databases only
  hosts: databases
  become: true  # the config file lives under /etc

  tasks:
    # The version directory follows the inventory's `postgres_version`
    # group variable, falling back to 16 when it is not set.
    - name: Ensure PostgreSQL is configured
      ansible.builtin.template:
        src: postgresql.conf.j2
        dest: "/etc/postgresql/{{ postgres_version | default(16) }}/main/postgresql.conf"
      notify: restart postgresql

  handlers:
    # Required by the `notify` above; a notification for an undefined
    # handler makes the play fail.
    # NOTE(review): assumes the service unit is named `postgresql` — confirm
    # for the target distribution.
    - name: restart postgresql
      ansible.builtin.service:
        name: postgresql
        state: restarted
# Conditional execution
- name: Setup monitoring agents
  hosts: all
  become: true  # package installation needs root

  tasks:
    # Installed unless `monitoring_enabled` is set to a false value. The
    # `bool` filter makes string overrides such as
    # `-e monitoring_enabled=false` behave as expected.
    - name: Install node_exporter
      ansible.builtin.package:
        name: prometheus-node-exporter
        state: present
      when: monitoring_enabled | default(true) | bool
# Run ad-hoc commands across all servers
# ansible all -m shell -a "df -h /" -i inventory/production/
# ansible webservers -m service -a "name=nginx state=restarted" -i inventory/production/ --become
Best Practices
- Use `serial` for rolling deployments to avoid taking down all servers at once
- Always include health checks after deployment to verify success
- Use `block`/`rescue`/`always` for automatic rollback on failure
- Tag your tasks so you can run specific parts: `ansible-playbook deploy.yml --tags "config"`
- Use `--check` mode (dry run) before applying changes to production
- Keep secrets in Ansible Vault — never commit plaintext passwords