Docs / Automation & IaC / Manage Multi-Server Infrastructure with Ansible Playbooks

Manage Multi-Server Infrastructure with Ansible Playbooks

By Admin · Mar 15, 2026 · Updated Apr 24, 2026 · 320 views · 4 min read

Ansible playbooks enable you to automate complex multi-server deployments, configuration management, and maintenance tasks across your entire infrastructure. This guide covers writing production-grade playbooks that manage multiple servers simultaneously with role-based organization, error handling, and idempotent operations.

Multi-Server Inventory Structure

# inventory/production/hosts.yml
#
# Production inventory. Every host lives in exactly one of three groups
# (webservers, databases, loadbalancers) nested under the implicit `all`
# group. Variable precedence, lowest to highest: `all.vars`, then the
# group's `vars`, then the keys set directly on a host.
---
all:
  # Settings shared by every host in every group.
  vars:
    ansible_ssh_private_key_file: '~/.ssh/deploy_key'
    ntp_server: 'pool.ntp.org'
    timezone: 'America/New_York'

  children:
    # Web tier — nginx worker count is tuned per host.
    webservers:
      vars:
        ansible_user: 'deploy'
        ansible_python_interpreter: '/usr/bin/python3'
      hosts:
        web1.example.com:
          ansible_host: '198.51.100.10'
          nginx_worker_processes: 4
        web2.example.com:
          ansible_host: '198.51.100.11'
          nginx_worker_processes: 8

    # Database tier — `postgres_role` distinguishes primary from replica.
    databases:
      vars:
        postgres_version: 16
      hosts:
        db-primary.example.com:
          ansible_host: '198.51.100.20'
          postgres_role: 'primary'
        db-replica.example.com:
          ansible_host: '198.51.100.21'
          postgres_role: 'replica'

    # Load-balancer tier — a single host with no group-level variables.
    loadbalancers:
      hosts:
        lb1.example.com:
          ansible_host: '198.51.100.5'

Multi-Server Deployment Playbook

# playbooks/deploy-webapp.yml
#
# Rolling deployment of the web application, one web server at a time.
#
# Variables:
#   app_version  Git ref (branch, tag or SHA) to deploy. Defaults to `main`.
#
# Order of execution per host: pre_tasks -> tasks -> notified handlers ->
# post_tasks, so the health check runs after the application restart.
---
- name: Deploy Web Application
  # Only the web tier runs the application. Targeting `all` would also push
  # the code to the database and load-balancer hosts, where the health
  # check against localhost:8000 cannot succeed.
  hosts: webservers
  become: true
  serial: 1  # Rolling deployment — one server at a time

  pre_tasks:
    - name: Check disk space
      ansible.builtin.assert:
        that:
          # 1073741824 bytes = 1 GiB free on the root filesystem.
          - >-
            (ansible_mounts | selectattr('mount', 'equalto', '/')
            | map(attribute='size_available') | first) > 1073741824
        fail_msg: "Less than 1GB disk space available"

  tasks:
    - name: Pull latest application code
      ansible.builtin.git:
        repo: "https://github.com/yourorg/webapp.git"
        dest: /opt/webapp
        version: "{{ app_version | default('main') }}"
        # Discards any local modifications in the checkout.
        force: true
      notify: restart app

    - name: Install dependencies
      ansible.builtin.pip:
        requirements: /opt/webapp/requirements.txt
        virtualenv: /opt/webapp/venv
        virtualenv_python: python3.11

    - name: Run database migrations (only on first webserver)
      ansible.builtin.command:
        cmd: /opt/webapp/venv/bin/python manage.py migrate --noinput
        chdir: /opt/webapp
      # `run_once` is evaluated once per serial batch, so with `serial: 1`
      # it would run the migration on every host. Comparing against the
      # first host of the whole play runs it exactly once.
      when: inventory_hostname == ansible_play_hosts_all[0]

    # No `run_once` here: static files are collected on each server's own
    # filesystem, so every web server needs this step.
    - name: Collect static files
      ansible.builtin.command:
        cmd: /opt/webapp/venv/bin/python manage.py collectstatic --noinput
        chdir: /opt/webapp

  handlers:
    - name: restart app
      ansible.builtin.systemd:
        name: webapp
        state: restarted
        daemon_reload: true

  post_tasks:
    # Poll the health endpoint for up to 5 attempts, 3 seconds apart.
    - name: Verify application health
      ansible.builtin.uri:
        url: "http://localhost:8000/health/"
        status_code: 200
      retries: 5
      delay: 3
      register: health_check
      until: health_check.status == 200

Rolling Updates with Zero Downtime

# playbooks/rolling-update.yml
#
# Zero-downtime rolling update: each web server is taken out of the HAProxy
# rotation, updated, health-checked and put back before the next one starts.
#
# Variables:
#   deploy_version  Version handed to the `deploy-app` role as `app_version`.
#                   Must be supplied by the caller (for example with `-e`).
---
- name: Rolling Update Behind Load Balancer
  hosts: webservers
  become: true
  serial: 1
  # Abort the whole play as soon as a single host fails, so a bad release
  # never reaches the remaining servers.
  max_fail_percentage: 0

  pre_tasks:
    # NOTE(review): `host` must match the server name used in the HAProxy
    # backend definition — confirm it equals the inventory hostname.
    - name: Disable server in load balancer
      community.general.haproxy:
        state: disabled
        host: "{{ inventory_hostname }}"
        socket: /var/run/haproxy/admin.sock
      delegate_to: "{{ groups['loadbalancers'][0] }}"

    # With only `timeout` set, wait_for simply sleeps for that many seconds.
    - name: Wait for connections to drain
      ansible.builtin.wait_for:
        timeout: 30

  roles:
    - role: deploy-app
      vars:
        app_version: "{{ deploy_version }}"

  post_tasks:
    # `retries`/`delay` only take effect together with `until` on
    # ansible-core releases before 2.16; without it the first failed probe
    # would abort the update while the app is still starting.
    - name: Verify app health
      ansible.builtin.uri:
        url: "http://localhost:8000/health/"
        status_code: 200
      register: health_check
      until: health_check.status == 200
      retries: 10
      delay: 5

    - name: Re-enable in load balancer
      community.general.haproxy:
        state: enabled
        host: "{{ inventory_hostname }}"
        socket: /var/run/haproxy/admin.sock
      delegate_to: "{{ groups['loadbalancers'][0] }}"

    - name: Wait for traffic to stabilize
      ansible.builtin.pause:
        seconds: 10

Error Handling and Recovery

# Deploy a new version with automatic rollback.
#
# Flow: back up /opt/webapp, run the deployment inside a `block`, and on any
# failure restore the backup in `rescue`. `always` removes the backup unless
# the restore itself failed.
#
# Variables:
#   new_version  Git ref to deploy. Must be supplied by the caller.
---
- name: Deploy with rollback capability
  hosts: webservers
  become: true

  tasks:
    - name: Backup current version
      ansible.builtin.copy:
        src: /opt/webapp/
        dest: /opt/webapp-backup/
        remote_src: true
      register: backup_result
      # Best effort: on a first deployment there is nothing to back up.
      ignore_errors: true

    - name: Deploy new version
      block:
        - name: Pull new code
          ansible.builtin.git:
            repo: "https://github.com/yourorg/webapp.git"
            dest: /opt/webapp
            version: "{{ new_version }}"

        - name: Install dependencies
          ansible.builtin.pip:
            requirements: /opt/webapp/requirements.txt
            virtualenv: /opt/webapp/venv

        - name: Run migrations
          ansible.builtin.command:
            cmd: /opt/webapp/venv/bin/python manage.py migrate
            chdir: /opt/webapp
          # All web servers share one database; running the migration from
          # every host at the same time would race.
          run_once: true

        - name: Restart application
          ansible.builtin.systemd:
            name: webapp
            state: restarted

        # `until` makes the retries effective on every Ansible version.
        - name: Health check
          ansible.builtin.uri:
            url: "http://localhost:8000/health/"
            status_code: 200
          register: health_check
          until: health_check.status == 200
          retries: 5
          delay: 5

      rescue:
        - name: Rollback — restore backup
          ansible.builtin.copy:
            src: /opt/webapp-backup/
            dest: /opt/webapp/
            remote_src: true
          register: rollback_restore
          # Nothing to restore when the backup step did not succeed.
          when: backup_result is succeeded

        - name: Rollback — restart with old version
          ansible.builtin.systemd:
            name: webapp
            state: restarted

        - name: Notify about failed deployment
          community.general.mail:
            to: ops@example.com
            subject: "FAILED: Deployment to {{ inventory_hostname }}"
            body: "Deployment of {{ new_version }} failed. Rolled back."
          delegate_to: localhost
          # The mail is sent from the control node; no privilege escalation
          # is needed (or necessarily available) there.
          become: false

        # A completed `rescue` clears the error, which would report the host
        # as successfully deployed. Fail explicitly so the run reflects the
        # rollback.
        - name: Mark deployment as failed
          ansible.builtin.fail:
            msg: "Deployment of {{ new_version }} to {{ inventory_hostname }} failed."

      always:
        - name: Clean up backup
          ansible.builtin.file:
            path: /opt/webapp-backup
            state: absent
          # Keep the backup for manual recovery if restoring it failed.
          when: rollback_restore is not defined or rollback_restore is not failed

Useful Patterns

# Run tasks on specific host groups
- name: Configure databases only
  hosts: databases
  # Writing below /etc and restarting a service requires root.
  become: true
  tasks:
    - name: Ensure PostgreSQL is configured
      ansible.builtin.template:
        src: postgresql.conf.j2
        # Follows the inventory's `postgres_version`; falls back to 16.
        dest: "/etc/postgresql/{{ postgres_version | default(16) }}/main/postgresql.conf"
        owner: postgres
        group: postgres
        mode: "0644"
      notify: restart postgresql

  # A notified handler must exist in the play, otherwise Ansible aborts with
  # "handler not found" as soon as the template changes.
  handlers:
    - name: restart postgresql
      ansible.builtin.service:
        name: postgresql
        state: restarted

# Conditional execution
- name: Setup monitoring agents
  hosts: all
  # Installing packages requires root.
  become: true
  tasks:
    - name: Install node_exporter
      ansible.builtin.package:
        name: prometheus-node-exporter
        state: present
      # `| bool` so that a string such as "false" passed with -e is honored.
      when: monitoring_enabled | default(true) | bool

# Run ad-hoc commands across all servers
# ansible all -m shell -a "df -h /" -i inventory/production/
# ansible webservers -m service -a "name=nginx state=restarted" -i inventory/production/ --become

Best Practices

  • Use serial for rolling deployments to avoid taking down all servers at once
  • Always include health checks after deployment to verify success
  • Use block/rescue/always for automatic rollback on failure
  • Tag your tasks (for example, add tags: [config] to a task) so you can run specific parts: ansible-playbook -i inventory/production/ playbooks/deploy-webapp.yml --tags "config"
  • Use --check mode (dry run) before applying changes to production
  • Keep secrets in Ansible Vault — never commit plaintext passwords

Was this article helpful?