Event-Driven Ansible (EDA) extends Ansible from push-based automation to reactive, event-driven workflows. Instead of running playbooks on a schedule, EDA responds in real time to events from monitoring systems, log aggregators, webhooks, and other sources. This guide covers setting up EDA for automated incident response and reactive infrastructure management.
What is Event-Driven Ansible?
- Sources: Listen for events from webhooks, Kafka, Prometheus AlertManager, file changes, etc.
- Rules: Define conditions that match specific events
- Actions: Trigger playbooks, modules, or commands when conditions are met
Install ansible-rulebook (the EDA rule engine)
# NOTE(review): ansible-rulebook's rule engine runs on the JVM; its install
# docs call for a Java runtime (JDK 17+) to be present first - confirm the
# required version for the release you install.

# ansible-rulebook evaluates rulebooks; ansible-runner is what it uses to
# launch playbooks from the run_playbook action.
pip install ansible-rulebook ansible-runner

# Pull both collections in a single ansible-galaxy invocation.
ansible-galaxy collection install ansible.eda community.general

# Sanity check: the CLI is installed and on PATH.
ansible-rulebook --version
Rulebook Structure
# rulebooks/auto-remediation.yml
#
# Listens for JSON alerts on an HTTP webhook and maps each alert type to a
# remediation playbook. Every rule reads fields from the posted body
# (event.payload.*) and forwards the relevant ones to the playbook as
# extra_vars.
---
- name: Automatic Server Remediation
  hosts: all
  sources:
    # NOTE(review): this listener binds to all interfaces and the rules below
    # run privileged playbooks (including blocking an IP) based purely on the
    # posted payload. The webhook source plugin documents a `token` option for
    # authenticating senders - consider enabling it before exposing this port.
    - ansible.eda.webhook:
        host: 0.0.0.0
        port: 5000
  rules:
    - name: Restart service on crash
      condition: event.payload.alert == "service_down"
      action:
        run_playbook:
          name: playbooks/restart-service.yml
          extra_vars:
            target_host: "{{ event.payload.hostname }}"
            service_name: "{{ event.payload.service }}"

    - name: Scale up on high CPU
      condition: >
        event.payload.alert == "high_cpu" and
        event.payload.cpu_percent > 90
      action:
        run_playbook:
          name: playbooks/scale-response.yml
          extra_vars:
            target_host: "{{ event.payload.hostname }}"
            cpu_percent: "{{ event.payload.cpu_percent }}"

    - name: Block IP on brute force detection
      condition: >
        event.payload.alert == "brute_force" and
        event.payload.attempts > 10
      action:
        run_playbook:
          name: playbooks/block-ip.yml
          extra_vars:
            attacking_ip: "{{ event.payload.source_ip }}"
            target_host: "{{ event.payload.hostname }}"

    - name: Disk cleanup on low space
      condition: >
        event.payload.alert == "disk_space_low" and
        event.payload.percent_used > 90
      action:
        run_playbook:
          name: playbooks/disk-cleanup.yml
          extra_vars:
            target_host: "{{ event.payload.hostname }}"
            mount_point: "{{ event.payload.mount }}"
Remediation Playbooks
# playbooks/restart-service.yml
#
# Restarts a systemd unit on one host when it is not active, optionally waits
# for its port to open, and reports the restart to Slack.
# Extra vars read by this play: target_host, service_name, and (optionally)
# service_port.
---
- name: Restart Failed Service
  hosts: "{{ target_host }}"
  become: true
  tasks:
    # No `state` is requested here, so this task only gathers the unit's
    # current status for the check below.
    - name: Check service status
      ansible.builtin.systemd:
        name: "{{ service_name }}"
      register: service_status

    - name: Restart the service
      ansible.builtin.systemd:
        name: "{{ service_name }}"
        state: restarted
      when: service_status.status.ActiveState != "active"
      register: restart_result

    - name: Wait for service to be healthy
      ansible.builtin.wait_for:
        port: "{{ service_port | default(omit) }}"
        timeout: 60
      when: service_port is defined

    # Only announce a restart that actually happened. When the unit was
    # already active the restart task is skipped, and the message below
    # would otherwise be false.
    - name: Send notification
      ansible.builtin.uri:
        url: "https://hooks.slack.com/services/YOUR/WEBHOOK"
        method: POST
        body_format: json
        body:
          text: "Auto-remediation: Restarted {{ service_name }} on {{ target_host }}"
      when: restart_result is changed
# playbooks/disk-cleanup.yml
#
# Frees disk space on a Debian/Ubuntu host (apt cache, oversized logs, old
# kernels, Docker leftovers), then posts the resulting `df` output to Slack.
# Extra vars read by this play: target_host, mount_point.
---
- name: Emergency Disk Cleanup
  hosts: "{{ target_host }}"
  become: true
  tasks:
    - name: Clean apt cache
      ansible.builtin.apt:
        autoclean: true
        autoremove: true

    # Destructive: any *.log file over 100 MB under /var/log is emptied in
    # place (not rotated or archived), and the journal is trimmed to 500 MB.
    - name: Truncate oversized logs and vacuum the journal
      ansible.builtin.shell: |
        find /var/log -name "*.log" -size +100M -exec truncate -s 0 {} \;
        journalctl --vacuum-size=500M

    # Lists installed linux-image/linux-headers packages other than the
    # running kernel's, drops the last two entries from the list, and purges
    # the rest. `xargs -r` skips apt-get entirely when nothing is left.
    - name: Remove old kernels
      ansible.builtin.shell: |
        dpkg -l 'linux-*' | awk '/^ii/{ print $2 }' | \
          grep -v "$(uname -r | sed 's/-generic//')" | \
          grep -E 'linux-(image|headers)' | \
          head -n -2 | xargs -r apt-get -y purge
      ignore_errors: true

    # Best effort: failures (for example Docker not being installed) are
    # ignored so the rest of the cleanup still runs.
    - name: Clean Docker (if installed)
      community.docker.docker_prune:
        containers: true
        images: true
        volumes: false
        builder_cache: true
      ignore_errors: true

    # mount_point is supplied by the triggering event, so it is passed as a
    # discrete argument (never through a shell) and placed after `--` so it
    # cannot be read as an option.
    - name: Report new disk usage
      ansible.builtin.command:
        argv:
          - df
          - "-h"
          - "--"
          - "{{ mount_point }}"
      register: disk_status
      changed_when: false

    - name: Notify
      ansible.builtin.uri:
        url: "https://hooks.slack.com/services/YOUR/WEBHOOK"
        method: POST
        body_format: json
        body:
          text: "Disk cleanup on {{ target_host }}: {{ disk_status.stdout }}"
Prometheus AlertManager Integration
# Configure AlertManager to send webhooks to EDA
# alertmanager.yml
#
# Every alert is routed to the EDA listener. send_resolved is on, so
# "resolved" notifications are delivered as well as "firing" ones.
receivers:
  - name: 'eda-webhook'
    webhook_configs:
      - url: 'http://eda-server:5000/endpoint'
        send_resolved: true
route:
  receiver: 'eda-webhook'
  routes:
    # NOTE(review): this child route sends critical alerts to the same
    # receiver as the default route above. Recent Alertmanager releases
    # deprecate `match` in favour of `matchers` - confirm against the
    # version you run.
    - match:
        severity: critical
      receiver: 'eda-webhook'
# EDA rulebook for AlertManager events
#
# Receives Alertmanager webhook notifications and runs a remediation playbook
# for firing HighMemoryUsage alerts.
# NOTE(review): this source binds port 5000, the same port as the webhook
# source in rulebooks/auto-remediation.yml - give one of them a different
# port if both rulebooks run on the same machine.
---
- name: Prometheus Alert Handler
  hosts: all
  sources:
    - ansible.eda.alertmanager:
        host: 0.0.0.0
        port: 5000
  rules:
    - name: Handle high memory alert
      # The status check keeps "resolved" notifications from triggering
      # remediation.
      condition: >
        event.alert.labels.alertname == "HighMemoryUsage" and
        event.alert.status == "firing"
      action:
        run_playbook:
          name: playbooks/memory-remediation.yml
          extra_vars:
            # NOTE(review): Prometheus `instance` labels are commonly
            # "host:port"; verify the value matches an inventory hostname.
            target_host: "{{ event.alert.labels.instance }}"
Run EDA as a Service
sudo cat > /etc/systemd/system/eda-controller.service