RHCS ESXi 5.1 Fencing

Turns out that using the “free” version of ESXi 5.1 does not work with RHCS fencing due to SOAP limitations.

Place the following file in /usr/sbin/fence_esxi and chmod a+x /usr/sbin/fence_esxi

Make sure to install paramiko (yum install python-paramiko)
#!/usr/bin/python

import paramiko
import sys
import time
import datetime
import re
sys.path.append("/usr/share/fence")
from fencing import *

device_opt = [  "help", "version", "agent", "quiet", "verbose", "debug", "action", "ipaddr", "login", "passwd", "passwd_script", "ssl", "port", "uuid", "separator", "ipport", "power_timeout", "shell_timeout", "login_timeout", "power_wait" ]

options = check_input(device_opt, process_input(device_opt))

f = open("/var/log/cluster/fence_esxi.log","w+")
ts = time.time()
st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
f.write(st + " starting fencing.\n")
f.write("-n " + options["-n"] + "\n")
f.write("-a " + options["-a"] + "\n")
f.write("-l " + options["-l"] + "\n")
f.write("-p " + options["-p"] + "\n")

client = paramiko.SSHClient()
client.load_system_host_keys()
client.connect(options["-a"],username=options["-l"],password=options["-p"])

command="esxcli vm process list | grep ^" + options["-n"]  + " -A 1 | tail -n 1 | sed \'s/  */ /g\' | cut -d \" \" -f 4"

f.write("Cmd: " + command + "\n")

stdin, stdout, stderr = client.exec_command(command)
while not stdout.channel.exit_status_ready():
        f.write("Waiting for command to finish... \n")
        time.sleep(2)

wwid = stdout.read()
f.write("wwid: " + wwid + "\n")

if len(wwid) < 2:
	f.write("VM not found or alread offline \n")
	client.close()
	sys.exit(1)

f.write("VM found \n")
command="esxcli vm process kill --type=soft --world-id=" + wwid
f.write("Cmd: " + command + "\n")

stdin, stdout, stderr = client.exec_command(command)
while not stdout.channel.exit_status_ready():
        f.write("Waiting for command to finish... \n")
        time.sleep(2)

#Give the VM some time to shut down gracefully
time.sleep(30)
f.write("Waited 30 seconds \n")

command="vm-support -V | grep centos | cut -d \"(\" -f 2 | cut -d \")\" -f 1"
f.write("Cmd: " + command + "\n")

stdin, stdout, stderr = client.exec_command(command)
while not stdout.channel.exit_status_ready():
        f.write("Waiting for command to finish... \n")
        time.sleep(2)

status = stdout.read()
f.write("VM Status: " + status + "\n")
sregex = re.compile('Running')

if sregex.search(status):
	f.write("VM still running, hard kill required \n")
	command="esxcli vm process kill --type=hard --world-id=" + wwid
	f.write("Cmd: " + command + "\n")
	stdin, stdout, stderr = client.exec_command(command)
	while not stdout.channel.exit_status_ready():
	        f.write("Waiting for command to finish... \n")
	        time.sleep(2)

	time.sleep(30)
else:
	f.write("VM successfully soft killed \n")

#Get VM info while powered off
command="vim-cmd vmsvc/getallvms | grep " + options["-n"] + " | sed 's/  */ /g' | cut -d \" \" -f 1"
f.write("Cmd: " + command + "\n")
stdin, stdout, stderr = client.exec_command(command)
while not stdout.channel.exit_status_ready():
        f.write("Waiting for command to finish... \n")
        time.sleep(2)

vmid = stdout.read()

#Start VM back up
command="vim-cmd vmsvc/power.on " + vmid
f.write("Cmd: " + command + "\n")
stdin, stdout, stderr = client.exec_command(command)

while not stdout.channel.exit_status_ready():
	f.write("Waiting for command to finish... \n")
	time.sleep(2)

f.write("fence_esxi exiting...")
f.close()
client.close()
sys.exit(0)

In your cluster.conf you would then have something like the following. Make sure to enable SSH on your ESXi hosts


<fence>
<method name="fence-cluster1">
<device name="esx1" port="centos-cluster1" ssl="on" />
</method>
</fence>

<fencedevices>
<fencedevice agent=”fence_esxi” ipaddr=”esx2.FQDN.com” login=”root” name=”esx2″ passwd=”YOURPASSWORDHERE” delay=”60″ />
</fencedevices>