This tutorial describes how to configure a two-node cluster that serves NFS from a filesystem on a DRBD volume. Without further ado, read on if you are curious ;)
Prepare the disk and create the needed vg/lv on both nodes:
cfdisk /dev/sdb
pvcreate /dev/sdb1
vgcreate nfsvg /dev/sdb1
lvcreate -n nfs -L25G nfsvg
lvcreate -n drbd -L150M nfsvg
The output should look like this:
root@eave:~# cfdisk /dev/sdb

Disk has been changed.

WARNING: If you have created or modified any
DOS 6.x partitions, please see the cfdisk manual
page for additional information.
root@eave:~# pvcreate /dev/sdb1
  Writing physical volume data to disk "/dev/sdb1"
  Physical volume "/dev/sdb1" successfully created
root@eave:~# vgcreate nfsvg /dev/sdb1
  Volume group "nfsvg" successfully created
root@eave:~# lvcreate -n nfs -L25G nfsvg
  Logical volume "nfs" created
root@eave:~# lvcreate -n drbd -L150M nfsvg
  Rounding up size to full physical extent 152.00 MiB
  Logical volume "drbd" created
root@eave:~# lvscan | grep nfs
  ACTIVE            '/dev/nfsvg/nfs' [25.00 GiB] inherit
  ACTIVE            '/dev/nfsvg/drbd' [152.00 MiB] inherit
root@candy:~# cfdisk /dev/sdb

Disk has been changed.

WARNING: If you have created or modified any
DOS 6.x partitions, please see the cfdisk manual
page for additional information.
root@candy:~# pvcreate /dev/sdb1
  Writing physical volume data to disk "/dev/sdb1"
  Physical volume "/dev/sdb1" successfully created
root@candy:~# vgcreate nfsvg /dev/sdb1
  Volume group "nfsvg" successfully created
root@candy:~# lvcreate -n nfs -L25G nfsvg
  Logical volume "nfs" created
root@candy:~# lvcreate -n drbd -L150M nfsvg
  Rounding up size to full physical extent 152.00 MiB
  Logical volume "drbd" created
root@candy:~# lvscan | grep nfs
  ACTIVE            '/dev/nfsvg/nfs' [25.00 GiB] inherit
  ACTIVE            '/dev/nfsvg/drbd' [152.00 MiB] inherit
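By the way, if you prefer to script the partitioning instead of going through cfdisk interactively, a rough equivalent would be the following sketch (it assumes /dev/sdb is blank and that the parted package is installed, which is not part of this setup):
parted -s /dev/sdb mklabel msdos
parted -s /dev/sdb mkpart primary 1MiB 100%
After that, continue with pvcreate/vgcreate/lvcreate exactly as above.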
Make sure the time is always correct on both nodes. For that, install and configure the NTP client:
aptitude install ntp ntpdate
date
cp /usr/share/zoneinfo/Europe/Bucharest /etc/localtime
echo "Europe/Bucharest" > /etc/timezone
date
diff -s /etc/localtime /usr/share/zoneinfo/`cat /etc/timezone`
If you have a gateway with access to both nodes, check that the dates match:
for i in candy eave; do ssh $i date; done
Work output:
root@eave:~# aptitude install ntp ntpdate The following NEW packages will be installed: libopts25{a} lockfile-progs{a} ntp ntpdate 0 packages upgraded, 4 newly installed, 0 to remove and 0 not upgraded. Need to get 729 kB of archives. After unpacking 1,765 kB will be used. Do you want to continue? [Y/n/?] y Get: 1 http://ftp.us.debian.org/debian/ wheezy/main libopts25 amd64 1:5.12-0.1 [73.8 kB] Get: 2 http://ftp.us.debian.org/debian/ wheezy/main ntp amd64 1:4.2.6.p5+dfsg-2 [562 kB] Get: 3 http://ftp.us.debian.org/debian/ wheezy/main ntpdate amd64 1:4.2.6.p5+dfsg-2 [82.6 kB] Get: 4 http://ftp.us.debian.org/debian/ wheezy/main lockfile-progs amd64 0.1.17 [11.0 kB] Fetched 729 kB in 2s (322 kB/s) Selecting previously unselected package libopts25. (Reading database ... 38478 files and directories currently installed.) Unpacking libopts25 (from .../libopts25_1%3a5.12-0.1_amd64.deb) ... Selecting previously unselected package ntp. Unpacking ntp (from .../ntp_1%3a4.2.6.p5+dfsg-2_amd64.deb) ... Selecting previously unselected package ntpdate. Unpacking ntpdate (from .../ntpdate_1%3a4.2.6.p5+dfsg-2_amd64.deb) ... Selecting previously unselected package lockfile-progs. Unpacking lockfile-progs (from .../lockfile-progs_0.1.17_amd64.deb) ... Processing triggers for man-db ... Setting up libopts25 (1:5.12-0.1) ... Setting up ntp (1:4.2.6.p5+dfsg-2) ... Starting NTP server: ntpd. Setting up ntpdate (1:4.2.6.p5+dfsg-2) ... Setting up lockfile-progs (0.1.17) ... root@eave:~# date Thu Nov 27 05:18:24 CST 2014 root@eave:~# cp /usr/share/zoneinfo/Europe/Bucharest /etc/localtime root@eave:~# echo "Europe/Bucharest" > /etc/timezone root@eave:~# date Thu Nov 27 12:18:30 EST 2014 root@eave:~# diff -s /etc/localtime /usr/share/zoneinfo/`cat /etc/timezone` Files /etc/localtime and /usr/share/zoneinfo/Europe/Bucharest are identical root@eave:~# /usr/sbin/ntpdate -s -b 192.168.11.1 root@eave:~# date Thu Nov 27 12:18:37 EST 2014
root@candy:~# aptitude install ntp ntpdate The following NEW packages will be installed: libopts25{a} lockfile-progs{a} ntp ntpdate 0 packages upgraded, 4 newly installed, 0 to remove and 0 not upgraded. Need to get 729 kB of archives. After unpacking 1,765 kB will be used. Do you want to continue? [Y/n/?] y Get: 1 http://ftp.us.debian.org/debian/ wheezy/main libopts25 amd64 1:5.12-0.1 [73.8 kB] Get: 2 http://ftp.us.debian.org/debian/ wheezy/main ntp amd64 1:4.2.6.p5+dfsg-2 [562 kB] Get: 3 http://ftp.us.debian.org/debian/ wheezy/main ntpdate amd64 1:4.2.6.p5+dfsg-2 [82.6 kB] Get: 4 http://ftp.us.debian.org/debian/ wheezy/main lockfile-progs amd64 0.1.17 [11.0 kB] Fetched 729 kB in 1s (387 kB/s) Selecting previously unselected package libopts25. (Reading database ... 38467 files and directories currently installed.) Unpacking libopts25 (from .../libopts25_1%3a5.12-0.1_amd64.deb) ... Selecting previously unselected package ntp. Unpacking ntp (from .../ntp_1%3a4.2.6.p5+dfsg-2_amd64.deb) ... Selecting previously unselected package ntpdate. Unpacking ntpdate (from .../ntpdate_1%3a4.2.6.p5+dfsg-2_amd64.deb) ... Selecting previously unselected package lockfile-progs. Unpacking lockfile-progs (from .../lockfile-progs_0.1.17_amd64.deb) ... Processing triggers for man-db ... Setting up libopts25 (1:5.12-0.1) ... Setting up ntp (1:4.2.6.p5+dfsg-2) ... Starting NTP server: ntpd. Setting up ntpdate (1:4.2.6.p5+dfsg-2) ... Setting up lockfile-progs (0.1.17) ... root@candy:~# date Thu Nov 27 05:17:03 CST 2014 root@candy:~# cp /usr/share/zoneinfo/Europe/Bucharest /etc/localtime root@candy:~# echo "Europe/Bucharest" > /etc/timezone root@candy:~# date Thu Nov 27 12:17:20 EST 2014 root@candy:~# /usr/sbin/ntpdate -s -b 192.168.11.1 root@candy:~# date Thu Nov 27 12:17:41 EST 2014
core:~# for i in candy eave; do ssh $i date; done
Thu Nov 27 12:19:25 EST 2014
Thu Nov 27 12:19:25 EST 2014
Install the NFS server and remove it from the boot sequence, because it will be handled by heartbeat:
apt-get install nfs-kernel-server
update-rc.d -f nfs-kernel-server remove
update-rc.d -f nfs-common remove
Work output:
root@eave:~# apt-get install nfs-kernel-server Reading package lists... Done Building dependency tree Reading state information... Done The following NEW packages will be installed: nfs-kernel-server 0 upgraded, 1 newly installed, 0 to remove and 0 not upgraded. Need to get 156 kB of archives. After this operation, 512 kB of additional disk space will be used. Get:1 http://ftp.us.debian.org/debian/ wheezy/main nfs-kernel-server amd64 1:1.2.6-4 [156 kB] Fetched 156 kB in 0s (175 kB/s) Selecting previously unselected package nfs-kernel-server. (Reading database ... 38551 files and directories currently installed.) Unpacking nfs-kernel-server (from .../nfs-kernel-server_1%3a1.2.6-4_amd64.deb) ... Processing triggers for man-db ... Setting up nfs-kernel-server (1:1.2.6-4) ... Creating config file /etc/exports with new version Creating config file /etc/default/nfs-kernel-server with new version Starting NFS common utilities: statd idmapd. Not starting NFS kernel daemon: no exports. ... (warning). root@eave:~# update-rc.d -f nfs-kernel-server remove update-rc.d: using dependency based boot sequencing root@eave:~# update-rc.d -f nfs-common remove update-rc.d: using dependency based boot sequencing
root@candy:~# apt-get install nfs-kernel-server Reading package lists... Done Building dependency tree Reading state information... Done The following NEW packages will be installed: nfs-kernel-server 0 upgraded, 1 newly installed, 0 to remove and 0 not upgraded. Need to get 156 kB of archives. After this operation, 512 kB of additional disk space will be used. Get:1 http://ftp.us.debian.org/debian/ wheezy/main nfs-kernel-server amd64 1:1.2.6-4 [156 kB] Fetched 156 kB in 0s (172 kB/s) Selecting previously unselected package nfs-kernel-server. (Reading database ... 38540 files and directories currently installed.) Unpacking nfs-kernel-server (from .../nfs-kernel-server_1%3a1.2.6-4_amd64.deb) ... Processing triggers for man-db ... Setting up nfs-kernel-server (1:1.2.6-4) ... Creating config file /etc/exports with new version Creating config file /etc/default/nfs-kernel-server with new version Starting NFS common utilities: statd idmapd. Not starting NFS kernel daemon: no exports. ... (warning). root@candy:~# update-rc.d -f nfs-kernel-server remove update-rc.d: using dependency based boot sequencing root@candy:~# update-rc.d -f nfs-common remove update-rc.d: using dependency based boot sequencing
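Just to be sure the init scripts are really out of the boot sequence, a quick check helps (assuming a default Debian sysvinit setup with runlevel 2); it should print nothing on either node:
ls /etc/rc2.d/ | grep -i nfs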
Add this line to /etc/exports on both nodes (in my example, uranus and jupiter are the NFS clients; they must be resolvable by name from both nodes):
/nfs uranus(rw,sync,no_subtree_check,no_root_squash) jupiter(rw,sync,no_subtree_check,no_root_squash)
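Later, once heartbeat has started the NFS server for the first time, you can re-read the exports file and list what is actually being exported:
exportfs -ra
exportfs -v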
Install DRBD from the Debian repository:
aptitude install drbd8-utils
Work output:
root@eave:~# aptitude install drbd8-utils The following NEW packages will be installed: drbd8-utils 0 packages upgraded, 1 newly installed, 0 to remove and 0 not upgraded. Need to get 0 B/250 kB of archives. After unpacking 630 kB will be used. Selecting previously unselected package drbd8-utils. (Reading database ... 38580 files and directories currently installed.) Unpacking drbd8-utils (from .../drbd8-utils_2%3a8.3.13-2_amd64.deb) ... Processing triggers for man-db ... Setting up drbd8-utils (2:8.3.13-2) ...
root@candy:~# aptitude install drbd8-utils The following NEW packages will be installed: drbd8-utils 0 packages upgraded, 1 newly installed, 0 to remove and 0 not upgraded. Need to get 250 kB of archives. After unpacking 630 kB will be used. Get: 1 http://ftp.us.debian.org/debian/ wheezy/main drbd8-utils amd64 2:8.3.13-2 [250 kB] Fetched 250 kB in 1s (217 kB/s) Selecting previously unselected package drbd8-utils. (Reading database ... 38569 files and directories currently installed.) Unpacking drbd8-utils (from .../drbd8-utils_2%3a8.3.13-2_amd64.deb) ... Processing triggers for man-db ... Setting up drbd8-utils (2:8.3.13-2) ...
Now it is time to configure DRBD. If you want, you can put everything in /etc/drbd.conf, but I would follow the suggested layout and create a separate config file.
On both nodes, put the following in /etc/drbd.d/nfs.res:
resource nfs {
  protocol C;

  handlers {
    pri-on-incon-degr "halt -f";
  }

  startup {
    degr-wfc-timeout 120;    # 2 minutes.
  }

  disk {
    on-io-error detach;
  }

  net {
  }

  syncer {
    rate 24M;
    al-extents 257;
  }

  on eave {
    device     /dev/drbd0;
    disk       /dev/nfsvg/nfs;
    address    192.168.69.11:7788;
    meta-disk  /dev/nfsvg/drbd[0];
  }

  on candy {
    device     /dev/drbd0;
    disk       /dev/nfsvg/nfs;
    address    192.168.69.12:7788;
    meta-disk  /dev/nfsvg/drbd[0];
  }
}
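Before creating any metadata, a quick sanity check of the configuration does not hurt; on both nodes, drbdadm should print the parsed resource back without complaints:
drbdadm dump nfs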
Populate the metadata device and start up DRBD on both nodes:
drbdadm create-md nfs
drbdadm up all
cat /proc/drbd
Work output:
root@candy:~# drbdadm create-md nfs
Writing meta data...
initializing activity log
NOT initialized bitmap
New drbd meta data block successfully created.
success
root@candy:~# drbdadm up all
root@candy:~# cat /proc/drbd
version: 8.3.11 (api:88/proto:86-96)
srcversion: F937DCB2E5D83C6CCE4A6C9
 0: cs:WFConnection ro:Secondary/Unknown ds:Inconsistent/DUnknown C r-----
    ns:0 nr:0 dw:0 dr:0 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:f oos:26214144
root@eave:~# drbdadm create-md nfs
Writing meta data...
initializing activity log
NOT initialized bitmap
New drbd meta data block successfully created.
success
root@eave:~# drbdadm up all
root@eave:~# cat /proc/drbd
version: 8.3.11 (api:88/proto:86-96)
srcversion: F937DCB2E5D83C6CCE4A6C9
 0: cs:Connected ro:Secondary/Secondary ds:Inconsistent/Inconsistent C r-----
    ns:0 nr:0 dw:0 dr:0 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:f oos:26214144
Now we need to start the first sync, from one node only. As you can see, both nodes are Secondary, which is normal. We now choose the node that will act as primary (eave); it will initiate the first full sync:
drbdadm -- --overwrite-data-of-peer primary nfs
You can check the progress from either node:
cat /proc/drbd
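If you prefer to follow the progress continuously instead of re-running the command, something like this works:
watch -n1 cat /proc/drbd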
Output of running sync:
root@eave:~# cat /proc/drbd
version: 8.3.11 (api:88/proto:86-96)
srcversion: F937DCB2E5D83C6CCE4A6C9
 0: cs:SyncSource ro:Primary/Secondary ds:UpToDate/Inconsistent C r-----
    ns:810112 nr:0 dw:0 dr:811672 al:0 bm:49 lo:0 pe:2 ua:7 ap:0 ep:1 wo:f oos:25404416
    [>....................] sync'ed:  3.2% (24808/25600)M
    finish: 0:40:52 speed: 10,340 (10,124) K/sec
Output of synced devices:
root@eave:~# cat /proc/drbd
version: 8.3.11 (api:88/proto:86-96)
srcversion: F937DCB2E5D83C6CCE4A6C9
 0: cs:Connected ro:Secondary/Secondary ds:UpToDate/UpToDate C r-----
    ns:19126656 nr:0 dw:0 dr:19126656 al:0 bm:1168 lo:0 pe:0 ua:0 ap:0 ep:1 wo:f oos:0
Now it is time to format the DRBD volume and mount it (from one node only). We can also run a quick test to see that things behave as expected.
On the primary node, format the DRBD volume, mount it and create some files on it:
mkreiserfs /dev/drbd0
mkdir -p /nfs
mount /dev/drbd0 /nfs
for i in {1..5}; do dd if=/dev/zero of=/nfs/$i bs=10M count=10; done
ls -la /nfs
Work output:
root@eave:~# mkreiserfs /dev/drbd0 mkreiserfs 3.6.21 (2009 www.namesys.com) A pair of credits: Vladimir Demidov wrote the parser for sys_reiser4(), the V3 alpha port, part of the V3 journal relocation code, and helped Hans keep the business side of things running. The Defense Advanced Research Projects Agency (DARPA, www.darpa.mil) is the primary sponsor of Reiser4. DARPA does not endorse this project; it merely sponsors it. Guessing about desired format.. Kernel 3.2.0-4-amd64 is running. Format 3.6 with standard journal Count of blocks on the device: 6553600 Number of blocks consumed by mkreiserfs formatting process: 8411 Blocksize: 4096 Hash function used to sort names: "r5" Journal Size 8193 blocks (first block 18) Journal Max transaction length 1024 inode generation number: 0 UUID: 5d0014d9-23f3-474c-9f32-96a8990908ef ATTENTION: YOU SHOULD REBOOT AFTER FDISK! ALL DATA WILL BE LOST ON '/dev/drbd0'! Continue (y/n):y Initializing journal - 0%....20%....40%....60%....80%....100% Syncing..ok ReiserFS is successfully created on /dev/drbd0. root@eave:~# mkdir -p /nfs root@eave:~# mount /dev/drbd0 /nfs root@eave:~# df -h /nfs Filesystem Size Used Avail Use% Mounted on /dev/drbd0 25G 33M 25G 1% /nfs root@eave:~# for i in {1..5}; do dd if=/dev/zero of=/nfs/$i bs=10M count=10; done 10+0 records in 10+0 records out 104857600 bytes (105 MB) copied, 2.76659 s, 37.9 MB/s 10+0 records in 10+0 records out 104857600 bytes (105 MB) copied, 10.7658 s, 9.7 MB/s 10+0 records in 10+0 records out 104857600 bytes (105 MB) copied, 5.83441 s, 18.0 MB/s 10+0 records in 10+0 records out 104857600 bytes (105 MB) copied, 8.53885 s, 12.3 MB/s 10+0 records in 10+0 records out 104857600 bytes (105 MB) copied, 7.43451 s, 14.1 MB/s root@eave:~# ls -la /nfs total 512500 drwxr-xr-x 4 root root 200 Nov 27 01:28 . drwxr-xr-x 3 root root 72 Nov 26 17:40 .. -rw-r--r-- 1 root root 104857600 Nov 27 01:28 1 -rw-r--r-- 1 root root 104857600 Nov 27 01:28 2 -rw-r--r-- 1 root root 104857600 Nov 27 01:28 3 -rw-r--r-- 1 root root 104857600 Nov 27 01:28 4 -rw-r--r-- 1 root root 104857600 Nov 27 01:28 5
Now we switch manually to the second node and see if the data is written there too:
1st node:
umount /nfs
drbdadm secondary nfs
2nd node:
mkdir -p /nfs
drbdadm primary nfs
mount /dev/drbd0 /nfs
ls -la /nfs
Work output:
root@eave:~# umount /nfs
root@eave:~# drbdadm secondary nfs
root@eave:~# cat /proc/drbd
version: 8.3.11 (api:88/proto:86-96)
srcversion: F937DCB2E5D83C6CCE4A6C9
 0: cs:Connected ro:Secondary/Secondary ds:UpToDate/UpToDate C r-----
    ns:1840648 nr:0 dw:1840648 dr:67756 al:648 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:f oos:0
root@candy:~# mkdir -p /nfs
root@candy:~# drbdadm primary nfs
root@candy:~# mount /dev/drbd0 /nfs
root@candy:~# ls -la /nfs
total 512500
drwxr-xr-x 4 root root       200 Nov 27 01:28 .
drwxr-xr-x 3 root root        72 Nov 26 18:24 ..
-rw-r--r-- 1 root root 104857600 Nov 27 01:28 1
-rw-r--r-- 1 root root 104857600 Nov 27 01:28 2
-rw-r--r-- 1 root root 104857600 Nov 27 01:28 3
-rw-r--r-- 1 root root 104857600 Nov 27 01:28 4
-rw-r--r-- 1 root root 104857600 Nov 27 01:28 5
root@candy:~# cat /proc/drbd
version: 8.3.11 (api:88/proto:86-96)
srcversion: F937DCB2E5D83C6CCE4A6C9
 0: cs:Connected ro:Primary/Secondary ds:UpToDate/UpToDate C r-----
    ns:16 nr:1840532 dw:1840548 dr:1040 al:2 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:f oos:0
We could also delete and recreate some files on this node and see if everything is fine on the other side:
2nd node:
rm /nfs/1
dd if=/dev/zero of=/nfs/6 bs=100M count=2
ls -la /nfs
umount /nfs
drbdadm secondary nfs
1st node:
drbdadm primary nfs
mount /dev/drbd0 /nfs
ls -la /nfs
Work output:
root@candy:~# rm /nfs/1
root@candy:~# dd if=/dev/zero of=/nfs/6 bs=100M count=2
2+0 records in
2+0 records out
209715200 bytes (210 MB) copied, 12.0627 s, 17.4 MB/s
root@candy:~# ls -la /nfs
total 615000
drwxr-xr-x 4 root root       200 Nov 27 01:40 .
drwxr-xr-x 3 root root        72 Nov 26 18:24 ..
-rw-r--r-- 1 root root 104857600 Nov 27 01:28 2
-rw-r--r-- 1 root root 104857600 Nov 27 01:28 3
-rw-r--r-- 1 root root 104857600 Nov 27 01:28 4
-rw-r--r-- 1 root root 104857600 Nov 27 01:28 5
-rw-r--r-- 1 root root 209715200 Nov 27 01:40 6
root@candy:~# umount /nfs
root@candy:~# drbdadm secondary nfs
root@candy:~#
root@eave:~# drbdadm primary nfs
root@eave:~# mount /dev/drbd0 /nfs
root@eave:~# ls -la /nfs
total 615000
drwxr-xr-x 4 root root       200 Nov 27 01:40 .
drwxr-xr-x 3 root root        72 Nov 26 17:40 ..
-rw-r--r-- 1 root root 104857600 Nov 27 01:28 2
-rw-r--r-- 1 root root 104857600 Nov 27 01:28 3
-rw-r--r-- 1 root root 104857600 Nov 27 01:28 4
-rw-r--r-- 1 root root 104857600 Nov 27 01:28 5
-rw-r--r-- 1 root root 209715200 Nov 27 01:40 6
Satisfied with the tests? We can move forward then :)
NFS stores some important state (file locks, etc.) in /var/lib/nfs. This directory must always be available on the node where NFS is currently running, so a small modification is needed in order to store it on the DRBD volume:
On 1st node:
drbdadm primary nfs
mount /dev/drbd0 /nfs
mv /var/lib/nfs/ /nfs/
ln -s /nfs/nfs/ /var/lib/nfs
ls -ald /var/lib/nfs
umount /nfs/
drbdadm secondary nfs
On 2nd node:
drbdadm primary nfs
rm -fr /var/lib/nfs
mount /dev/drbd0 /nfs
ln -s /nfs/nfs/ /var/lib/nfs
ls -ald /var/lib/nfs
ls -la /nfs/
umount /nfs/
drbdadm secondary nfs
Work output:
root@eave:/nfs# mv /var/lib/nfs/ /nfs/
root@eave:~# ln -s /nfs/nfs/ /var/lib/nfs
root@eave:~# ls -lad /var/lib/nfs
lrwxrwxrwx 1 root root 9 Nov 27 16:08 /var/lib/nfs -> /nfs/nfs/
root@eave:/nfs# ls -la
total 1
drwxr-xr-x  5 root  root    104 Nov 27 15:51 .
drwxr-xr-x 25 root  root    640 Nov 27 00:40 ..
drwxr-xr-x  6 statd nogroup 256 Nov 26 23:53 nfs
root@eave:~# umount /nfs/
root@eave:~# drbdadm secondary nfs
root@candy:~# drbdadm primary nfs
root@candy:~# rm -fr /var/lib/nfs
root@candy:~# mount /dev/drbd0 /nfs
root@candy:~# ln -s /nfs/nfs/ /var/lib/nfs
root@candy:~# ls -ald /var/lib/nfs
lrwxrwxrwx 1 root root 9 Nov 27 16:10 /var/lib/nfs -> /nfs/nfs/
root@candy:~# ls -la /nfs/
total 1
drwxr-xr-x  5 root  root    104 Nov 27 15:51 .
drwxr-xr-x 25 root  root    640 Nov 27 01:24 ..
drwxr-xr-x  6 statd nogroup 280 Nov 27 16:07 nfs
root@candy:~# umount /nfs/
root@candy:~# drbdadm secondary nfs
Heartbeat is the control instance of this whole setup and will be installed on both servers, so the nodes monitor each other.
If server1 goes down, heartbeat on server2 detects this and makes server2 take over. Heartbeat also starts and stops the NFS server on both server1 and server2.
It also presents NFS on a virtual IP address (192.168.69.13 in this setup) because the NFS clients can only connect to one IP address.
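For reference, this is roughly how a client would use it once everything is up (/mnt/nfs is just a hypothetical mount point, and the client needs the usual NFS client utilities installed):
mkdir -p /mnt/nfs
mount -t nfs 192.168.69.13:/nfs /mnt/nfs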
First we install heartbeat on both nodes:
aptitude install heartbeat
Work output:
root@eave:~# aptitude install heartbeat The following NEW packages will be installed: cluster-glue{a} dbus{a} gawk{a} heartbeat libcfg4{a} libcib1{a} libconfdb4{a} libcoroipcc4{a} libcoroipcs4{a} libcorosync4{a} libcpg4{a} libcrmcluster1{a} libcrmcommon2{a} libcurl3{a} libdbus-1-3{a} libdbus-glib-1-2{a} libesmtp6{a} libevs4{a} libglib2.0-0{a} libglib2.0-data{a} libheartbeat2{a} liblogsys4{a} liblrm2{a} libltdl7{a} libnet1{a} libnspr4 libnspr4-0d{a} libnss3 libnss3-1d{a} libopenhpi2{a} libopenipmi0{a} libpe-rules2{a} libpe-status3{a} libpengine3{a} libperl5.14{a} libpils2{a} libpload4{a} libplumb2{a} libplumbgpl2{a} libquorum4{a} librtmp0{a} libsam4{a} libsensors4{a} libsigsegv2{a} libsnmp-base{a} libsnmp15{a} libssh2-1{a} libstonith1{a} libstonithd1{a} libsystemd-login0{a} libtimedate-perl{a} libtotem-pg4{a} libtransitioner1{a} libvotequorum4{a} libxml2-utils{a} libxslt1.1{a} openhpid{a} pacemaker{a} resource-agents{a} shared-mime-info{a} 0 packages upgraded, 60 newly installed, 0 to remove and 0 not upgraded. Need to get 8,783 kB/18.0 MB of archives. After unpacking 46.9 MB will be used. Do you want to continue? [Y/n/?] [..useless output..]
root@candy:~# aptitude install heartbeat The following NEW packages will be installed: cluster-glue{a} dbus{a} gawk{a} heartbeat libcfg4{a} libcib1{a} libconfdb4{a} libcoroipcc4{a} libcoroipcs4{a} libcorosync4{a} libcpg4{a} libcrmcluster1{a} libcrmcommon2{a} libcurl3{a} libdbus-1-3{a} libdbus-glib-1-2{a} libesmtp6{a} libevs4{a} libglib2.0-0{a} libglib2.0-data{a} libheartbeat2{a} liblogsys4{a} liblrm2{a} libltdl7{a} libnet1{a} libnspr4 libnspr4-0d{a} libnss3 libnss3-1d{a} libopenhpi2{a} libopenipmi0{a} libpe-rules2{a} libpe-status3{a} libpengine3{a} libperl5.14{a} libpils2{a} libpload4{a} libplumb2{a} libplumbgpl2{a} libquorum4{a} librtmp0{a} libsam4{a} libsensors4{a} libsigsegv2{a} libsnmp-base{a} libsnmp15{a} libssh2-1{a} libstonith1{a} libstonithd1{a} libsystemd-login0{a} libtimedate-perl{a} libtotem-pg4{a} libtransitioner1{a} libvotequorum4{a} libxml2-utils{a} libxslt1.1{a} openhpid{a} pacemaker{a} resource-agents{a} shared-mime-info{a} 0 packages upgraded, 60 newly installed, 0 to remove and 0 not upgraded. Need to get 18.0 MB of archives. After unpacking 46.9 MB will be used. Do you want to continue? [Y/n/?] [..useless output..]
Now we have to configure heartbeat.
Add the following to /etc/ha.d/ha.cf on both nodes:
keepalive 2
deadtime 30
warntime 10
initdead 120
bcast eth0 eth1 eth2 eth3
node eave
node candy
crm yes     # optional; whether you keep this depends on which cluster style you choose later in the tutorial
Create the authentication key file /etc/ha.d/authkeys and set its permissions to 600:
auth 1
1 sha1 MySecret
chmod 600 /etc/ha.d/authkeys
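If you prefer a random shared secret over "MySecret", one possible way to generate it is the following; put the result into /etc/ha.d/authkeys on both nodes, keeping the 600 permissions:
dd if=/dev/urandom bs=512 count=1 2>/dev/null | sha1sum | awk '{print $1}'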
Start the heartbeat on both nodes and check the cluster status:
service heartbeat start
crm_mon --one-shot
Work output:
root@eave:~# pico /etc/ha.d/ha.cf
root@eave:~# pico /etc/ha.d/authkeys
root@eave:~# chmod 600 /etc/ha.d/authkeys
root@eave:~# service heartbeat start
Starting High-Availability services: Done.

============
Last updated: Thu Nov 27 12:39:01 2014
Last change: Thu Nov 27 12:38:54 2014 via crmd on eave
Stack: Heartbeat
Current DC: eave (00000000-3ab4-4erf-asf1-d4645df5a290) - partition with quorum
Version: 1.1.7-ee0730e13d124c3d58f00016c3376a1de5323cff
1 Nodes configured, unknown expected votes
0 Resources configured.
============

Online: [ eave ]
root@candy:~# pico /etc/ha.d/ha.cf
root@candy:~# pico /etc/ha.d/authkeys
root@candy:~# chmod 600 /etc/ha.d/authkeys
root@candy:~# service heartbeat start
Starting High-Availability services: Done.

============
Last updated: Thu Nov 27 12:40:46 2014
Last change: Thu Nov 27 12:40:45 2014 via crmd on eave
Stack: Heartbeat
Current DC: eave (00000000-3ab4-4erf-asf1-d4645df5a290) - partition with quorum
Version: 1.1.7-ee0730e13d124c3d58f00016c3376a1de5323cff
2 Nodes configured, unknown expected votes
0 Resources configured.
============

Node candy (gf034gk1-682c-3fg3-a7a7-4w9823232a3t): pending
Online: [ eave ]
... and in a few seconds:
============
Last updated: Thu Nov 27 12:41:29 2014
Last change: Thu Nov 27 12:40:45 2014 via crmd on eave
Stack: Heartbeat
Current DC: eave (00000000-3ab4-4erf-asf1-d4645df5a290) - partition with quorum
Version: 1.1.7-ee0730e13d124c3d58f00016c3376a1de5323cff
2 Nodes configured, unknown expected votes
0 Resources configured.
============

Online: [ eave candy ]
NOTE:
Please take firewall rules between the nodes into account; the cluster traffic must be allowed in both directions (a possible iptables sketch follows below):
DRBD: TCP 7788
Heartbeat: UDP 694
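A minimal iptables sketch, assuming you run it on eave and the peer (candy) is 192.168.69.12 on the replication link; swap the address on the other node and adapt it to your existing firewall policy:
iptables -A INPUT -p tcp -s 192.168.69.12 --dport 7788 -j ACCEPT   # DRBD replication
iptables -A INPUT -p udp -s 192.168.69.12 --dport 694 -j ACCEPT    # heartbeat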
Now we have to configure our NFS resource group so that it can fail over from one node to another. There are two ways, and both are explained below.
Heartbeat R1-style cluster:
This is the simplest way to configure the cluster; the main disadvantage is that you cannot use the resource group with more than two nodes. You can find more information in the official DRBD links at the end of this article if you are curious.
So, here it goes.
Add the following, all on one line, to /etc/heartbeat/haresources:
eave IPaddr::192.168.69.13/24/bond0 drbddisk::nfs Filesystem::/dev/drbd0::/nfs::reiserfs nfs-common nfs-kernel-server
Also, you will have to comment out "crm yes" in /etc/ha.d/ha.cf, otherwise this will not work.
A bit of explanation is needed here:
eave -> the main node where the resource group will run
192.168.69.13 -> the IP address that fails over together with the resource group from one node to another
nfs -> the DRBD resource
/dev/drbd0 -> the DRBD device
/nfs -> the mount point
reiserfs -> the filesystem type
nfs-common and nfs-kernel-server -> the NFS daemons, started on the target node when a failover occurs
Now restart heartbeat on both nodes and you are good to go. You can reboot one node and then the other to see how nicely the resource group moves back and forth.
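If you would rather not reboot anything, the heartbeat package also ships small helper scripts for a clean manual failover (run them on the node that currently holds the resources; the exact path may differ between versions):
/usr/share/heartbeat/hb_standby     # hand the resource group over to the peer
/usr/share/heartbeat/hb_takeover    # take it back later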
Out of curiosity, when the resource group is released you will see something similar to this in the log:
Nov 27 23:11:40 candy heartbeat: [2884]: info: Heartbeat restart on node eave Nov 27 23:11:40 candy heartbeat: [2884]: info: Link eave:eth0 up. Nov 27 23:11:40 candy heartbeat: [2884]: info: Status update for node eave: status init Nov 27 23:11:40 candy heartbeat: [2884]: info: Link eave:eth1 up. Nov 27 23:11:40 candy heartbeat: [2884]: info: Link eave:bond0 up. Nov 27 23:11:40 candy heartbeat: [2884]: info: Status update for node eave: status up harc[7961]: 2014/11/27_23:11:40 info: Running /etc/ha.d//rc.d/status status harc[7975]: 2014/11/27_23:11:40 info: Running /etc/ha.d//rc.d/status status Nov 27 23:11:41 candy heartbeat: [2884]: info: Status update for node eave: status active harc[7989]: 2014/11/27_23:11:41 info: Running /etc/ha.d//rc.d/status status harc[8003]: 2014/11/27_23:11:41 info: Running /etc/ha.d//rc.d/ip-request ip-request IPaddr[8033]: 2014/11/27_23:11:41 INFO: Running OK ResourceManager[8075]: 2014/11/27_23:11:41 info: Releasing resource group: eave IPaddr::192.168.69.13/24/bond0 drbddisk::nfs Filesystem::/dev/drbd0::/nfs::reiserfs nfs-common nfs-kernel-server ResourceManager[8075]: 2014/11/27_23:11:41 info: Running /etc/init.d/nfs-kernel-server stop ResourceManager[8075]: 2014/11/27_23:11:42 info: Running /etc/init.d/nfs-common stop ResourceManager[8075]: 2014/11/27_23:11:43 info: Running /etc/ha.d/resource.d/Filesystem /dev/drbd0 /nfs reiserfs stop Filesystem[8164]: 2014/11/27_23:11:43 INFO: Running stop for /dev/drbd0 on /nfs Filesystem[8164]: 2014/11/27_23:11:43 INFO: Trying to unmount /nfs Filesystem[8164]: 2014/11/27_23:11:44 INFO: unmounted /nfs successfully Filesystem[8158]: 2014/11/27_23:11:44 INFO: Success ResourceManager[8075]: 2014/11/27_23:11:44 info: Running /etc/ha.d/resource.d/drbddisk nfs stop ResourceManager[8075]: 2014/11/27_23:11:45 info: Running /etc/ha.d/resource.d/IPaddr 192.168.69.13/24/bond0 stop IPaddr[8307]: 2014/11/27_23:11:45 INFO: ifconfig bond0:0 down IPaddr[8283]: 2014/11/27_23:11:45 INFO: Success
And when the resource is taken, something similar to this:
Nov 27 23:12:32 candy heartbeat: [2884]: info: Received shutdown notice from 'eave'. Nov 27 23:12:32 candy heartbeat: [2884]: info: Resources being acquired from eave. Nov 27 23:12:32 candy heartbeat: [8333]: info: No local resources [/usr/share/heartbeat/ResourceManager listkeys candy] to acquire. harc[8332]: 2014/11/27_23:12:32 info: Running /etc/ha.d//rc.d/status status mach_down[8359]: 2014/11/27_23:12:32 info: Taking over resource group IPaddr::192.168.69.13/24/bond0 ResourceManager[8383]: 2014/11/27_23:12:32 info: Acquiring resource group: eave IPaddr::192.168.69.13/24/bond0 drbddisk::nfs Filesystem::/dev/drbd0::/nfs::reiserfs nfs-common nfs-kernel-server IPaddr[8409]: 2014/11/27_23:12:32 INFO: Resource is stopped ResourceManager[8383]: 2014/11/27_23:12:32 info: Running /etc/ha.d/resource.d/IPaddr 192.168.69.13/24/bond0 start IPaddr[8487]: 2014/11/27_23:12:33 INFO: Using calculated netmask for 192.168.69.13: 255.255.255.0 IPaddr[8487]: 2014/11/27_23:12:33 INFO: eval ifconfig bond0:0 192.168.69.13 netmask 255.255.255.0 broadcast 192.168.69.255 IPaddr[8463]: 2014/11/27_23:12:33 INFO: Success ResourceManager[8383]: 2014/11/27_23:12:33 info: Running /etc/ha.d/resource.d/drbddisk nfs start Filesystem[8615]: 2014/11/27_23:12:33 INFO: Resource is stopped ResourceManager[8383]: 2014/11/27_23:12:33 info: Running /etc/ha.d/resource.d/Filesystem /dev/drbd0 /nfs reiserfs start Filesystem[8683]: 2014/11/27_23:12:33 INFO: Running start for /dev/drbd0 on /nfs Filesystem[8677]: 2014/11/27_23:12:33 INFO: Success ResourceManager[8383]: 2014/11/27_23:12:33 info: Running /etc/init.d/nfs-common start ResourceManager[8383]: 2014/11/27_23:12:34 info: Running /etc/init.d/nfs-kernel-server start mach_down[8359]: 2014/11/27_23:12:34 info: mach_down takeover complete for node eave. Nov 27 23:13:04 candy heartbeat: [2884]: WARN: node eave: is dead Nov 27 23:13:04 candy heartbeat: [2884]: info: Dead node eave gave up resources. Nov 27 23:13:04 candy heartbeat: [2884]: info: Resources being acquired from eave. Nov 27 23:13:04 candy heartbeat: [2884]: info: Link eave:eth0 dead. Nov 27 23:13:04 candy heartbeat: [2884]: info: Link eave:eth1 dead. Nov 27 23:13:04 candy heartbeat: [2884]: info: Link eave:bond0 dead. harc[8844]: 2014/11/27_23:13:04 info: Running /etc/ha.d//rc.d/status status Nov 27 23:13:04 candy heartbeat: [8845]: info: No local resources [/usr/share/heartbeat/ResourceManager listkeys candy] to acquire. mach_down[8871]: 2014/11/27_23:13:04 info: Taking over resource group IPaddr::192.168.69.13/24/bond0 ResourceManager[8895]: 2014/11/27_23:13:04 info: Acquiring resource group: eave IPaddr::192.168.69.13/24/bond0 drbddisk::nfs Filesystem::/dev/drbd0::/nfs::reiserfs nfs-common nfs-kernel-server IPaddr[8921]: 2014/11/27_23:13:04 INFO: Running OK Filesystem[8990]: 2014/11/27_23:13:04 INFO: Running OK mach_down[8871]: 2014/11/27_23:13:04 info: mach_down takeover complete for node eave.
STOP HERE IF YOU ARE SATISFIED WITH YOUR CLUSTER. MOVE FORWARD ONLY IF YOU DO NOT WANT R1-STYLE CLUSTER!
Heartbeat CRM-enabled cluster:
This is a bit more complicated, but the advantage is that you can configure the resource group with more than two nodes. Further information about CRM can be found in the links at the end of this article.
Now, in order to configure your cluster, you can do the following from one node only, because the beauty of a CRM cluster is that the configuration is automatically propagated to all nodes.
But before moving forward, we must configure STONITH, otherwise we will get an error when trying to commit our configuration.
Please follow this tutorial http://tar.gz.ro/CRM-cluster-STONITH.html and get back here afterwards.
Now, I presume you have either configured or disabled STONITH, so we can move forward.
So, take your favorite node and start typing.
Using the crm shell, we first create the primitive resource and then embed it into a master/slave resource:
crm configure
primitive drbd_main ocf:heartbeat:drbd \
        params drbd_resource=nfs \
        op monitor role=Master interval=59s timeout=30s \
        op monitor role=Slave interval=60s timeout=30s
ms ms-drbd_main drbd_main \
        meta clone-max=2 notify=true globally-unique=false target-role=stopped
commit
quit
Work output:
root@eave:~# crm
crm(live)# configure
crm(live)configure# primitive drbd_main ocf:heartbeat:drbd \
>       params drbd_resource=nfs \
>       op monitor role=Master interval=59s timeout=30s \
>       op monitor role=Slave interval=60s timeout=30s
WARNING: drbd_main: default timeout 20s for start is smaller than the advised 240
WARNING: drbd_main: default timeout 20s for stop is smaller than the advised 100
crm(live)configure# ms ms-drbd_main drbd_main \
>       meta clone-max=2 notify=true globally-unique=false target-role=stopped
crm(live)configure# commit
WARNING: drbd_main: default timeout 20s for start is smaller than the advised 240
WARNING: drbd_main: default timeout 20s for stop is smaller than the advised 100
crm(live)configure# quit
bye
If you have a cluster with only two nodes, you can skip this step, but I would recommend doing it anyway, just in case you add another node in the future. This placement constraint keeps the master/slave set off any node other than eave and candy:
crm configure location ms-drbd_main-placement ms-drbd_main rule -inf: \#uname ne eave and \#uname ne candy
Work output:
root@eave:~# crm configure location ms-drbd_main-placement ms-drbd_main rule -inf: \#uname ne eave and \#uname ne candy
WARNING: ms-drbd_main-placement: referenced node eave does not exist
WARNING: ms-drbd_main-placement: referenced node candy does not exist
Now it is time to select a node as master:
crm configure location ms-drbd_main-master-on-eave ms-drbd_main rule role=master 100: \#uname eq eave
Work output:
root@eave:~# crm configure location ms-drbd_main-master-on-eave ms-drbd_main rule role=master 100: \#uname eq eave
WARNING: ms-drbd_main-master-on-eave: referenced node eave does not exist
Hooray, now it is time to start your resource; since you have selected a master, it will be promoted there:
crm resource start ms-drbd_main
At this point, our cluster should look like this:
root@eave:~# crm_mon -1
============
Last updated: Sat Nov 29 04:34:14 2014
Last change: Sat Nov 29 04:33:46 2014 via cibadmin on eave
Stack: Heartbeat
Current DC: candy (gf034gk1-682c-3fg3-a7a7-4w9823232a3t) - partition with quorum
Version: 1.1.7-ee0730e13d124c3d58f00016c3376a1de5323cff
2 Nodes configured, unknown expected votes
4 Resources configured.
============

Online: [ eave candy ]

 Master/Slave Set: ms-drbd_main [drbd_main]
     Masters: [ eave ]
     Slaves: [ candy ]
 Clone Set: fencing_rg [stonith_rg]
     Started: [ candy eave ]
DRBD is up and running, but alone it won't serve you much; you have to run a service on top of it. In our example NFS is the main service, but we also need the filesystem and an IP address. All of this will have to fail over together from one node to another.
First we create the filesystem resource:
crm configure primitive drbd_fs ocf:heartbeat:Filesystem params fstype=reiserfs directory=/nfs device=/dev/drbd0 meta target-role=stopped
Work output:
root@eave:~# crm configure primitive drbd_fs ocf:heartbeat:Filesystem params fstype=reiserfs directory=/nfs device=/dev/drbd0 meta target-role=stopped
WARNING: drbd_fs: default timeout 20s for start is smaller than the advised 60
WARNING: drbd_fs: default timeout 20s for stop is smaller than the advised 60
Now we teach the cluster to mount the filesystem on the node where DRBD is primary, and only after it has been promoted:
crm configure
order ms-drbd_main-before-drbd_fs mandatory: ms-drbd_main:promote drbd_fs:start
colocation drbd_fs-on-ms-drbd_main inf: drbd_fs ms-drbd_main:Master
commit
quit
Work output:
root@eave:~# crm
crm(live)# configure
crm(live)configure# order ms-drbd_main-before-drbd_fs mandatory: ms-drbd_main:promote drbd_fs:start
crm(live)configure# colocation drbd_fs-on-ms-drbd_main inf: drbd_fs ms-drbd_main:Master
crm(live)configure# commit
crm(live)configure# quit
bye
root@eave:~#
Now you can activate the filesystem resource:
crm resource start drbd_fs
Work output:
root@eave:~# crm resource start drbd_fs
root@eave:~# crm_mon -1
============
Last updated: Sat Nov 29 04:37:09 2014
Last change: Sat Nov 29 04:36:47 2014 via cibadmin on eave
Stack: Heartbeat
Current DC: candy (gf034gk1-682c-3fg3-a7a7-4w9823232a3t) - partition with quorum
Version: 1.1.7-ee0730e13d124c3d58f00016c3376a1de5323cff
2 Nodes configured, unknown expected votes
5 Resources configured.
============

Online: [ eave candy ]

 Master/Slave Set: ms-drbd_main [drbd_main]
     Masters: [ eave ]
     Slaves: [ candy ]
 Clone Set: fencing_rg [stonith_rg]
     Started: [ candy eave ]
 drbd_fs        (ocf::heartbeat:Filesystem):    Started eave
root@eave:~# df | grep nfs
/dev/drbd0      26213596   1057876  25155720   5% /nfs
Now it is time to also add the IP address and the NFS resources:
crm configure
primitive nfs_server lsb:nfs-kernel-server \
        op monitor interval="10" timeout="15" on-fail="restart" start-delay="15"
primitive nfs_common lsb:nfs-common \
        op monitor interval="5" timeout="15" on-fail="restart" start-delay="15"
primitive nfs_ip ocf:heartbeat:IPaddr2 \
        params ip=192.168.69.13 broadcast=192.168.69.255 nic=bond0 cidr_netmask=24 \
        op monitor interval=21s timeout=5s
group nfs-group drbd_fs nfs_server nfs_common nfs_ip
order ms-drbd_main-before-nfs-group mandatory: ms-drbd_main:promote nfs-group:start
colocation nfs-group-on-ms-drbd_main inf: nfs-group ms-drbd_main:Master
commit
end
resource start ms-drbd_main
quit
Work output:
root@eave:~# crm
crm(live)# configure
crm(live)configure# primitive nfs_server lsb:nfs-kernel-server \
>       op monitor interval="10" timeout="15" on-fail="restart" start-delay="15"
crm(live)configure# primitive nfs_common lsb:nfs-common \
>       op monitor interval="5" timeout="15" on-fail="restart" start-delay="15"
crm(live)configure# primitive nfs_ip ocf:heartbeat:IPaddr2 \
>       params ip=192.168.69.13 broadcast=192.168.69.255 nic=bond0 cidr_netmask=24 \
>       op monitor interval=21s timeout=5s
WARNING: nfs_ip: specified timeout 5s for monitor is smaller than the advised 20s
crm(live)configure# group nfs-group drbd_fs nfs_server nfs_common nfs_ip
INFO: resource references in colocation:drbd_fs-on-ms-drbd_main updated
INFO: resource references in order:ms-drbd_main-before-drbd_fs updated
crm(live)configure# order ms-drbd_main-before-nfs-group mandatory: ms-drbd_main:promote nfs-group:start
crm(live)configure# colocation nfs-group-on-ms-drbd_main inf: nfs-group ms-drbd_main:Master
crm(live)configure# commit
WARNING: nfs_ip: specified timeout 5s for monitor is smaller than the advised 20s
crm(live)configure# end
crm(live)# resource start ms-drbd_main
crm(live)# quit
bye
Hooray, we have created the desired cluster!
At this moment your cluster should look like this and you should have all resources up and running on the right node:
root@eave:~# crm_mon -1
============
Last updated: Sat Nov 29 04:54:48 2014
Last change: Sat Nov 29 04:49:49 2014 via crm_resource on eave
Stack: Heartbeat
Current DC: candy (gf034gk1-682c-3fg3-a7a7-4w9823232a3t) - partition with quorum
Version: 1.1.7-ee0730e13d124c3d58f00016c3376a1de5323cff
2 Nodes configured, unknown expected votes
8 Resources configured.
============

Online: [ eave candy ]

 Master/Slave Set: ms-drbd_main [drbd_main]
     Masters: [ eave ]
     Slaves: [ candy ]
 Clone Set: fencing_rg [stonith_rg]
     Started: [ candy eave ]
 Resource Group: nfs-group
     drbd_fs    (ocf::heartbeat:Filesystem):    Started eave
     nfs_server (lsb:nfs-kernel-server):        Started eave
     nfs_common (lsb:nfs-common):               Started eave
     nfs_ip     (ocf::heartbeat:IPaddr2):       Started eave
root@eave:~# ip address show bond0
6: bond0: <BROADCAST,MULTICAST,MASTER,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP
    link/ether 08:00:27:09:76:28 brd ff:ff:ff:ff:ff:ff
    inet 192.168.69.11/24 brd 192.168.69.255 scope global bond0
    inet 192.168.69.13/24 brd 192.168.69.255 scope global secondary bond0
    inet6 fe80::a00:27ff:fe09:7628/64 scope link tentative dadfailed
       valid_lft forever preferred_lft forever
root@eave:~# df | grep nfs
/dev/drbd0      26213596   1057876  25155720   5% /nfs
root@eave:~# ps -ef | grep -i nfs
root      3442     2  0 Nov27 ?        00:00:00 [nfsiod]
root     18771     2  0 04:56 ?        00:00:00 [nfsd4]
root     18772     2  0 04:56 ?        00:00:00 [nfsd4_callbacks]
root     18773     2  0 04:56 ?        00:00:00 [nfsd]
root     18774     2  0 04:56 ?        00:00:00 [nfsd]
root     18775     2  0 04:56 ?        00:00:00 [nfsd]
root     18776     2  0 04:56 ?        00:00:00 [nfsd]
root     18777     2  0 04:56 ?        00:00:00 [nfsd]
root     18778     2  0 04:56 ?        00:00:00 [nfsd]
root     18779     2  0 04:56 ?        00:00:00 [nfsd]
root     18780     2  0 04:56 ?        00:00:00 [nfsd]
root     19876 12298  0 05:01 pts/0    00:00:00 grep -i nfs
root@eave:~# exportfs
/nfs/squid      uranus-iscsi
/nfs/squid      jupiter-iscsi
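To test a failover without rebooting, you can put the current master into standby, watch the resources move to the other node, and then bring it back online:
crm node standby eave
crm_mon -1
crm node online eave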
Optionally, if you want to keep the master role on a node with network connectivity, you can do it with the help of pingd, as follows:
crm configure location ms-drbd_master_on_connected_node ms-drbd_main \
        rule role=master -inf: not_defined pingd or pingd lte 0
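Note that this rule relies on a pingd attribute being maintained on each node. If you do not have one yet, a possible sketch looks like this (192.168.69.1 is only a placeholder ping target; pick an address that matters to your clients):
crm configure primitive pingd ocf:pacemaker:pingd \
        params host_list=192.168.69.1 multiplier=100 \
        op monitor interval=15s timeout=20s
crm configure clone pingd-clone pingd meta globally-unique=false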
It is important to remember that all CRM settings end up in a single file, /var/lib/heartbeat/crm/cib.xml. However, do not modify it directly, because it will be overwritten.
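If you want to look at the configuration or keep a backup of it, do it through the tools instead:
crm configure show > /root/crm-config-backup.txt
cibadmin --query > /root/cib-backup.xml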
Inspiration:
http://www.howtoforge.com/high_availability_nfs_drbd_heartbeat
http://wiki.centos.org/HowTos/Ha-Drbd
http://www.remsys.com/blog/heartbeat
http://www.drbd.org/users-guide-8.3/s-heartbeat-crm.html
http://www.drbd.org/users-guide-8.3/s-heartbeat-r1.html
http://clusterlabs.org/wiki/DRBD_HowTo_1.0