问题描述:
HACMP5.5+AIX 5.3.10+CONCURRENTVG+ORACLE 10.2.0.4 RAC
测试库已经在RAC上正常运行。
因为RAC的一个节点老是无法加入DLPAR。
交给IBM的二线工程师,没有结果,说要发给三线才能解决。
因为测试库是流复制的目的端,如果重建整个目的端RAC,耗费太多。
是否有一种方式可以直接把一个节点干掉,重装OS后恢复整个应用?
最后决定采用下面的方式来恢复整个数据库。
|||||||||
在第一个节点清除相关日志信息,着重关注调试过程中的错误日志
errclear 0
||||||||
|||||||||
前置步骤:备份 ocr_disk 和 vote_disk
# ls -l vote_disk
crw-r--r-- 1 oracle oinstall 40, 3 Mar 25 12:01 vote_disk
# ls -l ocr_disk
crw-r----- 1 root oinstall 40, 2 Mar 25 12:01 ocr_disk
#
#
# dd if=/dev/vote_disk of=/home/oracle/vote_disk.bak
# dd if=/dev/vote_disk of=/home/oracle/vote_disk.bak
2096640+0 records in.
2096640+0 records out.
# dd if=/dev/ocr_disk of=/home/oracle/ocr_disk.bak
2096640+0 records in.
2096640+0 records out.
# dd if=/dev/ocr_disk of=/home/oracle/ocr_disk.bak
# dd if=/dev/raw/raw2 of=/home/oracle/vote_disk.bak
# dd if=/dev/raw/raw2 of=/home/oracle/ocr_disk.bak
|||||||||||||
在zhyw2上扫描阵列,确认 powerpath路径两节点一致
[root@zhyw2]#./symdev list pd
Symmetrix ID: 000290103435
Device Name Directors Device
--------------------------- ------------- -------------------------------------
Cap
Sym Physical SA DA :IT Config Attribute Sts (MB)
--------------------------- ------------- -------------------------------------
0022 /dev/rhdisk3 09B:1 01A:C0 2-Way Mir N/Grp'd VCM WD 3
0035 /dev/rhdiskpower64 08B:1 16D:D7 2-Way Mir N/Grp'd RW 3
0036 /dev/rhdiskpower65 08B:1 16C:C7 2-Way Mir N/Grp'd RW 3
0065 /dev/rhdiskpower0 09B:1 01B:DC 2-Way Mir N/Grp'd RW 3
0066 /dev/rhdiskpower1 09B:1 16A:CB 2-Way Mir N/Grp'd RW 3
0084 /dev/rhdiskpower2 09B:1 16A:D0 2-Way Mir N/Grp'd RW 1024
0085 /dev/rhdiskpower3 09B:1 16B:D3 2-Way Mir N/Grp'd RW 1024
013A /dev/rhdiskpower4 09B:1 16B:CE 2-Way Mir N/Grp'd (M) RW 49140
013E /dev/rhdiskpower5 09B:1 01C:D5 2-Way Mir N/Grp'd (M) RW 49140
0142 /dev/rhdiskpower6 09B:1 16C:D6 2-Way Mir N/Grp'd (M) RW 49140
0146 /dev/rhdiskpower7 09B:1 01C:D7 2-Way Mir N/Grp'd (M) RW 49140
014A /dev/rhdiskpower8 09B:1 16C:D8 2-Way Mir N/Grp'd (M) RW 49140
014E /dev/rhdiskpower9 09B:1 16A:C3 2-Way Mir N/Grp'd (M) RW 49140
0152 /dev/rhdiskpower10 09B:1 01A:C2 2-Way Mir N/Grp'd (M) RW 49140
0156 /dev/rhdiskpower11 09B:1 16A:C1 2-Way Mir N/Grp'd (M) RW 49140
015A /dev/rhdiskpower12 09B:1 01A:D9 2-Way Mir N/Grp'd (M) RW 49140
015E /dev/rhdiskpower13 09B:1 16A:D8 2-Way Mir N/Grp'd (M) RW 49140
0162 /dev/rhdiskpower14 09B:1 01A:DB 2-Way Mir N/Grp'd (M) RW 49140
0166 /dev/rhdiskpower15 09B:1 16A:DA 2-Way Mir N/Grp'd (M) RW 49140
016A /dev/rhdiskpower16 09B:1 01A:D5 2-Way Mir N/Grp'd (M) RW 49140
016E /dev/rhdiskpower17 09B:1 16A:D4 2-Way Mir N/Grp'd (M) RW 49140
0172 /dev/rhdiskpower18 09B:1 01A:D7 2-Way Mir N/Grp'd (M) RW 49140
0176 /dev/rhdiskpower19 09B:1 16A:D6 2-Way Mir N/Grp'd (M) RW 49140
017A /dev/rhdiskpower20 09B:1 16C:C3 2-Way Mir N/Grp'd (M) RW 49140
017E /dev/rhdiskpower21 09B:1 01C:C2 2-Way Mir N/Grp'd (M) RW 49140
0182 /dev/rhdiskpower22 09B:1 16C:C1 2-Way Mir N/Grp'd (M) RW 49140
0186 /dev/rhdiskpower23 09B:1 01C:C0 2-Way Mir N/Grp'd (M) RW 49140
018A /dev/rhdiskpower24 09B:1 16C:D0 2-Way Mir N/Grp'd (M) RW 49140
018E /dev/rhdiskpower25 09B:1 16C:DA 2-Way Mir N/Grp'd (M) RW 49140
0192 /dev/rhdiskpower26 09B:1 01C:D9 2-Way Mir N/Grp'd (M) RW 49140
0196 /dev/rhdiskpower27 09B:1 16C:DC 2-Way Mir N/Grp'd (M) RW 49140
019A /dev/rhdiskpower28 09B:1 01C:DB 2-Way Mir N/Grp'd (M) RW 49140
019E /dev/rhdiskpower29 09B:1 16B:CE 2-Way Mir N/Grp'd (M) RW 49140
01A2 /dev/rhdiskpower30 09B:1 01C:D5 2-Way Mir N/Grp'd (M) RW 49140
01A6 /dev/rhdiskpower31 09B:1 16C:D6 2-Way Mir N/Grp'd (M) RW 49140
01AA /dev/rhdiskpower32 09B:1 01C:D7 2-Way Mir N/Grp'd (M) RW 49140
01AE /dev/rhdiskpower33 09B:1 16C:D8 2-Way Mir N/Grp'd (M) RW 49140
01B2 /dev/rhdiskpower34 09B:1 16A:C3 2-Way Mir N/Grp'd (M) RW 49140
01B6 /dev/rhdiskpower35 09B:1 01A:C2 2-Way Mir N/Grp'd (M) RW 49140
01BA /dev/rhdiskpower36 09B:1 16A:C1 2-Way Mir N/Grp'd (M) RW 49140
01BE /dev/rhdiskpower37 09B:1 01A:D9 2-Way Mir N/Grp'd (M) RW 49140
01C2 /dev/rhdiskpower38 09B:1 16A:D8 2-Way Mir N/Grp'd (M) RW 49140
01C6 /dev/rhdiskpower39 09B:1 01A:DB 2-Way Mir N/Grp'd (M) RW 49140
01CA /dev/rhdiskpower40 09B:1 16A:DA 2-Way Mir N/Grp'd (M) RW 49140
01CE /dev/rhdiskpower41 09B:1 01A:D5 2-Way Mir N/Grp'd (M) RW 49140
01D2 /dev/rhdiskpower42 09B:1 16A:D4 2-Way Mir N/Grp'd (M) RW 49140
01D6 /dev/rhdiskpower43 09B:1 01A:D7 2-Way Mir N/Grp'd (M) RW 49140
01DA /dev/rhdiskpower44 09B:1 16A:D6 2-Way Mir N/Grp'd (M) RW 49140
01DE /dev/rhdiskpower45 09B:1 16C:C3 2-Way Mir N/Grp'd (M) RW 49140
01E2 /dev/rhdiskpower46 09B:1 01C:C2 2-Way Mir N/Grp'd (M) RW 49140
01E6 /dev/rhdiskpower47 09B:1 16C:C1 2-Way Mir N/Grp'd (M) RW 49140
01EA /dev/rhdiskpower48 09B:1 01C:C0 2-Way Mir N/Grp'd (M) RW 49140
01EE /dev/rhdiskpower49 09B:1 16C:D0 2-Way Mir N/Grp'd (M) RW 49140
01F2 /dev/rhdiskpower50 09B:1 16C:DA 2-Way Mir N/Grp'd (M) RW 49140
01F6 /dev/rhdiskpower51 09B:1 01C:D9 2-Way Mir N/Grp'd (M) RW 49140
01FA /dev/rhdiskpower52 09B:1 16C:DC 2-Way Mir N/Grp'd (M) RW 49140
01FE /dev/rhdiskpower53 09B:1 01C:DB 2-Way Mir N/Grp'd (M) RW 49140
0202 /dev/rhdiskpower54 09B:1 16B:CE 2-Way Mir N/Grp'd (M) RW 49140
0206 /dev/rhdiskpower55 09B:1 01C:D5 2-Way Mir N/Grp'd (M) RW 49140
020A /dev/rhdiskpower56 09B:1 16C:D6 2-Way Mir N/Grp'd (M) RW 49140
020E /dev/rhdiskpower57 09B:1 01C:D7 2-Way Mir N/Grp'd (M) RW 49140
0212 /dev/rhdiskpower58 09B:1 16C:D8 2-Way Mir N/Grp'd (M) RW 49140
0216 /dev/rhdiskpower59 09B:1 16A:C3 2-Way Mir N/Grp'd (M) RW 49140
021A /dev/rhdiskpower60 09B:1 01A:C2 2-Way Mir N/Grp'd (M) RW 49140
021E /dev/rhdiskpower61 09B:1 16A:C1 2-Way Mir N/Grp'd (M) RW 49140
0222 /dev/rhdiskpower62 09B:1 01A:D9 2-Way Mir N/Grp'd (M) RW 49140
0226 /dev/rhdiskpower63 09B:1 16A:D8 2-Way Mir N/Grp'd (M) RW 49140
在zhyw2上备份hacmp信息
备份zhyw2相关的rac参数
|||||||||||||||||||||||||||
1、备份 lpar zhyw2_old 的 ORACLE_BASE,以及相应的启动参数
ls -l /etc/init.cssd /etc/init.crs /etc/init.crsd /etc/init.evmd /etc/rc.d/rc2.d/K96init.crs
ls -l /etc/rc.d/rc2.d/S96init.crs /etc/oracle/scls_scr /etc/oracle/oprocd /etc/inittab.crs
ls -l /etc/inittab
ls -l /etc/oracle
/etc/oracle/scls_scr/<节点名>/oracle/cssfatal
$ORACLE_CRS_HOME/install/paramfile.crs
[root@zhyw2]#ls -l /etc/init.cssd /etc/init.crs /etc/init.crsd /etc/init.evmd /etc/rc.d/rc2.d/K96init.crs
-r-xr-xr-x 1 root system 2236 Mar 13 16:02 /etc/init.crs
-r-xr-xr-x 1 root system 4944 Mar 13 16:02 /etc/init.crsd
-r-xr-xr-x 1 root system 54038 Mar 13 16:02 /etc/init.cssd
-r-xr-xr-x 1 root system 3226 Mar 13 16:02 /etc/init.evmd
lrwxrwxrwx 1 root system 13 Mar 13 16:02 /etc/rc.d/rc2.d/K96init.crs -> /etc/init.crs
lrwxrwxrwx 1 root system 13 Mar 13 16:02 /etc/rc.d/rc2.d/S96init.crs -> /etc/init.crs
/etc/oracle/oprocd:
total 0
drwxrwx--- 2 root system 256 Mar 13 07:35 check
drwxrwx--- 2 root system 256 Mar 13 07:35 fatal
drwxrwx--- 2 root system 256 Mar 13 07:35 stop
/etc/oracle/scls_scr:
total 0
drwxr-xr-x 4 root system 256 Mar 13 07:35 zhyw2
[root@zhyw2]#
[root@zhyw2]#ls -l /etc/inittab
-rw-r--r-- 1 root system 4014 Mar 25 02:05 /etc/inittab
||||||||||||||||||||||||||||||||||
2、在HMC上新建profile zhyw2_new,新增一个disk加入这个profile,并把cdrom/rmt也加入这个profile
3、对zhyw1做mksysb,把备份的磁带恢复到zhyw2_new上来
4、把zhyw2_new对应的etherchannel 去掉,adapter删掉。
(确认1已经成功了)
4-1>把adapter加回来(hba+网卡)
4-2>重新扫盘(cfgmgr -v, /usr/symcli/bin/symcfg dis,powermt config,powermt save)
4-3>添加ip地址,主机名
确认powerpath路径没有问题,对相应的vote、ocr做mknod操作
0084 /dev/rhdiskpower2 09B:1 16A:D0 2-Way Mir N/Grp'd RW 1024
0085 /dev/rhdiskpower3 09B:1 16B:D3 2-Way Mir N/Grp'd RW 1024
ls -l /dev/*hdiskpower2
ls -l /dev/*hdiskpower3
brw-rw---- 1 root system 40, 2 Mar 25 19:38 /dev/hdiskpower2
crw-rw---- 1 root system 40, 2 Mar 25 19:43 /dev/rhdiskpower2
brw-rw---- 1 root system 40, 3 Mar 25 19:38 /dev/hdiskpower3
crw-rw---- 1 root system 40, 3 Mar 25 19:43 /dev/rhdiskpower3
mknod /dev/ocr_disk c 40 2
mknod /dev/vote_disk c 40 3
# ls -l /dev/*_disk
crw-r--r-- 1 root system 40, 2 Mar 25 19:45 /dev/ocr_disk
crw-r--r-- 1 root system 40, 3 Mar 25 19:45 /dev/vote_disk
# chown oracle:oinstall *_disk
4-4>做dlpar,确认新建节点可以加入这个dlpar组
4-5>调整HA
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
把zhyw2_old的 HBA卡,网卡加入
添加 pvid
chdev -l hdiskpower64 -a pv=yes
chdev -l hdiskpower65 -a pv=yes
chdev -l hdiskpower64 -a reserve_lock=no
chdev -l hdiskpower65 -a reserve_lock=no
mknod操作
cd /dev/
ls -l /dev/*hdiskpower64
brw-rw---- 1 root system 39, 64 Mar 09 15:12 /dev/hdiskpower64
crw-rw---- 1 root system 39, 64 Mar 09 15:24 /dev/rhdiskpower64
mknod /dev/ocr_disk c 39 64
ls -l /dev/*hdiskpower65
brw-rw---- 1 root system 39, 65 Mar 09 15:12 /dev/hdiskpower65
crw-rw---- 1 root system 39, 65 Mar 09 15:24 /dev/rhdiskpower65
mknod /dev/vote_disk c 39 65
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
4-3>把ORACLE_BASE 干掉,把备份的ORACLE_BASE tar 回来。
修改.profile ,把instance-id 换成第2个节点的
4-4>恢复这些文件
/etc/init.cssd /etc/init.crs /etc/init.crsd /etc/init.evmd /etc/rc.d/rc2.d/K96init.crs
/etc/rc.d/rc2.d/S96init.crs /etc/oracle/scls_scr /etc/oracle/oprocd /etc/inittab.crs
/etc/inittab
/etc/oracle/scls_scr/<节点名>/oracle/cssfatal
$ORACLE_CRS_HOME/install/paramfile.crs
4-5>为zhyw2_new重新连线,尝试启动crs
5>停zhyw1 ,停HA,concurrentvg
[root@zhyw1]#ls -al *vg*
crw-rw---- 1 root system 10, 0 Mar 02 15:03 IPL_rootvg
crw------- 1 root system 10, 0 Mar 08 14:08 __vg10
crw------- 1 root system 100, 0 Mar 15 22:05 __vg100
crw------- 1 root system 101, 0 Mar 15 23:27 __vg101
crw------- 1 root system 102, 0 Mar 16 04:53 __vg102
crw------- 1 root system 103, 0 Mar 16 05:27 __vg103
crw------- 1 root system 104, 0 Mar 16 05:27 __vg104
crw------- 1 root system 105, 0 Mar 16 05:27 __vg105
crw------- 1 root system 106, 0 Mar 16 05:27 __vg106
crw------- 1 root system 42, 0 Mar 16 00:12 __vg42
crw-rw---- 1 root system 42, 0 Mar 16 00:11 archvg
crw-rw---- 1 root system 100, 0 Mar 15 22:01 oravg
crw-rw---- 1 root system 101, 0 Mar 15 23:26 oravg2
crw-rw---- 1 root system 102, 0 Mar 16 04:53 oravg3
crw-rw---- 1 root system 103, 0 Mar 16 05:27 oravg4
crw-rw---- 1 root system 104, 0 Mar 16 05:27 oravg5
crw-rw---- 1 root system 105, 0 Mar 16 05:27 oravg6
crw-rw---- 1 root system 106, 0 Mar 16 05:27 oravg7
crw-rw---- 1 root system 10, 0 Mar 02 14:18 rootvg
importvg -y oravg -V 100 hdiskpower63
importvg -y oravg2 -V 101 hdiskpower59
importvg -y oravg3 -V 102 hdiskpower48
importvg -y oravg4 -V 103 hdiskpower40
importvg -y oravg5 -V 104 hdiskpower32
importvg -y oravg6 -V 105 hdiskpower24
importvg -y oravg7 -V 106 hdiskpower16
6>在zhyw1上调试HA,启动HACMP
7>启动zhyw2_new上的instance