Назад | Перейти на главную страницу

Кластер из 2 узлов Ubuntu с PostgreSQL 9.3, Pacemaker и потоковой репликацией

Я пытаюсь настроить кластер из 2 узлов с PostgreSQL 9.3 и потоковой репликацией. Потоковая репликация успешно настроена: мастер принимает подключения и отправляет данные подчиненному. Я могу выполнить аварийное переключение вручную, создав файл триггера, а затем вернуть прежний мастер в строй: обновить его каталог данных с помощью rsync, снова подключить старый мастер как подчиненный и снова выполнить аварийное переключение.

На данный момент все работает нормально.

Теперь я хочу настроить автоматическое переключение при отказе с виртуальным (плавающим) IP-адресом с помощью Pacemaker. Мастер всегда должен иметь VIP для приема клиентских подключений на чтение и запись. Если служба PostgreSQL на ведущем узле выходит из строя, должно запускаться аварийное переключение, а плавающий IP-адрес должен перемещаться на ведомый узел (новый ведущий).

Я пробую это сделать, следуя руководствам по настройке https://github.com/t-matsuo/resource-agents/wiki/Resource-Agent-for-PostgreSQL-9.1-streaming-replication и https://github.com/t-matsuo/resource-agents/wiki, но, к сожалению, Pacemaker не запускает postgres, и только vip-master назначается первому запущенному хосту.

Моя конфигурация Pacemaker:

property \
    no-quorum-policy="ignore" \
    stonith-enabled="false" \
    crmd-transition-delay="0s"


primitive pgsql ocf:heartbeat:pgsql \
    params \
        pgctl="/usr/bin/pg_ctlcluster" \
        psql="/usr/bin/psql" \
        pgdata="/database/postgresql/9.3/main/" \
        start_opt="-p 5432" \
        rep_mode="sync" \
        node_list="robin marshall" \
        restore_command="" \
        primary_conninfo_opt="keepalives_idle=60 keepalives_interval=5 keepalives_count=5" \
        master_ip="172.16.1.1" \
        stop_escalate="0" \
    op start   timeout="60s" interval="0s"  on-fail="restart" \
    op monitor timeout="60s" interval="5s" on-fail="restart" \
    op monitor timeout="60s" interval="2s"  on-fail="restart" role="Master" \
    op promote timeout="60s" interval="0s"  on-fail="restart" \
    op demote  timeout="60s" interval="0s"  on-fail="stop" \
    op stop    timeout="60s" interval="0s"  on-fail="block" \
    op notify  timeout="60s" interval="0s"

primitive pingCheck ocf:pacemaker:ping \
    params \
        name="default_ping_set" \
        host_list="172.16.0.1" \
        multiplier="100" \
    op start   timeout="60s" interval="0s"  on-fail="restart" \
    op monitor timeout="60s" interval="2s" on-fail="restart" \
    op stop    timeout="60s" interval="0s"  on-fail="ignore"


rsc_defaults \
    resource-stickiness="INFINITY" \
    migration-threshold="1"

ms msPostgresql pgsql \
    meta \
        master-max="1" \
        master-node-max="1" \
        clone-max="2" \
        clone-node-max="1" \
        notify="true"


primitive vip-master ocf:heartbeat:IPaddr2 \
    params \
        ip="172.16.1.1" \
        nic="bond0" \
        cidr_netmask="16" \
    op start   timeout="60s" interval="0s"  on-fail="stop" \
    op monitor timeout="60s" interval="2s" on-fail="restart" \
    op stop    timeout="60s" interval="0s"  on-fail="block"


primitive vip-slave ocf:heartbeat:IPaddr2 \
    params \
        ip="172.16.1.2" \
        nic="bond0" \
        cidr_netmask="16" \
    meta \
        resource-stickiness="1" \
    op start   timeout="60s" interval="0s"  on-fail="restart" \
    op monitor timeout="60s" interval="2s" on-fail="restart" \
    op stop    timeout="60s" interval="0s"  on-fail="block"


clone clnPingCheck pingCheck
group master-group \
      vip-master \
      vip-rep 

location rsc_location-1 vip-slave \
    rule  200: pgsql-status eq "HS:sync" \
    rule  100: pgsql-status eq "PRI" \
    rule  -inf: not_defined pgsql-status \
    rule  -inf: pgsql-status ne "HS:sync" and pgsql-status ne "PRI"


location rsc_location-2 msPostgresql \
    rule -inf: not_defined default_ping_set or default_ping_set lt 100

colocation rsc_colocation-1 inf: msPostgresql        clnPingCheck
colocation rsc_colocation-2 inf: master-group        msPostgresql:Master

order rsc_order-1 0: clnPingCheck          msPostgresql
order rsc_order-2 0: msPostgresql:promote  master-group:start   symmetrical=false
order rsc_order-3 0: msPostgresql:demote   master-group:stop    symmetrical=false

Конфигурация corosync:

totem {
        version: 2

        # How long before declaring a token lost (ms)
        token: 3000

        # How many token retransmits before forming a new configuration
        token_retransmits_before_loss_const: 10

        # How long to wait for join messages in the membership protocol (ms)
        join: 60

        # How long to wait for consensus to be achieved before starting a new round of membership configuration (ms)
        consensus: 3600

        # Turn off the virtual synchrony filter
        vsftype: none

        # Number of messages that may be sent by one processor on receipt of the token
        max_messages: 20

        # Limit generated nodeids to 31-bits (positive signed integers)
        clear_node_high_bit: yes

        # Disable encryption
        secauth: off

        # How many threads to use for encryption/decryption
        threads: 0

        # Optionally assign a fixed node id (integer)
        # nodeid: 1234

        # This specifies the mode of redundant ring, which may be none, active, or passive.
        rrp_mode: none

        interface {
                # The following values need to be set based on your environment 
                ringnumber: 0
                #bindnetaddr: 127.0.0.1 
                bindnetaddr: 172.16.1.10
                mcastaddr: 226.94.1.1
                mcastport: 5405
        }
}

amf {
        mode: disabled
}

quorum {
        # Quorum for the Pacemaker Cluster Resource Manager
        provider: corosync_votequorum
        expected_votes: 1
}

aisexec {
        user:   root
        group:  root
}

logging {
        fileline: off
        to_stderr: yes
        to_logfile: no
        to_syslog: yes
        syslog_facility: daemon
        debug: off
        timestamp: on
        logger_subsys {
                subsys: AMF
                debug: off
                tags: enter|leave|trace1|trace2|trace3|trace4|trace6
        }
}

Команда crm_mon -A показывает мне следующее:

Last updated: Wed Oct 29 20:02:32 2014
Last change: Wed Oct 29 19:51:36 2014 via crm_attribute on robin
Stack: corosync
Current DC: robin (739246346) - partition with quorum
Version: 1.1.10-42f2063
2 Nodes configured
7 Resources configured


Online: [ robin ]
OFFLINE: [ marshall ]

vip-master      (ocf::heartbeat:IPaddr2):       Started robin
 Clone Set: clnPingCheck [pingCheck]
     Started: [ robin ]
     Stopped: [ marshall ]

Node Attributes:
* Node robin:
    + default_ping_set                  : 100
    + pgsql-data-status                 : LATEST    

Failed actions:
    pgsql_start_0 (node=robin, call=48, rc=5, status=complete, last-rc-change=Wed Oct 29 20:01:48 2014
, queued=19ms, exec=0ms
): not installed

Я смотрю в системный журнал и вижу, что у psql какие-то проблемы, но не могу понять, в чем именно дело.

Oct 29 20:00:40 robin crmd[14139]:  warning: do_log: FSA: Input I_DC_TIMEOUT from crm_timer_popped() received in state S_PENDING
Oct 29 20:00:41 robin pgsql(pgsql)[14148]: INFO: Configuration file is /database/postgresql/9.3/main//postgresql.conf not readable during probe.
Oct 29 20:00:41 robin pgsql(pgsql)[14148]: INFO: Don't check /database/postgresql/9.3/main/ during probe
Oct 29 20:00:42 robin pgsql(pgsql)[14148]: INFO: Changing pgsql-data-status on  : ->LATEST.
Oct 29 20:00:42 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86400). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:00:43 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86399). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:00:44 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86398). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:00:45 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86397). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:00:46 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86396). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:00:47 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86395). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:00:48 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86394). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:00:49 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86393). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:00:51 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86392). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:00:52 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86391). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:00:53 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86390). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:00:54 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86389). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:00:55 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86388). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:00:56 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86387). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:00:57 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86386). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:00:58 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86385). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:00:59 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86384). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:01:00 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86383). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:01:02 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86382). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:01:03 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86381). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:01:04 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86380). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:01:05 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86379). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:01:06 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86378). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:01:07 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86377). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:01:08 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86376). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:01:09 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86375). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:01:10 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86374). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:01:12 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86373). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:01:13 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86372). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:01:14 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86371). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:01:15 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86370). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:01:16 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86369). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:01:17 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86368). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:01:18 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86367). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:01:19 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86366). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:01:20 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86365). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:01:22 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86364). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:01:23 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86363). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:01:24 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86362). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:01:25 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86361). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:01:26 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86360). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:01:27 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86359). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:01:28 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86358). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:01:29 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86357). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:01:30 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86356). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:01:32 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86355). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:01:33 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86354). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:01:34 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86353). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:01:35 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86352). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:01:36 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86351). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:01:37 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86350). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:01:38 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86349). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:01:39 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86348). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:01:40 robin pgsql(pgsql)[14148]: WARNING: Retrying(remain 86347). "exec_with_timeout 0 /usr/sbin/crm_attribute -l forever -N -n pgsql-data-status -v LATEST" failed. rc=22. stdout="non-option ARGV-elements: pgsql-data-status #012Please choose from one of the matches above and suppy the 'id' with --attr-id".
Oct 29 20:01:41 robin lrmd[14136]:  warning: child_timeout_callback: pgsql_monitor_0 process (PID 14148) timed out
Oct 29 20:01:41 robin lrmd[14136]:  warning: operation_finished: pgsql_monitor_0:14148 - timed out after 60000ms
Oct 29 20:01:41 robin crmd[14139]:    error: process_lrm_event: LRM operation pgsql_monitor_0 (10) Timed Out (timeout=60000ms)
Oct 29 20:01:41 robin crmd[14139]:  warning: status_from_rc: Action 5 (pgsql:0_monitor_0) on robin failed (target: 7 vs. rc: 1): Error
Oct 29 20:01:41 robin pengine[14138]:  warning: unpack_rsc_op: Processing failed op monitor for pgsql:0 on robin: unknown error (1)
Oct 29 20:01:41 robin IPaddr2(vip-master)[14821]: INFO: Adding IPv4 address 172.16.1.1/16 with broadcast address 172.16.255.255 to device bond0
Oct 29 20:01:41 robin IPaddr2(vip-master)[14821]: INFO: Bringing device bond0 up
Oct 29 20:01:41 robin IPaddr2(vip-master)[14821]: INFO: /usr/lib/heartbeat/send_arp -i 200 -r 5 -p /var/run/resource-agents/send_arp-172.16.1.1 bond0 172.16.1.1 auto not_used not_used
Oct 29 20:01:48 robin pengine[14138]:  warning: unpack_rsc_op: Processing failed op monitor for pgsql:0 on robin: unknown error (1)
Oct 29 20:01:48 robin crmd[14139]:  warning: status_from_rc: Action 10 (pgsql_start_0) on robin failed (target: 0 vs. rc: 5): Error
Oct 29 20:01:48 robin crmd[14139]:  warning: update_failcount: Updating failcount for pgsql on robin after failed start: rc=5 (update=INFINITY, time=1414609308)
Oct 29 20:01:48 robin crmd[14139]:  warning: update_failcount: Updating failcount for pgsql on robin after failed start: rc=5 (update=INFINITY, time=1414609308)
Oct 29 20:01:48 robin pengine[14138]:  warning: unpack_rsc_op: Processing failed op start for pgsql:0 on robin: not installed (5)
Oct 29 20:01:48 robin pengine[14138]:  warning: common_apply_stickiness: Forcing msPostgresql away from robin after 1000000 failures (max=1)
Oct 29 20:01:48 robin pengine[14138]:  warning: common_apply_stickiness: Forcing msPostgresql away from robin after 1000000 failures (max=1)

Я не очень хорошо знаком с Pacemaker и pgsql. Может ли кто-нибудь помочь мне или найти мою ошибку? Может быть, есть решение для автоматического восстановления после сбоя или повторного подключения, если главный или подчиненный узел вернется в строй? Я слышал, что это возможно с помощью repmgr, но есть ли также автоматизированное решение на основе Pacemaker?

большое спасибо

Я работаю над аналогичным решением в Amazon VPC. Я начал с книги издательства Packt. Я не связан ни с Packt, ни с автором, но несколько раз писал ему по электронной почте. С ним действительно легко общаться по электронной почте. Очень рекомендую эту книгу: в ней описывается создание различных конфигураций высокой доступности.

https://www.packtpub.com/big-data-and-business-intelligence/postgresql-9-high-availability-cookbook

Что касается плавающего VIP, мне пришлось поступить иначе, поскольку я использую облако. Это решение отлично работает, но может оказаться не тем, что вы ищете. Вы добавляете частный IP-адрес (эластичный IP-адрес) к одному из экземпляров и используете задачу cron для отслеживания доступности другого узла. Итак, если у вас 2 узла, node2 контролирует node1, а node1 контролирует node2. Если узел определяется как недоступный, VIP переключается на доступный узел через вызовы AWS API. Об этом можно прочитать здесь: https://aws.amazon.com/articles/2127188135977316

Вот:

primitive pgsql ocf:heartbeat:pgsql \ params \ pgctl="/usr/bin/pg_ctlcluster" \ psql="/usr/bin/psql" \

замените значение pgctl на:

pgctl="/var/lib/postgresql/9.3/bin/pg_ctl"

и PostgreSQL запустится. Перепроверьте путь: я не уверен в нем, поскольку набираю это на мобильном телефоне.