Назад | Перейти на главную страницу

Скорость очень низкая при повторной синхронизации жесткого диска с mdadm

Проблема: когда я добавляю sda3 в /dev/md3, скорость обоих дисков падает очень сильно — примерно до 100 КБ/с.

Вопрос о том, почему снижается скорость.

  1. Может быть, это потому, что сервер работает в обычном режиме и все службы запущены (например, mysql, apache, json и т. д.)?
  2. Какой-то разъем неисправен (разъем от материнской платы к жесткому диску)?
  3. Конфигурация плохая? Я пробовал изменить speed_limit_min
  4. Жесткий диск был новым; я скопировал на него только структуру разделов с помощью команды
    sfdisk -d /dev/sdb | sfdisk /dev/sda
  5. Может жесткий диск несовместим с raid1?

Немного информации

//Command to add the sda device
mdadm --manage /dev/md3 --add /dev/sda3

// check status of resync
# cat /proc/mdstat

Personalities : [linear] [multipath] [raid0] [raid1] [raid6] [raid5] [raid4] [raid10]
md3 : active raid1 sda3[2] sdb3[0]
      970470016 blocks [2/1] [U_]
      [>....................]  recovery =  0.0% (43840/970470016) finish=29872.1min speed=541K/sec

md1 : active raid1 sda1[0] sdb1[1]
      4194240 blocks [2/2] [UU]

unused devices: <none>

Когда я добавляю диск в mdadm raid1, скорость сильно снижается и на другом жестком диске тоже.

# hdparm -tT /dev/sda

/dev/sda:
 Timing cached reads:     2 MB in 19.52 seconds = 104.94 kB/sec
 Timing buffered disk reads:   2 MB in 65.97 seconds =  31.04 kB/sec


//remove the disk because the resync is freezing the server
mdadm /dev/md0 --fail /dev/sda3 
mdadm /dev/md0 --remove /dev/sda3

Решения, которые я пробовал

# echo "5000" > /proc/sys/dev/raid/speed_limit_min
# echo "50000" > /proc/sys/dev/raid/speed_limit_max

Ссылка: Вот


Нормальная скорость /dev/sda без ресинхронизации <- новый диск

# hdparm -tT /dev/sda
/dev/sda:
 Timing cached reads:   7512 MB in  2.00 seconds = 3758.92 MB/sec
 Timing buffered disk reads: 150 MB in  3.02 seconds =  49.66 MB/sec

Нормальная скорость в /dev/sdb без ресинхронизации с mdadm <- хороший диск

# hdparm -tT /dev/sdb
/dev/sdb:
 Timing cached reads:   7000 MB in  2.00 seconds = 3502.26 MB/sec
 Timing buffered disk reads: 248 MB in  3.01 seconds =  82.34 MB/sec

Команда для отображения разделов диска sdb и sda

#lsblk
NAME                   MAJ:MIN RM   SIZE RO TYPE  MOUNTPOINT
sdb                      8:16   0 931.5G  0 disk
|-sdb1                   8:17   0     4G  0 part
| `-md1                  9:1    0     4G  0 raid1 /
|-sdb2                   8:18   0     2G  0 part  [SWAP]
`-sdb3                   8:19   0 925.5G  0 part
  `-md3                  9:3    0 925.5G  0 raid1
    |-vg00-usr (dm-0)  252:0    0   105G  0 lvm   /usr
    |-vg00-var (dm-1)  252:1    0   720G  0 lvm   /var
    `-vg00-home (dm-2) 252:2    0    70G  0 lvm   /home
sda                      8:32   0 931.5G  0 disk
|-sda1                   8:33   0     4G  0 part
| `-md1                  9:1    0     4G  0 raid1 /
|-sda2                   8:34   0     2G  0 part  [SWAP]
`-sda3                   8:35   0 925.5G  0 part

Дополнительная информация


blockdev --getra /dev/md3
256

cat /proc/partitions
major minor  #blocks  name

   8       16  976762584 sdb
   8       17    4194304 sdb1
   8       18    2097152 sdb2
   8       19  970470104 sdb3
   9        1    4194240 md1
   9        3  970470016 md3
 252        0  110100480 dm-0
 252        1  754974720 dm-1
 252        2   73400320 dm-2
   8       32  976762584 sda
   8       33    4194304 sda1
   8       34    2097152 sda2
   8       35  970470104 sda3

# mdadm.conf
#
# Please refer to mdadm.conf(5) for information about this file.
#

# by default (built-in), scan all partitions (/proc/partitions) and all
# containers for MD superblocks. alternatively, specify devices to scan, using
# wildcards if desired.
#DEVICE partitions containers

# auto-create devices with Debian standard permissions
CREATE owner=root group=disk mode=0660 auto=yes

# automatically tag new arrays as belonging to the local system
HOMEHOST <system>

# instruct the monitoring daemon where to send mail alerts
MAILADDR root

# definitions of existing MD arrays

ARRAY /dev/md1 level=raid1 num-devices=2 devices=/dev/sda1,/dev/sdb1  
ARRAY /dev/md3 level=raid1 num-devices=2 devices=/dev/sda3,/dev/sdb3
#ARRAY /dev/md11 level=raid1 num-devices=2 devices=/dev/sdc1,/dev/sdd1

# mdadm --detail /dev/md1
/dev/md1:
        Version : 0.90
  Creation Time : Tue Jun 28 21:37:54 2016
     Raid Level : raid1
     Array Size : 4194240 (4.00 GiB 4.29 GB)
  Used Dev Size : 4194240 (4.00 GiB 4.29 GB)
   Raid Devices : 2
  Total Devices : 1
Preferred Minor : 1
    Persistence : Superblock is persistent

    Update Time : Mon Jan  2 11:11:27 2017
          State : clean, degraded
 Active Devices : 1
Working Devices : 1
 Failed Devices : 0
  Spare Devices : 0

           UUID : 4878c265:cddfb861:1f51fb89:78ee93fe
         Events : 0.581776

    Number   Major   Minor   RaidDevice State
       0       0        0        0      removed
       1       8       17        1      active sync   /dev/sdb1

# mdadm --detail /dev/md3
/dev/md3:
        Version : 0.90
  Creation Time : Tue Jun 28 21:37:54 2016
     Raid Level : raid1
     Array Size : 970470016 (925.51 GiB 993.76 GB)
  Used Dev Size : 970470016 (925.51 GiB 993.76 GB)
   Raid Devices : 2
  Total Devices : 1
Preferred Minor : 3
    Persistence : Superblock is persistent

    Update Time : Mon Jan  2 11:12:15 2017
          State : active, degraded
 Active Devices : 1
Working Devices : 1
 Failed Devices : 0
  Spare Devices : 0

           UUID : 3719a2fe:5de22a92:1f51fb89:78ee93fe
         Events : 0.26462169

    Number   Major   Minor   RaidDevice State
       0       8       19        0      active sync   /dev/sdb3
       1       0        0        1      removed

Вывод smartctl с результатами длинного теста


Это новое устройство, новый жесткий диск

# smartctl -a /dev/sda   

smartctl 6.2 2013-07-26 r3841 [x86_64-linux-3.13.0-106-generic] (local build)
Copyright (C) 2002-13, Bruce Allen, Christian Franke, www.smartmontools.org

=== START OF INFORMATION SECTION ===
Model Family:     Seagate Barracuda 7200.14 (AF)
Device Model:     ST1000DM003-1SB102
Serial Number:    Z9A4SRJX
LU WWN Device Id: 5 000c50 092058fcf
Firmware Version: CC43
User Capacity:    1,000,204,886,016 bytes [1.00 TB]
Sector Sizes:     512 bytes logical, 4096 bytes physical
Rotation Rate:    7200 rpm
Device is:        In smartctl database [for details use: -P show]
ATA Version is:   ATA8-ACS T13/1699-D revision 4
SATA Version is:  SATA 3.0, 6.0 Gb/s (current: 3.0 Gb/s)
Local Time is:    Sat Dec 31 13:54:02 2016 MST

==> WARNING: A firmware update for this drive may be available,
see the following Seagate web pages:
http://knowledge.seagate.com/articles/en_US/FAQ/207931en
http://knowledge.seagate.com/articles/en_US/FAQ/223651en

SMART support is: Available - device has SMART capability.
SMART support is: Enabled

=== START OF READ SMART DATA SECTION ===
SMART overall-health self-assessment test result: PASSED

General SMART Values:
Offline data collection status:  (0x82) Offline data collection activity
                                        was completed without error.
                                        Auto Offline Data Collection: Enabled.
Self-test execution status:      ( 241) Self-test routine in progress...
                                        10% of test remaining.
Total time to complete Offline
data collection:                (    0) seconds.
Offline data collection
capabilities:                    (0x7b) SMART execute Offline immediate.
                                        Auto Offline data collection on/off support.
                                        Suspend Offline collection upon new
                                        command.
                                        Offline surface scan supported.
                                        Self-test supported.
                                        Conveyance Self-test supported.
                                        Selective Self-test supported.
SMART capabilities:            (0x0003) Saves SMART data before entering
                                        power-saving mode.
                                        Supports SMART auto save timer.
Error logging capability:        (0x01) Error logging supported.
                                        General Purpose Logging supported.
Short self-test routine
recommended polling time:        (   1) minutes.
Extended self-test routine
recommended polling time:        ( 104) minutes.
Conveyance self-test routine
recommended polling time:        (   2) minutes.
SCT capabilities:              (0x1085) SCT Status supported.

SMART Attributes Data Structure revision number: 10
Vendor Specific SMART Attributes with Thresholds:
ID# ATTRIBUTE_NAME          FLAG     VALUE WORST THRESH TYPE      UPDATED  WHEN_FAILED RAW_VALUE
  1 Raw_Read_Error_Rate     0x000f   073   064   006    Pre-fail  Always       -       22632891
  3 Spin_Up_Time            0x0003   100   100   000    Pre-fail  Always       -       0
  4 Start_Stop_Count        0x0032   100   100   020    Old_age   Always       -       1
  5 Reallocated_Sector_Ct   0x0033   100   100   010    Pre-fail  Always       -       0
  7 Seek_Error_Rate         0x000f   100   253   045    Pre-fail  Always       -       25770426301
  9 Power_On_Hours          0x0032   100   100   000    Old_age   Always       -       70
 10 Spin_Retry_Count        0x0013   100   100   097    Pre-fail  Always       -       0
 12 Power_Cycle_Count       0x0032   100   100   020    Old_age   Always       -       1
183 Runtime_Bad_Block       0x0032   099   099   000    Old_age   Always       -       1
184 End-to-End_Error        0x0032   100   100   099    Old_age   Always       -       0
187 Reported_Uncorrect      0x0032   100   100   000    Old_age   Always       -       0
188 Command_Timeout         0x0032   090   090   000    Old_age   Always       -       24 32 32
189 High_Fly_Writes         0x003a   100   100   000    Old_age   Always       -       0
190 Airflow_Temperature_Cel 0x0022   070   069   040    Old_age   Always       -       30 (Min/Max 25/31)
193 Load_Cycle_Count        0x0032   100   100   000    Old_age   Always       -       3
194 Temperature_Celsius     0x0022   030   025   000    Old_age   Always       -       30 (0 25 0 0 0)
195 Hardware_ECC_Recovered  0x001a   010   009   000    Old_age   Always       -       22632891
197 Current_Pending_Sector  0x0012   100   100   000    Old_age   Always       -       0
198 Offline_Uncorrectable   0x0010   100   100   000    Old_age   Offline      -       0
199 UDMA_CRC_Error_Count    0x003e   200   200   000    Old_age   Always       -       0
240 Head_Flying_Hours       0x0000   100   253   000    Old_age   Offline      -       69h+48m+11.238s
241 Total_LBAs_Written      0x0000   100   253   000    Old_age   Offline      -       21834581
242 Total_LBAs_Read         0x0000   100   253   000    Old_age   Offline      -       798310

SMART Error Log Version: 1
ATA Error Count: 2
        CR = Command Register [HEX]
        FR = Features Register [HEX]
        SC = Sector Count Register [HEX]
        SN = Sector Number Register [HEX]
        CL = Cylinder Low Register [HEX]
        CH = Cylinder High Register [HEX]
        DH = Device/Head Register [HEX]
        DC = Device Command Register [HEX]
        ER = Error register [HEX]
        ST = Status register [HEX]
Powered_Up_Time is measured from power on, and printed as
DDd+hh:mm:SS.sss where DD=days, hh=hours, mm=minutes,
SS=sec, and sss=millisec. It "wraps" after 49.710 days.

Error 2 occurred at disk power-on lifetime: 58 hours (2 days + 10 hours)
  When the command that caused the error occurred, the device was in an unknown state.

  After command completion occurred, registers were:
  ER ST SC SN CL CH DH
  -- -- -- -- -- -- --
  04 51 00 00 00 00 00  Error: ABRT

  Commands leading to the command that caused the error were:
  CR FR SC SN CL CH DH DC   Powered_Up_Time  Command/Feature_Name
  -- -- -- -- -- -- -- --  ----------------  --------------------
  00 00 00 00 00 00 00 ff   2d+10:52:15.111  NOP [Abort queued commands]
  b0 d4 00 82 4f c2 00 00   2d+10:51:54.064  SMART EXECUTE OFF-LINE IMMEDIATE
  61 00 10 50 0c db 41 00   2d+10:51:54.064  WRITE FPDMA QUEUED
  61 00 08 ff ff ff 4f 00   2d+10:51:54.064  WRITE FPDMA QUEUED
  ea 00 00 00 00 00 a0 00   2d+10:51:53.984  FLUSH CACHE EXT

Error 1 occurred at disk power-on lifetime: 58 hours (2 days + 10 hours)
  When the command that caused the error occurred, the device was in an unknown state.

  After command completion occurred, registers were:
  ER ST SC SN CL CH DH
  -- -- -- -- -- -- --
  04 51 00 00 00 00 00  Error: ABRT

  Commands leading to the command that caused the error were:
  CR FR SC SN CL CH DH DC   Powered_Up_Time  Command/Feature_Name
  -- -- -- -- -- -- -- --  ----------------  --------------------
  00 00 00 00 00 00 00 ff   2d+10:46:05.163  NOP [Abort queued commands]
  b0 d4 00 82 4f c2 00 00   2d+10:45:44.584  SMART EXECUTE OFF-LINE IMMEDIATE
  61 00 18 88 0f 44 42 00   2d+10:45:44.584  WRITE FPDMA QUEUED
  ea 00 00 00 00 00 a0 00   2d+10:45:44.584  FLUSH CACHE EXT
  b0 d0 01 00 4f c2 00 00   2d+10:45:43.477  SMART READ DATA

SMART Self-test log structure revision number 1
Num  Test_Description    Status                  Remaining  LifeTime(hours)  LBA_of_first_error
# 1  Extended offline    Self-test routine in progress 10%        70         -
# 2  Extended captive    Interrupted (host reset)      90%        58         -
# 3  Extended captive    Interrupted (host reset)      90%        58         -
# 4  Short offline       Aborted by host               90%        58         -

SMART Selective self-test log data structure revision number 1
 SPAN  MIN_LBA  MAX_LBA  CURRENT_TEST_STATUS
    1        0        0  Not_testing
    2        0        0  Not_testing
    3        0        0  Not_testing
    4        0        0  Not_testing
    5        0        0  Not_testing
Selective self-test flags (0x0):
  After scanning selected spans, do NOT read-scan remainder of disk.
If Selective self-test is pending on power-up, resume after 0 minute delay.

Старый диск, это устройство, где все работает

# smartctl -a /dev/sdb

smartctl 6.2 2013-07-26 r3841 [x86_64-linux-3.13.0-106-generic] (local build)
Copyright (C) 2002-13, Bruce Allen, Christian Franke, www.smartmontools.org

=== START OF INFORMATION SECTION ===
Model Family:     Seagate Barracuda 7200.14 (AF)
Device Model:     ST1000DM003-1CH162
Serial Number:    S1D9YSS6
LU WWN Device Id: 5 000c50 06106421b
Firmware Version: CC46
User Capacity:    1,000,204,886,016 bytes [1.00 TB]
Sector Sizes:     512 bytes logical, 4096 bytes physical
Rotation Rate:    7200 rpm
Device is:        In smartctl database [for details use: -P show]
ATA Version is:   ATA8-ACS T13/1699-D revision 4
SATA Version is:  SATA 3.0, 6.0 Gb/s (current: 3.0 Gb/s)
Local Time is:    Sat Dec 31 13:52:22 2016 MST

==> WARNING: A firmware update for this drive is available,
see the following Seagate web pages:
http://knowledge.seagate.com/articles/en_US/FAQ/207931en
http://knowledge.seagate.com/articles/en_US/FAQ/223651en

SMART support is: Available - device has SMART capability.
SMART support is: Enabled

=== START OF READ SMART DATA SECTION ===
SMART overall-health self-assessment test result: PASSED

General SMART Values:
Offline data collection status:  (0x82) Offline data collection activity
                                        was completed without error.
                                        Auto Offline Data Collection: Enabled.
Self-test execution status:      (   0) The previous self-test routine completed
                                        without error or no self-test has ever
                                        been run.
Total time to complete Offline
data collection:                (  575) seconds.
Offline data collection
capabilities:                    (0x7b) SMART execute Offline immediate.
                                        Auto Offline data collection on/off support.
                                        Suspend Offline collection upon new
                                        command.
                                        Offline surface scan supported.
                                        Self-test supported.
                                        Conveyance Self-test supported.
                                        Selective Self-test supported.
SMART capabilities:            (0x0003) Saves SMART data before entering
                                        power-saving mode.
                                        Supports SMART auto save timer.
Error logging capability:        (0x01) Error logging supported.
                                        General Purpose Logging supported.
Short self-test routine
recommended polling time:        (   1) minutes.
Extended self-test routine
recommended polling time:        ( 112) minutes.
Conveyance self-test routine
recommended polling time:        (   2) minutes.
SCT capabilities:              (0x3085) SCT Status supported.

SMART Attributes Data Structure revision number: 10
Vendor Specific SMART Attributes with Thresholds:
ID# ATTRIBUTE_NAME          FLAG     VALUE WORST THRESH TYPE      UPDATED  WHEN_FAILED RAW_VALUE
  1 Raw_Read_Error_Rate     0x000f   119   099   006    Pre-fail  Always       -       221092144
  3 Spin_Up_Time            0x0003   097   097   000    Pre-fail  Always       -       0
  4 Start_Stop_Count        0x0032   100   100   020    Old_age   Always       -       32
  5 Reallocated_Sector_Ct   0x0033   100   100   010    Pre-fail  Always       -       0
  7 Seek_Error_Rate         0x000f   077   060   030    Pre-fail  Always       -       21764523466
  9 Power_On_Hours          0x0032   087   087   000    Old_age   Always       -       11497
 10 Spin_Retry_Count        0x0013   100   100   097    Pre-fail  Always       -       0
 12 Power_Cycle_Count       0x0032   100   100   020    Old_age   Always       -       32
183 Runtime_Bad_Block       0x0032   100   100   000    Old_age   Always       -       0
184 End-to-End_Error        0x0032   100   100   099    Old_age   Always       -       0
187 Reported_Uncorrect      0x0032   100   100   000    Old_age   Always       -       0
188 Command_Timeout         0x0032   100   100   000    Old_age   Always       -       0 0 0
189 High_Fly_Writes         0x003a   094   094   000    Old_age   Always       -       6
190 Airflow_Temperature_Cel 0x0022   067   063   045    Old_age   Always       -       33 (Min/Max 32/34)
191 G-Sense_Error_Rate      0x0032   100   100   000    Old_age   Always       -       0
192 Power-Off_Retract_Count 0x0032   100   100   000    Old_age   Always       -       31
193 Load_Cycle_Count        0x0032   100   100   000    Old_age   Always       -       183
194 Temperature_Celsius     0x0022   033   040   000    Old_age   Always       -       33 (0 20 0 0 0)
197 Current_Pending_Sector  0x0012   100   100   000    Old_age   Always       -       0
198 Offline_Uncorrectable   0x0010   100   100   000    Old_age   Offline      -       0
199 UDMA_CRC_Error_Count    0x003e   200   200   000    Old_age   Always       -       0
240 Head_Flying_Hours       0x0000   100   253   000    Old_age   Offline      -       11232h+41m+23.205s
241 Total_LBAs_Written      0x0000   100   253   000    Old_age   Offline      -       55718050076
242 Total_LBAs_Read         0x0000   100   253   000    Old_age   Offline      -       82556976137

SMART Error Log Version: 1
No Errors Logged

SMART Self-test log structure revision number 1
Num  Test_Description    Status                  Remaining  LifeTime(hours)  LBA_of_first_error
# 1  Extended offline    Completed without error       00%     11497         -
# 2  Extended offline    Completed without error       00%      8059         -
# 3  Short offline       Completed without error       00%      8042         -

SMART Selective self-test log data structure revision number 1
 SPAN  MIN_LBA  MAX_LBA  CURRENT_TEST_STATUS
    1        0        0  Not_testing
    2        0        0  Not_testing
    3        0        0  Not_testing
    4        0        0  Not_testing
    5        0        0  Not_testing
Selective self-test flags (0x0):
  After scanning selected spans, do NOT read-scan remainder of disk.
If Selective self-test is pending on power-up, resume after 0 minute delay.

Похоже, у вашего нового диска проблема с прошивкой или с оборудованием. Об этом можно судить по двум признакам:

  • низкая скорость буферизованного чтения (49 МБ/с), что намного ниже, чем у старого диска;
  • таймауты команд SATA, указанные в отчете SMART.

Проверьте, существует ли обновление прошивки для вашего диска, и сделайте то же самое для BIOS материнской платы / контроллера. Если возможно, проверьте новый диск на другом ПК, чтобы выяснить, сохраняется ли там низкая скорость буферизованного чтения.