
1#!/usr/bin/python3 

2# 

3# Copyright (C) Citrix Systems Inc. 

4# 

5# This program is free software; you can redistribute it and/or modify 

6# it under the terms of the GNU Lesser General Public License as published 

7# by the Free Software Foundation; version 2.1 only. 

8# 

9# This program is distributed in the hope that it will be useful, 

10# but WITHOUT ANY WARRANTY; without even the implied warranty of 

11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

12# GNU Lesser General Public License for more details. 

13# 

14# You should have received a copy of the GNU Lesser General Public License 

15# along with this program; if not, write to the Free Software Foundation, Inc., 

16# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 

17# 

18# Script to coalesce and garbage collect VHD-based SRs in the background

19# 

20 

21import os 

22import os.path 

23import sys 

24import time 

25import signal 

26import subprocess 

27import getopt 

28import datetime 

29import traceback 

30import base64 

31import zlib 

32import errno 

33import stat 

34 

35import XenAPI # pylint: disable=import-error 

36import util 

37import lvutil 

38import vhdutil 

39import lvhdutil 

40import lvmcache 

41import journaler 

42import fjournaler 

43import lock 

44import blktap2 

45import xs_errors 

46from refcounter import RefCounter 

47from ipc import IPCFlag 

48from lvmanager import LVActivator 

49from srmetadata import LVMMetadataHandler, VDI_TYPE_TAG 

50from functools import reduce 

51from time import monotonic as _time 

52 

53try: 

54 from linstorjournaler import LinstorJournaler 

55 from linstorvhdutil import LinstorVhdUtil 

56 from linstorvolumemanager import get_controller_uri 

57 from linstorvolumemanager import LinstorVolumeManager 

58 from linstorvolumemanager import LinstorVolumeManagerError 

59 from linstorvolumemanager import PERSISTENT_PREFIX as LINSTOR_PERSISTENT_PREFIX 

60 

61 LINSTOR_AVAILABLE = True 

62except ImportError: 

63 LINSTOR_AVAILABLE = False 

64 

65# Whether to attempt automatic (online) leaf-coalescing. Historically this was

66# disabled because lvhd_stop_using_() was not working correctly. Independently

67# of this setting, the explicit LEAFCLSC_FORCE flag in the VDI record remains

68# available for use by the offline tool (which makes the operation safe by

69# pausing the VM first)

70AUTO_ONLINE_LEAF_COALESCE_ENABLED = True 

71 

72FLAG_TYPE_ABORT = "abort" # flag to request aborting of GC/coalesce 

73 

74# process "lock", used simply as an indicator that a process already exists 

75# that is doing GC/coalesce on this SR (such a process holds the lock, and we 

76# check for the fact by trying the lock). 

77LOCK_TYPE_RUNNING = "running" 

78lockRunning = None 

79 

80# process "lock" to indicate that the GC process has been activated but may not

81# yet be running; it stops a second process from being started.

82LOCK_TYPE_GC_ACTIVE = "gc_active" 

83lockActive = None 
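
# Illustrative sketch for the "running" lock above (a minimal example, assuming
# the non-blocking acquire API of the lock module imported by this script):
#
#     running = lock.Lock(LOCK_TYPE_RUNNING, srUuid)
#     if not running.acquireNoblock():
#         pass  # another GC/coalesce process already holds it for this SR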

84 

85# Default coalesce error rate limit, in messages per minute. A zero value 

86# disables throttling, and a negative value disables error reporting. 

87DEFAULT_COALESCE_ERR_RATE = 1.0 / 60 
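
# Worked example: with the default rate of 1.0 / 60 messages per minute,
# _reportCoalesceError() below spaces XenCenter messages at least
# (1 / (1.0 / 60)) * 60 = 3600 seconds apart, i.e. at most one coalesce error
# message per hour per SR.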

88 

89COALESCE_LAST_ERR_TAG = 'last-coalesce-error' 

90COALESCE_ERR_RATE_TAG = 'coalesce-error-rate' 

91VAR_RUN = "/var/run/" 

92SPEED_LOG_ROOT = VAR_RUN + "{uuid}.speed_log" 

93 

94N_RUNNING_AVERAGE = 10 

95 

96NON_PERSISTENT_DIR = '/run/nonpersistent/sm' 

97 

98 

99class AbortException(util.SMException): 

100 pass 

101 

102 

103################################################################################ 

104# 

105# Util 

106# 

107class Util: 

108 RET_RC = 1 

109 RET_STDOUT = 2 

110 RET_STDERR = 4 

111 

112 UUID_LEN = 36 

113 

114 PREFIX = {"G": 1024 * 1024 * 1024, "M": 1024 * 1024, "K": 1024} 

115 

116 def log(text): 

117 util.SMlog(text, ident="SMGC") 

118 log = staticmethod(log) 

119 

120 def logException(tag): 

121 info = sys.exc_info() 

122 if info[0] == SystemExit:  # coverage: branch 122 ↛ 124 never taken (condition never true)

123 # this should not be happening when catching "Exception", but it is 

124 sys.exit(0) 

125 tb = reduce(lambda a, b: "%s%s" % (a, b), traceback.format_tb(info[2])) 

126 Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*") 

127 Util.log(" ***********************") 

128 Util.log(" * E X C E P T I O N *") 

129 Util.log(" ***********************") 

130 Util.log("%s: EXCEPTION %s, %s" % (tag, info[0], info[1])) 

131 Util.log(tb) 

132 Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*") 

133 logException = staticmethod(logException) 

134 

135 def doexec(args, expectedRC, inputtext=None, ret=None, log=True): 

136 "Execute a subprocess, then return its return code, stdout, stderr" 

137 proc = subprocess.Popen(args, 

138 stdin=subprocess.PIPE, \ 

139 stdout=subprocess.PIPE, \ 

140 stderr=subprocess.PIPE, \ 

141 shell=True, \ 

142 close_fds=True) 

143 (stdout, stderr) = proc.communicate(inputtext) 

144 stdout = str(stdout) 

145 stderr = str(stderr) 

146 rc = proc.returncode 

147 if log: 

148 Util.log("`%s`: %s" % (args, rc)) 

149 if type(expectedRC) != type([]): 

150 expectedRC = [expectedRC] 

151 if not rc in expectedRC: 

152 reason = stderr.strip() 

153 if stdout.strip(): 

154 reason = "%s (stdout: %s)" % (reason, stdout.strip()) 

155 Util.log("Failed: %s" % reason) 

156 raise util.CommandException(rc, args, reason) 

157 

158 if ret == Util.RET_RC: 

159 return rc 

160 if ret == Util.RET_STDERR: 

161 return stderr 

162 return stdout 

163 doexec = staticmethod(doexec) 

164 

165 def runAbortable(func, ret, ns, abortTest, pollInterval, timeOut): 

166 """execute func in a separate thread and kill it if abortTest signals 

167 so""" 

168 abortSignaled = abortTest() # check now before we clear resultFlag 

169 resultFlag = IPCFlag(ns) 

170 resultFlag.clearAll() 

171 pid = os.fork() 

172 if pid: 

173 startTime = _time() 

174 try: 

175 while True: 

176 if resultFlag.test("success"): 

177 Util.log(" Child process completed successfully") 

178 resultFlag.clear("success") 

179 return 

180 if resultFlag.test("failure"): 

181 resultFlag.clear("failure") 

182 raise util.SMException("Child process exited with error") 

183 if abortTest() or abortSignaled: 

184 os.killpg(pid, signal.SIGKILL) 

185 raise AbortException("Aborting due to signal") 

186 if timeOut and _time() - startTime > timeOut: 

187 os.killpg(pid, signal.SIGKILL) 

188 resultFlag.clearAll() 

189 raise util.SMException("Timed out") 

190 time.sleep(pollInterval) 

191 finally: 

192 wait_pid = 0 

193 rc = -1 

194 count = 0 

195 while wait_pid == 0 and count < 10: 

196 wait_pid, rc = os.waitpid(pid, os.WNOHANG) 

197 if wait_pid == 0: 

198 time.sleep(2) 

199 count += 1 

200 

201 if wait_pid == 0: 

202 Util.log("runAbortable: wait for process completion timed out") 

203 else: 

204 os.setpgrp() 

205 try: 

206 if func() == ret: 

207 resultFlag.set("success") 

208 else: 

209 resultFlag.set("failure") 

210 except Exception as e: 

211 Util.log("Child process failed with : (%s)" % e) 

212 resultFlag.set("failure") 

213 Util.logException("This exception has occurred")

214 os._exit(0) 

215 runAbortable = staticmethod(runAbortable) 
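
    # Usage sketch, as invoked from _coalesceVHD() further below:
    #
    #     Util.runAbortable(lambda: VDI._doCoalesceVHD(self), None,
    #                       self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut)
    #
    # The callable runs in a forked child; the parent polls the "success" /
    # "failure" IPC flags and kills the child's process group on abort or
    # timeout.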

216 

217 def num2str(number): 

218 for prefix in ("G", "M", "K"): 

219 if number >= Util.PREFIX[prefix]: 

220 return "%.3f%s" % (float(number) / Util.PREFIX[prefix], prefix) 

221 return "%s" % number 

222 num2str = staticmethod(num2str) 
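
    # Examples: Util.num2str(4 * 1024 ** 3) == "4.000G",
    # Util.num2str(3 * 1024 * 1024) == "3.000M", Util.num2str(512) == "512".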

223 

224 def numBits(val): 

225 count = 0 

226 while val: 

227 count += val & 1 

228 val = val >> 1 

229 return count 

230 numBits = staticmethod(numBits) 

231 

232 def countBits(bitmap1, bitmap2): 

233 """return bit count in the bitmap produced by ORing the two bitmaps""" 

234 len1 = len(bitmap1) 

235 len2 = len(bitmap2) 

236 lenLong = len1 

237 lenShort = len2 

238 bitmapLong = bitmap1 

239 if len2 > len1: 

240 lenLong = len2 

241 lenShort = len1 

242 bitmapLong = bitmap2 

243 

244 count = 0 

245 for i in range(lenShort): 

246 val = bitmap1[i] | bitmap2[i] 

247 count += Util.numBits(val) 

248 

249 for i in range(i + 1, lenLong): 

250 val = bitmapLong[i] 

251 count += Util.numBits(val) 

252 return count 

253 countBits = staticmethod(countBits) 
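
    # Examples: Util.numBits(0xff) == 8, and
    # Util.countBits(b'\x0f\x00\xff', b'\xf0') == 16: the first bytes are ORed
    # (0x0f | 0xf0 == 0xff, 8 bits) and the remaining bytes of the longer
    # bitmap contribute another 0 + 8 bits.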

254 

255 def getThisScript(): 

256 thisScript = util.get_real_path(__file__) 

257 if thisScript.endswith(".pyc"): 

258 thisScript = thisScript[:-1] 

259 return thisScript 

260 getThisScript = staticmethod(getThisScript) 
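
    # Example: when loaded from a compiled ".../<script>.pyc" file, the
    # trailing "c" is stripped so ".../<script>.py" is returned.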

261 

262 

263################################################################################ 

264# 

265# XAPI 

266# 

267class XAPI: 

268 USER = "root" 

269 PLUGIN_ON_SLAVE = "on-slave" 

270 

271 CONFIG_SM = 0 

272 CONFIG_OTHER = 1 

273 CONFIG_ON_BOOT = 2 

274 CONFIG_ALLOW_CACHING = 3 

275 

276 CONFIG_NAME = { 

277 CONFIG_SM: "sm-config", 

278 CONFIG_OTHER: "other-config", 

279 CONFIG_ON_BOOT: "on-boot", 

280 CONFIG_ALLOW_CACHING: "allow_caching" 

281 } 

282 

283 class LookupError(util.SMException): 

284 pass 

285 

286 def getSession(): 

287 session = XenAPI.xapi_local() 

288 session.xenapi.login_with_password(XAPI.USER, '', '', 'SM') 

289 return session 

290 getSession = staticmethod(getSession) 

291 

292 def __init__(self, session, srUuid): 

293 self.sessionPrivate = False 

294 self.session = session 

295 if self.session is None: 

296 self.session = self.getSession() 

297 self.sessionPrivate = True 

298 self._srRef = self.session.xenapi.SR.get_by_uuid(srUuid) 

299 self.srRecord = self.session.xenapi.SR.get_record(self._srRef) 

300 self.hostUuid = util.get_this_host() 

301 self._hostRef = self.session.xenapi.host.get_by_uuid(self.hostUuid) 

302 self.task = None 

303 self.task_progress = {"coalescable": 0, "done": 0} 

304 

305 def __del__(self): 

306 if self.sessionPrivate: 

307 self.session.xenapi.session.logout() 

308 

309 def isPluggedHere(self): 

310 pbds = self.getAttachedPBDs() 

311 for pbdRec in pbds: 

312 if pbdRec["host"] == self._hostRef: 

313 return True 

314 return False 

315 

316 def poolOK(self): 

317 host_recs = self.session.xenapi.host.get_all_records() 

318 for host_ref, host_rec in host_recs.items(): 

319 if not host_rec["enabled"]: 

320 Util.log("Host %s not enabled" % host_rec["uuid"]) 

321 return False 

322 return True 

323 

324 def isMaster(self): 

325 if self.srRecord["shared"]: 

326 pool = list(self.session.xenapi.pool.get_all_records().values())[0] 

327 return pool["master"] == self._hostRef 

328 else: 

329 pbds = self.getAttachedPBDs() 

330 if len(pbds) < 1: 

331 raise util.SMException("Local SR not attached") 

332 elif len(pbds) > 1: 

333 raise util.SMException("Local SR multiply attached") 

334 return pbds[0]["host"] == self._hostRef 

335 

336 def getAttachedPBDs(self): 

337 """Return PBD records for all PBDs of this SR that are currently 

338 attached""" 

339 attachedPBDs = [] 

340 pbds = self.session.xenapi.PBD.get_all_records() 

341 for pbdRec in pbds.values(): 

342 if pbdRec["SR"] == self._srRef and pbdRec["currently_attached"]: 

343 attachedPBDs.append(pbdRec) 

344 return attachedPBDs 

345 

346 def getOnlineHosts(self): 

347 return util.get_online_hosts(self.session) 

348 

349 def ensureInactive(self, hostRef, args): 

350 text = self.session.xenapi.host.call_plugin( \ 

351 hostRef, self.PLUGIN_ON_SLAVE, "multi", args) 

352 Util.log("call-plugin returned: '%s'" % text) 

353 

354 def getRecordHost(self, hostRef): 

355 return self.session.xenapi.host.get_record(hostRef) 

356 

357 def _getRefVDI(self, uuid): 

358 return self.session.xenapi.VDI.get_by_uuid(uuid) 

359 

360 def getRefVDI(self, vdi): 

361 return self._getRefVDI(vdi.uuid) 

362 

363 def getRecordVDI(self, uuid): 

364 try: 

365 ref = self._getRefVDI(uuid) 

366 return self.session.xenapi.VDI.get_record(ref) 

367 except XenAPI.Failure: 

368 return None 

369 

370 def singleSnapshotVDI(self, vdi): 

371 return self.session.xenapi.VDI.snapshot(vdi.getRef(), 

372 {"type": "internal"}) 

373 

374 def forgetVDI(self, srUuid, vdiUuid): 

375 """Forget the VDI, but handle the case where the VDI has already been 

376 forgotten (i.e. ignore errors)""" 

377 try: 

378 vdiRef = self.session.xenapi.VDI.get_by_uuid(vdiUuid) 

379 self.session.xenapi.VDI.forget(vdiRef) 

380 except XenAPI.Failure: 

381 pass 

382 

383 def getConfigVDI(self, vdi, key): 

384 kind = vdi.CONFIG_TYPE[key] 

385 if kind == self.CONFIG_SM: 

386 cfg = self.session.xenapi.VDI.get_sm_config(vdi.getRef()) 

387 elif kind == self.CONFIG_OTHER: 

388 cfg = self.session.xenapi.VDI.get_other_config(vdi.getRef()) 

389 elif kind == self.CONFIG_ON_BOOT: 

390 cfg = self.session.xenapi.VDI.get_on_boot(vdi.getRef()) 

391 elif kind == self.CONFIG_ALLOW_CACHING: 

392 cfg = self.session.xenapi.VDI.get_allow_caching(vdi.getRef()) 

393 else: 

394 assert(False) 

395 Util.log("Got %s for %s: %s" % (self.CONFIG_NAME[kind], vdi, repr(cfg))) 

396 return cfg 

397 

398 def removeFromConfigVDI(self, vdi, key): 

399 kind = vdi.CONFIG_TYPE[key] 

400 if kind == self.CONFIG_SM: 

401 self.session.xenapi.VDI.remove_from_sm_config(vdi.getRef(), key) 

402 elif kind == self.CONFIG_OTHER: 

403 self.session.xenapi.VDI.remove_from_other_config(vdi.getRef(), key) 

404 else: 

405 assert(False) 

406 

407 def addToConfigVDI(self, vdi, key, val): 

408 kind = vdi.CONFIG_TYPE[key] 

409 if kind == self.CONFIG_SM: 

410 self.session.xenapi.VDI.add_to_sm_config(vdi.getRef(), key, val) 

411 elif kind == self.CONFIG_OTHER: 

412 self.session.xenapi.VDI.add_to_other_config(vdi.getRef(), key, val) 

413 else: 

414 assert(False) 

415 

416 def isSnapshot(self, vdi): 

417 return self.session.xenapi.VDI.get_is_a_snapshot(vdi.getRef()) 

418 

419 def markCacheSRsDirty(self): 

420 sr_refs = self.session.xenapi.SR.get_all_records_where( \ 

421 'field "local_cache_enabled" = "true"') 

422 for sr_ref in sr_refs: 

423 Util.log("Marking SR %s dirty" % sr_ref) 

424 util.set_dirty(self.session, sr_ref) 

425 

426 def srUpdate(self): 

427 Util.log("Starting asynch srUpdate for SR %s" % self.srRecord["uuid"]) 

428 abortFlag = IPCFlag(self.srRecord["uuid"]) 

429 task = self.session.xenapi.Async.SR.update(self._srRef) 

430 cancelTask = True 

431 try: 

432 for i in range(60): 

433 status = self.session.xenapi.task.get_status(task) 

434 if not status == "pending": 

435 Util.log("SR.update_asynch status changed to [%s]" % status) 

436 cancelTask = False 

437 return 

438 if abortFlag.test(FLAG_TYPE_ABORT): 

439 Util.log("Abort signalled during srUpdate, cancelling task...") 

440 try: 

441 self.session.xenapi.task.cancel(task) 

442 cancelTask = False 

443 Util.log("Task cancelled") 

444 except: 

445 pass 

446 return 

447 time.sleep(1) 

448 finally: 

449 if cancelTask: 

450 self.session.xenapi.task.cancel(task) 

451 self.session.xenapi.task.destroy(task) 

452 Util.log("Asynch srUpdate still running, but timeout exceeded.") 

453 

454 def update_task(self): 

455 self.session.xenapi.task.set_other_config( 

456 self.task, 

457 { 

458 "applies_to": self._srRef 

459 }) 

460 total = self.task_progress['coalescable'] + self.task_progress['done'] 

461 if (total > 0): 

462 self.session.xenapi.task.set_progress( 

463 self.task, float(self.task_progress['done']) / total) 

464 
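    # Worked example for update_task() above: with
    # task_progress == {"coalescable": 3, "done": 1}, total == 4 and the task
    # progress is set to 1 / 4 == 0.25.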

465 def create_task(self, label, description): 

466 self.task = self.session.xenapi.task.create(label, description) 

467 self.update_task() 

468 

469 def update_task_progress(self, key, value): 

470 self.task_progress[key] = value 

471 if self.task: 

472 self.update_task() 

473 

474 def set_task_status(self, status): 

475 if self.task: 

476 self.session.xenapi.task.set_status(self.task, status) 

477 

478 

479################################################################################ 

480# 

481# VDI 

482# 

483class VDI(object): 

484 """Object representing a VDI of a VHD-based SR""" 

485 

486 POLL_INTERVAL = 1 

487 POLL_TIMEOUT = 30 

488 DEVICE_MAJOR = 202 

489 DRIVER_NAME_VHD = "vhd" 

490 

491 # config keys & values 

492 DB_VHD_PARENT = "vhd-parent" 

493 DB_VDI_TYPE = "vdi_type" 

494 DB_VHD_BLOCKS = "vhd-blocks" 

495 DB_VDI_PAUSED = "paused" 

496 DB_VDI_RELINKING = "relinking" 

497 DB_VDI_ACTIVATING = "activating" 

498 DB_GC = "gc" 

499 DB_COALESCE = "coalesce" 

500 DB_LEAFCLSC = "leaf-coalesce" # config key 

501 LEAFCLSC_DISABLED = "false" # set by user; means do not leaf-coalesce 

502 LEAFCLSC_FORCE = "force" # set by user; means skip snap-coalesce 

503 LEAFCLSC_OFFLINE = "offline" # set here for informational purposes: means

504 # there was no space to snap-coalesce or we were unable to keep

505 # up with the VDI. This is not used by the SM itself; it

506 # might be used by external components.

507 DB_ONBOOT = "on-boot" 

508 ONBOOT_RESET = "reset" 

509 DB_ALLOW_CACHING = "allow_caching" 

510 

511 CONFIG_TYPE = { 

512 DB_VHD_PARENT: XAPI.CONFIG_SM, 

513 DB_VDI_TYPE: XAPI.CONFIG_SM, 

514 DB_VHD_BLOCKS: XAPI.CONFIG_SM, 

515 DB_VDI_PAUSED: XAPI.CONFIG_SM, 

516 DB_VDI_RELINKING: XAPI.CONFIG_SM, 

517 DB_VDI_ACTIVATING: XAPI.CONFIG_SM, 

518 DB_GC: XAPI.CONFIG_OTHER, 

519 DB_COALESCE: XAPI.CONFIG_OTHER, 

520 DB_LEAFCLSC: XAPI.CONFIG_OTHER, 

521 DB_ONBOOT: XAPI.CONFIG_ON_BOOT, 

522 DB_ALLOW_CACHING: XAPI.CONFIG_ALLOW_CACHING, 

523 } 

524 

525 LIVE_LEAF_COALESCE_MAX_SIZE = 20 * 1024 * 1024 # bytes 

526 LIVE_LEAF_COALESCE_TIMEOUT = 10 # seconds 

527 TIMEOUT_SAFETY_MARGIN = 0.5 # extra margin when calculating 

528 # feasibility of leaf coalesce 

529 

530 JRN_RELINK = "relink" # journal entry type for relinking children 

531 JRN_COALESCE = "coalesce" # to communicate which VDI is being coalesced 

532 JRN_LEAF = "leaf" # used in coalesce-leaf 

533 

534 STR_TREE_INDENT = 4 

535 

536 def __init__(self, sr, uuid, raw): 

537 self.sr = sr 

538 self.scanError = True 

539 self.uuid = uuid 

540 self.raw = raw 

541 self.fileName = "" 

542 self.parentUuid = "" 

543 self.sizeVirt = -1 

544 self._sizeVHD = -1 

545 self._sizeAllocated = -1 

546 self.hidden = False 

547 self.parent = None 

548 self.children = [] 

549 self._vdiRef = None 

550 self._clearRef() 

551 

552 @staticmethod 

553 def extractUuid(path): 

554 raise NotImplementedError("Implement in sub class") 

555 

556 def load(self, info=None): 

557 """Load VDI info""" 

558 pass # abstract 

559 

560 def getDriverName(self): 

561 return self.DRIVER_NAME_VHD 

562 

563 def getRef(self): 

564 if self._vdiRef is None: 

565 self._vdiRef = self.sr.xapi.getRefVDI(self) 

566 return self._vdiRef 

567 

568 def getConfig(self, key, default=None): 

569 config = self.sr.xapi.getConfigVDI(self, key) 

570 if key == self.DB_ONBOOT or key == self.DB_ALLOW_CACHING:  # coverage: branch 570 ↛ 571 never taken (condition never true)

571 val = config 

572 else: 

573 val = config.get(key) 

574 if val: 

575 return val 

576 return default 

577 

578 def setConfig(self, key, val): 

579 self.sr.xapi.removeFromConfigVDI(self, key) 

580 self.sr.xapi.addToConfigVDI(self, key, val) 

581 Util.log("Set %s = %s for %s" % (key, val, self)) 

582 

583 def delConfig(self, key): 

584 self.sr.xapi.removeFromConfigVDI(self, key) 

585 Util.log("Removed %s from %s" % (key, self)) 

586 

587 def ensureUnpaused(self): 

588 if self.getConfig(self.DB_VDI_PAUSED) == "true": 

589 Util.log("Unpausing VDI %s" % self) 

590 self.unpause() 

591 

592 def pause(self, failfast=False): 

593 if not blktap2.VDI.tap_pause(self.sr.xapi.session, self.sr.uuid, 

594 self.uuid, failfast): 

595 raise util.SMException("Failed to pause VDI %s" % self) 

596 

597 def _report_tapdisk_unpause_error(self): 

598 try: 

599 xapi = self.sr.xapi.session.xenapi 

600 sr_ref = xapi.SR.get_by_uuid(self.sr.uuid) 

601 msg_name = "failed to unpause tapdisk" 

602 msg_body = "Failed to unpause tapdisk for VDI %s, " \ 

603 "VMs using this tapdisk have lost access " \ 

604 "to the corresponding disk(s)" % self.uuid 

605 xapi.message.create(msg_name, "4", "SR", self.sr.uuid, msg_body) 

606 except Exception as e: 

607 util.SMlog("failed to generate message: %s" % e) 

608 

609 def unpause(self): 

610 if not blktap2.VDI.tap_unpause(self.sr.xapi.session, self.sr.uuid, 

611 self.uuid): 

612 self._report_tapdisk_unpause_error() 

613 raise util.SMException("Failed to unpause VDI %s" % self) 

614 

615 def refresh(self, ignoreNonexistent=True): 

616 """Pause-unpause in one step""" 

617 self.sr.lock() 

618 try: 

619 try: 

620 if not blktap2.VDI.tap_refresh(self.sr.xapi.session,  # coverage: branch 620 ↛ 622 never taken (condition never true)

621 self.sr.uuid, self.uuid): 

622 self._report_tapdisk_unpause_error() 

623 raise util.SMException("Failed to refresh %s" % self) 

624 except XenAPI.Failure as e: 

625 if util.isInvalidVDI(e) and ignoreNonexistent: 

626 Util.log("VDI %s not found, ignoring" % self) 

627 return 

628 raise 

629 finally: 

630 self.sr.unlock() 

631 

632 def isSnapshot(self): 

633 return self.sr.xapi.isSnapshot(self) 

634 

635 def isAttachedRW(self): 

636 return util.is_attached_rw( 

637 self.sr.xapi.session.xenapi.VDI.get_sm_config(self.getRef())) 

638 

639 def getVHDBlocks(self): 

640 val = self.updateBlockInfo() 

641 bitmap = zlib.decompress(base64.b64decode(val)) 

642 return bitmap 

643 

644 def isCoalesceable(self): 

645 """A VDI is coalesceable if it has no siblings and is not a leaf""" 

646 return not self.scanError and \ 

647 self.parent and \ 

648 len(self.parent.children) == 1 and \ 

649 self.hidden and \ 

650 len(self.children) > 0 

651 

652 def isLeafCoalesceable(self): 

653 """A VDI is leaf-coalesceable if it has no siblings and is a leaf""" 

654 return not self.scanError and \ 

655 self.parent and \ 

656 len(self.parent.children) == 1 and \ 

657 not self.hidden and \ 

658 len(self.children) == 0 

659 

660 def canLiveCoalesce(self, speed): 

661 """Can we stop-and-leaf-coalesce this VDI? The VDI must be 

662 isLeafCoalesceable() already""" 

663 feasibleSize = False 

664 allowedDownTime = \ 

665 self.TIMEOUT_SAFETY_MARGIN * self.LIVE_LEAF_COALESCE_TIMEOUT 

666 vhd_size = self.getAllocatedSize() 

667 if speed: 

668 feasibleSize = \ 

669 vhd_size // speed < allowedDownTime 

670 else: 

671 feasibleSize = \ 

672 vhd_size < self.LIVE_LEAF_COALESCE_MAX_SIZE 

673 

674 return (feasibleSize or 

675 self.getConfig(self.DB_LEAFCLSC) == self.LEAFCLSC_FORCE) 

676 
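    # Worked example for canLiveCoalesce() above: the allowed downtime is
    # TIMEOUT_SAFETY_MARGIN * LIVE_LEAF_COALESCE_TIMEOUT == 0.5 * 10 == 5
    # seconds. With a measured speed of 10 MiB/s a leaf with 40 MiB allocated
    # needs ~4 s and is feasible; without a speed estimate only leaves smaller
    # than LIVE_LEAF_COALESCE_MAX_SIZE (20 MiB) qualify, unless LEAFCLSC_FORCE
    # is set.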

677 def getAllPrunable(self): 

678 if len(self.children) == 0: # base case 

679 # it is possible to have a hidden leaf that was recently coalesced 

680 # onto its parent, its children already relinked but not yet 

681 # reloaded - in which case it may not be garbage collected yet: 

682 # some tapdisks could still be using the file. 

683 if self.sr.journaler.get(self.JRN_RELINK, self.uuid): 

684 return [] 

685 if not self.scanError and self.hidden: 

686 return [self] 

687 return [] 

688 

689 thisPrunable = True 

690 vdiList = [] 

691 for child in self.children: 

692 childList = child.getAllPrunable() 

693 vdiList.extend(childList) 

694 if child not in childList: 

695 thisPrunable = False 

696 

697 # We can destroy the current VDI if all of its children are hidden, BUT the

698 # current VDI must be hidden too to do that! 

699 # Example in this case (after a failed live leaf coalesce): 

700 # 

701 # SMGC: [32436] SR 07ed ('linstor-nvme-sr') (2 VDIs in 1 VHD trees): 

702 # SMGC: [32436] b5458d61(1.000G/4.127M) 

703 # SMGC: [32436] *OLD_b545(1.000G/4.129M) 

704 # 

705 # OLD_b545 is hidden and must be removed, but b5458d61 must not be.

706 # Normally the delete action is not executed from this function but

707 # from `_liveLeafCoalesce`.

708 

709 if not self.scanError and not self.hidden and thisPrunable: 

710 vdiList.append(self) 

711 return vdiList 

712 

713 def getSizeVHD(self): 

714 return self._sizeVHD 

715 

716 def getAllocatedSize(self): 

717 return self._sizeAllocated 

718 

719 def getTreeRoot(self): 

720 "Get the root of the tree that self belongs to" 

721 root = self 

722 while root.parent: 

723 root = root.parent 

724 return root 

725 

726 def getTreeHeight(self): 

727 "Get the height of the subtree rooted at self" 

728 if len(self.children) == 0: 

729 return 1 

730 

731 maxChildHeight = 0 

732 for child in self.children: 

733 childHeight = child.getTreeHeight() 

734 if childHeight > maxChildHeight: 

735 maxChildHeight = childHeight 

736 

737 return maxChildHeight + 1 

738 

739 def getAllLeaves(self): 

740 "Get all leaf nodes in the subtree rooted at self" 

741 if len(self.children) == 0: 

742 return [self] 

743 

744 leaves = [] 

745 for child in self.children: 

746 leaves.extend(child.getAllLeaves()) 

747 return leaves 

748 

749 def updateBlockInfo(self): 

750 val = base64.b64encode(self._queryVHDBlocks()).decode() 

751 self.setConfig(VDI.DB_VHD_BLOCKS, val) 

752 return val 

753 

754 def rename(self, uuid): 

755 "Rename the VDI file" 

756 assert(not self.sr.vdis.get(uuid)) 

757 self._clearRef() 

758 oldUuid = self.uuid 

759 self.uuid = uuid 

760 self.children = [] 

761 # updating the children themselves is the responsibility of the caller 

762 del self.sr.vdis[oldUuid] 

763 self.sr.vdis[self.uuid] = self 

764 

765 def delete(self): 

766 "Physically delete the VDI" 

767 lock.Lock.cleanup(self.uuid, lvhdutil.NS_PREFIX_LVM + self.sr.uuid) 

768 lock.Lock.cleanupAll(self.uuid) 

769 self._clear() 

770 

771 def getParent(self): 

772 return vhdutil.getParent(self.path, lambda x: x.strip())  # coverage: the lambda on line 772 was never run

773 

774 def repair(self, parent): 

775 vhdutil.repair(parent) 

776 

777 def __str__(self): 

778 strHidden = "" 

779 if self.hidden:  # coverage: branch 779 ↛ 780 never taken (condition never true)

780 strHidden = "*" 

781 strSizeVirt = "?" 

782 if self.sizeVirt > 0:  # coverage: branch 782 ↛ 783 never taken (condition never true)

783 strSizeVirt = Util.num2str(self.sizeVirt) 

784 strSizeVHD = "?" 

785 if self._sizeVHD > 0:  # coverage: branch 785 ↛ 786 never taken (condition never true)

786 strSizeVHD = "/%s" % Util.num2str(self._sizeVHD) 

787 strSizeAllocated = "?" 

788 if self._sizeAllocated >= 0:  # coverage: branch 788 ↛ 789 never taken (condition never true)

789 strSizeAllocated = "/%s" % Util.num2str(self._sizeAllocated) 

790 strType = "" 

791 if self.raw: 

792 strType = "[RAW]" 

793 strSizeVHD = "" 

794 

795 return "%s%s(%s%s%s)%s" % (strHidden, self.uuid[0:8], strSizeVirt, 

796 strSizeVHD, strSizeAllocated, strType) 

797 

798 def validate(self, fast=False): 

799 if not vhdutil.check(self.path, fast=fast):  # coverage: branch 799 ↛ 800 never taken (condition never true)

800 raise util.SMException("VHD %s corrupted" % self) 

801 

802 def _clear(self): 

803 self.uuid = "" 

804 self.path = "" 

805 self.parentUuid = "" 

806 self.parent = None 

807 self._clearRef() 

808 

809 def _clearRef(self): 

810 self._vdiRef = None 

811 

812 def _doCoalesce(self): 

813 """Coalesce self onto parent. Only perform the actual coalescing of 

814 VHD, but not the subsequent relinking. We'll do that as the next step, 

815 after reloading the entire SR in case things have changed while we 

816 were coalescing""" 

817 self.validate() 

818 self.parent.validate(True) 

819 self.parent._increaseSizeVirt(self.sizeVirt) 

820 self.sr._updateSlavesOnResize(self.parent) 

821 self._coalesceVHD(0) 

822 self.parent.validate(True) 

823 #self._verifyContents(0) 

824 self.parent.updateBlockInfo() 

825 

826 def _verifyContents(self, timeOut): 

827 Util.log(" Coalesce verification on %s" % self) 

828 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

829 Util.runAbortable(lambda: self._runTapdiskDiff(), True, 

830 self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut) 

831 Util.log(" Coalesce verification succeeded") 

832 

833 def _runTapdiskDiff(self): 

834 cmd = "tapdisk-diff -n %s:%s -m %s:%s" % \ 

835 (self.getDriverName(), self.path, \ 

836 self.parent.getDriverName(), self.parent.path) 

837 Util.doexec(cmd, 0) 

838 return True 

839 

840 def _reportCoalesceError(vdi, ce): 

841 """Reports a coalesce error to XenCenter. 

842 

843 vdi: the VDI object on which the coalesce error occurred

844 ce: the CommandException that was raised""" 

845 

846 msg_name = os.strerror(ce.code) 

847 if ce.code == errno.ENOSPC: 

848 # TODO We could add more information here, e.g. exactly how much 

849 # space is required for the particular coalesce, as well as actions 

850 # to be taken by the user and consequences of not taking these 

851 # actions. 

852 msg_body = 'Ran out of space while coalescing.'

853 elif ce.code == errno.EIO: 

854 msg_body = 'I/O error while coalescing.' 

855 else: 

856 msg_body = '' 

857 util.SMlog('Coalesce failed on SR %s: %s (%s)' 

858 % (vdi.sr.uuid, msg_name, msg_body)) 

859 

860 # Create a XenCenter message, but don't spam. 

861 xapi = vdi.sr.xapi.session.xenapi 

862 sr_ref = xapi.SR.get_by_uuid(vdi.sr.uuid) 

863 oth_cfg = xapi.SR.get_other_config(sr_ref) 

864 if COALESCE_ERR_RATE_TAG in oth_cfg: 

865 coalesce_err_rate = float(oth_cfg[COALESCE_ERR_RATE_TAG]) 

866 else: 

867 coalesce_err_rate = DEFAULT_COALESCE_ERR_RATE 

868 

869 xcmsg = False 

870 if coalesce_err_rate == 0: 

871 xcmsg = True 

872 elif coalesce_err_rate > 0: 

873 now = datetime.datetime.now() 

874 sm_cfg = xapi.SR.get_sm_config(sr_ref) 

875 if COALESCE_LAST_ERR_TAG in sm_cfg: 

876 # seconds per message (minimum distance in time between two 

877 # messages in seconds) 

878 spm = datetime.timedelta(seconds=(1.0 / coalesce_err_rate) * 60) 

879 last = datetime.datetime.fromtimestamp( 

880 float(sm_cfg[COALESCE_LAST_ERR_TAG])) 

881 if now - last >= spm: 

882 xapi.SR.remove_from_sm_config(sr_ref, 

883 COALESCE_LAST_ERR_TAG) 

884 xcmsg = True 

885 else: 

886 xcmsg = True 

887 if xcmsg: 

888 xapi.SR.add_to_sm_config(sr_ref, COALESCE_LAST_ERR_TAG, 

889 str(now.strftime('%s'))) 

890 if xcmsg: 

891 xapi.message.create(msg_name, "3", "SR", vdi.sr.uuid, msg_body) 

892 _reportCoalesceError = staticmethod(_reportCoalesceError) 

893 

894 def coalesce(self): 

895 # size is returned in sectors 

896 return vhdutil.coalesce(self.path) * 512 

897 

898 def _doCoalesceVHD(vdi): 

899 try: 

900 startTime = time.time() 

901 vhdSize = vdi.getAllocatedSize() 

902 coalesced_size = vdi.coalesce() 

903 endTime = time.time() 

904 vdi.sr.recordStorageSpeed(startTime, endTime, coalesced_size) 

905 except util.CommandException as ce: 

906 # We use try/except for the following piece of code because it runs 

907 # in a separate process context and errors will not be caught and 

908 # reported by anyone. 

909 try: 

910 # Report coalesce errors back to user via XC 

911 VDI._reportCoalesceError(vdi, ce) 

912 except Exception as e: 

913 util.SMlog('failed to create XenCenter message: %s' % e) 

914 raise ce 

915 except: 

916 raise 

917 _doCoalesceVHD = staticmethod(_doCoalesceVHD) 

918 

919 def _vdi_is_raw(self, vdi_path): 

920 """ 

921 Given path to vdi determine if it is raw 

922 """ 

923 uuid = self.extractUuid(vdi_path) 

924 return self.sr.vdis[uuid].raw 

925 

926 def _coalesceVHD(self, timeOut): 

927 Util.log(" Running VHD coalesce on %s" % self) 

928 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)  # coverage: the lambda on line 928 was never run

929 try: 

930 util.fistpoint.activate_custom_fn( 

931 "cleanup_coalesceVHD_inject_failure", 

932 util.inject_failure) 

933 Util.runAbortable(lambda: VDI._doCoalesceVHD(self), None, 

934 self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut) 

935 except: 

936 # An exception at this phase could indicate a failure of the VHD coalesce

937 # or a kill of the coalesce process by runAbortable due to timeOut.

938 # Try a repair and reraise the exception.

939 parent = "" 

940 try: 

941 parent = self.getParent() 

942 if not self._vdi_is_raw(parent): 

943 # Repair error is logged and ignored. Error reraised later 

944 util.SMlog('Coalesce failed on %s, attempting repair on ' \ 

945 'parent %s' % (self.uuid, parent)) 

946 self.repair(parent) 

947 except Exception as e: 

948 util.SMlog('(error ignored) Failed to repair parent %s ' \ 

949 'after failed coalesce on %s, err: %s' % 

950 (parent, self.path, e)) 

951 raise 

952 

953 util.fistpoint.activate("LVHDRT_coalescing_VHD_data", self.sr.uuid) 

954 

955 def _relinkSkip(self): 

956 """Relink children of this VDI to point to the parent of this VDI""" 

957 abortFlag = IPCFlag(self.sr.uuid) 

958 for child in self.children: 

959 if abortFlag.test(FLAG_TYPE_ABORT):  # coverage: branch 959 ↛ 960 never taken (condition never true)

960 raise AbortException("Aborting due to signal") 

961 Util.log(" Relinking %s from %s to %s" % \ 

962 (child, self, self.parent)) 

963 util.fistpoint.activate("LVHDRT_relinking_grandchildren", self.sr.uuid) 

964 child._setParent(self.parent) 

965 self.children = [] 

966 

967 def _reloadChildren(self, vdiSkip): 

968 """Pause & unpause all VDIs in the subtree to cause blktap to reload 

969 the VHD metadata for this file in any online VDI""" 

970 abortFlag = IPCFlag(self.sr.uuid) 

971 for child in self.children: 

972 if child == vdiSkip: 

973 continue 

974 if abortFlag.test(FLAG_TYPE_ABORT):  # coverage: branch 974 ↛ 975 never taken (condition never true)

975 raise AbortException("Aborting due to signal") 

976 Util.log(" Reloading VDI %s" % child) 

977 child._reload() 

978 

979 def _reload(self): 

980 """Pause & unpause to cause blktap to reload the VHD metadata""" 

981 for child in self.children:  # coverage: branch 981 ↛ 982 never taken (the loop never started)

982 child._reload() 

983 

984 # only leaves can be attached 

985 if len(self.children) == 0:  # coverage: branch 985 ↛ exit never taken (condition never false)

986 try: 

987 self.delConfig(VDI.DB_VDI_RELINKING) 

988 except XenAPI.Failure as e: 

989 if not util.isInvalidVDI(e): 

990 raise 

991 self.refresh() 

992 

993 def _tagChildrenForRelink(self): 

994 if len(self.children) == 0: 

995 retries = 0 

996 try: 

997 while retries < 15: 

998 retries += 1 

999 if self.getConfig(VDI.DB_VDI_ACTIVATING) is not None: 

1000 Util.log("VDI %s is activating, wait to relink" % 

1001 self.uuid) 

1002 else: 

1003 self.setConfig(VDI.DB_VDI_RELINKING, "True") 

1004 

1005 if self.getConfig(VDI.DB_VDI_ACTIVATING): 

1006 self.delConfig(VDI.DB_VDI_RELINKING) 

1007 Util.log("VDI %s started activating while tagging" % 

1008 self.uuid) 

1009 else: 

1010 return 

1011 time.sleep(2) 

1012 

1013 raise util.SMException("Failed to tag vdi %s for relink" % self) 

1014 except XenAPI.Failure as e: 

1015 if not util.isInvalidVDI(e): 

1016 raise 

1017 

1018 for child in self.children: 

1019 child._tagChildrenForRelink() 

1020 

1021 def _loadInfoParent(self): 

1022 ret = vhdutil.getParent(self.path, lvhdutil.extractUuid) 

1023 if ret: 

1024 self.parentUuid = ret 

1025 

1026 def _setParent(self, parent): 

1027 vhdutil.setParent(self.path, parent.path, False) 

1028 self.parent = parent 

1029 self.parentUuid = parent.uuid 

1030 parent.children.append(self) 

1031 try: 

1032 self.setConfig(self.DB_VHD_PARENT, self.parentUuid) 

1033 Util.log("Updated the vhd-parent field for child %s with %s" % \ 

1034 (self.uuid, self.parentUuid)) 

1035 except: 

1036 Util.log("Failed to update %s with vhd-parent field %s" % \ 

1037 (self.uuid, self.parentUuid)) 

1038 

1039 def _loadInfoHidden(self): 

1040 hidden = vhdutil.getHidden(self.path) 

1041 self.hidden = (hidden != 0) 

1042 

1043 def _setHidden(self, hidden=True): 

1044 vhdutil.setHidden(self.path, hidden) 

1045 self.hidden = hidden 

1046 

1047 def _increaseSizeVirt(self, size, atomic=True): 

1048 """ensure the virtual size of 'self' is at least 'size'. Note that 

1049 resizing a VHD must always be offline and atomically: the file must 

1050 not be open by anyone and no concurrent operations may take place. 

1051 Thus we use the Agent API call for performing paused atomic 

1052 operations. If the caller is already in the atomic context, it must 

1053 call with atomic = False""" 

1054 if self.sizeVirt >= size:  # coverage: branch 1054 ↛ 1056 never taken (condition never false)

1055 return 

1056 Util.log(" Expanding VHD virt size for VDI %s: %s -> %s" % \ 

1057 (self, Util.num2str(self.sizeVirt), Util.num2str(size))) 

1058 

1059 msize = vhdutil.getMaxResizeSize(self.path) * 1024 * 1024 

1060 if (size <= msize): 

1061 vhdutil.setSizeVirtFast(self.path, size) 

1062 else: 

1063 if atomic: 

1064 vdiList = self._getAllSubtree() 

1065 self.sr.lock() 

1066 try: 

1067 self.sr.pauseVDIs(vdiList) 

1068 try: 

1069 self._setSizeVirt(size) 

1070 finally: 

1071 self.sr.unpauseVDIs(vdiList) 

1072 finally: 

1073 self.sr.unlock() 

1074 else: 

1075 self._setSizeVirt(size) 

1076 

1077 self.sizeVirt = vhdutil.getSizeVirt(self.path) 

1078 

1079 def _setSizeVirt(self, size): 

1080 """WARNING: do not call this method directly unless all VDIs in the 

1081 subtree are guaranteed to be unplugged (and remain so for the duration 

1082 of the operation): this operation is only safe for offline VHDs""" 

1083 jFile = os.path.join(self.sr.path, self.uuid) 

1084 vhdutil.setSizeVirt(self.path, size, jFile) 

1085 

1086 def _queryVHDBlocks(self): 

1087 return vhdutil.getBlockBitmap(self.path) 

1088 

1089 def _getCoalescedSizeData(self): 

1090 """Get the data size of the resulting VHD if we coalesce self onto 

1091 parent. We calculate the actual size by using the VHD block allocation 

1092 information (as opposed to just adding up the two VHD sizes to get an 

1093 upper bound)""" 

1094 # make sure we don't use stale BAT info from vdi_rec since the child 

1095 # was writable all this time 

1096 self.delConfig(VDI.DB_VHD_BLOCKS) 

1097 blocksChild = self.getVHDBlocks() 

1098 blocksParent = self.parent.getVHDBlocks() 

1099 numBlocks = Util.countBits(blocksChild, blocksParent) 

1100 Util.log("Num combined blocks = %d" % numBlocks) 

1101 sizeData = numBlocks * vhdutil.VHD_BLOCK_SIZE 

1102 assert(sizeData <= self.sizeVirt) 

1103 return sizeData 

1104 
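    # Worked example for _getCoalescedSizeData() above: the child and parent
    # block bitmaps are ORed and the set bits counted, so with 1000 combined
    # blocks and the usual 2 MiB VHD block size (vhdutil.VHD_BLOCK_SIZE) the
    # coalesced data size is about 2000 MiB, never exceeding the virtual size.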

1105 def _calcExtraSpaceForCoalescing(self): 

1106 sizeData = self._getCoalescedSizeData() 

1107 sizeCoalesced = sizeData + vhdutil.calcOverheadBitmap(sizeData) + \ 

1108 vhdutil.calcOverheadEmpty(self.sizeVirt) 

1109 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced)) 

1110 return sizeCoalesced - self.parent.getSizeVHD() 

1111 

1112 def _calcExtraSpaceForLeafCoalescing(self): 

1113 """How much extra space in the SR will be required to 

1114 [live-]leaf-coalesce this VDI""" 

1115 # the space requirements are the same as for inline coalesce 

1116 return self._calcExtraSpaceForCoalescing() 

1117 

1118 def _calcExtraSpaceForSnapshotCoalescing(self): 

1119 """How much extra space in the SR will be required to 

1120 snapshot-coalesce this VDI""" 

1121 return self._calcExtraSpaceForCoalescing() + \ 

1122 vhdutil.calcOverheadEmpty(self.sizeVirt) # extra snap leaf 

1123 

1124 def _getAllSubtree(self): 

1125 """Get self and all VDIs in the subtree of self as a flat list""" 

1126 vdiList = [self] 

1127 for child in self.children: 

1128 vdiList.extend(child._getAllSubtree()) 

1129 return vdiList 

1130 

1131 

1132class FileVDI(VDI): 

1133 """Object representing a VDI in a file-based SR (EXT or NFS)""" 

1134 

1135 @staticmethod 

1136 def extractUuid(path): 

1137 path = os.path.basename(path.strip()) 

1138 if not (path.endswith(vhdutil.FILE_EXTN_VHD) or  # coverage: branch 1138 ↛ 1140 never taken (condition never true)

1139 path.endswith(vhdutil.FILE_EXTN_RAW)): 

1140 return None 

1141 uuid = path.replace(vhdutil.FILE_EXTN_VHD, "").replace( \ 

1142 vhdutil.FILE_EXTN_RAW, "") 

1143 # TODO: validate UUID format 

1144 return uuid 

1145 
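    # Example for extractUuid() above: a path such as
    # "/var/run/sr-mount/<sr-uuid>/<vdi-uuid>.vhd" yields "<vdi-uuid>", while a
    # path without a .vhd/.raw extension yields None.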

1146 def __init__(self, sr, uuid, raw): 

1147 VDI.__init__(self, sr, uuid, raw) 

1148 if self.raw:  # coverage: branch 1148 ↛ 1149 never taken (condition never true)

1149 self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_RAW) 

1150 else: 

1151 self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_VHD) 

1152 

1153 def load(self, info=None): 

1154 if not info: 

1155 if not util.pathexists(self.path): 

1156 raise util.SMException("%s not found" % self.path) 

1157 try: 

1158 info = vhdutil.getVHDInfo(self.path, self.extractUuid) 

1159 except util.SMException: 

1160 Util.log(" [VDI %s: failed to read VHD metadata]" % self.uuid) 

1161 return 

1162 self.parent = None 

1163 self.children = [] 

1164 self.parentUuid = info.parentUuid 

1165 self.sizeVirt = info.sizeVirt 

1166 self._sizeVHD = info.sizePhys 

1167 self._sizeAllocated = info.sizeAllocated 

1168 self.hidden = info.hidden 

1169 self.scanError = False 

1170 self.path = os.path.join(self.sr.path, "%s%s" % \ 

1171 (self.uuid, vhdutil.FILE_EXTN_VHD)) 

1172 

1173 def rename(self, uuid): 

1174 oldPath = self.path 

1175 VDI.rename(self, uuid) 

1176 self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_VHD) 

1177 self.path = os.path.join(self.sr.path, self.fileName) 

1178 assert(not util.pathexists(self.path)) 

1179 Util.log("Renaming %s -> %s" % (oldPath, self.path)) 

1180 os.rename(oldPath, self.path) 

1181 

1182 def delete(self): 

1183 if len(self.children) > 0:  # coverage: branch 1183 ↛ 1184 never taken (condition never true)

1184 raise util.SMException("VDI %s has children, can't delete" % \ 

1185 self.uuid) 

1186 try: 

1187 self.sr.lock() 

1188 try: 

1189 os.unlink(self.path) 

1190 self.sr.forgetVDI(self.uuid) 

1191 finally: 

1192 self.sr.unlock() 

1193 except OSError: 

1194 raise util.SMException("os.unlink(%s) failed" % self.path) 

1195 VDI.delete(self) 

1196 

1197 

1198class LVHDVDI(VDI): 

1199 """Object representing a VDI in an LVHD SR""" 

1200 

1201 JRN_ZERO = "zero" # journal entry type for zeroing out end of parent 

1202 DRIVER_NAME_RAW = "aio" 

1203 

1204 def load(self, vdiInfo): 

1205 self.parent = None 

1206 self.children = [] 

1207 self._sizeVHD = -1 

1208 self._sizeAllocated = -1 

1209 self.scanError = vdiInfo.scanError 

1210 self.sizeLV = vdiInfo.sizeLV 

1211 self.sizeVirt = vdiInfo.sizeVirt 

1212 self.fileName = vdiInfo.lvName 

1213 self.lvActive = vdiInfo.lvActive 

1214 self.lvOpen = vdiInfo.lvOpen 

1215 self.lvReadonly = vdiInfo.lvReadonly 

1216 self.hidden = vdiInfo.hidden 

1217 self.parentUuid = vdiInfo.parentUuid 

1218 self.path = os.path.join(self.sr.path, self.fileName) 

1219 

1220 @staticmethod 

1221 def extractUuid(path): 

1222 return lvhdutil.extractUuid(path) 

1223 

1224 def getDriverName(self): 

1225 if self.raw: 

1226 return self.DRIVER_NAME_RAW 

1227 return self.DRIVER_NAME_VHD 

1228 

1229 def inflate(self, size): 

1230 """inflate the LV containing the VHD to 'size'""" 

1231 if self.raw: 

1232 return 

1233 self._activate() 

1234 self.sr.lock() 

1235 try: 

1236 lvhdutil.inflate(self.sr.journaler, self.sr.uuid, self.uuid, size) 

1237 util.fistpoint.activate("LVHDRT_inflating_the_parent", self.sr.uuid) 

1238 finally: 

1239 self.sr.unlock() 

1240 self.sizeLV = self.sr.lvmCache.getSize(self.fileName) 

1241 self._sizeVHD = -1 

1242 self._sizeAllocated = -1 

1243 

1244 def deflate(self): 

1245 """deflate the LV containing the VHD to minimum""" 

1246 if self.raw: 

1247 return 

1248 self._activate() 

1249 self.sr.lock() 

1250 try: 

1251 lvhdutil.deflate(self.sr.lvmCache, self.fileName, self.getSizeVHD()) 

1252 finally: 

1253 self.sr.unlock() 

1254 self.sizeLV = self.sr.lvmCache.getSize(self.fileName) 

1255 self._sizeVHD = -1 

1256 self._sizeAllocated = -1 

1257 

1258 def inflateFully(self): 

1259 self.inflate(lvhdutil.calcSizeVHDLV(self.sizeVirt)) 

1260 

1261 def inflateParentForCoalesce(self): 

1262 """Inflate the parent only as much as needed for the purposes of 

1263 coalescing""" 

1264 if self.parent.raw: 

1265 return 

1266 inc = self._calcExtraSpaceForCoalescing() 

1267 if inc > 0: 

1268 util.fistpoint.activate("LVHDRT_coalescing_before_inflate_grandparent", self.sr.uuid) 

1269 self.parent.inflate(self.parent.sizeLV + inc) 

1270 

1271 def updateBlockInfo(self): 

1272 if not self.raw: 

1273 return VDI.updateBlockInfo(self) 

1274 

1275 def rename(self, uuid): 

1276 oldUuid = self.uuid 

1277 oldLVName = self.fileName 

1278 VDI.rename(self, uuid) 

1279 self.fileName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + self.uuid 

1280 if self.raw: 

1281 self.fileName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_RAW] + self.uuid 

1282 self.path = os.path.join(self.sr.path, self.fileName) 

1283 assert(not self.sr.lvmCache.checkLV(self.fileName)) 

1284 

1285 self.sr.lvmCache.rename(oldLVName, self.fileName) 

1286 if self.sr.lvActivator.get(oldUuid, False): 

1287 self.sr.lvActivator.replace(oldUuid, self.uuid, self.fileName, False) 

1288 

1289 ns = lvhdutil.NS_PREFIX_LVM + self.sr.uuid 

1290 (cnt, bcnt) = RefCounter.check(oldUuid, ns) 

1291 RefCounter.set(self.uuid, cnt, bcnt, ns) 

1292 RefCounter.reset(oldUuid, ns) 

1293 

1294 def delete(self): 

1295 if len(self.children) > 0: 

1296 raise util.SMException("VDI %s has children, can't delete" % \ 

1297 self.uuid) 

1298 self.sr.lock() 

1299 try: 

1300 self.sr.lvmCache.remove(self.fileName) 

1301 self.sr.forgetVDI(self.uuid) 

1302 finally: 

1303 self.sr.unlock() 

1304 RefCounter.reset(self.uuid, lvhdutil.NS_PREFIX_LVM + self.sr.uuid) 

1305 VDI.delete(self) 

1306 

1307 def getSizeVHD(self): 

1308 if self._sizeVHD == -1: 

1309 self._loadInfoSizeVHD() 

1310 return self._sizeVHD 

1311 

1312 def _loadInfoSizeVHD(self): 

1313 """Get the physical utilization of the VHD file. We do it individually 

1314 (and not using the VHD batch scanner) as an optimization: this info is 

1315 relatively expensive and we need it only for VDIs involved in

1316 coalescing.""" 

1317 if self.raw: 

1318 return 

1319 self._activate() 

1320 self._sizeVHD = vhdutil.getSizePhys(self.path) 

1321 if self._sizeVHD <= 0: 

1322 raise util.SMException("phys size of %s = %d" % \ 

1323 (self, self._sizeVHD)) 

1324 

1325 def getAllocatedSize(self): 

1326 if self._sizeAllocated == -1: 

1327 self._loadInfoSizeAllocated() 

1328 return self._sizeAllocated 

1329 

1330 def _loadInfoSizeAllocated(self): 

1331 """ 

1332 Get the allocated size of the VHD volume. 

1333 """ 

1334 if self.raw: 

1335 return 

1336 self._activate() 

1337 self._sizeAllocated = vhdutil.getAllocatedSize(self.path) 

1338 

1339 def _loadInfoHidden(self): 

1340 if self.raw: 

1341 self.hidden = self.sr.lvmCache.getHidden(self.fileName) 

1342 else: 

1343 VDI._loadInfoHidden(self) 

1344 

1345 def _setHidden(self, hidden=True): 

1346 if self.raw: 

1347 self.sr.lvmCache.setHidden(self.fileName, hidden) 

1348 self.hidden = hidden 

1349 else: 

1350 VDI._setHidden(self, hidden) 

1351 

1352 def __str__(self): 

1353 strType = "VHD" 

1354 if self.raw: 

1355 strType = "RAW" 

1356 strHidden = "" 

1357 if self.hidden: 

1358 strHidden = "*" 

1359 strSizeVHD = "" 

1360 if self._sizeVHD > 0: 

1361 strSizeVHD = Util.num2str(self._sizeVHD) 

1362 strSizeAllocated = "" 

1363 if self._sizeAllocated >= 0: 

1364 strSizeAllocated = Util.num2str(self._sizeAllocated) 

1365 strActive = "n" 

1366 if self.lvActive: 

1367 strActive = "a" 

1368 if self.lvOpen: 

1369 strActive += "o" 

1370 return "%s%s[%s](%s/%s/%s/%s|%s)" % (strHidden, self.uuid[0:8], strType, 

1371 Util.num2str(self.sizeVirt), strSizeVHD, strSizeAllocated, 

1372 Util.num2str(self.sizeLV), strActive) 

1373 

1374 def validate(self, fast=False): 

1375 if not self.raw: 

1376 VDI.validate(self, fast) 

1377 

1378 def _doCoalesce(self): 

1379 """LVHD parents must first be activated, inflated, and made writable""" 

1380 try: 

1381 self._activateChain() 

1382 self.sr.lvmCache.setReadonly(self.parent.fileName, False) 

1383 self.parent.validate() 

1384 self.inflateParentForCoalesce() 

1385 VDI._doCoalesce(self) 

1386 finally: 

1387 self.parent._loadInfoSizeVHD() 

1388 self.parent.deflate() 

1389 self.sr.lvmCache.setReadonly(self.parent.fileName, True) 

1390 

1391 def _setParent(self, parent): 

1392 self._activate() 

1393 if self.lvReadonly: 

1394 self.sr.lvmCache.setReadonly(self.fileName, False) 

1395 

1396 try: 

1397 vhdutil.setParent(self.path, parent.path, parent.raw) 

1398 finally: 

1399 if self.lvReadonly: 

1400 self.sr.lvmCache.setReadonly(self.fileName, True) 

1401 self._deactivate() 

1402 self.parent = parent 

1403 self.parentUuid = parent.uuid 

1404 parent.children.append(self) 

1405 try: 

1406 self.setConfig(self.DB_VHD_PARENT, self.parentUuid) 

1407 Util.log("Updated the vhd-parent field for child %s with %s" % \ 

1408 (self.uuid, self.parentUuid)) 

1409 except: 

1410 Util.log("Failed to update the vhd-parent with %s for child %s" % \ 

1411 (self.parentUuid, self.uuid)) 

1412 

1413 def _activate(self): 

1414 self.sr.lvActivator.activate(self.uuid, self.fileName, False) 

1415 

1416 def _activateChain(self): 

1417 vdi = self 

1418 while vdi: 

1419 vdi._activate() 

1420 vdi = vdi.parent 

1421 

1422 def _deactivate(self): 

1423 self.sr.lvActivator.deactivate(self.uuid, False) 

1424 

1425 def _increaseSizeVirt(self, size, atomic=True): 

1426 "ensure the virtual size of 'self' is at least 'size'" 

1427 self._activate() 

1428 if not self.raw: 

1429 VDI._increaseSizeVirt(self, size, atomic) 

1430 return 

1431 

1432 # raw VDI case 

1433 offset = self.sizeLV 

1434 if self.sizeVirt < size: 

1435 oldSize = self.sizeLV 

1436 self.sizeLV = util.roundup(lvutil.LVM_SIZE_INCREMENT, size) 

1437 Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.sizeLV)) 

1438 self.sr.lvmCache.setSize(self.fileName, self.sizeLV) 

1439 offset = oldSize 

1440 unfinishedZero = False 

1441 jval = self.sr.journaler.get(self.JRN_ZERO, self.uuid) 

1442 if jval: 

1443 unfinishedZero = True 

1444 offset = int(jval) 

1445 length = self.sizeLV - offset 

1446 if not length: 

1447 return 

1448 

1449 if unfinishedZero: 

1450 Util.log(" ==> Redoing unfinished zeroing out") 

1451 else: 

1452 self.sr.journaler.create(self.JRN_ZERO, self.uuid, \ 

1453 str(offset)) 

1454 Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length)) 

1455 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

1456 func = lambda: util.zeroOut(self.path, offset, length) 

1457 Util.runAbortable(func, True, self.sr.uuid, abortTest, 

1458 VDI.POLL_INTERVAL, 0) 

1459 self.sr.journaler.remove(self.JRN_ZERO, self.uuid) 

1460 

1461 def _setSizeVirt(self, size): 

1462 """WARNING: do not call this method directly unless all VDIs in the 

1463 subtree are guaranteed to be unplugged (and remain so for the duration 

1464 of the operation): this operation is only safe for offline VHDs""" 

1465 self._activate() 

1466 jFile = lvhdutil.createVHDJournalLV(self.sr.lvmCache, self.uuid, 

1467 vhdutil.MAX_VHD_JOURNAL_SIZE) 

1468 try: 

1469 lvhdutil.setSizeVirt(self.sr.journaler, self.sr.uuid, self.uuid, 

1470 size, jFile) 

1471 finally: 

1472 lvhdutil.deleteVHDJournalLV(self.sr.lvmCache, self.uuid) 

1473 

1474 def _queryVHDBlocks(self): 

1475 self._activate() 

1476 return VDI._queryVHDBlocks(self) 

1477 

1478 def _calcExtraSpaceForCoalescing(self): 

1479 if self.parent.raw: 

1480 return 0 # raw parents are never deflated in the first place 

1481 sizeCoalesced = lvhdutil.calcSizeVHDLV(self._getCoalescedSizeData()) 

1482 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced)) 

1483 return sizeCoalesced - self.parent.sizeLV 

1484 

1485 def _calcExtraSpaceForLeafCoalescing(self): 

1486 """How much extra space in the SR will be required to 

1487 [live-]leaf-coalesce this VDI""" 

1488 # we can deflate the leaf to minimize the space requirements 

1489 deflateDiff = self.sizeLV - lvhdutil.calcSizeLV(self.getSizeVHD()) 

1490 return self._calcExtraSpaceForCoalescing() - deflateDiff 

1491 

1492 def _calcExtraSpaceForSnapshotCoalescing(self): 

1493 return self._calcExtraSpaceForCoalescing() + \ 

1494 lvhdutil.calcSizeLV(self.getSizeVHD()) 

1495 

1496 

1497class LinstorVDI(VDI): 

1498 """Object representing a VDI in a LINSTOR SR""" 

1499 

1500 VOLUME_LOCK_TIMEOUT = 30 

1501 

1502 def load(self, info=None): 

1503 self.parentUuid = info.parentUuid 

1504 self.scanError = True 

1505 self.parent = None 

1506 self.children = [] 

1507 

1508 self.fileName = self.sr._linstor.get_volume_name(self.uuid) 

1509 self.path = self.sr._linstor.build_device_path(self.fileName) 

1510 

1511 if not info: 

1512 try: 

1513 info = self.sr._vhdutil.get_vhd_info(self.uuid) 

1514 except util.SMException: 

1515 Util.log( 

1516 ' [VDI {}: failed to read VHD metadata]'.format(self.uuid) 

1517 ) 

1518 return 

1519 

1520 self.parentUuid = info.parentUuid 

1521 self.sizeVirt = info.sizeVirt 

1522 self._sizeVHD = -1 

1523 self._sizeAllocated = -1 

1524 self.drbd_size = -1 

1525 self.hidden = info.hidden 

1526 self.scanError = False 

1527 self.vdi_type = vhdutil.VDI_TYPE_VHD 

1528 

1529 def getSizeVHD(self, fetch=False): 

1530 if self._sizeVHD < 0 or fetch: 

1531 self._sizeVHD = self.sr._vhdutil.get_size_phys(self.uuid) 

1532 return self._sizeVHD 

1533 

1534 def getDrbdSize(self, fetch=False): 

1535 if self.drbd_size < 0 or fetch: 

1536 self.drbd_size = self.sr._vhdutil.get_drbd_size(self.uuid) 

1537 return self.drbd_size 

1538 

1539 def getAllocatedSize(self): 

1540 if self._sizeAllocated == -1: 

1541 if not self.raw: 

1542 self._sizeAllocated = self.sr._vhdutil.get_allocated_size(self.uuid) 

1543 return self._sizeAllocated 

1544 

1545 def inflate(self, size): 

1546 if self.raw: 

1547 return 

1548 self.sr.lock() 

1549 try: 

1550 # Ensure we use the real DRBD size and not the cached one. 

1551 # Why? Because this attribute can be changed if the volume is resized by the user.

1552 self.drbd_size = self.getDrbdSize(fetch=True) 

1553 self.sr._vhdutil.inflate(self.sr.journaler, self.uuid, self.path, size, self.drbd_size) 

1554 finally: 

1555 self.sr.unlock() 

1556 self.drbd_size = -1 

1557 self._sizeVHD = -1 

1558 self._sizeAllocated = -1 

1559 

1560 def deflate(self): 

1561 if self.raw: 

1562 return 

1563 self.sr.lock() 

1564 try: 

1565 # Ensure we use the real sizes and not the cached info. 

1566 self.drbd_size = self.getDrbdSize(fetch=True) 

1567 self._sizeVHD = self.getSizeVHD(fetch=True) 

1568 self.sr._vhdutil.force_deflate(self.path, self._sizeVHD, self.drbd_size, zeroize=False) 

1569 finally: 

1570 self.sr.unlock() 

1571 self.drbd_size = -1 

1572 self._sizeVHD = -1 

1573 self._sizeAllocated = -1 

1574 

1575 def inflateFully(self): 

1576 if not self.raw: 

1577 self.inflate(LinstorVhdUtil.compute_volume_size(self.sizeVirt, self.vdi_type)) 

1578 

1579 def rename(self, uuid): 

1580 Util.log('Renaming {} -> {} (path={})'.format( 

1581 self.uuid, uuid, self.path 

1582 )) 

1583 self.sr._linstor.update_volume_uuid(self.uuid, uuid) 

1584 VDI.rename(self, uuid) 

1585 

1586 def delete(self): 

1587 if len(self.children) > 0: 

1588 raise util.SMException( 

1589 'VDI {} has children, can\'t delete'.format(self.uuid) 

1590 ) 

1591 self.sr.lock() 

1592 try: 

1593 self.sr._linstor.destroy_volume(self.uuid) 

1594 self.sr.forgetVDI(self.uuid) 

1595 finally: 

1596 self.sr.unlock() 

1597 VDI.delete(self) 

1598 

1599 def validate(self, fast=False): 

1600 if not self.raw and not self.sr._vhdutil.check(self.uuid, fast=fast): 

1601 raise util.SMException('VHD {} corrupted'.format(self)) 

1602 

1603 def pause(self, failfast=False): 

1604 self.sr._linstor.ensure_volume_is_not_locked( 

1605 self.uuid, timeout=self.VOLUME_LOCK_TIMEOUT 

1606 ) 

1607 return super(LinstorVDI, self).pause(failfast) 

1608 

1609 def coalesce(self): 

1610 # Note: we raise `SMException` here to skip the current coalesce in case of failure.

1611 # With any other exception type the subsequent coalesce calls would not be executed.

1612 return self.sr._vhdutil.force_coalesce(self.path) * 512 

1613 

1614 def getParent(self): 

1615 return self.sr._vhdutil.get_parent( 

1616 self.sr._linstor.get_volume_uuid_from_device_path(self.path) 

1617 ) 

1618 

1619 def repair(self, parent_uuid): 

1620 self.sr._vhdutil.force_repair( 

1621 self.sr._linstor.get_device_path(parent_uuid) 

1622 ) 

1623 

1624 def _relinkSkip(self): 

1625 abortFlag = IPCFlag(self.sr.uuid) 

1626 for child in self.children: 

1627 if abortFlag.test(FLAG_TYPE_ABORT): 

1628 raise AbortException('Aborting due to signal') 

1629 Util.log( 

1630 ' Relinking {} from {} to {}'.format( 

1631 child, self, self.parent 

1632 ) 

1633 ) 

1634 

1635 session = child.sr.xapi.session 

1636 sr_uuid = child.sr.uuid 

1637 vdi_uuid = child.uuid 

1638 try: 

1639 self.sr._linstor.ensure_volume_is_not_locked( 

1640 vdi_uuid, timeout=self.VOLUME_LOCK_TIMEOUT 

1641 ) 

1642 blktap2.VDI.tap_pause(session, sr_uuid, vdi_uuid) 

1643 child._setParent(self.parent) 

1644 finally: 

1645 blktap2.VDI.tap_unpause(session, sr_uuid, vdi_uuid) 

1646 self.children = [] 

1647 

1648 def _setParent(self, parent): 

1649 self.sr._linstor.get_device_path(self.uuid) 

1650 self.sr._vhdutil.force_parent(self.path, parent.path) 

1651 self.parent = parent 

1652 self.parentUuid = parent.uuid 

1653 parent.children.append(self) 

1654 try: 

1655 self.setConfig(self.DB_VHD_PARENT, self.parentUuid) 

1656 Util.log("Updated the vhd-parent field for child %s with %s" % \ 

1657 (self.uuid, self.parentUuid)) 

1658 except: 

1659 Util.log("Failed to update %s with vhd-parent field %s" % \ 

1660 (self.uuid, self.parentUuid)) 

1661 

1662 def _doCoalesce(self): 

1663 try: 

1664 self._activateChain() 

1665 self.parent.validate() 

1666 self._inflateParentForCoalesce() 

1667 VDI._doCoalesce(self) 

1668 finally: 

1669 self.parent.deflate() 

1670 

1671 def _activateChain(self): 

1672 vdi = self 

1673 while vdi: 

1674 try: 

1675 p = self.sr._linstor.get_device_path(vdi.uuid) 

1676 except Exception as e: 

1677 # Use SMException to skip coalesce. 

1678 # Otherwise the GC is stopped... 

1679 raise util.SMException(str(e)) 

1680 vdi = vdi.parent 

1681 

1682 def _setHidden(self, hidden=True): 

1683 HIDDEN_TAG = 'hidden' 

1684 

1685 if self.raw: 

1686 self.sr._linstor.update_volume_metadata(self.uuid, { 

1687 HIDDEN_TAG: hidden 

1688 }) 

1689 self.hidden = hidden 

1690 else: 

1691 VDI._setHidden(self, hidden) 

1692 

1693 def _setSizeVirt(self, size): 

1694 jfile = self.uuid + '-jvhd' 

1695 self.sr._linstor.create_volume( 

1696 jfile, vhdutil.MAX_VHD_JOURNAL_SIZE, persistent=False, volume_name=jfile 

1697 ) 

1698 try: 

1699 self.inflate(LinstorVhdUtil.compute_volume_size(size, self.vdi_type)) 

1700 self.sr._vhdutil.set_size_virt(size, jfile) 

1701 finally: 

1702 try: 

1703 self.sr._linstor.destroy_volume(jfile) 

1704 except Exception: 

1705 # We can ignore it, in any case this volume is not persistent. 

1706 pass 

1707 

1708 def _queryVHDBlocks(self): 

1709 return self.sr._vhdutil.get_block_bitmap(self.uuid) 

1710 

1711 def _inflateParentForCoalesce(self): 

1712 if self.parent.raw: 

1713 return 

1714 inc = self._calcExtraSpaceForCoalescing() 

1715 if inc > 0: 

1716 self.parent.inflate(self.parent.getDrbdSize() + inc) 

1717 
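    # Space estimates used by the coalesce scheduler:
    #   _calcExtraSpaceForCoalescing: volume size needed to hold the coalesced
    #       data, minus the parent's current DRBD size.
    #   _calcExtraSpaceForLeafCoalescing: the same, minus whatever deflating
    #       this leaf would give back.
    #   _calcExtraSpaceForSnapshotCoalescing: the same, plus room for the
    #       temporary snapshot (roughly this leaf's current physical VHD size).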

1718 def _calcExtraSpaceForCoalescing(self): 

1719 if self.parent.raw: 

1720 return 0 

1721 size_coalesced = LinstorVhdUtil.compute_volume_size( 

1722 self._getCoalescedSizeData(), self.vdi_type 

1723 ) 

1724 Util.log("Coalesced size = %s" % Util.num2str(size_coalesced)) 

1725 return size_coalesced - self.parent.getDrbdSize() 

1726 

1727 def _calcExtraSpaceForLeafCoalescing(self): 

1728 assert self.getDrbdSize() > 0 

1729 assert self.getSizeVHD() > 0 

1730 deflate_diff = self.getDrbdSize() - LinstorVolumeManager.round_up_volume_size(self.getSizeVHD()) 

1731 assert deflate_diff >= 0 

1732 return self._calcExtraSpaceForCoalescing() - deflate_diff 

1733 

1734 def _calcExtraSpaceForSnapshotCoalescing(self): 

1735 assert self.getSizeVHD() > 0 

1736 return self._calcExtraSpaceForCoalescing() + \ 

1737 LinstorVolumeManager.round_up_volume_size(self.getSizeVHD()) 

1738 

1739################################################################################ 

1740# 

1741# SR 

1742# 
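# Base class for the garbage collector's view of a storage repository: it holds
# the scanned VDI forest (self.vdis / self.vdiTrees), picks coalesce and
# leaf-coalesce candidates, and drives deletion of garbage VDIs. FileSR, LVHDSR
# and LinstorSR below specialise the storage-specific parts.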

1743class SR(object): 

1744 class LogFilter: 

1745 def __init__(self, sr): 

1746 self.sr = sr 

1747 self.stateLogged = False 

1748 self.prevState = {} 

1749 self.currState = {} 

1750 

1751 def logState(self): 

1752 changes = "" 

1753 self.currState.clear() 

1754 for vdi in self.sr.vdiTrees: 

1755 self.currState[vdi.uuid] = self._getTreeStr(vdi) 

1756 if not self.prevState.get(vdi.uuid) or \ 

1757 self.prevState[vdi.uuid] != self.currState[vdi.uuid]: 

1758 changes += self.currState[vdi.uuid] 

1759 

1760 for uuid in self.prevState: 

1761 if not self.currState.get(uuid): 

1762 changes += "Tree %s gone\n" % uuid 

1763 

1764 result = "SR %s (%d VDIs in %d VHD trees): " % \ 

1765 (self.sr, len(self.sr.vdis), len(self.sr.vdiTrees)) 

1766 

1767 if len(changes) > 0: 

1768 if self.stateLogged: 

1769 result += "showing only VHD trees that changed:" 

1770 result += "\n%s" % changes 

1771 else: 

1772 result += "no changes" 

1773 

1774 for line in result.split("\n"): 

1775 Util.log("%s" % line) 

1776 self.prevState.clear() 

1777 for key, val in self.currState.items(): 

1778 self.prevState[key] = val 

1779 self.stateLogged = True 

1780 

1781 def logNewVDI(self, uuid): 

1782 if self.stateLogged: 

1783 Util.log("Found new VDI when scanning: %s" % uuid) 

1784 

1785 def _getTreeStr(self, vdi, indent=8): 

1786 treeStr = "%s%s\n" % (" " * indent, vdi) 

1787 for child in vdi.children: 

1788 treeStr += self._getTreeStr(child, indent + VDI.STR_TREE_INDENT) 

1789 return treeStr 

1790 

1791 TYPE_FILE = "file" 

1792 TYPE_LVHD = "lvhd" 

1793 TYPE_LINSTOR = "linstor" 

1794 TYPES = [TYPE_LVHD, TYPE_FILE, TYPE_LINSTOR] 

1795 

1796 LOCK_RETRY_INTERVAL = 3 

1797 LOCK_RETRY_ATTEMPTS = 20 

1798 LOCK_RETRY_ATTEMPTS_LOCK = 100 

1799 

1800 SCAN_RETRY_ATTEMPTS = 3 

1801 

1802 JRN_CLONE = "clone" # journal entry type for the clone operation (from SM) 

1803 TMP_RENAME_PREFIX = "OLD_" 

1804 

1805 KEY_OFFLINE_COALESCE_NEEDED = "leaf_coalesce_need_offline" 

1806 KEY_OFFLINE_COALESCE_OVERRIDE = "leaf_coalesce_offline_override" 

1807 

1808 def getInstance(uuid, xapiSession, createLock=True, force=False): 

1809 xapi = XAPI(xapiSession, uuid) 

1810 type = normalizeType(xapi.srRecord["type"]) 

1811 if type == SR.TYPE_FILE: 

1812 return FileSR(uuid, xapi, createLock, force) 

1813 elif type == SR.TYPE_LVHD: 

1814 return LVHDSR(uuid, xapi, createLock, force) 

1815 elif type == SR.TYPE_LINSTOR: 

1816 return LinstorSR(uuid, xapi, createLock, force) 

1817 raise util.SMException("SR type %s not recognized" % type) 

1818 getInstance = staticmethod(getInstance) 
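    # A minimal usage sketch (hypothetical; the actual GC entry points in this
    # script wrap calls like these in process locking and abort handling):
    #
    #   sr = SR.getInstance(sr_uuid, session)
    #   if sr.gcEnabled():
    #       sr.scanLocked()
    #       if sr.hasWork():
    #           sr.garbageCollect()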

1819 

1820 def __init__(self, uuid, xapi, createLock, force): 

1821 self.logFilter = self.LogFilter(self) 

1822 self.uuid = uuid 

1823 self.path = "" 

1824 self.name = "" 

1825 self.vdis = {} 

1826 self.vdiTrees = [] 

1827 self.journaler = None 

1828 self.xapi = xapi 

1829 self._locked = 0 

1830 self._srLock = None 

1831 if createLock:

1832 self._srLock = lock.Lock(vhdutil.LOCK_TYPE_SR, self.uuid) 

1833 else: 

1834 Util.log("Requested no SR locking") 

1835 self.name = self.xapi.srRecord["name_label"] 

1836 self._failedCoalesceTargets = [] 

1837 

1838 if not self.xapi.isPluggedHere(): 

1839 if force:

1840 Util.log("SR %s not attached on this host, ignoring" % uuid) 

1841 else: 

1842 if not self.wait_for_plug(): 

1843 raise util.SMException("SR %s not attached on this host" % uuid) 

1844 

1845 if force:

1846 Util.log("Not checking if we are Master (SR %s)" % uuid) 

1847 elif not self.xapi.isMaster():

1848 raise util.SMException("This host is NOT master, will not run") 

1849 

1850 def wait_for_plug(self): 

1851 for _ in range(1, 10): 

1852 time.sleep(2) 

1853 if self.xapi.isPluggedHere(): 

1854 return True 

1855 return False 

1856 

1857 def gcEnabled(self, refresh=True): 

1858 if refresh: 

1859 self.xapi.srRecord = \ 

1860 self.xapi.session.xenapi.SR.get_record(self.xapi._srRef) 

1861 if self.xapi.srRecord["other_config"].get(VDI.DB_GC) == "false": 

1862 Util.log("GC is disabled for this SR, abort") 

1863 return False 

1864 return True 

1865 

1866 def scan(self, force=False): 

1867 """Scan the SR and load VDI info for each VDI. If called repeatedly, 

1868 update VDI objects if they already exist""" 

1869 pass # abstract 

1870 

1871 def scanLocked(self, force=False): 

1872 self.lock() 

1873 try: 

1874 self.scan(force) 

1875 finally: 

1876 self.unlock() 

1877 

1878 def getVDI(self, uuid): 

1879 return self.vdis.get(uuid) 

1880 

1881 def hasWork(self): 

1882 if len(self.findGarbage()) > 0: 

1883 return True 

1884 if self.findCoalesceable(): 

1885 return True 

1886 if self.findLeafCoalesceable(): 

1887 return True 

1888 if self.needUpdateBlockInfo(): 

1889 return True 

1890 return False 

1891 

1892 def findCoalesceable(self): 

1893 """Find a coalesceable VDI. Return a vdi that should be coalesced 

1894 (choosing one among all coalesceable candidates according to some 

1895 criteria) or None if there is no VDI that could be coalesced""" 

1896 

1897 candidates = [] 

1898 

1899 srSwitch = self.xapi.srRecord["other_config"].get(VDI.DB_COALESCE) 

1900 if srSwitch == "false": 

1901 Util.log("Coalesce disabled for this SR") 

1902 return candidates 

1903 

1904 # finish any VDI for which a relink journal entry exists first 

1905 journals = self.journaler.getAll(VDI.JRN_RELINK) 

1906 for uuid in journals: 

1907 vdi = self.getVDI(uuid) 

1908 if vdi and vdi not in self._failedCoalesceTargets: 

1909 return vdi 

1910 

1911 for vdi in self.vdis.values(): 

1912 if vdi.isCoalesceable() and vdi not in self._failedCoalesceTargets: 

1913 candidates.append(vdi) 

1914 Util.log("%s is coalescable" % vdi.uuid) 

1915 

1916 self.xapi.update_task_progress("coalescable", len(candidates)) 

1917 

1918 # pick one in the tallest tree 

1919 treeHeight = dict() 

1920 for c in candidates: 

1921 height = c.getTreeRoot().getTreeHeight() 

1922 if treeHeight.get(height): 

1923 treeHeight[height].append(c) 

1924 else: 

1925 treeHeight[height] = [c] 

1926 

1927 freeSpace = self.getFreeSpace() 

1928 heights = list(treeHeight.keys()) 

1929 heights.sort(reverse=True) 

1930 for h in heights: 

1931 for c in treeHeight[h]: 

1932 spaceNeeded = c._calcExtraSpaceForCoalescing() 

1933 if spaceNeeded <= freeSpace: 

1934 Util.log("Coalesce candidate: %s (tree height %d)" % (c, h)) 

1935 return c 

1936 else: 

1937 Util.log("No space to coalesce %s (free space: %d)" % \ 

1938 (c, freeSpace)) 

1939 return None 

1940 

1941 def getSwitch(self, key): 

1942 return self.xapi.srRecord["other_config"].get(key) 

1943 

1944 def forbiddenBySwitch(self, switch, condition, fail_msg): 

1945 srSwitch = self.getSwitch(switch) 

1946 ret = False 

1947 if srSwitch: 

1948 ret = srSwitch == condition 

1949 

1950 if ret: 

1951 Util.log(fail_msg) 

1952 

1953 return ret 

1954 

1955 def leafCoalesceForbidden(self): 

1956 return (self.forbiddenBySwitch(VDI.DB_COALESCE, 

1957 "false", 

1958 "Coalesce disabled for this SR") or 

1959 self.forbiddenBySwitch(VDI.DB_LEAFCLSC, 

1960 VDI.LEAFCLSC_DISABLED, 

1961 "Leaf-coalesce disabled for this SR")) 

1962 

1963 def findLeafCoalesceable(self): 

1964 """Find leaf-coalesceable VDIs in each VHD tree""" 

1965 

1966 candidates = [] 

1967 if self.leafCoalesceForbidden(): 

1968 return candidates 

1969 

1970 self.gatherLeafCoalesceable(candidates) 

1971 

1972 self.xapi.update_task_progress("coalescable", len(candidates)) 

1973 

1974 freeSpace = self.getFreeSpace() 

1975 for candidate in candidates: 

1976 # check the space constraints to see if leaf-coalesce is actually 

1977 # feasible for this candidate 

1978 spaceNeeded = candidate._calcExtraSpaceForSnapshotCoalescing() 

1979 spaceNeededLive = spaceNeeded 

1980 if spaceNeeded > freeSpace: 

1981 spaceNeededLive = candidate._calcExtraSpaceForLeafCoalescing() 

1982 if candidate.canLiveCoalesce(self.getStorageSpeed()): 

1983 spaceNeeded = spaceNeededLive 

1984 

1985 if spaceNeeded <= freeSpace: 

1986 Util.log("Leaf-coalesce candidate: %s" % candidate) 

1987 return candidate 

1988 else: 

1989 Util.log("No space to leaf-coalesce %s (free space: %d)" % \ 

1990 (candidate, freeSpace)) 

1991 if spaceNeededLive <= freeSpace: 

1992 Util.log("...but enough space if skip snap-coalesce") 

1993 candidate.setConfig(VDI.DB_LEAFCLSC, 

1994 VDI.LEAFCLSC_OFFLINE) 

1995 

1996 return None 

1997 

1998 def gatherLeafCoalesceable(self, candidates): 

1999 for vdi in self.vdis.values(): 

2000 if not vdi.isLeafCoalesceable(): 

2001 continue 

2002 if vdi in self._failedCoalesceTargets: 

2003 continue 

2004 if vdi.getConfig(vdi.DB_ONBOOT) == vdi.ONBOOT_RESET: 

2005 Util.log("Skipping reset-on-boot %s" % vdi) 

2006 continue 

2007 if vdi.getConfig(vdi.DB_ALLOW_CACHING): 

2008 Util.log("Skipping allow_caching=true %s" % vdi) 

2009 continue 

2010 if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_DISABLED: 

2011 Util.log("Leaf-coalesce disabled for %s" % vdi) 

2012 continue 

2013 if not (AUTO_ONLINE_LEAF_COALESCE_ENABLED or 

2014 vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE): 

2015 continue 

2016 candidates.append(vdi) 

2017 

2018 def coalesce(self, vdi, dryRun=False): 

2019 """Coalesce vdi onto parent""" 

2020 Util.log("Coalescing %s -> %s" % (vdi, vdi.parent)) 

2021 if dryRun:

2022 return 

2023 

2024 try: 

2025 self._coalesce(vdi) 

2026 except util.SMException as e: 

2027 if isinstance(e, AbortException):

2028 self.cleanup() 

2029 raise 

2030 else: 

2031 self._failedCoalesceTargets.append(vdi) 

2032 Util.logException("coalesce") 

2033 Util.log("Coalesce failed, skipping") 

2034 self.cleanup() 

2035 

2036 def coalesceLeaf(self, vdi, dryRun=False): 

2037 """Leaf-coalesce vdi onto parent""" 

2038 Util.log("Leaf-coalescing %s -> %s" % (vdi, vdi.parent)) 

2039 if dryRun: 

2040 return 

2041 

2042 try: 

2043 uuid = vdi.uuid 

2044 try: 

2045 # "vdi" object will no longer be valid after this call 

2046 self._coalesceLeaf(vdi) 

2047 finally: 

2048 vdi = self.getVDI(uuid) 

2049 if vdi: 

2050 vdi.delConfig(vdi.DB_LEAFCLSC) 

2051 except AbortException: 

2052 self.cleanup() 

2053 raise 

2054 except (util.SMException, XenAPI.Failure) as e: 

2055 self._failedCoalesceTargets.append(vdi) 

2056 Util.logException("leaf-coalesce") 

2057 Util.log("Leaf-coalesce failed on %s, skipping" % vdi) 

2058 self.cleanup() 

2059 

2060 def garbageCollect(self, dryRun=False): 

2061 vdiList = self.findGarbage() 

2062 Util.log("Found %d VDIs for deletion:" % len(vdiList)) 

2063 for vdi in vdiList: 

2064 Util.log(" %s" % vdi) 

2065 if not dryRun: 

2066 self.deleteVDIs(vdiList) 

2067 self.cleanupJournals(dryRun) 

2068 

2069 def findGarbage(self): 

2070 vdiList = [] 

2071 for vdi in self.vdiTrees: 

2072 vdiList.extend(vdi.getAllPrunable()) 

2073 return vdiList 

2074 

2075 def deleteVDIs(self, vdiList): 

2076 for vdi in vdiList: 

2077 if IPCFlag(self.uuid).test(FLAG_TYPE_ABORT): 

2078 raise AbortException("Aborting due to signal") 

2079 Util.log("Deleting unlinked VDI %s" % vdi) 

2080 self.deleteVDI(vdi) 

2081 

2082 def deleteVDI(self, vdi): 

2083 assert(len(vdi.children) == 0) 

2084 del self.vdis[vdi.uuid] 

2085 if vdi.parent:

2086 vdi.parent.children.remove(vdi) 

2087 if vdi in self.vdiTrees:

2088 self.vdiTrees.remove(vdi) 

2089 vdi.delete() 

2090 

2091 def forgetVDI(self, vdiUuid): 

2092 self.xapi.forgetVDI(self.uuid, vdiUuid) 

2093 

2094 def pauseVDIs(self, vdiList): 

2095 paused = [] 

2096 failed = False 

2097 for vdi in vdiList: 

2098 try: 

2099 vdi.pause() 

2100 paused.append(vdi) 

2101 except: 

2102 Util.logException("pauseVDIs") 

2103 failed = True 

2104 break 

2105 

2106 if failed: 

2107 self.unpauseVDIs(paused) 

2108 raise util.SMException("Failed to pause VDIs") 

2109 

2110 def unpauseVDIs(self, vdiList): 

2111 failed = False 

2112 for vdi in vdiList: 

2113 try: 

2114 vdi.unpause() 

2115 except: 

2116 Util.log("ERROR: Failed to unpause VDI %s" % vdi) 

2117 failed = True 

2118 if failed: 

2119 raise util.SMException("Failed to unpause VDIs") 

2120 

2121 def getFreeSpace(self): 

2122 return 0 

2123 

2124 def cleanup(self): 

2125 Util.log("In cleanup") 

2126 return 

2127 

2128 def __str__(self): 

2129 if self.name: 

2130 ret = "%s ('%s')" % (self.uuid[0:4], self.name) 

2131 else: 

2132 ret = "%s" % self.uuid 

2133 return ret 

2134 

2135 def lock(self): 

2136 """Acquire the SR lock. Nested acquire()'s are ok. Check for Abort 

2137 signal to avoid deadlocking (trying to acquire the SR lock while the 

2138 lock is held by a process that is trying to abort us)""" 

2139 if not self._srLock: 

2140 return 

2141 

2142 if self._locked == 0: 

2143 abortFlag = IPCFlag(self.uuid) 

2144 for i in range(SR.LOCK_RETRY_ATTEMPTS_LOCK): 

2145 if self._srLock.acquireNoblock(): 

2146 self._locked += 1 

2147 return 

2148 if abortFlag.test(FLAG_TYPE_ABORT): 

2149 raise AbortException("Abort requested") 

2150 time.sleep(SR.LOCK_RETRY_INTERVAL) 

2151 raise util.SMException("Unable to acquire the SR lock") 

2152 

2153 self._locked += 1 

2154 

2155 def unlock(self): 

2156 if not self._srLock:

2157 return 

2158 assert(self._locked > 0) 

2159 self._locked -= 1 

2160 if self._locked == 0: 

2161 self._srLock.release() 

2162 

2163 def needUpdateBlockInfo(self): 

2164 for vdi in self.vdis.values(): 

2165 if vdi.scanError or len(vdi.children) == 0: 

2166 continue 

2167 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

2168 return True 

2169 return False 

2170 

2171 def updateBlockInfo(self): 

2172 for vdi in self.vdis.values(): 

2173 if vdi.scanError or len(vdi.children) == 0: 

2174 continue 

2175 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

2176 vdi.updateBlockInfo() 

2177 

2178 def cleanupCoalesceJournals(self): 

2179 """Remove stale coalesce VDI indicators""" 

2180 entries = self.journaler.getAll(VDI.JRN_COALESCE) 

2181 for uuid, jval in entries.items(): 

2182 self.journaler.remove(VDI.JRN_COALESCE, uuid) 

2183 

2184 def cleanupJournals(self, dryRun=False): 

2185 """delete journal entries for non-existing VDIs""" 

2186 for t in [LVHDVDI.JRN_ZERO, VDI.JRN_RELINK, SR.JRN_CLONE]: 

2187 entries = self.journaler.getAll(t) 

2188 for uuid, jval in entries.items(): 

2189 if self.getVDI(uuid): 

2190 continue 

2191 if t == SR.JRN_CLONE: 

2192 baseUuid, clonUuid = jval.split("_") 

2193 if self.getVDI(baseUuid): 

2194 continue 

2195 Util.log(" Deleting stale '%s' journal entry for %s " 

2196 "(%s)" % (t, uuid, jval)) 

2197 if not dryRun: 

2198 self.journaler.remove(t, uuid) 

2199 

2200 def cleanupCache(self, maxAge=-1): 

2201 return 0 

2202 

2203 def _coalesce(self, vdi): 

2204 if self.journaler.get(vdi.JRN_RELINK, vdi.uuid):

2205 # this means we had done the actual coalescing already and just 

2206 # need to finish relinking and/or refreshing the children 

2207 Util.log("==> Coalesce apparently already done: skipping") 

2208 else: 

2209 # JRN_COALESCE is used to check which VDI is being coalesced in 

2210 # order to decide whether to abort the coalesce. We remove the 

2211 # journal as soon as the VHD coalesce step is done, because we 

2212 # don't expect the rest of the process to take long 

2213 self.journaler.create(vdi.JRN_COALESCE, vdi.uuid, "1") 

2214 vdi._doCoalesce() 

2215 self.journaler.remove(vdi.JRN_COALESCE, vdi.uuid) 

2216 

2217 util.fistpoint.activate("LVHDRT_before_create_relink_journal", self.uuid) 

2218 

2219 # we now need to relink the children: lock the SR to prevent ops 

2220 # like SM.clone from manipulating the VDIs we'll be relinking and 

2221 # rescan the SR first in case the children changed since the last 

2222 # scan 

2223 self.journaler.create(vdi.JRN_RELINK, vdi.uuid, "1") 

2224 

2225 self.lock() 

2226 try: 

2227 vdi.parent._tagChildrenForRelink() 

2228 self.scan() 

2229 vdi._relinkSkip() 

2230 finally: 

2231 self.unlock() 

2232 # Reload the children to leave things consistent 

2233 vdi.parent._reloadChildren(vdi) 

2234 

2235 self.journaler.remove(vdi.JRN_RELINK, vdi.uuid) 

2236 self.deleteVDI(vdi) 

2237 
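    # CoalesceTracker watches repeated snapshot-coalesce passes over one leaf
    # and tells the caller to give up when they stop converging: too many
    # iterations overall, too many passes without the VHD shrinking, or the
    # size bouncing back well above the minimum already reached.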

2238 class CoalesceTracker: 

2239 GRACE_ITERATIONS = 1 

2240 MAX_ITERATIONS_NO_PROGRESS = 3 

2241 MAX_ITERATIONS = 10 

2242 MAX_INCREASE_FROM_MINIMUM = 1.2 

2243 HISTORY_STRING = "Iteration: {its} -- Initial size {initSize}" \ 

2244 " --> Final size {finSize}" 

2245 

2246 def __init__(self, sr): 

2247 self.itsNoProgress = 0 

2248 self.its = 0 

2249 self.minSize = float("inf") 

2250 self.history = [] 

2251 self.reason = "" 

2252 self.startSize = None 

2253 self.finishSize = None 

2254 self.sr = sr 

2255 

2256 def abortCoalesce(self, prevSize, curSize): 

2257 res = False 

2258 

2259 self.its += 1 

2260 self.history.append(self.HISTORY_STRING.format(its=self.its, 

2261 initSize=prevSize, 

2262 finSize=curSize)) 

2263 

2264 self.finishSize = curSize 

2265 

2266 if self.startSize is None: 

2267 self.startSize = prevSize 

2268 

2269 if curSize < self.minSize: 

2270 self.minSize = curSize 

2271 

2272 if prevSize < self.minSize: 

2273 self.minSize = prevSize 

2274 

2275 if prevSize < curSize: 

2276 self.itsNoProgress += 1 

2277 Util.log("No progress, attempt:" 

2278 " {attempt}".format(attempt=self.itsNoProgress)) 

2279 util.fistpoint.activate("cleanup_tracker_no_progress", self.sr.uuid) 

2280 

2281 if (not res) and (self.its > self.MAX_ITERATIONS): 

2282 max = self.MAX_ITERATIONS 

2283 self.reason = \ 

2284 "Max iterations ({max}) exceeded".format(max=max) 

2285 res = True 

2286 

2287 if (not res) and (self.itsNoProgress > 

2288 self.MAX_ITERATIONS_NO_PROGRESS): 

2289 max = self.MAX_ITERATIONS_NO_PROGRESS 

2290 self.reason = \ 

2291 "No progress made for {max} iterations".format(max=max) 

2292 res = True 

2293 

2294 maxSizeFromMin = self.MAX_INCREASE_FROM_MINIMUM * self.minSize 

2295 if (self.its > self.GRACE_ITERATIONS and 

2296 (not res) and (curSize > maxSizeFromMin)): 

2297 self.reason = "Unexpected bump in size," \ 

2298 " compared to minimum acheived" 

2299 res = True 

2300 

2301 return res 

2302 

2303 def printReasoning(self): 

2304 Util.log("Aborted coalesce") 

2305 for hist in self.history: 

2306 Util.log(hist) 

2307 Util.log(self.reason) 

2308 Util.log("Starting size was {size}" 

2309 .format(size=self.startSize)) 

2310 Util.log("Final size was {size}" 

2311 .format(size=self.finishSize)) 

2312 Util.log("Minimum size acheived was {size}" 

2313 .format(size=self.minSize)) 

2314 
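    # Overall leaf-coalesce strategy: repeat an online snapshot-coalesce cycle
    # (which shrinks the leaf) until canLiveCoalesce() judges the remaining
    # data small enough, given the measured storage speed, to coalesce within
    # the live timeout; then pause the VDI and finish in _liveLeafCoalesce().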

2315 def _coalesceLeaf(self, vdi): 

2316 """Leaf-coalesce VDI vdi. Return true if we succeed, false if we cannot 

2317 complete due to external changes, namely vdi_delete and vdi_snapshot 

2318 that alter leaf-coalescibility of vdi""" 

2319 tracker = self.CoalesceTracker(self) 

2320 while not vdi.canLiveCoalesce(self.getStorageSpeed()): 

2321 prevSizeVHD = vdi.getSizeVHD() 

2322 if not self._snapshotCoalesce(vdi):

2323 return False 

2324 if tracker.abortCoalesce(prevSizeVHD, vdi.getSizeVHD()): 

2325 tracker.printReasoning() 

2326 raise util.SMException("VDI {uuid} could not be coalesced" 

2327 .format(uuid=vdi.uuid)) 

2328 return self._liveLeafCoalesce(vdi) 

2329 
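    # Coalesce throughput bookkeeping: recordStorageSpeed() appends the observed
    # bytes/second of a finished coalesce to the per-SR speed log (SPEED_LOG_ROOT,
    # trimmed to the last N_RUNNING_AVERAGE samples), and getStorageSpeed()
    # returns the running average that feeds canLiveCoalesce() above.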

2330 def calcStorageSpeed(self, startTime, endTime, vhdSize): 

2331 speed = None 

2332 total_time = endTime - startTime 

2333 if total_time > 0: 

2334 speed = float(vhdSize) / float(total_time) 

2335 return speed 

2336 

2337 def writeSpeedToFile(self, speed): 

2338 content = [] 

2339 speedFile = None 

2340 path = SPEED_LOG_ROOT.format(uuid=self.uuid) 

2341 self.lock() 

2342 try: 

2343 Util.log("Writing to file: {myfile}".format(myfile=path)) 

2344 lines = "" 

2345 if not os.path.isfile(path): 

2346 lines = str(speed) + "\n" 

2347 else: 

2348 speedFile = open(path, "r+") 

2349 content = speedFile.readlines() 

2350 content.append(str(speed) + "\n") 

2351 if len(content) > N_RUNNING_AVERAGE: 

2352 del content[0] 

2353 lines = "".join(content) 

2354 

2355 util.atomicFileWrite(path, VAR_RUN, lines) 

2356 finally: 

2357 if speedFile is not None: 

2358 speedFile.close() 

2359 Util.log("Closing file: {myfile}".format(myfile=path)) 

2360 self.unlock() 

2361 

2362 def recordStorageSpeed(self, startTime, endTime, vhdSize): 

2363 speed = self.calcStorageSpeed(startTime, endTime, vhdSize) 

2364 if speed is None: 

2365 return 

2366 

2367 self.writeSpeedToFile(speed) 

2368 

2369 def getStorageSpeed(self): 

2370 speedFile = None 

2371 path = SPEED_LOG_ROOT.format(uuid=self.uuid) 

2372 self.lock() 

2373 try: 

2374 speed = None 

2375 if os.path.isfile(path): 

2376 speedFile = open(path) 

2377 content = speedFile.readlines() 

2378 try: 

2379 content = [float(i) for i in content] 

2380 except ValueError: 

2381 Util.log("Something bad in the speed log:{log}". 

2382 format(log=speedFile.readlines())) 

2383 return speed 

2384 

2385 if len(content): 

2386 speed = sum(content) / float(len(content)) 

2387 if speed <= 0:

2388 # Defensive, should be impossible. 

2389 Util.log("Bad speed: {speed} calculated for SR: {uuid}". 

2390 format(speed=speed, uuid=self.uuid)) 

2391 speed = None 

2392 else: 

2393 Util.log("Speed file empty for SR: {uuid}". 

2394 format(uuid=self.uuid)) 

2395 else: 

2396 Util.log("Speed log missing for SR: {uuid}". 

2397 format(uuid=self.uuid)) 

2398 return speed 

2399 finally: 

2400 if not (speedFile is None): 

2401 speedFile.close() 

2402 self.unlock() 

2403 

2404 def _snapshotCoalesce(self, vdi): 

2405 # Note that because we are not holding any locks here, concurrent SM 

2406 # operations may change this tree under our feet. In particular, vdi 

2407 # can be deleted, or it can be snapshotted. 

2408 assert(AUTO_ONLINE_LEAF_COALESCE_ENABLED) 

2409 Util.log("Single-snapshotting %s" % vdi) 

2410 util.fistpoint.activate("LVHDRT_coaleaf_delay_1", self.uuid) 

2411 try: 

2412 ret = self.xapi.singleSnapshotVDI(vdi) 

2413 Util.log("Single-snapshot returned: %s" % ret) 

2414 except XenAPI.Failure as e: 

2415 if util.isInvalidVDI(e): 

2416 Util.log("The VDI appears to have been concurrently deleted") 

2417 return False 

2418 raise 

2419 self.scanLocked() 

2420 tempSnap = vdi.parent 

2421 if not tempSnap.isCoalesceable(): 

2422 Util.log("The VDI appears to have been concurrently snapshotted") 

2423 return False 

2424 Util.log("Coalescing parent %s" % tempSnap) 

2425 util.fistpoint.activate("LVHDRT_coaleaf_delay_2", self.uuid) 

2426 vhdSize = vdi.getSizeVHD() 

2427 self._coalesce(tempSnap) 

2428 if not vdi.isLeafCoalesceable(): 

2429 Util.log("The VDI tree appears to have been altered since") 

2430 return False 

2431 return True 

2432 

2433 def _liveLeafCoalesce(self, vdi): 

2434 util.fistpoint.activate("LVHDRT_coaleaf_delay_3", self.uuid) 

2435 self.lock() 

2436 try: 

2437 self.scan() 

2438 if not self.getVDI(vdi.uuid): 

2439 Util.log("The VDI appears to have been deleted meanwhile") 

2440 return False 

2441 if not vdi.isLeafCoalesceable(): 

2442 Util.log("The VDI is no longer leaf-coalesceable") 

2443 return False 

2444 

2445 uuid = vdi.uuid 

2446 vdi.pause(failfast=True) 

2447 try: 

2448 try: 

2449 # "vdi" object will no longer be valid after this call 

2450 self._doCoalesceLeaf(vdi) 

2451 except: 

2452 Util.logException("_doCoalesceLeaf") 

2453 self._handleInterruptedCoalesceLeaf() 

2454 raise 

2455 finally: 

2456 vdi = self.getVDI(uuid) 

2457 if vdi: 

2458 vdi.ensureUnpaused() 

2459 vdiOld = self.getVDI(self.TMP_RENAME_PREFIX + uuid) 

2460 if vdiOld: 

2461 util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid) 

2462 self.deleteVDI(vdiOld) 

2463 util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid) 

2464 finally: 

2465 self.cleanup() 

2466 self.unlock() 

2467 self.logFilter.logState() 

2468 return True 

2469 

2470 def _doCoalesceLeaf(self, vdi): 

2471 """Actual coalescing of a leaf VDI onto parent. Must be called in an 

2472 offline/atomic context""" 

2473 self.journaler.create(VDI.JRN_LEAF, vdi.uuid, vdi.parent.uuid) 

2474 self._prepareCoalesceLeaf(vdi) 

2475 vdi.parent._setHidden(False) 

2476 vdi.parent._increaseSizeVirt(vdi.sizeVirt, False) 

2477 vdi.validate(True) 

2478 vdi.parent.validate(True) 

2479 util.fistpoint.activate("LVHDRT_coaleaf_before_coalesce", self.uuid) 

2480 timeout = vdi.LIVE_LEAF_COALESCE_TIMEOUT 

2481 if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE: 

2482 Util.log("Leaf-coalesce forced, will not use timeout") 

2483 timeout = 0 

2484 vdi._coalesceVHD(timeout) 

2485 util.fistpoint.activate("LVHDRT_coaleaf_after_coalesce", self.uuid) 

2486 vdi.parent.validate(True) 

2487 #vdi._verifyContents(timeout / 2) 

2488 

2489 # rename 

2490 vdiUuid = vdi.uuid 

2491 oldName = vdi.fileName 

2492 origParentUuid = vdi.parent.uuid 

2493 vdi.rename(self.TMP_RENAME_PREFIX + vdiUuid) 

2494 util.fistpoint.activate("LVHDRT_coaleaf_one_renamed", self.uuid) 

2495 vdi.parent.rename(vdiUuid) 

2496 util.fistpoint.activate("LVHDRT_coaleaf_both_renamed", self.uuid) 

2497 self._updateSlavesOnRename(vdi.parent, oldName, origParentUuid) 

2498 

2499 # Note that "vdi.parent" is now the single remaining leaf and "vdi" is 

2500 # garbage 

2501 

2502 # update the VDI record 

2503 vdi.parent.delConfig(VDI.DB_VHD_PARENT) 

2504 if vdi.parent.raw: 

2505 vdi.parent.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_RAW) 

2506 vdi.parent.delConfig(VDI.DB_VHD_BLOCKS) 

2507 util.fistpoint.activate("LVHDRT_coaleaf_after_vdirec", self.uuid) 

2508 

2509 self._updateNode(vdi) 

2510 

2511 # delete the obsolete leaf & inflate the parent (in that order, to 

2512 # minimize free space requirements) 

2513 parent = vdi.parent 

2514 vdi._setHidden(True) 

2515 vdi.parent.children = [] 

2516 vdi.parent = None 

2517 

2518 extraSpace = self._calcExtraSpaceNeeded(vdi, parent) 

2519 freeSpace = self.getFreeSpace() 

2520 if freeSpace < extraSpace: 

2521 # don't delete unless we need the space: deletion is time-consuming 

2522 # because it requires contacting the slaves, and we're paused here 

2523 util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid) 

2524 self.deleteVDI(vdi) 

2525 util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid) 

2526 

2527 util.fistpoint.activate("LVHDRT_coaleaf_before_remove_j", self.uuid) 

2528 self.journaler.remove(VDI.JRN_LEAF, vdiUuid) 

2529 

2530 self.forgetVDI(origParentUuid) 

2531 self._finishCoalesceLeaf(parent) 

2532 self._updateSlavesOnResize(parent) 

2533 

2534 def _calcExtraSpaceNeeded(self, child, parent): 

2535 assert(not parent.raw) # raw parents not supported 

2536 extra = child.getSizeVHD() - parent.getSizeVHD() 

2537 if extra < 0: 

2538 extra = 0 

2539 return extra 

2540 

2541 def _prepareCoalesceLeaf(self, vdi): 

2542 pass 

2543 

2544 def _updateNode(self, vdi): 

2545 pass 

2546 

2547 def _finishCoalesceLeaf(self, parent): 

2548 pass 

2549 

2550 def _updateSlavesOnUndoLeafCoalesce(self, parent, child): 

2551 pass 

2552 

2553 def _updateSlavesOnRename(self, vdi, oldName, origParentUuid): 

2554 pass 

2555 

2556 def _updateSlavesOnResize(self, vdi): 

2557 pass 

2558 

2559 def _removeStaleVDIs(self, uuidsPresent): 

2560 for uuid in list(self.vdis.keys()): 

2561 if not uuid in uuidsPresent: 

2562 Util.log("VDI %s disappeared since last scan" % \ 

2563 self.vdis[uuid]) 

2564 del self.vdis[uuid] 

2565 

2566 def _handleInterruptedCoalesceLeaf(self): 

2567 """An interrupted leaf-coalesce operation may leave the VHD tree in an 

2568 inconsistent state. If the old-leaf VDI is still present, we revert the 

2569 operation (in case the original error is persistent); otherwise we must 

2570 finish the operation""" 

2571 # abstract 

2572 pass 

2573 
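    # _buildTree() rebuilds the VDI forest from parentUuid links. An orphan whose
    # name carries TMP_RENAME_PREFIX is expected (leftover of an interrupted leaf
    # coalesce) and becomes its own tree; any other missing parent is an error
    # unless force is set.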

2574 def _buildTree(self, force): 

2575 self.vdiTrees = [] 

2576 for vdi in self.vdis.values(): 

2577 if vdi.parentUuid: 

2578 parent = self.getVDI(vdi.parentUuid) 

2579 if not parent: 

2580 if vdi.uuid.startswith(self.TMP_RENAME_PREFIX): 

2581 self.vdiTrees.append(vdi) 

2582 continue 

2583 if force: 

2584 Util.log("ERROR: Parent VDI %s not found! (for %s)" % \ 

2585 (vdi.parentUuid, vdi.uuid)) 

2586 self.vdiTrees.append(vdi) 

2587 continue 

2588 else: 

2589 raise util.SMException("Parent VDI %s of %s not " \ 

2590 "found" % (vdi.parentUuid, vdi.uuid)) 

2591 vdi.parent = parent 

2592 parent.children.append(vdi) 

2593 else: 

2594 self.vdiTrees.append(vdi) 

2595 

2596 
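# FileSR: SRs whose VDIs live as VHD (or raw) files under the SR mount point;
# also owns the IntelliCache cleanup of *.vhdcache files used by NFS SRs.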

2597class FileSR(SR): 

2598 TYPE = SR.TYPE_FILE 

2599 CACHE_FILE_EXT = ".vhdcache" 

2600 # cache cleanup actions 

2601 CACHE_ACTION_KEEP = 0 

2602 CACHE_ACTION_REMOVE = 1 

2603 CACHE_ACTION_REMOVE_IF_INACTIVE = 2 

2604 

2605 def __init__(self, uuid, xapi, createLock, force): 

2606 SR.__init__(self, uuid, xapi, createLock, force) 

2607 self.path = "/var/run/sr-mount/%s" % self.uuid 

2608 self.journaler = fjournaler.Journaler(self.path) 

2609 

2610 def scan(self, force=False): 

2611 if not util.pathexists(self.path): 

2612 raise util.SMException("directory %s not found!" % self.uuid) 

2613 vhds = self._scan(force) 

2614 for uuid, vhdInfo in vhds.items(): 

2615 vdi = self.getVDI(uuid) 

2616 if not vdi: 

2617 self.logFilter.logNewVDI(uuid) 

2618 vdi = FileVDI(self, uuid, False) 

2619 self.vdis[uuid] = vdi 

2620 vdi.load(vhdInfo) 

2621 uuidsPresent = list(vhds.keys()) 

2622 rawList = [x for x in os.listdir(self.path) if x.endswith(vhdutil.FILE_EXTN_RAW)] 

2623 for rawName in rawList: 

2624 uuid = FileVDI.extractUuid(rawName) 

2625 uuidsPresent.append(uuid) 

2626 vdi = self.getVDI(uuid) 

2627 if not vdi: 

2628 self.logFilter.logNewVDI(uuid) 

2629 vdi = FileVDI(self, uuid, True) 

2630 self.vdis[uuid] = vdi 

2631 self._removeStaleVDIs(uuidsPresent) 

2632 self._buildTree(force) 

2633 self.logFilter.logState() 

2634 self._handleInterruptedCoalesceLeaf() 

2635 

2636 def getFreeSpace(self): 

2637 return util.get_fs_size(self.path) - util.get_fs_utilisation(self.path) 

2638 

2639 def deleteVDIs(self, vdiList): 

2640 rootDeleted = False 

2641 for vdi in vdiList: 

2642 if not vdi.parent: 

2643 rootDeleted = True 

2644 break 

2645 SR.deleteVDIs(self, vdiList) 

2646 if self.xapi.srRecord["type"] == "nfs" and rootDeleted: 

2647 self.xapi.markCacheSRsDirty() 

2648 

2649 def cleanupCache(self, maxAge=-1): 

2650 """Clean up IntelliCache cache files. Caches for leaf nodes are 

2651 removed when the leaf node no longer exists or its allow-caching 

2652 attribute is not set. Caches for parent nodes are removed when the 

2653 parent node no longer exists or it hasn't been used in more than 

2654 <maxAge> hours. 

2655 Return number of caches removed. 

2656 """ 

2657 numRemoved = 0 

2658 cacheFiles = [x for x in os.listdir(self.path) if self._isCacheFileName(x)] 

2659 Util.log("Found %d cache files" % len(cacheFiles)) 

2660 cutoff = datetime.datetime.now() - datetime.timedelta(hours=maxAge) 

2661 for cacheFile in cacheFiles: 

2662 uuid = cacheFile[:-len(self.CACHE_FILE_EXT)] 

2663 action = self.CACHE_ACTION_KEEP 

2664 rec = self.xapi.getRecordVDI(uuid) 

2665 if not rec: 

2666 Util.log("Cache %s: VDI doesn't exist" % uuid) 

2667 action = self.CACHE_ACTION_REMOVE 

2668 elif rec["managed"] and not rec["allow_caching"]: 

2669 Util.log("Cache %s: caching disabled" % uuid) 

2670 action = self.CACHE_ACTION_REMOVE 

2671 elif not rec["managed"] and maxAge >= 0: 

2672 lastAccess = datetime.datetime.fromtimestamp( \ 

2673 os.path.getatime(os.path.join(self.path, cacheFile))) 

2674 if lastAccess < cutoff: 

2675 Util.log("Cache %s: older than %d hrs" % (uuid, maxAge)) 

2676 action = self.CACHE_ACTION_REMOVE_IF_INACTIVE 

2677 

2678 if action == self.CACHE_ACTION_KEEP: 

2679 Util.log("Keeping cache %s" % uuid) 

2680 continue 

2681 

2682 lockId = uuid 

2683 parentUuid = None 

2684 if rec and rec["managed"]: 

2685 parentUuid = rec["sm_config"].get("vhd-parent") 

2686 if parentUuid: 

2687 lockId = parentUuid 

2688 

2689 cacheLock = lock.Lock(blktap2.VDI.LOCK_CACHE_SETUP, lockId) 

2690 cacheLock.acquire() 

2691 try: 

2692 if self._cleanupCache(uuid, action): 

2693 numRemoved += 1 

2694 finally: 

2695 cacheLock.release() 

2696 return numRemoved 

2697 

2698 def _cleanupCache(self, uuid, action): 

2699 assert(action != self.CACHE_ACTION_KEEP) 

2700 rec = self.xapi.getRecordVDI(uuid) 

2701 if rec and rec["allow_caching"]: 

2702 Util.log("Cache %s appears to have become valid" % uuid) 

2703 return False 

2704 

2705 fullPath = os.path.join(self.path, uuid + self.CACHE_FILE_EXT) 

2706 tapdisk = blktap2.Tapdisk.find_by_path(fullPath) 

2707 if tapdisk: 

2708 if action == self.CACHE_ACTION_REMOVE_IF_INACTIVE: 

2709 Util.log("Cache %s still in use" % uuid) 

2710 return False 

2711 Util.log("Shutting down tapdisk for %s" % fullPath) 

2712 tapdisk.shutdown() 

2713 

2714 Util.log("Deleting file %s" % fullPath) 

2715 os.unlink(fullPath) 

2716 return True 

2717 

2718 def _isCacheFileName(self, name): 

2719 return (len(name) == Util.UUID_LEN + len(self.CACHE_FILE_EXT)) and \ 

2720 name.endswith(self.CACHE_FILE_EXT) 

2721 

2722 def _scan(self, force): 

2723 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

2724 error = False 

2725 pattern = os.path.join(self.path, "*%s" % vhdutil.FILE_EXTN_VHD) 

2726 vhds = vhdutil.getAllVHDs(pattern, FileVDI.extractUuid) 

2727 for uuid, vhdInfo in vhds.items(): 

2728 if vhdInfo.error: 

2729 error = True 

2730 break 

2731 if not error: 

2732 return vhds 

2733 Util.log("Scan error on attempt %d" % i) 

2734 if force: 

2735 return vhds 

2736 raise util.SMException("Scan error") 

2737 

2738 def deleteVDI(self, vdi): 

2739 self._checkSlaves(vdi) 

2740 SR.deleteVDI(self, vdi) 

2741 

2742 def _checkSlaves(self, vdi): 

2743 onlineHosts = self.xapi.getOnlineHosts() 

2744 abortFlag = IPCFlag(self.uuid) 

2745 for pbdRecord in self.xapi.getAttachedPBDs(): 

2746 hostRef = pbdRecord["host"] 

2747 if hostRef == self.xapi._hostRef: 

2748 continue 

2749 if abortFlag.test(FLAG_TYPE_ABORT): 

2750 raise AbortException("Aborting due to signal") 

2751 try: 

2752 self._checkSlave(hostRef, vdi) 

2753 except util.CommandException: 

2754 if hostRef in onlineHosts: 

2755 raise 

2756 

2757 def _checkSlave(self, hostRef, vdi): 

2758 call = (hostRef, "nfs-on-slave", "check", {'path': vdi.path}) 

2759 Util.log("Checking with slave: %s" % repr(call)) 

2760 _host = self.xapi.session.xenapi.host 

2761 text = _host.call_plugin( * call) 

2762 
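    # For each interrupted leaf-coalesce journal entry, decide between undoing
    # and finishing by looking at what is left on disk: if the parent VHD/raw
    # file or the temporarily renamed child still exists, the rename sequence
    # did not complete and we undo; otherwise only the final cleanup remains.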

2763 def _handleInterruptedCoalesceLeaf(self): 

2764 entries = self.journaler.getAll(VDI.JRN_LEAF) 

2765 for uuid, parentUuid in entries.items(): 

2766 fileList = os.listdir(self.path) 

2767 childName = uuid + vhdutil.FILE_EXTN_VHD 

2768 tmpChildName = self.TMP_RENAME_PREFIX + uuid + vhdutil.FILE_EXTN_VHD 

2769 parentName1 = parentUuid + vhdutil.FILE_EXTN_VHD 

2770 parentName2 = parentUuid + vhdutil.FILE_EXTN_RAW 

2771 parentPresent = (parentName1 in fileList or parentName2 in fileList) 

2772 if parentPresent or tmpChildName in fileList: 

2773 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

2774 else: 

2775 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

2776 self.journaler.remove(VDI.JRN_LEAF, uuid) 

2777 vdi = self.getVDI(uuid) 

2778 if vdi: 

2779 vdi.ensureUnpaused() 

2780 

2781 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

2782 Util.log("*** UNDO LEAF-COALESCE") 

2783 parent = self.getVDI(parentUuid) 

2784 if not parent: 

2785 parent = self.getVDI(childUuid) 

2786 if not parent: 

2787 raise util.SMException("Neither %s nor %s found" % \ 

2788 (parentUuid, childUuid)) 

2789 Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid)) 

2790 parent.rename(parentUuid) 

2791 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid) 

2792 

2793 child = self.getVDI(childUuid) 

2794 if not child: 

2795 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

2796 if not child: 

2797 raise util.SMException("Neither %s nor %s found" % \ 

2798 (childUuid, self.TMP_RENAME_PREFIX + childUuid)) 

2799 Util.log("Renaming child back to %s" % childUuid) 

2800 child.rename(childUuid) 

2801 Util.log("Updating the VDI record") 

2802 child.setConfig(VDI.DB_VHD_PARENT, parentUuid) 

2803 child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD) 

2804 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid) 

2805 

2806 if child.hidden: 

2807 child._setHidden(False) 

2808 if not parent.hidden: 

2809 parent._setHidden(True) 

2810 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

2811 util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid) 

2812 Util.log("*** leaf-coalesce undo successful") 

2813 if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"): 

2814 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED) 

2815 

2816 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

2817 Util.log("*** FINISH LEAF-COALESCE") 

2818 vdi = self.getVDI(childUuid) 

2819 if not vdi: 

2820 raise util.SMException("VDI %s not found" % childUuid) 

2821 try: 

2822 self.forgetVDI(parentUuid) 

2823 except XenAPI.Failure: 

2824 pass 

2825 self._updateSlavesOnResize(vdi) 

2826 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid) 

2827 Util.log("*** finished leaf-coalesce successfully") 

2828 

2829 
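# LVHDSR: LVM-backed SRs (iSCSI/HBA variants, see SUBTYPES) where each VHD lives
# in a logical volume of the SR's volume group. Adds LV activation and refcount
# handling, and keeps slave hosts' view of renamed/resized LVs in sync through
# the on-slave plugin calls below.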

2830class LVHDSR(SR): 

2831 TYPE = SR.TYPE_LVHD 

2832 SUBTYPES = ["lvhdoiscsi", "lvhdohba"] 

2833 

2834 def __init__(self, uuid, xapi, createLock, force): 

2835 SR.__init__(self, uuid, xapi, createLock, force) 

2836 self.vgName = "%s%s" % (lvhdutil.VG_PREFIX, self.uuid) 

2837 self.path = os.path.join(lvhdutil.VG_LOCATION, self.vgName) 

2838 self.lvmCache = lvmcache.LVMCache(self.vgName) 

2839 self.lvActivator = LVActivator(self.uuid, self.lvmCache) 

2840 self.journaler = journaler.Journaler(self.lvmCache) 

2841 

2842 def deleteVDI(self, vdi): 

2843 if self.lvActivator.get(vdi.uuid, False): 

2844 self.lvActivator.deactivate(vdi.uuid, False) 

2845 self._checkSlaves(vdi) 

2846 SR.deleteVDI(self, vdi) 

2847 

2848 def forgetVDI(self, vdiUuid): 

2849 SR.forgetVDI(self, vdiUuid) 

2850 mdpath = os.path.join(self.path, lvutil.MDVOLUME_NAME) 

2851 LVMMetadataHandler(mdpath).deleteVdiFromMetadata(vdiUuid) 

2852 

2853 def getFreeSpace(self): 

2854 stats = lvutil._getVGstats(self.vgName) 

2855 return stats['physical_size'] - stats['physical_utilisation'] 

2856 

2857 def cleanup(self): 

2858 if not self.lvActivator.deactivateAll(): 

2859 Util.log("ERROR deactivating LVs while cleaning up") 

2860 

2861 def needUpdateBlockInfo(self): 

2862 for vdi in self.vdis.values(): 

2863 if vdi.scanError or vdi.raw or len(vdi.children) == 0: 

2864 continue 

2865 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

2866 return True 

2867 return False 

2868 

2869 def updateBlockInfo(self): 

2870 numUpdated = 0 

2871 for vdi in self.vdis.values(): 

2872 if vdi.scanError or vdi.raw or len(vdi.children) == 0: 

2873 continue 

2874 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

2875 vdi.updateBlockInfo() 

2876 numUpdated += 1 

2877 if numUpdated: 

2878 # deactivate the LVs back sooner rather than later. If we don't 

2879 # now, by the time this thread gets to deactivations, another one 

2880 # might have leaf-coalesced a node and deleted it, making the child 

2881 # inherit the refcount value and preventing the correct decrement 

2882 self.cleanup() 

2883 

2884 def scan(self, force=False): 

2885 vdis = self._scan(force) 

2886 for uuid, vdiInfo in vdis.items(): 

2887 vdi = self.getVDI(uuid) 

2888 if not vdi: 

2889 self.logFilter.logNewVDI(uuid) 

2890 vdi = LVHDVDI(self, uuid, 

2891 vdiInfo.vdiType == vhdutil.VDI_TYPE_RAW) 

2892 self.vdis[uuid] = vdi 

2893 vdi.load(vdiInfo) 

2894 self._removeStaleVDIs(vdis.keys()) 

2895 self._buildTree(force) 

2896 self.logFilter.logState() 

2897 self._handleInterruptedCoalesceLeaf() 

2898 

2899 def _scan(self, force): 

2900 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

2901 error = False 

2902 self.lvmCache.refresh() 

2903 vdis = lvhdutil.getVDIInfo(self.lvmCache) 

2904 for uuid, vdiInfo in vdis.items(): 

2905 if vdiInfo.scanError: 

2906 error = True 

2907 break 

2908 if not error: 

2909 return vdis 

2910 Util.log("Scan error, retrying (%d)" % i) 

2911 if force: 

2912 return vdis 

2913 raise util.SMException("Scan error") 

2914 

2915 def _removeStaleVDIs(self, uuidsPresent): 

2916 for uuid in list(self.vdis.keys()): 

2917 if not uuid in uuidsPresent: 

2918 Util.log("VDI %s disappeared since last scan" % \ 

2919 self.vdis[uuid]) 

2920 del self.vdis[uuid] 

2921 if self.lvActivator.get(uuid, False): 

2922 self.lvActivator.remove(uuid, False) 

2923 

2924 def _liveLeafCoalesce(self, vdi): 

2925 """If the parent is raw and the child was resized (virt. size), then 

2926 we'll need to resize the parent, which can take a while due to zeroing 

2927 out of the extended portion of the LV. Do it before pausing the child 

2928 to avoid a protracted downtime""" 

2929 if vdi.parent.raw and vdi.sizeVirt > vdi.parent.sizeVirt: 

2930 self.lvmCache.setReadonly(vdi.parent.fileName, False) 

2931 vdi.parent._increaseSizeVirt(vdi.sizeVirt) 

2932 

2933 return SR._liveLeafCoalesce(self, vdi) 

2934 

2935 def _prepareCoalesceLeaf(self, vdi): 

2936 vdi._activateChain() 

2937 self.lvmCache.setReadonly(vdi.parent.fileName, False) 

2938 vdi.deflate() 

2939 vdi.inflateParentForCoalesce() 

2940 

2941 def _updateNode(self, vdi): 

2942 # fix the refcounts: the remaining node should inherit the binary 

2943 # refcount from the leaf (because if it was online, it should remain 

2944 # refcounted as such), but the normal refcount from the parent (because 

2945 # this node is really the parent node) - minus 1 if it is online (since 

2946 # non-leaf nodes increment their normal counts when they are online and 

2947 # we are now a leaf, storing that 1 in the binary refcount). 

2948 ns = lvhdutil.NS_PREFIX_LVM + self.uuid 

2949 cCnt, cBcnt = RefCounter.check(vdi.uuid, ns) 

2950 pCnt, pBcnt = RefCounter.check(vdi.parent.uuid, ns) 

2951 pCnt = pCnt - cBcnt 

2952 assert(pCnt >= 0) 

2953 RefCounter.set(vdi.parent.uuid, pCnt, cBcnt, ns) 

2954 

2955 def _finishCoalesceLeaf(self, parent): 

2956 if not parent.isSnapshot() or parent.isAttachedRW(): 

2957 parent.inflateFully() 

2958 else: 

2959 parent.deflate() 

2960 

2961 def _calcExtraSpaceNeeded(self, child, parent): 

2962 return lvhdutil.calcSizeVHDLV(parent.sizeVirt) - parent.sizeLV 

2963 

2964 def _handleInterruptedCoalesceLeaf(self): 

2965 entries = self.journaler.getAll(VDI.JRN_LEAF) 

2966 for uuid, parentUuid in entries.items(): 

2967 childLV = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + uuid 

2968 tmpChildLV = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + \ 

2969 self.TMP_RENAME_PREFIX + uuid 

2970 parentLV1 = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + parentUuid 

2971 parentLV2 = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_RAW] + parentUuid 

2972 parentPresent = (self.lvmCache.checkLV(parentLV1) or \ 

2973 self.lvmCache.checkLV(parentLV2)) 

2974 if parentPresent or self.lvmCache.checkLV(tmpChildLV): 

2975 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

2976 else: 

2977 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

2978 self.journaler.remove(VDI.JRN_LEAF, uuid) 

2979 vdi = self.getVDI(uuid) 

2980 if vdi: 

2981 vdi.ensureUnpaused() 

2982 

2983 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

2984 Util.log("*** UNDO LEAF-COALESCE") 

2985 parent = self.getVDI(parentUuid) 

2986 if not parent: 

2987 parent = self.getVDI(childUuid) 

2988 if not parent: 

2989 raise util.SMException("Neither %s nor %s found" % \ 

2990 (parentUuid, childUuid)) 

2991 Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid)) 

2992 parent.rename(parentUuid) 

2993 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid) 

2994 

2995 child = self.getVDI(childUuid) 

2996 if not child: 

2997 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

2998 if not child: 

2999 raise util.SMException("Neither %s nor %s found" % \ 

3000 (childUuid, self.TMP_RENAME_PREFIX + childUuid)) 

3001 Util.log("Renaming child back to %s" % childUuid) 

3002 child.rename(childUuid) 

3003 Util.log("Updating the VDI record") 

3004 child.setConfig(VDI.DB_VHD_PARENT, parentUuid) 

3005 child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD) 

3006 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid) 

3007 

3008 # refcount (best effort - assume that it had succeeded if the 

3009 # second rename succeeded; if not, this adjustment will be wrong, 

3010 # leading to a non-deactivation of the LV) 

3011 ns = lvhdutil.NS_PREFIX_LVM + self.uuid 

3012 cCnt, cBcnt = RefCounter.check(child.uuid, ns) 

3013 pCnt, pBcnt = RefCounter.check(parent.uuid, ns) 

3014 pCnt = pCnt + cBcnt 

3015 RefCounter.set(parent.uuid, pCnt, 0, ns) 

3016 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_refcount", self.uuid) 

3017 

3018 parent.deflate() 

3019 child.inflateFully() 

3020 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_deflate", self.uuid) 

3021 if child.hidden: 

3022 child._setHidden(False) 

3023 if not parent.hidden: 

3024 parent._setHidden(True) 

3025 if not parent.lvReadonly: 

3026 self.lvmCache.setReadonly(parent.fileName, True) 

3027 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

3028 util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid) 

3029 Util.log("*** leaf-coalesce undo successful") 

3030 if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"): 

3031 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED) 

3032 

3033 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3034 Util.log("*** FINISH LEAF-COALESCE") 

3035 vdi = self.getVDI(childUuid) 

3036 if not vdi: 

3037 raise util.SMException("VDI %s not found" % childUuid) 

3038 vdi.inflateFully() 

3039 util.fistpoint.activate("LVHDRT_coaleaf_finish_after_inflate", self.uuid) 

3040 try: 

3041 self.forgetVDI(parentUuid) 

3042 except XenAPI.Failure: 

3043 pass 

3044 self._updateSlavesOnResize(vdi) 

3045 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid) 

3046 Util.log("*** finished leaf-coalesce successfully") 

3047 

3048 def _checkSlaves(self, vdi): 

3049 """Confirm with all slaves in the pool that 'vdi' is not in use. We 

3050 try to check all slaves, including those that the Agent believes are 

3051 offline, but ignore failures for offline hosts. This is to avoid cases 

3052 where the Agent thinks a host is offline but the host is up.""" 

3053 args = {"vgName": self.vgName, 

3054 "action1": "deactivateNoRefcount", 

3055 "lvName1": vdi.fileName, 

3056 "action2": "cleanupLockAndRefcount", 

3057 "uuid2": vdi.uuid, 

3058 "ns2": lvhdutil.NS_PREFIX_LVM + self.uuid} 

3059 onlineHosts = self.xapi.getOnlineHosts() 

3060 abortFlag = IPCFlag(self.uuid) 

3061 for pbdRecord in self.xapi.getAttachedPBDs(): 

3062 hostRef = pbdRecord["host"] 

3063 if hostRef == self.xapi._hostRef: 

3064 continue 

3065 if abortFlag.test(FLAG_TYPE_ABORT): 

3066 raise AbortException("Aborting due to signal") 

3067 Util.log("Checking with slave %s (path %s)" % ( 

3068 self.xapi.getRecordHost(hostRef)['hostname'], vdi.path)) 

3069 try: 

3070 self.xapi.ensureInactive(hostRef, args) 

3071 except XenAPI.Failure: 

3072 if hostRef in onlineHosts: 

3073 raise 

3074 

3075 def _updateSlavesOnUndoLeafCoalesce(self, parent, child): 

3076 slaves = util.get_slaves_attached_on(self.xapi.session, [child.uuid]) 

3077 if not slaves: 

3078 Util.log("Update-on-leaf-undo: VDI %s not attached on any slave" % \ 

3079 child) 

3080 return 

3081 

3082 tmpName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + \ 

3083 self.TMP_RENAME_PREFIX + child.uuid 

3084 args = {"vgName": self.vgName, 

3085 "action1": "deactivateNoRefcount", 

3086 "lvName1": tmpName, 

3087 "action2": "deactivateNoRefcount", 

3088 "lvName2": child.fileName, 

3089 "action3": "refresh", 

3090 "lvName3": child.fileName, 

3091 "action4": "refresh", 

3092 "lvName4": parent.fileName} 

3093 for slave in slaves: 

3094 Util.log("Updating %s, %s, %s on slave %s" % \ 

3095 (tmpName, child.fileName, parent.fileName, 

3096 self.xapi.getRecordHost(slave)['hostname'])) 

3097 text = self.xapi.session.xenapi.host.call_plugin( \ 

3098 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args) 

3099 Util.log("call-plugin returned: '%s'" % text) 

3100 

3101 def _updateSlavesOnRename(self, vdi, oldNameLV, origParentUuid): 

3102 slaves = util.get_slaves_attached_on(self.xapi.session, [vdi.uuid]) 

3103 if not slaves: 

3104 Util.log("Update-on-rename: VDI %s not attached on any slave" % vdi) 

3105 return 

3106 

3107 args = {"vgName": self.vgName, 

3108 "action1": "deactivateNoRefcount", 

3109 "lvName1": oldNameLV, 

3110 "action2": "refresh", 

3111 "lvName2": vdi.fileName, 

3112 "action3": "cleanupLockAndRefcount", 

3113 "uuid3": origParentUuid, 

3114 "ns3": lvhdutil.NS_PREFIX_LVM + self.uuid} 

3115 for slave in slaves: 

3116 Util.log("Updating %s to %s on slave %s" % \ 

3117 (oldNameLV, vdi.fileName, 

3118 self.xapi.getRecordHost(slave)['hostname'])) 

3119 text = self.xapi.session.xenapi.host.call_plugin( \ 

3120 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args) 

3121 Util.log("call-plugin returned: '%s'" % text) 

3122 

3123 def _updateSlavesOnResize(self, vdi): 

3124 uuids = [x.uuid for x in vdi.getAllLeaves()] 

3125 slaves = util.get_slaves_attached_on(self.xapi.session, uuids) 

3126 if not slaves: 

3127 util.SMlog("Update-on-resize: %s not attached on any slave" % vdi) 

3128 return 

3129 lvhdutil.lvRefreshOnSlaves(self.xapi.session, self.uuid, self.vgName, 

3130 vdi.fileName, vdi.uuid, slaves) 

3131 

3132 
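# LinstorSR: SRs backed by LINSTOR/DRBD volumes. Only usable when the LINSTOR
# libraries are importable (LINSTOR_AVAILABLE); the journaler and volume manager
# are reloaded from the LINSTOR controller on every scan attempt.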

3133class LinstorSR(SR): 

3134 TYPE = SR.TYPE_LINSTOR 

3135 

3136 def __init__(self, uuid, xapi, createLock, force): 

3137 if not LINSTOR_AVAILABLE: 

3138 raise util.SMException( 

3139 'Can\'t load cleanup LinstorSR: LINSTOR libraries are missing' 

3140 ) 

3141 

3142 SR.__init__(self, uuid, xapi, createLock, force) 

3143 self.path = LinstorVolumeManager.DEV_ROOT_PATH 

3144 self._reloadLinstor() 

3145 

3146 def deleteVDI(self, vdi): 

3147 self._checkSlaves(vdi) 

3148 SR.deleteVDI(self, vdi) 

3149 

3150 def getFreeSpace(self): 

3151 return self._linstor.max_volume_size_allowed 

3152 

3153 def scan(self, force=False): 

3154 all_vdi_info = self._scan(force) 

3155 for uuid, vdiInfo in all_vdi_info.items(): 

3156 # When vdiInfo is None, the VDI is RAW. 

3157 vdi = self.getVDI(uuid) 

3158 if not vdi: 

3159 self.logFilter.logNewVDI(uuid) 

3160 vdi = LinstorVDI(self, uuid, not vdiInfo) 

3161 self.vdis[uuid] = vdi 

3162 if vdiInfo: 

3163 vdi.load(vdiInfo) 

3164 self._removeStaleVDIs(all_vdi_info.keys()) 

3165 self._buildTree(force) 

3166 self.logFilter.logState() 

3167 self._handleInterruptedCoalesceLeaf() 

3168 

3169 def pauseVDIs(self, vdiList): 

3170 self._linstor.ensure_volume_list_is_not_locked( 

3171 vdiList, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT 

3172 ) 

3173 return super(LinstorSR, self).pauseVDIs(vdiList) 

3174 

3175 def _reloadLinstor(self): 

3176 session = self.xapi.session 

3177 host_ref = util.get_this_host_ref(session) 

3178 sr_ref = session.xenapi.SR.get_by_uuid(self.uuid) 

3179 

3180 pbd = util.find_my_pbd(session, host_ref, sr_ref) 

3181 if pbd is None: 

3182 raise util.SMException('Failed to find PBD') 

3183 

3184 dconf = session.xenapi.PBD.get_device_config(pbd) 

3185 group_name = dconf['group-name'] 

3186 

3187 controller_uri = get_controller_uri() 

3188 self.journaler = LinstorJournaler( 

3189 controller_uri, group_name, logger=util.SMlog 

3190 ) 

3191 

3192 self._linstor = LinstorVolumeManager( 

3193 controller_uri, 

3194 group_name, 

3195 repair=True, 

3196 logger=util.SMlog 

3197 ) 

3198 self._vhdutil = LinstorVhdUtil(session, self._linstor) 

3199 

3200 def _scan(self, force): 
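# Retry the scan up to SCAN_RETRY_ATTEMPTS times, re-establishing the
# LINSTOR journaler/volume-manager connection on each attempt; with
# force=True the last result is returned even if some VDI info could not
# be loaded cleanly.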

3201 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

3202 self._reloadLinstor() 

3203 error = False 

3204 try: 

3205 all_vdi_info = self._load_vdi_info() 

3206 for uuid, vdiInfo in all_vdi_info.items(): 

3207 if vdiInfo and vdiInfo.error: 

3208 error = True 

3209 break 

3210 if not error: 

3211 return all_vdi_info 

3212 Util.log('Scan error, retrying ({})'.format(i)) 

3213 except Exception as e: 

3214 Util.log('Scan exception, retrying ({}): {}'.format(i, e)) 

3215 Util.log(traceback.format_exc()) 

3216 

3217 if force: 

3218 return all_vdi_info 

3219 raise util.SMException('Scan error') 

3220 

3221 def _load_vdi_info(self): 

3222 all_vdi_info = {} 

3223 

3224 # TODO: Ensure metadata contains the right info. 

3225 

3226 all_volume_info = self._linstor.get_volumes_with_info() 

3227 volumes_metadata = self._linstor.get_volumes_with_metadata() 

3228 for vdi_uuid, volume_info in all_volume_info.items(): 

3229 try: 

3230 volume_metadata = volumes_metadata[vdi_uuid] 

3231 if not volume_info.name and not list(volume_metadata.items()): 

3232 continue # Ignore it, probably deleted. 

3233 

3234 if vdi_uuid.startswith('DELETED_'): 

3235 # Assume it is really a RAW volume left over from a failed snapshot,

3236 # i.e. one without a VHD header/footer. Remove this VDI now, without

3237 # adding it to the VDI list; otherwise `Relinking` calls and other

3238 # actions could be launched on it, which we must avoid.

3239 Util.log('Deleting bad VDI {}'.format(vdi_uuid)) 

3240 

3241 self.lock() 

3242 try: 

3243 self._linstor.destroy_volume(vdi_uuid) 

3244 try: 

3245 self.forgetVDI(vdi_uuid) 

3246 except: 

3247 pass 

3248 except Exception as e: 

3249 Util.log('Cannot delete bad VDI: {}'.format(e)) 

3250 finally: 

3251 self.unlock() 

3252 continue 

3253 

3254 vdi_type = volume_metadata.get(VDI_TYPE_TAG) 

3255 volume_name = self._linstor.get_volume_name(vdi_uuid) 

3256 if volume_name.startswith(LINSTOR_PERSISTENT_PREFIX): 

3257 # Always RAW! 

3258 info = None 

3259 elif vdi_type == vhdutil.VDI_TYPE_VHD: 

3260 info = self._vhdutil.get_vhd_info(vdi_uuid) 

3261 else: 

3262 # Ensure it's not a VHD... 

3263 try: 

3264 info = self._vhdutil.get_vhd_info(vdi_uuid) 

3265 except: 

3266 try: 

3267 self._vhdutil.force_repair( 

3268 self._linstor.get_device_path(vdi_uuid) 

3269 ) 

3270 info = self._vhdutil.get_vhd_info(vdi_uuid) 

3271 except: 

3272 info = None 

3273 

3274 except Exception as e: 

3275 Util.log( 

3276 ' [VDI {}: failed to load VDI info]: {}' 

3277 .format(vdi_uuid, e) 

3278 ) 

3279 info = vhdutil.VHDInfo(vdi_uuid) 

3280 info.error = 1 

3281 

3282 all_vdi_info[vdi_uuid] = info 

3283 

3284 return all_vdi_info 

3285 

3286 def _prepareCoalesceLeaf(self, vdi): 

3287 vdi._activateChain() 

3288 vdi.deflate() 

3289 vdi._inflateParentForCoalesce() 

3290 

3291 def _finishCoalesceLeaf(self, parent): 

3292 if not parent.isSnapshot() or parent.isAttachedRW(): 

3293 parent.inflateFully() 

3294 else: 

3295 parent.deflate() 

3296 

3297 def _calcExtraSpaceNeeded(self, child, parent): 
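# Extra space required for the coalesce: the size the parent must be
# inflated to for its full virtual size, minus the space its DRBD volume
# currently occupies.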

3298 return LinstorVhdUtil.compute_volume_size(parent.sizeVirt, parent.vdi_type) - parent.getDrbdSize() 

3299 

3300 def _hasValidDevicePath(self, uuid): 

3301 try: 

3302 self._linstor.get_device_path(uuid) 

3303 except Exception: 

3304 # TODO: Maybe log exception. 

3305 return False 

3306 return True 

3307 

3308 def _liveLeafCoalesce(self, vdi): 

3309 self.lock() 

3310 try: 

3311 self._linstor.ensure_volume_is_not_locked( 

3312 vdi.uuid, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT 

3313 ) 

3314 return super(LinstorSR, self)._liveLeafCoalesce(vdi) 

3315 finally: 

3316 self.unlock() 

3317 

3318 def _handleInterruptedCoalesceLeaf(self): 
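# A pending JRN_LEAF journal entry means a leaf-coalesce was interrupted.
# If the parent volume (or the temporarily renamed child) still has a valid
# device path, the switch-over did not complete and is rolled back;
# otherwise only the finishing cleanup (forgetting the old parent,
# refreshing slaves) remains to be done.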

3319 entries = self.journaler.get_all(VDI.JRN_LEAF) 

3320 for uuid, parentUuid in entries.items(): 

3321 if self._hasValidDevicePath(parentUuid) or \ 

3322 self._hasValidDevicePath(self.TMP_RENAME_PREFIX + uuid): 

3323 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

3324 else: 

3325 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

3326 self.journaler.remove(VDI.JRN_LEAF, uuid) 

3327 vdi = self.getVDI(uuid) 

3328 if vdi: 

3329 vdi.ensureUnpaused() 

3330 

3331 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3332 Util.log('*** UNDO LEAF-COALESCE') 

3333 parent = self.getVDI(parentUuid) 

3334 if not parent: 

3335 parent = self.getVDI(childUuid) 

3336 if not parent: 

3337 raise util.SMException( 

3338 'Neither {} nor {} found'.format(parentUuid, childUuid) 

3339 ) 

3340 Util.log( 

3341 'Renaming parent back: {} -> {}'.format(childUuid, parentUuid) 

3342 ) 

3343 parent.rename(parentUuid) 

3344 

3345 child = self.getVDI(childUuid) 

3346 if not child: 

3347 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

3348 if not child: 

3349 raise util.SMException( 

3350 'Neither {} nor {} found'.format( 

3351 childUuid, self.TMP_RENAME_PREFIX + childUuid 

3352 ) 

3353 ) 

3354 Util.log('Renaming child back to {}'.format(childUuid)) 

3355 child.rename(childUuid) 

3356 Util.log('Updating the VDI record') 

3357 child.setConfig(VDI.DB_VHD_PARENT, parentUuid) 

3358 child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD) 

3359 

3360 # TODO: Maybe deflate here. 

3361 

3362 if child.hidden: 

3363 child._setHidden(False) 

3364 if not parent.hidden: 

3365 parent._setHidden(True) 

3366 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

3367 Util.log('*** leaf-coalesce undo successful') 

3368 

3369 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3370 Util.log('*** FINISH LEAF-COALESCE') 

3371 vdi = self.getVDI(childUuid) 

3372 if not vdi: 

3373 raise util.SMException('VDI {} not found'.format(childUuid)) 

3374 # TODO: Maybe inflate. 

3375 try: 

3376 self.forgetVDI(parentUuid) 

3377 except XenAPI.Failure: 

3378 pass 

3379 self._updateSlavesOnResize(vdi) 

3380 Util.log('*** finished leaf-coalesce successfully') 

3381 

3382 def _checkSlaves(self, vdi): 

3383 try: 

3384 all_openers = self._linstor.get_volume_openers(vdi.uuid) 

3385 for openers in all_openers.values(): 

3386 for opener in openers.values(): 

3387 if opener['process-name'] != 'tapdisk': 

3388 raise util.SMException( 

3389 'VDI {} is in use: {}'.format(vdi.uuid, all_openers) 

3390 ) 

3391 except LinstorVolumeManagerError as e: 

3392 if e.code != LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS: 

3393 raise 

3394 

3395 

3396################################################################################ 

3397# 

3398# Helpers 

3399# 

3400def daemonize(): 
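# Classic double-fork: the original process reaps the first child and
# returns False; the first child detaches with setsid(), forks again and
# exits; the grandchild redirects fds 0-2 to /dev/null, clears inherited
# lock objects and returns True to carry on as the background GC process.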

3401 pid = os.fork() 

3402 if pid:

3403 os.waitpid(pid, 0) 

3404 Util.log("New PID [%d]" % pid) 

3405 return False 

3406 os.chdir("/") 

3407 os.setsid() 

3408 pid = os.fork() 

3409 if pid: 

3410 Util.log("Will finish as PID [%d]" % pid) 

3411 os._exit(0) 

3412 for fd in [0, 1, 2]: 

3413 try: 

3414 os.close(fd) 

3415 except OSError: 

3416 pass 

3417 # we need to fill those special fd numbers or pread won't work 

3418 sys.stdin = open("/dev/null", 'r') 

3419 sys.stderr = open("/dev/null", 'w') 

3420 sys.stdout = open("/dev/null", 'w') 

3421 # As we're a new process we need to clear the lock objects 

3422 lock.Lock.clearAll() 

3423 return True 

3424 

3425 

3426def normalizeType(type): 
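# Collapse the concrete SR driver types into the canonical cleanup types,
# e.g. "lvmoiscsi" -> SR.TYPE_LVHD, "nfs" -> SR.TYPE_FILE,
# "linstor" -> SR.TYPE_LINSTOR; anything unrecognised raises SMException.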

3427 if type in LVHDSR.SUBTYPES: 

3428 type = SR.TYPE_LVHD 

3429 if type in ["lvm", "lvmoiscsi", "lvmohba", "lvmofcoe"]: 

3430 # temporary while LVHD is symlinked as LVM 

3431 type = SR.TYPE_LVHD 

3432 if type in [ 

3433 "ext", "nfs", "ocfsoiscsi", "ocfsohba", "smb", "cephfs", "glusterfs", 

3434 "moosefs", "xfs", "zfs", "largeblock" 

3435 ]: 

3436 type = SR.TYPE_FILE 

3437 if type in ["linstor"]: 

3438 type = SR.TYPE_LINSTOR 

3439 if type not in SR.TYPES: 

3440 raise util.SMException("Unsupported SR type: %s" % type) 

3441 return type 

3442 

3443GCPAUSE_DEFAULT_SLEEP = 5 * 60 

3444 

3445 

3446def _gc_init_file(sr_uuid): 

3447 return os.path.join(NON_PERSISTENT_DIR, str(sr_uuid), 'gc_init') 

3448 

3449 

3450def _create_init_file(sr_uuid): 

3451 util.makedirs(os.path.join(NON_PERSISTENT_DIR, str(sr_uuid))) 

3452 with open(os.path.join( 

3453 NON_PERSISTENT_DIR, str(sr_uuid), 'gc_init'), 'w+') as f: 

3454 f.write('1') 

3455 

3456 

3457def _gcLoopPause(sr, dryRun=False, immediate=False): 

3458 if immediate: 

3459 return 

3460 

3461 # Check to see if the GCPAUSE_FISTPOINT is present. If so, the fist

3462 # point will just return. Otherwise, fall back on an abortable sleep. 

3463 

3464 if util.fistpoint.is_active(util.GCPAUSE_FISTPOINT): 

3465 

3466 util.fistpoint.activate_custom_fn(util.GCPAUSE_FISTPOINT,

3467 lambda *args: None) 

3468 elif os.path.exists(_gc_init_file(sr.uuid)): 

3469 def abortTest(): 

3470 return IPCFlag(sr.uuid).test(FLAG_TYPE_ABORT) 

3471 

3472 # If time.sleep hangs we are in deep trouble; however, for

3473 # completeness we set the timeout of the abort thread to 

3474 # 110% of GCPAUSE_DEFAULT_SLEEP. 

3475 Util.log("GC active, about to go quiet") 

3476 Util.runAbortable(lambda: time.sleep(GCPAUSE_DEFAULT_SLEEP),

3477 None, sr.uuid, abortTest, VDI.POLL_INTERVAL, 

3478 GCPAUSE_DEFAULT_SLEEP * 1.1) 

3479 Util.log("GC active, quiet period ended") 

3480 

3481 

3482def _gcLoop(sr, dryRun=False, immediate=False): 
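# Main GC loop: after an optional quiet period, repeatedly rescan the SR,
# delete garbage VHDs, then coalesce one candidate per iteration (non-leaf
# chains first, then leaves) until no work remains, the SR is unplugged,
# or GC is disabled.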

3483 if not lockActive.acquireNoblock():

3484 Util.log("Another GC instance already active, exiting") 

3485 return 

3486 # Track how many we do 

3487 coalesced = 0 

3488 task_status = "success" 

3489 try: 

3490 # Check if any work needs to be done 

3491 if not sr.xapi.isPluggedHere(): 

3492 Util.log("SR no longer attached, exiting") 

3493 return 

3494 sr.scanLocked() 

3495 if not sr.hasWork(): 

3496 Util.log("No work, exiting") 

3497 return 

3498 sr.xapi.create_task( 

3499 "Garbage Collection", 

3500 "Garbage collection for SR %s" % sr.uuid) 

3501 _gcLoopPause(sr, dryRun, immediate=immediate) 

3502 while True: 

3503 if not sr.xapi.isPluggedHere():

3504 Util.log("SR no longer attached, exiting") 

3505 break 

3506 sr.scanLocked() 

3507 if not sr.hasWork(): 

3508 Util.log("No work, exiting") 

3509 break 

3510 

3511 if not lockRunning.acquireNoblock():

3512 Util.log("Unable to acquire GC running lock.") 

3513 return 

3514 try: 

3515 if not sr.gcEnabled():

3516 break 

3517 

3518 sr.xapi.update_task_progress("done", coalesced) 

3519 

3520 sr.cleanupCoalesceJournals() 

3521 # Create the init file here in case startup is waiting on it 

3522 _create_init_file(sr.uuid) 

3523 sr.scanLocked() 

3524 sr.updateBlockInfo() 

3525 

3526 howmany = len(sr.findGarbage()) 

3527 if howmany > 0: 

3528 Util.log("Found %d orphaned vdis" % howmany) 

3529 sr.lock() 

3530 try: 

3531 sr.garbageCollect(dryRun) 

3532 finally: 

3533 sr.unlock() 

3534 sr.xapi.srUpdate() 

3535 

3536 candidate = sr.findCoalesceable() 

3537 if candidate: 

3538 util.fistpoint.activate( 

3539 "LVHDRT_finding_a_suitable_pair", sr.uuid) 

3540 sr.coalesce(candidate, dryRun) 

3541 sr.xapi.srUpdate() 

3542 coalesced += 1 

3543 continue 

3544 

3545 candidate = sr.findLeafCoalesceable() 

3546 if candidate:

3547 sr.coalesceLeaf(candidate, dryRun) 

3548 sr.xapi.srUpdate() 

3549 coalesced += 1 

3550 continue 

3551 

3552 finally: 

3553 lockRunning.release()

3554 except: 

3555 task_status = "failure" 

3556 raise 

3557 finally: 

3558 sr.xapi.set_task_status(task_status) 

3559 Util.log("GC process exiting, no work left") 

3560 _create_init_file(sr.uuid) 

3561 lockActive.release() 

3562 

3563 

3564def _xapi_enabled(session, hostref): 

3565 host = session.xenapi.host.get_record(hostref) 

3566 return host['enabled'] 

3567 

3568 

3569def _ensure_xapi_initialised(session): 

3570 """ 

3571 Don't want to start GC until Xapi is fully initialised 

3572 """ 

3573 local_session = None 

3574 if session is None: 

3575 local_session = util.get_localAPI_session() 

3576 session = local_session 

3577 

3578 try: 

3579 hostref = session.xenapi.host.get_by_uuid(util.get_this_host()) 

3580 while not _xapi_enabled(session, hostref): 

3581 util.SMlog("Xapi not ready, GC waiting") 

3582 time.sleep(15) 

3583 finally: 

3584 if local_session is not None: 

3585 local_session.xenapi.session.logout() 

3586 

3587def _gc(session, srUuid, dryRun=False, immediate=False): 

3588 init(srUuid) 

3589 _ensure_xapi_initialised(session) 

3590 sr = SR.getInstance(srUuid, session) 

3591 if not sr.gcEnabled(False):

3592 return 

3593 

3594 sr.cleanupCache() 

3595 try: 

3596 _gcLoop(sr, dryRun, immediate=immediate) 

3597 finally: 

3598 sr.cleanup() 

3599 sr.logFilter.logState() 

3600 del sr.xapi 

3601 

3602 

3603def _abort(srUuid, soft=False): 

3604 """Aborts a GC/coalesce. 

3605 

3606 srUuid: the UUID of the SR whose GC/coalesce must be aborted 

3607 soft: If set to True and there is a pending abort signal, the function 

3608 doesn't do anything. If set to False, a new abort signal is issued. 

3609 

3610 returns: If soft is set to False, we return True holding lockActive. If 

3611 soft is set to True and an abort signal is pending, we return False 

3612 without holding lockActive. An exception is raised in case of error.""" 

3613 Util.log("=== SR %s: abort ===" % (srUuid)) 

3614 init(srUuid) 

3615 if not lockActive.acquireNoblock(): 

3616 gotLock = False 

3617 Util.log("Aborting currently-running instance (SR %s)" % srUuid) 

3618 abortFlag = IPCFlag(srUuid) 

3619 if not abortFlag.set(FLAG_TYPE_ABORT, soft): 

3620 return False 

3621 for i in range(SR.LOCK_RETRY_ATTEMPTS): 

3622 gotLock = lockActive.acquireNoblock() 

3623 if gotLock: 

3624 break 

3625 time.sleep(SR.LOCK_RETRY_INTERVAL) 

3626 abortFlag.clear(FLAG_TYPE_ABORT) 

3627 if not gotLock: 

3628 raise util.CommandException(code=errno.ETIMEDOUT, 

3629 reason="SR %s: error aborting existing process" % srUuid) 

3630 return True 

3631 

3632 

3633def init(srUuid): 

3634 global lockRunning 

3635 if not lockRunning:

3636 lockRunning = lock.Lock(LOCK_TYPE_RUNNING, srUuid) 

3637 global lockActive 

3638 if not lockActive:

3639 lockActive = LockActive(srUuid) 

3640 

3641 

3642class LockActive: 

3643 """ 

3644 Wraps the use of LOCK_TYPE_GC_ACTIVE such that the lock cannot be acquired 

3645 if another process holds the SR lock. 

3646 """ 

3647 def __init__(self, srUuid): 

3648 self._lock = lock.Lock(LOCK_TYPE_GC_ACTIVE, srUuid) 

3649 self._srLock = lock.Lock(vhdutil.LOCK_TYPE_SR, srUuid) 

3650 

3651 def acquireNoblock(self): 
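# Wait for the SR lock before trying the GC-active lock, so GC activation
# is serialised with other SM operations holding the SR lock; the SR lock
# itself is released again immediately in the finally clause.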

3652 self._srLock.acquire() 

3653 

3654 try: 

3655 return self._lock.acquireNoblock() 

3656 finally: 

3657 self._srLock.release() 

3658 

3659 def release(self): 

3660 self._lock.release() 

3661 

3662 

3663def usage(): 

3664 output = """Garbage collect and/or coalesce VHDs in a VHD-based SR 

3665 

3666Parameters: 

3667 -u --uuid UUID SR UUID 

3668 and one of: 

3669 -g --gc garbage collect, coalesce, and repeat while there is work 

3670 -G --gc_force garbage collect once, aborting any current operations 

3671 -c --clean_cache <max_age> clean up IntelliCache cache files older than 

3672 max_age hours 

3673 -a --abort abort any currently running operation (GC or coalesce) 

3674 -q --query query the current state (GC'ing, coalescing or not running) 

3675 -x --disable disable GC/coalesce (will be in effect until you exit) 

3676 -t --debug see Debug below 

3677 

3678Options: 

3679 -b --background run in background (return immediately) (valid for -g only) 

3680 -f --force continue in the presence of VHDs with errors (when doing 

3681 GC, this might cause removal of any such VHDs) (only valid 

3682 for -G) (DANGEROUS) 

3683 

3684Debug: 

3685 The --debug parameter enables manipulation of LVHD VDIs for debugging 

3686 purposes. ** NEVER USE IT ON A LIVE VM ** 

3687 The following parameters are required: 

3688 -t --debug <cmd> <cmd> is one of "activate", "deactivate", "inflate", 

3689 "deflate". 

3690 -v --vdi_uuid VDI UUID 

3691 """ 

3692 #-d --dry-run don't actually perform any SR-modifying operations 

3693 print(output) 

3694 Util.log("(Invalid usage)") 

3695 sys.exit(1) 

3696 
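# Illustrative invocations of this script (flag spellings as documented
# above; the installed path of the script is deployment-specific):
#   <this-script> -u <SR_UUID> -g -b    # background GC/coalesce of one SR
#   <this-script> -u <SR_UUID> -q       # is GC/coalesce currently running?
#   <this-script> -u <SR_UUID> -a       # abort a running GC/coalesce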

3697 

3698############################################################################## 

3699# 

3700# API 

3701# 

3702def abort(srUuid, soft=False): 

3703 """Abort GC/coalesce if we are currently GC'ing or coalescing a VDI pair. 

3704 """ 

3705 if _abort(srUuid, soft): 

3706 Util.log("abort: releasing the process lock") 

3707 lockActive.release() 

3708 return True 

3709 else: 

3710 return False 

3711 

3712 

3713def gc(session, srUuid, inBackground, dryRun=False): 

3714 """Garbage collect all deleted VDIs in SR "srUuid". Fork & return 

3715 immediately if inBackground=True. 

3716 

3717 The following algorithm is used: 

3718 1. If we are already GC'ing in this SR, return 

3719 2. If we are already coalescing a VDI pair: 

3720 a. Scan the SR and determine if the VDI pair is GC'able 

3721 b. If the pair is not GC'able, return 

3722 c. If the pair is GC'able, abort coalesce 

3723 3. Scan the SR 

3724 4. If there is nothing to collect, nor to coalesce, return 

3725 5. If there is something to collect, GC all, then goto 3 

3726 6. If there is something to coalesce, coalesce one pair, then goto 3 

3727 """ 

3728 Util.log("=== SR %s: gc ===" % srUuid) 

3729 if inBackground: 

3730 if daemonize(): 

3731 # we are now running in the background. Catch & log any errors 

3732 # because there is no other way to propagate them back at this 

3733 # point 

3734 

3735 try: 

3736 _gc(None, srUuid, dryRun) 

3737 except AbortException: 

3738 Util.log("Aborted") 

3739 except Exception: 

3740 Util.logException("gc") 

3741 Util.log("* * * * * SR %s: ERROR\n" % srUuid) 

3742 os._exit(0) 

3743 else: 

3744 _gc(session, srUuid, dryRun, immediate=True) 

3745 
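# A minimal sketch of how this API is typically driven from Python, assuming
# the module is importable as `cleanup` (the session and SR names below are
# placeholders, not taken from this file):
#   import cleanup
#   cleanup.gc(session, sr_uuid, inBackground=True)   # fork, return at once
#   if cleanup.get_state(sr_uuid):                    # still busy?
#       cleanup.abort(sr_uuid, soft=True)             # request a soft abort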

3746 

3747def gc_force(session, srUuid, force=False, dryRun=False, lockSR=False): 

3748 """Garbage collect all deleted VDIs in SR "srUuid". The caller must ensure 

3749 the SR lock is held. 

3750 The following algorithm is used: 

3751 1. If we are already GC'ing or coalescing a VDI pair, abort GC/coalesce 

3752 2. Scan the SR 

3753 3. GC 

3754 4. return 

3755 """ 

3756 Util.log("=== SR %s: gc_force ===" % srUuid) 

3757 init(srUuid) 

3758 sr = SR.getInstance(srUuid, session, lockSR, True) 

3759 if not lockActive.acquireNoblock(): 

3760 abort(srUuid) 

3761 else: 

3762 Util.log("Nothing was running, clear to proceed") 

3763 

3764 if force: 

3765 Util.log("FORCED: will continue even if there are VHD errors") 

3766 sr.scanLocked(force) 

3767 sr.cleanupCoalesceJournals() 

3768 

3769 try: 

3770 sr.cleanupCache() 

3771 sr.garbageCollect(dryRun) 

3772 finally: 

3773 sr.cleanup() 

3774 sr.logFilter.logState() 

3775 lockActive.release() 

3776 

3777 

3778def get_state(srUuid): 

3779 """Return whether GC/coalesce is currently running or not. The information 

3780 is not guaranteed for any length of time if the call is not protected by 

3781 locking. 

3782 """ 

3783 init(srUuid) 

3784 if lockActive.acquireNoblock(): 

3785 lockActive.release() 

3786 return False 

3787 return True 

3788 

3789 

3790def should_preempt(session, srUuid): 

3791 sr = SR.getInstance(srUuid, session) 

3792 entries = sr.journaler.getAll(VDI.JRN_COALESCE) 

3793 if len(entries) == 0: 

3794 return False 

3795 elif len(entries) > 1: 

3796 raise util.SMException("More than one coalesce entry: " + str(entries)) 

3797 sr.scanLocked() 

3798 coalescedUuid = entries.popitem()[0] 

3799 garbage = sr.findGarbage() 

3800 for vdi in garbage: 

3801 if vdi.uuid == coalescedUuid: 

3802 return True 

3803 return False 

3804 

3805 

3806def get_coalesceable_leaves(session, srUuid, vdiUuids): 

3807 coalesceable = [] 

3808 sr = SR.getInstance(srUuid, session) 

3809 sr.scanLocked() 

3810 for uuid in vdiUuids: 

3811 vdi = sr.getVDI(uuid) 

3812 if not vdi: 

3813 raise util.SMException("VDI %s not found" % uuid) 

3814 if vdi.isLeafCoalesceable(): 

3815 coalesceable.append(uuid) 

3816 return coalesceable 

3817 

3818 

3819def cache_cleanup(session, srUuid, maxAge): 

3820 sr = SR.getInstance(srUuid, session) 

3821 return sr.cleanupCache(maxAge) 

3822 

3823 

3824def debug(sr_uuid, cmd, vdi_uuid): 

3825 Util.log("Debug command: %s" % cmd) 

3826 sr = SR.getInstance(sr_uuid, None) 

3827 if not isinstance(sr, LVHDSR): 

3828 print("Error: not an LVHD SR") 

3829 return 

3830 sr.scanLocked() 

3831 vdi = sr.getVDI(vdi_uuid) 

3832 if not vdi: 

3833 print("Error: VDI %s not found" % vdi_uuid)

3834 return 

3835 print("Running %s on SR %s" % (cmd, sr)) 

3836 print("VDI before: %s" % vdi) 

3837 if cmd == "activate": 

3838 vdi._activate() 

3839 print("VDI file: %s" % vdi.path) 

3840 if cmd == "deactivate": 

3841 ns = lvhdutil.NS_PREFIX_LVM + sr.uuid 

3842 sr.lvmCache.deactivate(ns, vdi.uuid, vdi.fileName, False) 

3843 if cmd == "inflate": 

3844 vdi.inflateFully() 

3845 sr.cleanup() 

3846 if cmd == "deflate": 

3847 vdi.deflate() 

3848 sr.cleanup() 

3849 sr.scanLocked() 

3850 print("VDI after: %s" % vdi) 

3851 

3852 

3853def abort_optional_reenable(uuid): 

3854 print("Disabling GC/coalesce for %s" % uuid) 

3855 ret = _abort(uuid) 

3856 input("Press enter to re-enable...") 

3857 print("GC/coalesce re-enabled") 

3858 lockRunning.release() 

3859 if ret: 

3860 lockActive.release() 

3861 

3862 

3863############################################################################## 

3864# 

3865# CLI 

3866# 

3867def main(): 

3868 action = "" 

3869 uuid = "" 

3870 background = False 

3871 force = False 

3872 dryRun = False 

3873 debug_cmd = "" 

3874 vdi_uuid = "" 

3875 shortArgs = "gGc:aqxu:bfdt:v:" 

3876 longArgs = ["gc", "gc_force", "clean_cache", "abort", "query", "disable", 

3877 "uuid=", "background", "force", "dry-run", "debug=", "vdi_uuid="] 

3878 

3879 try: 

3880 opts, args = getopt.getopt(sys.argv[1:], shortArgs, longArgs) 

3881 except getopt.GetoptError: 

3882 usage() 

3883 for o, a in opts: 

3884 if o in ("-g", "--gc"): 

3885 action = "gc" 

3886 if o in ("-G", "--gc_force"): 

3887 action = "gc_force" 

3888 if o in ("-c", "--clean_cache"): 

3889 action = "clean_cache" 

3890 maxAge = int(a) 

3891 if o in ("-a", "--abort"): 

3892 action = "abort" 

3893 if o in ("-q", "--query"): 

3894 action = "query" 

3895 if o in ("-x", "--disable"): 

3896 action = "disable" 

3897 if o in ("-u", "--uuid"): 

3898 uuid = a 

3899 if o in ("-b", "--background"): 

3900 background = True 

3901 if o in ("-f", "--force"): 

3902 force = True 

3903 if o in ("-d", "--dry-run"): 

3904 Util.log("Dry run mode") 

3905 dryRun = True 

3906 if o in ("-t", "--debug"): 

3907 action = "debug" 

3908 debug_cmd = a 

3909 if o in ("-v", "--vdi_uuid"): 

3910 vdi_uuid = a 

3911 

3912 if not action or not uuid: 

3913 usage() 

3914 if action == "debug" and not (debug_cmd and vdi_uuid) or \ 

3915 action != "debug" and (debug_cmd or vdi_uuid): 

3916 usage() 

3917 

3918 if action != "query" and action != "debug": 

3919 print("All output goes to log") 

3920 

3921 if action == "gc": 

3922 gc(None, uuid, background, dryRun) 

3923 elif action == "gc_force": 

3924 gc_force(None, uuid, force, dryRun, True) 

3925 elif action == "clean_cache": 

3926 cache_cleanup(None, uuid, maxAge) 

3927 elif action == "abort": 

3928 abort(uuid) 

3929 elif action == "query": 

3930 print("Currently running: %s" % get_state(uuid)) 

3931 elif action == "disable": 

3932 abort_optional_reenable(uuid) 

3933 elif action == "debug": 

3934 debug(uuid, debug_cmd, vdi_uuid) 

3935 

3936 

3937if __name__ == '__main__':

3938 main()