/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.io.Closeable;
import java.io.IOException;
import java.util.Map.Entry;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.locks.ReentrantLock;

import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL;
import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException;
import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.HasThread;
import org.apache.hadoop.hbase.wal.WAL;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;

/**
 * Runs periodically to determine if the WAL should be rolled.
 *
 * <p>Each registered WAL has an entry in {@link #walNeedsRoll}; the entry is {@code TRUE} while a
 * roll has been requested for that WAL and not yet served. {@link #rollLog} is the wake-up flag
 * (and monitor) the roller thread waits on between checks.
 *
 * <p>NOTE: This class extends Thread rather than Chore because the sleep time
 * can be interrupted when there is something to do, rather than the Chore
 * sleep time which is invariant.
 *
 * <p>TODO: change to a pool of threads
 */
@InterfaceAudience.Private
@VisibleForTesting
public class LogRoller extends HasThread implements Closeable {
  private static final Logger LOG = LoggerFactory.getLogger(LogRoller.class);
  // Held by run() for the duration of one roll pass over all registered WALs.
  private final ReentrantLock rollLock = new ReentrantLock();
  // Wake-up flag; also the monitor that run() waits on and requesters notify.
  private final AtomicBoolean rollLog = new AtomicBoolean(false);
  // Per-WAL "roll requested" marker; TRUE until run() has rolled that WAL.
  private final ConcurrentHashMap<WAL, Boolean> walNeedsRoll = new ConcurrentHashMap<>();
  private final Server server;
  protected final RegionServerServices services;
  private volatile long lastrolltime = System.currentTimeMillis();
  // Period to roll log.
  private final long rollperiod;
  // Upper bound, in milliseconds, on a single wait on rollLog in run().
  private final int threadWakeFrequency;
  // The interval to check low replication on hlog's pipeline
  private final long checkLowReplicationInterval;

  private volatile boolean running = true;

  /**
   * Registers a WAL with this roller. On first registration a listener is attached to the WAL
   * that marks it as needing a roll and wakes the roller thread whenever the WAL requests a roll.
   * Registering the same WAL again is a no-op.
   *
   * @param wal the WAL to track
   */
  public void addWAL(final WAL wal) {
    if (null == walNeedsRoll.putIfAbsent(wal, Boolean.FALSE)) {
      wal.registerWALActionsListener(new WALActionsListener() {
        @Override
        public void logRollRequested(boolean lowReplicas) {
          // Mark the WAL first, then raise the wake-up flag under the monitor; run() relies on
          // this ordering when it recomputes the flag after a roll pass.
          walNeedsRoll.put(wal, Boolean.TRUE);
          // TODO logs will contend with each other here, replace with e.g. DelayedQueue
          synchronized (rollLog) {
            rollLog.set(true);
            rollLog.notifyAll();
          }
        }
      });
    }
  }

  /**
   * Marks every registered WAL as needing a roll and wakes the roller thread.
   */
  public void requestRollAll() {
    for (WAL wal : walNeedsRoll.keySet()) {
      walNeedsRoll.put(wal, Boolean.TRUE);
    }
    synchronized (rollLog) {
      rollLog.set(true);
      rollLog.notifyAll();
    }
  }

  /**
   * Creates a roller and reads its timing settings from the server configuration:
   * {@code hbase.regionserver.logroll.period} (default 3600000 ms),
   * {@link HConstants#THREAD_WAKE_FREQUENCY} (default 10000 ms) and
   * {@code hbase.regionserver.hlog.check.lowreplication.interval} (default 30000 ms).
   *
   * @param server supplies the configuration and is aborted when a roll fails
   * @param services used to look up regions and the flush requester when scheduling flushes
   */
  public LogRoller(final Server server, final RegionServerServices services) {
    super("LogRoller");
    this.server = server;
    this.services = services;
    this.rollperiod = this.server.getConfiguration().
      getLong("hbase.regionserver.logroll.period", 3600000);
    this.threadWakeFrequency = this.server.getConfiguration().
      getInt(HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000);
    this.checkLowReplicationInterval = this.server.getConfiguration().getLong(
      "hbase.regionserver.hlog.check.lowreplication.interval", 30 * 1000);
  }

  @Override
  public void interrupt() {
    // Wake up if we are waiting on rollLog. For tests.
    synchronized (rollLog) {
      // notifyAll for consistency with the other wake-up paths in this class.
      this.rollLog.notifyAll();
    }
    super.interrupt();
  }

  /**
   * we need to check low replication in period, see HBASE-18132
   *
   * <p>Skips WALs that already have a roll requested and WALs that are not
   * {@link AbstractFSWAL}s. Any failure is logged and otherwise ignored so that a failed check
   * never stops the roller loop.
   *
   * @param now the current time in milliseconds; not used by the current implementation
   */
  void checkLowReplication(long now) {
    try {
      for (Entry<WAL, Boolean> entry : walNeedsRoll.entrySet()) {
        WAL wal = entry.getKey();
        boolean needRollAlready = entry.getValue();
        if (needRollAlready || !(wal instanceof AbstractFSWAL)) {
          continue;
        }
        ((AbstractFSWAL<?>) wal).checkLogLowReplication(checkLowReplicationInterval);
      }
    } catch (Throwable e) {
      LOG.warn("Failed checking low replication", e);
    }
  }

  /**
   * Shuts down every registered WAL and then aborts the server.
   *
   * @param reason the abort reason passed to the server
   * @param cause the failure that triggered the abort
   */
  private void abort(String reason, Throwable cause) {
    // close all WALs before calling abort on RS.
    // This is because AsyncFSWAL relies on us for rolling a new writer to make progress, and if we
    // failed, AsyncFSWAL may be stuck, so we need to close it to let the upper layer know that it
    // is already broken.
    for (WAL wal : walNeedsRoll.keySet()) {
      // shutdown rather than close here since we are going to abort the RS and the wals need to be
      // split when recovery
      try {
        wal.shutdown();
      } catch (IOException e) {
        LOG.warn("Failed to shutdown wal", e);
      }
    }
    server.abort(reason, cause);
  }

  /**
   * Roller loop. Until {@link #close()} is called it repeatedly checks low replication, then
   * either waits on {@link #rollLog} (at most {@code threadWakeFrequency} ms) or, when a roll was
   * requested or the roll period has elapsed, rolls every registered WAL and schedules flushes
   * for the regions each roll reports. Any exception thrown while rolling aborts the server.
   */
  @Override
  public void run() {
    while (running) {
      long now = System.currentTimeMillis();
      checkLowReplication(now);
      boolean periodic = false;
      if (!rollLog.get()) {
        periodic = (now - this.lastrolltime) > this.rollperiod;
        if (!periodic) {
          synchronized (rollLog) {
            try {
              // Re-check under the monitor so a request raised since the check above is not
              // slept through.
              if (!rollLog.get()) {
                rollLog.wait(this.threadWakeFrequency);
              }
            } catch (InterruptedException e) {
              // Fall through. The interrupt status is deliberately not re-asserted: the loop
              // condition re-checks 'running', and a still-set interrupt flag would make every
              // subsequent wait() throw immediately and spin this loop.
            }
          }
          continue;
        }
        // Time for periodic roll
        LOG.debug("Wal roll period {} ms elapsed", this.rollperiod);
      } else {
        LOG.debug("WAL roll requested");
      }
      boolean rolledAll = false;
      rollLock.lock(); // FindBugs UL_UNRELEASED_LOCK_EXCEPTION_PATH
      try {
        this.lastrolltime = now;
        for (Entry<WAL, Boolean> entry : walNeedsRoll.entrySet()) {
          final WAL wal = entry.getKey();
          // Force the roll if the logroll.period is elapsed or if a roll was requested.
          // The returned value is an array of actual region names.
          final byte[][] regionsToFlush = wal.rollWriter(periodic ||
            entry.getValue().booleanValue());
          walNeedsRoll.put(wal, Boolean.FALSE);
          if (regionsToFlush != null) {
            for (byte[] r : regionsToFlush) {
              scheduleFlush(r);
            }
          }
        }
        rolledAll = true;
      } catch (FailedLogCloseException | java.net.ConnectException e) {
        abort("Failed log close in log roller", e);
      } catch (IOException ex) {
        // Abort if we get here. We probably won't recover an IOE. HBASE-1132
        abort("IOE in log roller",
          ex instanceof RemoteException ? ((RemoteException) ex).unwrapRemoteException() : ex);
      } catch (Exception ex) {
        LOG.error("Log rolling failed", ex);
        abort("Log rolling failed", ex);
      } finally {
        try {
          resetRollRequest(rolledAll);
        } finally {
          rollLock.unlock();
        }
      }
    }
    LOG.info("LogRoller exiting.");
  }

  /**
   * Resets the wake-up flag at the end of a roll pass without discarding requests that arrived
   * while the pass was running.
   *
   * <p>Requesters set their {@link #walNeedsRoll} entry to {@code TRUE} before raising
   * {@link #rollLog} under its monitor. Recomputing the flag from the map while holding the same
   * monitor therefore either observes such an entry, or the requester raises the flag after this
   * method has released the monitor. Clearing the flag unconditionally could leave a WAL marked
   * as needing a roll with nothing to wake the roller until the next periodic roll.
   *
   * @param rolledAll whether the pass rolled every WAL without an exception; after a failed pass
   *          the flag is simply cleared so a roll that has just aborted the server is not
   *          retried immediately
   */
  private void resetRollRequest(boolean rolledAll) {
    synchronized (rollLog) {
      rollLog.set(rolledAll && hasPendingRollRequest());
    }
  }

  /**
   * @return true if at least one registered WAL is still marked as needing a roll
   */
  private boolean hasPendingRollRequest() {
    for (boolean needRoll : walNeedsRoll.values()) {
      if (needRoll) {
        return true;
      }
    }
    return false;
  }

  /**
   * Requests a flush of all stores of the given region. Logs a warning when the region or the
   * flush requester is not available.
   *
   * @param encodedRegionName Encoded name of region to flush.
   */
  private void scheduleFlush(final byte[] encodedRegionName) {
    boolean scheduled = false;
    HRegion r = (HRegion) this.services.getRegion(Bytes.toString(encodedRegionName));
    FlushRequester requester = null;
    if (r != null) {
      requester = this.services.getFlushRequester();
      if (requester != null) {
        // force flushing all stores to clean old logs
        requester.requestFlush(r, true, FlushLifeCycleTracker.DUMMY);
        scheduled = true;
      }
    }
    if (!scheduled) {
      LOG.warn("Failed to schedule flush of {}, region={}, requester={}",
        Bytes.toString(encodedRegionName), r, requester);
    }
  }

  /**
   * For testing only
   * @return true if all WAL roll finished
   */
  @VisibleForTesting
  public boolean walRollFinished() {
    return !hasPendingRollRequest();
  }

  /**
   * Stops the roller loop: clears the running flag and interrupts the thread so that a pending
   * wait returns promptly.
   */
  @Override
  public void close() {
    running = false;
    interrupt();
  }
}