001/**
002 *
003 * Licensed to the Apache Software Foundation (ASF) under one
004 * or more contributor license agreements.  See the NOTICE file
005 * distributed with this work for additional information
006 * regarding copyright ownership.  The ASF licenses this file
007 * to you under the Apache License, Version 2.0 (the
008 * "License"); you may not use this file except in compliance
009 * with the License.  You may obtain a copy of the License at
010 *
011 *     http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 */
019package org.apache.hadoop.hbase.regionserver;
020
021import java.io.Closeable;
022import java.io.IOException;
023import java.util.Map.Entry;
024import java.util.concurrent.ConcurrentHashMap;
025import java.util.concurrent.atomic.AtomicBoolean;
026import java.util.concurrent.locks.ReentrantLock;
027
028import org.apache.hadoop.hbase.HConstants;
029import org.apache.hadoop.hbase.Server;
030import org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL;
031import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException;
032import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener;
033import org.apache.hadoop.hbase.util.Bytes;
034import org.apache.hadoop.hbase.util.HasThread;
035import org.apache.hadoop.hbase.wal.WAL;
036import org.apache.hadoop.ipc.RemoteException;
037import org.apache.yetus.audience.InterfaceAudience;
038import org.slf4j.Logger;
039import org.slf4j.LoggerFactory;
040import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
041
042/**
043 * Runs periodically to determine if the WAL should be rolled.
044 *
045 * NOTE: This class extends Thread rather than Chore because the sleep time
046 * can be interrupted when there is something to do, rather than the Chore
047 * sleep time which is invariant.
048 *
049 * TODO: change to a pool of threads
050 */
051@InterfaceAudience.Private
052@VisibleForTesting
053public class LogRoller extends HasThread implements Closeable {
054  private static final Logger LOG = LoggerFactory.getLogger(LogRoller.class);
055  private final ReentrantLock rollLock = new ReentrantLock();
056  private final AtomicBoolean rollLog = new AtomicBoolean(false);
057  private final ConcurrentHashMap<WAL, Boolean> walNeedsRoll = new ConcurrentHashMap<>();
058  private final Server server;
059  protected final RegionServerServices services;
060  private volatile long lastrolltime = System.currentTimeMillis();
061  // Period to roll log.
062  private final long rollperiod;
063  private final int threadWakeFrequency;
064  // The interval to check low replication on hlog's pipeline
065  private long checkLowReplicationInterval;
066
067  private volatile boolean running = true;
068
069  public void addWAL(final WAL wal) {
070    if (null == walNeedsRoll.putIfAbsent(wal, Boolean.FALSE)) {
071      wal.registerWALActionsListener(new WALActionsListener() {
072        @Override
073        public void logRollRequested(boolean lowReplicas) {
074          walNeedsRoll.put(wal, Boolean.TRUE);
075          // TODO logs will contend with each other here, replace with e.g. DelayedQueue
076          synchronized(rollLog) {
077            rollLog.set(true);
078            rollLog.notifyAll();
079          }
080        }
081      });
082    }
083  }
084
085  public void requestRollAll() {
086    for (WAL wal : walNeedsRoll.keySet()) {
087      walNeedsRoll.put(wal, Boolean.TRUE);
088    }
089    synchronized(rollLog) {
090      rollLog.set(true);
091      rollLog.notifyAll();
092    }
093  }
094
095  /** @param server */
096  public LogRoller(final Server server, final RegionServerServices services) {
097    super("LogRoller");
098    this.server = server;
099    this.services = services;
100    this.rollperiod = this.server.getConfiguration().
101      getLong("hbase.regionserver.logroll.period", 3600000);
102    this.threadWakeFrequency = this.server.getConfiguration().
103      getInt(HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000);
104    this.checkLowReplicationInterval = this.server.getConfiguration().getLong(
105        "hbase.regionserver.hlog.check.lowreplication.interval", 30 * 1000);
106  }
107
108  @Override
109  public void interrupt() {
110    // Wake up if we are waiting on rollLog. For tests.
111    synchronized (rollLog) {
112      this.rollLog.notify();
113    }
114    super.interrupt();
115  }
116
117  /**
118   * we need to check low replication in period, see HBASE-18132
119   */
120  void checkLowReplication(long now) {
121    try {
122      for (Entry<WAL, Boolean> entry : walNeedsRoll.entrySet()) {
123        WAL wal = entry.getKey();
124        boolean needRollAlready = entry.getValue();
125        if (needRollAlready || !(wal instanceof AbstractFSWAL)) {
126          continue;
127        }
128        ((AbstractFSWAL<?>) wal).checkLogLowReplication(checkLowReplicationInterval);
129      }
130    } catch (Throwable e) {
131      LOG.warn("Failed checking low replication", e);
132    }
133  }
134
135  private void abort(String reason, Throwable cause) {
136    // close all WALs before calling abort on RS.
137    // This is because AsyncFSWAL replies on us for rolling a new writer to make progress, and if we
138    // failed, AsyncFSWAL may be stuck, so we need to close it to let the upper layer know that it
139    // is already broken.
140    for (WAL wal : walNeedsRoll.keySet()) {
141      // shutdown rather than close here since we are going to abort the RS and the wals need to be
142      // split when recovery
143      try {
144        wal.shutdown();
145      } catch (IOException e) {
146        LOG.warn("Failed to shutdown wal", e);
147      }
148    }
149    server.abort(reason, cause);
150  }
151
152  @Override
153  public void run() {
154    while (running) {
155      long now = System.currentTimeMillis();
156      checkLowReplication(now);
157      boolean periodic = false;
158      if (!rollLog.get()) {
159        periodic = (now - this.lastrolltime) > this.rollperiod;
160        if (!periodic) {
161          synchronized (rollLog) {
162            try {
163              if (!rollLog.get()) {
164                rollLog.wait(this.threadWakeFrequency);
165              }
166            } catch (InterruptedException e) {
167              // Fall through
168            }
169          }
170          continue;
171        }
172        // Time for periodic roll
173        LOG.debug("Wal roll period {} ms elapsed", this.rollperiod);
174      } else {
175        LOG.debug("WAL roll requested");
176      }
177      rollLock.lock(); // FindBugs UL_UNRELEASED_LOCK_EXCEPTION_PATH
178      try {
179        this.lastrolltime = now;
180        for (Entry<WAL, Boolean> entry : walNeedsRoll.entrySet()) {
181          final WAL wal = entry.getKey();
182          // Force the roll if the logroll.period is elapsed or if a roll was requested.
183          // The returned value is an array of actual region names.
184          final byte [][] regionsToFlush = wal.rollWriter(periodic ||
185              entry.getValue().booleanValue());
186          walNeedsRoll.put(wal, Boolean.FALSE);
187          if (regionsToFlush != null) {
188            for (byte[] r : regionsToFlush) {
189              scheduleFlush(r);
190            }
191          }
192        }
193      } catch (FailedLogCloseException e) {
194        abort("Failed log close in log roller", e);
195      } catch (java.net.ConnectException e) {
196        abort("Failed log close in log roller", e);
197      } catch (IOException ex) {
198        // Abort if we get here.  We probably won't recover an IOE. HBASE-1132
199        abort("IOE in log roller",
200          ex instanceof RemoteException ? ((RemoteException) ex).unwrapRemoteException() : ex);
201      } catch (Exception ex) {
202        LOG.error("Log rolling failed", ex);
203        abort("Log rolling failed", ex);
204      } finally {
205        try {
206          rollLog.set(false);
207        } finally {
208          rollLock.unlock();
209        }
210      }
211    }
212    LOG.info("LogRoller exiting.");
213  }
214
215  /**
216   * @param encodedRegionName Encoded name of region to flush.
217   */
218  private void scheduleFlush(final byte [] encodedRegionName) {
219    boolean scheduled = false;
220    HRegion r = (HRegion) this.services.getRegion(Bytes.toString(encodedRegionName));
221    FlushRequester requester = null;
222    if (r != null) {
223      requester = this.services.getFlushRequester();
224      if (requester != null) {
225        // force flushing all stores to clean old logs
226        requester.requestFlush(r, true, FlushLifeCycleTracker.DUMMY);
227        scheduled = true;
228      }
229    }
230    if (!scheduled) {
231      LOG.warn("Failed to schedule flush of {}, region={}, requester={}",
232        Bytes.toString(encodedRegionName), r, requester);
233    }
234  }
235
236  /**
237   * For testing only
238   * @return true if all WAL roll finished
239   */
240  @VisibleForTesting
241  public boolean walRollFinished() {
242    for (boolean needRoll : walNeedsRoll.values()) {
243      if (needRoll) {
244        return false;
245      }
246    }
247    return true;
248  }
249
250  @Override
251  public void close() {
252    running = false;
253    interrupt();
254  }
255}