现在的位置: 首页 > 综合 > 正文

HBase☞ZooKeeperWatcher

2013年10月11日 ⁄ 综合 ⁄ 共 12951字 ⁄ 字号 评论关闭
 声明:本人看的源代码,翻译的一些注释,抛砖引玉

/**
 * Copyright 2010 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.zookeeper;

import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.CopyOnWriteArrayList;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.ZooKeeper;

/**
 * Acts as the single ZooKeeper Watcher.  One instance of this is instantiated
 * for each Master, RegionServer, and client process.
 *作为单个的ZooKeeper的监视器.一个被实例化成每个master RegionServer 和客户端进程的实例
 * <p>This is the only class that implements
{@link Watcher}.  Other internal
 * classes which need to be notified of ZooKeeper events must register with
 * the local instance of this watcher via
{@link #registerListener}.
 * 这个只实现了Watcher类,其他需要被ZooKeeper events通知的内部类必须注册在本地化实例的watcher 通过#registerListener实现
 * <p>This class also holds and manages the connection to ZooKeeper.  Code to
 * deal with connection related events and exceptions are handled here.
 * 这个类也持有和管理ZooKeeper的连接.处理连接的相关事件和异常
 */
public class ZooKeeperWatcher implements Watcher, Abortable {
  private static final Log LOG = LogFactory.getLog(ZooKeeperWatcher.class);

  // Identifiier for this watcher (for logging only).  Its made of the prefix
  //识别这个监视 (针对日志) 它带有前缀
  // passed on construction and the zookeeper sessionid.
  //传递结构和ZooKeeper 会话ID
  private String identifier;

  // zookeeper quorum
  private String quorum;

  // zookeeper connection
  private ZooKeeper zooKeeper;

  // abortable in case of zk failure
  private Abortable abortable;

  // listeners to be notified
  //监听事件列表
  private final List<ZooKeeperListener> listeners =
    new CopyOnWriteArrayList<ZooKeeperListener>();

  // set of unassigned nodes watched
  private Set<String> unassignedNodes = new HashSet<String>();

  // node names

  // base znode for this cluster
  // 集群的顶层Znode
  public String baseZNode;
  // znode containing location of server hosting root region
  // z-node 包含服务器托管根区的位置

  public String rootServerZNode;
  // znode containing ephemeral nodes of the regionservers
  //域服务临时节点
  public String rsZNode;
  // znode of currently active master
  //当前工作的master节点
  public String masterAddressZNode;
  // znode containing the current cluster state
  // 包含当前集群的状态
  public String clusterStateZNode;
  // znode used for region transitioning and assignment
  // z-node 域的过渡和分配
  public String assignmentZNode;
  // znode used for table disabling/enabling
  public String tableZNode;

  private final Configuration conf;

  private final Exception constructorCaller;

  /**
   * Instantiate a ZooKeeper connection and watcher.
   * 实例化一个ZooKeeper 连接和监视
   * @param descriptor Descriptive string that is added to zookeeper sessionid
   * and used as identifier for this instance.
   * @throws IOException
   * @throws ZooKeeperConnectionException
   */
  public ZooKeeperWatcher(Configuration conf, String descriptor,
      Abortable abortable)
  throws IOException, ZooKeeperConnectionException {
    this.conf = conf;
    // Capture a stack trace now.  Will print it out later if problem so we can
    // distingush amongst the myriad ZKWs.
    try {
      throw new Exception("ZKW CONSTRUCTOR STACK TRACE FOR DEBUGGING");
    } catch (Exception e) {
      this.constructorCaller = e;
    }
    this.quorum = ZKConfig.getZKQuorumServersString(conf);
    // Identifier will get the sessionid appended later below down when we
    // handle the syncconnect event.
    this.identifier = descriptor;
    this.abortable = abortable;
    setNodeNames(conf);
    this.zooKeeper = ZKUtil.connect(conf, quorum, this, descriptor);
    try {
      // Create all the necessary "directories" of znodes 创建必要的节点目录
      // TODO: Move this to an init method somewhere so not everyone calls it?
      // 初始化 对外是隐蔽的
      // The first call against zk can fail with connection loss.  Seems common.
      // Apparently this is recoverable.  Retry a while.
      // 连接失败后第一次调用ZK
      // See
http://wiki.apache.org/hadoop/ZooKeeper/ErrorHandling
      // TODO: Generalize out in ZKUtil.
      long wait = conf.getLong("hbase.zookeeper.recoverable.waittime", 10000);
      long finished = System.currentTimeMillis() + wait;
      KeeperException ke = null;
      do {
        try {
          ZKUtil.createAndFailSilent(this, baseZNode);
          ke = null;
          break;
        } catch (KeeperException.ConnectionLossException e) {
          if (LOG.isDebugEnabled() && (isFinishedRetryingRecoverable(finished))) {
            LOG.debug("Retrying zk create for another " +
              (finished - System.currentTimeMillis()) +
              "ms; set 'hbase.zookeeper.recoverable.waittime' to change " +
              "wait time); " + e.getMessage());
          }
          ke = e;
        }
      } while (isFinishedRetryingRecoverable(finished));
      // Convert connectionloss exception to ZKCE.
      if (ke != null) {
        try {
          // If we don't close it, the zk connection managers won't be killed
          this.zooKeeper.close();
        } catch (InterruptedException e) {
          Thread.currentThread().interrupt();
          LOG.warn("Interrupted while closing", e);
        }
        throw new ZooKeeperConnectionException("HBase is able to connect to" +
            " ZooKeeper but the connection closes immediately. This could be" +
            " a sign that the server has too many connections (30 is the" +
            " default). Consider inspecting your ZK server logs for that" +
            " error and then make sure you are reusing HBaseConfiguration" +
            " as often as you can. See HTable's javadoc for more information.",
            ke);
      }
      ZKUtil.createAndFailSilent(this, assignmentZNode);
      ZKUtil.createAndFailSilent(this, rsZNode);
      ZKUtil.createAndFailSilent(this, tableZNode);
    } catch (KeeperException e) {
      throw new ZooKeeperConnectionException(
          prefix("Unexpected KeeperException creating base node"), e);
    }
  }
//通过时间判断
  private boolean isFinishedRetryingRecoverable(final long finished) {
    return System.currentTimeMillis() < finished;
  }

  @Override
  public String toString() {
    return this.identifier;
  }

  /**
   * Adds this instance's identifier as a prefix to the passed <code>str</code>
   * @param str String to amend.
   * @return A new string with this instance's identifier as prefix: e.g.
   * if passed 'hello world', the returned string could be
   */
  public String prefix(final String str) {
    return this.toString() + " " + str;
  }

  /**
   * Set the local variable node names using the specified configuration.
   * 设置本地变量的节点名字通过configuration配置
   *
   * 这是关键跟H-base配置相关连 HConstants的常量配置
   */
  private void setNodeNames(Configuration conf) {
    baseZNode = conf.get(HConstants.ZOOKEEPER_ZNODE_PARENT,
        HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT);
    rootServerZNode = ZKUtil.joinZNode(baseZNode,
        conf.get("zookeeper.znode.rootserver", "root-region-server"));
    rsZNode = ZKUtil.joinZNode(baseZNode,
        conf.get("zookeeper.znode.rs", "rs"));
    masterAddressZNode = ZKUtil.joinZNode(baseZNode,
        conf.get("zookeeper.znode.master", "master"));
    clusterStateZNode = ZKUtil.joinZNode(baseZNode,
        conf.get("zookeeper.znode.state", "shutdown"));
    assignmentZNode = ZKUtil.joinZNode(baseZNode,
        conf.get("zookeeper.znode.unassigned", "unassigned"));
    tableZNode = ZKUtil.joinZNode(baseZNode,
        conf.get("zookeeper.znode.tableEnableDisable", "table"));
  }

  /**
   * Register the specified listener to receive ZooKeeper events.
   * @param listener
   */
  public void registerListener(ZooKeeperListener listener) {
    listeners.add(listener);
  }

  /**
   * Register the specified listener to receive ZooKeeper events and add it as
   * the first in the list of current listeners.
   * @param listener
   */
  public void registerListenerFirst(ZooKeeperListener listener) {
    listeners.add(0, listener);
  }

  /**
   * Get the connection to ZooKeeper.
   * @return connection reference to zookeeper
   */
  public ZooKeeper getZooKeeper() {
    return zooKeeper;
  }

  /**
   * Get the quorum address of this instance.
   * @return quorum string of this zookeeper connection instance
   */
  public String getQuorum() {
    return quorum;
  }

  /**
   * Method called from ZooKeeper for events and connection status.
   *
   * Valid events are passed along to listeners.  Connection status changes
   * are dealt with locally.
   */
  @Override
  public void process(WatchedEvent event) {
    LOG.debug(prefix("Received ZooKeeper Event, " +
        "type=" + event.getType() + ", " +
        "state=" + event.getState() + ", " +
        "path=" + event.getPath()));

    switch(event.getType()) {

      // If event type is NONE, this is a connection status change
      case None: {
        connectionEvent(event);
        break;
      }

      // Otherwise pass along to the listeners

      case NodeCreated: {
        for(ZooKeeperListener listener : listeners) {
          listener.nodeCreated(event.getPath());
        }
        break;
      }

      case NodeDeleted: {
        for(ZooKeeperListener listener : listeners) {
          listener.nodeDeleted(event.getPath());
        }
        break;
      }

      case NodeDataChanged: {
        for(ZooKeeperListener listener : listeners) {
          listener.nodeDataChanged(event.getPath());
        }
        break;
      }

      case NodeChildrenChanged: {
        for(ZooKeeperListener listener : listeners) {
          listener.nodeChildrenChanged(event.getPath());
        }
        break;
      }
    }
  }

  // Connection management

  /**
   * Called when there is a connection-related event via the Watcher callback.
   * 当有相关连接事件时通过Watcher的回调调用这个方法
   * If Disconnected or Expired, this should shutdown the cluster. But, since
   * we send a KeeperException.SessionExpiredException along with the abort
   * call, it's possible for the Abortable to catch it and try to create a new
   * session with ZooKeeper. This is what the client does in HCM.
   *如果失去连接或者过期,应该关闭集群,但是,可以发送KeeperException,SessionExireException异常去放弃这个方法
   *它也可能不捕获异常而创建新会话连接与ZooKeeper ,这个客户端在HCM
   * @param event
   */
  private void connectionEvent(WatchedEvent event) {
    switch(event.getState()) {
      case SyncConnected:
        // Now, this callback can be invoked before the this.zookeeper is set.
        // Wait a little while.
        long finished = System.currentTimeMillis() +
          this.conf.getLong("hbase.zookeeper.watcher.sync.connected.wait", 2000);
        while (System.currentTimeMillis() < finished) {
          Threads.sleep(1);
          if (this.zooKeeper != null) break;
        }
        if (this.zooKeeper == null) {
          LOG.error("ZK is null on connection event -- see stack trace " +
            "for the stack trace when constructor was called on this zkw",
            this.constructorCaller);
          throw new NullPointerException("ZK is null");
        }
        this.identifier = this.identifier + "-0x" +
          Long.toHexString(this.zooKeeper.getSessionId());
        // Update our identifier.  Otherwise ignore.
        LOG.debug(this.identifier + " connected");
        break;

      // Abort the server if Disconnected or Expired
           case Disconnected:
        LOG.debug(prefix("Received Disconnected from ZooKeeper, ignoring"));
        break;

      case Expired:
        String msg = prefix(this.identifier + " received expired from " +
          "ZooKeeper, aborting");
        // TODO: One thought is to add call to ZooKeeperListener so say,
        // ZooKeperNodeTracker can zero out its data values.
        if (this.abortable != null) this.abortable.abort(msg,
            new KeeperException.SessionExpiredException());
        break;
    }
  }

  /**
   * Forces a synchronization of this ZooKeeper client connection.
   * 与ZooKeeper同步客户端连接
   * <p>
   * Executing this method before running other methods will ensure that the
   * subsequent operations are up-to-date and consistent as of the time that
   * the sync is complete.
   * 执行这个方法在运行其他方法之前确保顺序操作是及时更新和一致的 在同步完成之后
   * <p>
   * This is used for compareAndSwap type operations where we need to read the
   * data of an existing node and delete or transition that node, utilizing the
   * previously read version and data.  We want to ensure that the version read
   * is up-to-date from when we begin the operation.
   * 比较和交换类型操作在已经存在的节点读数据和删除或者转换
   *
   */
  public void sync(String path) {
    this.zooKeeper.sync(path, null, null);
  }

  /**
   * Get the set of already watched unassigned nodes.
   * @return Set of Nodes.
   */
  public Set<String> getNodes() {
    return unassignedNodes;
  }

  /**
   * Handles KeeperExceptions in client calls.
   *
   * This may be temporary but for now this gives one place to deal with these.
   *
   * TODO: Currently this method rethrows the exception to let the caller handle
   *
   * @param ke
   * @throws KeeperException
   */
  public void keeperException(KeeperException ke)
  throws KeeperException {
    LOG.error(prefix("Received unexpected KeeperException, re-throwing exception"), ke);
    throw ke;
  }

  /**
   * Handles InterruptedExceptions in client calls.
   *
   * This may be temporary but for now this gives one place to deal with these.
   *
   * TODO: Currently, this method does nothing.
   *       Is this ever expected to happen?  Do we abort or can we let it run?
   *       Maybe this should be logged as WARN?  It shouldn't happen?
   *
   * @param ie
   */
  public void interruptedException(InterruptedException ie) {
    LOG.debug(prefix("Received InterruptedException, doing nothing here"), ie);
    // At least preserver interrupt.
    Thread.currentThread().interrupt();
    // no-op
  }

  /**
   * Close the connection to ZooKeeper.
   * @throws InterruptedException
   */
  public void close() {
    try {
      if (zooKeeper != null) {
        zooKeeper.close();
//        super.close();
      }
    } catch (InterruptedException e) {
    }
  }

  @Override
  public void abort(String why, Throwable e) {
    this.abortable.abort(why, e);
  }
}

抱歉!评论已关闭.