现在的位置: 首页 > 综合 > 正文

HandlerSocket

2013年11月14日 ⁄ 综合 ⁄ 共 7015字 ⁄ 字号 评论关闭

HandlerSocket是MySQL的一个插件,主要是越过parser、optimizer这一层,直接对数据库进行key/value方式的存取,对于大内存的数据库,能极大地提高性能.

 

通过阅读代码,发现它主要用到了下面这些结构体(它们建立在MySQL的TABLE、THD、Item等类型之上)

/* Bookkeeping record for one TABLE pointer stored in a table vector. */
struct tablevec_entry {
  TABLE *table;
  size_t refcount;
  bool modified;

  /* A fresh entry holds no table, a zero refcount and a cleared flag. */
  tablevec_entry()
    : table(0),
      refcount(0),
      modified(false)
  {
  }
};

 

 

/*
 * Wraps the expression items needed to evaluate Item_func_get_lock and
 * Item_func_release_lock against the fixed key "handlersocket_wr".
 */
struct expr_user_lock : private noncopyable {
 private:
  /*
   * Keep this declaration order: the two function items are constructed
   * from the addresses of lck_key and lck_timeout.
   */
  Item_string lck_key;
  Item_int lck_timeout;
  Item_func_get_lock lck_func_get_lock;
  Item_func_release_lock lck_func_release_lock;

 public:
  /*
   * Builds the key and timeout items, wires them into the two function
   * items, then calls fix_fields() on each of the four items with thd.
   */
  expr_user_lock(THD *thd, int timeout)
    : lck_key("handlersocket_wr", 16, &my_charset_latin1),
      lck_timeout(timeout),
      lck_func_get_lock(&lck_key, &lck_timeout),
      lck_func_release_lock(&lck_key)
  {
    lck_key.fix_fields(thd, 0);
    lck_timeout.fix_fields(thd, 0);
    lck_func_get_lock.fix_fields(thd, 0);
    lck_func_release_lock.fix_fields(thd, 0);
  }

  /* Evaluates the get-lock item and returns its integer value. */
  long long get_lock()
  {
    const long long result = lck_func_get_lock.val_int();
    return result;
  }

  /* Evaluates the release-lock item and returns its integer value. */
  long long release_lock()
  {
    const long long result = lck_func_release_lock.val_int();
    return result;
  }
};

 

 

/*
 * Concrete implementation of the dbcontext_i interface. It keeps a THD
 * pointer, table-lock state, an optional expr_user_lock and a registry of
 * tables (a vector of tablevec_entry plus a name-to-index map).
 * Instances are noncopyable.
 */
struct dbcontext : public dbcontext_i, private noncopyable {
  dbcontext(volatile database *d, bool for_write);
  virtual ~dbcontext();
  /* NOTE(review): "stack_botton" looks like a typo for "stack_bottom". */
  virtual void init_thread(const void *stack_botton,
    volatile int& shutdown_flag);
  virtual void term_thread();
  virtual bool check_alive();
  virtual void lock_tables_if();
  virtual void unlock_tables_if();
  virtual bool get_commit_error();
  virtual void clear_error();
  virtual void close_tables_if();
  /* tbl_id is presumably an index into table_vec - confirm in the .cpp. */
  virtual void table_addref(size_t tbl_id);
  virtual void table_release(size_t tbl_id);
  virtual void cmd_open(dbcallback_i& cb, const cmd_open_args& args);
  virtual void cmd_exec(dbcallback_i& cb, const cmd_exec_args& args);
  virtual void set_statistics(size_t num_conns, size_t num_active);
 private:
  /*
   * printf-style helper. The format attribute uses indexes (2, 3) because
   * the implicit this pointer counts as argument 1 of a member function.
   */
  int set_thread_message(const char *fmt, ...)
    __attribute__((format (printf, 2, 3)));
  bool parse_fields(TABLE *const table, const char *str,
    prep_stmt::fields_type& flds);
  /* Per-command workers; presumably dispatched from cmd_exec - confirm. */
  void cmd_insert_internal(dbcallback_i& cb, const prep_stmt& pst,
    const string_ref *fvals, size_t fvalslen);
  void cmd_sql_internal(dbcallback_i& cb, const prep_stmt& pst,
    const string_ref *fvals, size_t fvalslen);
  void cmd_find_internal(dbcallback_i& cb, const prep_stmt& pst,
    ha_rkey_function find_flag, const cmd_exec_args& args);
  /* Helpers that size, fill and evaluate the buffer used by record filters. */
  size_t calc_filter_buf_size(TABLE *table, const prep_stmt& pst,
    const record_filter *filters);
  bool fill_filter_buf(TABLE *table, const prep_stmt& pst,
    const record_filter *filters, uchar *filter_buf, size_t len);
  int check_filter(dbcallback_i& cb, TABLE *table, const prep_stmt& pst,
    const record_filter *filters, const uchar *filter_buf);
  /* Per-record helpers that take the table and the prepared statement. */
  void resp_record(dbcallback_i& cb, TABLE *const table, const prep_stmt& pst);
  void dump_record(dbcallback_i& cb, TABLE *const table, const prep_stmt& pst);
  /* mod_op is a one-character operation code; modified_count is an in/out counter. */
  int modify_record(dbcallback_i& cb, TABLE *const table,
    const prep_stmt& pst, const cmd_exec_args& args, char mod_op,
    size_t& modified_count);
 private:
  typedef std::vector<tablevec_entry> table_vec_type;
  /* Pair of strings used as the map key; presumably (db name, table name) - confirm. */
  typedef std::pair<std::string, std::string> table_name_type;
  typedef std::map<table_name_type, size_t> table_map_type;
 private:
  /*
   * Data members. The constructor is defined elsewhere, so do not reorder
   * these without checking its initializer list.
   */
  volatile database *const dbref;
  bool for_write_flag;
  THD *thd;
  MYSQL_LOCK *lock;
  bool lock_failed;
  std::auto_ptr<expr_user_lock> user_lock;
  int user_level_lock_timeout;
  bool user_level_lock_locked;
  bool commit_error;
  std::vector<char> info_message_buf;
  table_vec_type table_vec;
  table_map_type table_map;
};

对表加锁,调用的是MySQL的如下函数

MYSQL_LOCK *mysql_lock_tables(THD *thd, TABLE **tables, uint count, uint flags)

 

表解锁

void mysql_unlock_tables(THD *thd, MYSQL_LOCK *sql_lock)

 

修改完后,提交事务

bool trans_commit_stmt(THD *thd)

 

关闭表

void close_thread_tables(THD *thd)

 

打开表

bool open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT *mem_root,
                Open_table_context *ot_ctx)

 

/*
 * The core function: performs an index lookup (optionally followed by an
 * update/delete/increment/decrement of every hit) and reports the outcome
 * through the callback.
 *
 * cb        - receives the response: either a short status via
 *             dbcb_resp_short()/dbcb_resp_short_num(), or a record stream
 *             framed by dbcb_resp_begin()/dbcb_resp_end().
 * pst       - prepared statement supplying the table id, the index number
 *             and the returned fields.
 * find_flag - key comparison mode for the first read; it also decides how
 *             the scan advances on later iterations.
 * args      - key values, limit/skip, optional filters, the optional
 *             modification operator and the optional in-values list.
 *
 * Request-validation failures answer with code 2 ("readonly", "modop",
 * "tblnum", "idxnum", "kpnum", "filterblob"); a missing table lock answers
 * with code 1 ("lock_tables"); a handler error other than
 * deleted/not-found/end-of-file answers with code 1 and the error number.
 */
void
dbcontext::cmd_find_internal(dbcallback_i& cb, const prep_stmt& pst,
  ha_rkey_function find_flag, const cmd_exec_args& args)
{
  const bool debug_out = (verbose_level >= 100);
  bool need_resp_record = true;
  char mod_op = 0;
  const string_ref& mod_op_str = args.mod_op;
  if (mod_op_str.size() != 0) {
    if (!for_write_flag) {
      return cb.dbcb_resp_short(2, "readonly");
    }
    mod_op = mod_op_str.begin()[0];
    /* With a modification operator, records are echoed back only when the
       operator is followed by '?'. */
    need_resp_record = mod_op_str.size() > 1 && mod_op_str.begin()[1] == '?';
    switch (mod_op) {
    case 'U': /* update */
    case 'D': /* delete */
    case '+': /* increment */
    case '-': /* decrement */
      break;
    default:
      if (debug_out) {
        fprintf(stderr, "unknown modop: %c\n", mod_op);
      }
      return cb.dbcb_resp_short(2, "modop");
    }
  }
  lock_tables_if();
  if (lock == 0) {
    return cb.dbcb_resp_short(1, "lock_tables");
  }
  if (pst.get_table_id() >= table_vec.size()) {
    return cb.dbcb_resp_short(2, "tblnum");
  }
  TABLE *const table = table_vec[pst.get_table_id()].table;
  /* keys */
  if (pst.get_idxnum() >= table->s->keys) {
    return cb.dbcb_resp_short(2, "idxnum");
  }
  KEY& kinfo = table->key_info[pst.get_idxnum()];
  if (args.kvalslen > kinfo.key_parts) {
    return cb.dbcb_resp_short(2, "kpnum");
  }
  /* From here on every exit path must release key_buf (and filter_buf). */
  uchar *const key_buf = DENA_ALLOCA_ALLOCATE(uchar, kinfo.key_length);
  size_t invalues_idx = 0;
  size_t kplen_sum = prepare_keybuf(args, key_buf, table, kinfo, invalues_idx);
  /* filters */
  uchar *filter_buf = 0;
  if (args.filters != 0) {
    const size_t filter_buf_len = calc_filter_buf_size(table, pst,
      args.filters);
    filter_buf = DENA_ALLOCA_ALLOCATE(uchar, filter_buf_len);
    if (!fill_filter_buf(table, pst, args.filters, filter_buf,
      filter_buf_len)) {
      /* Release both buffers exactly as the normal exit path does, so this
         error return does not leak them when DENA_ALLOCA_* is not a no-op
         (the macro definitions are not visible here). */
      DENA_ALLOCA_FREE(filter_buf);
      DENA_ALLOCA_FREE(key_buf);
      return cb.dbcb_resp_short(2, "filterblob");
    }
  }
  /* handler */
  table->read_set = &table->s->all_set;
  handler *const hnd = table->file;
  if (!for_write_flag) {
    hnd->init_table_handle_for_HANDLER();
  }
  hnd->ha_index_or_rnd_end();
  hnd->ha_index_init(pst.get_idxnum(), 1);
  if (need_resp_record) {
    cb.dbcb_resp_begin(pst.get_ret_fields().size());
  }
  /* A limit of 0 is treated as 1. */
  const uint32_t limit = args.limit ? args.limit : 1;
  uint32_t skip = args.skip;
  size_t modified_count = 0;
  int r = 0;
  bool is_first = true;
  /* cnt counts hits only; skip is decremented separately, so the bound
     limit + skip shrinks to limit once all skipped rows are consumed. */
  for (uint32_t cnt = 0; cnt < limit + skip;) {
    if (is_first) {
      /* First iteration: position the cursor with the supplied key. */
      is_first = false;
      const key_part_map kpm = (1U << args.kvalslen) - 1;
      r = hnd->index_read_map(table->record[0], key_buf, kpm, find_flag);
    } else if (args.invalues_keypart >= 0) {
      /* In-values mode: rebuild the key with the next in-value and do a
         fresh keyed read; stop when the list is exhausted. */
      if (++invalues_idx >= args.invalueslen) {
        break;
      }
      kplen_sum = prepare_keybuf(args, key_buf, table, kinfo, invalues_idx);
      const key_part_map kpm = (1U << args.kvalslen) - 1;
      r = hnd->index_read_map(table->record[0], key_buf, kpm, find_flag);
    } else {
      /* Plain scan: advance in the direction implied by find_flag. */
      switch (find_flag) {
      case HA_READ_BEFORE_KEY:
      case HA_READ_KEY_OR_PREV:
        r = hnd->index_prev(table->record[0]);
        break;
      case HA_READ_AFTER_KEY:
      case HA_READ_KEY_OR_NEXT:
        r = hnd->index_next(table->record[0]);
        break;
      case HA_READ_KEY_EXACT:
        r = hnd->index_next_same(table->record[0], key_buf, kplen_sum);
        break;
      default:
        r = HA_ERR_END_OF_FILE; /* to finish the loop */
        break;
      }
    }
    if (debug_out) {
      fprintf(stderr, "r=%d\n", r);
      if (r == 0 || r == HA_ERR_RECORD_DELETED) {
        dump_record(cb, table, pst);
      }
    }
    int filter_res = 0;
    if (r != 0) {
      /* no-count */
    } else if (args.filters != 0 && (filter_res = check_filter(cb, table,
      pst, args.filters, filter_buf)) != 0) {
      /* Non-zero means the row is not a hit; a negative value ends the scan. */
      if (filter_res < 0) {
        break;
      }
    } else if (skip > 0) {
      --skip;
    } else {
      /* hit */
      if (need_resp_record) {
        resp_record(cb, table, pst);
      }
      if (mod_op != 0) {
        r = modify_record(cb, table, pst, args, mod_op, modified_count);
      }
      ++cnt;
    }
    /* In in-values mode a missing key just moves on to the next value. */
    if (args.invalues_keypart >= 0 && r == HA_ERR_KEY_NOT_FOUND) {
      continue;
    }
    if (r != 0 && r != HA_ERR_RECORD_DELETED) {
      break;
    }
  }
  hnd->ha_index_or_rnd_end();
  if (r != 0 && r != HA_ERR_RECORD_DELETED && r != HA_ERR_KEY_NOT_FOUND &&
    r != HA_ERR_END_OF_FILE) {
    /* failed */
    if (need_resp_record) {
      /* revert dbcb_resp_begin() and dbcb_resp_entry() */
      cb.dbcb_resp_cancel();
    }
    cb.dbcb_resp_short_num(1, r);
  } else {
    /* succeeded */
    if (need_resp_record) {
      cb.dbcb_resp_end();
    } else {
      cb.dbcb_resp_short_num(0, modified_count);
    }
  }
  DENA_ALLOCA_FREE(filter_buf);
  DENA_ALLOCA_FREE(key_buf);
}

 

总的来说,HandlerSocket并没有直接调用InnoDB的函数,而是调用了其上一层用C++编写的handler接口函数.

它把传进来的key封装成索引的查找条件,然后到数据库中查找.

锁的粒度较大,是表级的;对于select较多的应用应该较快,而对于update较多的场景,性能不一定高.

实质上类似于通过C语言直接调用Oracle的OCI,性能的提高主要还是得益于MySQL本身的实现.

 

,

 

抱歉!评论已关闭.