A simple, performant, connection-pooled, cached and promisified HBase client library for NodeJS.

API

Instantiating the HBase client

const config = { hosts : [ "master" ], port : "9090" , }; const HBase = require ( 'node-thrift2-hbase' )(config);

Get

var get = HBase.Get('row1'); //row1 is rowKey get .addFamily('cf'); // get .add('cf'); identical to addFamily get .addColumn('info', 'name'); // get .add('info', 'name'); identical to addColumn get .addTimestamp('info', 'name', 1414385447707); // get .add('info', 'name', 1414385447707); identical to addTimestamp get .setMaxVersions(3); //last ten days as timerange get .setTimeRange({ minStamp : Date .now() - 10 * 24 * 60 * 60 * 1000 , maxStamp : Date .now() }); HBase.getAsync( 'users' , get ) .then(function (data) { console .log( "Data for user with key 'row1':" ); console .log( '==============================' ); _.each(data[ 0 ].columnValues, function ( colVal, index ) { console .log( 'Column value #' , index); console .log( 'family:' , colVal.family.toString()); console .log( 'qualifier:' , colVal.qualifier.toString()); console .log( 'value:' , colVal.value.readInt32BE( 0 , 4 )); }); }) .catch( function ( err ) { console .log( 'error:' , err); }); HBase.get( 'users' , get , function (err, data) { if (err) { console .log( 'error:' , err); return ; } console .log( "Data for user with key 'row1':" ); console .log( '==============================' ); _.each(data[ 0 ].columnValues, function ( colVal, index ) { console .log( 'Column value #' , index); console .log( 'family:' , colVal.family.toString()); console .log( 'qualifier:' , colVal.qualifier.toString()); console .log( 'value:' , colVal.value.readInt32BE( 0 , 4 )); }); });

A shorthand version is the getRow function:

HBase.getRow( 'users' , 'row1' , [ 'info:name' , 'ecf' ], 1 , function ( err, data ) { if (err) { console .log( 'error:' , err); return ; } console .log( "Data for user with key 'row1':" ); console .log( '==============================' ); _.each(data[ 0 ].columnValues, function ( colVal, index ) { console .log( 'Column value #' , index); console .log( 'family:' , colVal.family.toString()); console .log( 'qualifier:' , colVal.qualifier.toString()); console .log( 'value:' , colVal.value.readInt32BE( 0 , 4 )); }); }); HBase.getRowAsync( 'users' , 'row1' , [ 'info:name' , 'ecf' ], 1 ) .then( function ( data ) { console .log( "Data for user with key 'row1':" ); console .log( '==============================' ); _.each(data[ 0 ].columnValues, function ( colVal, index ) { console .log( 'Column value #' , index); console .log( 'family:' , colVal.family.toString()); console .log( 'qualifier:' , colVal.qualifier.toString()); console .log( 'value:' , colVal.value.readInt32BE( 0 , 4 )); }); }) .catch( function ( err ) { console .log( 'error:' , err); });

Qualifier serialization/deserialization:

When adding a qualifier via the add methods of Get, Put and Scan objects, you can specify the type of value expected for that qualifier. That means that instead of reading the value from the qualifier's buffer yourself, you can let the library do it for you. For example:

var get = HBase.Get('row1'); //row1 is rowKey // column family "f", qualifier "q1", value is a float get .add('f', {name: 'q1' , type : 'float' }); get .add('f', {name: 'q2' , type : 'json' }); get .add('f', 'q3'); HBase. get ("table", get ).then(rowData => console.log(rowData)); shouldEqual(rowData, { rowkey : "row1" , d :{ q1 : 123.321 q2 : { prop : "val" }, q3 : "123.321" } });

The following types are supported:

- string (the default when type is unspecified)
- json (parses the value as a JSON string)
- integer (alias: integer32)
- float
- double
- number (alias: integer48)
- UInteger48
- int64

Put

var put = HBase.Put( 'row1' ); put.add( 'info' , 'money' , { type : 'float' , value : 12.34 }); put.add( 'info' , 'click' , { type : 'integer' , value : 100 }); put.add( 'ecf' , 'name' , 'zhudaxian' ); put.add( 'info' , 'name' , 'beijing' , new Date ().getTime()); HBase.put( 'users' , put, function ( err ) { if (err) { console .log( 'error:' , err); return ; } console .log( 'Put is successful.' ); }); HBase.putAsync( 'users' , put) .then( function ( ) { console .log( 'Put is successful.' ); }) .catch( function ( err ) { console .log( 'error:' , err); });

A shorthand version is the putRow function:

HBase.putRow( 'users' , 'row1' , 'info:name' , 'phoneqq.com' , 1414140874929 , function ( err ) { if (err) { console .log( 'error:' , err); return ; } console .log( 'Put is successfull.' ); }); HBase.putRowAsync( 'users' , 'row1' , 'info:name' , 'phoneqq.com' , 1414140874929 ) .then( function ( ) { console .log( 'Put is successfull.' ); }) .catch( function ( err ) { console .log( 'error:' , err); });

Inc

var inc = hbaseClient.Inc( 'row1' ); inc.add( 'info' , 'counter' ); inc.add( 'info' , 'counter2' ); hbaseClient.inc( 'users' ,inc, function ( err,data ) { if (err){ console .log( 'error:' ,err); return ; } console .log(err,data); });

Del

var del = hbaseClient.Del( 'row1' ); del.add( 'info' ); del.add( 'info' , 'name' ); del.add( 'info' , 'tel' , 1414136046864 ); del.add( 'ecf' ); del.add( 'ecf' , 'name' ); del.add( 'ecf' , 'tel' , 1414136119207 ); hbaseClient.del( 'users' ,del, function ( err ) { if (err){ console .log( 'error:' ,err); return ; } console .log(err, 'del is successfully' ); });

Scan

var scan = hbaseClient.Scan(); scan.addStartRow( 'row1' ); scan.addStopRow( 'row1p' ); scan.add( 'info' ); scan.add( 'info' , 'name' ); scan.add( 'ecf' ); scan.add( 'ecf' , 'name' ); scan.setMaxVersions( 1 ); scan.addNumRows( 10 ); hbaseClient.scan( 'users' ,scan, function ( err,data ) { if (err){ console .log( 'error:' ,err); return ; } console .log(err,data); });

Scan Stream

const tableName = 'test:test_table' ; const scanObject = hbaseClient.Scan({ family : 'f' , qualifier : 'test' , startRow : 'test.row.1' , stopRow : 'test.row.100' , numRows : 50 , chunkSize : 10 }); hbaseClient .createScanStream(tableName, scanObject) .on( 'data' , rows => { console .log( `Received ${rows.length} rows...` ); }) .on( 'error' , err => { errorHandler(err); }) .on( 'end' , () => { console .log( 'scan ended' ); });

Table Salting

What is "salting"? The term is borrowed from cryptography, but for our purposes it just means adding a predictable string to a key. The way HBase stores rows means that if the keys are not spread across the string spectrum, the data will physically be kept in a "not spread" manner - for example, with most rows of a table on very few Region Servers. So if your keys are well spread, so is your data. This allows for faster and more parallel reads/writes en masse. The only problem is keeping track of which tables have their keys salted, and exactly how the keys were salted. We have a solution for that:

var hbase = require ( 'node-thrift2-hbase' )(hbaseConfig); hbase.saltMap = { 'myTable1' : hbase.saltFunctions.saltByLastKeyCharCode, 'myTable2' : hbase.saltFunctions.saltByLastKeyCharCode };

All get and put operations for tables specified in the saltMap will be salted using the given function. hbase.saltFunctions contains some ready-made salt functions. If you have a salt function you find useful, don't hesitate to make a PR adding it!

Working HBase-Thrift compiler combinations

The code supplied here was generated with Thrift 0.9.3 for HBase 0.98.4. If you'd like to use this library with different versions, download the desired HBase Thrift definition file and compile it using the Thrift compiler of your choice into the project's gen-nodejs folder. If you are successfully working with a different HBase/Thrift compiler combination, please tell us and we'll add the info here.

| HBase  | Thrift Compiler |
| ------ | --------------- |
| 0.98.4 | 0.9.3           |
| 1.1.2  | 0.9.3           |
| 1.1.2  | 0.10.0          |

This library was initially based on https://www.npmjs.com/package/node-thrift-hbase, but because that library was abandoned by its author, we had to republish it with our contributions.