Basic HBase Java Classes and Methods – Part 4: Putting Data into a Table
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
public class PutTable {
private static byte[] PERSONAL_CF = Bytes.toBytes("personal");
private static byte[] PROFESSIONAL_CF = Bytes.toBytes("professional");
private static byte[] FIRST_NAME_COLUMN = Bytes.toBytes("first_name");
private static byte[] LAST_NAME_COLUMN = Bytes.toBytes("last_name");
private static byte[] AGE_COLUMN = Bytes.toBytes("age");
private static byte[] GENDER_COLUMN = Bytes.toBytes("gender");
private static byte[] MARITAL_STATUS_COLUMN = Bytes.toBytes("marital_status");
private static byte[] OCCUPATION_COLUMN = Bytes.toBytes("occupation");
private static byte[] EDUCATION_COLUMN = Bytes.toBytes("education");
Now we create our main method, create a connection to our Table, instantiate a Put object and add columns to it using our addColumn method. Finally we use the put method on the Table object to put the data into the table. We have the table defined outside of the try block because we need to check for it in the finally block later on, and we can't do that if its defined in the try block itself, as then it would be out of scope.
public static void main(String[] args) throws IOException {
Configuration conf = HBaseConfiguration.create();
Connection connection = ConnectionFactory.createConnection(conf);
Table table = null;
try {
table = connection.getTable(TableName.valueOf("employee"));
Put put1 = new Put(Bytes.toBytes("1"));
put1.addColumn(PERSONAL_CF, FIRST_NAME_COLUMN, Bytes.toBytes("John"));
put1.addColumn(PERSONAL_CF, LAST_NAME_COLUMN, Bytes.toBytes("Smith"));
put1.addColumn(PERSONAL_CF, AGE_COLUMN, Bytes.toBytes("50"));
put1.addColumn(PERSONAL_CF, GENDER_COLUMN, Bytes.toBytes("male"));
put1.addColumn(PERSONAL_CF, MARITAL_STATUS_COLUMN, Bytes.toBytes("married"));
put1.addColumn(PROFESSIONAL_CF, OCCUPATION_COLUMN, Bytes.toBytes("engineer"));
put1.addColumn(PROFESSIONAL_CF, EDUCATION_COLUMN, Bytes.toBytes("masters"));
table.put(put1);
System.out.println("Inserted row for John Smith");
This is a very simple case. We did not have to insert all of the columns, we could have left many blank just as we did before when using the HBase shell. The HBase table put method is overloaded and supports either passing in a Put object or a list of Put objects. We will now put more data in using a list of Put objects.
Put put2 = new Put(Bytes.toBytes("2"));
put2.addColumn(PERSONAL_CF, FIRST_NAME_COLUMN, Bytes.toBytes("Sally"));
put2.addColumn(PERSONAL_CF, LAST_NAME_COLUMN, Bytes.toBytes("Jones"));
put2.addColumn(PERSONAL_CF, AGE_COLUMN, Bytes.toBytes("32"));
put2.addColumn(PERSONAL_CF, GENDER_COLUMN, Bytes.toBytes("female"));
put2.addColumn(PERSONAL_CF, MARITAL_STATUS_COLUMN, Bytes.toBytes("divorced"));
put2.addColumn(PROFESSIONAL_CF, OCCUPATION_COLUMN, Bytes.toBytes("doctor"));
put2.addColumn(PROFESSIONAL_CF, EDUCATION_COLUMN, Bytes.toBytes("MD"));
Put put3 = new Put(Bytes.toBytes("3"));
put3.addColumn(PERSONAL_CF, FIRST_NAME_COLUMN, Bytes.toBytes("Alex"));
put3.addColumn(PERSONAL_CF, LAST_NAME_COLUMN, Bytes.toBytes("Wright"));
put3.addColumn(PERSONAL_CF, GENDER_COLUMN, Bytes.toBytes("male"));
put3.addColumn(PROFESSIONAL_CF, OCCUPATION_COLUMN, Bytes.toBytes("cab driver"));
put3.addColumn(PROFESSIONAL_CF, EDUCATION_COLUMN, Bytes.toBytes("high school"));
List<Put> putList = new ArrayList<Put>();
putList.add(put2);
putList.add(put3);
table.put(putList);
System.out.println("Inserted rows for Sally Jones and Alex Wright");
Last we will use our finally block to close our connection to HBase and check if we have an open table and if so close it.
} finally {
connection.close();
if (table != null) {
table.close();
}
}
}
}
The completed program looks like this:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
public class PutTable {

    // Column-family and column-qualifier names, encoded once as byte arrays
    // because every HBase client call takes byte[] rather than String.
    private static final byte[] PERSONAL_CF = Bytes.toBytes("personal");
    private static final byte[] PROFESSIONAL_CF = Bytes.toBytes("professional");
    private static final byte[] FIRST_NAME_COLUMN = Bytes.toBytes("first_name");
    private static final byte[] LAST_NAME_COLUMN = Bytes.toBytes("last_name");
    private static final byte[] AGE_COLUMN = Bytes.toBytes("age");
    private static final byte[] GENDER_COLUMN = Bytes.toBytes("gender");
    private static final byte[] MARITAL_STATUS_COLUMN = Bytes.toBytes("marital_status");
    private static final byte[] OCCUPATION_COLUMN = Bytes.toBytes("occupation");
    private static final byte[] EDUCATION_COLUMN = Bytes.toBytes("education");

    /**
     * Connects to HBase, writes one row with a single Table.put call, then
     * writes two more rows in one batched call using the List&lt;Put&gt;
     * overload. The table reference is declared outside the try block so the
     * finally block can check whether it was opened and close it.
     *
     * @param args unused
     * @throws IOException if the connection or any put/close operation fails
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        Connection connection = ConnectionFactory.createConnection(conf);
        Table table = null;
        try {
            table = connection.getTable(TableName.valueOf("employee"));

            // Row key "1": populate both column families, then write the row.
            Put put1 = new Put(Bytes.toBytes("1"));
            put1.addColumn(PERSONAL_CF, FIRST_NAME_COLUMN, Bytes.toBytes("John"));
            put1.addColumn(PERSONAL_CF, LAST_NAME_COLUMN, Bytes.toBytes("Smith"));
            put1.addColumn(PERSONAL_CF, AGE_COLUMN, Bytes.toBytes("50"));
            put1.addColumn(PERSONAL_CF, GENDER_COLUMN, Bytes.toBytes("male"));
            put1.addColumn(PERSONAL_CF, MARITAL_STATUS_COLUMN, Bytes.toBytes("married"));
            put1.addColumn(PROFESSIONAL_CF, OCCUPATION_COLUMN, Bytes.toBytes("engineer"));
            put1.addColumn(PROFESSIONAL_CF, EDUCATION_COLUMN, Bytes.toBytes("masters"));
            table.put(put1);
            System.out.println("Inserted row for John Smith");

            // Rows "2" and "3": not every column needs a value (row "3" has
            // no age or marital_status). Both rows are sent in one batched
            // call via the List<Put> overload of Table.put.
            Put put2 = new Put(Bytes.toBytes("2"));
            put2.addColumn(PERSONAL_CF, FIRST_NAME_COLUMN, Bytes.toBytes("Sally"));
            put2.addColumn(PERSONAL_CF, LAST_NAME_COLUMN, Bytes.toBytes("Jones"));
            put2.addColumn(PERSONAL_CF, AGE_COLUMN, Bytes.toBytes("32"));
            put2.addColumn(PERSONAL_CF, GENDER_COLUMN, Bytes.toBytes("female"));
            put2.addColumn(PERSONAL_CF, MARITAL_STATUS_COLUMN, Bytes.toBytes("divorced"));
            put2.addColumn(PROFESSIONAL_CF, OCCUPATION_COLUMN, Bytes.toBytes("doctor"));
            put2.addColumn(PROFESSIONAL_CF, EDUCATION_COLUMN, Bytes.toBytes("MD"));

            Put put3 = new Put(Bytes.toBytes("3"));
            put3.addColumn(PERSONAL_CF, FIRST_NAME_COLUMN, Bytes.toBytes("Alex"));
            put3.addColumn(PERSONAL_CF, LAST_NAME_COLUMN, Bytes.toBytes("Wright"));
            put3.addColumn(PERSONAL_CF, GENDER_COLUMN, Bytes.toBytes("male"));
            put3.addColumn(PROFESSIONAL_CF, OCCUPATION_COLUMN, Bytes.toBytes("cab driver"));
            put3.addColumn(PROFESSIONAL_CF, EDUCATION_COLUMN, Bytes.toBytes("high school"));

            List<Put> putList = new ArrayList<Put>();
            putList.add(put2);
            putList.add(put3);
            table.put(putList);
            System.out.println("Inserted rows for Sally Jones and Alex Wright");
        } finally {
            // Close the table (if one was opened) BEFORE the connection it
            // was obtained from; closing the connection first would also
            // skip table.close() entirely if connection.close() threw.
            if (table != null) {
                table.close();
            }
            connection.close();
        }
    }
}
We can see that our data was added properly by checking with scan from the HBase shell.
hbase(main):002:0> scan 'employee'
ROW COLUMN+CELL
1 column=personal:age, timestamp=1520824732941, value=50
1 column=personal:first_name, timestamp=1520824732941, value=John
1 column=personal:gender, timestamp=1520824732941, value=male
1 column=personal:last_name, timestamp=1520824732941, value=Smith
1 column=personal:marital_status, timestamp=1520824732941, value=married
1 column=professional:education, timestamp=1520824732941, value=masters
1 column=professional:occupation, timestamp=1520824732941, value=engineer
2 column=personal:age, timestamp=1520824732954, value=32
2 column=personal:first_name, timestamp=1520824732954, value=Sally
2 column=personal:gender, timestamp=1520824732954, value=female
2 column=personal:last_name, timestamp=1520824732954, value=Jones
2 column=personal:marital_status, timestamp=1520824732954, value=divorced
2 column=professional:education, timestamp=1520824732954, value=MD
2 column=professional:occupation, timestamp=1520824732954, value=doctor
3 column=personal:first_name, timestamp=1520824732954, value=Alex
3 column=personal:gender, timestamp=1520824732954, value=male
3 column=personal:last_name, timestamp=1520824732954, value=Wright
3 column=professional:education, timestamp=1520824732954, value=high school
3 column=professional:occupation, timestamp=1520824732954, value=cab driver
3 row(s) in 0.2060 seconds
hbase(main):003:0>
Next we will explore how we can retrieve column data from the HBase table in Basic HBase Java Classes and Methods – Part 5: Getting Data from a Table.
Recent Posts
See All
One of the biggest bottlenecks in Deep Learning is loading data. Having fast drives and access to the data is important, especially if...
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.TableName;...
Comments