Sample Hive Script

-- Register Data Normalization Modue [dnm] BDQ Hive UDF Jar 
ADD JAR <Directory path>/dnm.hive.${project.version}.jar;

-- Provide alias to UDF class (optional). String in quotes represent class names needed for this job to run.
-- Table Lookup is implemented as a UDF (User Defined function). Hence it processes one row at a time and generates a map of key value pairs for each row.
CREATE TEMPORARY FUNCTION tablelookup as 'com.pb.bdq.dnm.process.hive.tablelookup.TableLookUpUDF';

-- Set rule 
set rule='{"rules":[{"action":"Standardize", "source":"CityCode", "tableName":"State Name Abbreviations", "lookupMultipleWordTerms":false, "lookupIndividualTermsWithinField":false, "destination":"CityCode"}]}';

-- Set Reference Directory. This must be a local path on cluster machines and must be present on each node of the cluster at the same path.
set refdir='/home/hadoop/reference';

-- set header
set header ='AccountDescription,Address,ApartmentNumber,CityCode';


-- Execute Query on the desired table, to display the job output on console. This query returns a map of key value pairs containing output fields for each row.

SELECT bar.ret["StandardizationTermIdentified"],
	bar.ret["accountdescription"],
	bar.ret["address"],
	bar.ret["apartmentnumber"],
	bar.ret["citycode"] 
FROM (
	SELECT tablelookup(${hiveconf:rule}, ${hiveconf:refdir}, ${hiveconf:header}, accountdescription, address, apartmentnumber, citycode) 
	AS ret 
	FROM citizen_data
	) bar;

	
-- Query to dump output data to a file

INSERT OVERWRITE LOCAL DIRECTORY '/home/hadoop/TableLookup/' row format delimited FIELDS TERMINATED BY ',' lines terminated by '\n' STORED AS TEXTFILE 
SELECT bar.ret["StandardizationTermIdentified"],
	bar.ret["accountdescription"],
	bar.ret["address"],
	bar.ret["apartmentnumber"],
	bar.ret["citycode"] 
FROM (
	SELECT tablelookup(${hiveconf:rule}, ${hiveconf:refdir}, ${hiveconf:header}, accountdescription, address, apartmentnumber, citycode) 
	AS ret 
	FROM citizen_data
	) bar;

--Sample input data
--+----------------------------------+-----------------------+-------------------------------+------------------------+
--| citizen_data.accountdescription  | citizen_data.address  | citizen_data.apartmentnumber  | citizen_data.citycode |  
--+----------------------------------+-----------------------+-------------------------------+------------------------+
--|  				                 | 400 E M0 St Apt 1405  |                               | NY                     |    
--|                                  | 190 E 72nd St         |                               | NY                     |   
--|                                  | 1381 3rd Ave Apt 4    | 4                             | TTYYY                  | 
--+----------------------------------+-----------------------+-------------------------------+------------------------+   

--sample output data
--+------------------------------+--------------------------+-----------------------+-----------------------+----------+   
--|StandardizationTermIdentified | 	accountdescription  	| address  				| apartmentnumber  		| citycode|
--+------------------------------+--------------------------+-----------------------+-----------------------+----------+ 
--|				yes				 |  				        | 400 E M0 St Apt 1405  |                       | NEW YORK |                        
--|				yes	     		 |                          | 190 E 72nd St         |                       | NEW YORK |                       
--|				yes 			 |                          | 1381 3rd Ave Apt 4    | 4                     | NEW YORK |
--+------------------------------+--------------------------+-----------------------+-----------------------+----------+