Sample Hive Script

-- Register Advance Matching Module[AMM] Hive UDF jar
ADD JAR <Directory path>/amm.hive.${project.version}.jar;

-- Provide alias to UDF class (optional). 
String in quotes represent class names needed for this job to run.
CREATE TEMPORARY FUNCTION matchkeygenerator as
 'com.pb.bdq.amm.process.hive.matchkeygenerator.MatchKeyGeneratorUDF';

-- Match Key Generator is implemented as a UDF (User Defined function).
 It processes one row at a time and generates a map of match keys for each row.

-- Set rule and header 
set hivevar:rule='{"matchKeys":[{"expressMatchKey":false,"matchKeyField":"MatchKey1",
                 "rules":[{"algorithm":"Soundex","field":"busniessname","startPosition":1,"length":0,
                 "active":true,"sortInput":null,"removeNoiseCharacters":false}]},
                 {"expressMatchKey":false,"matchKeyField":"MatchKey2",
                 "rules":[{"algorithm":"Koeln","field":"busniessname","startPosition":1,"length":0,
                 "active":true,"sortInput":null,"removeNoiseCharacters":false}]}]}';


set hivevar:header='busniessname,recordid';

-- Execute query on the desired table to display the job output on console.
 This query returns a map of key value for each row containing matchkeys as per rule passed.
SELECT busniessname, recordid, bar.ret["MatchKey1"] AS MatchKey1, bar.ret["MatchKey2"] 
AS MatchKey2 FROM (SELECT *, matchkeygenerator (${hivevar:rule}, ${hivevar:header},
 busniessname, recordid) AS ret FROM cust ) bar;


-- Query to dump output to a directory in file system
INSERT OVERWRITE LOCAL DIRECTORY '/home/hadoop/MatchKey/' row format
 delimited FIELDS TERMINATED BY ',' MAP FIELDS TERMINATED BY ':' COLLECTION ITEMS TERMINATED 
BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE 
SELECT busniessname, recordid, bar.ret["MatchKey1"] AS MatchKey1,
 bar.ret["MatchKey2"] AS MatchKey2 FROM (
SELECT *, matchkeygenerator (${hiveconf:rule}, ${hiveconf:header},
 busniessname, recordid) AS ret FROM cust ) bar;

--Sample data in input table customer
--+------------------------------------------+----------------+--+
--|            cust.busniessname             | cust.recordid  |
--+------------------------------------------+----------------+--+
--| Internal Revenue Service                 | 0              |
--| Juan F Vera-Monroig                      | 1              |
--| Leonardo Pagan-Reyes                     | 2              |
--| Academia San Joaquin Colegios/Academias  | 3              |
--| Nereida Portalatin-Padua                 | 4              |
--+------------------------------------------+----------------+--+


--Sample output for input query
+------------------------------------------+-----------+------------+-------------------+--+
|               busniessname               | recordid  | matchkey1  |     matchkey2     |
+------------------------------------------+-----------+------------+-------------------+--+
| Internal Revenue Service                 | 0         | I536       | 0627657368738     |
| Juan F Vera-Monroig                      | 1         | J511       | 063376674         |
| Leonardo Pagan-Reyes                     | 2         | L563       | 567214678         |
| Academia San Joaquin Colegios/Academias  | 3         | A235       | 0426864645484268  |
| Nereida Portalatin-Padua                 | 4         | N631       | 67217252612       |
+------------------------------------------+-----------+------------+-------------------+--+