Sample Hive Script

Reference data placed on local node

-- Register Universal Addressing Module - US [UAM-US] BDQ Hive UDAF Jar 
ADD JAR <Directory path>/uam.universaladdress.hive.${project.version}.jar;

-- Provide alias to UDAF class (optional). String in quotes represent 
class names needed for this job to run.
CREATE TEMPORARY FUNCTION uamvalidation as 'com.pb.bdq.uam.process.hive.universaladdress.UAMUSAddressingUDF';
-- set LD_LIBRARY_PATH(path to modules lib, runtime/lib and runtime/bin) 
,G1RTS(path containing COBOL runtime) and ACU_RUNCBL_JNI_ONLOAD_DISABLE in this configuration
set mapreduce.admin.user.env=LD_LIBRARY_PATH=/home/hduser/~/runtime/lib:/home/hduser/~/runtime/
bin:/home/hduser/~/server/modules/universaladdress/lib,ACU_RUNCBL_JNI_ONLOAD_DISABLE=1,G1RTS=/
home/hduser/~/ ;


-- set engine configuration
set hivevar:engineconf='{"referenceData":{"dataDir":"/home/hduser/ReferenceData/AddressQuality/
UAM/Data","referenceDataPathLocation":"LocaltoDataNodes"},"cobolRuntimePath":"","modulesDir":"",
"dpvDbPath":"/home/hduser/ReferenceData/AddressQuality/UAM/Data","suiteLinkDBPath":"/home/hduser/
ReferenceData/AddressQuality/UAM/Data","ewsDBPath":"/home/hduser/ReferenceData/AddressQuality/UAM/
Data","rdiDBPath":"/home/hduser/ReferenceData/AddressQuality/UAM/Data","lacsDBPath":"/home/hduser/
ReferenceData/AddressQuality/UAM/Data"}';

-- set input configuration
set hivevar:inputconf='{"processType":"VALIDATE","performUSProcessing":true,
"outputStandardAddress":true,"outputAddressElements":false,"outputPostalData":false,
"outputParsedInput":false,"outputAddressBlocks":false,"outputFormattedOnFail":false,
"outputCasing":"MIXED","outputPostalCodeSeparator":true,
"outputMultinationalCharacters":false,"performDPV":false,"performRDI":false,"performESM":false,
"performASM":false,"performEWS":false,\"performLACSLink":false,"performLOT":
false,"failOnCMRAMatch":false,"extractFirm":false,"extractUrb":false,"outputReport3553":false,
"outputReportSummary":true,"outputCASSDetail":false,
"outputFieldLevelReturnCodes":false,"keepMultimatch":false,"maximumResults":10,
"standardAddressFormat":"STANDARD_ADDRESS_FORMAT_COMBINED_UNIT",
"standardAddressPMBLine":"STANDARD_ADDRESS_PMB_LINE_NONE","cityNameFormat":"CITY_FORMAT_STANDARD",
"vanityCityFormatLong":true,"outputCountryFormat":"ENGLISH","homeCountry":"United States",
"streetMatchingStrictness":"MATCHING_STRICTNESS_MEDIUM","firmMatchingStrictness":
"MATCHING_STRICTNESS_MEDIUM","directionalMatchingStrictness":"MATCHING_STRICTNESS_MEDIUM",
"dualAddressLogic":"DUAL_NORMAL","dpvSuccessfulStatusCondition":"DPV_CONDITON_ALWAYS",
"reportListFileName":"","reportlistProcessorName":"","reportlistNumber":1,"reportMailerAddress":
"","reportMailerName":"","reportMailerCityLine":"","addressLineSearchOnFail":true,
"outputStreetAlias":true,"outputVeriMoveBlock":false,"dpvDetermineNoStat":false,
"dpvDetermineVacancy":false,"outputAbbreviatedAlias":false,"outputPreferredAlias":false,
"outputPreferredCity":"CITY_OVERRIDE_NAME_ZIP4","performSuiteLink":false,
"suppressZplusPhantomCarrierR777":false,"dpvSeedList":null,"lacsSeedList":null,
"zipInputSet":null,"reportName":null,"jobRequest":false,"properties":{"DPVDetermineVacancy":"N",
"DualAddressLogic":"N","PerformASM":"N","ExtractUrb":"N","OutputCasing":"M",
"AddressLineSearchOnFail":"Y","ReportListFileName":"","ReportMailerCityLine":"",
"OutputFormattedOnFail":"N","OutputFieldLevelReturnCodes":"N","OutputStreetNameAlias":"Y",
"ReportListProcessorName":"","OutputAddressBlocks":"N","ExtractFirm":"N",
"DirectionalMatchingStrictness":"M","OutputPreferredCity":"Z","ReportListNumber":"1",
"FirmMatchingStrictness":"M","KeepMultimatch":"N","StandardAddressPMBLine":"N",
"OutputMultinationalCharacters":"N","PerformSuiteLink":"N","OutputShortCityName":"S",
"DPVSuccessfulStatusCondition":"A","PerformLACSLink":"N","PerformEWS":"N",
"OutputPostalCodeSeparator":"Y","FailOnCMRAMatch":"N","PerformLOT":"N",
"StandardAddressFormat":"C","SuppressZplusPhantomCarrierR777":"N","OutputCountryFormat":"E",
"OutputRecordType":"A   ","HomeCountry":"United States","ReportMailerAddress":"",
"OutputReport3553":"N","OutputVeriMoveDataBlock":"N","PerformRDI":"N","ReportMailerName":"",
"OutputAbbreviatedAlias":"N","PerformESM":"N","PerformDPV":"N","OutputVanityCityFormatLong":"Y",
"OutputReportSummary":"Y","OutputPreferredAlias":"N","StreetMatchingStrictness":
"M","DPVDetermineNoStat":"N","MaximumResults":"10"}}';

-- set general configuration
set hivevar:generalconf='{"dFileType":"SPLIT","dMemoryModel":"MEDIUM","lacsLinkMemoryModel":
"MEDIUM","suiteLinkMemoryModel":"MEDIUM"}';


-- set reference path
set hivevar:location='/home/hduser/ReferenceData/AddressQuality/UAM/Data';

-- set process type
set hivevar:processtype='VALIDATE'; 

-- set header
set hivevar:header='InputKeyValue,AddressLine1,AddressLine2,City,DefectNumber,FirmName,PostalCode,
StateProvince';

-- Execute Query on the desired table, to display the job output on console. This query returns a 
map of key value pairs containing output fields for each row.
SELECT tmp2.record["Confidence"],tmp2.record["AddressLine1"] FROM ( select uamvalidation
(${hivevar:engineconf},${hivevar:generalconf},${hivevar:processtype},${hivevar:inputconf},
${hivevar:header},inputkeyvalue,firmname,addressline1,addressline2,city,stateprovince,postalcode,
text) from uam_us) as addressgroup LATERAL VIEW explode(addressgroup.mygp) tmp2 as record ;


-- Query to dump output data to a file
INSERT OVERWRITE LOCAL DIRECTORY '/home/hadoop/GlobalAddressing/' row format delimited FIELDS 
TERMINATED BY ',' lines terminated by '\n' STORED AS TEXTFILE
SELECT tmp2.record["Confidence"],tmp2.record["AddressLine1"] FROM ( select uamvalidation
(${hivevar:engineconf},${hivevar:generalconf},${hivevar:processtype},
${hivevar:inputconf},${hivevar:header},inputkeyvalue,firmname,addressline1,
addressline2,city,stateprovince,postalcode,text) from uam_us) as addressgroup 
LATERAL VIEW explode(addressgroup.mygp) tmp2 as record ;

+-------------------+------------------------------+-----------------+------------------------+---------------------+------------------+
| address.recordid  |     address.addressline1     |  address.city   | address.stateprovince  | address.postalcode  | address.country  |
+-------------------+------------------------------+-----------------+------------------------+---------------------+------------------+
| 1                 | 18 Merivale St               | South Brisbane  | QLD                    | 4101                | AUS              |
| 2                 | 19 Serpentine Rd             | Albany          | WA                     | 6330                | AUS              |
| 3                 | 317 VICTORIA ST GR           | BRUNSWICK       | VIC                    | 3056                | AUS              |
| 4                 | DUPLEX 6/16-18 O'CONNELL ST  | AINSLIE         | ACT                    | 2602                | AUS              |
| 5                 | LOT 154 470 BRYGON CREEK DR  | UPPER COOMERA   | QLD                    | 4209                | AUS              |
| 6                 | 16 GREENE ST                 | WARRAWONG       | ACT                    | 2502                | AUS              |
| 7                 | UNIT 47/16 BLAIRMOUNT ST     | PARKINSON       | QLD                    | 4115                | AUS              |
| 8                 | 13-15 FRANCESCO CRES         | BELLA VISTA     | NSW                    | 2153                | AUS              |
| 9                 | 4 RYANS LANE                 | HEATHCOTE       | VIC                    | 3523                | AUS              |
| 10                | 1 CHRISTMAS LN               | NORTH POLE      | VIC                    | 1111                | AUS              |
+-------------------+------------------------------+-----------------+------------------------+---------------------+------------------+



+-----------+---------------+--------+----------------------------------+---------------+
|Confidence |StreetName     |HouseNumber |     AddressLine1             | AddressType   |
+-----------+---------------+------------+------------------------------+---------------+
| 100.00  	| MERIVALE      | 18 		 | 18 MERIVALE ST               | S    			|
| 99.42   	| SERPENTINE    | 19     	 | 19 SERPENTINE RD E           | S    			|
| 97.95   	| VICTORIA      | 317    	 | 317 VICTORIA ST              | S    			|
| 100.00  	| O'CONNELL     | 16-18  	 | DUP 6 16-18 O'CONNELL ST     | S    			|
| 0.00    	| BRYGON CREEK  | 470    	 | LOT 154 470 BRYGON CREEK DR  | U    			|
| 76.99   	| GREENE        | 16      	 | 16 GREENE ST                 | S    			|
| 100.00  	| BLAIRMOUNT    | 16     	 | U 47 16 BLAIRMOUNT ST        | S    			|
| 100.00  	| FRANCESCO     | 13-15  	 | 13-15 FRANCESCO CRES         | S    			|
| 100.00  	| RYANS         | 4      	 | 4 RYANS LANE                 | S    			|
| 0.00    	| CHRISTMAS     | 1      	 | 1 CHRISTMAS LN               | U    			|
+-----------+---------------+------------+------------------------------+---------------+

Reference data placed on HDFS and downloaded on local nodes for jobs

-- Register Universal Addressing Module - US [UAM-US] BDQ Hive UDAF Jar 
ADD JAR <Directory path>/uam.universaladdress.hive.${project.version}.jar;

-- Provide alias to UDAF class (optional). String in quotes represent class names needed for this 
job to run.
CREATE TEMPORARY FUNCTION uamvalidation as 'com.pb.bdq.uam.process.hive.universaladdress.
UAMUSAddressingUDF';

set hive.fetch.task.conversion=none;

-- set LD_LIBRARY_PATH(path to modules lib, runtime/lib and runtime/bin) 
,G1RTS(path containing COBOL runtime) and ACU_RUNCBL_JNI_ONLOAD_DISABLE 
in this configuration
set mapreduce.admin.user.env=LD_LIBRARY_PATH=/home/hduser/acushareInstall/modules/c1p/lib:/home/
hduser/acushareInstall/runtime/bin:/home/hduser/acushareInstall/runtime/lib,
ACU_RUNCBL_JNI_ONLOAD_DISABLE=1,G1RTS=/home/hduser/acushareInstall/runtime;


-- set engine configuration
set hivevar:engineconf='{"referenceData":{"referenceDataPathLocation":"HDFS","dataDir":
"/user/hadoop/ReferenceData/UAM_US","dataDownloader":{"dataDownloader":"HDFS","localFSRepository":
"/opt/ReferenceData/UAM-US"}},"cobolRuntimePath":"","modulesDir":"","acushareServiceRunning":false,
"unixVersion":"REDHAT7","acushareLicensePath":"","dpvDbPath":"/user/hadoop/ReferenceData/UAM_US",
"suiteLinkDBPath":"/user/hadoop/ReferenceData/UAM_US","ewsDBPath":"/user/hadoop/ReferenceData/
UAM_US","rdiDBPath":"/user/hadoop/ReferenceData/UAM_US","lacsDBPath":"/user/hadoop/ReferenceData/
UAM_US"}';

-- set input configuration
set hivevar:inputconf='{"processType":"VALIDATE","performUSProcessing":true,
"outputStandardAddress":true,"outputAddressElements":false,"outputPostalData":false,
"outputParsedInput":false,"outputAddressBlocks":false,"outputFormattedOnFail":false,
"outputCasing":"MIXED","outputPostalCodeSeparator":true,"outputMultinationalCharacters":false,
"performDPV":false,"performRDI":false,"performESM":false,"performASM":false,
"performEWS":false,"performLACSLink":false,"performLOT":false,"failOnCMRAMatch":false,
"extractFirm":false,"extractUrb":false,"outputReport3553":false,"outputReportSummary":true,
"outputCASSDetail":false,"outputFieldLevelReturnCodes":false,"keepMultimatch":false,
"maximumResults":10,"standardAddressFormat":"STANDARD_ADDRESS_FORMAT_COMBINED_UNIT",
"standardAddressPMBLine":"STANDARD_ADDRESS_PMB_LINE_NONE","cityNameFormat":
"CITY_FORMAT_STANDARD","vanityCityFormatLong":true,"outputCountryFormat":
"ENGLISH","homeCountry":"United States","streetMatchingStrictness":
"MATCHING_STRICTNESS_MEDIUM","firmMatchingStrictness"
:"MATCHING_STRICTNESS_MEDIUM","directionalMatchingStrictness":"MATCHING_STRICTNESS_MEDIUM",
"dualAddressLogic":"DUAL_NORMAL","dpvSuccessfulStatusCondition":
"DPV_CONDITON_ALWAYS","reportListFileName":"","reportlistProcessorName":"",
"reportlistNumber":1,"reportMailerAddress":"","reportMailerName":"",
"reportMailerCityLine":"","addressLineSearchOnFail":true,"outputStreetAlias":true,
"outputVeriMoveBlock":false,"dpvDetermineNoStat":false,"dpvDetermineVacancy":false,
"outputAbbreviatedAlias":false,"outputPreferredAlias":false,"outputPreferredCity":
"CITY_OVERRIDE_NAME_ZIP4","performSuiteLink":false,"suppressZplusPhantomCarrierR777":
false,"dpvSeedList":null,"lacsSeedList":null,"zipInputSet":null,
"reportName":null,"jobRequest":false,"properties":{"DPVDetermineVacancy":"N",
"DualAddressLogic":"N","PerformASM":"N","ExtractUrb":"N","OutputCasing":"M",
"AddressLineSearchOnFail":"Y","ReportListFileName":"",
"ReportMailerCityLine":"","OutputFormattedOnFail":"N","OutputFieldLevelReturnCodes":"N",
"OutputStreetNameAlias":"Y","ReportListProcessorName":"","OutputAddressBlocks":"N","ExtractFirm":
"N","DirectionalMatchingStrictness":"M","OutputPreferredCity":"Z","ReportListNumber":"1",
"FirmMatchingStrictness":"M","KeepMultimatch":"N","StandardAddressPMBLine":"N",
"OutputMultinationalCharacters":"N","PerformSuiteLink":"N","OutputShortCityName":"S",
"DPVSuccessfulStatusCondition":"A","PerformLACSLink":"N","PerformEWS":"N",
"OutputPostalCodeSeparator":"Y","FailOnCMRAMatch":"N","PerformLOT":
"N","StandardAddressFormat":"C","SuppressZplusPhantomCarrierR777":"N",
"OutputCountryFormat":"E","OutputRecordType":"A ",
"HomeCountry":"United States","ReportMailerAddress":"","OutputReport3553":"N",
"OutputVeriMoveDataBlock":"N","PerformRDI":"N","ReportMailerName":"","OutputAbbreviatedAlias":
"N","PerformESM":"N","PerformDPV":"N","OutputVanityCityFormatLong":"Y","OutputReportSummary":"Y",
"OutputPreferredAlias":"N","StreetMatchingStrictness":"M","DPVDetermineNoStat":"N","MaximumResults":
"10"}}';

-- set general configuration
set hivevar:generalconf='{"dFileType":"SPLIT","dMemoryModel":"MEDIUM","lacsLinkMemoryModel":
"MEDIUM","suiteLinkMemoryModel":"MEDIUM"}';

-- set process type
set hivevar:processtype='VALIDATE'; 

-- set header
set hivevar:header='InputKeyValue,AddressLine1,AddressLine2,City,DefectNumber,FirmName,PostalCode,
StateProvince';

-- Execute Query on the desired table, to display the job output on console. This query returns a 
map of key value pairs containing output fields for each row.
SELECT tmp2.record["Status"],tmp2.record["Status.Description"],tmp2.record["Confidence"],
tmp2.record["AddressLine1"],tmp2.record["InputKeyValue"] FROM ( select uamvalidation
(${hivevar:engineconf},${hivevar:generalconf},${hivevar:processtype},${hivevar:inputconf},
${hivevar:header},inputkeyvalue,addressline1,addressline2,city,defectnumber,firmname,
postalcode,stateprovince) as mygp from address_uam) as addressgroup LATERAL VIEW explode
(addressgroup.mygp) tmp2 as record ;

Reference data placed on HDFS and downloaded to working directory for jobs

-- Register Universal Addressing Module - US [UAM-US] BDQ Hive UDAF Jar 
ADD JAR <Directory path>/uam.universaladdress.hive.${project.version}.jar;

-- Provide alias to UDAF class (optional). String in quotes represent class names needed for this 
job to run.
CREATE TEMPORARY FUNCTION uamvalidation as 'com.pb.bdq.uam.process.hive.universaladdress.
UAMUSAddressingUDF';

--Provide reference data zip file to be added to cache
ADD ARCHIVE hdfs:///user/hduser/ReferenceData/AddressQuality/UAM/Data.zip;

set hive.fetch.task.conversion=none;

-- set LD_LIBRARY_PATH(path to modules lib, runtime/lib and runtime/bin) ,G1RTS(path containing 
COBOL runtime) and ACU_RUNCBL_JNI_ONLOAD_DISABLE in this configuration
set mapreduce.admin.user.env=LD_LIBRARY_PATH=/home/hduser/acushareInstall/modules/c1p/lib:/home/
hduser/acushareInstall/runtime/bin:/home/hduser/acushareInstall/runtime/lib,
ACU_RUNCBL_JNI_ONLOAD_DISABLE=1,G1RTS=/home/hduser/acushareInstall/runtime;

-- set engine configuration
set hivevar:engineconf='{"referenceData":{"dataDir":"/user/hduser/ReferenceData/AddressQuality/
UAM/Data.zip","referenceDataPathLocation":"HDFS"},"cobolRuntimePath":"","modulesDir":"","dpvDbPath":
"/user/hduser/ReferenceData/AddressQuality/UAM/Data.zip","suiteLinkDBPath":"/user/hduser/
ReferenceData/AddressQuality/UAM/Data.zip","ewsDBPath":"/user/hduser/ReferenceData/AddressQuality/
UAM/Data.zip","rdiDBPath":null,"lacsDBPath":"/user/hduser/ReferenceData/AddressQuality/
UAM/Data.zip"}';

-- set input configuration
set hivevar:inputconf='{"processType":"VALIDATE","performUSProcessing":true,
"outputStandardAddress":true,"outputAddressElements":false,"outputPostalData"
:false,"outputParsedInput":false,"outputAddressBlocks":false,"outputFormattedOnFail":false,
"outputCasing":"MIXED","outputPostalCodeSeparator":true,"outputMultinationalCharacters":false,
"performDPV":false,"performRDI":false,"performESM":false,"performASM":false,
"performEWS":false,"performLACSLink":false,"performLOT":false,"failOnCMRAMatch":false,
"extractFirm":false,"extractUrb":false,"outputReport3553":false,"outputReportSummary":true,
"outputCASSDetail":false,"outputFieldLevelReturnCodes":false,"keepMultimatch":false,
"maximumResults":10,"standardAddressFormat":"STANDARD_ADDRESS_FORMAT_COMBINED_UNIT",
"standardAddressPMBLine":"STANDARD_ADDRESS_PMB_LINE_NONE","cityNameFormat":"CITY_FORMAT_STANDARD",
"vanityCityFormatLong":true,"outputCountryFormat":"ENGLISH","homeCountry":"United States",
"streetMatchingStrictness":"MATCHING_STRICTNESS_MEDIUM","firmMatchingStrictness"
:"MATCHING_STRICTNESS_MEDIUM","directionalMatchingStrictness":"MATCHING_STRICTNESS_MEDIUM",
"dualAddressLogic":"DUAL_NORMAL","dpvSuccessfulStatusCondition":"DPV_CONDITON_ALWAYS",
"reportListFileName":"","reportlistProcessorName":"","reportlistNumber":1,"reportMailerAddress":"",
"reportMailerName":"","reportMailerCityLine":"","addressLineSearchOnFail"
:true,"outputStreetAlias":true,"outputVeriMoveBlock":false,"dpvDetermineNoStat":false,
"dpvDetermineVacancy":false,"outputAbbreviatedAlias":false,"outputPreferredAlias":false,
"outputPreferredCity":"CITY_OVERRIDE_NAME_ZIP4","performSuiteLink":false,
"suppressZplusPhantomCarrierR777":false,"dpvSeedList":null,"lacsSeedList":null,"zipInputSet":null,
"reportName":null,"jobRequest":false,"properties":{"DPVDetermineVacancy":"N","DualAddressLogic":"N",
"PerformASM":"N","ExtractUrb":"N","OutputCasing":"M","AddressLineSearchOnFail":"Y",
"ReportListFileName":"","ReportMailerCityLine":
"","OutputFormattedOnFail":"N","OutputFieldLevelReturnCodes":"N","OutputStreetNameAlias":"Y",
"ReportListProcessorName":"","OutputAddressBlocks":"N","ExtractFirm":"N",
"DirectionalMatchingStrictness":"M","OutputPreferredCity":"Z","ReportListNumber":"1",
"FirmMatchingStrictness":"M","KeepMultimatch":"N","StandardAddressPMBLine":"N",
"OutputMultinationalCharacters":"N","PerformSuiteLink":"N","OutputShortCityName":"S",
"DPVSuccessfulStatusCondition":"A","PerformLACSLink":"N","PerformEWS":"N",
"OutputPostalCodeSeparator":"Y","FailOnCMRAMatch":"N","PerformLOT":"N","StandardAddressFormat":
"C","SuppressZplusPhantomCarrierR777":"N","OutputCountryFormat":"E","OutputRecordType":"A   
","HomeCountry":"United States","ReportMailerAddress":"","OutputReport3553":"N",
"OutputVeriMoveDataBlock":"N","PerformRDI":"N","ReportMailerName":"","OutputAbbreviatedAlias":
"N","PerformESM":"N","PerformDPV":"N","OutputVanityCityFormatLong":"Y","OutputReportSummary":
"Y","OutputPreferredAlias":"N","StreetMatchingStrictness":"M","DPVDetermineNoStat":"N",
"MaximumResults":"10"}}';

-- set general configuration
set hivevar:generalconf='{"dFileType":"SPLIT","dMemoryModel":"MEDIUM","lacsLinkMemoryModel":
"MEDIUM","suiteLinkMemoryModel":"MEDIUM"}';

-- set reference path
set hivevar:location='/user/hduser/ReferenceData/AddressQuality/UAM/Data.zip';

-- set process type
set hivevar:processtype='VALIDATE'; 

-- set header
set hivevar:header='InputKeyValue,AddressLine1,AddressLine2,
City,DefectNumber,FirmName,PostalCode,StateProvince';

-- Execute Query on the desired table, to display the job output on console.
 This query returns a map of key value pairs containing output fields for each row.
SELECT tmp2.record["Status"],tmp2.record["Status.Description"],tmp2.record["Confidence"],
tmp2.record["AddressLine1"],tmp2.record["InputKeyValue"] FROM ( select uamvalidation
(${hivevar:engineconf},${hivevar:generalconf},${hivevar:processtype},${hivevar:inputconf},
${hivevar:header},inputkeyvalue,addressline1,addressline2,city,defectnumber,firmname,postalcode,
stateprovince) as mygp from address_uam) as addressgroup LATERAL VIEW explode(addressgroup.mygp)
 tmp2 as record ;