Reference data placed on local node
-- Register Universal Addressing Module - US [UAM-US] BDQ Hive UDAF Jar
ADD JAR <Directory path>/uam.universaladdress.hive.${project.version}.jar;
-- Provide alias to UDAF class (optional). String in quotes represent
class names needed for this job to run.
CREATE TEMPORARY FUNCTION uamvalidation as 'com.pb.bdq.uam.process.hive.universaladdress.UAMUSAddressingUDF';
-- set LD_LIBRARY_PATH(path to modules lib, runtime/lib and runtime/bin)
,G1RTS(path containing COBOL runtime) and ACU_RUNCBL_JNI_ONLOAD_DISABLE in this configuration
set mapreduce.admin.user.env=LD_LIBRARY_PATH=/home/hduser/~/runtime/lib:/home/hduser/~/runtime/
bin:/home/hduser/~/server/modules/universaladdress/lib,ACU_RUNCBL_JNI_ONLOAD_DISABLE=1,G1RTS=/
home/hduser/~/ ;
-- set engine configuration
set hivevar:engineconf='{"referenceData":{"dataDir":"/home/hduser/ReferenceData/AddressQuality/
UAM/Data","referenceDataPathLocation":"LocaltoDataNodes"},"cobolRuntimePath":"","modulesDir":"",
"dpvDbPath":"/home/hduser/ReferenceData/AddressQuality/UAM/Data","suiteLinkDBPath":"/home/hduser/
ReferenceData/AddressQuality/UAM/Data","ewsDBPath":"/home/hduser/ReferenceData/AddressQuality/UAM/
Data","rdiDBPath":"/home/hduser/ReferenceData/AddressQuality/UAM/Data","lacsDBPath":"/home/hduser/
ReferenceData/AddressQuality/UAM/Data"}';
-- set input configuration
set hivevar:inputconf='{"processType":"VALIDATE","performUSProcessing":true,
"outputStandardAddress":true,"outputAddressElements":false,"outputPostalData":false,
"outputParsedInput":false,"outputAddressBlocks":false,"outputFormattedOnFail":false,
"outputCasing":"MIXED","outputPostalCodeSeparator":true,
"outputMultinationalCharacters":false,"performDPV":false,"performRDI":false,"performESM":false,
"performASM":false,"performEWS":false,\"performLACSLink":false,"performLOT":
false,"failOnCMRAMatch":false,"extractFirm":false,"extractUrb":false,"outputReport3553":false,
"outputReportSummary":true,"outputCASSDetail":false,
"outputFieldLevelReturnCodes":false,"keepMultimatch":false,"maximumResults":10,
"standardAddressFormat":"STANDARD_ADDRESS_FORMAT_COMBINED_UNIT",
"standardAddressPMBLine":"STANDARD_ADDRESS_PMB_LINE_NONE","cityNameFormat":"CITY_FORMAT_STANDARD",
"vanityCityFormatLong":true,"outputCountryFormat":"ENGLISH","homeCountry":"United States",
"streetMatchingStrictness":"MATCHING_STRICTNESS_MEDIUM","firmMatchingStrictness":
"MATCHING_STRICTNESS_MEDIUM","directionalMatchingStrictness":"MATCHING_STRICTNESS_MEDIUM",
"dualAddressLogic":"DUAL_NORMAL","dpvSuccessfulStatusCondition":"DPV_CONDITON_ALWAYS",
"reportListFileName":"","reportlistProcessorName":"","reportlistNumber":1,"reportMailerAddress":
"","reportMailerName":"","reportMailerCityLine":"","addressLineSearchOnFail":true,
"outputStreetAlias":true,"outputVeriMoveBlock":false,"dpvDetermineNoStat":false,
"dpvDetermineVacancy":false,"outputAbbreviatedAlias":false,"outputPreferredAlias":false,
"outputPreferredCity":"CITY_OVERRIDE_NAME_ZIP4","performSuiteLink":false,
"suppressZplusPhantomCarrierR777":false,"dpvSeedList":null,"lacsSeedList":null,
"zipInputSet":null,"reportName":null,"jobRequest":false,"properties":{"DPVDetermineVacancy":"N",
"DualAddressLogic":"N","PerformASM":"N","ExtractUrb":"N","OutputCasing":"M",
"AddressLineSearchOnFail":"Y","ReportListFileName":"","ReportMailerCityLine":"",
"OutputFormattedOnFail":"N","OutputFieldLevelReturnCodes":"N","OutputStreetNameAlias":"Y",
"ReportListProcessorName":"","OutputAddressBlocks":"N","ExtractFirm":"N",
"DirectionalMatchingStrictness":"M","OutputPreferredCity":"Z","ReportListNumber":"1",
"FirmMatchingStrictness":"M","KeepMultimatch":"N","StandardAddressPMBLine":"N",
"OutputMultinationalCharacters":"N","PerformSuiteLink":"N","OutputShortCityName":"S",
"DPVSuccessfulStatusCondition":"A","PerformLACSLink":"N","PerformEWS":"N",
"OutputPostalCodeSeparator":"Y","FailOnCMRAMatch":"N","PerformLOT":"N",
"StandardAddressFormat":"C","SuppressZplusPhantomCarrierR777":"N","OutputCountryFormat":"E",
"OutputRecordType":"A ","HomeCountry":"United States","ReportMailerAddress":"",
"OutputReport3553":"N","OutputVeriMoveDataBlock":"N","PerformRDI":"N","ReportMailerName":"",
"OutputAbbreviatedAlias":"N","PerformESM":"N","PerformDPV":"N","OutputVanityCityFormatLong":"Y",
"OutputReportSummary":"Y","OutputPreferredAlias":"N","StreetMatchingStrictness":
"M","DPVDetermineNoStat":"N","MaximumResults":"10"}}';
-- set general configuration
set hivevar:generalconf='{"dFileType":"SPLIT","dMemoryModel":"MEDIUM","lacsLinkMemoryModel":
"MEDIUM","suiteLinkMemoryModel":"MEDIUM"}';
-- set reference path
set hivevar:location='/home/hduser/ReferenceData/AddressQuality/UAM/Data';
-- set process type
set hivevar:processtype='VALIDATE';
-- set header
set hivevar:header='InputKeyValue,AddressLine1,AddressLine2,City,DefectNumber,FirmName,PostalCode,
StateProvince';
-- Execute Query on the desired table, to display the job output on console. This query returns a
map of key value pairs containing output fields for each row.
SELECT tmp2.record["Confidence"],tmp2.record["AddressLine1"] FROM ( select uamvalidation
(${hivevar:engineconf},${hivevar:generalconf},${hivevar:processtype},${hivevar:inputconf},
${hivevar:header},inputkeyvalue,firmname,addressline1,addressline2,city,stateprovince,postalcode,
text) from uam_us) as addressgroup LATERAL VIEW explode(addressgroup.mygp) tmp2 as record ;
-- Query to dump output data to a file
INSERT OVERWRITE LOCAL DIRECTORY '/home/hadoop/GlobalAddressing/' row format delimited FIELDS
TERMINATED BY ',' lines terminated by '\n' STORED AS TEXTFILE
SELECT tmp2.record["Confidence"],tmp2.record["AddressLine1"] FROM ( select uamvalidation
(${hivevar:engineconf},${hivevar:generalconf},${hivevar:processtype},
${hivevar:inputconf},${hivevar:header},inputkeyvalue,firmname,addressline1,
addressline2,city,stateprovince,postalcode,text) from uam_us) as addressgroup
LATERAL VIEW explode(addressgroup.mygp) tmp2 as record ;
+-------------------+------------------------------+-----------------+------------------------+---------------------+------------------+
| address.recordid | address.addressline1 | address.city | address.stateprovince | address.postalcode | address.country |
+-------------------+------------------------------+-----------------+------------------------+---------------------+------------------+
| 1 | 18 Merivale St | South Brisbane | QLD | 4101 | AUS |
| 2 | 19 Serpentine Rd | Albany | WA | 6330 | AUS |
| 3 | 317 VICTORIA ST GR | BRUNSWICK | VIC | 3056 | AUS |
| 4 | DUPLEX 6/16-18 O'CONNELL ST | AINSLIE | ACT | 2602 | AUS |
| 5 | LOT 154 470 BRYGON CREEK DR | UPPER COOMERA | QLD | 4209 | AUS |
| 6 | 16 GREENE ST | WARRAWONG | ACT | 2502 | AUS |
| 7 | UNIT 47/16 BLAIRMOUNT ST | PARKINSON | QLD | 4115 | AUS |
| 8 | 13-15 FRANCESCO CRES | BELLA VISTA | NSW | 2153 | AUS |
| 9 | 4 RYANS LANE | HEATHCOTE | VIC | 3523 | AUS |
| 10 | 1 CHRISTMAS LN | NORTH POLE | VIC | 1111 | AUS |
+-------------------+------------------------------+-----------------+------------------------+---------------------+------------------+
+-----------+---------------+--------+----------------------------------+---------------+
|Confidence |StreetName |HouseNumber | AddressLine1 | AddressType |
+-----------+---------------+------------+------------------------------+---------------+
| 100.00 | MERIVALE | 18 | 18 MERIVALE ST | S |
| 99.42 | SERPENTINE | 19 | 19 SERPENTINE RD E | S |
| 97.95 | VICTORIA | 317 | 317 VICTORIA ST | S |
| 100.00 | O'CONNELL | 16-18 | DUP 6 16-18 O'CONNELL ST | S |
| 0.00 | BRYGON CREEK | 470 | LOT 154 470 BRYGON CREEK DR | U |
| 76.99 | GREENE | 16 | 16 GREENE ST | S |
| 100.00 | BLAIRMOUNT | 16 | U 47 16 BLAIRMOUNT ST | S |
| 100.00 | FRANCESCO | 13-15 | 13-15 FRANCESCO CRES | S |
| 100.00 | RYANS | 4 | 4 RYANS LANE | S |
| 0.00 | CHRISTMAS | 1 | 1 CHRISTMAS LN | U |
+-----------+---------------+------------+------------------------------+---------------+
Reference data placed on HDFS and downloaded on local nodes for jobs
-- Register Universal Addressing Module - US [UAM-US] BDQ Hive UDAF Jar
ADD JAR <Directory path>/uam.universaladdress.hive.${project.version}.jar;
-- Provide alias to UDAF class (optional). String in quotes represent class names needed for this
job to run.
CREATE TEMPORARY FUNCTION uamvalidation as 'com.pb.bdq.uam.process.hive.universaladdress.
UAMUSAddressingUDF';
set hive.fetch.task.conversion=none;
-- set LD_LIBRARY_PATH(path to modules lib, runtime/lib and runtime/bin)
,G1RTS(path containing COBOL runtime) and ACU_RUNCBL_JNI_ONLOAD_DISABLE
in this configuration
set mapreduce.admin.user.env=LD_LIBRARY_PATH=/home/hduser/acushareInstall/modules/c1p/lib:/home/
hduser/acushareInstall/runtime/bin:/home/hduser/acushareInstall/runtime/lib,
ACU_RUNCBL_JNI_ONLOAD_DISABLE=1,G1RTS=/home/hduser/acushareInstall/runtime;
-- set engine configuration
set hivevar:engineconf='{"referenceData":{"referenceDataPathLocation":"HDFS","dataDir":
"/user/hadoop/ReferenceData/UAM_US","dataDownloader":{"dataDownloader":"HDFS","localFSRepository":
"/opt/ReferenceData/UAM-US"}},"cobolRuntimePath":"","modulesDir":"","acushareServiceRunning":false,
"unixVersion":"REDHAT7","acushareLicensePath":"","dpvDbPath":"/user/hadoop/ReferenceData/UAM_US",
"suiteLinkDBPath":"/user/hadoop/ReferenceData/UAM_US","ewsDBPath":"/user/hadoop/ReferenceData/
UAM_US","rdiDBPath":"/user/hadoop/ReferenceData/UAM_US","lacsDBPath":"/user/hadoop/ReferenceData/
UAM_US"}';
-- set input configuration
set hivevar:inputconf='{"processType":"VALIDATE","performUSProcessing":true,
"outputStandardAddress":true,"outputAddressElements":false,"outputPostalData":false,
"outputParsedInput":false,"outputAddressBlocks":false,"outputFormattedOnFail":false,
"outputCasing":"MIXED","outputPostalCodeSeparator":true,"outputMultinationalCharacters":false,
"performDPV":false,"performRDI":false,"performESM":false,"performASM":false,
"performEWS":false,"performLACSLink":false,"performLOT":false,"failOnCMRAMatch":false,
"extractFirm":false,"extractUrb":false,"outputReport3553":false,"outputReportSummary":true,
"outputCASSDetail":false,"outputFieldLevelReturnCodes":false,"keepMultimatch":false,
"maximumResults":10,"standardAddressFormat":"STANDARD_ADDRESS_FORMAT_COMBINED_UNIT",
"standardAddressPMBLine":"STANDARD_ADDRESS_PMB_LINE_NONE","cityNameFormat":
"CITY_FORMAT_STANDARD","vanityCityFormatLong":true,"outputCountryFormat":
"ENGLISH","homeCountry":"United States","streetMatchingStrictness":
"MATCHING_STRICTNESS_MEDIUM","firmMatchingStrictness"
:"MATCHING_STRICTNESS_MEDIUM","directionalMatchingStrictness":"MATCHING_STRICTNESS_MEDIUM",
"dualAddressLogic":"DUAL_NORMAL","dpvSuccessfulStatusCondition":
"DPV_CONDITON_ALWAYS","reportListFileName":"","reportlistProcessorName":"",
"reportlistNumber":1,"reportMailerAddress":"","reportMailerName":"",
"reportMailerCityLine":"","addressLineSearchOnFail":true,"outputStreetAlias":true,
"outputVeriMoveBlock":false,"dpvDetermineNoStat":false,"dpvDetermineVacancy":false,
"outputAbbreviatedAlias":false,"outputPreferredAlias":false,"outputPreferredCity":
"CITY_OVERRIDE_NAME_ZIP4","performSuiteLink":false,"suppressZplusPhantomCarrierR777":
false,"dpvSeedList":null,"lacsSeedList":null,"zipInputSet":null,
"reportName":null,"jobRequest":false,"properties":{"DPVDetermineVacancy":"N",
"DualAddressLogic":"N","PerformASM":"N","ExtractUrb":"N","OutputCasing":"M",
"AddressLineSearchOnFail":"Y","ReportListFileName":"",
"ReportMailerCityLine":"","OutputFormattedOnFail":"N","OutputFieldLevelReturnCodes":"N",
"OutputStreetNameAlias":"Y","ReportListProcessorName":"","OutputAddressBlocks":"N","ExtractFirm":
"N","DirectionalMatchingStrictness":"M","OutputPreferredCity":"Z","ReportListNumber":"1",
"FirmMatchingStrictness":"M","KeepMultimatch":"N","StandardAddressPMBLine":"N",
"OutputMultinationalCharacters":"N","PerformSuiteLink":"N","OutputShortCityName":"S",
"DPVSuccessfulStatusCondition":"A","PerformLACSLink":"N","PerformEWS":"N",
"OutputPostalCodeSeparator":"Y","FailOnCMRAMatch":"N","PerformLOT":
"N","StandardAddressFormat":"C","SuppressZplusPhantomCarrierR777":"N",
"OutputCountryFormat":"E","OutputRecordType":"A ",
"HomeCountry":"United States","ReportMailerAddress":"","OutputReport3553":"N",
"OutputVeriMoveDataBlock":"N","PerformRDI":"N","ReportMailerName":"","OutputAbbreviatedAlias":
"N","PerformESM":"N","PerformDPV":"N","OutputVanityCityFormatLong":"Y","OutputReportSummary":"Y",
"OutputPreferredAlias":"N","StreetMatchingStrictness":"M","DPVDetermineNoStat":"N","MaximumResults":
"10"}}';
-- set general configuration
set hivevar:generalconf='{"dFileType":"SPLIT","dMemoryModel":"MEDIUM","lacsLinkMemoryModel":
"MEDIUM","suiteLinkMemoryModel":"MEDIUM"}';
-- set process type
set hivevar:processtype='VALIDATE';
-- set header
set hivevar:header='InputKeyValue,AddressLine1,AddressLine2,City,DefectNumber,FirmName,PostalCode,
StateProvince';
-- Execute Query on the desired table, to display the job output on console. This query returns a
map of key value pairs containing output fields for each row.
SELECT tmp2.record["Status"],tmp2.record["Status.Description"],tmp2.record["Confidence"],
tmp2.record["AddressLine1"],tmp2.record["InputKeyValue"] FROM ( select uamvalidation
(${hivevar:engineconf},${hivevar:generalconf},${hivevar:processtype},${hivevar:inputconf},
${hivevar:header},inputkeyvalue,addressline1,addressline2,city,defectnumber,firmname,
postalcode,stateprovince) as mygp from address_uam) as addressgroup LATERAL VIEW explode
(addressgroup.mygp) tmp2 as record ;
Reference data placed on HDFS and downloaded to working directory for jobs
-- Register Universal Addressing Module - US [UAM-US] BDQ Hive UDAF Jar
ADD JAR <Directory path>/uam.universaladdress.hive.${project.version}.jar;
-- Provide alias to UDAF class (optional). String in quotes represent class names needed for this
job to run.
CREATE TEMPORARY FUNCTION uamvalidation as 'com.pb.bdq.uam.process.hive.universaladdress.
UAMUSAddressingUDF';
--Provide reference data zip file to be added to cache
ADD ARCHIVE hdfs:///user/hduser/ReferenceData/AddressQuality/UAM/Data.zip;
set hive.fetch.task.conversion=none;
-- set LD_LIBRARY_PATH(path to modules lib, runtime/lib and runtime/bin) ,G1RTS(path containing
COBOL runtime) and ACU_RUNCBL_JNI_ONLOAD_DISABLE in this configuration
set mapreduce.admin.user.env=LD_LIBRARY_PATH=/home/hduser/acushareInstall/modules/c1p/lib:/home/
hduser/acushareInstall/runtime/bin:/home/hduser/acushareInstall/runtime/lib,
ACU_RUNCBL_JNI_ONLOAD_DISABLE=1,G1RTS=/home/hduser/acushareInstall/runtime;
-- set engine configuration
set hivevar:engineconf='{"referenceData":{"dataDir":"/user/hduser/ReferenceData/AddressQuality/
UAM/Data.zip","referenceDataPathLocation":"HDFS"},"cobolRuntimePath":"","modulesDir":"","dpvDbPath":
"/user/hduser/ReferenceData/AddressQuality/UAM/Data.zip","suiteLinkDBPath":"/user/hduser/
ReferenceData/AddressQuality/UAM/Data.zip","ewsDBPath":"/user/hduser/ReferenceData/AddressQuality/
UAM/Data.zip","rdiDBPath":null,"lacsDBPath":"/user/hduser/ReferenceData/AddressQuality/
UAM/Data.zip"}';
-- set input configuration
set hivevar:inputconf='{"processType":"VALIDATE","performUSProcessing":true,
"outputStandardAddress":true,"outputAddressElements":false,"outputPostalData"
:false,"outputParsedInput":false,"outputAddressBlocks":false,"outputFormattedOnFail":false,
"outputCasing":"MIXED","outputPostalCodeSeparator":true,"outputMultinationalCharacters":false,
"performDPV":false,"performRDI":false,"performESM":false,"performASM":false,
"performEWS":false,"performLACSLink":false,"performLOT":false,"failOnCMRAMatch":false,
"extractFirm":false,"extractUrb":false,"outputReport3553":false,"outputReportSummary":true,
"outputCASSDetail":false,"outputFieldLevelReturnCodes":false,"keepMultimatch":false,
"maximumResults":10,"standardAddressFormat":"STANDARD_ADDRESS_FORMAT_COMBINED_UNIT",
"standardAddressPMBLine":"STANDARD_ADDRESS_PMB_LINE_NONE","cityNameFormat":"CITY_FORMAT_STANDARD",
"vanityCityFormatLong":true,"outputCountryFormat":"ENGLISH","homeCountry":"United States",
"streetMatchingStrictness":"MATCHING_STRICTNESS_MEDIUM","firmMatchingStrictness"
:"MATCHING_STRICTNESS_MEDIUM","directionalMatchingStrictness":"MATCHING_STRICTNESS_MEDIUM",
"dualAddressLogic":"DUAL_NORMAL","dpvSuccessfulStatusCondition":"DPV_CONDITON_ALWAYS",
"reportListFileName":"","reportlistProcessorName":"","reportlistNumber":1,"reportMailerAddress":"",
"reportMailerName":"","reportMailerCityLine":"","addressLineSearchOnFail"
:true,"outputStreetAlias":true,"outputVeriMoveBlock":false,"dpvDetermineNoStat":false,
"dpvDetermineVacancy":false,"outputAbbreviatedAlias":false,"outputPreferredAlias":false,
"outputPreferredCity":"CITY_OVERRIDE_NAME_ZIP4","performSuiteLink":false,
"suppressZplusPhantomCarrierR777":false,"dpvSeedList":null,"lacsSeedList":null,"zipInputSet":null,
"reportName":null,"jobRequest":false,"properties":{"DPVDetermineVacancy":"N","DualAddressLogic":"N",
"PerformASM":"N","ExtractUrb":"N","OutputCasing":"M","AddressLineSearchOnFail":"Y",
"ReportListFileName":"","ReportMailerCityLine":
"","OutputFormattedOnFail":"N","OutputFieldLevelReturnCodes":"N","OutputStreetNameAlias":"Y",
"ReportListProcessorName":"","OutputAddressBlocks":"N","ExtractFirm":"N",
"DirectionalMatchingStrictness":"M","OutputPreferredCity":"Z","ReportListNumber":"1",
"FirmMatchingStrictness":"M","KeepMultimatch":"N","StandardAddressPMBLine":"N",
"OutputMultinationalCharacters":"N","PerformSuiteLink":"N","OutputShortCityName":"S",
"DPVSuccessfulStatusCondition":"A","PerformLACSLink":"N","PerformEWS":"N",
"OutputPostalCodeSeparator":"Y","FailOnCMRAMatch":"N","PerformLOT":"N","StandardAddressFormat":
"C","SuppressZplusPhantomCarrierR777":"N","OutputCountryFormat":"E","OutputRecordType":"A
","HomeCountry":"United States","ReportMailerAddress":"","OutputReport3553":"N",
"OutputVeriMoveDataBlock":"N","PerformRDI":"N","ReportMailerName":"","OutputAbbreviatedAlias":
"N","PerformESM":"N","PerformDPV":"N","OutputVanityCityFormatLong":"Y","OutputReportSummary":
"Y","OutputPreferredAlias":"N","StreetMatchingStrictness":"M","DPVDetermineNoStat":"N",
"MaximumResults":"10"}}';
-- set general configuration
set hivevar:generalconf='{"dFileType":"SPLIT","dMemoryModel":"MEDIUM","lacsLinkMemoryModel":
"MEDIUM","suiteLinkMemoryModel":"MEDIUM"}';
-- set reference path
set hivevar:location='/user/hduser/ReferenceData/AddressQuality/UAM/Data.zip';
-- set process type
set hivevar:processtype='VALIDATE';
-- set header
set hivevar:header='InputKeyValue,AddressLine1,AddressLine2,
City,DefectNumber,FirmName,PostalCode,StateProvince';
-- Execute Query on the desired table, to display the job output on console.
This query returns a map of key value pairs containing output fields for each row.
SELECT tmp2.record["Status"],tmp2.record["Status.Description"],tmp2.record["Confidence"],
tmp2.record["AddressLine1"],tmp2.record["InputKeyValue"] FROM ( select uamvalidation
(${hivevar:engineconf},${hivevar:generalconf},${hivevar:processtype},${hivevar:inputconf},
${hivevar:header},inputkeyvalue,addressline1,addressline2,city,defectnumber,firmname,postalcode,
stateprovince) as mygp from address_uam) as addressgroup LATERAL VIEW explode(addressgroup.mygp)
tmp2 as record ;