Sample Hive Scripts - USA Addressing Validation

Reference data placed on local node

-- Register Universal Addressing Module [UAM-GAV(Global Address Validation)] BDQ Hive UDAF Jar 
ADD JAR <directory path>/gam.globaladdressvalidation.hive.${version}.jar;

-- Provide alias to UDF class (optional). String in quotes represent class names needed for 
this job to run.
CREATE TEMPORARY FUNCTION addressvalidation as
'com.pb.bdq.gam.process.hive.addressvalidation.AddressValidationUDF';

-- set Engine configuration
set hivevar:engineconf='{"productDatabaseInfoList":[{"referenceDataPath":
{"referenceDataPathLocation":"LocaltoDataNodes","dataDir":"/user/hadoop/ReferenceData/
GAV_US_DOM"},"countryCode":["USA"],"processType":"VALIDATE"}]}';

-- set input configuration
set hivevar:inputoption='{"casing":"Mixed","matchMode":"Relaxed","defaultCountry":"USA",
"maximumResults":2,"returnInputAddress":false,"returnParsedAddress":false,
"returnPrecisionCode":false,"returnCountrySpecificFields":true,"mustMatchAddressNumber":false,
"mustMatchStreet":false,"mustMatchCity":false,"mustMatchLocality":false,"mustMatchState":false,
"mustMatchStateProvince":false,"mustMatchPostCode":false,"preferPostalOverCity":false,
"cityFallback":true,"postalFallback":true,"validationLevel":"ADDRESS",
"preferPOBoxOverStreet":false,"allStreetMatching":true,"assignABBRCity":false,
"assignLot":true,"cassFlag":true,"convertSecToPMB":false,"dpv":true,"dpvCMRA":true,
"dpvDNA":true,"dpvIndicator":true,"dpvNDD":true,"dpvNSL":true,"dpvNoStat":true,
"dpvPBSA":true,"dpvThrowback":true,"dpvTieBreak":true,"dpvVacant":true,
"dualAddress":"POBoxPreference","earlyWarningSystem":true,"lacsLinkIndicator":true,
"logLevel":"Error","mailerAddress":"","mailerAddress2":"","mailerAddress3":"",
"mailerAddress4":"","mailerCityLine":"","mailerName":"","processingMode":"",
"r777Deliverable":false,"removeNoiseChars":true,"residentialDeliveryIndicator":true,
"returnAliasStreet":"AbbrPreferredAliasBase","returnInputFirm":false,
"returnSLKSecondary":"BothSuitelinkAndInput","standalonePMB":false,"standaloneUnit":false,
"suiteLinkIndicator":false,"vmDataBlock":false,"dataBlocks":false,"return3553Data":false,
"returnAdsInfo":false,"returnAlternate":false,"returnRunStatistics":false,
"returnSetupInfo":false}';

-- set header
set hivevar:header='AddressLine1,City,PostalCode,StateProvince,Country';

-- Query to dump output data to a file
INSERT OVERWRITE LOCAL DIRECTORY '/opt/PitneyBowes/addressvalidation_us/out' row format 
delimited FIELDS TERMINATED BY '|' lines terminated by '\n' STORED AS TEXTFILE
SELECT 
coalesce(tmp2.record["AddressLine1"] ,''),
coalesce(tmp2.record["AddressBlock1"] ,''),
coalesce(tmp2.record["AddressBlock2"] ,''),
coalesce(tmp2.record["City"] ,''),
coalesce(tmp2.record["CitySubdivision"] ,''),
coalesce(tmp2.record["StateProvince"] ,''),
coalesce(tmp2.record["StateProvinceSubdivision"] ,''),
coalesce(tmp2.record["PostalCode"] ,''),
coalesce(tmp2.record["Country"] ,''),
coalesce(tmp2.record["PrecisionCode"] ,''),
coalesce(tmp2.record["VendorCode"] ,''),
coalesce(tmp2.record["FirmName"] ,''),
coalesce(tmp2.record["PostalCode.AddOn"] ,''),
coalesce(tmp2.record["ProcessedBy"] ,''),
coalesce(tmp2.record["MultimatchCount"] ,''),
coalesce(tmp2.record["HouseNumber"] ,''),
coalesce(tmp2.record["StreetName"] ,''),
coalesce(tmp2.record["ApartmentLabel"] ,''),
coalesce(tmp2.record["ApartmentNumber"] ,''),
coalesce(tmp2.record["StreetType"] ,''),
coalesce(tmp2.record["Confidence"] ,''),
coalesce(tmp2.record["Building"] ,''),
coalesce(tmp2.record["POBox"] ,''),
coalesce(tmp2.record["Principality"] ,''),
coalesce(tmp2.record["LeadingDirectional"] ,''),
coalesce(tmp2.record["TrailingDirectional"] ,''),
coalesce(tmp2.record["MatchOnAllStreetFields"] ,''),
coalesce(tmp2.record["MatchOnStreetDirectional"] ,''),
coalesce(tmp2.record["AdditionalInputData"] ,''),
coalesce(tmp2.record["PlaceName"] ,''),
coalesce(tmp2.record["AddressLine1.Input"] ,''),
coalesce(tmp2.record["City.Input"] ,''),
coalesce(tmp2.record["CitySubdivision.Input"] ,''),
coalesce(tmp2.record["StateProvince.Input"] ,''),
coalesce(tmp2.record["StateProvinceSubdivision.Input"] ,''),
coalesce(tmp2.record["PostalCode.Input"] ,''),
coalesce(tmp2.record["Country.Input"] ,''),
coalesce(tmp2.record["FirmName.Input"] ,''),
coalesce(tmp2.record["City.Matched"] ,''),
coalesce(tmp2.record["CitySubdivision.Matched"] ,''),
coalesce(tmp2.record["StateProvince.Matched"] ,''),
coalesce(tmp2.record["StateProvinceSubdivision.Matched"] ,''),
coalesce(tmp2.record["StreetName.Matched"] ,''),
coalesce(tmp2.record["StreetType.Matched"] ,''),
coalesce(tmp2.record["Firmname.Matched"] ,''),
coalesce(tmp2.record["Housenumber.Matched"] ,''),
coalesce(tmp2.record["Postalcode.Matched"] ,''),
coalesce(tmp2.record["County.Matched"] ,''),
coalesce(tmp2.record["County"] ,''),
coalesce(tmp2.record["Status"] ,''),
coalesce(tmp2.record["Status.Code"] ,''),
coalesce(tmp2.record["Status.Description"] ,''),
coalesce(tmp2.record["AUS.Address.Class"] ,''),
coalesce(tmp2.record["AUS.Level.Number"] ,''),
coalesce(tmp2.record["AUS.Parcel.ID"] ,''),
coalesce(tmp2.record["AUS.PID"] ,''),
coalesce(tmp2.record["AUS.Principal.Pid"] ,''),
coalesce(tmp2.record["AUS.SA1"] ,''),
coalesce(tmp2.record["CAN.Census.CD"] ,''),
coalesce(tmp2.record["CAN.Census.CMA"] ,''),
coalesce(tmp2.record["CAN.Census.CSD"] ,''),
coalesce(tmp2.record["CAN.Census.CT"] ,''),
coalesce(tmp2.record["CAN.Census.DA"] ,''),
coalesce(tmp2.record["CAN.FormattedStreet.Range"] ,''),
coalesce(tmp2.record["CouldNotValidate"] ,''),
coalesce(tmp2.record["GBR.Aliased.Locality"] ,''),
coalesce(tmp2.record["GBR.Dependent.Locality"] ,''),
coalesce(tmp2.record["GBR.DependentStreet.Name"] ,''),
coalesce(tmp2.record["GBR.DoubleDependent.Locality"] ,''),
coalesce(tmp2.record["GBR.Historic.Postcode"] ,''),
coalesce(tmp2.record["GBR.OSAPR"] ,''),
coalesce(tmp2.record["GBR.RPC"] ,''),
coalesce(tmp2.record["GBR.UPRN"] ,''),
coalesce(tmp2.record["IRL.Eircode"] ,''),
coalesce(tmp2.record["ITA.Historical.Postcode"] ,''),
coalesce(tmp2.record["NZL.Aliased.SUBURB"] ,''),
coalesce(tmp2.record["ParsedAddressLine1.Input"] ,''),
coalesce(tmp2.record["ParsedApartmentLable.Input"] ,''),
coalesce(tmp2.record["ParsedApartmentNumber.Input"] ,''),
coalesce(tmp2.record["ParsedCity.Input"] ,''),
coalesce(tmp2.record["ParsedCitySubDivision.Input"] ,''),
coalesce(tmp2.record["ParsedCountry.Input"] ,''),
coalesce(tmp2.record["ParsedHouseNumber.Input"] ,''),
coalesce(tmp2.record["ParsedPlaceName.Input"] ,''),
coalesce(tmp2.record["ParsedPostCodeAddOn.Input"] ,''),
coalesce(tmp2.record["ParsedPostCodeBase.Input"] ,''),
coalesce(tmp2.record["ParsedPostStreetType.Input"] ,''),
coalesce(tmp2.record["ParsedPreStreetType.Input"] ,''),
coalesce(tmp2.record["ParsedStateProvince.Input"] ,''),
coalesce(tmp2.record["ParsedStateProvinceSubdivision.Input"] ,''),
coalesce(tmp2.record["Subcity"] ,''),
coalesce(tmp2.record["DualAddressParsed.Input"] ,''),
coalesce(tmp2.record["IND.ExtendedResultCode"] ,''),
coalesce(tmp2.record["IND.IsRural"] ,''),
coalesce(tmp2.record["IND.SubLocality"] ,''),
coalesce(tmp2.record["IND.POICategory"] ,''),
coalesce(tmp2.record["JPN.BANCHI"] ,''),
coalesce(tmp2.record["JPN.CHOMOKO"] ,''),
coalesce(tmp2.record["JPN.CHOOAZA"] ,''),
coalesce(tmp2.record["JPN.GO"] ,''),
coalesce(tmp2.record["JPN.JUSHOCODE"] ,''),
coalesce(tmp2.record["USA.DPV"] ,''),
coalesce(tmp2.record["USA.DPV.Footnote"] ,''),
coalesce(tmp2.record["USA.DPV.CMRA"] ,''),
coalesce(tmp2.record["USA.DPV.FalsePositive"] ,''),
coalesce(tmp2.record["USA.DPV.Flags"] ,''),
coalesce(tmp2.record["USA.DPV.NoStat"] ,''),
coalesce(tmp2.record["USA.POBoxOnly"] ,''),
coalesce(tmp2.record["USA.DPV.Vacant"] ,''),
coalesce(tmp2.record["USA.DPV.PBSAFound"] ,''),
coalesce(tmp2.record["USA.LACS"] ,''),
coalesce(tmp2.record["USA.LACS.SeedHit"] ,''),
coalesce(tmp2.record["USA.LACS.ReturnCode"] ,''),
coalesce(tmp2.record["USA.LACS.Indicator"] ,''),
coalesce(tmp2.record["USA.LACS.PreLACSAddress"] ,''),
coalesce(tmp2.record["USA.LACS.PreLACSRange"] ,''),
coalesce(tmp2.record["USA.LACS.PreLACSLeadingDirectional"] ,''),
coalesce(tmp2.record["USA.LACS.PreLACSStreetName"] ,''),
coalesce(tmp2.record["USA.LACS.PreLACSSuffix"] ,''),
coalesce(tmp2.record["USA.LACS.PreLACSTrailingDirectional"] ,''),
coalesce(tmp2.record["USA.LACS.PreLACSUnitD"] ,''),
coalesce(tmp2.record["USA.LACS.PreLACSUnitN"] ,''),
coalesce(tmp2.record["USA.LACS.PreLACSMatchedZIP"] ,''),
coalesce(tmp2.record["USA.LACS.PreLACSMatchedZIP4"] ,''),
coalesce(tmp2.record["USA.LOTCode"] ,''),
coalesce(tmp2.record["USA.LOTSequence"] ,''),
coalesce(tmp2.record["USA.SuiteLink.ReturnCode"] ,''),
coalesce(tmp2.record["USA.SuiteLink.MatchCode"] ,''),
coalesce(tmp2.record["USA.SuiteLink.Fidelity"] ,''),
coalesce(tmp2.record["VeriMoveDataBlock"] ,''),
coalesce(tmp2.record["USA.CarrierRouteCode"] ,''),
coalesce(tmp2.record["USA.PrivateMailbox"] ,''),
coalesce(tmp2.record["USA.PrivateMailbox.Type"] ,''),
coalesce(tmp2.record["USA.PrivateMailbox.Input"] ,''),
coalesce(tmp2.record["USA.PrivateMailbox.Type.Input"] ,''),
coalesce(tmp2.record["USA.RDI"] ,''),
coalesce(tmp2.record["USA.CASSAddressLine1"] ,''),
coalesce(tmp2.record["USA.CASSAddressLine2"] ,''),
coalesce(tmp2.record["USA.CASSCityName"] ,''),
coalesce(tmp2.record["USA.FullCityName"] ,''),
coalesce(tmp2.record["USA.AbbreviatedCityName"] ,''),
coalesce(tmp2.record["USA.NonMailingCityName"] ,''),
coalesce(tmp2.record["USA.PreferredCityName"] ,''),
coalesce(tmp2.record["USA.PreferredState"] ,''),
coalesce(tmp2.record["USA.EWSFailure"] ,''),
coalesce(tmp2.record["USA.MatchLevel"] ,''),
coalesce(tmp2.record["USA.DefaultMatch"] ,''),
coalesce(tmp2.record["USA.Status"] ,''),
coalesce(tmp2.record["USA.Status.Code"] ,''),
coalesce(tmp2.record["USA.Status.Description"] ,''),
coalesce(tmp2.record["USA.PostalBarcode"] ,''),
coalesce(tmp2.record["USA.BCCheckDigit"] ,''),
coalesce(tmp2.record["USA.AdvancedBarcode"] ,''),
coalesce(tmp2.record["USA.FiveDigitBarcode"] ,''),
coalesce(tmp2.record["USA.ZIPValid"] ,''),
coalesce(tmp2.record["USA.ZIP4Valid"] ,''),
coalesce(tmp2.record["USA.AddressLocation"] ,''),
coalesce(tmp2.record["USA.CongressionalDistrict"] ,''),
coalesce(tmp2.record["USA.FIPSCountyNumber"] ,''),
coalesce(tmp2.record["USA.AltStreetType"] ,''),
coalesce(tmp2.record["USA.AltStreet"] ,''),
coalesce(tmp2.record["USA.Parsed.Range"] ,''),
coalesce(tmp2.record["USA.Parsed.PreDirectional"] ,''),
coalesce(tmp2.record["USA.Parsed.StreetName"] ,''),
coalesce(tmp2.record["USA.Parsed.PostDirectional"] ,''),
coalesce(tmp2.record["USA.Parsed.StreetSuffix"] ,''),
coalesce(tmp2.record["USA.Parsed.UnitDesignator"] ,''),
coalesce(tmp2.record["USA.Parsed.UnitNumber"] ,''),
coalesce(tmp2.record["USA.Parsed.Unit2Designator"] ,''),
coalesce(tmp2.record["USA.Parsed.Unit2Number"] ,''),
coalesce(tmp2.record["USA.Parsed.PMUnitDesignator"] ,''),
coalesce(tmp2.record["USA.Parsed.PMUnitNumber"] ,''),
coalesce(tmp2.record["USA.Parsed.AltStreet"] ,''),
coalesce(tmp2.record["USA.Parsed.AltRange"] ,''),
coalesce(tmp2.record["USA.Parsed.AltPreDirectional"] ,''),
coalesce(tmp2.record["USA.Parsed.AltStreetName"] ,''),
coalesce(tmp2.record["USA.Parsed.cAltPostDirectional"] ,''),
coalesce(tmp2.record["USA.Parsed.cAltStreetSuffix"], '') FROM 
(SELECT  addressvalidation(${hivevar:engineconf},${hivevar:inputoption},
${hivevar:header},AddressLine1,City,PostalCode,StateProvince,Country) as mygp from usaddress )
as addressgroup LATERAL VIEW explode(addressgroup.mygp) tmp2 as record;

Reference data placed on HDFS and downloaded on local nodes for jobs

-- Register Universal Addressing Module [UAM-GAV(Global Address Validation)] BDQ Hive UDAF Jar 
ADD JAR <directory path>/gam.globaladdressvalidation.hive.${version}.jar;

-- Provide alias to UDF class (optional). String in quotes represent class names needed for 
this job to run.
CREATE TEMPORARY FUNCTION addressvalidation as 
'com.pb.bdq.gam.process.hive.addressvalidation.AddressValidationUDF';

-- set Engine configuration
set hivevar:engineconf='{"productDatabaseInfoList":[{"referenceDataPath":
{"referenceDataPathLocation":"HDFS","dataDir":"/user/hadoop/RefrenceData/AddressValidation",
"dataDownloader":{"dataDownloader":"HDFS","localFSRepository":
"/opt/PitneyBowes/ReferenceData/AddressValidation"}},"countryCode":["USA"],
"processType":"VALIDATE"}]}';

-- set input configuration
set hivevar:inputoption='{"casing":"Mixed","matchMode":"Relaxed","defaultCountry":"USA",
"maximumResults":2,"returnInputAddress":false,"returnParsedAddress":false,
"returnPrecisionCode":false,"returnCountrySpecificFields":true,
"mustMatchAddressNumber":false,"mustMatchStreet":false,"mustMatchCity":false,
"mustMatchLocality":false,"mustMatchState":false,"mustMatchStateProvince":false,
"mustMatchPostCode":false,"preferPostalOverCity":false,"cityFallback":true,
"postalFallback":true,"validationLevel":"ADDRESS","preferPOBoxOverStreet":false,
"allStreetMatching":true,"assignABBRCity":false,"assignLot":true,"cassFlag":true,
"convertSecToPMB":false,"dpv":true,"dpvCMRA":true,"dpvDNA":true,"dpvIndicator":true,
"dpvNDD":true,"dpvNSL":true,"dpvNoStat":true,"dpvPBSA":true,"dpvThrowback":true,
"dpvTieBreak":true,"dpvVacant":true,"dualAddress":"POBoxPreference",
"earlyWarningSystem":true,"lacsLinkIndicator":true,"logLevel":"Error",
"mailerAddress":"","mailerAddress2":"","mailerAddress3":"","mailerAddress4":"",
"mailerCityLine":"","mailerName":"","processingMode":"","r777Deliverable":false,
"removeNoiseChars":true,"residentialDeliveryIndicator":true,
"returnAliasStreet":"AbbrPreferredAliasBase","returnInputFirm":false,
"returnSLKSecondary":"BothSuitelinkAndInput","standalonePMB":false,"standaloneUnit":false,
"suiteLinkIndicator":false,"vmDataBlock":false,"dataBlocks":false,"return3553Data":false,
"returnAdsInfo":false,"returnAlternate":false,"returnRunStatistics":false,
"returnSetupInfo":false}';

-- set header
set hivevar:header='AddressLine1,City,PostalCode,StateProvince,Country';

-- Query to dump output data to a file
INSERT OVERWRITE LOCAL DIRECTORY '/opt/PitneyBowes/addressvalidation_us/out' 
row format delimited FIELDS TERMINATED BY '|' lines terminated by '\n' STORED AS TEXTFILE
SELECT 
coalesce(tmp2.record["AddressLine1"] ,''),
coalesce(tmp2.record["AddressBlock1"] ,''),
coalesce(tmp2.record["AddressBlock2"] ,''),
coalesce(tmp2.record["City"] ,''),
coalesce(tmp2.record["CitySubdivision"] ,''),
coalesce(tmp2.record["StateProvince"] ,''),
coalesce(tmp2.record["StateProvinceSubdivision"] ,''),
coalesce(tmp2.record["PostalCode"] ,''),
coalesce(tmp2.record["Country"] ,''),
coalesce(tmp2.record["PrecisionCode"] ,''),
coalesce(tmp2.record["VendorCode"] ,''),
coalesce(tmp2.record["FirmName"] ,''),
coalesce(tmp2.record["PostalCode.AddOn"] ,''),
coalesce(tmp2.record["ProcessedBy"] ,''),
coalesce(tmp2.record["MultimatchCount"] ,''),
coalesce(tmp2.record["HouseNumber"] ,''),
coalesce(tmp2.record["StreetName"] ,''),
coalesce(tmp2.record["ApartmentLabel"] ,''),
coalesce(tmp2.record["ApartmentNumber"] ,''),
coalesce(tmp2.record["StreetType"] ,''),
coalesce(tmp2.record["Confidence"] ,''),
coalesce(tmp2.record["Building"] ,''),
coalesce(tmp2.record["POBox"] ,''),
coalesce(tmp2.record["Principality"] ,''),
coalesce(tmp2.record["LeadingDirectional"] ,''),
coalesce(tmp2.record["TrailingDirectional"] ,''),
coalesce(tmp2.record["MatchOnAllStreetFields"] ,''),
coalesce(tmp2.record["MatchOnStreetDirectional"] ,''),
coalesce(tmp2.record["AdditionalInputData"] ,''),
coalesce(tmp2.record["PlaceName"] ,''),
coalesce(tmp2.record["AddressLine1.Input"] ,''),
coalesce(tmp2.record["City.Input"] ,''),
coalesce(tmp2.record["CitySubdivision.Input"] ,''),
coalesce(tmp2.record["StateProvince.Input"] ,''),
coalesce(tmp2.record["StateProvinceSubdivision.Input"] ,''),
coalesce(tmp2.record["PostalCode.Input"] ,''),
coalesce(tmp2.record["Country.Input"] ,''),
coalesce(tmp2.record["FirmName.Input"] ,''),
coalesce(tmp2.record["City.Matched"] ,''),
coalesce(tmp2.record["CitySubdivision.Matched"] ,''),
coalesce(tmp2.record["StateProvince.Matched"] ,''),
coalesce(tmp2.record["StateProvinceSubdivision.Matched"] ,''),
coalesce(tmp2.record["StreetName.Matched"] ,''),
coalesce(tmp2.record["StreetType.Matched"] ,''),
coalesce(tmp2.record["Firmname.Matched"] ,''),
coalesce(tmp2.record["Housenumber.Matched"] ,''),
coalesce(tmp2.record["Postalcode.Matched"] ,''),
coalesce(tmp2.record["County.Matched"] ,''),
coalesce(tmp2.record["County"] ,''),
coalesce(tmp2.record["Status"] ,''),
coalesce(tmp2.record["Status.Code"] ,''),
coalesce(tmp2.record["Status.Description"] ,''),
coalesce(tmp2.record["AUS.Address.Class"] ,''),
coalesce(tmp2.record["AUS.Level.Number"] ,''),
coalesce(tmp2.record["AUS.Parcel.ID"] ,''),
coalesce(tmp2.record["AUS.PID"] ,''),
coalesce(tmp2.record["AUS.Principal.Pid"] ,''),
coalesce(tmp2.record["AUS.SA1"] ,''),
coalesce(tmp2.record["CAN.Census.CD"] ,''),
coalesce(tmp2.record["CAN.Census.CMA"] ,''),
coalesce(tmp2.record["CAN.Census.CSD"] ,''),
coalesce(tmp2.record["CAN.Census.CT"] ,''),
coalesce(tmp2.record["CAN.Census.DA"] ,''),
coalesce(tmp2.record["CAN.FormattedStreet.Range"] ,''),
coalesce(tmp2.record["CouldNotValidate"] ,''),
coalesce(tmp2.record["GBR.Aliased.Locality"] ,''),
coalesce(tmp2.record["GBR.Dependent.Locality"] ,''),
coalesce(tmp2.record["GBR.DependentStreet.Name"] ,''),
coalesce(tmp2.record["GBR.DoubleDependent.Locality"] ,''),
coalesce(tmp2.record["GBR.Historic.Postcode"] ,''),
coalesce(tmp2.record["GBR.OSAPR"] ,''),
coalesce(tmp2.record["GBR.RPC"] ,''),
coalesce(tmp2.record["GBR.UPRN"] ,''),
coalesce(tmp2.record["IRL.Eircode"] ,''),
coalesce(tmp2.record["ITA.Historical.Postcode"] ,''),
coalesce(tmp2.record["NZL.Aliased.SUBURB"] ,''),
coalesce(tmp2.record["ParsedAddressLine1.Input"] ,''),
coalesce(tmp2.record["ParsedApartmentLable.Input"] ,''),
coalesce(tmp2.record["ParsedApartmentNumber.Input"] ,''),
coalesce(tmp2.record["ParsedCity.Input"] ,''),
coalesce(tmp2.record["ParsedCitySubDivision.Input"] ,''),
coalesce(tmp2.record["ParsedCountry.Input"] ,''),
coalesce(tmp2.record["ParsedHouseNumber.Input"] ,''),
coalesce(tmp2.record["ParsedPlaceName.Input"] ,''),
coalesce(tmp2.record["ParsedPostCodeAddOn.Input"] ,''),
coalesce(tmp2.record["ParsedPostCodeBase.Input"] ,''),
coalesce(tmp2.record["ParsedPostStreetType.Input"] ,''),
coalesce(tmp2.record["ParsedPreStreetType.Input"] ,''),
coalesce(tmp2.record["ParsedStateProvince.Input"] ,''),
coalesce(tmp2.record["ParsedStateProvinceSubdivision.Input"] ,''),
coalesce(tmp2.record["Subcity"] ,''),
coalesce(tmp2.record["DualAddressParsed.Input"] ,''),
coalesce(tmp2.record["IND.ExtendedResultCode"] ,''),
coalesce(tmp2.record["IND.IsRural"] ,''),
coalesce(tmp2.record["IND.SubLocality"] ,''),
coalesce(tmp2.record["IND.POICategory"] ,''),
coalesce(tmp2.record["JPN.BANCHI"] ,''),
coalesce(tmp2.record["JPN.CHOMOKO"] ,''),
coalesce(tmp2.record["JPN.CHOOAZA"] ,''),
coalesce(tmp2.record["JPN.GO"] ,''),
coalesce(tmp2.record["JPN.JUSHOCODE"] ,''),
coalesce(tmp2.record["USA.DPV"] ,''),
coalesce(tmp2.record["USA.DPV.Footnote"] ,''),
coalesce(tmp2.record["USA.DPV.CMRA"] ,''),
coalesce(tmp2.record["USA.DPV.FalsePositive"] ,''),
coalesce(tmp2.record["USA.DPV.Flags"] ,''),
coalesce(tmp2.record["USA.DPV.NoStat"] ,''),
coalesce(tmp2.record["USA.POBoxOnly"] ,''),
coalesce(tmp2.record["USA.DPV.Vacant"] ,''),
coalesce(tmp2.record["USA.DPV.PBSAFound"] ,''),
coalesce(tmp2.record["USA.LACS"] ,''),
coalesce(tmp2.record["USA.LACS.SeedHit"] ,''),
coalesce(tmp2.record["USA.LACS.ReturnCode"] ,''),
coalesce(tmp2.record["USA.LACS.Indicator"] ,''),
coalesce(tmp2.record["USA.LACS.PreLACSAddress"] ,''),
coalesce(tmp2.record["USA.LACS.PreLACSRange"] ,''),
coalesce(tmp2.record["USA.LACS.PreLACSLeadingDirectional"] ,''),
coalesce(tmp2.record["USA.LACS.PreLACSStreetName"] ,''),
coalesce(tmp2.record["USA.LACS.PreLACSSuffix"] ,''),
coalesce(tmp2.record["USA.LACS.PreLACSTrailingDirectional"] ,''),
coalesce(tmp2.record["USA.LACS.PreLACSUnitD"] ,''),
coalesce(tmp2.record["USA.LACS.PreLACSUnitN"] ,''),
coalesce(tmp2.record["USA.LACS.PreLACSMatchedZIP"] ,''),
coalesce(tmp2.record["USA.LACS.PreLACSMatchedZIP4"] ,''),
coalesce(tmp2.record["USA.LOTCode"] ,''),
coalesce(tmp2.record["USA.LOTSequence"] ,''),
coalesce(tmp2.record["USA.SuiteLink.ReturnCode"] ,''),
coalesce(tmp2.record["USA.SuiteLink.MatchCode"] ,''),
coalesce(tmp2.record["USA.SuiteLink.Fidelity"] ,''),
coalesce(tmp2.record["VeriMoveDataBlock"] ,''),
coalesce(tmp2.record["USA.CarrierRouteCode"] ,''),
coalesce(tmp2.record["USA.PrivateMailbox"] ,''),
coalesce(tmp2.record["USA.PrivateMailbox.Type"] ,''),
coalesce(tmp2.record["USA.PrivateMailbox.Input"] ,''),
coalesce(tmp2.record["USA.PrivateMailbox.Type.Input"] ,''),
coalesce(tmp2.record["USA.RDI"] ,''),
coalesce(tmp2.record["USA.CASSAddressLine1"] ,''),
coalesce(tmp2.record["USA.CASSAddressLine2"] ,''),
coalesce(tmp2.record["USA.CASSCityName"] ,''),
coalesce(tmp2.record["USA.FullCityName"] ,''),
coalesce(tmp2.record["USA.AbbreviatedCityName"] ,''),
coalesce(tmp2.record["USA.NonMailingCityName"] ,''),
coalesce(tmp2.record["USA.PreferredCityName"] ,''),
coalesce(tmp2.record["USA.PreferredState"] ,''),
coalesce(tmp2.record["USA.EWSFailure"] ,''),
coalesce(tmp2.record["USA.MatchLevel"] ,''),
coalesce(tmp2.record["USA.DefaultMatch"] ,''),
coalesce(tmp2.record["USA.Status"] ,''),
coalesce(tmp2.record["USA.Status.Code"] ,''),
coalesce(tmp2.record["USA.Status.Description"] ,''),
coalesce(tmp2.record["USA.PostalBarcode"] ,''),
coalesce(tmp2.record["USA.BCCheckDigit"] ,''),
coalesce(tmp2.record["USA.AdvancedBarcode"] ,''),
coalesce(tmp2.record["USA.FiveDigitBarcode"] ,''),
coalesce(tmp2.record["USA.ZIPValid"] ,''),
coalesce(tmp2.record["USA.ZIP4Valid"] ,''),
coalesce(tmp2.record["USA.AddressLocation"] ,''),
coalesce(tmp2.record["USA.CongressionalDistrict"] ,''),
coalesce(tmp2.record["USA.FIPSCountyNumber"] ,''),
coalesce(tmp2.record["USA.AltStreetType"] ,''),
coalesce(tmp2.record["USA.AltStreet"] ,''),
coalesce(tmp2.record["USA.Parsed.Range"] ,''),
coalesce(tmp2.record["USA.Parsed.PreDirectional"] ,''),
coalesce(tmp2.record["USA.Parsed.StreetName"] ,''),
coalesce(tmp2.record["USA.Parsed.PostDirectional"] ,''),
coalesce(tmp2.record["USA.Parsed.StreetSuffix"] ,''),
coalesce(tmp2.record["USA.Parsed.UnitDesignator"] ,''),
coalesce(tmp2.record["USA.Parsed.UnitNumber"] ,''),
coalesce(tmp2.record["USA.Parsed.Unit2Designator"] ,''),
coalesce(tmp2.record["USA.Parsed.Unit2Number"] ,''),
coalesce(tmp2.record["USA.Parsed.PMUnitDesignator"] ,''),
coalesce(tmp2.record["USA.Parsed.PMUnitNumber"] ,''),
coalesce(tmp2.record["USA.Parsed.AltStreet"] ,''),
coalesce(tmp2.record["USA.Parsed.AltRange"] ,''),
coalesce(tmp2.record["USA.Parsed.AltPreDirectional"] ,''),
coalesce(tmp2.record["USA.Parsed.AltStreetName"] ,''),
coalesce(tmp2.record["USA.Parsed.cAltPostDirectional"] ,''),
coalesce(tmp2.record["USA.Parsed.cAltStreetSuffix"], '') 
FROM (SELECT  addressvalidation(${hivevar:engineconf},${hivevar:inputoption},
${hivevar:header},AddressLine1,City,PostalCode,StateProvince,Country) as mygp from usaddress )
as addressgroup LATERAL VIEW explode(addressgroup.mygp) tmp2 as record;

Reference data placed on HDFS and downloaded to working directory for jobs

-- Register Universal Addressing Module [UAM-GAV(Global Address Validation)] BDQ Hive UDAF 
Jar 
ADD JAR <directory path>/gam.globaladdressvalidation.hive.${version}.jar;

-- Provide alias to UDF class (optional). String in quotes represent class names needed for 
this job to run.
CREATE TEMPORARY FUNCTION addressvalidation as
 'com.pb.bdq.gam.process.hive.addressvalidation.AddressValidationUDF';

ADD ARCHIVE hdfs:///user/hduser/ReferenceData/AddressQuality/GAM/GAV_US_DOM.spd;

-- set Engine configuration
set hivevar:engineconf='{"productDatabaseInfoList":[{"referenceDataPath":
{"referenceDataPathLocation":"HDFS","dataDir":"/user/hadoop/RefrenceData/AddressValidation",
"dataDownloader":{"dataDownloader":"DC"}},"countryCode":["USA"],"processType":"VALIDATE"}]}';

-- set input configuration
set hivevar:inputoption='{"casing":"Mixed","matchMode":"Relaxed","defaultCountry":"USA",
"maximumResults":2,"returnInputAddress":false,"returnParsedAddress":false,
"returnPrecisionCode":false,"returnCountrySpecificFields":true,
"mustMatchAddressNumber":false,"mustMatchStreet":false,"mustMatchCity":false,
"mustMatchLocality":false,"mustMatchState":false,"mustMatchStateProvince":false,
"mustMatchPostCode":false,"preferPostalOverCity":false,"cityFallback":true,
"postalFallback":true,"validationLevel":"ADDRESS","preferPOBoxOverStreet":false,
"allStreetMatching":true,"assignABBRCity":false,"assignLot":true,"cassFlag":true,
"convertSecToPMB":false,"dpv":true,"dpvCMRA":true,"dpvDNA":true,"dpvIndicator":true,
"dpvNDD":true,"dpvNSL":true,"dpvNoStat":true,"dpvPBSA":true,"dpvThrowback":true,
"dpvTieBreak":true,"dpvVacant":true,"dualAddress":"POBoxPreference",
"earlyWarningSystem":true,"lacsLinkIndicator":true,"logLevel":"Error",
"mailerAddress":"","mailerAddress2":"","mailerAddress3":"","mailerAddress4":"",
"mailerCityLine":"","mailerName":"","processingMode":"","r777Deliverable":false,
"removeNoiseChars":true,"residentialDeliveryIndicator":true,
"returnAliasStreet":"AbbrPreferredAliasBase","returnInputFirm":false,
"returnSLKSecondary":"BothSuitelinkAndInput","standalonePMB":false,
"standaloneUnit":false,"suiteLinkIndicator":false,"vmDataBlock":false,
"dataBlocks":false,"return3553Data":false,"returnAdsInfo":false,
"returnAlternate":false,"returnRunStatistics":false,"returnSetupInfo":false}';

-- set header
set hivevar:header='AddressLine1,City,PostalCode,StateProvince,Country';

-- Query to dump output data to a file
INSERT OVERWRITE LOCAL DIRECTORY '/opt/PitneyBowes/addressvalidation_us/out' 
row format delimited FIELDS TERMINATED BY '|' lines terminated by '\n' STORED AS TEXTFILE
SELECT 
coalesce(tmp2.record["AddressLine1"] ,''),
coalesce(tmp2.record["AddressBlock1"] ,''),
coalesce(tmp2.record["AddressBlock2"] ,''),
coalesce(tmp2.record["City"] ,''),
coalesce(tmp2.record["CitySubdivision"] ,''),
coalesce(tmp2.record["StateProvince"] ,''),
coalesce(tmp2.record["StateProvinceSubdivision"] ,''),
coalesce(tmp2.record["PostalCode"] ,''),
coalesce(tmp2.record["Country"] ,''),
coalesce(tmp2.record["PrecisionCode"] ,''),
coalesce(tmp2.record["VendorCode"] ,''),
coalesce(tmp2.record["FirmName"] ,''),
coalesce(tmp2.record["PostalCode.AddOn"] ,''),
coalesce(tmp2.record["ProcessedBy"] ,''),
coalesce(tmp2.record["MultimatchCount"] ,''),
coalesce(tmp2.record["HouseNumber"] ,''),
coalesce(tmp2.record["StreetName"] ,''),
coalesce(tmp2.record["ApartmentLabel"] ,''),
coalesce(tmp2.record["ApartmentNumber"] ,''),
coalesce(tmp2.record["StreetType"] ,''),
coalesce(tmp2.record["Confidence"] ,''),
coalesce(tmp2.record["Building"] ,''),
coalesce(tmp2.record["POBox"] ,''),
coalesce(tmp2.record["Principality"] ,''),
coalesce(tmp2.record["LeadingDirectional"] ,''),
coalesce(tmp2.record["TrailingDirectional"] ,''),
coalesce(tmp2.record["MatchOnAllStreetFields"] ,''),
coalesce(tmp2.record["MatchOnStreetDirectional"] ,''),
coalesce(tmp2.record["AdditionalInputData"] ,''),
coalesce(tmp2.record["PlaceName"] ,''),
coalesce(tmp2.record["AddressLine1.Input"] ,''),
coalesce(tmp2.record["City.Input"] ,''),
coalesce(tmp2.record["CitySubdivision.Input"] ,''),
coalesce(tmp2.record["StateProvince.Input"] ,''),
coalesce(tmp2.record["StateProvinceSubdivision.Input"] ,''),
coalesce(tmp2.record["PostalCode.Input"] ,''),
coalesce(tmp2.record["Country.Input"] ,''),
coalesce(tmp2.record["FirmName.Input"] ,''),
coalesce(tmp2.record["City.Matched"] ,''),
coalesce(tmp2.record["CitySubdivision.Matched"] ,''),
coalesce(tmp2.record["StateProvince.Matched"] ,''),
coalesce(tmp2.record["StateProvinceSubdivision.Matched"] ,''),
coalesce(tmp2.record["StreetName.Matched"] ,''),
coalesce(tmp2.record["StreetType.Matched"] ,''),
coalesce(tmp2.record["Firmname.Matched"] ,''),
coalesce(tmp2.record["Housenumber.Matched"] ,''),
coalesce(tmp2.record["Postalcode.Matched"] ,''),
coalesce(tmp2.record["County.Matched"] ,''),
coalesce(tmp2.record["County"] ,''),
coalesce(tmp2.record["Status"] ,''),
coalesce(tmp2.record["Status.Code"] ,''),
coalesce(tmp2.record["Status.Description"] ,''),
coalesce(tmp2.record["AUS.Address.Class"] ,''),
coalesce(tmp2.record["AUS.Level.Number"] ,''),
coalesce(tmp2.record["AUS.Parcel.ID"] ,''),
coalesce(tmp2.record["AUS.PID"] ,''),
coalesce(tmp2.record["AUS.Principal.Pid"] ,''),
coalesce(tmp2.record["AUS.SA1"] ,''),
coalesce(tmp2.record["CAN.Census.CD"] ,''),
coalesce(tmp2.record["CAN.Census.CMA"] ,''),
coalesce(tmp2.record["CAN.Census.CSD"] ,''),
coalesce(tmp2.record["CAN.Census.CT"] ,''),
coalesce(tmp2.record["CAN.Census.DA"] ,''),
coalesce(tmp2.record["CAN.FormattedStreet.Range"] ,''),
coalesce(tmp2.record["CouldNotValidate"] ,''),
coalesce(tmp2.record["GBR.Aliased.Locality"] ,''),
coalesce(tmp2.record["GBR.Dependent.Locality"] ,''),
coalesce(tmp2.record["GBR.DependentStreet.Name"] ,''),
coalesce(tmp2.record["GBR.DoubleDependent.Locality"] ,''),
coalesce(tmp2.record["GBR.Historic.Postcode"] ,''),
coalesce(tmp2.record["GBR.OSAPR"] ,''),
coalesce(tmp2.record["GBR.RPC"] ,''),
coalesce(tmp2.record["GBR.UPRN"] ,''),
coalesce(tmp2.record["IRL.Eircode"] ,''),
coalesce(tmp2.record["ITA.Historical.Postcode"] ,''),
coalesce(tmp2.record["NZL.Aliased.SUBURB"] ,''),
coalesce(tmp2.record["ParsedAddressLine1.Input"] ,''),
coalesce(tmp2.record["ParsedApartmentLable.Input"] ,''),
coalesce(tmp2.record["ParsedApartmentNumber.Input"] ,''),
coalesce(tmp2.record["ParsedCity.Input"] ,''),
coalesce(tmp2.record["ParsedCitySubDivision.Input"] ,''),
coalesce(tmp2.record["ParsedCountry.Input"] ,''),
coalesce(tmp2.record["ParsedHouseNumber.Input"] ,''),
coalesce(tmp2.record["ParsedPlaceName.Input"] ,''),
coalesce(tmp2.record["ParsedPostCodeAddOn.Input"] ,''),
coalesce(tmp2.record["ParsedPostCodeBase.Input"] ,''),
coalesce(tmp2.record["ParsedPostStreetType.Input"] ,''),
coalesce(tmp2.record["ParsedPreStreetType.Input"] ,''),
coalesce(tmp2.record["ParsedStateProvince.Input"] ,''),
coalesce(tmp2.record["ParsedStateProvinceSubdivision.Input"] ,''),
coalesce(tmp2.record["Subcity"] ,''),
coalesce(tmp2.record["DualAddressParsed.Input"] ,''),
coalesce(tmp2.record["IND.ExtendedResultCode"] ,''),
coalesce(tmp2.record["IND.IsRural"] ,''),
coalesce(tmp2.record["IND.SubLocality"] ,''),
coalesce(tmp2.record["IND.POICategory"] ,''),
coalesce(tmp2.record["JPN.BANCHI"] ,''),
coalesce(tmp2.record["JPN.CHOMOKO"] ,''),
coalesce(tmp2.record["JPN.CHOOAZA"] ,''),
coalesce(tmp2.record["JPN.GO"] ,''),
coalesce(tmp2.record["JPN.JUSHOCODE"] ,''),
coalesce(tmp2.record["USA.DPV"] ,''),
coalesce(tmp2.record["USA.DPV.Footnote"] ,''),
coalesce(tmp2.record["USA.DPV.CMRA"] ,''),
coalesce(tmp2.record["USA.DPV.FalsePositive"] ,''),
coalesce(tmp2.record["USA.DPV.Flags"] ,''),
coalesce(tmp2.record["USA.DPV.NoStat"] ,''),
coalesce(tmp2.record["USA.POBoxOnly"] ,''),
coalesce(tmp2.record["USA.DPV.Vacant"] ,''),
coalesce(tmp2.record["USA.DPV.PBSAFound"] ,''),
coalesce(tmp2.record["USA.LACS"] ,''),
coalesce(tmp2.record["USA.LACS.SeedHit"] ,''),
coalesce(tmp2.record["USA.LACS.ReturnCode"] ,''),
coalesce(tmp2.record["USA.LACS.Indicator"] ,''),
coalesce(tmp2.record["USA.LACS.PreLACSAddress"] ,''),
coalesce(tmp2.record["USA.LACS.PreLACSRange"] ,''),
coalesce(tmp2.record["USA.LACS.PreLACSLeadingDirectional"] ,''),
coalesce(tmp2.record["USA.LACS.PreLACSStreetName"] ,''),
coalesce(tmp2.record["USA.LACS.PreLACSSuffix"] ,''),
coalesce(tmp2.record["USA.LACS.PreLACSTrailingDirectional"] ,''),
coalesce(tmp2.record["USA.LACS.PreLACSUnitD"] ,''),
coalesce(tmp2.record["USA.LACS.PreLACSUnitN"] ,''),
coalesce(tmp2.record["USA.LACS.PreLACSMatchedZIP"] ,''),
coalesce(tmp2.record["USA.LACS.PreLACSMatchedZIP4"] ,''),
coalesce(tmp2.record["USA.LOTCode"] ,''),
coalesce(tmp2.record["USA.LOTSequence"] ,''),
coalesce(tmp2.record["USA.SuiteLink.ReturnCode"] ,''),
coalesce(tmp2.record["USA.SuiteLink.MatchCode"] ,''),
coalesce(tmp2.record["USA.SuiteLink.Fidelity"] ,''),
coalesce(tmp2.record["VeriMoveDataBlock"] ,''),
coalesce(tmp2.record["USA.CarrierRouteCode"] ,''),
coalesce(tmp2.record["USA.PrivateMailbox"] ,''),
coalesce(tmp2.record["USA.PrivateMailbox.Type"] ,''),
coalesce(tmp2.record["USA.PrivateMailbox.Input"] ,''),
coalesce(tmp2.record["USA.PrivateMailbox.Type.Input"] ,''),
coalesce(tmp2.record["USA.RDI"] ,''),
coalesce(tmp2.record["USA.CASSAddressLine1"] ,''),
coalesce(tmp2.record["USA.CASSAddressLine2"] ,''),
coalesce(tmp2.record["USA.CASSCityName"] ,''),
coalesce(tmp2.record["USA.FullCityName"] ,''),
coalesce(tmp2.record["USA.AbbreviatedCityName"] ,''),
coalesce(tmp2.record["USA.NonMailingCityName"] ,''),
coalesce(tmp2.record["USA.PreferredCityName"] ,''),
coalesce(tmp2.record["USA.PreferredState"] ,''),
coalesce(tmp2.record["USA.EWSFailure"] ,''),
coalesce(tmp2.record["USA.MatchLevel"] ,''),
coalesce(tmp2.record["USA.DefaultMatch"] ,''),
coalesce(tmp2.record["USA.Status"] ,''),
coalesce(tmp2.record["USA.Status.Code"] ,''),
coalesce(tmp2.record["USA.Status.Description"] ,''),
coalesce(tmp2.record["USA.PostalBarcode"] ,''),
coalesce(tmp2.record["USA.BCCheckDigit"] ,''),
coalesce(tmp2.record["USA.AdvancedBarcode"] ,''),
coalesce(tmp2.record["USA.FiveDigitBarcode"] ,''),
coalesce(tmp2.record["USA.ZIPValid"] ,''),
coalesce(tmp2.record["USA.ZIP4Valid"] ,''),
coalesce(tmp2.record["USA.AddressLocation"] ,''),
coalesce(tmp2.record["USA.CongressionalDistrict"] ,''),
coalesce(tmp2.record["USA.FIPSCountyNumber"] ,''),
coalesce(tmp2.record["USA.AltStreetType"] ,''),
coalesce(tmp2.record["USA.AltStreet"] ,''),
coalesce(tmp2.record["USA.Parsed.Range"] ,''),
coalesce(tmp2.record["USA.Parsed.PreDirectional"] ,''),
coalesce(tmp2.record["USA.Parsed.StreetName"] ,''),
coalesce(tmp2.record["USA.Parsed.PostDirectional"] ,''),
coalesce(tmp2.record["USA.Parsed.StreetSuffix"] ,''),
coalesce(tmp2.record["USA.Parsed.UnitDesignator"] ,''),
coalesce(tmp2.record["USA.Parsed.UnitNumber"] ,''),
coalesce(tmp2.record["USA.Parsed.Unit2Designator"] ,''),
coalesce(tmp2.record["USA.Parsed.Unit2Number"] ,''),
coalesce(tmp2.record["USA.Parsed.PMUnitDesignator"] ,''),
coalesce(tmp2.record["USA.Parsed.PMUnitNumber"] ,''),
coalesce(tmp2.record["USA.Parsed.AltStreet"] ,''),
coalesce(tmp2.record["USA.Parsed.AltRange"] ,''),
coalesce(tmp2.record["USA.Parsed.AltPreDirectional"] ,''),
coalesce(tmp2.record["USA.Parsed.AltStreetName"] ,''),
coalesce(tmp2.record["USA.Parsed.cAltPostDirectional"] ,''),
coalesce(tmp2.record["USA.Parsed.cAltStreetSuffix"], '') FROM 
(SELECT  addressvalidation(${hivevar:engineconf},${hivevar:inputoption},${hivevar:header},
AddressLine1,City,PostalCode,StateProvince,Country) as mygp from usaddress ) 
as addressgroup LATERAL VIEW explode(addressgroup.mygp) tmp2 as record;