Sample Hive Scripts

Reference data placed on local node

-- Register Global Addressing Module [Global Address Validation] BDQ Hive UDF Jar 
ADD JAR <directory path>/gam-globaladdressvalidation-hive-${project.version}.jar; 

-- Provide alias to UDF class (optional). String in quotes
 represent class names needed for this job to run.
CREATE TEMPORARY FUNCTION addressvalidation as 
'com.pb.bdq.gam.process.hive.addressvalidation.AddressValidationUDF';

-- set Engine configuration
set hivevar:engineconf='{"productDatabaseInfoList":
[{"dbPath":"<path to extracted spd>",
"countryCode":["FRA"],"processType":"VALIDATE"}],
"referenceDataPathLocation":"LocaltoDataNodes"}';

-- set input configuration
set hivevar:inputoption='{"casing":"Upper","matchMode":"Relaxed",
"defaultCountry":"FRA","maximumResults":1,"returnInputAddress":true,
"returnParsedAddress":true,"returnPrecisionCode":true,
"returnCountrySpecificFields":true,"mustMatchAddressNumber":false,
"mustMatchStreet":false,"mustMatchCity":false,"mustMatchLocality":false,
"mustMatchState":false,"mustMatchStateProvince":false,
"mustMatchPostCode":false,"keepMultiMatch":false,"preferPostalOverCity":false,
"cityFallback":false,"postalFallback":false,"validationLevel":"ADDRESS"}';

-- set header
set hivevar:header='RecordID,AddressLine1,City,PostalCode,Country';

-- Query to dump output data to a file
INSERT OVERWRITE LOCAL DIRECTORY '<local directory path>' row format 
delimited FIELDS TERMINATED BY '|' lines terminated by '\n' STORED AS TEXTFILE
SELECT 
coalesce(tmp2.record["AdditionalInputData"] ,''),
coalesce(tmp2.record["AddressBlock1"] ,''),
coalesce(tmp2.record["AddressBlock2"] ,''),
coalesce(tmp2.record["AddressLine1"] ,''),
coalesce(tmp2.record["AddressLine1.Input"] ,''),
coalesce(tmp2.record["ApartmentLabel"] ,''),
coalesce(tmp2.record["ApartmentNumber"] ,''),
coalesce(tmp2.record["AUS.Address.Class"] ,''),
coalesce(tmp2.record["AUS.Level.Number"] ,''),
coalesce(tmp2.record["AUS.Parcel.ID"] ,''),
coalesce(tmp2.record["AUS.PID"] ,''),
coalesce(tmp2.record["AUS.Principal.Pid"] ,''),
coalesce(tmp2.record["AUS.SA1"] ,''),
coalesce(tmp2.record["Building"] ,''),
coalesce(tmp2.record["CAN.Census.CD"] ,''),
coalesce(tmp2.record["CAN.Census.CMA"] ,''),
coalesce(tmp2.record["CAN.Census.CSD"] ,''),
coalesce(tmp2.record["CAN.Census.CT"] ,''),
coalesce(tmp2.record["CAN.Census.DA"] ,''),
coalesce(tmp2.record["CAN.FormattedStreet.Range"] ,''),
coalesce(tmp2.record["City"] ,''),
coalesce(tmp2.record["City.Input"] ,''),
coalesce(tmp2.record["City.Matched"] ,''),
coalesce(tmp2.record["CitySubdivision"] ,''),
coalesce(tmp2.record["CitySubdivision.Input"] ,''),
coalesce(tmp2.record["CitySubdivision.Matched"] ,''),
coalesce(tmp2.record["Confidence"] ,''),
coalesce(tmp2.record["CouldNotValidate"] ,''),
coalesce(tmp2.record["Country"] ,''),
coalesce(tmp2.record["Country.Input"] ,''),
coalesce(tmp2.record["County"] ,''),
coalesce(tmp2.record["County.Matched"] ,''),
coalesce(tmp2.record["FirmName"] ,''),
coalesce(tmp2.record["FirmName.Input"] ,''),
coalesce(tmp2.record["Firmname.Matched"] ,''),
coalesce(tmp2.record["GBR.Aliased.Locality"] ,''),
coalesce(tmp2.record["GBR.Dependent.Locality"] ,''),
coalesce(tmp2.record["GBR.DependentStreet.Name"] ,''),
coalesce(tmp2.record["GBR.DoubleDependent.Locality"] ,''),
coalesce(tmp2.record["GBR.Historic.Postcode"] ,''),
coalesce(tmp2.record["GBR.OSAPR"] ,''),
coalesce(tmp2.record["GBR.RPC"] ,''),
coalesce(tmp2.record["GBR.UPRN"] ,''),
coalesce(tmp2.record["HouseNumber"] ,''),
coalesce(tmp2.record["Housenumber.Matched"] ,''),
coalesce(tmp2.record["IRL.Eircode"] ,''),
coalesce(tmp2.record["ITA.Historical.Postcode"] ,''),
coalesce(tmp2.record["LeadingDirectional"] ,''),
coalesce(tmp2.record["MatchOnAllStreetFields"] ,''),
coalesce(tmp2.record["MatchOnStreetDirectional"] ,''),
coalesce(tmp2.record["MultimatchCount"] ,''),
coalesce(tmp2.record["NZL.Aliased.SUBURB"] ,''),
coalesce(tmp2.record["ParsedAddressLine1.Input"] ,''),
coalesce(tmp2.record["ParsedApartmentLable.Input"] ,''),
coalesce(tmp2.record["ParsedApartmentNumber.Input"] ,''),
coalesce(tmp2.record["ParsedCity.Input"] ,''),
coalesce(tmp2.record["ParsedCitySubDivision.Input"] ,''),
coalesce(tmp2.record["ParsedCountry.Input"] ,''),
coalesce(tmp2.record["ParsedHouseNumber.Input"] ,''),
coalesce(tmp2.record["ParsedPlaceName.Input"] ,''),
coalesce(tmp2.record["ParsedPostCodeAddOn.Input"] ,''),
coalesce(tmp2.record["ParsedPostCodeBase.Input"] ,''),
coalesce(tmp2.record["ParsedPostStreetType.Input"] ,''),
coalesce(tmp2.record["ParsedPreStreetType.Input"] ,''),
coalesce(tmp2.record["ParsedStateProvince.Input"] ,''),
coalesce(tmp2.record["ParsedStateProvinceSubdivision.Input"] ,''),
coalesce(tmp2.record["PlaceName"] ,''),
coalesce(tmp2.record["POBox"] ,''),
coalesce(tmp2.record["PostalCode"] ,''),
coalesce(tmp2.record["PostalCode.AddOn"] ,''),
coalesce(tmp2.record["PostalCode.Input"] ,''),
coalesce(tmp2.record["Postalcode.Matched"] ,''),
coalesce(tmp2.record["PrecisionCode"] ,''),
coalesce(tmp2.record["Principality"] ,''),
coalesce(tmp2.record["ProcessedBy"] ,''),
coalesce(tmp2.record["RecordID"] ,''),
coalesce(tmp2.record["StateProvince"] ,''),
coalesce(tmp2.record["StateProvince.Input"] ,''),
coalesce(tmp2.record["StateProvince.Matched"] ,''),
coalesce(tmp2.record["StateProvinceSubdivision"] ,''),
coalesce(tmp2.record["StateProvinceSubdivision.Input"] ,''),
coalesce(tmp2.record["StateProvinceSubdivision.Matched"] ,''),
coalesce(tmp2.record["Status"] ,''),
coalesce(tmp2.record["Status.Code"] ,''),
coalesce(tmp2.record["Status.Description"] ,''),
coalesce(tmp2.record["StreetName"] ,''),
coalesce(tmp2.record["StreetName.Matched"] ,''),
coalesce(tmp2.record["StreetType"] ,''),
coalesce(tmp2.record["StreetType.Matched"] ,''),
coalesce(tmp2.record["Subcity"] ,''),
coalesce(tmp2.record["TrailingDirectional"] ,''),
coalesce(tmp2.record["VendorCode"] ,'')
 FROM (SELECT  addressvalidation
(${hivevar:engineconf},${hivevar:inputoption},
${hivevar:header},RecordID,AddressLine1,City,PostalCode,Country) 
as mygp from gavtable )as addressgroup
 LATERAL VIEW explode(addressgroup.mygp) tmp2 as record;

Reference data placed on HDFS and downloaded on local nodes for jobs

-- Register Global Addressing Module [Global Address Validation] BDQ Hive UDF Jar 
ADD JAR <directory path>/gam-globaladdressvalidation-hive-${project.version}.jar;

-- Provide alias to UDF class (optional). 
String in quotes represent class names needed for this job to run.
CREATE TEMPORARY FUNCTION addressvalidation as 
'com.pb.bdq.gam.process.hive.addressvalidation.AddressValidationUDF';

-- set Engine configuration
set hivevar:engineconf='{"productDatabaseInfoList":[{"referenceDataPath":
{"referenceDataPathLocation":"HDFS",
"dataDir":"/user/hadoop/RefrenceData/AddressValidation",
"dataDownloader":{"dataDownloader":"HDFS","localFSRepository":
"/opt/PitneyBowes/ReferenceData/AddressValidation"}},
"countryCode":["GBR"],"processType":"VALIDATE"}]}';

-- set input configuration
set hivevar:inputoption='{"casing":"Upper","matchMode":
"Relaxed","defaultCountry":"GBR","maximumResults":1,"returnInputAddress"
:true,"returnParsedAddress":true,"returnPrecisionCode":true,"returnCountrySpecificFields":true,
"mustMatchAddressNumber":false,"mustMatchStreet":false,"mustMatchCity":false,
"mustMatchLocality":false,"mustMatchState":false,
"mustMatchStateProvince":false,"mustMatchPostCode":false,"keepMultiMatch":false,
"preferPostalOverCity":false,"cityFallback":false,
"postalFallback":false,"validationLevel":"ADDRESS"}';

-- set header
set hivevar:header='RecordID,AddressLine1,City,PostalCode,Country';

-- Query to dump output data to a file
INSERT OVERWRITE LOCAL DIRECTORY '<local directory path>' row format 
delimited FIELDS TERMINATED BY '|' lines terminated by '\n' STORED AS TEXTFILE
SELECT 
coalesce(tmp2.record["AdditionalInputData"] ,''),
coalesce(tmp2.record["AddressBlock1"] ,''),
coalesce(tmp2.record["AddressBlock2"] ,''),
coalesce(tmp2.record["AddressLine1"] ,''),
coalesce(tmp2.record["AddressLine1.Input"] ,''),
coalesce(tmp2.record["ApartmentLabel"] ,''),
coalesce(tmp2.record["ApartmentNumber"] ,''),
coalesce(tmp2.record["AUS.Address.Class"] ,''),
coalesce(tmp2.record["AUS.Level.Number"] ,''),
coalesce(tmp2.record["AUS.Parcel.ID"] ,''),
coalesce(tmp2.record["AUS.PID"] ,''),
coalesce(tmp2.record["AUS.Principal.Pid"] ,''),
coalesce(tmp2.record["AUS.SA1"] ,''),
coalesce(tmp2.record["Building"] ,''),
coalesce(tmp2.record["CAN.Census.CD"] ,''),
coalesce(tmp2.record["CAN.Census.CMA"] ,''),
coalesce(tmp2.record["CAN.Census.CSD"] ,''),
coalesce(tmp2.record["CAN.Census.CT"] ,''),
coalesce(tmp2.record["CAN.Census.DA"] ,''),
coalesce(tmp2.record["CAN.FormattedStreet.Range"] ,''),
coalesce(tmp2.record["City"] ,''),
coalesce(tmp2.record["City.Input"] ,''),
coalesce(tmp2.record["City.Matched"] ,''),
coalesce(tmp2.record["CitySubdivision"] ,''),
coalesce(tmp2.record["CitySubdivision.Input"] ,''),
coalesce(tmp2.record["CitySubdivision.Matched"] ,''),
coalesce(tmp2.record["Confidence"] ,''),
coalesce(tmp2.record["CouldNotValidate"] ,''),
coalesce(tmp2.record["Country"] ,''),
coalesce(tmp2.record["Country.Input"] ,''),
coalesce(tmp2.record["County"] ,''),
coalesce(tmp2.record["County.Matched"] ,''),
coalesce(tmp2.record["FirmName"] ,''),
coalesce(tmp2.record["FirmName.Input"] ,''),
coalesce(tmp2.record["Firmname.Matched"] ,''),
coalesce(tmp2.record["GBR.Aliased.Locality"] ,''),
coalesce(tmp2.record["GBR.Dependent.Locality"] ,''),
coalesce(tmp2.record["GBR.DependentStreet.Name"] ,''),
coalesce(tmp2.record["GBR.DoubleDependent.Locality"] ,''),
coalesce(tmp2.record["GBR.Historic.Postcode"] ,''),
coalesce(tmp2.record["GBR.OSAPR"] ,''),
coalesce(tmp2.record["GBR.RPC"] ,''),
coalesce(tmp2.record["GBR.UPRN"] ,''),
coalesce(tmp2.record["HouseNumber"] ,''),
coalesce(tmp2.record["Housenumber.Matched"] ,''),
coalesce(tmp2.record["IRL.Eircode"] ,''),
coalesce(tmp2.record["ITA.Historical.Postcode"] ,''),
coalesce(tmp2.record["LeadingDirectional"] ,''),
coalesce(tmp2.record["MatchOnAllStreetFields"] ,''),
coalesce(tmp2.record["MatchOnStreetDirectional"] ,''),
coalesce(tmp2.record["MultimatchCount"] ,''),
coalesce(tmp2.record["NZL.Aliased.SUBURB"] ,''),
coalesce(tmp2.record["ParsedAddressLine1.Input"] ,''),
coalesce(tmp2.record["ParsedApartmentLable.Input"] ,''),
coalesce(tmp2.record["ParsedApartmentNumber.Input"] ,''),
coalesce(tmp2.record["ParsedCity.Input"] ,''),
coalesce(tmp2.record["ParsedCitySubDivision.Input"] ,''),
coalesce(tmp2.record["ParsedCountry.Input"] ,''),
coalesce(tmp2.record["ParsedHouseNumber.Input"] ,''),
coalesce(tmp2.record["ParsedPlaceName.Input"] ,''),
coalesce(tmp2.record["ParsedPostCodeAddOn.Input"] ,''),
coalesce(tmp2.record["ParsedPostCodeBase.Input"] ,''),
coalesce(tmp2.record["ParsedPostStreetType.Input"] ,''),
coalesce(tmp2.record["ParsedPreStreetType.Input"] ,''),
coalesce(tmp2.record["ParsedStateProvince.Input"] ,''),
coalesce(tmp2.record["ParsedStateProvinceSubdivision.Input"] ,''),
coalesce(tmp2.record["PlaceName"] ,''),
coalesce(tmp2.record["POBox"] ,''),
coalesce(tmp2.record["PostalCode"] ,''),
coalesce(tmp2.record["PostalCode.AddOn"] ,''),
coalesce(tmp2.record["PostalCode.Input"] ,''),
coalesce(tmp2.record["Postalcode.Matched"] ,''),
coalesce(tmp2.record["PrecisionCode"] ,''),
coalesce(tmp2.record["Principality"] ,''),
coalesce(tmp2.record["ProcessedBy"] ,''),
coalesce(tmp2.record["RecordID"] ,''),
coalesce(tmp2.record["StateProvince"] ,''),
coalesce(tmp2.record["StateProvince.Input"] ,''),
coalesce(tmp2.record["StateProvince.Matched"] ,''),
coalesce(tmp2.record["StateProvinceSubdivision"] ,''),
coalesce(tmp2.record["StateProvinceSubdivision.Input"] ,''),
coalesce(tmp2.record["StateProvinceSubdivision.Matched"] ,''),
coalesce(tmp2.record["Status"] ,''),
coalesce(tmp2.record["Status.Code"] ,''),
coalesce(tmp2.record["Status.Description"] ,''),
coalesce(tmp2.record["StreetName"] ,''),
coalesce(tmp2.record["StreetName.Matched"] ,''),
coalesce(tmp2.record["StreetType"] ,''),
coalesce(tmp2.record["StreetType.Matched"] ,''),
coalesce(tmp2.record["Subcity"] ,''),
coalesce(tmp2.record["TrailingDirectional"] ,''),
coalesce(tmp2.record["VendorCode"] ,'') FROM 
(SELECT  addressvalidation(${hivevar:engineconf},
${hivevar:inputoption},${hivevar:header},
RecordID,AddressLine1,City,PostalCode,Country) as mygp from gavtable ) as
 addressgroup LATERAL VIEW explode(addressgroup.mygp) tmp2 as record;

Reference data placed on HDFS and downloaded to working directory for jobs

-- Register Global Addressing Module [GAV] BDQ Hive UDF Jar 
ADD JAR <directory path>/gam-globaladdressvalidation-hive-${project.version}.jar;

-- Provide alias to UDF class (optional). String in quotes represent
 class names needed for this job to run.
CREATE TEMPORARY FUNCTION addressvalidation as
 'com.pb.bdq.gam.process.hive.addressvalidation.AddressValidationUDF';

-- set Engine configuration
set hivevar:engineconf='{"dataDirPath":{"dataDir":"/home/hduser/gav/GGB062017.zip",
"referenceDataPathLocation":"HDFS"}, "productDatabaseInfoList":
[{"dbPath":"/home/hduser/gav/GGB062017.zip","countryCode":
["GBR"],"processType":"VALIDATE"}]}';

-- set input configuration
set hivevar:inputoption='{"casing":"Mixed","matchMode":"Relaxed",
"defaultCountry":"GBR","maximumResults":2,"returnInputAddress":false,
"returnParsedAddress":false,"returnPrecisionCode":false,"mustMatchAddressNumber":false,
"mustMatchStreet":false,"mustMatchCity":false,"mustMatchLocality":false,
"mustMatchState":false,"mustMatchStateProvince":false,
"mustMatchPostCode":false,"keepMultiMatch":true,"preferPostalOverCity":false,
"cityFallback":true,"postalFallback":true,"validationLevel":"ADDRESS"}';


-- set header
set hivevar:header='inputkeyvalue,AddressLine1,AddressLine2,City,postalcode,
StateProvince,firmname,Country';

-- Execute Query on the desired table, to display the job output on console.
 This query returns a map of key value pairs containing output fields for each row.
SELECT * FROM (SELECT  addressvalidation(${hivevar:engineconf},${hivevar:inputoption},
${hivevar:header},inputkeyvalue,addressline1,AddressLine2,city,postalcode,
stateprovince,firmname,country)as mygp from address_validation) as 
addressgroup LATERAL VIEW explode(addressgroup.mygp) tmp2 as record;