Sample Hive Scripts

Reference data placed on local node

-- Register Universal Addressing Module [UAM-Global] BDQ Hive UDAF Jar 
ADD JAR <Directory path>/uam.global.hive.${project.version}.jar;

ADD FILE <Directory path>/libAddressDoctor5.so;

-- Provide alias to UDAF class (optional). String in quotes represent class names needed for this job to run.
CREATE TEMPORARY FUNCTION globalvalidation as 'com.pb.bdq.uam.process.hive.global.GlobalAddressingUDF';

-- set Engine configuration
set hivevar:engineconf='[{"referenceData":{"dataDir":"/home/hduser/ReferenceData/AddressQuality/
Global","referenceDataPathLocation":"LocaltoDataNodes"},"databaseType":"BATCH_INTERACTIVE","preloadingType":"NONE",
"allCountries":true,"supportedCountries":"CAN,USA,AUS"}]';

-- set input configuration
set hivevar:inputconf='{"resultStateProvinceType":"COUNTRY_STANDARD","processMatchingScope":
"ALL","inputForceCountryISO3":null,"inputDefaultCountryISO3":"USA","inputFormatDelimiter":"CRLF",
"resultFormatDelimiter":"CRLF","resultIncludeInputs":false,"resultCountryType":"NAME_EN",
"processOptimizationLevel":"STANDARD","resultPreferredLanguage":"DATABASE","processMode":"BATCH",
"resultPreferredScript":"DATABASE","resultMaximumResults":1,"resultCasing":"NATIVE","properties":
{"Result.MaximumResults":"1","Database.AddressGlobal":"Database","Input.FormatDelimiter":"CRLF",
"Process.Mode":"BATCH","Input.ForceCountryISO3":"","Result.CountryType":"NAME_EN",
"Process.OptimizationLevel":"STANDARD","Result.IncludeInputs":"false","Input.DefaultCountryISO3"
:"USA","Process.EnrichmentAMAS":"false","Result.PreferredScript":"DATABASE","Process.MatchingScope"
:"ALL","Result.Casing":"NATIVE","Result.PreferredLanguage":"DATABASE","Result.StateProvinceType"
:"COUNTRY_STANDARD","Result.FormatDelimiter":"CRLF"}}';

-- set general configuration
set hivevar:generalconf='{"cacheSize":"LARGE","maxThreadCount":8,
"maxAddressObjectCount":8,"rangesToExpand":"NONE","flexibleRangeExpansion":"ON",
"enableTransactionLogging":false,"maxMemoryUsageMB":1024,"verbose":false}';

-- set unlock codec
set hivevar:unlockCode='';

-- set header
set hivevar:header='recordid,AddressLine1,City,StateProvince,PostalCode,Country';

-- Execute Query on the desired table, to display the job output on console.
 This query returns a map of key value pairs containing output fields for each row.
SELECT tmp2.record["HouseNumber"],tmp2.record["Confidence"],
tmp2.record["AddressLine1"],tmp2.record["StreetName"],tmp2.record["PostalCode"],
tmp2.record["ElementInputStatus"],tmp2.record["MailabilityScore"] FROM 
( SELECT  globalvalidation(${hivevar:engineconf},${hivevar:generalconf},${hivevar:inputconf},${hivevar:unlockCode},
${hivevar:header},recordid,addressline1,city,stateprovince,postalcode,country)
 as mygp from address) as addressgroup LATERAL VIEW explode(addressgroup.mygp) tmp2 as record ;


-- Query to dump output data to a file
INSERT OVERWRITE LOCAL DIRECTORY '/home/hadoop/GlobalAddressing/' 
row format delimited FIELDS TERMINATED BY ',' lines terminated by '\n' STORED AS TEXTFILE
SELECT tmp2.record["HouseNumber"],tmp2.record["Confidence"],
tmp2.record["AddressLine1"],tmp2.record["StreetName"],tmp2.record["PostalCode"],
tmp2.record["ElementInputStatus"],tmp2.record["MailabilityScore"] FROM 
( SELECT  globalvalidation(${hivevar:engineconf},${hivevar:generalconf},
${hivevar:inputconf},${hivevar:unlockCode},${hivevar:header},recordid,addressline1,
city,stateprovince,postalcode,country) as mygp from address) as addressgroup
 LATERAL VIEW explode(addressgroup.mygp) tmp2 as record ;

+-------------------+------------------------------+-----------------+------------------------+---------------------+------------------+
| address.recordid  |     address.addressline1     |  address.city   | address.stateprovince  | address.postalcode  | address.country  |
+-------------------+------------------------------+-----------------+------------------------+---------------------+------------------+
| 1                 | 18 Merivale St               | South Brisbane  | QLD                    | 4101                | AUS              |
| 2                 | 19 Serpentine Rd             | Albany          | WA                     | 6330                | AUS              |
| 3                 | 317 VICTORIA ST GR           | BRUNSWICK       | VIC                    | 3056                | AUS              |
| 4                 | DUPLEX 6/16-18 O'CONNELL ST  | AINSLIE         | ACT                    | 2602                | AUS              |
| 5                 | LOT 154 470 BRYGON CREEK DR  | UPPER COOMERA   | QLD                    | 4209                | AUS              |
| 6                 | 16 GREENE ST                 | WARRAWONG       | ACT                    | 2502                | AUS              |
| 7                 | UNIT 47/16 BLAIRMOUNT ST     | PARKINSON       | QLD                    | 4115                | AUS              |
| 8                 | 13-15 FRANCESCO CRES         | BELLA VISTA     | NSW                    | 2153                | AUS              |
| 9                 | 4 RYANS LANE                 | HEATHCOTE       | VIC                    | 3523                | AUS              |
| 10                | 1 CHRISTMAS LN               | NORTH POLE      | VIC                    | 1111                | AUS              |
+-------------------+------------------------------+-----------------+------------------------+---------------------+------------------+



+-----------+---------------+--------+----------------------------------+---------------+
|Confidence |StreetName     |HouseNumber |     AddressLine1             | AddressType   |
+-----------+---------------+------------+------------------------------+---------------+
| 100.00  	| MERIVALE      | 18 		 | 18 MERIVALE ST               | S    			|
| 99.42   	| SERPENTINE    | 19     	 | 19 SERPENTINE RD E           | S    			|
| 97.95   	| VICTORIA      | 317    	 | 317 VICTORIA ST              | S    			|
| 100.00  	| O'CONNELL     | 16-18  	 | DUP 6 16-18 O'CONNELL ST     | S    			|
| 0.00    	| BRYGON CREEK  | 470    	 | LOT 154 470 BRYGON CREEK DR  | U    			|
| 76.99   	| GREENE        | 16      	 | 16 GREENE ST                 | S    			|
| 100.00  	| BLAIRMOUNT    | 16     	 | U 47 16 BLAIRMOUNT ST        | S    			|
| 100.00  	| FRANCESCO     | 13-15  	 | 13-15 FRANCESCO CRES         | S    			|
| 100.00  	| RYANS         | 4      	 | 4 RYANS LANE                 | S    			|
| 0.00    	| CHRISTMAS     | 1      	 | 1 CHRISTMAS LN               | U    			|
+-----------+---------------+------------+------------------------------+---------------+

Reference data placed on HDFS and downloaded on local nodes for jobs

-- Register Universal Addressing Module [UAM-Global] BDQ Hive UDAF Jar 
ADD JAR <Directory path>/uam.global.hive.${project.version}.jar;
        
ADD FILE <Directory path>/libAddressDoctor5.so;
        
-- Provide alias to UDAF class (optional). String in quotes represent
class names needed for this job to run.         
CREATE TEMPORARY FUNCTION globalvalidation as 'com.pb.bdq.uam.process.hive.global.GlobalAddressingUDF';
        
-- set Engine configuration
set hivevar:engineconf='[{"referenceData":{"referenceDataPathLocation"
:"HDFS","dataDir":"/user/hadoop/ReferenceData/Global","dataDownloader":{"dataDownloader":
"HDFS","localFSRepository":"/opt/PitneyBowes/UAM_Global"}},"databaseType":"BATCH_INTERACTIVE",
"preloadingType":"PARTIAL","allCountries":true,"supportedCountries":"ALL"}]';
        
        
-- set input configuration
set hivevar:inputconf='{"resultStateProvinceType":"COUNTRY_STANDARD",
"processMatchingScope":"ALL","inputForceCountryISO3":null,"inputDefaultCountryISO3":"USA",
"inputFormatDelimiter":"CRLF","resultFormatDelimiter":"CRLF","resultIncludeInputs":false,
"resultCountryType":"NAME_EN","processOptimizationLevel":"STANDARD","resultPreferredLanguage":
"DATABASE","processMode":"BATCH","resultPreferredScript":"DATABASE","resultMaximumResults":1,
"resultCasing":"NATIVE","properties":{"Result.MaximumResults":"1","Database.AddressGlobal":"Database",
"Input.FormatDelimiter":"CRLF","Process.Mode":"BATCH","Input.ForceCountryISO3":"","Result.CountryType":
"NAME_EN","Process.OptimizationLevel":"STANDARD","Result.IncludeInputs":"false",
"Input.DefaultCountryISO3":"USA","Process.EnrichmentAMAS":"false","Result.PreferredScript":
"DATABASE","Process.MatchingScope":"ALL","Result.Casing":"NATIVE","Result.PreferredLanguage"
:"DATABASE","Result.StateProvinceType":"COUNTRY_STANDARD","Result.FormatDelimiter":"CRLF"}}';
        
-- set general configuration
set hivevar:generalconf='{"cacheSize":"LARGE","maxThreadCount":8,"maxAddressObjectCount":8,
"rangesToExpand":"NONE","flexibleRangeExpansion":"ON","enableTransactionLogging":false,
"maxMemoryUsageMB":1024,"verbose":false}';
        
-- set unlock codec
set hivevar:unlockCode='';
        
-- set header
set hivevar:header='InputKeyValue,AddressLine1,AddressLine2,
City,PostalCode,StateProvince,FirmName,Country';
        
-- Execute Query on the desired table, to display the job output on console. 
This query returns a map of key value pairs containing output fields for each row.
SELECT tmp2.record["Country"],tmp2.record["Confidence"],tmp2.record["MailabilityScore"],
tmp2.record["HouseNumber"],tmp2.record["AddressLine1"],tmp2.record["StreetName"],
tmp2.record["PostalCode"],tmp2.record["ElementInputStatus"] FROM 
(SELECT  globalvalidation(${hivevar:engineconf},${hivevar:generalconf},
${hivevar:inputconf},${hivevar:unlockCode},${hivevar:header},inputkeyvalue,
addressline1,addressline2,city,postalcode,stateprovince,firmname,country)
 as mygp from address_global) as addressgroup LATERAL VIEW explode
(addressgroup.mygp) tmp2 as record ;

Reference data placed on HDFS and downloaded to working directory for jobs

-- Register Universal Addressing Module [UAM-Global] BDQ Hive UDAF Jar 
ADD JAR <Directory path>/uam.global.hive.${project.version}.jar;

ADD FILE <Directory path>/libAddressDoctor5.so;

-- Provide alias to UDAF class (optional). String in quotes represent 
class names needed for this job to run.
CREATE TEMPORARY FUNCTION globalvalidation as 'com.pb.bdq.uam.process.hive.global.GlobalAddressingUDF';

-- Add Reference Data zipped Files
ADD ARCHIVE hdfs:///user/hduser/ReferenceData/AddressQuality/AD/AD2032017_590.zip;
ADD ARCHIVE hdfs:///user/hduser/ReferenceData/AddressQuality/AD/AD3032017_590.zip;

-- set Engine configuration
set hivevar:engineconf='[{"referenceData":{"dataDir":"/user/hduser/ReferenceData/AddressQuality/AD/AD2032017_590.zip",
"referenceDataPathLocation":"HDFS"},"databaseType":"BATCH_INTERACTIVE",
"preloadingType":"NONE","allCountries":false,"supportedCountries":"ALL"},
{"referenceData":{"dataDir":"/user/hduser/ReferenceData/AddressQuality/AD/AD3032017_590.zip",
"referenceDataPathLocation":"HDFS"},"databaseType":"BATCH_INTERACTIVE",
"preloadingType":"NONE","allCountries":false,"supportedCountries":"ALL"}]';


-- set input configuration
set hivevar:inputconf='{"resultStateProvinceType":"COUNTRY_STANDARD",
"processMatchingScope":"ALL","inputForceCountryISO3":null,"inputDefaultCountryISO3":
"USA","inputFormatDelimiter":"CRLF","resultFormatDelimiter":"CRLF","resultIncludeInputs":false,
"resultCountryType":"NAME_EN","processOptimizationLevel":"STANDARD","resultPreferredLanguage"
:"DATABASE","processMode":"BATCH","resultPreferredScript":"DATABASE","resultMaximumResults":1,
"resultCasing":"NATIVE","properties":{"Result.MaximumResults":"1","Database.AddressGlobal":
"Database","Input.FormatDelimiter":"CRLF","Process.Mode":"BATCH","Input.ForceCountryISO3":
"","Result.CountryType":"NAME_EN","Process.OptimizationLevel":"STANDARD","Result.IncludeInputs"
:"false","Input.DefaultCountryISO3":"USA","Process.EnrichmentAMAS":"false",
"Result.PreferredScript":"DATABASE","Process.MatchingScope":"ALL","Result.Casing":"NATIVE",
"Result.PreferredLanguage":"DATABASE","Result.StateProvinceType":"COUNTRY_STANDARD",
"Result.FormatDelimiter":"CRLF"}}';

-- set general configuration
set hivevar:generalconf='{"cacheSize":"LARGE","maxThreadCount":8,"maxAddressObjectCount":8,
"rangesToExpand":"NONE","flexibleRangeExpansion":"ON","enableTransactionLogging":false,
"maxMemoryUsageMB":1024,"verbose":false}';
-- set unlock codec
set hivevar:unlockCode='';

-- set header
set hivevar:header='InputKeyValue,AddressLine1,AddressLine2,City,PostalCode,StateProvince,FirmName,Country';

-- Execute Query on the desired table, to display the job output on console.
 This query returns a map of key value pairs containing output fields for each row.
SELECT tmp2.record["Country"],tmp2.record["Confidence"],tmp2.record["MailabilityScore"],
tmp2.record["HouseNumber"],tmp2.record["AddressLine1"],tmp2.record["StreetName"],tmp2.record
["PostalCode"],tmp2.record["ElementInputStatus"]
FROM (SELECT  globalvalidation(${hivevar:engineconf},${hivevar:generalconf},
${hivevar:inputconf},${hivevar:unlockCode},${hivevar:header},inputkeyvalue,
addressline1,addressline2,city,postalcode,stateprovince,firmname,country) 
as mygp from address_global) as addressgroup LATERAL VIEW explode(addressgroup.mygp) tmp2 as record ;