ローカル ノードに配置されたリファレンス データ
-- Register Universal Addressing Module [UAM-Global] BDQ Hive UDAF Jar
ADD JAR <Directory path>/uam.global.hive.${project.version}.jar;
ADD FILE <Directory path>/libAddressDoctor5.so;
-- Provide alias to UDAF class (optional). String in quotes represent class names needed for this job to run.
CREATE TEMPORARY FUNCTION globalvalidation as 'com.pb.bdq.uam.process.hive.global.GlobalAddressingUDF';
-- set Engine configuration
set hivevar:engineconf='[{"referenceData":{"dataDir":"/home/hduser/ReferenceData/AddressQuality/
Global","referenceDataPathLocation":"LocaltoDataNodes"},"databaseType":"BATCH_INTERACTIVE","preloadingType":"NONE",
"allCountries":true,"supportedCountries":"CAN,USA,AUS"}]';
-- set input configuration
set hivevar:inputconf='{"resultStateProvinceType":"COUNTRY_STANDARD","processMatchingScope":
"ALL","inputForceCountryISO3":null,"inputDefaultCountryISO3":"USA","inputFormatDelimiter":"CRLF",
"resultFormatDelimiter":"CRLF","resultIncludeInputs":false,"resultCountryType":"NAME_EN",
"processOptimizationLevel":"STANDARD","resultPreferredLanguage":"DATABASE","processMode":"BATCH",
"resultPreferredScript":"DATABASE","resultMaximumResults":1,"resultCasing":"NATIVE","properties":
{"Result.MaximumResults":"1","Database.AddressGlobal":"Database","Input.FormatDelimiter":"CRLF",
"Process.Mode":"BATCH","Input.ForceCountryISO3":"","Result.CountryType":"NAME_EN",
"Process.OptimizationLevel":"STANDARD","Result.IncludeInputs":"false","Input.DefaultCountryISO3"
:"USA","Process.EnrichmentAMAS":"false","Result.PreferredScript":"DATABASE","Process.MatchingScope"
:"ALL","Result.Casing":"NATIVE","Result.PreferredLanguage":"DATABASE","Result.StateProvinceType"
:"COUNTRY_STANDARD","Result.FormatDelimiter":"CRLF"}}';
-- set general configuration
set hivevar:generalconf='{"cacheSize":"LARGE","maxThreadCount":8,
"maxAddressObjectCount":8,"rangesToExpand":"NONE","flexibleRangeExpansion":"ON",
"enableTransactionLogging":false,"maxMemoryUsageMB":1024,"verbose":false}';
-- set unlock codec
set hivevar:unlockCode='';
-- set header
set hivevar:header='recordid,AddressLine1,City,StateProvince,PostalCode,Country';
-- Execute Query on the desired table, to display the job output on console.
This query returns a map of key value pairs containing output fields for each row.
SELECT tmp2.record["HouseNumber"],tmp2.record["Confidence"],
tmp2.record["AddressLine1"],tmp2.record["StreetName"],tmp2.record["PostalCode"],
tmp2.record["ElementInputStatus"],tmp2.record["MailabilityScore"] FROM
( SELECT globalvalidation(${hivevar:engineconf},${hivevar:generalconf},${hivevar:inputconf},${hivevar:unlockCode},
${hivevar:header},recordid,addressline1,city,stateprovince,postalcode,country)
as mygp from address) as addressgroup LATERAL VIEW explode(addressgroup.mygp) tmp2 as record ;
-- Query to dump output data to a file
INSERT OVERWRITE LOCAL DIRECTORY '/home/hadoop/GlobalAddressing/'
row format delimited FIELDS TERMINATED BY ',' lines terminated by '\n' STORED AS TEXTFILE
SELECT tmp2.record["HouseNumber"],tmp2.record["Confidence"],
tmp2.record["AddressLine1"],tmp2.record["StreetName"],tmp2.record["PostalCode"],
tmp2.record["ElementInputStatus"],tmp2.record["MailabilityScore"] FROM
( SELECT globalvalidation(${hivevar:engineconf},${hivevar:generalconf},
${hivevar:inputconf},${hivevar:unlockCode},${hivevar:header},recordid,addressline1,
city,stateprovince,postalcode,country) as mygp from address) as addressgroup
LATERAL VIEW explode(addressgroup.mygp) tmp2 as record ;
+-------------------+------------------------------+-----------------+------------------------+---------------------+------------------+
| address.recordid | address.addressline1 | address.city | address.stateprovince | address.postalcode | address.country |
+-------------------+------------------------------+-----------------+------------------------+---------------------+------------------+
| 1 | 18 Merivale St | South Brisbane | QLD | 4101 | AUS |
| 2 | 19 Serpentine Rd | Albany | WA | 6330 | AUS |
| 3 | 317 VICTORIA ST GR | BRUNSWICK | VIC | 3056 | AUS |
| 4 | DUPLEX 6/16-18 O'CONNELL ST | AINSLIE | ACT | 2602 | AUS |
| 5 | LOT 154 470 BRYGON CREEK DR | UPPER COOMERA | QLD | 4209 | AUS |
| 6 | 16 GREENE ST | WARRAWONG | ACT | 2502 | AUS |
| 7 | UNIT 47/16 BLAIRMOUNT ST | PARKINSON | QLD | 4115 | AUS |
| 8 | 13-15 FRANCESCO CRES | BELLA VISTA | NSW | 2153 | AUS |
| 9 | 4 RYANS LANE | HEATHCOTE | VIC | 3523 | AUS |
| 10 | 1 CHRISTMAS LN | NORTH POLE | VIC | 1111 | AUS |
+-------------------+------------------------------+-----------------+------------------------+---------------------+------------------+
+-----------+---------------+--------+----------------------------------+---------------+
|Confidence |StreetName |HouseNumber | AddressLine1 | AddressType |
+-----------+---------------+------------+------------------------------+---------------+
| 100.00 | MERIVALE | 18 | 18 MERIVALE ST | S |
| 99.42 | SERPENTINE | 19 | 19 SERPENTINE RD E | S |
| 97.95 | VICTORIA | 317 | 317 VICTORIA ST | S |
| 100.00 | O'CONNELL | 16-18 | DUP 6 16-18 O'CONNELL ST | S |
| 0.00 | BRYGON CREEK | 470 | LOT 154 470 BRYGON CREEK DR | U |
| 76.99 | GREENE | 16 | 16 GREENE ST | S |
| 100.00 | BLAIRMOUNT | 16 | U 47 16 BLAIRMOUNT ST | S |
| 100.00 | FRANCESCO | 13-15 | 13-15 FRANCESCO CRES | S |
| 100.00 | RYANS | 4 | 4 RYANS LANE | S |
| 0.00 | CHRISTMAS | 1 | 1 CHRISTMAS LN | U |
+-----------+---------------+------------+------------------------------+---------------+
HDFS に配置され、ジョブ用のローカル ノードにダウンロードされたリファレンス データ
-- Register Universal Addressing Module [UAM-Global] BDQ Hive UDAF Jar
ADD JAR <Directory path>/uam.global.hive.${project.version}.jar;
ADD FILE <Directory path>/libAddressDoctor5.so;
-- Provide alias to UDAF class (optional). String in quotes represent
class names needed for this job to run.
CREATE TEMPORARY FUNCTION globalvalidation as 'com.pb.bdq.uam.process.hive.global.GlobalAddressingUDF';
-- set Engine configuration
set hivevar:engineconf='[{"referenceData":{"referenceDataPathLocation"
:"HDFS","dataDir":"/user/hadoop/ReferenceData/Global","dataDownloader":{"dataDownloader":
"HDFS","localFSRepository":"/opt/PitneyBowes/UAM_Global"}},"databaseType":"BATCH_INTERACTIVE",
"preloadingType":"PARTIAL","allCountries":true,"supportedCountries":"ALL"}]';
-- set input configuration
set hivevar:inputconf='{"resultStateProvinceType":"COUNTRY_STANDARD",
"processMatchingScope":"ALL","inputForceCountryISO3":null,"inputDefaultCountryISO3":"USA",
"inputFormatDelimiter":"CRLF","resultFormatDelimiter":"CRLF","resultIncludeInputs":false,
"resultCountryType":"NAME_EN","processOptimizationLevel":"STANDARD","resultPreferredLanguage":
"DATABASE","processMode":"BATCH","resultPreferredScript":"DATABASE","resultMaximumResults":1,
"resultCasing":"NATIVE","properties":{"Result.MaximumResults":"1","Database.AddressGlobal":"Database",
"Input.FormatDelimiter":"CRLF","Process.Mode":"BATCH","Input.ForceCountryISO3":"","Result.CountryType":
"NAME_EN","Process.OptimizationLevel":"STANDARD","Result.IncludeInputs":"false",
"Input.DefaultCountryISO3":"USA","Process.EnrichmentAMAS":"false","Result.PreferredScript":
"DATABASE","Process.MatchingScope":"ALL","Result.Casing":"NATIVE","Result.PreferredLanguage"
:"DATABASE","Result.StateProvinceType":"COUNTRY_STANDARD","Result.FormatDelimiter":"CRLF"}}';
-- set general configuration
set hivevar:generalconf='{"cacheSize":"LARGE","maxThreadCount":8,"maxAddressObjectCount":8,
"rangesToExpand":"NONE","flexibleRangeExpansion":"ON","enableTransactionLogging":false,
"maxMemoryUsageMB":1024,"verbose":false}';
-- set unlock codec
set hivevar:unlockCode='';
-- set header
set hivevar:header='InputKeyValue,AddressLine1,AddressLine2,
City,PostalCode,StateProvince,FirmName,Country';
-- Execute Query on the desired table, to display the job output on console.
This query returns a map of key value pairs containing output fields for each row.
SELECT tmp2.record["Country"],tmp2.record["Confidence"],tmp2.record["MailabilityScore"],
tmp2.record["HouseNumber"],tmp2.record["AddressLine1"],tmp2.record["StreetName"],
tmp2.record["PostalCode"],tmp2.record["ElementInputStatus"] FROM
(SELECT globalvalidation(${hivevar:engineconf},${hivevar:generalconf},
${hivevar:inputconf},${hivevar:unlockCode},${hivevar:header},inputkeyvalue,
addressline1,addressline2,city,postalcode,stateprovince,firmname,country)
as mygp from address_global) as addressgroup LATERAL VIEW explode
(addressgroup.mygp) tmp2 as record ;
HDFS に配置され、ジョブ用の作業ディレクトリにダウンロードされたリファレンス データ
-- Register Universal Addressing Module [UAM-Global] BDQ Hive UDAF Jar
ADD JAR <Directory path>/uam.global.hive.${project.version}.jar;
ADD FILE <Directory path>/libAddressDoctor5.so;
-- Provide alias to UDAF class (optional). String in quotes represent
class names needed for this job to run.
CREATE TEMPORARY FUNCTION globalvalidation as 'com.pb.bdq.uam.process.hive.global.GlobalAddressingUDF';
-- Add Reference Data zipped Files
ADD ARCHIVE hdfs:///user/hduser/ReferenceData/AddressQuality/AD/AD2032017_590.zip;
ADD ARCHIVE hdfs:///user/hduser/ReferenceData/AddressQuality/AD/AD3032017_590.zip;
-- set Engine configuration
set hivevar:engineconf='[{"referenceData":{"dataDir":"/user/hduser/ReferenceData/AddressQuality/AD/AD2032017_590.zip",
"referenceDataPathLocation":"HDFS"},"databaseType":"BATCH_INTERACTIVE",
"preloadingType":"NONE","allCountries":false,"supportedCountries":"ALL"},
{"referenceData":{"dataDir":"/user/hduser/ReferenceData/AddressQuality/AD/AD3032017_590.zip",
"referenceDataPathLocation":"HDFS"},"databaseType":"BATCH_INTERACTIVE",
"preloadingType":"NONE","allCountries":false,"supportedCountries":"ALL"}]';
-- set input configuration
set hivevar:inputconf='{"resultStateProvinceType":"COUNTRY_STANDARD",
"processMatchingScope":"ALL","inputForceCountryISO3":null,"inputDefaultCountryISO3":
"USA","inputFormatDelimiter":"CRLF","resultFormatDelimiter":"CRLF","resultIncludeInputs":false,
"resultCountryType":"NAME_EN","processOptimizationLevel":"STANDARD","resultPreferredLanguage"
:"DATABASE","processMode":"BATCH","resultPreferredScript":"DATABASE","resultMaximumResults":1,
"resultCasing":"NATIVE","properties":{"Result.MaximumResults":"1","Database.AddressGlobal":
"Database","Input.FormatDelimiter":"CRLF","Process.Mode":"BATCH","Input.ForceCountryISO3":
"","Result.CountryType":"NAME_EN","Process.OptimizationLevel":"STANDARD","Result.IncludeInputs"
:"false","Input.DefaultCountryISO3":"USA","Process.EnrichmentAMAS":"false",
"Result.PreferredScript":"DATABASE","Process.MatchingScope":"ALL","Result.Casing":"NATIVE",
"Result.PreferredLanguage":"DATABASE","Result.StateProvinceType":"COUNTRY_STANDARD",
"Result.FormatDelimiter":"CRLF"}}';
-- set general configuration
set hivevar:generalconf='{"cacheSize":"LARGE","maxThreadCount":8,"maxAddressObjectCount":8,
"rangesToExpand":"NONE","flexibleRangeExpansion":"ON","enableTransactionLogging":false,
"maxMemoryUsageMB":1024,"verbose":false}';
-- set unlock codec
set hivevar:unlockCode='';
-- set header
set hivevar:header='InputKeyValue,AddressLine1,AddressLine2,City,PostalCode,StateProvince,FirmName,Country';
-- Execute Query on the desired table, to display the job output on console.
This query returns a map of key value pairs containing output fields for each row.
SELECT tmp2.record["Country"],tmp2.record["Confidence"],tmp2.record["MailabilityScore"],
tmp2.record["HouseNumber"],tmp2.record["AddressLine1"],tmp2.record["StreetName"],tmp2.record
["PostalCode"],tmp2.record["ElementInputStatus"]
FROM (SELECT globalvalidation(${hivevar:engineconf},${hivevar:generalconf},
${hivevar:inputconf},${hivevar:unlockCode},${hivevar:header},inputkeyvalue,
addressline1,addressline2,city,postalcode,stateprovince,firmname,country)
as mygp from address_global) as addressgroup LATERAL VIEW explode(addressgroup.mygp) tmp2 as record ;