-- Register Universal Addressing Module [UAM-Global] BDQ Hive UDAF Jar
ADD JAR <Directory path>/uam.global.hive.${project.version}.jar;
ADD FILE <Directory path>/libAddressDoctor5.so;
-- Provide alias to UDAF class (optional). String in quotes represent class names needed for this job to run.
CREATE TEMPORARY FUNCTION globalvalidation as 'com.pb.bdq.uam.process.hive.global.GlobalAddressingUDAF';
set hive.map.aggr = false;
-- set engine configuration
set pb.bdq.uam.global.engine.configurations=[{ "referenceData": {"dataDir":"/media/New Volume/hduser/resources/uam/addressDoctor/5.8.0/", "referenceDataPathLocation":"LocaltoDataNodes"}, "databaseType":"BATCH_INTERACTIVE", "preloadingType":"NONE", "allCountries":false, "supportedCountries":"CAN,USA,AUS"}];
-- set input configuration
set pb.bdq.uam.global.input.configuration={"resultStateProvinceType":"COUNTRY_STANDARD", "processMatchingScope":"ALL", "processEnrichmentAMAS":false, "inputForceCountryISO3":"AUS", "inputDefaultCountryISO3":"AUS", "inputFormatDelimiter":"CRLF", "resultFormatDelimiter":"CRLF", "resultIncludeInputs":false, "resultCountryType":"NAME_EN", "processOptimizationLevel":"STANDARD", "resultPreferredLanguage":"DATABASE", "processMode":"BATCH", "resultPreferredScript":"DATABASE", "resultMaximumResults":1, "resultCasing":"NATIVE", "properties":{"Result.StateProvinceType":"COUNTRY_STANDARD", "Process.MatchingScope":"ALL", "Process.EnrichmentAMAS":"false", "Input.ForceCountryISO3":"AUS", "Input.FormatDelimiter":"CRLF", "Result.FormatDelimiter":"CRLF", "Input.DefaultCountryISO3":"AUS", "Result.IncludeInputs":"false", "Result.CountryType":"NAME_EN", "Process.OptimizationLevel":"STANDARD", "Result.PreferredLanguage":"DATABASE", "Process.Mode":"BATCH", "Result.PreferredScript":"DATABASE", "Result.MaximumResults":"1", "Result.Casing":"NATIVE", "Database.AddressGlobal":"Database"}};
-- set general configuration
set pb.bdq.uam.global.general.configuration={"cacheSize":"LARGE", "maxThreadCount":8, "maxAddressObjectCount":8, "rangesToExpand":"NONE", "flexibleRangeExpansion":"ON", "enableTransactionLogging":false, "maxMemoryUsageMB":1024};
-- set unlock codec
set pb.bdq.uam.global.unlockCode=<Insert your Unlock Code here>;
-- set header
set pb.bdq.header=recordid,AddressLine1,City,StateProvince,PostalCode,Country;
-- Execute Query on the desired table, to display the job output on console. This query returns a map of key value pairs containing output fields for each row.
SELECT tmp2.record["HouseNumber"], tmp2.record["Confidence"], tmp2.record["AddressLine1"], tmp2.record["StreetName"], tmp2.record["PostalCode"], tmp2.record["ElementInputStatus"], tmp2.record["MailabilityScore"] FROM ( SELECT globalvalidation(recordid, addressline1, city, stateprovince, postalcode, country) as mygp from address) as addressgroup LATERAL VIEW explode(addressgroup.mygp) tmp2 as record ;
-- Query to dump output data to a file
INSERT OVERWRITE LOCAL DIRECTORY '/home/hadoop/GlobalAddressing/' row format delimited FIELDS TERMINATED BY ',' lines terminated by '\n' STORED AS TEXTFILE
SELECT tmp2.record["HouseNumber"], tmp2.record["Confidence"], tmp2.record["AddressLine1"], tmp2.record["StreetName"], tmp2.record["PostalCode"], tmp2.record["ElementInputStatus"], tmp2.record["MailabilityScore"] FROM ( SELECT globalvalidation(recordid, addressline1, city, stateprovince, postalcode, country) as mygp from address) as addressgroup LATERAL VIEW explode(addressgroup.mygp) tmp2 as record ;
+-------------------+------------------------------+-----------------+------------------------+---------------------+------------------+
| address.recordid | address.addressline1 | address.city | address.stateprovince | address.postalcode | address.country |
+-------------------+------------------------------+-----------------+------------------------+---------------------+------------------+
| 1 | 18 Merivale St | South Brisbane | QLD | 4101 | AUS |
| 2 | 19 Serpentine Rd | Albany | WA | 6330 | AUS |
| 3 | 317 VICTORIA ST GR | BRUNSWICK | VIC | 3056 | AUS |
| 4 | DUPLEX 6/16-18 O'CONNELL ST | AINSLIE | ACT | 2602 | AUS |
| 5 | LOT 154 470 BRYGON CREEK DR | UPPER COOMERA | QLD | 4209 | AUS |
| 6 | 16 GREENE ST | WARRAWONG | ACT | 2502 | AUS |
| 7 | UNIT 47/16 BLAIRMOUNT ST | PARKINSON | QLD | 4115 | AUS |
| 8 | 13-15 FRANCESCO CRES | BELLA VISTA | NSW | 2153 | AUS |
| 9 | 4 RYANS LANE | HEATHCOTE | VIC | 3523 | AUS |
| 10 | 1 CHRISTMAS LN | NORTH POLE | VIC | 1111 | AUS |
+-------------------+------------------------------+-----------------+------------------------+---------------------+------------------+
+-----------+---------------+--------+----------------------------------+---------------+
|Confidence |StreetName |HouseNumber | AddressLine1 | AddressType |
+-----------+---------------+------------+------------------------------+---------------+
| 100.00 | MERIVALE | 18 | 18 MERIVALE ST | S |
| 99.42 | SERPENTINE | 19 | 19 SERPENTINE RD E | S |
| 97.95 | VICTORIA | 317 | 317 VICTORIA ST | S |
| 100.00 | O'CONNELL | 16-18 | DUP 6 16-18 O'CONNELL ST | S |
| 0.00 | BRYGON CREEK | 470 | LOT 154 470 BRYGON CREEK DR | U |
| 76.99 | GREENE | 16 | 16 GREENE ST | S |
| 100.00 | BLAIRMOUNT | 16 | U 47 16 BLAIRMOUNT ST | S |
| 100.00 | FRANCESCO | 13-15 | 13-15 FRANCESCO CRES | S |
| 100.00 | RYANS | 4 | 4 RYANS LANE | S |
| 0.00 | CHRISTMAS | 1 | 1 CHRISTMAS LN | U |
+-----------+---------------+------------+------------------------------+---------------+