-- Register Universal Address Module [UAM] BDQ Hive Loqate UDAF Jar
ADD JAR <Directory path>/uam.loqate.hive.${project.version}.jar;
-- Provide alias to UDAF class (optional). String in quotes represent class names needed for this job to run.
CREATE TEMPORARY FUNCTION loqatevalidation as 'com.pb.bdq.uam.process.hive.loqate.LoqateAddressingUDAF';
-- Adding required files to distributed cache.
ADD FILES <Directory Path>/loqate-core.car;
ADD FILES <Directory Path>/LoqateVerificationLevel.csv;
ADD FILES <Directory Path>/Loqate.csv;
ADD FILES <Directory Path>/countryTables.csv;
ADD FILES <Directory Path>/countryNameTables.csv;
set hive.map.aggr = false;
-- set process configuration
set pb.bdq.uam.loqate.process.configuration={"processType":"VALIDATE", "includeMatchedAddressElements":true, "standardizedInputAddressElements":true, "returnAddressDataBlocks":true, "casing":"Mixed", "outputReportSummary":false, "returnMultipleAddresses":false, "failedOnMultiMatchFound":false, "countryFormat":"ENGLISH", "defaultCountry":"USA", "scriptAlphabet":"Native", "returnGeocodedAddressFields":true, "acceptanceLevel":"Level0", "minimumMatchScore":0, "formatDataUsingAMASConventions":false, "singleFieldDuplicateHandling":false, "multiFieldDuplicateHandling":false, "nonStandardFieldDuplicateHandling":false, "outputFieldDuplicateHandling":false, "includeStandardAddress":true, "duplicateHandling":false, "returnMultipleAddressCount":10};
-- set general configuration
set pb.bdq.uam.loqate.general.configuration={"maxIdle":null, "minIdle":16, "maxActive":16, "maxWait":null, "whenExhaustedAction":null, "testOnBorrow":null, "testOnReturn":null, "testWhileIdle":null, "timeBetweenEvictionRunsMillis":null, "numTestsPerEvictionRun":null, "minEvictableIdleTimeMillis":null};
-- set engine configuration
set pb.bdq.uam.loqate.engine.configuration={"verbose":true, "toolInfo":true, "outputAddressFormat":false, "logInput":false, "logOutput":false, "logFileName":null, "matchScoreAbsoluteThreshold":60, "matchScoreThresholdFactor":95, "postalCodeMaxResults":10, "strictReferenceMatch":false};
-- set reference directory path
set pb.bdq.referencedata.dir=/media/New Volume/hduser/resources/uam/loqate/Linux;
-- set process type
set pb.bdq.uam.loqate.process.type=VALIDATE;
-- set input header
set pb.bdq.header='InputKeyValue,AddressLine1,AddressLine2,AddressLine3,
AddressLine4,City,StateProvince,PostalCode,Country,FirmName';
select SELECT tmp2.record["HouseNumber"], tmp2.record["Confidence"], tmp2.record["AddressLine1"], tmp2.record["StreetName"], tmp2.record["PostalCode"], tmp2.record["DPID"], tmp2.record["Barcode"] FROM ( SELECT loqatevalidation(recordid, addressline1, city, stateprovince, postalcode, country) as mygp from address) as <TABLE_NAME> LATERAL VIEW explode(addressgroup.mygp) tmp2 as record ;
-- Query to dump output data to a file
INSERT OVERWRITE LOCAL DIRECTORY '/home/hadoop/loqate/' row format delimited FIELDS TERMINATED BY ',' lines terminated by '\n' STORED AS TEXTFILE SELECT * FROM ( SELECT tmp2.record["HouseNumber"], tmp2.record["Confidence"], tmp2.record["AddressLine1"], tmp2.record["StreetName"], tmp2.record["PostalCode"], tmp2.record["DPID"], tmp2.record["Barcode"] FROM ( SELECT loqatevalidation(recordid, addressline1, city, stateprovince, postalcode, country) as mygp from address) as <TABLE_NAME> LATERAL VIEW explode(addressgroup.mygp) tmp2 as record ;
--Sample Input
+----------------+------------------------------------+----------------+-------------+---------------+
| inputkeyvalue | addressline1 | stateprovince | postalcode | country |
+----------------+------------------------------------+----------------+-------------+---------------+
| 1 | 80 Quan Su | | | Vietnam |
| 2 | Final Av. Panteón Foro Libertador | | 1010 | Venezuela |
| 3 | P O Box 834 | | | St Vincent |
| 4 | Colonia 2066 | | | Uruguay |
| 5 | Ave de la Resistance BP127 | | | Burkina Faso |
| 6 | Buyuk Turon Street, 41 | | | Uzbekistan |
| 7 | Empire State Building | NY | 10118 | US |
| 8 | 3 Leontovycha St | | | Ukraine |
| 9 | | Ceredigion | | Wales |
| 10 | 5 Main Street | Ballindalloch | | Scotland |
+----------------+------------------------------------+----------------+-------------+---------------+
-- Sample Output
+-----------+---------------+--------+---+------------------------------------+
|Match Score|StreetName |HouseNumber | addressline1 |
+-----------+---------------+------------+------------------------------------+
| 100.00 | MERIVALE | 80 | 80 Quan Su |
| 100.00 | SERPENTINE | | Final Av. Panteón Foro Libertador |
| 0.00 | VICTORIA | 0 | P O Box 834 |
| 75.00 | O'CONNELL | 2066 | Colonia 2066 |
| 83.33 | BRYGON CREEK | 470 | Ave de la Resistance BP127 |
| 100.00 | GREENE | | Buyuk Turon Street, 41 |
| 96.8254 | BLAIRMOUNT | 41 | Empire State Building |
| 83.950 | FRANCESCO | 350 | 3 Leontovycha St |
| 50.00 | RYANS | 3 | |
| 100 | CHRISTMAS | 5 | 5 Main Street |
+-----------+---------------+------------+------------------------------------+
!quit