使用hive udf进行模型调用,有以下两种方案:
方案一:直接使用eppdev-mlib提供的原生udf,调用时需要传入网关地址url、模型编码信息;方案二:基于原生udf编写自定义udf,将网关地址url、模型编码信息固化在代码中,调用时无需再传入。
目前eppdev-mlib提供的原生udf支持hive1.2、2.3、3.1三个版本,需要根据hive版本分别下载对应的jar包来完成模型的调用:
以hive2.3为例:
# Upload the SDK UDF jar to HDFS so that Hive can load it via CREATE FUNCTION ... USING JAR.
hdfs dfs -put eppdev-mlib-sdk-hive-udf23.jar /user/udf/hive/
-- Register the helper that packs (key, value, key, value, ...) arguments into a JSON string.
-- The URI uses an empty authority (hdfs:///) so the default NameNode is used;
-- 'hdfs://user/...' would treat "user" as the NameNode host.
CREATE FUNCTION eppdev_to_json
    AS 'cn.eppdev.mlib.sdk.hive.udf.EppdevMlibToJsonUDF'
    USING JAR 'hdfs:///user/udf/hive/eppdev-mlib-sdk-hive-udf23.jar';

-- Register the generic model-call UDF (gateway url and model code are passed at call time).
CREATE FUNCTION eppdev_mlib_calc
    AS 'cn.eppdev.mlib.sdk.hive.udf.EppdevMlibCalcUDF'
    USING JAR 'hdfs:///user/udf/hive/eppdev-mlib-sdk-hive-udf23.jar';
进行hive调用可以有两种方式:
方法1:输入3个参数,获取json结果
-- Three-argument form: gateway url, model code, request JSON -> full result JSON.
-- NOTE(review): column name sepal_with may be a typo for sepal_width — confirm against the iris_data schema.
SELECT
    eppdev_mlib_calc(
        'http://localhost:11541/consumer',  -- gateway url
        'test-01',                          -- model code
        eppdev_to_json(
            'sepal.width',  sepal_with,
            'sepal.height', sepal_height,
            'petal.width',  petal_width,
            'petal.height', petal_height
        )
    ) AS full_result
FROM iris_data;
输出结果为全量的json:
{ "probability(Setosa)": 1.0, "probability(Virginica)": 0.0, "probability(Versicolor)": 0.0, "variety": "Setosa" }
方法2:输入4个参数,直接获取具体结果
-- Four-argument form: the extra last argument names the single result field to return.
SELECT
    eppdev_mlib_calc(
        'http://localhost:11541/consumer',  -- gateway url
        'test-01',                          -- model code
        eppdev_to_json(
            'sepal.width',  sepal_with,
            'sepal.height', sepal_height,
            'petal.width',  petal_width,
            'petal.height', petal_height
        ),
        'variety'                           -- result field to extract
    ) AS variety
FROM iris_data;
输出结果为variety结果,如:Setosa
以HIVE2.3为例
<!-- Maven dependency on the eppdev-mlib Hive UDF SDK (artifact built for Hive 2.3). -->
<dependencies>
    <dependency>
        <groupId>cn.eppdev.mlib</groupId>
        <artifactId>eppdev-mlib-sdk-hive-udf23</artifactId>
        <version>1.0.0</version>
    </dependency>
</dependencies>
package cn.eppdev.mlib.sample.hive.udf; import org.apache.hadoop.io.Text; import org.apache.hadoop.hive.ql.exec.UDF; import cn.eppdev.mlib.sdk.hive.udf.EppdevMlibCalcUDF; public class EppdevMlibTest01UDF extends EppdevMlibCalcUDF{ // 网关地址 static final CONSUMER_BASIC_URL = "http://localhost:11541/consumer"; // 模型编码 static final MODEL_CODE = "test-01"; public Text evaluate(String postJson) { return eppdevMlibCalc(CONSUMER_BASIC_URL, MODEL_CODE, postJson); } public Text evaluate(String postJson, String resultField){ return eppdevMlibCalc(CONSUMER_BASIC_URL, MODEL_CODE, postJson, resultField); } }
# Upload both jars to HDFS: the SDK jar and the jar built from the custom UDF project.
hdfs dfs -put eppdev-mlib-sdk-hive-udf23.jar /user/udf/hive/
hdfs dfs -put eppdev-mlib-sample-hive-udf.jar /user/udf/hive/
上述命令中eppdev-mlib-sample-hive-udf.jar为自定义udf编译后的jar包
-- Register the helper that packs (key, value, key, value, ...) arguments into a JSON string.
CREATE FUNCTION eppdev_to_json
    AS 'cn.eppdev.mlib.sdk.hive.udf.EppdevMlibToJsonUDF'
    USING JAR 'hdfs:///user/udf/hive/eppdev-mlib-sdk-hive-udf23.jar';

-- Register the custom UDF class (not the SDK base class), so that the 1- and 2-argument
-- evaluate() overloads with the built-in gateway url / model code are available.
-- Both jars are listed: the custom class extends a class from the SDK jar.
CREATE FUNCTION eppdev_mlib_calc_test01
    AS 'cn.eppdev.mlib.sample.hive.udf.EppdevMlibTest01UDF'
    USING JAR 'hdfs:///user/udf/hive/eppdev-mlib-sdk-hive-udf23.jar',
          JAR 'hdfs:///user/udf/hive/eppdev-mlib-sample-hive-udf.jar';
此时进行hive调用可以有两种方式:
方法1:输入1个参数,获取json结果
-- One-argument form of the custom UDF: request JSON -> full result JSON.
SELECT
    eppdev_mlib_calc_test01(
        eppdev_to_json(
            'sepal.width',  sepal_with,
            'sepal.height', sepal_height,
            'petal.width',  petal_width,
            'petal.height', petal_height
        )
    ) AS full_result
FROM iris_data;
输出结果为全量json
{ "probability(Setosa)": 1.0, "probability(Virginica)": 0.0, "probability(Versicolor)": 0.0, "variety": "Setosa" }
方法2:输入2个参数,获取具体结果
-- Two-argument form of the custom UDF: request JSON plus the result field to return.
-- Uses eppdev_mlib_calc_test01; the generic eppdev_mlib_calc additionally requires
-- the gateway url and model code as its first two arguments.
SELECT
    eppdev_mlib_calc_test01(
        eppdev_to_json(
            'sepal.width',  sepal_with,
            'sepal.height', sepal_height,
            'petal.width',  petal_width,
            'petal.height', petal_height
        ),
        'variety'  -- result field to extract
    ) AS variety
FROM iris_data;
输出结果为variety结果,如:Setosa