这里会显示出您选择的修订版和当前版本之间的差别。
后一修订版 | 前一修订版 | ||
call:udf [2019/12/29 14:55] jinlong 创建 |
call:udf [2020/07/12 12:07] (当前版本) |
||
---|---|---|---|
行 93: | 行 93: | ||
输出结果为variety结果,如:Setosa | 输出结果为variety结果,如:Setosa | ||
+ | |||
+ | ===== 使用自定义udf ===== | ||
+ | |||
+ | ==== 程序编码 ==== | ||
+ | |||
+ | 以HIVE2.3为例 | ||
+ | |||
+ | === POM依赖 === | ||
+ | |||
+ | <code xml> | ||
+ | <dependencies> | ||
+ | <dependency> | ||
+ | <groupId>cn.eppdev.mlib</groupId> | ||
+ | <artifactId>eppdev-mlib-sdk-hive-udf23</artifactId> | ||
+ | <version>1.0.0</version> | ||
+ | </dependency> | ||
+ | </dependencies> | ||
+ | </code> | ||
+ | |||
+ | === java === | ||
+ | |||
+ | <code java> | ||
+ | |||
+ | package cn.eppdev.mlib.sample.hive.udf; | ||
+ | |||
+ | import org.apache.hadoop.io.Text; | ||
+ | import org.apache.hadoop.hive.ql.exec.UDF; | ||
+ | import cn.eppdev.mlib.sdk.hive.udf.EppdevMlibCalcUDF; | ||
+ | |||
+ | public class EppdevMlibTest01UDF extends EppdevMlibCalcUDF{ | ||
+ | |||
+ | // 网关地址 | ||
+ | static final String CONSUMER_BASIC_URL = "http://localhost:11541/consumer"; | ||
+ | |||
+ | // 模型编码 | ||
+ | static final String MODEL_CODE = "test-01"; | ||
+ | |||
+ | public Text evaluate(String postJson) { | ||
+ | return eppdevMlibCalc(CONSUMER_BASIC_URL, MODEL_CODE, postJson); | ||
+ | } | ||
+ | |||
+ | public Text evaluate(String postJson, String resultField){ | ||
+ | return eppdevMlibCalc(CONSUMER_BASIC_URL, | ||
+ | MODEL_CODE, | ||
+ | postJson, | ||
+ | resultField); | ||
+ | } | ||
+ | } | ||
+ | </code> | ||
+ | |||
+ | ==== 上传jar包到hdfs中 ==== | ||
+ | |||
+ | <code shell> | ||
+ | hdfs dfs -put eppdev-mlib-sdk-hive-udf23.jar /user/udf/hive/ | ||
+ | hdfs dfs -put eppdev-mlib-sample-hive-udf.jar /user/udf/hive/ | ||
+ | </code> | ||
+ | |||
+ | > 上述命令中eppdev-mlib-sample-hive-udf.jar为自定义udf编译后的jar包 | ||
+ | |||
+ | ==== hive中创建udf ==== | ||
+ | |||
+ | <code sql> | ||
+ | create function eppdev_to_json as 'cn.eppdev.mlib.sdk.hive.udf.EppdevMlibToJsonUDF' | ||
+ | using jar 'hdfs:///user/udf/hive/eppdev-mlib-sdk-hive-udf23.jar'; | ||
+ | create function eppdev_mlib_calc_test01 as 'cn.eppdev.mlib.sample.hive.udf.EppdevMlibTest01UDF' | ||
+ | using jar 'hdfs:///user/udf/hive/eppdev-mlib-sdk-hive-udf23.jar', | ||
+ | jar 'hdfs:///user/udf/hive/eppdev-mlib-sample-hive-udf.jar'; | ||
+ | </code> | ||
+ | |||
+ | |||
+ | ==== 在hive中进行模型调用 ==== | ||
+ | |||
+ | 此时进行hive调用可以有两种方式: | ||
+ | |||
+ | - 输入1个参数(请求数据json),可以获取到全量的模型输出 | ||
+ | - 输入2个参数(请求json和所需的输出项),可以获取指定的输出 | ||
+ | |||
+ | |||
+ | 方法1:输入1个参数,获取json结果 | ||
+ | |||
+ | <code sql> | ||
+ | select | ||
+ | eppdev_mlib_calc_test01( | ||
+ | eppdev_to_json( | ||
+ | 'sepal.width', sepal_width, | ||
+ | 'sepal.height', sepal_height, | ||
+ | 'petal.width', petal_width, | ||
+ | 'petal.height', petal_height | ||
+ | ) | ||
+ | ) as full_result | ||
+ | from iris_data; | ||
+ | </code> | ||
+ | |||
+ | 输出结果为全量json | ||
+ | |||
+ | <code json> | ||
+ | { | ||
+ | "probability(Setosa)": 1.0, | ||
+ | "probability(Virginica)": 0.0, | ||
+ | "probability(Versicolor)": 0.0, | ||
+ | "variety": "Setosa" | ||
+ | } | ||
+ | </code> | ||
+ | |||
+ | 方法2:输入2个参数,获取具体结果 | ||
+ | |||
+ | <code sql> | ||
+ | select | ||
+ | eppdev_mlib_calc_test01( | ||
+ | eppdev_to_json( | ||
+ | 'sepal.width', sepal_width, | ||
+ | 'sepal.height', sepal_height, | ||
+ | 'petal.width', petal_width, | ||
+ | 'petal.height', petal_height | ||
+ | ), | ||
+ | 'variety' | ||
+ | ) as variety | ||
+ | from iris_data; | ||
+ | </code> | ||
+ | |||
+ | 输出结果为variety结果,如:Setosa | ||
+ | |||
+ |