`
yugouai
  • 浏览: 491715 次
  • 性别: Icon_minigender_1
  • 来自: 深圳
社区版块
存档分类
最新评论

记录中的最大行max_row

 
阅读更多
import java.util.Arrays;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

/**
 * Hive UDAF {@code maxrow(compare_col, col1, col2, ...)}.
 *
 * <p>For each group it keeps the row whose first argument is the largest and
 * returns that row (the compared value followed by the associated column
 * values) as a struct. Because the argument names are not available to the
 * UDAF, the struct fields are named {@code col0}, {@code col1}, ...
 *
 * <p>If several rows share the maximum value, the first one encountered wins.
 * A group with no rows yields {@code NULL}.
 */
@Description(name = "maxrow", value = "_FUNC_(expr) - Returns the maximum value of expr and values of associated columns as a struct")
public class GenericUDAFMaxRow extends AbstractGenericUDAFResolver {

  static final Log LOG = LogFactory.getLog(GenericUDAFMaxRow.class.getName());

  /**
   * Validates the argument types and creates the evaluator.
   *
   * @param parameters type information of the call arguments; the first one is
   *        the column that is compared
   * @return a new {@link GenericUDAFMaxRowEvaluator}
   * @throws SemanticException if no argument is given or the first argument's
   *         type does not support comparison
   */
  @Override
  public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
    // Guard against maxrow() with no arguments, which would otherwise fail
    // with an ArrayIndexOutOfBoundsException below.
    if (parameters == null || parameters.length == 0) {
      throw new UDFArgumentTypeException(0, "At least one argument (the column to compare) is expected.");
    }
    // Verify that the first parameter supports comparisons.
    ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(parameters[0]);
    if (!ObjectInspectorUtils.compareSupported(oi)) {
      throw new UDFArgumentTypeException(0, "Cannot support comparison of map<> type or complex type containing map<>.");
    }
    return new GenericUDAFMaxRowEvaluator();
  }

  // @UDFType(distinctLike=true)
  /**
   * Evaluator that tracks the current maximum row in a {@link MaxAgg} buffer.
   */
  public static class GenericUDAFMaxRowEvaluator extends GenericUDAFEvaluator {

    /** Inspectors for the incoming values (raw columns, or fields of the partial struct). */
    ObjectInspector[] inputOIs;
    /** Standard inspectors describing the copies kept in the aggregation buffer. */
    ObjectInspector[] outputOIs;
    /** Inspector of the struct returned by terminate() / terminatePartial(). */
    ObjectInspector structOI;
    /** Inspector of incoming partial results; only set when merging partials. */
    private StructObjectInspector partialOI;

    /**
     * Sets up the inspectors according to the evaluation mode.
     *
     * <p>PARTIAL1 and COMPLETE receive the original column values; PARTIAL2
     * and FINAL receive the struct produced by {@link #terminatePartial}.
     * The mode, not the shape of the arguments, decides which path is taken,
     * so a call with a single struct-typed column is handled correctly.
     *
     * @param mode the aggregation mode
     * @param parameters inspectors of the inputs for that mode
     * @return the inspector of the struct this evaluator outputs
     * @throws HiveException if a merging mode does not receive exactly one struct
     */
    @Override
    public ObjectInspector init(Mode mode, ObjectInspector[] parameters) throws HiveException {
      super.init(mode, parameters);

      if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
        // Input is raw rows: iterate() will be called.
        partialOI = null;
        initMapSide(parameters);
      } else {
        // Input is partial aggregations: merge() will be called.
        if (parameters.length != 1 || !(parameters[0] instanceof StructObjectInspector)) {
          throw new HiveException("Expected exactly one struct parameter in mode " + mode);
        }
        initReduceSide((StructObjectInspector) parameters[0]);
      }

      return structOI;
    }

    /* Initialize the UDAF for raw column input (PARTIAL1 / COMPLETE). */
    private void initMapSide(ObjectInspector[] parameters) throws HiveException {
      int length = parameters.length;
      outputOIs = new ObjectInspector[length];
      List<String> fieldNames = new ArrayList<String>(length);
      List<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>(length);

      for (int i = 0; i < length; i++) {
        fieldNames.add("col" + i); // field names are not made available! :(
        outputOIs[i] = ObjectInspectorUtils.getStandardObjectInspector(parameters[i]);
        fieldOIs.add(outputOIs[i]);
      }

      inputOIs = parameters;
      structOI = ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
    }

    /* Initialize the UDAF for partial-result input (PARTIAL2 / FINAL). */
    private void initReduceSide(StructObjectInspector inputStructOI) throws HiveException {
      List<? extends StructField> fields = inputStructOI.getAllStructFieldRefs();
      int length = fields.size();
      inputOIs = new ObjectInspector[length];
      outputOIs = new ObjectInspector[length];
      for (int i = 0; i < length; i++) {
        StructField field = fields.get(i);
        inputOIs[i] = field.getFieldObjectInspector();
        outputOIs[i] = ObjectInspectorUtils.getStandardObjectInspector(inputOIs[i]);
      }
      partialOI = inputStructOI;
      structOI = ObjectInspectorUtils.getStandardObjectInspector(inputStructOI);
    }

    /** Aggregation buffer: the current maximum row, or null if no row was seen yet. */
    static class MaxAgg implements AggregationBuffer {
      Object[] objects;
    }

    @Override
    public AggregationBuffer getNewAggregationBuffer() throws HiveException {
      return new MaxAgg();
    }

    @Override
    public void reset(AggregationBuffer agg) throws HiveException {
      ((MaxAgg) agg).objects = null;
    }

    /**
     * Offers one raw row to the buffer.
     *
     * @param agg the aggregation buffer
     * @param parameters the row's argument values; element 0 is the compared value
     */
    @Override
    public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
      merge(agg, parameters);
    }

    @Override
    public Object terminatePartial(AggregationBuffer agg) throws HiveException {
      return terminate(agg);
    }

    /**
     * Offers a candidate row to the buffer. The candidate is either a raw
     * {@code Object[]} row (as passed by {@link #iterate}) or a partial result
     * struct, which is decoded through the struct inspector received in
     * {@link #init} so that any struct representation is accepted.
     *
     * @param agg the aggregation buffer
     * @param partial the candidate row; {@code null} is ignored
     * @throws HiveException if the candidate cannot be interpreted as a row
     */
    @Override
    public void merge(AggregationBuffer agg, Object partial) throws HiveException {
      if (partial == null) {
        return;
      }

      List<?> candidate;
      if (partial instanceof Object[]) {
        candidate = Arrays.asList((Object[]) partial);
      } else if (partialOI != null) {
        candidate = partialOI.getStructFieldsDataAsList(partial);
      } else {
        throw new HiveException("Invalid type: " + partial.getClass().getName());
      }
      if (candidate == null) {
        return;
      }

      MaxAgg maxagg = (MaxAgg) agg;
      // Strictly greater only: on ties the row already held is kept.
      boolean isMax = maxagg.objects == null
          || ObjectInspectorUtils.compare(maxagg.objects[0], outputOIs[0], candidate.get(0), inputOIs[0]) < 0;

      if (isMax) {
        int length = candidate.size();
        Object[] copy = new Object[length];
        for (int i = 0; i < length; i++) {
          // Copy: Hive may reuse the incoming objects for the next row.
          copy[i] = ObjectInspectorUtils.copyToStandardObject(candidate.get(i), inputOIs[i]);
        }
        maxagg.objects = copy;
      }
    }

    /**
     * @param agg the aggregation buffer
     * @return the maximum row as a list of field values, or {@code null} if
     *         the buffer never received a row
     */
    @Override
    public Object terminate(AggregationBuffer agg) throws HiveException {
      MaxAgg maxagg = (MaxAgg) agg;
      if (maxagg.objects == null) {
        // Empty group: Arrays.asList(null) would throw a NullPointerException.
        return null;
      }
      return Arrays.asList(maxagg.objects);
    }
  }
}

 maxrow(compare_col, col1, col2, col3, ...):根据输入的 compare_col 列进行比较,返回该列取最大值的那一行,其中包含 compare_col、col1、col2 ... 的值。返回类型是 struct(字段名依次为 col0、col1、col2 ...),需要按 struct 的字段取值。

分享到:
评论

相关推荐

    Oracle9i的init.ora参数中文说明

    row_locking: 说明: 指定在表已更新或正在更新时是否获取行锁。如果设置为 ALWAYS, 只有在表被更新后才获取行锁。如果设置为 INTENT, 只有行锁将用于SELECT FOR UPDATE, 但在更新时将获取表锁。 值范围: ALWAYS | ...

    Oracle事例

    14、查询从多少行到多少行的记录(可以用在web开发中的分页显示) select * from ( select rownum row_id,b.* from (select a.* from sys_oper a) b ) where row_id between 15 and 20 15、对公共授予访问权 ...

    Oracle常用命令

    可以使用DEFINE命令设置系统变量EDITOR来改变文本编辑器的类型,在login.sql文件中定义如下一行 DEFINE_EDITOR=vi f、运行命令文件 SQL&gt;START test SQL&gt;@test 常用SQL*Plus语句 a、表的创建、修改、删除 创建表的...

    LINGO软件的学习

    一旦把对象聚合成集,就可以利用集来最大限度的发挥LINGO建模语言的优势。 现在我们将深入介绍如何创建集,并用数据初始化集的属性。学完本节后,你对基于建模技术的集如何引入模型会有一个基本的理解。 2.1 为什么...

    oracle学习文档 笔记 全面 深刻 详细 通俗易懂 doc word格式 清晰 连接字符串

    二进制数据类型 row 1~2000字节 可变长二进制数据,在具体定义字段的时候必须指明最大长度n long raw 1~2GB 可变长二进制数据 LOB数据类型 clob 1~4GB 只能存储字符数据 nclob 1~4GB 保存本地语言字符集数据 blob...

    mysql数据库的基本操作语法

    当主表的记录被从表参照时,主表的记录将不允许删除,如果要删除数据,需要先删除从表中依赖该记录的数据, 然后才可以删除主表的数据。还有一种就是级联删除子表数据。 注意:外键约束的参照列,在主表中引用的只能...

    cxGrid右键菜单

    procedure PopupMenu_FilterRow(Sender: TObject); procedure PopupMenu_ShowFilter_fvAlways(Sender: TObject); procedure PopupMenu_ShowFilter_fvNever(Sender: TObject); procedure PopupMenu_ShowFilter_...

    精髓Oracle讲课笔记

    -- (函数max() 求出emp表中sal字段的最大值) 46、select min(sal) from emp; -- (函数min() 求出emp表中sal字段的最小值) 47、select avg(sal) from emp; --(avg()求平均薪水); 48、select to_char(avg(sal),...

    T-SQL高级查询

    select s.id, s.name, cid, c.name, row_number() over(partition by c.name order by s.id) as rank from student s, classes c where cid = c.id; select s.id, s.name, cid, c.name, rank() over(partition by...

    jQuery LigerUI V1.1.0

    [修复]修改grid addRow()如果没有数据的时候row没有高度 [修复]修改可编辑grid中DateEditor编辑错误的问题 下拉框 [需求]支持动态改变值 [需求]增加打开下拉框前事件,利用这个参数可以用来调用其他函数,比如...

    sql2005全文检索.doc

    ' ROW_NUMBER() OVER (ORDER BY RANK DESC) AS SerialNumber ,'+ ' F.[rank], '+ ' p.*' + ' FROM'+ ' FREETEXTTABLE( CapitalInfoFactTab , (ProvinceName, CityName,  CountyName, Keyword,Title ,IndustryBName ...

    C#全能速查宝典

    1.1.18 ReadLine方法——从当前流中读取一行字符 20 1.1.19 typeof运算符——获得系统原型对象的类型 21 1.1.20 using关键字——引入命名空间 22 1.1.21 WriteLine方法——写入流 23 1.2 数学方法类——Math 25 ...

    物业管理系统

    操作员姓名) =thisform.listview1.listitems.add(,,key,4) &&添加一个列表项 skip &&相对移动记录指针enddo在表单的resize事件中添加代码为:purview=val(alltrim(tabpurview.权限级别)) &&根据用户选择设置权限级别...

    Excel 2010函数应用速查手册

    数据库函数 7.1 DGET 7.2 DPRODUCT 7.3 DCOUNTA 7.4 DSUM 7.5 DAVERAGE 7.6 DMAX 7.7 DCOUNT 7.8 DMIN 第8章 查找和引用函数 8.1 ADDRESS 8.2 COLUMN 8.3 ROW 8.4 AREAS 8.5 ROWS 8.6 COLUMNS 8.7 CHOOSE 8.8 MATCH ...

Global site tag (gtag.js) - Google Analytics