`
yugouai
  • 浏览: 492229 次
  • 性别: Icon_minigender_1
  • 来自: 深圳
社区版块
存档分类
最新评论

vertica自定义函数C++版

 
阅读更多

Vertica 6.x 只支持R、C++编写Vertica的UDF,由于使用内置的String 提供的函数实现不了substring_index功能,且用C++实现

 

#include <algorithm>
#include <string>
#include "Vertica.h"

using namespace Vertica;
using namespace std;


// 从text中获取第|delim_num|个|delim|前的全部子串
std::string getSubString(const std::string& text, const std::string& delim, int delim_num) {
	// 需要0个分隔符
	if (delim_num <= 0) {
		return std::string();
	}

	// 查找第n个分隔符
	size_t delim_size = delim.size();
	
	size_t next = 0;
	do {
		next = text.find(delim, next);
		if (next != std::string::npos) {
			--delim_num;
			next+=delim_size;
		}
	} while (next <= text.size() && delim_num > 0);
	return text.substr(0, next - delim_size);
}

/*
 *
 */
class SubstringIndex : public ScalarFunction
{
	public:
	virtual void processBlock(ServerInterface &srvInterface,
			BlockReader &arg_reader,
			BlockWriter &res_writer) {
		if (arg_reader.getNumCols() != 3)
			vt_report_error(0, "Function only accept 3 arguments, but %zu provided", arg_reader.getNumCols());

		// While we have inputs to process
		try{
			do {//C++实现UDF特有的(Java不需要),避免出现:ERROR 5400:  User-defined Scalar Function substring_index produced fewer output rows (0) than input rows (1)
				string src   = arg_reader.getStringRef(0).str();
				string delim = arg_reader.getStringRef(1).str();
				vint   occur = arg_reader.getIntRef(2); 
				int    tot   = occur;
				string result = getSubString(src,delim,tot);
	
				res_writer.getStringRef().copy(result);
				res_writer.next();
			} while (arg_reader.next());
		}catch(...){//这里需要捕捉异常,不然Vertica后台会退出,整个db就stop了,PS:C++的异常太复杂,...表示捕捉所有异常
			res_writer.getStringRef().copy("OTHER");
		}
	}
};


class SubstringIndexFactory : public ScalarFunctionFactory
{
	virtual ScalarFunction *createScalarFunction(ServerInterface &interface)
	{ return vt_createFuncObj(interface.allocator, SubstringIndex); }

	virtual void getPrototype(ServerInterface &interface,
			ColumnTypes &argTypes,
			ColumnTypes &returnType)
	{
		argTypes.addVarchar();
		argTypes.addVarchar();
		argTypes.addInt();
		returnType.addVarchar();
	}

	virtual void getReturnType(ServerInterface &srvInterface,
			const SizedColumnTypes &argTypes,
			SizedColumnTypes &returnType)
	{
		const VerticaType &t = argTypes.getColumnType(0);
		returnType.addVarchar(t.getStringLength());
	}
};

RegisterFactory(SubstringIndexFactory);

 

 

dwyyproduct=> select substring_index('a/b/c','/',2);
 substring_index 
-----------------
 a/b
(1 row)

dwyyproduct=> select substring_index('a/b/c','/',3);
 substring_index 
-----------------
 a/b/c
(1 row)

dwyyproduct=> select substring_index('a/b/c','/',4);
 substring_index 
-----------------
 a/b/c
(1 row)

dwyyproduct=> select substring_index('a/b/c','/',0);
 substring_index 
-----------------
 
(1 row)

 与JAVA实现不同,请注意上述功能代码的注释部分!

 

编写完cpp,在linux下执行gcc命令

gcc substring_index.cpp Vertica.cpp -I /opt/vertica/sdk/include/ -fPIC -shared -o verticaso.so

 /opt/vertica/sdk/include/ 为vertica c++ 的 .h 文件所在位置

verticaso.so 为生成的so文件

 

使用dbadmin进入vsql,执行

CREATE LIBRARY verticasolib AS '/tmp/verticaso.so' language 'C++';
CREATE FUNCTION substring_index AS LANGUAGE 'C++' NAME 'SubstringIndexFactory' LIBRARY verticasolib;

 

ALL DONE!

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics