Options

Text Index Java Tokenizer

 

Can a Java UDTF be used as the tokenizer for a text index?

 

CREATE TEXT INDEX Table_IDX ON Table(_PK_, _Content_) TOKENIZER public.JavaCustomTokenizer(Varchar);

 

I’m getting the error below when trying to use my custom Java UDTF as the tokenizer.

ERROR 7129: Tokenizer UDx string must be polymorphic or have a single input field of CHAR, VARCHAR, LONG VARCHAR, VARBINARY, LONG VARBINARY, or USER DEFINED argument type

 

 

/**
 * Factory for a user-defined transform function that splits one VARCHAR input
 * column into lower-cased, whitespace-delimited tokens, emitting one output
 * row per token.
 */
public class TokenFactory extends TransformFunctionFactory {

    /** Token delimiter, compiled once instead of on every input row. */
    private static final java.util.regex.Pattern WHITESPACE = java.util.regex.Pattern.compile("\\s+");

    /**
     * Declares the signature: a single VARCHAR argument and a single VARCHAR
     * result column.
     */
    @Override
    public void getPrototype(ServerInterface srvInterface, ColumnTypes argTypes, ColumnTypes returnType) {
        argTypes.addVarchar();
        returnType.addVarchar();
    }

    /**
     * Sizes the output column, named "token", to the string length of the
     * first input column, so a token can never be longer than its source.
     */
    @Override
    public void getReturnType(ServerInterface srvInterface, SizedColumnTypes inputTypes, SizedColumnTypes outputTypes) {
        outputTypes.addVarchar(inputTypes.getColumnType(0).getStringLength(), "token");
    }

    /** Transform function that performs the actual tokenization. */
    public class TokenizeString extends TransformFunction {

        /**
         * Reads every row of the partition from input column 0 and writes its
         * tokens to output column 0.
         *
         * <p>A NULL input produces a single NULL output row. A non-NULL input is
         * lower-cased and split on runs of whitespace; empty tokens (from
         * leading whitespace or an empty string) are skipped.
         *
         * @param srvInterface server handle, used here only for logging
         * @param inputReader  reader positioned on the first row of the partition
         * @param outputWriter writer receiving one row per token
         * @throws UdfException      propagated unchanged from the reader/writer
         * @throws DestroyInvocation propagated unchanged from the reader/writer
         */
        @Override
        public void processPartition(ServerInterface srvInterface, PartitionReader inputReader,
                PartitionWriter outputWriter) throws UdfException, DestroyInvocation {
            try {
                do {
                    if (inputReader.isStringNull(0)) {
                        outputWriter.setStringNull(0);
                        outputWriter.next();
                    } else {
                        // Column 0 is the only declared argument (see getPrototype);
                        // Locale.ROOT keeps case folding independent of the JVM default locale.
                        String text = inputReader.getString(0).toLowerCase(java.util.Locale.ROOT);

                        for (String rawToken : WHITESPACE.split(text)) {
                            String token = rawToken.trim();
                            if (token.isEmpty()) {
                                // Leading whitespace or an empty input yields "" from split.
                                continue;
                            }
                            outputWriter.getStringWriter(0).copy(token);
                            outputWriter.next();
                        }
                    }
                } while (inputReader.next());
            } catch (RuntimeException e) {
                // Log for diagnosis, then rethrow: swallowing the failure would let the
                // statement appear to succeed with missing tokens. Checked exceptions
                // declared in the signature are not caught here and propagate as-is.
                srvInterface.log("Exception: " + e.getClass().getSimpleName() + " Message: " + e.getMessage());
                throw e;
            }
        }
    }

    /** Creates a new tokenizer instance for each invocation. */
    @Override
    public TransformFunction createTransformFunction(ServerInterface srvInterface) {
        return new TokenizeString();
    }
}

 

Leave a Comment

BoldItalicStrikethroughOrdered listUnordered list
Emoji
Image
Align leftAlign centerAlign rightToggle HTML viewToggle full pageToggle lights
Drop image/file