We're Moving!

The Vertica Forum is moving to a new OpenText Analytics Database (Vertica) Community.

Join us there to post discussion topics, learn about

product releases, share tips, access the blog, and much more.

Create My New Community Account Now


Error when creating R-UDF — Vertica Forum

Error when creating R-UDF

Hi,

 

I ran the script below to create an R-UDF and I'm getting an error: ROLLBACK 3466:  Function cannot have 0 return value(s).

 

I'm confused because I have a return value in my function.  Do I need to change something in my factory script?

 

TV_Attribution_Function <- function(MAP.data, tv_data) {
# Inputs are two queries
# MAP.data = visits data
# tv_data = Data with TV spots
MAP.data$Date_time <- as.POSIXct(as.character(MAP.data$Date_time), format="%F %R")
tv_data$IMPRESSIONS <- as.numeric(tv_data$IMPRESSIONS)
tv_data$Date_time <- as.POSIXct(as.character(tv_data$Date_time), format="%F %R")

missing <- tv_data[tv_data$IMPRESSIONS <= 0,]


tv_data[which(tv_data$IMPRESSIONS == 0),'IMPRESSIONS'] <- 1

#replace nas w/ 0
tv_data[is.na(tv_data)] <- 0
MAP.data[is.na(MAP.data)] <- 0



for(i in c(1:8,10:12)){
if(class(tv_data[,i])[1] == 'character'){tv_data[,i] <- as.factor(tv_data[,i])}
}
tv_data$Feed <- factor(tv_data$Feed, levels = c("",unique(tv_data$Feed)))
tv_data$SPOT_LENGTH <- as.integer(tv_data$SPOT_LENGTH)

for(i in 2:9){
if(class(MAP.data[,i])[1] == 'character'){MAP.data[,i] <- as.factor(MAP.data[,i])}
if(typeof(MAP.data[,i])[1] == 'double'){MAP.data[,i] <- as.integer(MAP.data[,i])}


}

visits_data <- MAP.data

span = 0.2
sd_x = 1.0
span_b = 0.35
sd_b = 1.0
minutes_gap = 5

days <- unique(visits_data$Day_Only, na.rm=TRUE)
final_outcome <- data.frame()

for (i in 1:length(days)){
outcome_day <- subset(visits_data, Day_Only == days[i])
outcome_2 <- outcome_day$Visits_Count
outcome_2[which(is.na(outcome_2))] <- 0
t <- 1:nrow(outcome_day)
output_set <- cbind(data.frame(outcome_day$Date_time),data.frame(outcome_day$Day_Only), data.frame(outcome_day$Visits_Minute))
v.lo <- loess(outcome_2 ~ t, span = span)
v.sd <- sd(v.lo$residuals)
baseline <- v.lo$fitted + sd_x * v.sd
direct_response <- outcome_2 - baseline
direct_response[direct_response < 0] <- 0
direct_response <- data.frame(direct_response)
colnames(direct_response) <- c('mapped_visits')
output_set <- cbind(output_set, direct_response)
final_outcome <- rbind(final_outcome, output_set)
}
colnames(final_outcome)[1] <- c("Date_time")



temp_imps <- aggregate(IMPRESSIONS ~ Date_time, data = tv_data, FUN = sum)
t_imps_plus <- merge(final_outcome, temp_imps, by = 'Date_time', all.x = TRUE, all.y = FALSE)
t_imps_plus$lagged_imps <- rep(0, nrow(t_imps_plus))

for (i in 1:nrow(t_imps_plus)){
t_imps_plus$lagged_imps[i] <- sum(t_imps_plus$IMPRESSIONS[max(1,i-minutes_gap+1):i], na.rm = TRUE)
}

tv_data$mapped_visits <- rep(0,nrow(tv_data))

for (n in (1:nrow(tv_data))){

num <- match(tv_data[n,1], t_imps_plus$Date_time)
if (is.na(num) == FALSE) {
tv_data$mapped_visits[n] <- tv_data$IMPRESSIONS[n] * minutes_gap * sum(t_imps_plus$mapped_visits[num:(num+minutes_gap-1)] / t_imps_plus$lagged_imps[num:(num+minutes_gap-1)])
} else {tv_data$mapped_visits[n] <- NA}
}


#Next for visits that resulted in a seeker signup

final_outcome <- data.frame()

# Loop for each day to create baseline visits and detect spikes
for(i in 1:length(days)){
outcome_day <- subset(visits_data, Day_Only == days[i])
outcome_2 <- outcome_day$new_seekers
outcome_2[which(is.na(outcome_2))] <- 0
t <- c(1:length(outcome_2))
output_set <- cbind(data.frame(outcome_day$Date_time),data.frame(outcome_day$Day_Only), data.frame(outcome_day$Visits_Minute))
v.lo <- loess(outcome_2 ~ t, span = span_b)
v.sd <- sd(v.lo$residuals)
baseline <- v.lo$fitted + sd_b * v.sd
#shouldnt direct response be outcome_2 - baseline, not fitted???
direct_response <- outcome_2 - v.lo$fitted
direct_response[direct_response < 0] <- 0
direct_response <- data.frame(direct_response)
colnames(direct_response) <- c('mapped_ns_visits')
output_set <- cbind(output_set, direct_response)
final_outcome <- rbind(final_outcome, output_set)
}
colnames(final_outcome)[1] <- c("Date_time")


temp_imps <- aggregate(IMPRESSIONS ~ Date_time, data = tv_data, FUN = sum)
#basically left join
t_imps_plus <- merge(final_outcome, temp_imps, by = 'Date_time', all.x = TRUE, all.y = FALSE)
t_imps_plus$lagged_imps <- rep(0, nrow(t_imps_plus))
for (i in 1:nrow(t_imps_plus)){
t_imps_plus$lagged_imps[i] <- sum(t_imps_plus$IMPRESSIONS[max(1,i-minutes_gap+1):i], na.rm = TRUE)
}


tv_data$mapped_ns_visits <- rep(0,nrow(tv_data))
for (n in 1:nrow(tv_data)){
num <- match(tv_data[n,1], t_imps_plus$Date_time)
if (is.na(num) == FALSE) {
tv_data$mapped_ns_visits[n] <- sum(t_imps_plus$mapped_ns_visits[num:(num+minutes_gap-1)]) * tv_data$IMPRESSIONS[n]*minutes_gap / sum(t_imps_plus$lagged_imps[num:(num+minutes_gap-1)])
} else {n}
}



final_outcome <- data.frame()

# Loop for each day to create baseline visits and detect spikes
for (i in 1:length(days)){
outcome_day <- subset(visits_data, Day_Only == days[i])
outcome_2 <- outcome_day$new_sitters
outcome_2[which(is.na(outcome_2))] <- 0
t <- c(1:nrow(outcome_day))
output_set <- cbind(data.frame(outcome_day$Date_time),data.frame(outcome_day$Day_Only), data.frame(outcome_day$Visits_Minute))
v.lo <- loess(outcome_2 ~ t, span = span_b)
v.sd <- sd(v.lo$residuals)
baseline <- v.lo$fitted + sd_b * v.sd
direct_response <- outcome_2 - baseline
direct_response[direct_response < 0] <- 0
direct_response <- data.frame(direct_response)
colnames(direct_response) <- c('mapped_np_visits')
output_set <- cbind(output_set, direct_response)
final_outcome <- rbind(final_outcome, output_set)
}
colnames(final_outcome)[1] <- c("Date_time")

#Fill in total impressions including lags
temp_imps <- aggregate(IMPRESSIONS ~ Date_time, data = tv_data, FUN = sum)
t_imps_plus <- merge(final_outcome, temp_imps, by = 'Date_time', all.x = TRUE, all.y = FALSE)
t_imps_plus$lagged_imps <- rep(0, nrow(t_imps_plus))
for (i in 1:nrow(t_imps_plus)){
t_imps_plus$lagged_imps[i] <- sum(t_imps_plus$IMPRESSIONS[max(1,i-minutes_gap+1):i], na.rm = TRUE)
}

#Add mapped visits data to the tv_data table and account for any overlapping spots using ratio of impressions
tv_data$mapped_np_visits <- rep(0,nrow(tv_data))
for (n in 1:nrow(tv_data)){
num <- match(tv_data[n,1], t_imps_plus$Date_time) # mapped new members when spot aired
if (is.na(num) == FALSE) {
tv_data$mapped_np_visits[n] <- tv_data$IMPRESSIONS[n] * minutes_gap *
sum(t_imps_plus$mapped_np_visits[num:(num+minutes_gap-1)] / t_imps_plus$lagged_imps[num:(num+minutes_gap-1)])
} else {n}
}

for (n in 1:nrow(tv_data)){
num <- match(tv_data[n,1], visits_data$Date_time)

if(is.na(num) == FALSE){
temp <- visits_data$new_seekers[num:(num+minutes_gap-1)]
tv_data$total_ns_visits[n] <- sum(temp, na.rm = T)

} else if(is.na(num) == TRUE){tv_data$total_ns_visits[n] <- 0}
}

## Add total new provider visits ####

for (n in 1:nrow(tv_data)){
num <- match(tv_data[n,1], visits_data$Date_time)

if(is.na(num) == FALSE){
temp <- visits_data$new_sitters[num:(num+minutes_gap-1)]
tv_data$total_np_visits[n] <- sum(temp, na.rm = T)

} else if(is.na(num) == TRUE){tv_data$total_np_visits[n] <- 0}
}


### add total day1 prems #####

for (n in 1:nrow(tv_data)){
num <- match(tv_data[n,1], visits_data$Date_time)

if(is.na(num) == FALSE){
temp <- visits_data$day1_premiums[num:(num+minutes_gap-1)]
tv_data$day1_premiums[n] <- sum(temp, na.rm = T)

} else if(is.na(num) == TRUE){tv_data$day1_premiums[n] <- 0}

}

# add week1 prems ###########

for (n in 1:nrow(tv_data)){
num <- match(tv_data[n,1], visits_data$Date_time)

if(is.na(num) == FALSE){
temp <- visits_data$week1_premiums[num:(num+minutes_gap-1)]
tv_data$week1_premiums[n] <- sum(temp, na.rm = T)

} else if(is.na(num) == TRUE){tv_data$week1_premiums[n] <- 0}

}

tv_data$attr_premiums <- tv_data$week1_premiums * tv_data$mapped_ns_visits / tv_data$total_ns_visits
return(tv_data)
}


# Factory Function
TV_Attribution_Function_Factory <- function() {
list (
name = TV_Attribution_Function
,udxtype=c("scalar")
,intype = c("any")
,outtype = c("any")
)
}

Comments

  • Hi

    Share your DDL's for creating the function and library

  • CREATE LIBRARY tv_attribute_func as '/home/local/CAREZEN/ccarpino/tv_attribute.R' LANGUAGE 'R';
    CREATE FUNCTION TV_Attribution_Function AS NAME 'TV_Attribution_Function_Factory' LIBRARY tv_attribute_func;

  • Hi ,

    Try used this syntex :

     

    CREATE OR REPLACE FUNCTION TV_Attribution_Function AS LANGUAGE 'R' NAME 'TV_Attribution_Function_Factory' LIBRARY tv_attribute_func;

  • So the issue is the DDL syntax not the function/library creation?

  • Do you validate if it fix your problem ?

  • It didn't work...

Leave a Comment

BoldItalicStrikethroughOrdered listUnordered list
Emoji
Image
Align leftAlign centerAlign rightToggle HTML viewToggle full pageToggle lights
Drop image/file