Logistic Example

John Mount

2019-11-12

We can work an example similar to the rquery example using a data.table back-end.

library("rqdatatable")
library("data.table")
## 
## Attaching package: 'data.table'
## The following object is masked from 'package:wrapr':
## 
##     :=
# data example
dL <- wrapr::build_frame(
   "subjectID", "surveyCategory"     , "assessmentTotal" |
   1          , "withdrawal behavior", 5                 |
   1          , "positive re-framing", 2                 |
   2          , "withdrawal behavior", 3                 |
   2          , "positive re-framing", 4                 )
scale <- 0.237

# example rquery pipeline
rquery_pipeline <- local_td(dL) %.>%
  extend_nse(.,
             one = 1) %.>%
  extend_nse(.,
             probability =
               exp(assessmentTotal * scale)/
               sum(exp(assessmentTotal * scale)),
             count = sum(one),
             rank = rank(probability, surveyCategory),
             partitionby = 'subjectID') %.>%
  extend_nse(.,
             isdiagnosis = rank == count,
             diagnosis = surveyCategory) %.>%
  select_rows_nse(., 
                  isdiagnosis == TRUE) %.>%
  select_columns(., 
                 c('subjectID', 'diagnosis', 'probability')) %.>%
  orderby(., 'subjectID')

Show expanded form of query tree.

cat(format(rquery_pipeline))
mk_td("dL", c(
  "subjectID",
  "surveyCategory",
  "assessmentTotal")) %.>%
 extend(.,
  one := 1) %.>%
 extend(.,
  probability := exp(assessmentTotal * 0.237) / sum(exp(assessmentTotal * 0.237)),
  count := sum(one),
  partitionby = c('subjectID'),
  orderby = c(),
  reverse = c()) %.>%
 extend(.,
  rank := rank(probability, surveyCategory),
  partitionby = c('subjectID'),
  orderby = c(),
  reverse = c()) %.>%
 extend(.,
  isdiagnosis := rank == count,
  diagnosis := surveyCategory) %.>%
 select_rows(.,
   isdiagnosis == TRUE) %.>%
 select_columns(., 
    c('subjectID', 'diagnosis', 'probability')) %.>%
 order_rows(.,
  c('subjectID'),
  reverse = c(),
  limit = NULL)

Execute the calculation.

ex_data_table(rquery_pipeline)
##    subjectID           diagnosis probability
## 1:         1 withdrawal behavior   0.6706221
## 2:         2 positive re-framing   0.5589742