Slicing Columns

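H2O slices columns lazily: writing a slice expression does not copy any data right away, and a shared copy is materialized only when a later operation that triggers I/O requires it. This example shows how to slice columns from a frame of data in four ways: by a single index, by a single name, by a vector of indexes, and by a vector of names.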

  • r
  • python
# Load the h2o package, then start and/or connect to an H2O instance
library(h2o)
h2o.init()

# Import the iris dataset (the CSV variant whose file name indicates a header row)
path <- "http://h2o-public-test-data.s3.amazonaws.com/smalldata/iris/iris_wheader.csv"
df <- h2o.importFile(path)
# Print the frame: the output shows the first few rows plus the full dimensions
print(df)
  sepal_len sepal_wid petal_len petal_wid       class
1       5.1       3.5       1.4       0.2 Iris-setosa
2       4.9       3.0       1.4       0.2 Iris-setosa
3       4.7       3.2       1.3       0.2 Iris-setosa
4       4.6       3.1       1.5       0.2 Iris-setosa
5       5.0       3.6       1.4       0.2 Iris-setosa
6       5.4       3.9       1.7       0.4 Iris-setosa

[150 rows x 5 columns]

# Slice 1 column by index: column indexes are 1-based, so 1 selects the
# first column (sepal_len); the result is a one-column frame
c1 <- df[, 1]
print(c1)
  sepal_len
1       5.1
2       4.9
3       4.7
4       4.6
5       5.0
6       5.4

[150 rows x 1 column]

# Slice 1 column by name: pass the column name as a string in the column
# position; the result is a one-column frame
c1_1 <- df[, "petal_len"]
print(c1_1)
  petal_len
1       1.4
2       1.4
3       1.3
4       1.5
5       1.4
6       1.7

[150 rows x 1 column]

# Slice columns by a vector of indexes: 1:4 selects the first four columns,
# i.e. every column of the iris frame except `class`
cols <- df[, 1:4]
print(cols)
  sepal_len sepal_wid petal_len petal_wid
1       5.1       3.5       1.4       0.2
2       4.9       3.0       1.4       0.2
3       4.7       3.2       1.3       0.2
4       4.6       3.1       1.5       0.2
5       5.0       3.6       1.4       0.2
6       5.4       3.9       1.7       0.4

[150 rows x 4 columns]

# Slice columns by a vector of names: the columns are returned in the order
# listed; here this selects the same four columns as `df[, 1:4]`
cols_1 <- df[, c("sepal_len", "sepal_wid", "petal_len", "petal_wid")]
print(cols_1)
  sepal_len sepal_wid petal_len petal_wid
1       5.1       3.5       1.4       0.2
2       4.9       3.0       1.4       0.2
3       4.7       3.2       1.3       0.2
4       4.6       3.1       1.5       0.2
5       5.0       3.6       1.4       0.2
6       5.4       3.9       1.7       0.4

[150 rows x 4 columns]