The dataset S3 Class • dataset

library(dataset)

In the R language, datasets are usually contained in a data.frame() object, or in one of its modernized versions. For example, the objects created by tibble::tibble() or data.table::data.table() inherit from the base data.frame().

This documentation has not yet been updated for the development version of the dataset package.

The base data.frame() constructor, like most base R types, is very flexible. It allows the use of any kind of metadata attached to the object.

# Base data frames accept arbitrary attributes, so ad-hoc metadata such as a
# title can be attached without any dedicated class.
foo <- data.frame(
  x = c(1, 2),
  y = c(3, 4)
)
attr(foo, "Title") <- "My Foo Object"
attributes(foo)
#> $names
#> [1] "x" "y"
#> 
#> $class
#> [1] "data.frame"
#> 
#> $row.names
#> [1] 1 2
#> 
#> $Title
#> [1] "My Foo Object"

Metadata standardisation is essential for reproducible research, publication, and linking resources on the web. The aim of the dataset() class is to create semantically enriched data frames with as much interoperability as possible across the various sub-classes of the base R data.frame().

# Wrap mtcars in a dataset carrying bibliographic metadata and preview the
# first rows; the citation line printed above the table echoes these values.
head(
  dataset(
    mtcars,
    title = "The Motor Trend [mtcar] Dataset",
    author = person("Motor Trend Magazine"),
    year = 1974,
    publisher = "Motor Trend Magazine"
  )
)
#> Motor Trend Magazine (1974). "The Motor Trend [mtcar] Dataset
#> [subset]."
#>                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
#> Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
#> Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
#> Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
#> Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
#> Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
#> Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
#> Further metadata: describe(x)

# Destination of the Turtle (.ttl) export, kept inside the session tempdir.
temp_ttl_file <- file.path(tempdir(), "temp_ttl.ttl")

mtcars_dataset <- dataset(
  mtcars,
  title = "The Motor Trend [mtcar] Dataset",
  author = person("Motor Trend Magazine"),
  year = 1974,
  publisher = "Motor Trend Magazine"
)

# Restrict the namespace table to the prefixes passed on to the writer.
mtcars_namespace <- dataset_namespace[
  dataset_namespace$prefix %in% c("owl:", "rdf:", "rdfs:", "qb:", "eg:"),
]

# Add an id column, reshape to triples (the result exposes the `p` and `o`
# columns used below), prefix the predicates and convert the objects.
# NOTE(review): `mtcars_dataset` is reused for the triple table from here on.
mtcars_dataset <- id_to_column(mtcars_dataset, prefix = "eg:", ids = NULL)
mtcars_dataset <- dataset_to_triples(mtcars_dataset, idcol = "rowid")
mtcars_dataset$p <- paste0("eg:mtcars#", mtcars_dataset$p)
mtcars_dataset$o <- xsd_convert(mtcars_dataset$o)

dataset_ttl_write(
  mtcars_dataset,
  ttl_namespace = mtcars_namespace,
  file_path = temp_ttl_file
)

# Show the first 25 lines of the written file.
readLines(temp_ttl_file, n = 25)
#>  [1] "@prefix  eg:         <http://example.org/ns#> ."                     
#>  [2] "@prefix  owl:        <http://www.w3.org/2002/07/owl#> ."             
#>  [3] "@prefix  qb:         <http://purl.org/linked-data/cube#> ."          
#>  [4] "@prefix  rdf:        <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ."
#>  [5] "@prefix  rdfs:       <http://www.w3.org/2000/01/rdf-schema#> ."      
#>  [6] ""                                                                    
#>  [7] "# -- Observations -----------------------------------------"         
#>  [8] ""                                                                    
#>  [9] "eg:Mazda-RX4 a qb:Observation ;"                                     
#> [10] "   eg:mtcars#mpg   \"21\"^^<xs:decimal> ;"                           
#> [11] "   eg:mtcars#cyl   \"6\"^^<xs:decimal> ;"                            
#> [12] "   eg:mtcars#disp   \"160\"^^<xs:decimal> ;"                         
#> [13] "   eg:mtcars#hp   \"110\"^^<xs:decimal> ;"                           
#> [14] "   eg:mtcars#drat   \"3.9\"^^<xs:decimal> ;"                         
#> [15] "   eg:mtcars#wt   \"2.62\"^^<xs:decimal> ;"                          
#> [16] "   eg:mtcars#qsec   \"16.46\"^^<xs:decimal> ;"                       
#> [17] "   eg:mtcars#vs   \"0\"^^<xs:decimal> ;"                             
#> [18] "   eg:mtcars#am   \"1\"^^<xs:decimal> ;"                             
#> [19] "   eg:mtcars#gear   \"4\"^^<xs:decimal> ;"                           
#> [20] "   eg:mtcars#carb   \"4\"^^<xs:decimal> ;"                           
#> [21] "   ."                                                                
#> [22] "eg:Mazda-RX4-Wag a qb:Observation ;"                                 
#> [23] "   eg:mtcars#mpg   \"21\"^^<xs:decimal> ;"                           
#> [24] "   eg:mtcars#cyl   \"6\"^^<xs:decimal> ;"                            
#> [25] "   eg:mtcars#disp   \"160\"^^<xs:decimal> ;"

Tibble

library(tibble)
# The same constructor applied to a tibble rather than a base data frame.
ds_tibble <- dataset(
  as_tibble(mtcars),
  title = "The Motor Trend [mtcar] Dataset",
  author = person("Motor Trend Magazine"),
  year = 1974,
  publisher = "Motor Trend Magazine"
)

# tibble verbs can be called on the dataset object directly.
rowid_to_column(ds_tibble)
#> Motor Trend Magazine (1974). "The Motor Trend [mtcar] Dataset."
#> # A tibble: 12 × 12
#>      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb rowid
#>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
#>  1  16.4     8  276.   180  3.07  4.07  17.4     0     0     3     3    12
#>  2  21       6  160    110  3.9   2.62  16.5     0     1     4     4     1
#>  3  21       6  160    110  3.9   2.88  17.0     0     1     4     4     2
#>  4  22.8     4  108     93  3.85  2.32  18.6     1     1     4     1     3
#>  5  21.4     6  258    110  3.08  3.22  19.4     1     0     3     1     4
#>  6  18.7     8  360    175  3.15  3.44  17.0     0     0     3     2     5
#>  7  18.1     6  225    105  2.76  3.46  20.2     1     0     3     1     6
#>  8  14.3     8  360    245  3.21  3.57  15.8     0     0     3     4     7
#>  9  24.4     4  147.    62  3.69  3.19  20       1     0     4     2     8
#> 10  22.8     4  141.    95  3.92  3.15  22.9     1     0     4     2     9
#> 11  19.2     6  168.   123  3.92  3.44  18.3     1     0     4     4    10
#> 12  17.8     6  168.   123  3.92  3.44  18.9     1     0     4     4    11
#> Further metadata: describe(x)

library("nycflights13")
library("tsibble")
library("dplyr")
library("tidyr")

# Load the `weather` data (presumably from nycflights13, attached above),
# keep the columns used in this example and declare the tsibble structure:
# one series per origin, indexed by the hourly timestamp.
data("weather")
weather <- select(weather, origin, time_hour, temp, humid, precip)
weather_tsbl <- as_tsibble(weather, key = origin, index = time_hour)

# Attach bibliographic metadata to the tsibble.
ds_weather <- dataset(
  weather_tsbl,
  title = "Weather dataset",
  author = person("Jane", "Doe"),
  creator = person("Jane", "Doe")
)

# Print the citation entry built from the metadata above.
dataset_bibentry(ds_weather)
#> Doe J (2024). "Weather dataset."

# Make implicit time gaps explicit (with zero precipitation), then carry the
# last observed temperature and humidity downwards within each key.
full_weather <- ds_weather %>%
  fill_gaps(precip = 0) %>%
  group_by_key() %>%
  tidyr::fill(temp, humid, .direction = "down")

# Monthly aggregates per key: mean temperature and total precipitation.
full_weather %>%
  group_by_key() %>%
  index_by(year_month = ~ yearmonth(.)) %>%
  summarise(
    avg_temp = mean(temp, na.rm = TRUE),
    ttl_precip = sum(precip, na.rm = TRUE)
  )
#> # A tsibble: 36 x 4 [1M]
#> # Key:       origin [3]
#>    origin year_month avg_temp ttl_precip
#>    <chr>       <mth>    <dbl>      <dbl>
#>  1 EWR      2013 Jan     35.6       3.53
#>  2 EWR      2013 Feb     34.2       3.83
#>  3 EWR      2013 Mar     40.1       3   
#>  4 EWR      2013 Apr     53.0       1.47
#>  5 EWR      2013 May     63.3       5.44
#>  6 EWR      2013 Jun     73.3       8.73
#>  7 EWR      2013 Jul     80.7       3.74
#>  8 EWR      2013 Aug     74.5       4.57
#>  9 EWR      2013 Sep     67.3       1.54
#> 10 EWR      2013 Oct     59.7       0.5 
#> # ℹ 26 more rows

# Monthly aggregates per key: mean temperature and total precipitation.
monthly_weather <- full_weather %>%
  group_by_key() %>%
  index_by(year_month = ~ yearmonth(.)) %>%
  summarise(
    avg_temp = mean(temp, na.rm = TRUE),
    ttl_precip = sum(precip, na.rm = TRUE)
  )

# Wrap the aggregated tsibble in a dataset with descriptive metadata.
ds_full_weather <- dataset(
  monthly_weather,
  title = "Weather tsibble",
  author = person("Jane", "Doe"),
  creator = person("Jane", "Doe"),
  language = "eng",
  description = "A replication of the tsibble README example."
)

# Print the DataCite record in citation style, including the BibTeX entry.
print(as_datacite(ds_full_weather), "citation")
#> Doe J (2024). "Weather tsibble."
#> 
#> A BibTeX entry for LaTeX users is
#> 
#>   @Misc{,
#>     title = {Weather tsibble},
#>     author = {Jane Doe},
#>     identifier = {:tba},
#>     publisher = {:unas},
#>     year = {2024},
#>     date = {:tba},
#>     language = {eng},
#>     alternateidentifier = {:unas},
#>     relatedidentifier = {:unas},
#>     format = {application/r-rds},
#>     version = {0.1.0},
#>     rights = {:unas},
#>     description = {A replication of the tsibble README example.},
#>     geolocation = {:unas},
#>     fundingreference = {:unas},
#>   }

# Destination of the Turtle (.ttl) export, kept inside the session tempdir.
temp_weather_file <- file.path(tempdir(), "temp_weather.ttl")

# Restrict the namespace table to the prefixes passed on to the writer.
weather_namespace <- dataset_namespace[
  dataset_namespace$prefix %in% c("owl:", "rdf:", "rdfs:", "qb:", "eg:"),
]

# `ds_weather` is overwritten here: it now derives from the monthly aggregate.
ds_weather <- id_to_column(ds_full_weather, prefix = "eg:", ids = NULL)
# NOTE(review): the rendered output types every object as xs:string, numeric
# averages included, and year_month appears as a bare number. The disabled
# call below suggests converting before reshaping was considered - confirm
# the intended order.
#ds_weather    <- xsd_convert(ds_weather, idcol = "rowid")
ds_weather <- dataset_to_triples(ds_weather, idcol = "rowid")
ds_weather$p <- paste0("eg:weather#", ds_weather$p)
ds_weather$o <- xsd_convert(ds_weather$o)

dataset_ttl_write(
  ds_weather,
  ttl_namespace = weather_namespace,
  file_path = temp_weather_file
)

# Show the first 25 lines of the written file.
readLines(temp_weather_file, n = 25)
#>  [1] "@prefix  eg:         <http://example.org/ns#> ."                     
#>  [2] "@prefix  owl:        <http://www.w3.org/2002/07/owl#> ."             
#>  [3] "@prefix  qb:         <http://purl.org/linked-data/cube#> ."          
#>  [4] "@prefix  rdf:        <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ."
#>  [5] "@prefix  rdfs:       <http://www.w3.org/2000/01/rdf-schema#> ."      
#>  [6] ""                                                                    
#>  [7] "# -- Observations -----------------------------------------"         
#>  [8] ""                                                                    
#>  [9] "eg:1 a qb:Observation ;"                                             
#> [10] "   eg:weather#origin   \"EWR\"^^<xs:string> ;"                       
#> [11] "   eg:weather#year_month   \"15706\"^^<xs:string> ;"                 
#> [12] "   eg:weather#avg_temp   \"35.5694751009421\"^^<xs:string> ;"        
#> [13] "   eg:weather#ttl_precip   \"3.53\"^^<xs:string> ;"                  
#> [14] "   ."                                                                
#> [15] "eg:2 a qb:Observation ;"                                             
#> [16] "   eg:weather#origin   \"EWR\"^^<xs:string> ;"                       
#> [17] "   eg:weather#year_month   \"15737\"^^<xs:string> ;"                 
#> [18] "   eg:weather#avg_temp   \"34.2266964285714\"^^<xs:string> ;"        
#> [19] "   eg:weather#ttl_precip   \"3.83\"^^<xs:string> ;"                  
#> [20] "   ."                                                                
#> [21] "eg:3 a qb:Observation ;"                                             
#> [22] "   eg:weather#origin   \"EWR\"^^<xs:string> ;"                       
#> [23] "   eg:weather#year_month   \"15765\"^^<xs:string> ;"                 
#> [24] "   eg:weather#avg_temp   \"40.1186541049798\"^^<xs:string> ;"        
#> [25] "   eg:weather#ttl_precip   \"3\"^^<xs:string> ;"