[1] "/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library"
Lecture 24
R packages are just a collection of files (R code, compiled code, data, documentation, etc.) that live in your library path.
[1] "_cache" "abind"
[3] "anytime" "ape"
[5] "arrayhelpers" "arrow"
[7] "AsioHeaders" "askpass"
[9] "assertthat" "astsa"
[11] "backports" "base"
[13] "base64enc" "bayesplot"
[15] "beeswarm" "bench"
[17] "BH" "bit"
[19] "bit64" "blob"
[21] "bonsai" "bookdown"
[23] "boot" "brew"
[25] "bridgesampling" "brio"
[27] "brms" "Brobdingnag"
[29] "broom" "broom.helpers"
[31] "broom.mixed" "bsicons"
[33] "bslib" "cachem"
[35] "callr" "car"
[37] "carData" "cellranger"
[39] "checklist" "checkmate"
[41] "chiflights22" "chromote"
[43] "chron" "class"
[45] "classInt" "cli"
[47] "clipr" "clock"
[49] "cluster" "cmdstanr"
[51] "coda" "codetools"
[53] "colorspace" "colourpicker"
[55] "commonmark" "compiler"
[57] "conflicted" "corrplot"
[59] "countdown" "cowplot"
[61] "cpp11" "crayon"
[63] "credentials" "crosstalk"
[65] "crul" "curl"
[67] "DAAG" "data.table"
[69] "datasauRus" "datasets"
[71] "DBI" "dbplyr"
[73] "deepgp" "deldir"
[75] "desc" "devtools"
[77] "diagram" "dials"
[79] "DiceDesign" "diffmatchpatch"
[81] "diffobj" "diffviewer"
[83] "digest" "distributional"
[85] "doFuture" "doMC"
[87] "doParallel" "dotCall64"
[89] "downlit" "dplyr"
[91] "DT" "dtplyr"
[93] "dukestm" "dygraphs"
[95] "e1071" "ellipsis"
[97] "evaluate" "expm"
[99] "extraDistr" "extrafont"
[101] "extrafontdb" "fable"
[103] "fable.prophet" "fabletools"
[105] "fansi" "farver"
[107] "fastmap" "feasts"
[109] "fields" "flexiblas"
[111] "FNN" "fontawesome"
[113] "fontBitstreamVera" "fontLiberation"
[115] "fontquiver" "forcats"
[117] "foreach" "forecast"
[119] "foreign" "formatR"
[121] "Formula" "fracdiff"
[123] "fs" "furrr"
[125] "future" "future.apply"
[127] "gargle" "gdtools"
[129] "generics" "geometry"
[131] "geoR" "geosphere"
[133] "gert" "gfonts"
[135] "GGally" "gganimate"
[137] "ggbeeswarm" "ggdist"
[139] "ggExtra" "ggplot2"
[141] "ggpubr" "ggrepel"
[143] "ggridges" "ggsci"
[145] "ggsignif" "ggstats"
[147] "ggthemes" "gh"
[149] "ghclass" "gifski"
[151] "gitcreds" "glarma"
[153] "glmnet" "globals"
[155] "glue" "gmp"
[157] "googledrive" "googlesheets4"
[159] "gower" "GPfit"
[161] "GpGp" "gptstudio"
[163] "graphics" "grDevices"
[165] "grid" "gridExtra"
[167] "gsubfn" "gtable"
[169] "gtools" "hardhat"
[171] "haven" "here"
[173] "hexbin" "highr"
[175] "histoslider" "hms"
[177] "hrbrthemes" "htmltools"
[179] "htmlwidgets" "httpcode"
[181] "httpuv" "httr"
[183] "httr2" "ids"
[185] "igraph" "infer"
[187] "ini" "inline"
[189] "interp" "ipred"
[191] "isoband" "iterators"
[193] "jpeg" "jquerylib"
[195] "jsonlite" "kableExtra"
[197] "KernSmooth" "knitr"
[199] "labeling" "labelled"
[201] "later" "lattice"
[203] "latticeExtra" "lava"
[205] "lazyeval" "leaflet"
[207] "leaflet.providers" "LearnBayes"
[209] "lhs" "lifecycle"
[211] "lightgbm" "linprog"
[213] "listenv" "lme4"
[215] "lmtest" "lobstr"
[217] "loo" "lorem"
[219] "lpSolve" "lubridate"
[221] "lwgeom" "magic"
[223] "magick" "magrittr"
[225] "maps" "markdown"
[227] "MASS" "Matrix"
[229] "MatrixModels" "matrixStats"
[231] "maxLik" "memoise"
[233] "methods" "mgcv"
[235] "mime" "miniUI"
[237] "minqa" "miscTools"
[239] "modeldata" "modelenv"
[241] "modelr" "multcomp"
[243] "munsell" "mustashe"
[245] "mvtnorm" "nleqslv"
[247] "nlme" "nloptr"
[249] "nnet" "numDeriv"
[251] "nycflights13" "openssl"
[253] "packrat" "pagedown"
[255] "pak" "palmerpenguins"
[257] "pander" "parallel"
[259] "parallelly" "parsnip"
[261] "patchwork" "pbapply"
[263] "pbkrtest" "pdist"
[265] "pillar" "pkgbuild"
[267] "pkgconfig" "pkgdown"
[269] "pkgload" "plogr"
[271] "plotly" "plyr"
[273] "png" "polite"
[275] "polynom" "posterior"
[277] "praise" "prettyunits"
[279] "PrevMap" "prismatic"
[281] "processx" "prodlim"
[283] "profmem" "profvis"
[285] "progress" "progressr"
[287] "promises" "prophet"
[289] "proto" "proxy"
[291] "pryr" "ps"
[293] "purrr" "qs"
[295] "quadprog" "quantmod"
[297] "quantreg" "quarto"
[299] "queryparser" "QuickJSR"
[301] "R6" "ragg"
[303] "randomNames" "ranger"
[305] "RANN" "RApiSerialize"
[307] "rappdirs" "rapportools"
[309] "raster" "rasterVis"
[311] "ratelimitr" "rbibutils"
[313] "rcmdcheck" "RColorBrewer"
[315] "Rcpp" "RcppArmadillo"
[317] "RcppEigen" "RcppParallel"
[319] "RcppProgress" "Rdpack"
[321] "reactR" "readr"
[323] "readxl" "recipes"
[325] "registry" "rematch"
[327] "rematch2" "remotes"
[329] "renderthis" "renv"
[331] "repr" "reprex"
[333] "repurrrsive" "reshape"
[335] "reshape2" "rjags"
[337] "rlang" "rmarkdown"
[339] "robotstxt" "ROI"
[341] "roxygen2" "rpart"
[343] "rpart.plot" "rprojroot"
[345] "rsample" "rsconnect"
[347] "RSQLite" "rstan"
[349] "rstanarm" "rstantools"
[351] "rstatix" "rstudioapi"
[353] "Rttf2pt1" "rversions"
[355] "rvest" "s2"
[357] "sandwich" "sass"
[359] "scales" "selectr"
[361] "servr" "sessioninfo"
[363] "sf" "shape"
[365] "shiny" "shiny.i18n"
[367] "shinyjs" "shinystan"
[369] "shinythemes" "skimr"
[371] "slam" "slider"
[373] "sourcetools" "sp"
[375] "spam" "spaMM"
[377] "SparseM" "spatial"
[379] "spatialreg" "spBayes"
[381] "spData" "spdep"
[383] "spiderbar" "splancs"
[385] "splines" "spNNGP"
[387] "sqldf" "SQUAREM"
[389] "StanHeaders" "stars"
[391] "stats" "stats4"
[393] "stringfish" "stringi"
[395] "stringr" "summarytools"
[397] "survival" "svglite"
[399] "svUnit" "sys"
[401] "systemfonts" "tcltk"
[403] "tensorA" "terra"
[405] "testthat" "textshaping"
[407] "TH.data" "threejs"
[409] "tibble" "tidybayes"
[411] "tidymodels" "tidyquery"
[413] "tidyr" "tidyselect"
[415] "tidyverse" "timechange"
[417] "timeDate" "tinytex"
[419] "tools" "toOrdinal"
[421] "translations" "triebeard"
[423] "truncnorm" "tseries"
[425] "tsibble" "tsibbledata"
[427] "TTR" "tune"
[429] "tweenr" "tzdb"
[431] "units" "urca"
[433] "urlchecker" "urltools"
[435] "usethis" "usmap"
[437] "usmapdata" "utf8"
[439] "utils" "uuid"
[441] "vctrs" "vipor"
[443] "viridis" "viridisLite"
[445] "vroom" "waiter"
[447] "waldo" "warp"
[449] "webshot" "websocket"
[451] "whisker" "withr"
[453] "wk" "workflows"
[455] "workflowsets" "xaringan"
[457] "xfun" "xml2"
[459] "xopen" "xtable"
[461] "xts" "yaml"
[463] "yardstick" "zip"
[465] "zoo"
When you run library(pkg)
the functions (and objects) in the package’s namespace are attached to the global search path.
If you do not want to attach a package you can directly use package functions via ::
or load the package with requireNamespace()
.
[1] "digest" "methods"
[3] "diffmatchpatch" "fastmap"
[5] "xfun" "magrittr"
[7] "glue" "knitr"
[9] "htmltools" "rmarkdown"
[11] "lifecycle" "utils"
[13] "cli" "graphics"
[15] "grDevices" "stats"
[17] "compiler" "forcats"
[19] "base" "rstudioapi"
[21] "tools" "evaluate"
[23] "Rcpp" "yaml"
[25] "rlang" "jsonlite"
[27] "datasets"
We’ve already seen the two primary sources of R packages:
there is one other method that comes up (particularly around package development), which is to install a package from local files.
From the terminal,
or from R,
The Comprehensive R Archive Network (CRAN) is the central repository of R packages.
Maintained by the R Foundation and run by a team of volunteers, ~20k packages
Retains all current versions of released packages as well as archives of previous versions
Similar in spirit to Perl’s CPAN, TeX’s CTAN, and Python’s PyPI
Some important features:
All submissions are reviewed by humans + automated checks
Strictly enforced submission policies and package requirements
All packages must be actively maintained and support upstream and downstream changes
DESCRIPTION
- file containing package metadata (e.g. package name, description, version, license, and author details). Also specifies package dependencies.
NAMESPACE
- details which functions and objects are exported by your package
R/
- folder containing R script files (.R
)
man/
- folder containing R documentation files (.Rd
)
The following components are optional, but quite common:
tests/
- folder containing unit tests
src/
- folder containing code to be compiled (usually C / C++)
data/
- folder containing example data sets
inst/
- files that will be copied to the package’s top-level directory when it is installed (e.g. C/C++ headers, examples or data files that don’t belong in data/
)
vignettes/
- long form documentation, can be static (.pdf
or .html
) or literate documents (e.g. .qmd
, .Rmd
or .Rnw
)
Source Package
~/Desktop/Projects/diffmatchpatch/
├── DESCRIPTION
├── LICENSE.md
├── NAMESPACE
├── NEWS.md
├── R
│ ├── RcppExports.R
│ ├── diff.R
│ ├── diffmatchpatch-package.R
│ ├── match.R
│ ├── options.R
│ ├── patch.R
│ └── print.R
├── README.Rmd
├── README.md
├── cran-comments.md
├── diffmatchpatch.Rproj
├── inst
│ └── include
│ └── diff_match_patch.h
├── man
│ ├── diff.Rd
│ ├── dmp_options.Rd
│ ├── match.Rd
│ └── patch.Rd
└── src
├── Makevars
├── Makevars.win
├── RcppExports.cpp
├── RcppExports.o
├── common.h
├── diff.cpp
├── diff.o
├── diffmatchpatch.so
├── match.cpp
├── match.o
├── options.cpp
├── options.o
├── patch.cpp
└── patch.o
Installed Package
/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/diffmatchpatch
├── DESCRIPTION
├── INDEX
├── Meta
│ ├── Rd.rds
│ ├── features.rds
│ ├── hsearch.rds
│ ├── links.rds
│ ├── nsInfo.rds
│ └── package.rds
├── NAMESPACE
├── NEWS.md
├── R
│ ├── diffmatchpatch
│ ├── diffmatchpatch.rdb
│ └── diffmatchpatch.rdx
├── help
│ ├── AnIndex
│ ├── aliases.rds
│ ├── diffmatchpatch.rdb
│ ├── diffmatchpatch.rdx
│ └── paths.rds
├── html
│ ├── 00Index.html
│ └── R.css
├── include
│ └── diff_match_patch.h
└── libs
└── diffmatchpatch.so
What follows is an opinionated introduction to package development,
this is not the only way to do things (none of the following are required)
I would strongly recommend using:
Read and follow along with R Packages (2e) - Chap. 1 - “The Whole Game”
usethis
This is an immensely useful package for automating all kinds of routine (and tedious) tasks within R
Tools for managing git and GitHub configuration
Tools for managing collaboration on GitHub via pull requests (see pr_*()
)
Tools for creating and configuring packages
Tools for configuring your R environment (e.g. .Rprofile
and .Renviron
)
and much much more
An important early step in developing a package is choosing a license - this is not trivial but is important to do early on, particularly if collaborating with others.
There are many resources available to help you choose a license, including:
Many packages contain sample data (e.g. nycflights13
, babynames
, etc.)
Generally these files are made available by saving a single data object as an .Rdata
file (using save()
) into the data/
directory of your package.
An easy option is to use usethis::use_data(obj)
to create the necessary file(s)
Data is usually compressed; for large data sets it may be worth trying different options (there is a 5 MB package size limit on CRAN)
Exported data must be documented (possible via roxygen)
By default when attaching a package all of that package's data is loaded - however if LazyData: true
is set in the package's DESCRIPTION
then data is only loaded when used.
When published a package should generally only contain the final data set, but it is important that the process to generate the data is documented as well as any necessary preliminary data.
These can live anywhere, but the general suggestion is to create a data-raw/
directory which is included in .Rbuildignore
data-raw/
then contains scripts, data files, and anything else needed to generate the final object
See examples babynames or nycflights13
Use usethis::use_data_raw()
to create and ignore the data-raw/
directory.
If you have data that you want to have access to from within the package but not exported then it needs to live in a special Rdata object located at R/sysdata.rda
.
Can be created using usethis::use_data(obj1, obj2, internal = TRUE)
Each call to the above will overwrite, so needs to include all objects
Not necessary for small data frames and similar objects - just create in a script. Use when you want the object to be compressed.
Example nflplotR which contains team logos and colors for NFL teams.
If you want to include raw data files (e.g. .csv
, shapefiles, etc.) these are generally placed in inst/
(or a nested folder) so that they are installed with the package.
Accessed using system.file("dir", package = "package")
after install
Use folders to keep things organized, Hadley recommends and uses inst/extdata/
Example sf
Long form documentation for your package that lives in vignettes/
, use browseVignettes(pkg)
to see a package’s vignettes.
Not required, but adds a lot of value to a package
Generally these are literate documents (.Rmd
, .Rnw
) that are compiled to .html
or .pdf
when the package is built.
Built packages retain the rendered document, the source document, and all source code
vignette("colwise", package = "dplyr")
opens rendered version
edit(vignette("colwise", package = "dplyr"))
opens code chunks
Use usethis::use_vignette()
to create an R Markdown vignette template
These are an un-official extension to vignettes where package authors wish to include additional long form documentation that is included in their pkgdown
site but not in the package (usually for space reasons).
Use usethis::use_article()
to create
Files are added to vignettes/articles/
which is added to .Rbuildignore
R CMD check
Last time we saw the usage of R CMD check
, or rather Build > Check Package
from within RStudio.
This is a good idea to run regularly to make sure nothing is broken and you are meeting the important package quality standards, but this is only in the context of your machine, your version of R, your OS, and so on.
If using GitHub it is highly recommended that you run usethis::use_github_action_check_standard()
to enable GitHub actions checks of the package each time it is pushed.
On each push this runs R CMD check on:
- Latest R on macOS, Windows, Linux (Ubuntu)
- Previous and devel version of R on Linux (Ubuntu)
Package tests live in tests/
,
Any R scripts found in the folder will be run when Checking the package (not Building)
Generally tests fail on errors, but warnings are also tracked
Testing is possible via base R, including comparison of output vs. a file but it is not recommended (See Writing R Extensions)
Note that R CMD check also runs all documentation examples (unless explicitly wrapped in \dontrun{}) - which can be used for basic testing
Not the only option but probably the most widely used and with the best integration into RStudio.
Can be initialized in your project via usethis::use_testthat()
which creates tests/testthat/
and some basic scaffolding.
tests/testthat.R
is what is run by R CMD check and runs your other tests - handles some basic config like loading package(s)
Test scripts go in tests/testthat/
and should start with test_
, suffix is usually the file in R/
that is being tested.
From the bottom up,
a single test is written as an expectation (e.g. expect_equal()
, expect_error()
, etc.)
multiple related expectations are combined into a test group (test_that()
), which provides
multiple test groups are combined into a file
Sta 523 - Fall 2023