dplyr-grammar.Rmd

title: "Einführung in dplyr-Grammatik"
subtitle: "Daten bändigen & visualisieren"
author: "B. Philipp Kleer"
date: "11. Oktober 2021"
output:
  slidy_presentation:
      footer: "CC BY-SA 4.0, B. Philipp Kleer"
      widescreen: true
      highlight: pygments
      theme: readable
      css: style.css
      df_print: paged
      mathjax: default
      self_contained: false
      incremental: false # true would reveal each bullet one at a time
      collapse: true # means the text output will be merged into the R source code block
library("knitr")
library("rmarkdown")
library("tidyverse")

# Read the course dataset from the sibling "datasets" directory.
uni <- readRDS("../datasets/uni.rds")

# Console width for printed output. This is set in its own call: when
# `options(width = 80)` was written inside `opts_chunk$set()`, its return
# value (a list holding the previous option values) was passed along as an
# unnamed chunk option, which is not a valid knitr option.
options(width = 80)

# Global knitr chunk defaults for the whole document.
opts_chunk$set(
  fig.path = "pics/s6-",   # path prefix for generated figures
  fig.align = "center",    # figure alignment (also: "right", "left", "default")
  fig.show = "hold",       # "hold": show plots at the end of the chunk;
                           # "animate": combine all plots into an animation
  fig.width = 3,           # figure width
  fig.height = 4,          # figure height
  echo = TRUE,             # code is printed
  eval = FALSE,            # code is NOT evaluated
  warning = FALSE,         # warnings are NOT displayed
  message = FALSE,         # messages are NOT displayed
  size = "tiny",           # LaTeX font size of code chunks
  background = "#E7E7E7",  # background colour of code chunks
  comment = "",            # no prefix characters in front of output lines
  # NOTE(review): knitr documents "markup", "asis", "hold" and "hide" for
  # `results`; confirm that "markdown" is intended here.
  results = "markdown",
  rows.print = 15          # rows shown per page when printing data frames
)

# Install the tidyverse collection, then attach it
install.packages(pkgs = "tidyverse")
library(tidyverse)

# Alternative: install and attach only dplyr
# install.packages("dplyr")
# library("dplyr")
# Read the dataset ("yourpath" is presumably a placeholder directory) and
# print it
uni <- readRDS(file = "yourpath/uni.rds")
uni
# Codebook:
#   ID:       running number
#   mot:      study motivation (0 <very low> - 10 <very high>)
#   study:    field of study (1 <Political Science>, 2 <Sociology>,
#             3 <Educational Science>, 4 <Psychology>)
#   city:     place of study (1 <Gießen>, 2 <Marburg>, 3 <Frankfurt>)
#   distance: travel time to university in minutes
#   abi:      Abitur grade
#   term:     semester of study
# Keep only the columns `mot` and `term` of `uni`
select(.data = uni, mot, term)
# Pick rows by position: rows 50 through 55 of `uni`
slice(.data = uni, 50:55)
# Keep the rows whose `city` equals "Giessen"
# NOTE(review): the codebook comment spells this label "Gießen"; confirm the
# spelling used in the data.
filter(.data = uni, city == "Giessen")

# Keep the rows that satisfy both conditions; comma-separated conditions are
# combined with a logical AND
filter(
  .data = uni,
  city == "Frankfurt",
  study == "Political Science"
)
# Sort by `abi` in ascending order
arrange(.data = uni, abi)

# Sort by `abi` in descending order using desc()
arrange(.data = uni, desc(abi))

# Sort by `abi` in descending order by negating the column
arrange(.data = uni, -abi)
# Add the column `abiDist`: each `abi` value minus the mean of `abi`
# (missing values are ignored when computing the mean)
mutate(
  .data = uni,
  abiDist = abi - mean(abi, na.rm = TRUE)
)