ggplot-advanced.Rmd

output:
  html_document:
      toc: true
      toc_float:
        toc_collapsed: true
        smooth_scroll: true
      widescreen: true
      highlight: pygments
      theme: readable
      css: ./styles/style.css
      df_print: paged
      mathjax: default
      self_contained: false
      incremental: false # true would reveal each bullet one at a time
      collapse: true # means the text output will be merged into the R source code block
      includes:
        after_body: ./styles/footer.html
        before_body: ./styles/header.html
library("knitr")
library("rmarkdown")
library("tidyverse")
library("naniar")
library("UpSetR")
library("dotwhisker")

# Load the two data sets used throughout this document.
pss <- readRDS("../datasets/pss.rds")
uniMis <- readRDS("../datasets/uniMis.rds")

# Console width for printed output. This call used to sit inside
# opts_chunk$set() as an unnamed argument, where it only worked as a side
# effect and passed its return value to knitr as a stray, unnamed entry.
options(width = 80)

# Default options applied to every code chunk.
opts_chunk$set(
  # fig.path = 'pics/s6-',  # path for calculated figures
  fig.align = "center",     # alignment of figures (also: right, left, default)
  fig.show = "hold",        # hold: show figures at the end of the chunk
  fig.width = 6,            # figure width
  fig.height = 6,           # figure height
  echo = TRUE,              # code is printed
  eval = FALSE,             # code is NOT evaluated
  warning = FALSE,          # warnings are NOT displayed
  message = FALSE,          # messages are NOT displayed
  size = "tiny",            # latex size of code chunks
  background = "#E7E7E7",   # background color of code chunks
  comment = "",             # no hashtags before output
  # NOTE(review): knitr documents "markup", "asis", "hold" and "hide" for
  # `results` -- confirm that "markdown" is the intended value.
  results = "markdown",
  rows.print = 15
)

# Emit the xaringanExtra clipboard widget (with icon markup for the button,
# success and error states) together with the Font Awesome HTML dependency.
# Single-quoted strings avoid escaping the double quotes in the markup.
htmltools::tagList(
  xaringanExtra::use_clipboard(
    button_text = '<i class="fa fa-clipboard"></i>',
    success_text = '<i class="fa fa-check" style="color: #90BE6D"></i>',
    error_text = '<i class="fa fa-times-circle" style="color: #F94144"></i>'
  ),
  rmarkdown::html_dependency_font_awesome()
)
# Base plot: stfeco (x) against stfdem (y) from pss, drawn as jittered,
# semi-transparent blue points with axis breaks at every integer from 0 to 10.
scatter <- ggplot(data = pss, mapping = aes(x = stfeco, y = stfdem)) +
  geom_jitter(colour = "blue", alpha = 0.2) +
  scale_x_continuous(breaks = seq(from = 0, to = 10, by = 1)) +
  scale_y_continuous(breaks = seq(from = 0, to = 10, by = 1))

scatter
# Add axis titles, a plot title and a two-line caption to the base plot.
scatterLeg <- scatter +
  labs(
    title = "Correlation Plot",
    x = "Satisfaction with Economy",
    y = "Satisfaction with Democracy",
    caption = "Data: Panem Social Survey.\n Data jittered."
  )

scatterLeg
# Style the plot title: large, italic and horizontally centred (hjust = 0.5).
scatterLeg +
  theme(
    plot.title = element_text(face = "italic", size = 25, hjust = 0.5)
  )
# Style the title and both axis titles. The y-axis colour is built from its
# RGB components on a 0-255 scale.
scatterAxes <- scatterLeg +
  theme(
    plot.title = element_text(face = "italic", size = 25, hjust = 0.5),
    axis.title.x = element_text(color = "seagreen", size = 16, hjust = 0),
    axis.title.y = element_text(
      color = rgb(red = 0, green = 105, blue = 179, maxColorValue = 255),
      face = "bold",
      size = 8,
      hjust = 1
    )
  )

scatterAxes
# Same title and axis-title styling, with the y-axis colour given as a hex
# string instead of an rgb() call.
scatterLeg +
  theme(
    plot.title = element_text(face = "italic", size = 25, hjust = 0.5),
    axis.title.x = element_text(color = "seagreen", size = 16, hjust = 0),
    axis.title.y = element_text(
      color = "#0069B3",
      face = "bold",
      size = 8,
      hjust = 1
    )
  )
# Style the tick labels: x labels rotated by 45 degrees in dark grey,
# y labels with explicit horizontal/vertical justification.
scatterTicks <- scatterAxes +
  theme(
    axis.text.x = element_text(color = "darkgrey", size = 12, angle = 45),
    axis.text.y = element_text(size = 11, hjust = 0, vjust = 1)
  )

scatterTicks
# Draw all panel grid lines as solid green lines.
# Possible linetypes: blank, solid, dashed, dotted, dotdash, longdash, twodash.
# NOTE(review): newer ggplot2 releases prefer `linewidth` over `size` for
# lines -- confirm the installed version before switching.
scatterGrid <- scatterTicks +
  theme(
    panel.grid = element_line(linetype = "solid", color = "green", size = 1)
  )

scatterGrid
# Keep only the major grid lines (solid green) and blank out the minor ones.
# Possible linetypes: blank, solid, dashed, dotted, dotdash, longdash, twodash.
# NOTE(review): newer ggplot2 releases prefer `linewidth` over `size` for
# lines -- confirm the installed version before switching.
scatterGrid <- scatterTicks +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(
      linetype = "solid",
      color = "green",
      size = 1
    )
  )

scatterGrid
# Colour the whole plot area (pink fill, dark grey border) and the panel
# behind the data (moccasin fill).
# NOTE(review): newer ggplot2 releases prefer `linewidth` over `size` for
# borders -- confirm the installed version before switching.
scatterGrid +
  theme(
    panel.background = element_rect(fill = "moccasin"),
    plot.background = element_rect(
      fill = "lightpink",
      color = "darkgray",
      size = 2
    )
  )
# Un-jittered scatter plot of the first 15 rows of pss, used as the base for
# the point-label examples.
scatter2 <- ggplot(data = pss[1:15, ], mapping = aes(x = stfeco, y = stfdem)) +
  geom_point(colour = "blue") +
  scale_x_continuous(breaks = seq(from = 0, to = 10, by = 1)) +
  scale_y_continuous(breaks = seq(from = 0, to = 10, by = 1))

scatter2
# Label every point with its idno, printed directly on top of the point.
scatter2 +
  geom_text(mapping = aes(label = idno))

# Same labels, smaller and shifted slightly below the points.
scatter2 +
  geom_text(mapping = aes(label = idno), nudge_y = -0.15, size = 2)
# Plot all observations but label only the first 10 rows: the text layer gets
# its own `data` while inheriting the x/y mapping from ggplot().
ggplot(data = pss, mapping = aes(x = stfeco, y = stfdem)) +
  geom_point(colour = "blue", alpha = 0.2) +
  scale_x_continuous(breaks = seq(from = 0, to = 10, by = 1)) +
  scale_y_continuous(breaks = seq(from = 0, to = 10, by = 1)) +
  geom_text(data = pss[1:10, ], mapping = aes(label = idno))
# Highlight a region with a rectangle. The corner coordinates are given in
# axis (data) units.
scatter +
  annotate(
    geom = "rect",
    xmin = 8.5, xmax = 9.5,
    ymin = 8.5, ymax = 10.5,
    fill = "lightgreen",
    colour = "darkgreen"
  )

# Same rectangle, made almost transparent so the points stay visible.
scatter +
  annotate(
    geom = "rect",
    xmin = 8.5, xmax = 9.5,
    ymin = 8.5, ymax = 10.5,
    fill = "lightgreen",
    colour = "darkgreen",
    alpha = 0.1
  )
# Transparent highlight rectangle plus a text annotation placed at (1, 9).
# Use \n inside `label` to start a new line.
scatter +
  annotate(
    geom = "rect",
    xmin = 8.5, xmax = 9.5,
    ymin = 8.5, ymax = 10.5,
    fill = "lightgreen",
    colour = "darkgreen",
    alpha = 0.1
  ) +
  annotate(
    geom = "text",
    x = 1, y = 9,
    label = "highlighted area",
    colour = "darkgreen"
  )
# Highlight rectangle, text annotation, and a horizontal arrow at y = 9 that
# runs from x = 2 to the left edge of the rectangle (x = 8.5).
# Use \n inside `label` to start a new line.
scatter +
  annotate(
    geom = "rect",
    xmin = 8.5, xmax = 9.5,
    ymin = 8.5, ymax = 10.5,
    fill = "lightgreen",
    colour = "darkgreen",
    alpha = 0.1
  ) +
  annotate(
    geom = "text",
    x = 1, y = 9,
    label = "highlighted area",
    color = "darkgreen"
  ) +
  annotate(
    geom = "segment",
    x = 2, xend = 8.5,
    y = 9, yend = 9,
    arrow = arrow(),
    color = "darkgreen"
  )
# Install UpSetR and naniar only when they are not available yet, so that
# re-running this code does not reinstall them every time.
if (!requireNamespace("UpSetR", quietly = TRUE)) {
  install.packages("UpSetR")
}
if (!requireNamespace("naniar", quietly = TRUE)) {
  install.packages("naniar")
}

library("UpSetR")
library("naniar")
# Print the raw data set.
uniMis

# Count the missing values per variable among the first five columns of
# uniMis and sort the variables from most to fewest missing values.
# Variables without any missing value do not appear in the result, because
# only the rows flagged as missing are kept.
missingValues <- uniMis %>%
  select(1:5) %>%
  pivot_longer(
    cols = everything(),
    names_to = "variable",
    values_to = "val"
  ) %>%
  mutate(is.missing = is.na(val)) %>%
  group_by(variable, is.missing) %>%
  summarise(num.missing = n()) %>%
  filter(is.missing) %>%
  select(-is.missing) %>%
  arrange(desc(num.missing))

missingValues
# Bar chart of the number of missing values per variable; the bar heights are
# taken directly from num.missing (stat = "identity"). The x labels are
# rotated by 45 degrees.
ggplot(data = missingValues) +
  geom_bar(
    mapping = aes(x = variable, y = num.missing),
    stat = "identity"
  ) +
  labs(
    title = "Missing Values pro Variable",
    x = "Variable",
    y = "Anzahl MV"
  ) +
  theme(axis.text.x = element_text(hjust = 1, angle = 45))
# Percentages
# For each of the first four columns of uniMis, compute how many values are
# present / missing and what share of the column that is.
missingValues <- uniMis %>%
  select(c(1:4)) %>%
  pivot_longer(everything(),
               names_to = "key",
               values_to = "val"
               ) %>%
  mutate(isna = is.na(val)) %>%
  group_by(key) %>%
  mutate(total = n()) %>%      # number of values per variable
  group_by(key,
           total,
           isna
           ) %>%
  summarise(num.isna = n()) %>%
  mutate(pct = num.isna / total * 100)

# Variable names ordered by their share of missing values (largest first);
# used as the axis order of the plots.
# NOTE(review): the name `levels` shadows base::levels as a variable; it is
# kept because later code refers to it.
levels <- (missingValues  %>%
             filter(isna == TRUE) %>%   # TRUE spelled out; `T` can be reassigned
             arrange(desc(pct))
           )$key

# Stacked horizontal bars: share of present vs. missing values per variable.
percentage.plot <- missingValues %>%
  ggplot() +
  geom_bar(aes(x = reorder(key,
                           desc(pct)
                           ),
               y = pct,
               fill = isna
               ),
           stat = 'identity',
           alpha = 0.8) +
  scale_x_discrete(limits = levels) +
  scale_fill_manual(name = "",
                    values = c('steelblue',
                               'tomato3'
                               ),
                    labels = c("vorhanden",
                               "fehlend"
                               )
                    ) +
  coord_flip() +
  labs(title = "Prozent von missing values",
       x = 'Variable',
       y = "% missing values"
       )
percentage.plot
# Per case (gets somewhat hard to read with large data sets): one raster cell
# per ID and variable, coloured by whether the value is missing.
row.plot <- uniMis %>%
  select(1:4) %>%
  pivot_longer(
    cols = -c("ID"),
    names_to = "key",
    values_to = "val"
  ) %>%
  mutate(isna = is.na(val)) %>%
  ggplot(mapping = aes(x = key, y = ID, fill = isna)) +
  geom_raster(alpha = 0.8) +
  scale_fill_manual(
    name = "",
    values = c("steelblue", "tomato3"),
    labels = c("vorhanden", "fehlend")
  ) +
  scale_x_discrete(limits = levels) +
  labs(
    title = "Missing values in rows",
    x = "Variable",
    y = "Row Number"
  ) +
  coord_flip()
row.plot
# naniar summary of missing values per variable, for the whole data set ...
miss_var_summary(uniMis)

# ... and separately within each city.
miss_var_summary(group_by(uniMis, city))

# naniar / UpSetR overview plots of the missing values in uniMis.
gg_miss_var_cumsum(uniMis)
vis_miss(uniMis)
gg_miss_upset(uniMis)
# Scatter plot of mot against abi using naniar's geom_miss_point() layer.
ggplot(data = uniMis, mapping = aes(x = mot, y = abi)) +
  geom_miss_point()

# Missing values per variable, faceted by study.
gg_miss_var(uniMis, facet = study)

# Missing values per variable across the levels of study.
gg_miss_fct(x = uniMis, fct = study)

# The result can be extended with further layers, e.g. a title.
gg_miss_fct(x = uniMis, fct = study) +
  labs(title = "NA in Uni-df nach Studienfach")
# Three nested linear models for trstprl, fitted on pss: trstprt only, then
# adding agea and stfdem, then adding district.
model1 <- lm(formula = trstprl ~ 1 + trstprt, data = pss)

model2 <- lm(formula = trstprl ~ 1 + trstprt + agea + stfdem, data = pss)

model3 <- lm(
  formula = trstprl ~ 1 + trstprt + agea + stfdem + district,
  data = pss
)
# Jittered observations with a fitted linear regression line on top.
ggplot(data = pss, mapping = aes(x = trstprt, y = trstprl)) +
  geom_jitter(color = "darkblue") +                 # observations
  stat_smooth(color = "tomato", method = "lm") +    # regression line
  labs(
    title = "Regression trstprl on trstprt",
    x = "Trust in Parties",
    y = "Trust in Parliament"
  )
# Fictitious data set for predictions: trstprt runs from 0 to 10 in steps of
# 1, while agea and stfdem are held constant at their sample means.
fakeDf <- expand.grid(
  trstprt = seq(from = 0, to = 10, by = 1),
  agea = mean(pss$agea, na.rm = TRUE),
  stfdem = mean(pss$stfdem, na.rm = TRUE)
)

fakeDf
# Predict from model2 for every row of the fictitious data set and store the
# fitted values and their standard errors next to the predictors.
# `se.fit` is spelled out in full; the former `se = TRUE` only worked through
# partial argument matching.
predFakeDf <- predict(model2,
                      newdata = fakeDf,  # the fictitious data set
                      se.fit = TRUE
                      )
fakeDf$pred    <- predFakeDf$fit
fakeDf$pred_se <- predFakeDf$se.fit
# Predicted values as a green line, with dotted lines (linetype = 3) at
# pred -/+ 1.96 * pred_se. The extra layers inherit the data and the x mapping
# from ggplot().
ggplot(data = fakeDf, mapping = aes(x = trstprt, y = pred)) +
  geom_line(color = "darkgreen") +
  geom_line(mapping = aes(y = pred - 1.96 * pred_se), linetype = 3) +
  geom_line(mapping = aes(y = pred + 1.96 * pred_se), linetype = 3) +
  labs(
    title = "Linear relationship between trstprl and trstprt (others constant)",
    x = "Trust in Parties",
    y = "Predicted value of Trust in Parliament"
  )
# Build the prediction grid for one district: trstprt runs from 0 to 10 in
# steps of 1, agea and stfdem are held at their sample means in pss, and
# `district` is the district label stored in every row.
# Replaces five copies of the same expand.grid() call.
makeDistrictGrid <- function(district) {
  expand.grid(list(trstprt = seq(0, 10, 1),
                   agea = mean(pss$agea, na.rm = TRUE),
                   stfdem = mean(pss$stfdem, na.rm = TRUE),
                   district = district
                   )
              )
}

# One grid per district of interest.
fakeDfD1 <- makeDistrictGrid("Distrikt 1")
fakeDfD2 <- makeDistrictGrid("Distrikt 5")
fakeDfD3 <- makeDistrictGrid("Distrikt 7")
fakeDfD4 <- makeDistrictGrid("Distrikt 10")
fakeDfD5 <- makeDistrictGrid("Distrikt 12")
# Stack the five per-district grids into one fictitious data set.
fakeDfbyDistrict <- rbind(fakeDfD1,
                          fakeDfD2,
                          fakeDfD3,
                          fakeDfD4,
                          fakeDfD5
                          )

# Predict from model3 for every row. `se.fit` is spelled out in full; the
# former `se = TRUE` only worked through partial argument matching.
predFakeDfbyDistrict <- predict(model3,
                                newdata = fakeDfbyDistrict,  # the fictitious data set
                                se.fit = TRUE
                                )

# Store fitted values and their standard errors next to the predictors.
fakeDfbyDistrict$pred    <- predFakeDfbyDistrict$fit
fakeDfbyDistrict$pred_se <- predFakeDfbyDistrict$se.fit
# One prediction line per district, distinguished by colour.
ggplot(
  data = fakeDfbyDistrict,
  mapping = aes(x = trstprt, y = pred, color = district, shape = district)
) +
  geom_line() +
  xlab("Trust in Parties") +
  ylab("Predicted value of Trust in Parliament") +
  labs(
    title = "Linear relationship between trstprl and trstprt (others constant)",
    color = "Distrikte"
  )
# Predictions for district 12 only.
fakeDistrict12 <- fakeDfbyDistrict %>%
  filter(district == "Distrikt 12") %>%
  select(-district)

# Observed cases from district 12.
district12 <- pss %>%
  filter(district == "Distrikt 12") %>%
  select(-district)

# Prediction line for district 12 with its jittered observations on top.
# The former `lty = "Distrikte"` label was dropped: no layer maps a linetype
# aesthetic, so it could never be displayed.
ggplot(fakeDistrict12,
       aes(x = trstprt,
           y = pred
           )
       ) +
  geom_line(color = "tomato") +
  geom_point(data = district12,
             aes(y = trstprl),   # observed values instead of predictions
             color = "tomato",
             position = "jitter",
             size = 0.7,
             alpha = 0.5
             ) +
  ylab("Predicted value of Trust in Parliament") +
  xlab("Trust in Parties") +
  labs(title = "Linear relationship between trstprl and trstprt (others constant)",
       caption = "Highlighted District 12."
       )
# Observed cases from every district except district 12.
districts <- pss %>%
  filter(district != "Distrikt 12") %>%
  select(-district)

# Prediction line for district 12, its observations in tomato and all other
# observations in grey. All colours are fixed per layer; nothing is mapped to
# colour or linetype. The former scale_color_manual(), guides(color = "none")
# and `lty` label were therefore dropped -- they had nothing to act on.
ggplot(fakeDistrict12,
       aes(x = trstprt,
           y = pred
           )
       ) +
  geom_line(color = "tomato") +
  geom_point(data = district12,
             aes(y = trstprl),
             color = "tomato",
             position = "jitter",
             size = 0.7,
             alpha = 0.85
             ) +
  geom_point(data = districts,
             aes(y = trstprl),
             color = "darkgray",
             position = "jitter",
             size = 0.7,
             alpha = 0.3
             ) +
  ylab("Predicted value of Trust in Parliament") +
  xlab("Trust in Parties") +
  labs(title = "Linear relationship between trstprl and trstprt (others constant)",
       caption = "Highlighted District 12."
       )
# Install dotwhisker only when it is not available yet, so that re-running
# this code does not reinstall it every time.
if (!requireNamespace("dotwhisker", quietly = TRUE)) {
  install.packages("dotwhisker")
}

library("dotwhisker")
# Default dot-and-whisker plot of the model3 coefficients.
dwplot(model3)

# Same plot with a dashed reference line at 0, readable coefficient labels,
# x-axis breaks every 0.2 between -2 and 1, and a title and caption.
# The labels are listed in model order and reversed with rev() before being
# handed to the y scale.
dwplot(model3) +
  geom_vline(xintercept = 0, linetype = "dashed") +
  scale_y_discrete(
    labels = rev(
      c(
        "Trust Parties",
        "Age",
        "Satisfaction w/ Democracy",
        "District 5",
        "District 7",
        "District 10",
        "District 12"
      )
    )
  ) +
  scale_x_continuous(breaks = seq(from = -2, to = 1, by = 0.2)) +
  labs(
    title = "Lin. Regression on Trust in Parliament (ref: District 1)",
    caption = "Data: Panem Social Survey."
  )