Update documents

lhehnke · Jun 2, 2019 · 84d8b6a3e03537555888155d1d166dc505ab4f5d · 84d8b6a
1 parent 6954365
commit 84d8b6a3e03537555888155d1d166dc505ab4f5d
Unified Split

Showing with 25 additions and 25 deletions.

+25 −25 OpenScienceMOOC-follower-analysis.Rmd

BIN OpenScienceMOOC-follower-analysis.pdf
diff --git a/OpenScienceMOOC-follower-analysis.Rmd b/OpenScienceMOOC-follower-analysis.Rmd
@@ -38,7 +38,7 @@ As the old saying goes, every analysis needs its data. While I generally prefer
 And while `ggplot2` - especially when combined with `ggthemes` - comes with beautiful themes by default, I most often cannot resist creating a custom theme for each project. With the colour scheme below, I tried to match the visual appearance of Shirin's follower analysis. And yes, it would have been easier, albeit less fun, if I had followed her example and simply used `tidyquant`.
 ```{r knitr, include = FALSE}
-  
-knitr::opts_chunk$set(echo = TRUE, error = TRUE, warning = FALSE, message = FALSE, tidy = TRUE, fig.width = 9, fig.height = 6, dev = "png", (root.dir = paste0(rprojroot::find_rstudio_root_file(), "/Files")))
+  
+knitr::opts_chunk$set(echo = TRUE, error = TRUE, warning = FALSE, message = FALSE, tidy = TRUE, fig.width = 12, fig.height = 8, dev = "png", (root.dir = paste0(rprojroot::find_rstudio_root_file(), "/Files")))
 ```
 ```{r setup}
@@ -96,8 +96,8 @@ viz_theme <- theme(
  panel.grid.minor = element_blank(),
  plot.caption = element_text(colour = "#3e5871"),
  strip.background = element_rect(colour = "#2c3e50", fill = "white"),
-  
-  strip.text = element_text(size = rel(1)),
-  
-  text = element_text(colour = "#2c3e50"))
+  
+  strip.text = element_text(size = rel(1), face = "bold"),
+  
+  text = element_text(size = 14, colour = "#2c3e50", family = "Avenir"))
 ```
 ## Friend or follow?
@@ -117,9 +117,9 @@ relations_df %>%
  ggplot(mapping = aes(x = relation, fill = relation)) +
  scale_fill_manual(" ", values = c("#6c7a89", "#2C3E50", "#22313f")) +
  scale_x_discrete(labels = c("Friend & follower", "Follower", "Friend")) +
-  
-  viz_theme + ylab("") + xlab("") +
+  
+  viz_theme + ylab("Count") + xlab("") +
  geom_bar(alpha = 0.8, width = 0.5) + ylim(0, 4000) +
-  
-  ggtitle(label = "Count of Open Science MOOC Twitter followers & friends", subtitle = " ") +
+  
+  ggtitle(label = "Open Science MOOC Twitter followers & friends", subtitle = " ") +
  theme(legend.position = "none")
 ```
@@ -161,7 +161,7 @@ data_df %>%
  top_n(20, n) %>%
  ggplot(aes(city_clean, n)) +
  geom_bar(stat = "identity", width = 0.5, alpha = 0.8, color = "#2c3e50", fill = "#2c3e50") +
-  
-  xlab("") + ylab("") + ggtitle("Top locations of Open Science Twitter MOOC-ers", subtitle = " ") +
+  
+  xlab("") + ylab("Count") + ggtitle("Top locations of Open Science Twitter MOOC-ers", subtitle = " ") +
  coord_flip() + viz_theme
 ```
@@ -214,11 +214,11 @@ ggplot() +
 In a slightly more advanced version of this map, I added further information on the follower count of the geocoded users, which results in a similar picture: The majority of the influential Open Science Twitter MOOC-ers are based in Western Europe and the United States. I will revisit this notion later on in the blog post when conducting a more fine-grained analysis of the most prominent followers.
 ```{r locations-static-map2, fig.align = "center", fig.width = 10}
-  
-# Plot followers' location relative to followers' followers count
+  
+# Plot followers' locations relative to followers' followers count
 ggplot() +
  geom_polygon(data = map_world, aes(x = long, y = lat, group = group), colour = "gray85", fill = "gray80") +
  geom_point(data = data_df, aes(x = lon, y = lat, size = followersCount), color = "#2c3e50", alpha = 0.5) +
-  
-  ggtitle(label = "Locations of Open Science Twitter MOOC-ers", subtitle = "Marker sizes relative to followers' followers counts (N = 2220)") +
+  
+  ggtitle(label = "Locations of Open Science Twitter MOOC-ers", subtitle = "Marker sizes relative to followers' follower counts (N = 2220)") +
  scale_size_continuous(range = c(1, 6), limits = c(0, 300000),
                        breaks = c(0, 1000, 10000, 100000, 200000, 300000),
                        labels = function(x) format(x, scientific = FALSE)) +
@@ -306,9 +306,9 @@ followers_gender %>%
  ggplot(mapping = aes(x = gender, fill = gender)) +
  scale_fill_manual(" ", values = c("#6c7a89", "#22313f")) +
  scale_x_discrete(labels = c("Female", "Male")) +
-  
-  viz_theme + ylab("") + xlab("") +
+  
+  viz_theme + ylab("Count") + xlab("") +
  geom_bar(alpha = 0.8, width = 0.5) +
-  
-  ggtitle(label = "Count of Open Science Twitter MOOC-ers by gender", subtitle = "Gender predicted with genderize.io (random sample of N = 1000 followers)") +
+  
+  ggtitle(label = "Open Science Twitter MOOC-ers by gender", subtitle = "Gender predicted with genderize.io (random sample of N = 1000 followers)") +
  theme(legend.position = "none")
 ```
@@ -340,9 +340,9 @@ data_df %>%
  ggplot(mapping = aes(x = category, fill = category)) +
  scale_fill_manual(" ", values = c("#C0392B", "#2C3E50", "#16A085", "#F1C40F")) +
  scale_x_discrete(labels = c("Influencer", "Personal account", "Verified account", "Verified influencer")) +
-  
-  viz_theme + ylab("") + xlab("") +
+  
+  viz_theme + ylab("Count") + xlab("") +
  geom_bar(alpha = 0.8, width = 0.5) +
-  
-  ggtitle(label = 'Number of Open Science Twitter MOOC-ers by account status', subtitle = "Influencer: at least 500 followers and at least thrice as many followers than friends \nVerified: official verification status") + ylim(0, 6000) +
+  
+  ggtitle(label = 'Open Science Twitter MOOC-ers by account status', subtitle = "Influencer = at least 500 followers and at least thrice as many followers than friends \nVerified = official verification status") + ylim(0, 6000) +
  theme(legend.position = "none")
 ```
@@ -357,7 +357,7 @@ data_df %>%
  mutate(screenName = reorder(screenName, followersCount)) %>%
  ggplot(aes(screenName, followersCount, label = followersCount)) +
  geom_bar(stat = "identity", width = 0.5, alpha = 0.8, color = "#2c3e50", fill = "#2c3e50") +
-  
-  xlab("") + ylab("") + ggtitle("Most influential Open Science MOOC-ers", subtitle = "Followers count") +
+  
+  xlab("") + ylab("Followers count") + ggtitle("Most influential Open Science MOOC-ers", subtitle = " ") +
  scale_y_continuous(labels = function(x) format(x, scientific = FALSE)) +
  coord_flip() + viz_theme
 ```
@@ -380,7 +380,7 @@ data_df %>%
  mutate(screenName = reorder(screenName, statuses_day)) %>%
  ggplot(aes(screenName, statuses_day, label = statuses_day)) +
  geom_bar(stat = "identity", width = 0.5, alpha = 0.8, color = "#2c3e50", fill = "#2c3e50") +
-  
-  xlab("") + ylab("") + ggtitle("Most active Open Science MOOC-ers", subtitle = "Average number of tweets per day") +
+  
+  xlab("") + ylab("Average number of tweets per day") + ggtitle("Most active Open Science MOOC-ers", subtitle = " ") +
  scale_y_continuous(labels = function(x) format(x, scientific = FALSE)) + ylim(0, 200) +
  coord_flip() + viz_theme
 ```
@@ -393,7 +393,7 @@ data_df %>%
    geom_smooth(method = "lm", color = "grey50", fill = "grey90", alpha = 0.8) +
    geom_point(alpha = 0.8) +
  scale_color_continuous("Number of days since \naccount was created", low = "#91aac3", high = "#2C3E50") +
-  
-    xlab("Number of followers (log2)") + ylab("Average number of tweets per day (log2)") + ggtitle("Correlation between the Open Science MOOC-ers followers count and tweets per day", subtitle = " ") +
+  
+    xlab("Number of followers (log2)") + ylab("Average number of tweets per day (log2)") + ggtitle("Correlation between followers count and tweets per day", subtitle = " ") +
    viz_theme
 ```
@@ -435,8 +435,8 @@ desc_tidy %>%
  top_n(20, n) %>%
  ggplot(aes(x = reorder(word_stem, n), y = n)) +
  geom_bar(stat = "identity", width = 0.5, alpha = 0.8, color = "#2c3e50", fill = "#2c3e50") +
-  
-  ylab("") + xlab("") +
-  
-  ggtitle(label = "Count of words in the Open Science MOOC-ers profile descriptions", subtitle = " ") + ylim(0, 1250) +
+  
+  ylab("Count") + xlab("") +
+  
+  ggtitle(label = "Most common words in Open Science MOOC-ers' profiles", subtitle = " ") + ylim(0, 1250) +
  coord_flip() + viz_theme
 ```
@@ -463,7 +463,7 @@ desc_tidy %>%
  top_n(20, n) %>%
  ggplot(aes(word, n)) +
  geom_bar(stat = "identity", width = 0.5, alpha = 0.8, color = "#2c3e50", fill = "#2c3e50") +
-  
-  xlab("") + ylab("") + ggtitle("Most frequently used hashtags in the Open Science MOOC-ers profile descriptions", subtitle = " ") +
+  
+  xlab("") + ylab("Count") + ggtitle("Most frequently used hashtags in Open Science MOOC-ers' profiles", subtitle = " ") +
  coord_flip() + viz_theme + ylim(0, 250)
 ```
@@ -482,7 +482,7 @@ desc_tidy %>%
  top_n(20, n) %>%
  ggplot(aes(word, n)) +
  geom_bar(stat = "identity", width = 0.5, alpha = 0.8, color = "#2c3e50", fill = "#2c3e50") +
-  
-  xlab("") + ylab("") + ggtitle("Most frequently used mentions in the Open Science MOOC-ers profile descriptions", subtitle = " ") + ylim(0, 15) +
+  
+  xlab("") + ylab("Count") + ggtitle("Most frequent mentions in Open Science MOOC-ers' profiles", subtitle = " ") + ylim(0, 15) +
  coord_flip() + viz_theme
 ```
@@ -512,7 +512,7 @@ bigrams_tidy %>%
  top_n(20, n) %>%
  ggplot(aes(x = reorder(bigram, n), y = n)) +
  geom_bar(stat = "identity", width = 0.5, alpha = 0.8, color = "#2c3e50", fill = "#2c3e50") +
-  
-  xlab("") + ylab("") + ggtitle("Most common bigrams in the Open Science MOOC-ers profile descriptions", subtitle = " ") + ylim(0, 200) +
+  
+  xlab("") + ylab("Count") + ggtitle("Most common bigrams in Open Science MOOC-ers' profiles", subtitle = " ") + ylim(0, 200) +
  coord_flip() + viz_theme
 ```
@@ -535,7 +535,7 @@ ggraph(bigram_graph, layout = "nicely") +
  geom_node_label(aes(label = name), vjust = 1, hjust = 0.5, label.size = 0.05,
                  label.padding = unit(0.15, "lines"), label.r = 0.05, fill = "#ffffff66",
                  color = "#2c3e50", repel = TRUE) +
-  
-  ggtitle("Bigram network of the Open Science MOOC-ers profile descriptions", subtitle = "Edge width relative to number of times each bigram occurs") +
+  
+  ggtitle("Bigram network of Open Science MOOC-ers' profile texts", subtitle = "Edge width relative to number of times each bigram occurs") +
  viz_theme + theme(axis.line = element_blank(),
                    axis.text = element_blank(),
                    axis.title = element_blank(),
@@ -559,7 +559,7 @@ desc_tidy %>%
  geom_bar(aes(fill = sentiment), stat = "identity", alpha = 0.8, width = 0.5) +
  scale_fill_manual(values = c("#620000","#006262")) +
  scale_x_discrete(labels = c("Negative", "Positive")) +
-  
-  xlab("") + ylab("") + ggtitle("Count of positive and negative sentiments in the Open Science MOOC-ers profile descriptions", subtitle = "Sentiment lexicon by Bing Liu and collaborators") + ylim(0, 2000) +
+  
+  xlab("") + ylab("Count") + ggtitle("Positive and negative sentiments in Open Science MOOC-ers' profiles", subtitle = "Sentiment lexicon by Bing Liu and collaborators") + ylim(0, 2000) +
  theme(legend.position = "none") + viz_theme
 ```
@@ -575,7 +575,7 @@ desc_tidy %>%
  mutate(difference = positive - negative) %>%
  ggplot(aes(x = difference)) +
  geom_density(color = "#2c3e50", fill = "#2c3e50", alpha = 0.8) +
-  
-  xlab("Sentiment") + ylab("Density") + ggtitle("Distribution of sentiments in the Open Science MOOC-ers profile descriptions", subtitle = "Sentiment lexicon by Bing Liu and collaborators") + ylim(0, 1.6) +
+  
+  xlab("Sentiment") + ylab("Density") + ggtitle("Distribution of sentiments in Open Science MOOC-ers' profiles", subtitle = "Sentiment lexicon by Bing Liu and collaborators") + ylim(0, 1.6) +
  viz_theme
 ```
@@ -596,7 +596,7 @@ desc_tidy %>%
  scale_fill_manual(name = "Sentiment",
                    labels = c("Negative", "Positive"),
                    values = c("negative" = "#620000", "positive" = "#006262")) +
-  
-  xlab("") + ylab("") + ggtitle("Most common positive and negative words in the Open Science MOOC-ers profile descriptions", subtitle = "Sentiment lexicon by Bing Liu and collaborators") +
+  
+  xlab("") + ylab("") + ggtitle("Most common positive and negative words in Open Science MOOC-ers' profiles", subtitle = "Sentiment lexicon by Bing Liu and collaborators") +
  viz_theme + coord_flip() + ylim(0, 150)
 ```
@@ -609,7 +609,7 @@ desc_tidy %>%
  count(word, sentiment, sort = TRUE) %>%
  acast(word ~ sentiment, value.var = "n", fill = 0) %>%
  comparison.cloud(colors = c("#620000", "#006262"),
-  
-                   max.words = 100)
+  
+                   max.words = 200)
 ```
 ## Conclusion

diff --git a/OpenScienceMOOC-follower-analysis.pdf b/OpenScienceMOOC-follower-analysis.pdf