Dividing the data into 10 selected series
With a total of 800 characters in the dataset, it would be difficult to compare all of their personality spectrums in a single visualization. Hence, the following 10 renowned fictional series are selected, and a dataset is prepared for each so that separate visualizations can be made. Where possible, these 10 series are also combined for an aggregate visualization.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
setwd("~/COMM2501 Portfolio - z5218332")
# Load the complete cleaned dataset
data_full <- read.csv("~/COMM2501 Portfolio - z5218332/files/raw_data_cleaned.csv")
# One subset per selected series, matched on the fictional_work column
data_ala <- filter(data_full, fictional_work == "Avatar: The Last Airbender")
data_got <- filter(data_full, fictional_work == "Game of Thrones")
data_hp <- filter(data_full, fictional_work == "Harry Potter")
data_lotr <- filter(data_full, fictional_work == "Lord of the Rings")
data_sl <- filter(data_full, fictional_work == "Sherlock")
data_sw <- filter(data_full, fictional_work == "Star Wars")
data_mcu <- filter(data_full, fictional_work == "Marvel Cinematic Universe")
data_gp <- filter(data_full, fictional_work == "The Good Place")
data_lk <- filter(data_full, fictional_work == "The Lion King")
data_wd <- filter(data_full, fictional_work == "The Walking Dead")
# Stack the ten subsets, in the order listed, into one combined data frame
data_10series <- rbind(
  data_ala, data_got, data_hp, data_lotr, data_sl,
  data_sw, data_mcu, data_gp, data_lk, data_wd
)
Index dataframes construction
To provide a code index for the characters and spectrums, index dataframes are constructed for easy reference.
#Creating index tables for information
# Character lookup: the distinct combinations of code, series, name and gender,
# ordered by character code.
character_index <- data_full %>%
  select(character_code, fictional_work, character_name, gender) %>%
  distinct() %>%
  arrange(character_code)
# Spectrum lookup: the distinct combinations of spectrum code and its
# positive/negative labels, ordered by spectrum code.
spectrum_index <- data_full %>%
  select(spectrum, spectrum_positive, spectrum_negative) %>%
  distinct() %>%
  arrange(spectrum)
# Preview the first rows of the character index
head(character_index)
## character_code fictional_work character_name gender
## 1 A/01 Alien Dallas Male
## 2 A/02 Alien Ellen Ripley Female
## 3 A/03 Alien Lambert Female
## 4 A/04 Alien Ash Male
## 5 A/05 Alien the Alien Male
## 6 A/06 Alien Parker Male
# Preview the first rows of the spectrum index
head(spectrum_index)
## spectrum spectrum_positive spectrum_negative
## 1 BAP111 high-tech low-tech
## 2 BAP114 arcane mainstream
## 3 BAP115 outlaw sheriff
## 4 BAP12 artistic scientific
## 5 BAP121 sarcastic genuine
## 6 BAP133 adventurous stick-in-the-mud
Personality spectrum matrices construction
Next, the spectrum and mean columns are reshaped so that each character becomes a row and each spectrum becomes a column. This makes the dataset more concise and allows correlations to be calculated.
#Characters spectrum and mean matrix

# Reshape long data into a wide table: the character_code column followed by
# one column per spectrum, with the mean as the cell value.
# NOTE(review): spread() is superseded by tidyr::pivot_wider(); it is kept here
# so the row/column ordering of the saved outputs does not change.
widen_spectrum_means <- function(long_data) {
  long_data %>%
    select(character_code, spectrum, mean) %>%
    spread(spectrum, mean)
}

# Turn a wide table into a matrix-like data frame: remove the first column
# (character_code) and use its values as row names instead.
# drop = FALSE keeps a data frame even if only one spectrum column remains.
as_mean_matrix <- function(wide_data) {
  mean_matrix <- wide_data[, -1, drop = FALSE]
  rownames(mean_matrix) <- wide_data[, 1]
  mean_matrix
}

# Full dataset
char_spec_mean_full <- widen_spectrum_means(data_full)
mean_matrix_full <- as_mean_matrix(char_spec_mean_full)

# Individual series
char_spec_mean_ala <- widen_spectrum_means(data_ala)
mean_matrix_ala <- as_mean_matrix(char_spec_mean_ala)

char_spec_mean_got <- widen_spectrum_means(data_got)
mean_matrix_got <- as_mean_matrix(char_spec_mean_got)

char_spec_mean_hp <- widen_spectrum_means(data_hp)
mean_matrix_hp <- as_mean_matrix(char_spec_mean_hp)

char_spec_mean_lotr <- widen_spectrum_means(data_lotr)
mean_matrix_lotr <- as_mean_matrix(char_spec_mean_lotr)

char_spec_mean_sl <- widen_spectrum_means(data_sl)
mean_matrix_sl <- as_mean_matrix(char_spec_mean_sl)

char_spec_mean_sw <- widen_spectrum_means(data_sw)
mean_matrix_sw <- as_mean_matrix(char_spec_mean_sw)

char_spec_mean_mcu <- widen_spectrum_means(data_mcu)
mean_matrix_mcu <- as_mean_matrix(char_spec_mean_mcu)

char_spec_mean_gp <- widen_spectrum_means(data_gp)
mean_matrix_gp <- as_mean_matrix(char_spec_mean_gp)

char_spec_mean_lk <- widen_spectrum_means(data_lk)
mean_matrix_lk <- as_mean_matrix(char_spec_mean_lk)

char_spec_mean_wd <- widen_spectrum_means(data_wd)
mean_matrix_wd <- as_mean_matrix(char_spec_mean_wd)

# All 10 selected series combined
char_spec_mean_10series <- widen_spectrum_means(data_10series)
mean_matrix_10series <- as_mean_matrix(char_spec_mean_10series)
# Preview the first rows of the combined 10-series mean matrix
head(mean_matrix_10series)
## BAP111 BAP114 BAP115 BAP12 BAP121 BAP133 BAP134 BAP139 BAP143 BAP15
## ALA/01 -24.1 23.0 -7.6 32.2 -36.3 43.6 -16.2 0.5 -16.1 -18.3
## ALA/02 -4.2 21.3 22.7 -5.9 -1.6 -0.3 -23.0 16.8 26.9 -15.5
## ALA/03 -12.9 7.4 -21.3 10.7 -31.5 0.4 -11.2 19.5 -1.4 31.9
## ALA/04 -18.6 27.4 26.0 -5.4 30.6 31.1 -41.5 37.3 36.5 -19.9
## ALA/05 2.0 -5.0 -4.8 -4.2 22.6 23.8 -21.7 -2.1 26.7 -21.8
## ALA/06 22.0 17.5 -6.0 -22.2 36.6 7.0 -3.4 43.6 39.2 -17.1
## BAP166 BAP179 BAP180 BAP186 BAP20 BAP206 BAP212 BAP223 BAP229 BAP231
## ALA/01 17.7 -40.4 -35.9 19.1 40.7 -20.8 38.8 -38.8 -19.1 28.4
## ALA/02 23.5 4.6 -7.1 -0.2 -22.3 -2.4 -32.2 27.9 11.0 28.3
## ALA/03 -9.6 -42.1 -34.4 -32.1 25.5 -34.7 20.0 -22.2 -7.1 26.5
## ALA/04 32.5 -2.0 -17.1 7.1 -25.6 -4.4 -22.8 -2.5 -9.4 23.5
## ALA/05 23.3 -25.8 -22.9 18.6 -25.8 2.3 35.8 -20.0 -23.9 7.0
## ALA/06 37.8 43.3 14.4 2.9 -32.4 34.9 -19.0 34.1 25.7 -10.4
## BAP267 BAP28 BAP29 BAP35 BAP37 BAP4 BAP44 BAP5 BAP57 BAP66 BAP77 BAP79
## ALA/01 -19.5 45.8 36.2 26.6 -6.2 2.1 -1.1 3.0 -19.5 19.1 18.6 -38.8
## ALA/02 -22.3 17.5 -7.2 28.2 -13.6 31.0 -18.8 -14.1 26.8 4.7 -17.9 0.1
## ALA/03 -37.8 44.3 11.6 14.1 -29.7 -32.3 -24.5 17.7 24.8 24.7 20.6 -34.0
## ALA/04 0.4 36.4 26.5 -11.3 -32.3 15.5 -3.5 -9.0 37.1 14.9 -28.6 0.6
## ALA/05 -0.5 43.5 25.1 5.8 1.8 25.0 21.3 -6.0 11.7 -1.4 -11.6 -10.6
## ALA/06 -10.6 -11.5 7.4 -2.1 -35.0 -1.6 12.4 14.7 46.5 -34.1 -32.6 41.9
## BAP8 BAP81 BAP87 BAP88
## ALA/01 -33.1 38.2 10.9 8.8
## ALA/02 30.3 -1.4 -19.6 -10.7
## ALA/03 15.3 33.8 16.7 -23.0
## ALA/04 12.1 -11.4 7.2 18.2
## ALA/05 -15.1 12.4 29.0 3.4
## ALA/06 42.7 -43.6 -35.9 -33.8
Personality correlation dataframe construction
Correlations between characters are then calculated from the resulting matrices.
#Construct correlation matrices of characters in each series
# Each mean matrix has characters as rows, so it is transposed first to make
# characters the columns that cor() correlates pairwise.
char_cor_ala <- mean_matrix_ala %>% t() %>% cor()
char_cor_got <- mean_matrix_got %>% t() %>% cor()
char_cor_hp <- mean_matrix_hp %>% t() %>% cor()
char_cor_lotr <- mean_matrix_lotr %>% t() %>% cor()
char_cor_sl <- mean_matrix_sl %>% t() %>% cor()
char_cor_sw <- mean_matrix_sw %>% t() %>% cor()
char_cor_mcu <- mean_matrix_mcu %>% t() %>% cor()
char_cor_gp <- mean_matrix_gp %>% t() %>% cor()
char_cor_lk <- mean_matrix_lk %>% t() %>% cor()
char_cor_wd <- mean_matrix_wd %>% t() %>% cor()
# Same calculation across the characters of all 10 series combined
char_cor_10series <- mean_matrix_10series %>% t() %>% cor()
#Saving dataframes to project directory
Lastly, as these dataframes will be used repeatedly as a base for the visualizations, they are saved inside the project directory.
#Save data frames for later use
# Directory that receives one .Rda file per object; each file is named after
# the object it contains.
output_dir <- "~/COMM2501 Portfolio - z5218332/files"

# Suffixes of the ten individual series objects.
series_suffixes <- c("ala", "got", "hp", "lotr", "sl",
                     "sw", "mcu", "gp", "lk", "wd")

# Names of every object to persist (49 in total). The correlation matrices
# exist per series and for the 10-series combination, but not for the full data.
objects_to_save <- c(
  "character_index", "spectrum_index", "data_full",
  paste0("data_", c(series_suffixes, "10series")),
  paste0("mean_matrix_", c("full", series_suffixes, "10series")),
  paste0("char_spec_mean_", c("full", series_suffixes, "10series")),
  paste0("char_cor_", c(series_suffixes, "10series"))
)

# save(list = ...) stores the object under its own name, exactly as
# save(<object>, file = ...) does.
for (object_name in objects_to_save) {
  save(list = object_name,
       file = file.path(output_dir, paste0(object_name, ".Rda")))
}
These dataframes are now ready to be used for the data visualizations (DVs) that I will be making next!