Dividing the data into 10 selected series

The dataset contains a total of 800 characters, which makes it difficult to compare all of their personality spectrums in a single visualization. Hence, the following 10 renowned fictional series are selected, and a dataset is prepared for each of them so that separate visualizations can be made; the 10 series are also combined, where possible, for an aggregate visualization.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
# Work from the portfolio project directory
setwd("~/COMM2501 Portfolio - z5218332")

# Load the full cleaned dataset from the project's files folder
data_full <- read.csv(
  file = "~/COMM2501 Portfolio - z5218332/files/raw_data_cleaned.csv"
)

# Subset the full data into one data frame per selected series, matching on the
# exact fictional_work title, then stack the ten subsets (in the same order)
# into a single combined data frame.
data_ala <- filter(data_full, fictional_work == "Avatar: The Last Airbender")
data_got <- filter(data_full, fictional_work == "Game of Thrones")
data_hp <- filter(data_full, fictional_work == "Harry Potter")
data_lotr <- filter(data_full, fictional_work == "Lord of the Rings")
data_sl <- filter(data_full, fictional_work == "Sherlock")
data_sw <- filter(data_full, fictional_work == "Star Wars")
data_mcu <- filter(data_full, fictional_work == "Marvel Cinematic Universe")
data_gp <- filter(data_full, fictional_work == "The Good Place")
data_lk <- filter(data_full, fictional_work == "The Lion King")
data_wd <- filter(data_full, fictional_work == "The Walking Dead")

data_10series <- rbind(
  data_ala, data_got, data_hp, data_lotr, data_sl,
  data_sw, data_mcu, data_gp, data_lk, data_wd
)

Index dataframes construction

To provide a code index for the characters and spectrums, index dataframes are constructed for simple reference.

# Creating index tables for information

# Character lookup: each distinct combination of character code, source work,
# character name and gender, ordered by character code.
character_index <- data_full %>%
  select(character_code, fictional_work, character_name, gender) %>%
  distinct() %>%
  arrange(character_code)

# Spectrum lookup: each distinct spectrum code with its positive and negative
# labels, ordered by spectrum code. (Built once; the original repeated this
# identical statement a second time.)
spectrum_index <- data_full %>%
  select(spectrum, spectrum_positive, spectrum_negative) %>%
  distinct() %>%
  arrange(spectrum)

# Preview the first six rows of each index table
head(character_index, n = 6L)
##   character_code fictional_work character_name gender
## 1           A/01          Alien         Dallas   Male
## 2           A/02          Alien   Ellen Ripley Female
## 3           A/03          Alien        Lambert Female
## 4           A/04          Alien            Ash   Male
## 5           A/05          Alien      the Alien   Male
## 6           A/06          Alien         Parker   Male
head(spectrum_index, n = 6L)
##   spectrum spectrum_positive spectrum_negative
## 1   BAP111         high-tech          low-tech
## 2   BAP114            arcane        mainstream
## 3   BAP115            outlaw           sheriff
## 4    BAP12          artistic        scientific
## 5   BAP121         sarcastic           genuine
## 6   BAP133       adventurous  stick-in-the-mud

Personality spectrum matrices construction

Next, the spectrum and mean columns are reshaped so that each character becomes a row and each spectrum becomes a column holding that character's mean score. This gives a more concise dataset and the matrix form needed for the correlation calculation.

# Characters spectrum and mean matrix

# Reshape long (character_code, spectrum, mean) rows into a wide table with one
# row per character_code and one column per spectrum holding the mean value.
# spread() is kept on purpose: its replacement pivot_wider() orders the new
# columns by first appearance by default, which could change the column order
# of the matrices consumed downstream.
spread_character_means <- function(series_data) {
  series_data %>%
    select(character_code, spectrum, mean) %>%
    spread(spectrum, mean)
}

# Turn a wide table into a score-only data frame: the first column (the
# character codes) becomes the row names and is dropped from the columns.
# drop = FALSE keeps a data frame even if only a single spectrum column exists.
as_mean_matrix <- function(wide_means) {
  scores <- wide_means[, -1, drop = FALSE]
  rownames(scores) <- wide_means[[1]]
  scores
}

char_spec_mean_full <- spread_character_means(data_full)
mean_matrix_full <- as_mean_matrix(char_spec_mean_full)

char_spec_mean_ala <- spread_character_means(data_ala)
mean_matrix_ala <- as_mean_matrix(char_spec_mean_ala)

char_spec_mean_got <- spread_character_means(data_got)
mean_matrix_got <- as_mean_matrix(char_spec_mean_got)

char_spec_mean_hp <- spread_character_means(data_hp)
mean_matrix_hp <- as_mean_matrix(char_spec_mean_hp)

char_spec_mean_lotr <- spread_character_means(data_lotr)
mean_matrix_lotr <- as_mean_matrix(char_spec_mean_lotr)

char_spec_mean_sl <- spread_character_means(data_sl)
mean_matrix_sl <- as_mean_matrix(char_spec_mean_sl)

char_spec_mean_sw <- spread_character_means(data_sw)
mean_matrix_sw <- as_mean_matrix(char_spec_mean_sw)

char_spec_mean_mcu <- spread_character_means(data_mcu)
mean_matrix_mcu <- as_mean_matrix(char_spec_mean_mcu)

char_spec_mean_gp <- spread_character_means(data_gp)
mean_matrix_gp <- as_mean_matrix(char_spec_mean_gp)

char_spec_mean_lk <- spread_character_means(data_lk)
mean_matrix_lk <- as_mean_matrix(char_spec_mean_lk)

char_spec_mean_wd <- spread_character_means(data_wd)
mean_matrix_wd <- as_mean_matrix(char_spec_mean_wd)

char_spec_mean_10series <- spread_character_means(data_10series)
mean_matrix_10series <- as_mean_matrix(char_spec_mean_10series)


# Preview the first six characters of the combined 10-series matrix
head(mean_matrix_10series, n = 6L)
##        BAP111 BAP114 BAP115 BAP12 BAP121 BAP133 BAP134 BAP139 BAP143 BAP15
## ALA/01  -24.1   23.0   -7.6  32.2  -36.3   43.6  -16.2    0.5  -16.1 -18.3
## ALA/02   -4.2   21.3   22.7  -5.9   -1.6   -0.3  -23.0   16.8   26.9 -15.5
## ALA/03  -12.9    7.4  -21.3  10.7  -31.5    0.4  -11.2   19.5   -1.4  31.9
## ALA/04  -18.6   27.4   26.0  -5.4   30.6   31.1  -41.5   37.3   36.5 -19.9
## ALA/05    2.0   -5.0   -4.8  -4.2   22.6   23.8  -21.7   -2.1   26.7 -21.8
## ALA/06   22.0   17.5   -6.0 -22.2   36.6    7.0   -3.4   43.6   39.2 -17.1
##        BAP166 BAP179 BAP180 BAP186 BAP20 BAP206 BAP212 BAP223 BAP229 BAP231
## ALA/01   17.7  -40.4  -35.9   19.1  40.7  -20.8   38.8  -38.8  -19.1   28.4
## ALA/02   23.5    4.6   -7.1   -0.2 -22.3   -2.4  -32.2   27.9   11.0   28.3
## ALA/03   -9.6  -42.1  -34.4  -32.1  25.5  -34.7   20.0  -22.2   -7.1   26.5
## ALA/04   32.5   -2.0  -17.1    7.1 -25.6   -4.4  -22.8   -2.5   -9.4   23.5
## ALA/05   23.3  -25.8  -22.9   18.6 -25.8    2.3   35.8  -20.0  -23.9    7.0
## ALA/06   37.8   43.3   14.4    2.9 -32.4   34.9  -19.0   34.1   25.7  -10.4
##        BAP267 BAP28 BAP29 BAP35 BAP37  BAP4 BAP44  BAP5 BAP57 BAP66 BAP77 BAP79
## ALA/01  -19.5  45.8  36.2  26.6  -6.2   2.1  -1.1   3.0 -19.5  19.1  18.6 -38.8
## ALA/02  -22.3  17.5  -7.2  28.2 -13.6  31.0 -18.8 -14.1  26.8   4.7 -17.9   0.1
## ALA/03  -37.8  44.3  11.6  14.1 -29.7 -32.3 -24.5  17.7  24.8  24.7  20.6 -34.0
## ALA/04    0.4  36.4  26.5 -11.3 -32.3  15.5  -3.5  -9.0  37.1  14.9 -28.6   0.6
## ALA/05   -0.5  43.5  25.1   5.8   1.8  25.0  21.3  -6.0  11.7  -1.4 -11.6 -10.6
## ALA/06  -10.6 -11.5   7.4  -2.1 -35.0  -1.6  12.4  14.7  46.5 -34.1 -32.6  41.9
##         BAP8 BAP81 BAP87 BAP88
## ALA/01 -33.1  38.2  10.9   8.8
## ALA/02  30.3  -1.4 -19.6 -10.7
## ALA/03  15.3  33.8  16.7 -23.0
## ALA/04  12.1 -11.4   7.2  18.2
## ALA/05 -15.1  12.4  29.0   3.4
## ALA/06  42.7 -43.6 -35.9 -33.8

Personality correlation dataframe construction

Consequently, correlations between characters are calculated from the resulting matrices.

# Construct correlation matrices of characters in each series.
# Each mean matrix has characters as rows, so it is transposed first: cor()
# correlates columns, and after t() every column is one character.
char_cor_ala <- mean_matrix_ala %>% t() %>% cor()
char_cor_got <- mean_matrix_got %>% t() %>% cor()
char_cor_hp <- mean_matrix_hp %>% t() %>% cor()
char_cor_lotr <- mean_matrix_lotr %>% t() %>% cor()
char_cor_sl <- mean_matrix_sl %>% t() %>% cor()
char_cor_sw <- mean_matrix_sw %>% t() %>% cor()
char_cor_mcu <- mean_matrix_mcu %>% t() %>% cor()
char_cor_gp <- mean_matrix_gp %>% t() %>% cor()
char_cor_lk <- mean_matrix_lk %>% t() %>% cor()
char_cor_wd <- mean_matrix_wd %>% t() %>% cor()
char_cor_10series <- mean_matrix_10series %>% t() %>% cor()

Saving dataframes to project directory

Lastly, as these dataframes will be used repeatedly as the basis for the visualizations, they are saved inside the project directory.

# Save data frames for later use
# Each object is written to its own "<object name>.Rda" file inside the
# project's files directory, so load() restores it under its original name.
output_dir <- "~/COMM2501 Portfolio - z5218332/files"

# Suffixes identifying the ten selected series and their combination. "full"
# is added separately below because no char_cor_full matrix is built.
series_suffixes <- c(
  "ala", "got", "hp", "lotr", "sl", "sw", "mcu", "gp", "lk", "wd", "10series"
)
all_suffixes <- c("full", series_suffixes)

objects_to_save <- c(
  "character_index",
  "spectrum_index",
  paste0("data_", all_suffixes),
  paste0("mean_matrix_", all_suffixes),
  paste0("char_spec_mean_", all_suffixes),
  paste0("char_cor_", series_suffixes)
)

# save(list = ) takes the object's name as a string, which keeps the saved
# object name and the file name in sync by construction.
for (object_name in objects_to_save) {
  save(
    list = object_name,
    file = file.path(output_dir, paste0(object_name, ".Rda"))
  )
}
rm(object_name)

These dataframes are now ready to be used for the data visualizations (DVs) that I will be making next!