generated from jtr13/EDAVtemplate
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy path03-cleaning.Rmd
189 lines (155 loc) · 8.01 KB
/
03-cleaning.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
# Data transformation
```{r}
library(ggplot2)
library(raster)
library(ncdf4)
library(dplyr)
library(latex2exp)
library(tidyverse)
library(zoo)
```
## Data for patterns of climate change
For the data that illustrate the patterns of climate change, we mainly preprocess the data by aggregate the data to month and up to year level so that we can have a clearer view of the pattern over the past two centuries. For the [HadCRUT](https://crudata.uea.ac.uk/cru/data/temperature) data, We mask out the months with zero coverage to keep the format consistent. We also filter out the data at year level, as we only use this dataset to investigate climate pattern at monthly level. We aggregate the [GISTEMP v4](https://data.giss.nasa.gov/gistemp/) data to yearly level to support analysis on the **cause** part. Finally, we preprocess the [NACR](https://climatedataguide.ucar.edu/climate-data/terraclimate-global-high-resolution-gridded-temperature-precipitation-and-other-water) data to the **Raster** or "gridded" data formatted in pixels to represent an area on the Earth's surface.
```{r}
temp <- read.table("data/raw/GLB.Ts+dSST.csv", header = TRUE, skip = 1, sep = ",")
temp_monthly <- temp %>% select(-J.D, -D.N, -DJF, -MAM, -JJA, -SON) %>%
transform(Nov = as.double(Nov), Dec = as.double(Dec)) %>%
pivot_longer(cols = !Year, names_to = "Month", values_to = "temp") %>%
transform(time = as.yearmon(paste(Month, Year))) %>%
select(time, temp)
write.csv(temp_monthly, "data/clean/temp_monthly.csv", row.names = FALSE)
```
```{r}
temp_yearly <- temp %>% select(Year, J.D) %>%
transform(J.D = as.double(J.D)) %>%
rename(temp = J.D)
write.csv(temp_yearly, "data/clean/temp_yearly.csv", row.names = FALSE)
```
```{r}
read_cru_hemi <- function(filename) {
# read in whole file as table
tab <- read.table(filename,fill=TRUE)
nrows <- nrow(tab)
# create frame
hemi <- data.frame(
year=tab[seq(1,nrows,2),1],
annual=tab[seq(1,nrows,2),14],
month=array(tab[seq(1,nrows,2),2:13]),
cover=array(tab[seq(2,nrows,2),2:13])
)
# mask out months with 0 coverage
hemi$month.1 [which(hemi$cover.1 ==0)] <- NA
hemi$month.2 [which(hemi$cover.2 ==0)] <- NA
hemi$month.3 [which(hemi$cover.3 ==0)] <- NA
hemi$month.4 [which(hemi$cover.4 ==0)] <- NA
hemi$month.5 [which(hemi$cover.5 ==0)] <- NA
hemi$month.6 [which(hemi$cover.6 ==0)] <- NA
hemi$month.7 [which(hemi$cover.7 ==0)] <- NA
hemi$month.8 [which(hemi$cover.8 ==0)] <- NA
hemi$month.9 [which(hemi$cover.9 ==0)] <- NA
hemi$month.10[which(hemi$cover.10==0)] <- NA
hemi$month.11[which(hemi$cover.11==0)] <- NA
hemi$month.12[which(hemi$cover.12==0)] <- NA
#
return(hemi)
}
temp_dat <- read_cru_hemi("data/raw/HadCRUT4-gl.dat")
#remove cover
temp_dat_monthly <- temp_dat %>%
select(-starts_with("cover")) %>%
select(-starts_with("annual")) %>%
gather(month, anomaly, -year) %>%
mutate(month = gsub("month\\.", "", month)) %>%
mutate(month = as.numeric(month)) %>%
filter(year !=2016)
write.csv(temp_dat_monthly, 'data/clean/temp_dat_monthly.csv', row.names = FALSE)
```
```{r}
read_nc_to_raster <- function(filename, idx, output){
nc_data = nc_open(filename)
temp = ncvar_get(nc_data, 'tmax')
lon = ncvar_get(nc_data, 'lon')
lat = ncvar_get(nc_data, 'lat')
year = temp[,,idx]
year=year[order(nrow(year):1),]
world_map = raster(t(year), xmn=min(lon), xmx = max(lon), ymn = min(lat), ymx = max(lat))
writeRaster(world_map, output, overwrite=TRUE)
}
read_nc_to_raster('data/raw/TerraClimate_tmax_1960.nc', 1, 'data/clean/1960_tmax')
read_nc_to_raster('data/raw/TerraClimate_tmax_1990.nc', 2, 'data/clean/1990_tmax')
read_nc_to_raster('data/raw/TerraClimate_tmax_2020.nc', 3, 'data/clean/2020_tmax')
```
## Data for causes of climate change
In this part, except the temperature data, we use the data about Earth's orbit and solar irradiation, and the data about greenhouse gas. For the Milankovitch orbital data, we choose to download the data from 2800s BC to now on the [website](https://biocycle.atmos.colostate.edu/shiny/Milankovitch/), and select the variables that is helpful for us to solve problems. The data of four greenhouse gases are separated, so we write functions to read mole fraction data and growth rate data, and combine the data of four gases into one table. For the convenience of following table combination and plotting, the time variables in the tables are transformed to the same format.
```{r}
read_greenHouseGas <- function(file){
GAS <- read.table(file, header = FALSE, comment.char = "#")
GAS <- GAS %>% select(V1, V2, V3, V4) %>% rename(year = V1, month = V2, decimal = V3, gas = V4)
GAS$time <- as.yearmon(paste(GAS$year, GAS$month), "%Y %m")
GAS <- GAS %>% select(time, decimal, gas)
}
```
```{r}
CO2 <- read_greenHouseGas("https://gml.noaa.gov/webdata/ccgg/trends/co2/co2_mm_gl.txt")
CO2 <- CO2 %>% rename(CO2 = gas)
CH4 <- read_greenHouseGas("https://gml.noaa.gov/webdata/ccgg/trends/ch4/ch4_mm_gl.txt")
CH4 <- CH4 %>% rename(CH4 = gas)
N2O <- read_greenHouseGas("https://gml.noaa.gov/webdata/ccgg/trends/n2o/n2o_mm_gl.txt")
N2O <- N2O %>% rename(N2O = gas)
SF6 <- read_greenHouseGas("https://gml.noaa.gov/webdata/ccgg/trends/sf6/sf6_mm_gl.txt")
SF6 <- SF6 %>% rename(SF6 = gas)
```
```{r}
gh_gas <- CO2 %>% full_join(CH4, by = c("time" = "time", "decimal" = "decimal")) %>% full_join(N2O, by = c("time" = "time", "decimal" = "decimal")) %>% full_join(SF6, by = c("time" = "time", "decimal" = "decimal"))
write.csv(gh_gas, "data/clean/gh_gas_monthly.csv", row.names = FALSE)
```
```{r}
read_gasGrowthRate <- function(file){
GAS <- read.table(file, header = FALSE, comment.char = "#")
GAS <- GAS %>% select(V1, V2) %>% rename(year = V1, rate = V2)
}
```
```{r}
CO2_rate = read_gasGrowthRate("https://gml.noaa.gov/webdata/ccgg/trends/co2/co2_gr_gl.txt") %>% rename(CO2 = rate)
CH4_rate = read_gasGrowthRate("https://gml.noaa.gov/webdata/ccgg/trends/ch4/ch4_gr_gl.txt") %>% rename(CH4 = rate)
N2O_rate = read_gasGrowthRate("https://gml.noaa.gov/webdata/ccgg/trends/n2o/n2o_gr_gl.txt") %>% rename(N2O = rate)
SF6_rate = read_gasGrowthRate("https://gml.noaa.gov/webdata/ccgg/trends/sf6/sf6_gr_gl.txt") %>% rename(SF6 = rate)
```
```{r}
gas_rate <- CO2_rate %>% full_join(CH4_rate, by = c("year" = "year")) %>% full_join(N2O_rate, by = c("year" = "year")) %>% full_join(SF6_rate, by = c("year" = "year")) %>% filter(year >= 1980)
write.csv(gas_rate, "data/clean/gas_rate_yearly.csv", row.names = FALSE)
```
## Data for effects of climate change
In this part, the data sets about ice sheet mass in Antarctic, ice sheet mass in Greenland and Global sea level change are in txt. files, so we just simply read the table.
```{r}
df <- read.table("data/clean/ice_sheet_antarctica_mass_200204_202109.txt",
skip=31,
col.names = c('TIME', 'antarctica', 'antarctica_sigma'))
```
```{r}
dt <- read.table("data/clean/ice_sheet_greenland_mass_200204_202109.txt",
skip=31,
col.names = c('TIME', 'greenland', 'greenland_sigma'))
```
```{r}
tt <- read.table("data/clean/sea_level_GMSL_TPJAOS_5.1_199209_202108.txt", skip = 48)
head(tt)
```
Another data set about the extreme events we use is from NOAA database. Since every year's data is gathered in one csv. file and we wants to use data from 2000 to 2021, we download the 22 csv. files into one folder and then try to compile all the data into one csv. file.
```{r}
library(readr)
library(data.table)
library(tidyverse)
dir = "data/clean/stormevents_2000_2021" # Search the file under some specific directory
#get the csv file list
file_list = list.files(path = dir, pattern = "*.csv$",recursive = TRUE,full.names = TRUE)
#form a new place to save the csv file
store_csv = "data/clean/stormevents_2000_2021_new.csv" #paste(dir,"new.csv")
for(i in 1:length(file_list)) #loop
{
df = fread(file = file_list[i],encoding = 'UTF-8') #read the csv files
#if not exist, then write a new csv file
write_csv(df,path = store_csv,append = TRUE, col_names = FALSE)
}
stormevents_2000_2021_new <- read_csv("data/clean/stormevents_2000_2021_new.csv")
```