new_schemas <- "CREATE SCHEMA ssb_grids"
dbSendQuery(con, new_schemas)
2 Setup
In this chapter we set up the database schemas. We start with the SSB500 dataset
2.1 SSB500
2.1.1 Setup schema
Setting up a new schema called ssb_grids. Here we can store SSB500, but also SSB10km which may become relevant.
Write queries to grant read only access to all.
priv <- "ALTER DEFAULT PRIVILEGES IN SCHEMA ssb_grids GRANT SELECT ON TABLES TO ag_pgsql_ano_moduler_ro"
priv2 <- "ALTER DEFAULT PRIVILEGES IN SCHEMA ssb_grids GRANT SELECT ON TABLES TO ag_pgsql_ano_moduler_rw"
priv3 <- "ALTER DEFAULT PRIVILEGES IN SCHEMA ssb_grids GRANT SELECT ON TABLES TO ag_pgsql_ano_moduler_admin"
priv4 <- "GRANT USAGE ON SCHEMA ssb_grids TO ag_pgsql_ano_moduler_admin"
priv5 <- "GRANT USAGE ON SCHEMA ssb_grids TO ag_pgsql_ano_moduler_rw"
priv6 <- "GRANT USAGE ON SCHEMA ssb_grids TO ag_pgsql_ano_moduler_ro"
dbSendStatement(con, priv)
dbSendStatement(con, priv2)
dbSendStatement(con, priv3)
dbSendStatement(con, priv4)
dbSendStatement(con, priv5)
dbSendStatement(con, priv6)
2.1.2 Read data into R
We use RStudio as the interface when adding new data to the database. We start by bringing the data into our environment.
First we can get the entire SSB500 dataset.
SSBpath <- "/data/P-Prosjekter2/412421_okologisk_tilstand_2024/Data/SSB0500M_L/ruter500m_Norge.shp"
SSB500 <- read_sf(SSBpath) |>
st_transform(25833)
Strip down the number of columns
SSB500 <- SSB500 |>
select(ssbid = SSBid) # postgre doesnt like capital letters
# the geometry column needs to be named 'geom'
st_geometry(SSB500) <- "geom"
This data consists of perfect 500x500 grid cells arranged on rounded coordinates in the UTM sone 33 CRS.
2.1.3 Define table properties
First we define the table properties
q1 <- "create table ssb_grids.ssb_500 (
ssbid character varying(50) primary key,
geom geometry(polygon,25833)
);"
# indices makes the database work faster. It should be added to all tables that are looked up frequently
q2 <- "create index on ssb_grids.ssb_500 using btree(ssbid);"
q3 <- "create index on ssb_grids.ssb_500 using gist(geom);"
# sending the queries:
dbSendStatement(con, q1)
dbSendStatement(con, q2)
dbSendStatement(con, q3)
We defined geom to be polygon. Now let’s just check that that is trua, and there are no multi-polygons for example.
st_geometry_type(SSB500, by_geometry = F)
Yes, they are all polygons.
2.1.4 Write to db
Then we write data to the ssb_500 table.
write_sf(SSB500, dsn = con,
layer = Id(schema = "ssb_grids", table = "ssb_500"),
append = T)
2.2 Sampling frames
We will have different sampling frames for the different ANO-modules. For ANO Havstrand, SSB500 has been masked to only include grid celle that overlap with the Norwegian coastline. The script for doing that is written in python ad can be found here: /data/P-Prosjekter2/412421_okologisk_tilstand_2024/Jan/
2.2.1 Setup schema
schema_ur <- "CREATE SCHEMA sampling_frames"
dbSendQuery(con, schema_ur)
Write queries to grant read only access to all.
priv <- "ALTER DEFAULT PRIVILEGES IN SCHEMA sampling_frames GRANT SELECT ON TABLES TO ag_pgsql_ano_moduler_ro"
priv2 <- "ALTER DEFAULT PRIVILEGES IN SCHEMA sampling_frames GRANT SELECT ON TABLES TO ag_pgsql_ano_moduler_rw"
priv3 <- "ALTER DEFAULT PRIVILEGES IN SCHEMA sampling_frames GRANT SELECT ON TABLES TO ag_pgsql_ano_moduler_admin"
priv4 <- "GRANT USAGE ON SCHEMA sampling_frames TO ag_pgsql_ano_moduler_admin"
priv5 <- "GRANT USAGE ON SCHEMA sampling_frames TO ag_pgsql_ano_moduler_rw"
priv6 <- "GRANT USAGE ON SCHEMA sampling_frames TO ag_pgsql_ano_moduler_ro"
dbSendStatement(con, priv)
dbSendStatement(con, priv2)
dbSendStatement(con, priv3)
dbSendStatement(con, priv4)
dbSendStatement(con, priv5)
dbSendStatement(con, priv6)
2.2.2 Read data into R
coast_path <- "/data/P-Prosjekter2/412421_okologisk_tilstand_2024/Jan/CoastalSampling3.gdb"
sf::st_layers(coast_path)
Driver: OpenFileGDB
Available layers:
layer_name geometry_type features fields
1 fcNorgeKyst_l Multi Line String 101331 2
2 fcKystline_500mSegmenter Multi Line String 240861 1
3 Samples62_5 Multi Point 769342 2
4 Samples83_3 Multi Point 555958 2
5 Samples125 Multi Point 350676 2
6 SSB500_Coast4 Multi Polygon 121920 6
7 Hexagon62_5_Coast Multi Polygon 2729978 3
8 Hexagon83_3_Coast Multi Polygon 1622139 3
9 Hexagon125_Coast Multi Polygon 796190 3
crs_name
1 ETRS89 / UTM zone 33N
2 ETRS89 / UTM zone 33N
3 ETRS89 / UTM zone 33N
4 ETRS89 / UTM zone 33N
5 ETRS89 / UTM zone 33N
6 ETRS89 / UTM zone 33N
7 WGS 84 / UTM zone 33N
8 WGS 84 / UTM zone 33N
9 WGS 84 / UTM zone 33N
SSB500_Coast4 contains the SSB500 grid cells that overlap with the coastline.
coastSSB <- read_sf(coast_path, layer = "SSB500_Coast4")
The SSBid will be the foregn key, linking to ssb_500. We also need a primary key, and therefore I add a unique identifier.
ids <- UUIDgenerate(n = nrow(coastSSB))
anyDuplicated(ids)
coastSSB <- coastSSB |>
select(ssbid = SSBid)|>
mutate(havstrand_id = ids)
We don’t need the geometry
coastSSB <- as_tibble(coastSSB) |>
select(ssbid, havstrand_id)
2.2.3 Define table properties
We will name the table samplingframe_havstrand_2024.
q1 <- "create table sampling_frames.samplingframe_havstrand_2024 (
havstrand_id character varying(50) primary key,
ssbid character varying(50),
CONSTRAINT fk_ssb_500
FOREIGN KEY (ssbid)
REFERENCES ssb_grids.ssb_500 (ssbid)
);"
# indices makes the database work faster. It should be added to all tables that are looked up frequently
q2 <- "create index on sampling_frames.samplingframe_havstrand_2024 using btree(ssbid);"
q3 <- "create index on sampling_frames.samplingframe_havstrand_2024 using btree(havstrand_id);"
# sending the queries:
dbSendStatement(con, q1)
dbSendStatement(con, q2)
dbSendStatement(con, q3)
2.2.4 Write to db
Then we write data to the samplingframe_havstrand_2024 table.
write_sf(coastSSB, dsn = con,
layer = Id(schema = "sampling_frames", table = "samplingframe_havstrand_2024"),
append = T)