Caclulate tree visibility statistics per district¶

Author: Willeke A’Campo

Description: This notebooks shows how to calculate the Ecosystem Service statistics for tree visibility and impact per district using DuckDB. The results are stored in a new table in the database and exported to GeoJSON.

Documentation:

Data conversion | GeoJSON to GeoParquet¶

[1]:

import geopandas as gpd
from shapely.geometry import Point
from shapely.wkb import loads
import pyarrow
import os
import leafmap
import os
import duckdb
import pandas as pd

# TODO move to kedro pipeline
municipality = "kristiansand"
raw_dir = r"/workspaces/urban-climate/data/01_raw"
interim_dir = r"/workspaces/urban-climate/data/02_intermediate"
reporting_dir = r"/workspaces/urban-climate/data/08_reporting/"

db_path = os.path.join(raw_dir, f"{municipality}.db")

# Define the table names
file_names = [
    f"{municipality}_study_area",
    f"{municipality}_districts",
    f"{municipality}_bldg",
    f"{municipality}_res_bldg",
    f"{municipality}_green_space",
    f"{municipality}_open_space",
    f"{municipality}_public_open_space",
    f"{municipality}_private_open_space",
    f"{municipality}_tree_crowns"
    ]

table_names = [
    "study_area", "districts", "bldg", "res_bldg", "green_space",
    "open_space", "public_open_space", "private_open_space", "tree_crowns"
    ]

# Define the parquet_dict
parquet_dict = {
    name: os.path.join(interim_dir, f"{name}.parquet")
    for name in file_names}

# Check if the parquet files exist, if not convert  to parquet
for key in parquet_dict.keys():
    if os.path.exists(parquet_dict[key]):
        continue
    else:
        # Define the gdf_dict
        gdf_dict = {
            name: gpd.read_file(os.path.join(raw_dir, f"{name}.geojson"))
            for name in file_names}

        # Convert GeoDataFrame to Parquet
        for key, gdf in gdf_dict.items():
            gdf.to_parquet(
                path = interim_dir + "/" + key + ".parquet",
                index = None,
                compression = "snappy"
            )

[2]:

# Create a connection to the DuckDB database
con = duckdb.connect(database=db_path, read_only=False)
con.install_extension("spatial")
con.load_extension("spatial")

# Create a table for each parquet file or GeoDataFrame
for key,table in zip(parquet_dict.keys(), table_names):
    con.execute(
        f"""
        CREATE TABLE {table}
        AS SELECT *, ST_GeomFromWKB(geometry)
        FROM parquet_scan('{parquet_dict[key]}')
        """
        )


# Fetch and print all table names
result = con.execute(
    """
    SELECT table_name
    FROM information_schema.tables
    WHERE table_schema = 'main'
    """
    )

print(result.fetchall())

[('study_area',), ('districts',), ('bldg',), ('res_bldg',), ('green_space',), ('open_space',), ('public_open_space',), ('private_open_space',), ('tree_crowns',)]

Create a new Table with Tree Crown Center Points

[19]:

# Check if the 'crowns' table exists
table_exists = "tree_crowns" in [
    row[0]
    for row in con.execute(
        """
        SELECT table_name
        FROM information_schema.tables
        WHERE table_schema = 'main'
        """
    ).fetchall()
]

if table_exists:
    # convert dtype to DuckDB GEOMETRY
    result = con.execute(
        """
        SELECT
        ST_X(ST_Centroid(ST_GeomFromWKB(geometry))),
        ST_Y(ST_Centroid(ST_GeomFromWKB(geometry)))
        FROM tree_crowns"""
        )
    # xy_crowns to pd
    df = pd.DataFrame(result.fetchall(), columns=["X", "Y"])

    xy_crowns = gpd.GeoDataFrame(
        df,
        geometry= gpd.points_from_xy(df.X, df.Y)
        )
    xy_crowns.crs = "EPSG:25832"


    # Create a new table in DuckDB
    con.execute(
        """
        CREATE TABLE tree_crowns_xy AS
        SELECT
        ST_X(ST_Centroid(ST_GeomFromWKB(geometry))) AS X,
        ST_Y(ST_Centroid(ST_GeomFromWKB(geometry))) AS Y,
        ST_Point(ST_X(ST_Centroid(ST_GeomFromWKB(geometry))), ST_Y(ST_Centroid(ST_GeomFromWKB(geometry)))) AS geometry
        FROM tree_crowns"""
    )

Map study area and Tree Crown Center Points (10%-sample)

[ ]:

# add layers to gdf for mapping
gdf_study_area = leafmap.read_parquet(
    parquet_dict[f"{municipality}_study_area"],
    return_type='gdf',
    src_crs="EPSG:25832",
    dst_crs="EPSG:4326"
    )

# convert xy_crowns to wgs84
xy_crowns_sample = xy_crowns.sample(frac=0.05)
trees_xy = xy_crowns_sample.to_crs("EPSG:4326")
points_geojson = trees_xy.__geo_interface__
print("Map the tree crown center points (10% sample).")
print(trees_xy.head(2))

# Calculate the center of the study_area GeoDataFrame
center = gdf_study_area.geometry.unary_union.centroid

# --------------------------------------------------
# INIT MAP
# --------------------------------------------------
map = leafmap.Map()
# center
map.set_center(center.x, center.y, zoom=13)
# add Basemap
map.add_basemap("CartoDB.Positron")

# add study area as vector layer
map.add_gdf(
        gdf_study_area,
        layer_name="study_area",
        get_fill_color=[0, 0, 255, 128]
        )

map.add_gdf(
    trees_xy,
    layer_name ="trees",
    color= "black"
)

map

Database Connection | DuckDB¶

[3]:

#%%capture
columns = con.execute("PRAGMA table_info(districts)").fetchall()
for column in columns:
    print(column[1])

OBJECTID
fylkesnummer
fylkesnavn
kommunenummer
kommunenavn
delomradenummer
delomradenavn
grunnkretsnummer
grunnkretsnavn
kilde_admin
kilde_befolkning
id_befolkning
year_pop_stat
pop_total
pop_elderly
a_district
a_unit
a_clipped
SHAPE_Length
SHAPE_Area
geometry
st_geomfromwkb(geometry)

Split open space, buildings and tree crowns by district

[4]:

%%capture
# create new table split_open_space
# with open space split by district boundaries
con.execute(
    """
    CREATE TABLE split_open_space AS
    SELECT
        districts.grunnkretsnummer,
        ST_Intersection(ST_GeomFromWKB(districts.geometry), ST_GeomFromWKB(open_space.geometry)) AS geom
    FROM
        districts, open_space
    WHERE
        ST_Intersects(ST_GeomFromWKB(districts.geometry), ST_GeomFromWKB(open_space.geometry));
    """
    )

[5]:

%%capture
# create new table split_private_space
# with public space split by district boundaries
con.execute(
    """
    CREATE TABLE split_private_space AS
    SELECT
        districts.grunnkretsnummer,
        ST_Intersection(ST_GeomFromWKB(districts.geometry), ST_GeomFromWKB(private_open_space.geometry)) AS geom
    FROM
        districts, private_open_space
    WHERE
        ST_Intersects(ST_GeomFromWKB(districts.geometry), ST_GeomFromWKB(private_open_space.geometry));
    """
    )

[6]:

%%capture
# create new table split_public_space
# with private space split by district boundaries
con.execute(
    """
    CREATE TABLE split_public_space AS
    SELECT
        districts.grunnkretsnummer,
        ST_Intersection(ST_GeomFromWKB(districts.geometry), ST_GeomFromWKB(public_open_space.geometry)) AS geom
    FROM
        districts, public_open_space
    WHERE
        ST_Intersects(ST_GeomFromWKB(districts.geometry), ST_GeomFromWKB(public_open_space.geometry));
    """
    )

[7]:

%%capture
# create new table split_buildings
# with buildings split by district boundaries
con.execute(
    """
    CREATE TABLE split_bldg AS
    SELECT
        districts.grunnkretsnummer,
        ST_Intersection(ST_GeomFromWKB(districts.geometry), ST_GeomFromWKB(bldg.geometry)) AS geom
    FROM
        districts, bldg
    WHERE
        ST_Intersects(ST_GeomFromWKB(districts.geometry), ST_GeomFromWKB(bldg.geometry));
    """
    )

[9]:

%%capture
# create new table split_res_bldg
# with buildings split by district boundaries
con.execute(
    """
    CREATE TABLE split_res_bldg AS
    SELECT
        districts.grunnkretsnummer,
        ST_Intersection(ST_GeomFromWKB(districts.geometry), ST_GeomFromWKB(res_bldg.geometry)) AS geom
    FROM
        districts, res_bldg
    WHERE
        ST_Intersects(ST_GeomFromWKB(districts.geometry), ST_GeomFromWKB(res_bldg.geometry));
    """
    )

---------------------------------------------------------------------------
CatalogException                          Traceback (most recent call last)
Cell In[9], line 3
      1 # create new table split_res_bldg
      2 # with buildings split by district boundaries
----> 3 con.execute(
      4     """
      5     CREATE TABLE split_res_bldg AS 
      6     SELECT
      7         districts.grunnkretsnummer,
      8         ST_Intersection(ST_GeomFromWKB(districts.geometry), ST_GeomFromWKB(res_bldg.geometry)) AS geom
      9     FROM 
     10         districts, res_bldg
     11     WHERE
     12         ST_Intersects(ST_GeomFromWKB(districts.geometry), ST_GeomFromWKB(res_bldg.geometry));
     13     """
     14     )

CatalogException: Catalog Error: Table with name "split_res_bldg" already exists!

[10]:

%%capture
# create new table split_tree_crowns
# with tree crowns split by district boundaries
con.execute(
    """
    CREATE TABLE split_tree_crowns AS
    SELECT
        districts.grunnkretsnummer,
        ST_Intersection(ST_GeomFromWKB(districts.geometry), ST_GeomFromWKB(tree_crowns.geometry)) AS geom
    FROM
        districts, tree_crowns
    WHERE
        ST_Intersects(ST_GeomFromWKB(districts.geometry), ST_GeomFromWKB(tree_crowns.geometry));
    """
    )

[11]:

%%capture
# print columns split_open_space
columns = con.execute("PRAGMA table_info(split_open_space)").fetchall()
for column in columns:
    print(column[1])

# Query to fetch all table names
tables = con.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall()

# Print the first row of each table
for table in tables:
    first_row = con.execute(f"SELECT * FROM {table[0]} LIMIT 1").fetchone()
    print(f"First row of {table[0]}: {first_row}")

Generate Columns with Count Statistics¶

Name	Alias	Description	Type
n_trees	Antall trær	Number of trees in the district	INT
n_bldg	Antall bygninger	Number of buildings in the district	INT
n_res_bldg	Antall boliger	Number of residential buildings in the district	INT
n_res_bldg_near_gs	Antall boliger nær grøntområde (300 m)	Number of residential buildings near green space (300 m)	INT
n_bldg_near_trees	Antall trær nær boliger (15 m)	Number of trees near residential buildings (15 m)	INT
n_viewshed	Antall viewshed piksler	Number of viewshed pixels that intersect with the building edge	INT

Add columns to districts table

[13]:

# update districts
# add count columns: n_trees, n_bldg, n_res_bldg, n_res_bldg_near_gs, n_bldg_near_trees
con.execute("ALTER TABLE districts ADD COLUMN n_trees INTEGER")
con.execute("ALTER TABLE districts ADD COLUMN n_bldg INTEGER")
con.execute("ALTER TABLE districts ADD COLUMN n_res_bldg INTEGER")
con.execute("ALTER TABLE districts ADD COLUMN n_green_spaces INTEGER")
con.execute("ALTER TABLE districts ADD COLUMN n_res_bldg_near_gs INTEGER")
con.execute("ALTER TABLE districts ADD COLUMN perc_near_gs INTEGER")
con.execute("ALTER TABLE districts ADD COLUMN n_near_trees INTEGER")
con.execute("ALTER TABLE districts ADD COLUMN n_bldg_near_trees INTEGER")
con.execute("ALTER TABLE districts ADD COLUMN perc_near_trees INTEGER")

---------------------------------------------------------------------------
CatalogException                          Traceback (most recent call last)
Cell In[13], line 3
      1 # update districts
      2 # add count columns: n_trees, n_bldg, n_res_bldg, n_res_bldg_near_gs, n_bldg_near_trees
----> 3 con.execute("ALTER TABLE districts ADD COLUMN n_trees INTEGER")
      4 con.execute("ALTER TABLE districts ADD COLUMN n_bldg INTEGER")
      5 con.execute("ALTER TABLE districts ADD COLUMN n_res_bldg INTEGER")

CatalogException: Catalog Error: Column with name n_trees already exists!

[17]:

con.execute("ALTER TABLE res_bldg ADD COLUMN n_trees INTEGER")
con.execute("ALTER TABLE res_bldg ADD COLUMN n_green_spaces INTEGER")

[17]:

<duckdb.duckdb.DuckDBPyConnection at 0x7f55e03490b0>

[ ]:

%%capture
# print columns districts
columns = con.execute("PRAGMA table_info(split_bldg)").fetchall()
for column in columns:
    print(column[1])

Calculate COUNT attributes

[22]:

# COUNT number of trees per district
n_trees = con.execute(
    """
    SELECT districts.grunnkretsnummer, COUNT(*)
    FROM tree_crowns_xy
    JOIN districts ON ST_Within(tree_crowns_xy.geometry, ST_GeomFromWKB(districts.geometry))
    GROUP BY districts.grunnkretsnummer
    """
    ).fetchall()

# COUNT number of buildings per district
n_bldg = con.execute(
    """
    SELECT districts.grunnkretsnummer, COUNT(*)
    FROM split_bldg
    JOIN districts ON ST_Within(split_bldg.geom, ST_GeomFromWKB(districts.geometry))
    GROUP BY districts.grunnkretsnummer
    """
    ).fetchall()

# COUNT number of residential buildings per district
n_res_bldg = con.execute(
    """
    SELECT districts.grunnkretsnummer, COUNT(*)
    FROM split_res_bldg
    JOIN districts ON ST_Within(split_res_bldg.geom, ST_GeomFromWKB(districts.geometry))
    GROUP BY districts.grunnkretsnummer
    """
    ).fetchall()

# COUNT number of res buildings WITHIN 300m distance of green space
n_res_bldg_near_gs = con.execute(
    """
    SELECT districts.grunnkretsnummer, COUNT(*)
    FROM res_bldg
    JOIN districts ON ST_Within(ST_GeomFromWKB(res_bldg.geometry), ST_GeomFromWKB(districts.geometry))
    WHERE res_bldg.geometry IS NOT NULL AND districts.geometry IS NOT NULL AND EXISTS (
        SELECT 1
        FROM green_space
        WHERE green_space.geometry IS NOT NULL AND ST_DWithin(ST_GeomFromWKB(res_bldg.geometry), ST_GeomFromWKB(green_space.geometry), 300)
    )
    GROUP BY districts.grunnkretsnummer
    """
    ).fetchall()



# COUNT
# count all trees within 15m distance of a res bldg
# if count >= 3, set n_trees to 1
# if count < 3, set n_trees to 0

n_near_trees = con.execute(
    f"""
    UPDATE res_bldg
    SET n_trees = (
        CASE
            WHEN (
                SELECT COUNT(*)
                FROM tree_crowns
                WHERE tree_crowns.geometry IS NOT NULL
                AND ST_DWithin(ST_GeomFromWKB(res_bldg.geometry), ST_GeomFromWKB(tree_crowns.geometry), 15)
            ) >= 3 THEN 1
            ELSE 0
        END
    )
    """
).fetchall()

# COUNT number of res buildings with value=1 in n_trees column
# add to districts table
n_res_bldg_near_trees = con.execute(
    """
    SELECT districts.grunnkretsnummer, COUNT(*)
    FROM res_bldg
    JOIN districts ON ST_Within(ST_GeomFromWKB(res_bldg.geometry), ST_GeomFromWKB(districts.geometry))
    WHERE res_bldg.n_trees = 1
    GROUP BY districts.grunnkretsnummer
    """
    ).fetchall()

[23]:

# Update districts table
for id, count in n_trees:
    con.execute(f"UPDATE districts SET n_trees = {count} WHERE grunnkretsnummer = {id}")

for id, count in n_bldg:
    con.execute(f"UPDATE districts SET n_bldg = {count} WHERE grunnkretsnummer = {id}")

for id, count in n_res_bldg:
    con.execute(f"UPDATE districts SET n_res_bldg = {count} WHERE grunnkretsnummer = {id}")

for id, count in n_res_bldg_near_gs:
    con.execute(f"UPDATE districts SET n_res_bldg_near_gs = {count} WHERE grunnkretsnummer = {id}")

for id, count in n_res_bldg_near_trees:
    print(id, count)
    con.execute(f"UPDATE districts SET n_bldg_near_trees = {count} WHERE grunnkretsnummer = {id}")

# print columns districts
columns = con.execute("PRAGMA table_info(districts)").fetchall()
for column in columns:
    print(column[1])

42040904 67
42040807 1
42040702 223
42040908 29
42040910 72
42040907 124
42040701 59
42040803 65
42040804 105
42041104 1
42040918 8
42040705 52
42040704 33
42040902 178
42040903 94
42040806 28
42040919 1
42040909 18
42040911 121
42040905 134
42040802 23
42041103 12
42040915 1
42041101 21
42040912 15
42040913 52
42040901 71
42040801 33
OBJECTID
fylkesnummer
fylkesnavn
kommunenummer
kommunenavn
delomradenummer
delomradenavn
grunnkretsnummer
grunnkretsnavn
kilde_admin
kilde_befolkning
id_befolkning
year_pop_stat
pop_total
pop_elderly
a_district
a_unit
a_clipped
SHAPE_Length
SHAPE_Area
geometry
st_geomfromwkb(geometry)
n_trees
n_bldg
n_res_bldg
n_res_bldg_near_gs
perc_near_gs
n_bldg_near_trees
perc_near_trees
n_green_spaces
n_near_trees

[24]:

# if NAN set to 0
con.execute(
    """
    UPDATE districts
    SET n_res_bldg_near_gs = COALESCE(n_res_bldg_near_gs, 0)
    """
)

con.execute(
    """
    UPDATE districts
    SET n_bldg = COALESCE(n_bldg, 0)
    """
)

con.execute(
    """
    UPDATE districts
    SET n_res_bldg = COALESCE(n_res_bldg, 0)
    """
)


# normalize perc_near_gs
con.execute(
    """
    UPDATE districts
    SET perc_near_gs = (n_res_bldg_near_gs / n_res_bldg) * 100
    """
)

# normalize perc_near_trees
con.execute(
    """
    UPDATE districts
    SET perc_near_trees = (n_bldg_near_trees / n_res_bldg) * 100
    """
)

[24]:

<duckdb.duckdb.DuckDBPyConnection at 0x7f55e03490b0>

Generate Columns with Area Statistics¶

Name	Alias	Description	Type	Unit
a_district	Grunnkretsareal	Area of the district	FLOAT	m2
a_open_space	Åpent område	Area of open space	FLOAT	m2
a_private_space	Privat område	Area of private space	FLOAT	m2
a_public_space	Offentlig område	Area of public space	FLOAT	m2
a_crown	Kroneareal	Crown coverage area within the district	FLOAT	m2
perc_crown	Trekronedekningsgrad (%)	Tree crown coverage %	FLOAT	%

Add columns to districts table

[25]:

# update districts
# add area columns: a_open_space, a_private_space, a_public_space, a_green_space, a_crown, a_crown_public, a_crown_private
con.execute("ALTER TABLE districts ADD COLUMN a_open_space FLOAT")
con.execute("ALTER TABLE districts ADD COLUMN a_private_space FLOAT")
con.execute("ALTER TABLE districts ADD COLUMN a_public_space FLOAT")
con.execute("ALTER TABLE districts ADD COLUMN a_crown FLOAT")
con.execute("ALTER TABLE districts ADD COLUMN perc_crown FLOAT")

[25]:

<duckdb.duckdb.DuckDBPyConnection at 0x7f55e03490b0>

Caluclate AREA attributes

[26]:

# AREA of open space per district
# calculate the overlapping area of open space with district X, A etc.
con.execute(
    """
    UPDATE districts
    SET a_open_space = (
        SELECT SUM(ST_Area(ST_Intersection(ST_GeomFromWKB(districts.geometry), split_open_space.geom)))
        FROM split_open_space
        WHERE ST_Intersects(ST_GeomFromWKB(districts.geometry), split_open_space.geom)
    )
    """
)

# AREA of private open space per district
# calculate the overlapping area of private open space with district X, A etc.
con.execute(
    """
    UPDATE districts
    SET a_private_space = (
        SELECT SUM(ST_Area(ST_Intersection(ST_GeomFromWKB(districts.geometry), split_private_space.geom)))
        FROM split_private_space
        WHERE ST_Intersects(ST_GeomFromWKB(districts.geometry), split_private_space.geom)
    )
    """
)

# AREA of public open space per district
# calculate the overlapping area of public open space with district X, A etc.
con.execute(
    """
    UPDATE districts
    SET a_public_space = (
        SELECT SUM(ST_Area(ST_Intersection(ST_GeomFromWKB(districts.geometry), split_public_space.geom)))
        FROM split_public_space
        WHERE ST_Intersects(ST_GeomFromWKB(districts.geometry), split_public_space.geom)
    )
    """
)

# AREA of tree crowns per district
# calculate the overlapping area of tree crowns with district X, A etc.
con.execute(
    """
    UPDATE districts
    SET a_crown = (
        SELECT SUM(ST_Area(ST_Intersection(ST_GeomFromWKB(districts.geometry), split_tree_crowns.geom)))
        FROM split_tree_crowns
        WHERE ST_Intersects(ST_GeomFromWKB(districts.geometry), split_tree_crowns.geom)
    )
    """
)

# PERCENTAGE of tree crown coverage per district
con.execute(
    """
    UPDATE districts
    SET perc_crown = (a_crown / a_clipped) * 100
    """
)

[26]:

<duckdb.duckdb.DuckDBPyConnection at 0x7f55e03490b0>

Export to dataframe

[27]:

# Export districts to DataFrame
df = pd.read_sql("SELECT * FROM districts", con)

# Convert geometry column from WKB to shapely geometry
df['geometry'] = df['geometry'].apply(loads, hex=True)

# Convert DataFrame to GeoDataFrame
gdf = gpd.GeoDataFrame(df, geometry='geometry')
gdf_sorted = gdf.sort_values(by='grunnkretsnummer', ascending=True)
gdf_sorted = gdf_sorted.round(2)

# Define the %-bins and labels
labels = ["no data", "0-25%", "25-50%", "50-75%", "75-100%"]
bins = pd.IntervalIndex.from_tuples([(-0.01, 25), (25, 50), (50, 75), (75, 100)])
dict = {
    "nan":"no data",
    "(-0.01, 25.0]": "0-25%",
    "(25.0, 50.0]": "25-50%",
    "(50.0, 75.0]": "50-75%",
    "(75.0, 100.0]": "75-100%"
    }

# Near Residential Buildings % Categories
gdf_sorted['bldg_bins'] = pd.cut(gdf_sorted['perc_near_trees'], bins)
gdf_sorted['bldg_bins'] = gdf_sorted['bldg_bins'].astype(str)
gdf_sorted['labels_near_rbldg'] = gdf_sorted['bldg_bins'].map(dict)

# Near Green Space % Categories
gdf_sorted['gs_bins'] = pd.cut(gdf_sorted['perc_near_gs'], bins)
gdf_sorted['gs_bins'] = gdf_sorted['gs_bins'].astype(str)
gdf_sorted['labels_near_gs'] = gdf_sorted['gs_bins'].map(dict)

# Crown Coverate % Categories
gdf_sorted['crown_bins'] = pd.cut(gdf_sorted['perc_crown'], bins)
gdf_sorted['crown_bins'] = gdf_sorted['crown_bins'].astype(str)
gdf_sorted['labels_perc_crown'] = gdf_sorted['crown_bins'].map(dict)

gdf_sorted.drop(columns=['bldg_bins', 'gs_bins', 'crown_bins'], inplace=True)

# display col "kommunenavn","grunnkrestnummer", "grunnkretsnavn" n_bldg, n_res_bldg, n_res_bldg_near_gs, n_bldg_near_trees, a_open_space, a_private_space, a_public_space, a_crown, perc_crown
display(gdf_sorted[[
    "kommunenavn","grunnkretsnummer", "grunnkretsnavn",
    "n_trees", "n_bldg", "n_res_bldg",
    "n_res_bldg_near_gs", "perc_near_gs","labels_near_gs",
    "n_bldg_near_trees", "perc_near_trees", "labels_near_rbldg",
    "a_open_space", "a_private_space", "a_public_space", "a_crown",
    "perc_crown", "labels_perc_crown"
    ]]
        )
gdf.crs = "EPSG:25832"
gdf_sorted.crs = "EPSG:25832"


gdf_mapping = gdf.to_crs("EPSG:4326")

	kommunenavn	grunnkretsnummer	grunnkretsnavn	n_trees	n_bldg	n_res_bldg	n_res_bldg_near_gs	perc_near_gs	labels_near_gs	n_bldg_near_trees	perc_near_trees	labels_near_rbldg	a_open_space	a_private_space	a_public_space	a_crown	perc_crown	labels_perc_crown
2	Kristiansand	42040701	Grim - Dueknipen	298	204	87	61	70.0	50-75%	59.0	68.0	50-75%	104499.24	32663.19	71273.56	24228.22	15.40	0-25%
9	Kristiansand	42040702	Grimsmyra	632	609	310	286	92.0	75-100%	223.0	72.0	50-75%	161599.53	83491.13	75987.51	39153.62	15.43	0-25%
15	Kristiansand	42040704	Møllevannet - Klappane	154	82	40	40	100.0	75-100%	33.0	82.0	75-100%	47883.75	16124.35	31553.15	9176.21	13.43	0-25%
20	Kristiansand	42040705	Enrum - Paradis	433	190	78	77	99.0	75-100%	52.0	67.0	50-75%	58644.02	27538.81	30578.40	35490.59	32.06	25-50%
1	Kristiansand	42040801	Kvadraturen Sørvest	440	307	72	0	0.0	0-25%	33.0	46.0	25-50%	205287.23	52343.63	153860.23	25854.21	5.59	0-25%
5	Kristiansand	42040802	Kvadraturen Nordvest	439	416	104	0	0.0	0-25%	23.0	22.0	0-25%	194937.12	47966.65	147339.89	31861.02	9.60	0-25%
3	Kristiansand	42040803	Kvadraturen Sørøst	432	375	170	13	8.0	0-25%	65.0	38.0	25-50%	190027.25	57009.83	132599.47	26190.16	4.59	0-25%
6	Kristiansand	42040804	Kvadraturen Nordøst	216	695	382	54	14.0	0-25%	105.0	27.0	25-50%	145466.62	60967.31	84394.45	9775.37	3.26	0-25%
10	Kristiansand	42040806	Eg	366	86	32	32	100.0	75-100%	28.0	88.0	75-100%	59291.48	16598.72	42233.00	23503.05	24.93	0-25%
21	Kristiansand	42040807	Sykehuset	1291	99	1	1	100.0	75-100%	1.0	100.0	75-100%	257912.45	159733.95	97249.30	96850.88	24.05	0-25%
0	Kristiansand	42040901	Galgeberg	333	285	129	129	100.0	75-100%	71.0	55.0	50-75%	118018.59	45730.00	71898.16	15768.14	6.06	0-25%
7	Kristiansand	42040902	Hamreheia	481	564	250	248	99.0	75-100%	178.0	71.0	50-75%	173022.23	90216.37	81218.94	23997.30	8.18	0-25%
4	Kristiansand	42040903	Kuholmen	332	395	160	160	100.0	75-100%	94.0	59.0	50-75%	188112.45	64551.17	122261.60	16465.98	5.34	0-25%
12	Kristiansand	42040904	Valhalla Sør	128	241	124	114	92.0	75-100%	67.0	54.0	50-75%	62455.76	34136.45	27904.97	4095.61	4.88	0-25%
13	Kristiansand	42040905	Valhalla Midt	276	499	261	82	31.0	25-50%	134.0	51.0	50-75%	124027.37	64567.89	58371.44	12991.49	7.35	0-25%
25	Kristiansand	42040907	Valhalla Nord - Kongsgård	439	303	155	155	100.0	75-100%	124.0	80.0	75-100%	148043.27	68896.92	77977.48	25669.20	12.04	0-25%
17	Kristiansand	42040908	Solbygg	122	98	50	50	100.0	75-100%	29.0	58.0	50-75%	73094.88	29007.49	43483.53	6955.10	6.78	0-25%
23	Kristiansand	42040909	Kjempegravane	47	132	61	61	100.0	75-100%	18.0	30.0	25-50%	34834.33	18410.68	15887.13	2604.27	5.22	0-25%
28	Kristiansand	42040910	Tobienborg	381	302	148	148	100.0	75-100%	72.0	49.0	25-50%	131510.67	50772.92	79382.28	23394.14	12.40	0-25%
11	Kristiansand	42040911	Nedre Lund	402	526	187	177	95.0	75-100%	121.0	65.0	50-75%	205347.41	89383.95	114498.01	21780.21	6.46	0-25%
24	Kristiansand	42040912	Oddemarka	118	108	31	31	100.0	75-100%	15.0	48.0	25-50%	54084.15	21627.48	32029.34	7577.51	10.27	0-25%
22	Kristiansand	42040913	Flaten	103	162	80	80	100.0	75-100%	52.0	65.0	50-75%	45788.73	23253.30	22286.44	5368.99	8.09	0-25%
29	Kristiansand	42040915	Gimlemoen - Jegersberg	580	40	1	1	100.0	75-100%	1.0	100.0	75-100%	145747.75	31836.04	113747.86	45532.23	21.19	0-25%
16	Kristiansand	42040918	Gimlevang	34	42	19	19	100.0	75-100%	8.0	42.0	25-50%	28194.61	10814.88	17202.92	1886.51	4.09	0-25%
27	Kristiansand	42040919	Gimle	1175	21	2	2	100.0	75-100%	1.0	50.0	25-50%	87002.41	15415.42	71380.45	89392.76	48.56	25-50%
8	Kristiansand	42040920	Marviksletta	48	6	0	0	NaN	no data	NaN	NaN	no data	12271.83	6491.21	5721.18	3426.57	18.12	0-25%
14	Kristiansand	42040921	Lund Industriområde	40	9	0	0	NaN	no data	NaN	NaN	no data	16471.13	11996.89	4519.13	2207.87	8.89	0-25%
18	Kristiansand	42041101	Nedre Kongsgård	1008	390	29	29	100.0	75-100%	21.0	72.0	50-75%	218743.47	87159.60	130853.74	57912.16	18.42	0-25%
19	Kristiansand	42041102	Kongsgard 1 - Vige	166	9	0	0	NaN	no data	NaN	NaN	no data	9656.72	4293.83	5215.83	11158.61	49.82	25-50%
26	Kristiansand	42041103	Bjørndalsheia	129	38	12	12	100.0	75-100%	12.0	100.0	75-100%	40086.45	5665.55	34368.30	8886.66	17.37	0-25%
30	Kristiansand	42041104	Vestre Bjørndalen	36	20	2	2	100.0	75-100%	1.0	50.0	25-50%	45113.70	11336.80	33673.90	1164.64	2.24	0-25%

[28]:

%%capture
print(gdf_sorted.columns)
#drop duckdb geom col
gdf_sorted.drop(columns=['st_geomfromwkb(geometry)'], inplace=True)

# sort columns
new_order = ['OBJECTID', 'fylkesnummer', 'fylkesnavn', 'kommunenummer',
       'kommunenavn', 'delomradenummer', 'delomradenavn', 'grunnkretsnummer',
       'grunnkretsnavn', 'kilde_admin', 'kilde_befolkning', 'id_befolkning',
       'year_pop_stat', 'pop_total', 'pop_elderly', 'a_district', 'a_unit',
       'a_clipped', 'n_trees', 'n_bldg', 'n_res_bldg',
       'n_res_bldg_near_gs', 'perc_near_gs', 'labels_near_gs',
       'n_bldg_near_trees', 'perc_near_trees', 'labels_near_rbldg',
       'a_open_space', 'a_private_space', 'a_public_space',
       'a_crown', 'perc_crown','labels_perc_crown',
       'SHAPE_Length', 'SHAPE_Area', 'geometry']

# Reorder the columns
gdf_sorted = gdf_sorted[new_order]
print(gdf_sorted.columns)

Export to Parquet, GeoJSON and CSV format

[29]:

# Export GDF to file
filepath = os.path.join(reporting_dir, f"{municipality}_district_treeVis_stat")

# Write to .parquet
gdf_sorted.to_parquet(os.path.join(filepath + '.parquet'))

# Write to .geojson
gdf_sorted.to_file(os.path.join(filepath + '.geojson'), driver='GeoJSON')

# Write to .csv
gdf_sorted.to_csv(os.path.join(filepath + '.csv'))