Source code for trackintel.io.file

import warnings
from functools import wraps
from inspect import signature

import geopandas as gpd
import pandas as pd
from geopandas.geodataframe import GeoDataFrame
from shapely import wkt
from trackintel.io.from_geopandas import (
    read_locations_gpd,
    read_positionfixes_gpd,
    read_staypoints_gpd,
    read_tours_gpd,
    read_triplegs_gpd,
    read_trips_gpd,
)


def _index_warning_default_none(func):
    """Decorator function that warns if index_col None is not set explicit."""

    @wraps(func)  # copy all metadata
    def wrapper(*args, **kwargs):
        bound_values = signature(func).bind(*args, **kwargs)  # binds only available args and kwargs
        if "index_col" not in bound_values.arguments:
            warnings.warn(
                "Assuming default index as unique identifier. "
                "Pass 'index_col=None' as explicit argument to avoid a warning when reading csv files."
            )
        return func(*args, **kwargs)

    return wrapper


[docs]@_index_warning_default_none def read_positionfixes_csv(*args, columns=None, tz=None, index_col=None, geom_col="geom", crs=None, **kwargs): """ Read positionfixes from csv file. Wraps the pandas read_csv function, extracts longitude and latitude and builds a geopandas GeoDataFrame (POINT). This also validates that the ingested data conforms to the trackintel understanding of positionfixes (see :doc:`/modules/model`). Parameters ---------- args Arguments as passed to pd.read_csv(). columns : dict, optional The column names to rename in the format {'old_name':'trackintel_standard_name'}. The required columns for this function include: "user_id", "tracked_at", "latitude" and "longitude". tz : str, optional pytz compatible timezone string. If None UTC is assumed. index_col : str, optional column name to be used as index. If None the default index is assumed as unique identifier. geom_col : str, default "geom" Name of the column containing the geometry. crs : pyproj.crs or str, optional Set coordinate reference system. The value can be anything accepted by pyproj.CRS.from_user_input(), such as an authority string (eg 'EPSG:4326') or a WKT string. kwargs Additional keyword arguments passed to pd.read_csv(). Returns ------- pfs : GeoDataFrame (as trackintel positionfixes) A GeoDataFrame containing the positionfixes. Notes ----- Note that this function is primarily useful if data is available in a longitude/latitude format. If your data already contains a WKT column, might be easier to just use the GeoPandas import functions :func:`trackintel.io.from_geopandas.read_positionfixes_gpd`. Examples -------- >>> trackintel.read_positionfixes_csv('data.csv') >>> trackintel.read_positionfixes_csv('data.csv', columns={'time':'tracked_at', 'User':'user_id'}) tracked_at user_id geom id 0 2008-10-23 02:53:04+00:00 0 POINT (116.31842 39.98470) 1 2008-10-23 02:53:10+00:00 0 POINT (116.31845 39.98468) 2 2008-10-23 02:53:15+00:00 0 POINT (116.31842 39.98469) 3 2008-10-23 02:53:20+00:00 0 POINT (116.31839 39.98469) 4 2008-10-23 02:53:25+00:00 0 POINT (116.31826 39.98465) """ columns = {} if columns is None else columns df = pd.read_csv(*args, index_col=index_col, **kwargs) df.rename(columns=columns, inplace=True) df["tracked_at"] = pd.to_datetime(df["tracked_at"]) df[geom_col] = gpd.points_from_xy(df["longitude"], df["latitude"]) df.drop(columns=["longitude", "latitude"], inplace=True) return read_positionfixes_gpd(df, geom_col=geom_col, crs=crs, tz=tz)
[docs]def write_positionfixes_csv(positionfixes, filename, *args, **kwargs): """ Write positionfixes to csv file. Wraps the pandas to_csv function, but strips the geometry column and stores the longitude and latitude in respective columns. Parameters ---------- positionfixes : GeoDataFrame (as trackintel positionfixes) The positionfixes to store to the CSV file. filename : str The file to write to. args Additional arguments passed to pd.DataFrame.to_csv(). kwargs Additional keyword arguments passed to pd.DataFrame.to_csv(). Notes ----- "longitude" and "latitude" is extracted from the geometry column and the orignal geometry column is dropped. Examples --------- >>> ps.as_positionfixes.to_csv("export_pfs.csv") """ gdf = positionfixes.copy() gdf["longitude"] = positionfixes.geometry.x gdf["latitude"] = positionfixes.geometry.y df = gdf.drop(columns=[gdf.geometry.name]) df.to_csv(filename, index=True, *args, **kwargs)
[docs]@_index_warning_default_none def read_triplegs_csv(*args, columns=None, tz=None, index_col=None, geom_col="geom", crs=None, **kwargs): """ Read triplegs from csv file. Wraps the pandas read_csv function, extracts a WKT for the tripleg geometry (LINESTRING) and builds a geopandas GeoDataFrame. This also validates that the ingested data conforms to the trackintel understanding of triplegs (see :doc:`/modules/model`). Parameters ---------- args Arguments as passed to pd.read_csv(). columns : dict, optional The column names to rename in the format {'old_name':'trackintel_standard_name'}. The required columns for this function include: "user_id", "started_at", "finished_at" and "geom". tz : str, optional pytz compatible timezone string. If None UTC is assumed. index_col : str, optional Column name to be used as index. If None the default index is assumed as unique identifier. geom_col : str, default "geom" Name of the column containing the geometry as WKT. crs : pyproj.crs or str, optional Set coordinate reference system. The value can be anything accepted by pyproj.CRS.from_user_input(), such as an authority string (eg “EPSG:4326”) or a WKT string. kwargs Additional keyword arguments passed to pd.read_csv(). Returns ------- tpls : GeoDataFrame (as trackintel triplegs) A GeoDataFrame containing the triplegs. Examples -------- >>> trackintel.read_triplegs_csv('data.csv') >>> trackintel.read_triplegs_csv('data.csv', columns={'start_time':'started_at', 'User':'user_id'}) user_id started_at finished_at geom id 0 1 2015-11-27 08:00:00+00:00 2015-11-27 10:00:00+00:00 LINESTRING (8.54878 47.37652, 8.52770 47.39935... 1 1 2015-11-27 12:00:00+00:00 2015-11-27 14:00:00+00:00 LINESTRING (8.56340 47.95600, 8.64560 47.23345... """ columns = {} if columns is None else columns df = pd.read_csv(*args, index_col=index_col, **kwargs) df.rename(columns=columns, inplace=True) df["started_at"] = pd.to_datetime(df["started_at"]) df["finished_at"] = pd.to_datetime(df["finished_at"]) df[geom_col] = gpd.GeoSeries.from_wkt(df[geom_col]) return read_triplegs_gpd(df, geom_col=geom_col, crs=crs, tz=tz, mapper=columns)
[docs]def write_triplegs_csv(triplegs, filename, *args, **kwargs): """ Write triplegs to csv file. Wraps the pandas to_csv function, but transforms the geometry into WKT before writing. Parameters ---------- triplegs : GeoDataFrame (as trackintel triplegs) The triplegs to store to the CSV file. filename : str The file to write to. args Additional arguments passed to pd.DataFrame.to_csv(). kwargs Additional keyword arguments passed to pd.DataFrame.to_csv(). Examples -------- >>> tpls.as_triplegs.to_csv("export_tpls.csv") """ geo_col_name = triplegs.geometry.name df = pd.DataFrame(triplegs, copy=True) df[geo_col_name] = triplegs.geometry.apply(wkt.dumps) df.to_csv(filename, index=True, *args, **kwargs)
[docs]@_index_warning_default_none def read_staypoints_csv(*args, columns=None, tz=None, index_col=None, geom_col="geom", crs=None, **kwargs): """ Read staypoints from csv file. Wraps the pandas read_csv function, extracts a WKT for the staypoint geometry (POINT) and builds a geopandas GeoDataFrame. This also validates that the ingested data conforms to the trackintel understanding of staypoints (see :doc:`/modules/model`). Parameters ---------- args Arguments as passed to pd.read_csv(). columns : dict, optional The column names to rename in the format {'old_name':'trackintel_standard_name'}. The required columns for this function include: "user_id", "started_at", "finished_at" and "geom". tz : str, optional pytz compatible timezone string. If None UTC is assumed. index_col : str, optional column name to be used as index. If None the default index is assumed as unique identifier. geom_col : str, default "geom" Name of the column containing the geometry as WKT. crs : pyproj.crs or str, optional Set coordinate reference system. The value can be anything accepted by pyproj.CRS.from_user_input(), such as an authority string (eg “EPSG:4326”) or a WKT string. kwargs Additional keyword arguments passed to pd.read_csv(). Returns ------- sp : GeoDataFrame (as trackintel staypoints) A GeoDataFrame containing the staypoints. Examples -------- >>> trackintel.read_staypoints_csv('data.csv') >>> trackintel.read_staypoints_csv('data.csv', columns={'start_time':'started_at', 'User':'user_id'}) user_id started_at finished_at geom id 0 1 2015-11-27 08:00:00+00:00 2015-11-27 10:00:00+00:00 POINT (8.52822 47.39519) 1 1 2015-11-27 12:00:00+00:00 2015-11-27 14:00:00+00:00 POINT (8.54340 47.95600) """ columns = {} if columns is None else columns df = pd.read_csv(*args, index_col=index_col, **kwargs) df.rename(columns=columns, inplace=True) df["started_at"] = pd.to_datetime(df["started_at"]) df["finished_at"] = pd.to_datetime(df["finished_at"]) df[geom_col] = gpd.GeoSeries.from_wkt(df[geom_col]) return read_staypoints_gpd(df, geom_col=geom_col, crs=crs, tz=tz)
[docs]def write_staypoints_csv(staypoints, filename, *args, **kwargs): """ Write staypoints to csv file. Wraps the pandas to_csv function, but transforms the geometry into WKT before writing. Parameters ---------- staypoints : GeoDataFrame (as trackintel staypoints) The staypoints to store to the CSV file. filename : str The file to write to. args Additional arguments passed to pd.DataFrame.to_csv(). kwargs Additional keyword arguments passed to pd.DataFrame.to_csv(). Examples -------- >>> tpls.as_triplegs.to_csv("export_tpls.csv") """ geo_col_name = staypoints.geometry.name df = pd.DataFrame(staypoints, copy=True) df[geo_col_name] = staypoints.geometry.apply(wkt.dumps) df.to_csv(filename, index=True, *args, **kwargs)
[docs]@_index_warning_default_none def read_locations_csv(*args, columns=None, index_col=None, crs=None, **kwargs): """ Read locations from csv file. Wraps the pandas read_csv function, extracts a WKT for the location center (POINT) (and extent (POLYGON)) and builds a geopandas GeoDataFrame. This also validates that the ingested data conforms to the trackintel understanding of locations (see :doc:`/modules/model`). Parameters ---------- args Arguments as passed to pd.read_csv(). columns : dict, optional The column names to rename in the format {'old_name':'trackintel_standard_name'}. The required columns for this function include: "user_id" and "center". index_col : str, optional column name to be used as index. If None the default index is assumed as unique identifier. crs : pyproj.crs or str, optional Set coordinate reference system. The value can be anything accepted by pyproj.CRS.from_user_input(), such as an authority string (eg “EPSG:4326”) or a WKT string. kwargs Additional keyword arguments passed to pd.read_csv(). Returns ------- locs : GeoDataFrame (as trackintel locations) A GeoDataFrame containing the locations. Examples -------- >>> trackintel.read_locations_csv('data.csv') >>> trackintel.read_locations_csv('data.csv', columns={'User':'user_id'}) user_id center extent id 0 1 POINT (8.54878 47.37652) POLYGON ((8.548779487999999 47.37651505, 8.527... 1 1 POINT (8.56340 47.95600) POLYGON ((8.5634 47.956, 8.6456 47.23345, 8.45... """ columns = {} if columns is None else columns df = pd.read_csv(*args, index_col=index_col, **kwargs) df.rename(columns=columns, inplace=True) df["center"] = gpd.GeoSeries.from_wkt(df["center"]) if "extent" in df.columns: df["extent"] = gpd.GeoSeries.from_wkt(df["extent"]) return read_locations_gpd(df, crs=crs)
[docs]def write_locations_csv(locations, filename, *args, **kwargs): """ Write locations to csv file. Wraps the pandas to_csv function, but transforms the center (and extent) into WKT before writing. Parameters ---------- locations : GeoDataFrame (as trackintel locations) The locations to store to the CSV file. filename : str The file to write to. args Additional arguments passed to pd.DataFrame.to_csv(). kwargs Additional keyword arguments passed to pd.DataFrame.to_csv(). Examples -------- >>> locs.as_locations.to_csv("export_locs.csv") """ df = pd.DataFrame(locations, copy=True) df["center"] = locations["center"].apply(wkt.dumps) if "extent" in df.columns: df["extent"] = locations["extent"].apply(wkt.dumps) df.to_csv(filename, index=True, *args, **kwargs)
[docs]@_index_warning_default_none def read_trips_csv(*args, columns=None, tz=None, index_col=None, geom_col=None, crs=None, **kwargs): """ Read trips from csv file. Wraps the pandas read_csv function and extracts proper datetimes. This also validates that the ingested data conforms to the trackintel understanding of trips (see :doc:`/modules/model`). Parameters ---------- args Arguments as passed to pd.read_csv(). columns : dict, optional The column names to rename in the format {'old_name':'trackintel_standard_name'}. The required columns for this function include: "user_id", "started_at", "finished_at", "origin_staypoint_id" and "destination_staypoint_id". An optional column is "geom" of type MultiPoint, containing start and destination points of the trip tz : str, optional pytz compatible timezone string. If None UTC is assumed. index_col : str, optional column name to be used as index. If None the default index is assumed as unique identifier. geom_col : str, default None Name of the column containing the geometry as WKT. If None no geometry gets added. crs : pyproj.crs or str, optional Set coordinate reference system. The value can be anything accepted by pyproj.CRS.from_user_input(), such as an authority string (eg “EPSG:4326”) or a WKT string. Ignored if geom_col is None. kwargs Additional keyword arguments passed to pd.read_csv(). Returns ------- trips : (Geo)DataFrame (as trackintel trips) A DataFrame containing the trips. GeoDataFrame if geometry column exists. Notes ----- Geometry is not mandatory for trackintel trips. Examples -------- >>> trackintel.read_trips_csv('data.csv') >>> trackintel.read_trips_csv('data.csv', columns={'start_time':'started_at', 'User':'user_id'}) user_id started_at finished_at origin_staypoint_id destination_staypoint_id\ id 0 1 2015-11-27 08:00:00+00:00 2015-11-27 08:15:00+00:00 2 5 1 1 2015-11-27 08:20:22+00:00 2015-11-27 08:35:22+00:00 5 3 geom id 0 MULTIPOINT (116.31842 39.98470, 116.29873 39.999729) 1 MULTIPOINT (116.29873 39.98402, 116.32480 40.009269) """ columns = {} if columns is None else columns trips = pd.read_csv(*args, index_col=index_col, **kwargs) trips.rename(columns=columns, inplace=True) trips["started_at"] = pd.to_datetime(trips["started_at"]) trips["finished_at"] = pd.to_datetime(trips["finished_at"]) if geom_col is not None: trips[geom_col] = gpd.GeoSeries.from_wkt(trips[geom_col]) return read_trips_gpd(trips, geom_col=geom_col, crs=crs, tz=tz)
[docs]def write_trips_csv(trips, filename, *args, **kwargs): """ Write trips to csv file. Wraps the pandas to_csv function. Geometry get transformed to WKT before writing. Parameters ---------- trips : (Geo)DataFrame (as trackintel trips) The trips to store to the CSV file. filename : str The file to write to. args Additional arguments passed to pd.DataFrame.to_csv(). kwargs Additional keyword arguments passed to pd.DataFrame.to_csv(). Examples -------- >>> trips.as_trips.to_csv("export_trips.csv") """ df = trips.copy() if isinstance(df, GeoDataFrame): geom_col_name = df.geometry.name df[geom_col_name] = df[geom_col_name].to_wkt() df.to_csv(filename, index=True, *args, **kwargs)
[docs]@_index_warning_default_none def read_tours_csv(*args, columns=None, index_col=None, tz=None, **kwargs): """ Read tours from csv file. Wraps the pandas read_csv function and extracts proper datetimes. This also validates that the ingested data conforms to the trackintel understanding of tours (see :doc:`/modules/model`). Parameters ---------- args Arguments as passed to pd.read_csv(). columns : dict, optional The column names to rename in the format {'old_name':'trackintel_standard_name'}. index_col : str, optional column name to be used as index. If None the default index is assumed as unique identifier. tz : str, optional pytz compatible timezone string. If None UTC is assumed. kwargs Additional keyword arguments passed to pd.read_csv(). Returns ------- tours : DataFrame (as trackintel tours) A DataFrame containing the tours. Examples -------- >>> trackintel.read_tours_csv('data.csv', columns={'uuid':'user_id'}) """ columns = {} if columns is None else columns tours = pd.read_csv(*args, index_col=index_col, **kwargs) tours.rename(columns=columns, inplace=True) tours["started_at"] = pd.to_datetime(tours["started_at"]) tours["finished_at"] = pd.to_datetime(tours["finished_at"]) return read_tours_gpd(tours, tz=tz)
[docs]def write_tours_csv(tours, filename, *args, **kwargs): """ Write tours to csv file. Wraps the pandas to_csv function. Parameters ---------- tours : DataFrame (as trackintel tours) The tours to store to the CSV file. filename : str The file to write to. args Additional arguments passed to pd.DataFrame.to_csv(). kwargs Additional keyword arguments passed to pd.DataFrame.to_csv(). Examples -------- >>> tours.as_tours.to_csv("export_tours.csv") """ tours.to_csv(filename, index=True, *args, **kwargs)