import warnings
from functools import wraps
from inspect import signature
import geopandas as gpd
import pandas as pd
from geopandas.geodataframe import GeoDataFrame
from shapely import wkt
from trackintel.io.from_geopandas import (
read_locations_gpd,
read_positionfixes_gpd,
read_staypoints_gpd,
read_tours_gpd,
read_triplegs_gpd,
read_trips_gpd,
)
def _index_warning_default_none(func):
    """Decorator that warns if ``index_col`` is not passed explicitly.

    The decorated function is called with exactly the arguments it received;
    the only addition is a warning that is emitted whenever the caller did not
    supply ``index_col`` (neither positionally nor as keyword).

    Parameters
    ----------
    func : callable
        Function that accepts an ``index_col`` argument.

    Returns
    -------
    callable
        Wrapper carrying the metadata of ``func``.
    """
    # The signature does not change between calls, so inspect it only once.
    func_signature = signature(func)

    @wraps(func)  # copy all metadata
    def wrapper(*args, **kwargs):
        # bind() only records arguments that were actually supplied, so a
        # missing "index_col" entry means the caller relied on the default.
        bound_values = func_signature.bind(*args, **kwargs)
        if "index_col" not in bound_values.arguments:
            warnings.warn(
                "Assuming default index as unique identifier. "
                "Pass 'index_col=None' as explicit argument to avoid a warning when reading csv files.",
                stacklevel=2,  # attribute the warning to the caller, not to this wrapper
            )
        return func(*args, **kwargs)

    return wrapper
@_index_warning_default_none
def read_positionfixes_csv(*args, columns=None, tz=None, index_col=None, geom_col="geom", crs=None, **kwargs):
    """
    Read positionfixes from a csv file.

    The file is loaded with ``pd.read_csv``, the "longitude" and "latitude" columns are
    combined into a point geometry column and the result is handed to
    :func:`trackintel.io.from_geopandas.read_positionfixes_gpd`, which builds the
    trackintel positionfixes (see :doc:`/modules/model`).

    Parameters
    ----------
    args
        Positional arguments forwarded to pd.read_csv().

    columns : dict, optional
        Columns to rename, given as {'old_name':'trackintel_standard_name'}.
        After renaming, the columns "user_id", "tracked_at", "latitude" and
        "longitude" are required.

    tz : str, optional
        pytz compatible timezone string. If None UTC is assumed.

    index_col : str, optional
        Name of the column used as index. If None the default index is assumed
        as unique identifier.

    geom_col : str, default "geom"
        Name given to the geometry column that is created.

    crs : pyproj.crs or str, optional
        Coordinate reference system to set. Anything accepted by
        pyproj.CRS.from_user_input() works, such as an authority string
        (eg 'EPSG:4326') or a WKT string.

    kwargs
        Keyword arguments forwarded to pd.read_csv().

    Returns
    -------
    pfs : GeoDataFrame (as trackintel positionfixes)
        A GeoDataFrame containing the positionfixes.

    Notes
    -----
    This function is primarily useful if the data is available in a
    longitude/latitude format. If your data already contains a WKT column,
    it might be easier to just use the GeoPandas import functions
    :func:`trackintel.io.from_geopandas.read_positionfixes_gpd`.

    Examples
    --------
    >>> trackintel.read_positionfixes_csv('data.csv')
    >>> trackintel.read_positionfixes_csv('data.csv', columns={'time':'tracked_at', 'User':'user_id'})
                      tracked_at  user_id                        geom
    id
    0  2008-10-23 02:53:04+00:00        0  POINT (116.31842 39.98470)
    1  2008-10-23 02:53:10+00:00        0  POINT (116.31845 39.98468)
    2  2008-10-23 02:53:15+00:00        0  POINT (116.31842 39.98469)
    3  2008-10-23 02:53:20+00:00        0  POINT (116.31839 39.98469)
    4  2008-10-23 02:53:25+00:00        0  POINT (116.31826 39.98465)
    """
    rename_map = columns if columns is not None else {}
    raw = pd.read_csv(*args, index_col=index_col, **kwargs).rename(columns=rename_map)

    raw["tracked_at"] = pd.to_datetime(raw["tracked_at"])
    # The coordinate columns are replaced by a single point geometry column.
    raw[geom_col] = gpd.points_from_xy(raw["longitude"], raw["latitude"])
    without_coordinates = raw.drop(columns=["longitude", "latitude"])
    return read_positionfixes_gpd(without_coordinates, geom_col=geom_col, crs=crs, tz=tz)
def write_positionfixes_csv(positionfixes, filename, *args, **kwargs):
    """
    Write positionfixes to a csv file.

    The geometry column is replaced by plain "longitude" and "latitude"
    columns before the table is written with the pandas to_csv function.
    The index is always written.

    Parameters
    ----------
    positionfixes : GeoDataFrame (as trackintel positionfixes)
        The positionfixes to store to the CSV file.

    filename : str
        The file to write to.

    args
        Additional arguments passed to pd.DataFrame.to_csv().

    kwargs
        Additional keyword arguments passed to pd.DataFrame.to_csv().

    Notes
    -----
    "longitude" and "latitude" are extracted from the geometry column and the original
    geometry column is dropped. The input is not modified.

    Examples
    --------
    >>> ps.as_positionfixes.to_csv("export_pfs.csv")
    """
    points = positionfixes.geometry
    frame = positionfixes.copy()  # keep the caller's object untouched
    frame["longitude"], frame["latitude"] = points.x, points.y

    geometry_name = frame.geometry.name
    table = frame.drop(columns=[geometry_name])
    table.to_csv(filename, *args, index=True, **kwargs)
@_index_warning_default_none
def read_triplegs_csv(*args, columns=None, tz=None, index_col=None, geom_col="geom", crs=None, **kwargs):
    """
    Read triplegs from csv file.

    Wraps the pandas read_csv function, parses the WKT of the tripleg geometry (LINESTRING)
    and passes the result to :func:`trackintel.io.from_geopandas.read_triplegs_gpd`, which
    builds the trackintel triplegs (see :doc:`/modules/model`).

    Parameters
    ----------
    args
        Arguments as passed to pd.read_csv().

    columns : dict, optional
        The column names to rename in the format {'old_name':'trackintel_standard_name'}.
        The mapping is applied exactly once, directly after reading the file.
        The required columns for this function include: "user_id", "started_at", "finished_at"
        and "geom".

    tz : str, optional
        pytz compatible timezone string. If None UTC is assumed.

    index_col : str, optional
        Column name to be used as index. If None the default index is assumed
        as unique identifier.

    geom_col : str, default "geom"
        Name of the column containing the geometry as WKT.

    crs : pyproj.crs or str, optional
        Set coordinate reference system. The value can be anything accepted
        by pyproj.CRS.from_user_input(), such as an authority string
        (eg 'EPSG:4326') or a WKT string.

    kwargs
        Additional keyword arguments passed to pd.read_csv().

    Returns
    -------
    tpls : GeoDataFrame (as trackintel triplegs)
        A GeoDataFrame containing the triplegs.

    Examples
    --------
    >>> trackintel.read_triplegs_csv('data.csv')
    >>> trackintel.read_triplegs_csv('data.csv', columns={'start_time':'started_at', 'User':'user_id'})
        user_id                started_at               finished_at                                               geom
    id
    0         1 2015-11-27 08:00:00+00:00 2015-11-27 10:00:00+00:00  LINESTRING (8.54878 47.37652, 8.52770 47.39935...
    1         1 2015-11-27 12:00:00+00:00 2015-11-27 14:00:00+00:00  LINESTRING (8.56340 47.95600, 8.64560 47.23345...
    """
    columns = {} if columns is None else columns
    df = pd.read_csv(*args, index_col=index_col, **kwargs)
    df.rename(columns=columns, inplace=True)

    df["started_at"] = pd.to_datetime(df["started_at"])
    df["finished_at"] = pd.to_datetime(df["finished_at"])
    df[geom_col] = gpd.GeoSeries.from_wkt(df[geom_col])
    # The columns are already renamed at this point. The mapping is deliberately not
    # forwarded, so it cannot be applied a second time: re-applying e.g. a swap
    # {'a': 'b', 'b': 'a'} would revert it. This matches the other csv readers.
    return read_triplegs_gpd(df, geom_col=geom_col, crs=crs, tz=tz)
def write_triplegs_csv(triplegs, filename, *args, **kwargs):
    """
    Write triplegs to a csv file.

    A plain DataFrame copy of the triplegs is created, its geometry column is
    converted to WKT and the table is written with the pandas to_csv function.
    The index is always written.

    Parameters
    ----------
    triplegs : GeoDataFrame (as trackintel triplegs)
        The triplegs to store to the CSV file.

    filename : str
        The file to write to.

    args
        Additional arguments passed to pd.DataFrame.to_csv().

    kwargs
        Additional keyword arguments passed to pd.DataFrame.to_csv().

    Examples
    --------
    >>> tpls.as_triplegs.to_csv("export_tpls.csv")
    """
    geometry = triplegs.geometry
    table = pd.DataFrame(triplegs, copy=True)  # copy, so the input keeps its geometries
    table[geometry.name] = geometry.apply(wkt.dumps)
    table.to_csv(filename, *args, index=True, **kwargs)
@_index_warning_default_none
def read_staypoints_csv(*args, columns=None, tz=None, index_col=None, geom_col="geom", crs=None, **kwargs):
    """
    Read staypoints from a csv file.

    The file is loaded with ``pd.read_csv``, the timestamps are parsed, the WKT of the
    staypoint geometry (POINT) is converted to geometries and the result is handed to
    :func:`trackintel.io.from_geopandas.read_staypoints_gpd`, which builds the trackintel
    staypoints (see :doc:`/modules/model`).

    Parameters
    ----------
    args
        Positional arguments forwarded to pd.read_csv().

    columns : dict, optional
        Columns to rename, given as {'old_name':'trackintel_standard_name'}.
        After renaming, the columns "user_id", "started_at", "finished_at"
        and "geom" are required.

    tz : str, optional
        pytz compatible timezone string. If None UTC is assumed.

    index_col : str, optional
        Name of the column used as index. If None the default index is assumed
        as unique identifier.

    geom_col : str, default "geom"
        Name of the column containing the geometry as WKT.

    crs : pyproj.crs or str, optional
        Coordinate reference system to set. Anything accepted by
        pyproj.CRS.from_user_input() works, such as an authority string
        (eg 'EPSG:4326') or a WKT string.

    kwargs
        Keyword arguments forwarded to pd.read_csv().

    Returns
    -------
    sp : GeoDataFrame (as trackintel staypoints)
        A GeoDataFrame containing the staypoints.

    Examples
    --------
    >>> trackintel.read_staypoints_csv('data.csv')
    >>> trackintel.read_staypoints_csv('data.csv', columns={'start_time':'started_at', 'User':'user_id'})
        user_id                started_at               finished_at                      geom
    id
    0         1 2015-11-27 08:00:00+00:00 2015-11-27 10:00:00+00:00  POINT (8.52822 47.39519)
    1         1 2015-11-27 12:00:00+00:00 2015-11-27 14:00:00+00:00  POINT (8.54340 47.95600)
    """
    rename_map = columns if columns is not None else {}
    sp = pd.read_csv(*args, index_col=index_col, **kwargs).rename(columns=rename_map)

    for time_col in ("started_at", "finished_at"):
        sp[time_col] = pd.to_datetime(sp[time_col])
    sp[geom_col] = gpd.GeoSeries.from_wkt(sp[geom_col])
    return read_staypoints_gpd(sp, geom_col=geom_col, crs=crs, tz=tz)
def write_staypoints_csv(staypoints, filename, *args, **kwargs):
    """
    Write staypoints to a csv file.

    A plain DataFrame copy of the staypoints is created, its geometry column is
    converted to WKT and the table is written with the pandas to_csv function.
    The index is always written.

    Parameters
    ----------
    staypoints : GeoDataFrame (as trackintel staypoints)
        The staypoints to store to the CSV file.

    filename : str
        The file to write to.

    args
        Additional arguments passed to pd.DataFrame.to_csv().

    kwargs
        Additional keyword arguments passed to pd.DataFrame.to_csv().

    Examples
    --------
    >>> sp.as_staypoints.to_csv("export_sp.csv")
    """
    geometry = staypoints.geometry
    table = pd.DataFrame(staypoints, copy=True)  # copy, so the input keeps its geometries
    table[geometry.name] = geometry.apply(wkt.dumps)
    table.to_csv(filename, *args, index=True, **kwargs)
@_index_warning_default_none
def read_locations_csv(*args, columns=None, index_col=None, crs=None, **kwargs):
    """
    Read locations from a csv file.

    The file is loaded with ``pd.read_csv``, the WKT of the location center (POINT) and,
    if the column is present, of the extent (POLYGON) is converted to geometries and the
    result is handed to :func:`trackintel.io.from_geopandas.read_locations_gpd`, which
    builds the trackintel locations (see :doc:`/modules/model`).

    Parameters
    ----------
    args
        Positional arguments forwarded to pd.read_csv().

    columns : dict, optional
        Columns to rename, given as {'old_name':'trackintel_standard_name'}.
        After renaming, the columns "user_id" and "center" are required.

    index_col : str, optional
        Name of the column used as index. If None the default index is assumed
        as unique identifier.

    crs : pyproj.crs or str, optional
        Coordinate reference system to set. Anything accepted by
        pyproj.CRS.from_user_input() works, such as an authority string
        (eg 'EPSG:4326') or a WKT string.

    kwargs
        Keyword arguments forwarded to pd.read_csv().

    Returns
    -------
    locs : GeoDataFrame (as trackintel locations)
        A GeoDataFrame containing the locations.

    Examples
    --------
    >>> trackintel.read_locations_csv('data.csv')
    >>> trackintel.read_locations_csv('data.csv', columns={'User':'user_id'})
        user_id                    center                                             extent
    id
    0         1  POINT (8.54878 47.37652)  POLYGON ((8.548779487999999 47.37651505, 8.527...
    1         1  POINT (8.56340 47.95600)  POLYGON ((8.5634 47.956, 8.6456 47.23345, 8.45...
    """
    rename_map = columns if columns is not None else {}
    locs = pd.read_csv(*args, index_col=index_col, **kwargs).rename(columns=rename_map)

    locs["center"] = gpd.GeoSeries.from_wkt(locs["center"])
    # "extent" is optional and only converted when the file provides it.
    if "extent" in locs:
        locs["extent"] = gpd.GeoSeries.from_wkt(locs["extent"])
    return read_locations_gpd(locs, crs=crs)
def write_locations_csv(locations, filename, *args, **kwargs):
    """
    Write locations to a csv file.

    A plain DataFrame copy of the locations is created, the "center" column and,
    if present, the "extent" column are converted to WKT and the table is written
    with the pandas to_csv function. The index is always written.

    Parameters
    ----------
    locations : GeoDataFrame (as trackintel locations)
        The locations to store to the CSV file.

    filename : str
        The file to write to.

    args
        Additional arguments passed to pd.DataFrame.to_csv().

    kwargs
        Additional keyword arguments passed to pd.DataFrame.to_csv().

    Examples
    --------
    >>> locs.as_locations.to_csv("export_locs.csv")
    """
    table = pd.DataFrame(locations, copy=True)
    # "center" is always converted; "extent" only when the column exists.
    wkt_columns = ["center"]
    if "extent" in table.columns:
        wkt_columns.append("extent")
    for name in wkt_columns:
        table[name] = locations[name].apply(wkt.dumps)
    table.to_csv(filename, *args, index=True, **kwargs)
@_index_warning_default_none
def read_trips_csv(*args, columns=None, tz=None, index_col=None, geom_col=None, crs=None, **kwargs):
    """
    Read trips from a csv file.

    The file is loaded with ``pd.read_csv``, the timestamps are parsed, an optional WKT
    geometry column is converted to geometries and the result is handed to
    :func:`trackintel.io.from_geopandas.read_trips_gpd`, which builds the trackintel
    trips (see :doc:`/modules/model`).

    Parameters
    ----------
    args
        Positional arguments forwarded to pd.read_csv().

    columns : dict, optional
        Columns to rename, given as {'old_name':'trackintel_standard_name'}.
        After renaming, the columns "user_id", "started_at", "finished_at",
        "origin_staypoint_id" and "destination_staypoint_id" are required.
        An optional column is "geom" of type MultiPoint, containing start and
        destination points of the trip.

    tz : str, optional
        pytz compatible timezone string. If None UTC is assumed.

    index_col : str, optional
        Name of the column used as index. If None the default index is assumed
        as unique identifier.

    geom_col : str, default None
        Name of the column containing the geometry as WKT.
        If None no geometry gets added.

    crs : pyproj.crs or str, optional
        Coordinate reference system to set. Anything accepted by
        pyproj.CRS.from_user_input() works, such as an authority string
        (eg 'EPSG:4326') or a WKT string. Ignored if geom_col is None.

    kwargs
        Keyword arguments forwarded to pd.read_csv().

    Returns
    -------
    trips : (Geo)DataFrame (as trackintel trips)
        A DataFrame containing the trips. GeoDataFrame if geometry column exists.

    Notes
    -----
    Geometry is not mandatory for trackintel trips.

    Examples
    --------
    >>> trackintel.read_trips_csv('data.csv')
    >>> trackintel.read_trips_csv('data.csv', columns={'start_time':'started_at', 'User':'user_id'})
        user_id                started_at               finished_at  origin_staypoint_id  destination_staypoint_id
    id
    0         1 2015-11-27 08:00:00+00:00 2015-11-27 08:15:00+00:00                    2                         5
    1         1 2015-11-27 08:20:22+00:00 2015-11-27 08:35:22+00:00                    5                         3
                                                        geom
    id
    0   MULTIPOINT (116.31842 39.98470, 116.29873 39.999729)
    1   MULTIPOINT (116.29873 39.98402, 116.32480 40.009269)
    """
    rename_map = columns if columns is not None else {}
    table = pd.read_csv(*args, index_col=index_col, **kwargs).rename(columns=rename_map)

    for time_col in ("started_at", "finished_at"):
        table[time_col] = pd.to_datetime(table[time_col])

    # Geometry is optional for trips; WKT is only parsed when a column is named.
    if geom_col is not None:
        table[geom_col] = gpd.GeoSeries.from_wkt(table[geom_col])

    return read_trips_gpd(table, geom_col=geom_col, crs=crs, tz=tz)
def write_trips_csv(trips, filename, *args, **kwargs):
    """
    Write trips to a csv file.

    Works on a copy of the trips. If that copy is a GeoDataFrame, its geometry
    column is converted to WKT first. The table is then written with the pandas
    to_csv function; the index is always written.

    Parameters
    ----------
    trips : (Geo)DataFrame (as trackintel trips)
        The trips to store to the CSV file.

    filename : str
        The file to write to.

    args
        Additional arguments passed to pd.DataFrame.to_csv().

    kwargs
        Additional keyword arguments passed to pd.DataFrame.to_csv().

    Examples
    --------
    >>> trips.as_trips.to_csv("export_trips.csv")
    """
    table = trips.copy()
    has_geometry = isinstance(table, GeoDataFrame)
    if has_geometry:
        geometry_name = table.geometry.name
        wkt_strings = table[geometry_name].to_wkt()
        table[geometry_name] = wkt_strings
    table.to_csv(filename, *args, index=True, **kwargs)
@_index_warning_default_none
def read_tours_csv(*args, columns=None, index_col=None, tz=None, **kwargs):
    """
    Read tours from a csv file.

    The file is loaded with ``pd.read_csv``, the timestamps are parsed and the result
    is handed to :func:`trackintel.io.from_geopandas.read_tours_gpd`, which builds the
    trackintel tours (see :doc:`/modules/model`).

    Parameters
    ----------
    args
        Positional arguments forwarded to pd.read_csv().

    columns : dict, optional
        Columns to rename, given as {'old_name':'trackintel_standard_name'}.
        After renaming, the columns "started_at" and "finished_at" must exist.

    index_col : str, optional
        Name of the column used as index. If None the default index is assumed
        as unique identifier.

    tz : str, optional
        pytz compatible timezone string. If None UTC is assumed.

    kwargs
        Keyword arguments forwarded to pd.read_csv().

    Returns
    -------
    tours : DataFrame (as trackintel tours)
        A DataFrame containing the tours.

    Examples
    --------
    >>> trackintel.read_tours_csv('data.csv', columns={'uuid':'user_id'})
    """
    rename_map = columns if columns is not None else {}
    table = pd.read_csv(*args, index_col=index_col, **kwargs).rename(columns=rename_map)

    for time_col in ("started_at", "finished_at"):
        table[time_col] = pd.to_datetime(table[time_col])

    return read_tours_gpd(table, tz=tz)
def write_tours_csv(tours, filename, *args, **kwargs):
    """
    Write tours to a csv file.

    Thin wrapper around the pandas to_csv function that always writes the index.

    Parameters
    ----------
    tours : DataFrame (as trackintel tours)
        The tours to store to the CSV file.

    filename : str
        The file to write to.

    args
        Additional arguments passed to pd.DataFrame.to_csv().

    kwargs
        Additional keyword arguments passed to pd.DataFrame.to_csv().

    Examples
    --------
    >>> tours.as_tours.to_csv("export_tours.csv")
    """
    # Positional extras go first; "index" stays fixed to True.
    tours.to_csv(filename, *args, index=True, **kwargs)