Skip to content

data_report

measurement.dash_apps.data_report ¤

Attributes¤

MAX_POINTS = 1000 module-attribute ¤

Maximum number of points to display in the graph.

app = DjangoDash('DataReport', external_stylesheets=[dbc.themes.BOOTSTRAP, '/static/styles/dashstyle.css']) module-attribute ¤

filters = html.Div(style={'width': '286px'}, children=[html.Label('Temporality:', style={'font-weight': 'bold'}), dcc.Dropdown(id='temporality_drop', options=[{'label': 'Raw measurement', 'value': 'measurement'}, {'label': 'Validated measurement', 'value': 'validated'}, {'label': 'Hourly', 'value': 'hourly'}, {'label': 'Daily', 'value': 'daily'}, {'label': 'Monthly', 'value': 'monthly'}], value='measurement'), html.Br(), html.Label('Station:', style={'font-weight': 'bold'}), dcc.Dropdown(id='station_drop', options=[], value=None), html.Br(), html.Label('Variable:', style={'font-weight': 'bold'}), dcc.Dropdown(id='variable_drop', options=[], value=None), html.Br(), html.Label('Date Range:', style={'font-weight': 'bold'}), dcc.DatePickerRange(id='date_range_picker', display_format='YYYY-MM-DD', start_date=None, end_date=None), html.Br(), html.Div(id='csv_div', style={'margin-top': '30px'})]) module-attribute ¤

Classes¤

Variable ¤

Bases: PermissionsBase

A variable with a physical meaning.

Such as precipitation, wind speed, wind direction, soil moisture, including the associated unit. It also includes metadata to help identify what is a reasonable value for the data, to flag outliers and to help with the validation process.

The nature of the variable can be one of the following:

  • sum: Cumulative value over a period of time.
  • average: Average value over a period of time.
  • value: One-off value.

Attributes:

Name Type Description
variable_id AutoField

Primary key.

variable_code CharField

Code of the variable, eg. airtemperature.

name CharField

Human-readable name of the variable, eg. Air temperature.

unit ForeignKey

Unit of the variable.

maximum DecimalField

Maximum value allowed for the variable.

minimum DecimalField

Minimum value allowed for the variable.

diff_error DecimalField

If two sequential values in the time-series data of this variable differ by more than this value, the validation process can mark this with an error flag.

outlier_limit DecimalField

The statistical deviation for defining outliers, in times the standard deviation (sigma).

null_limit DecimalField

The max % of null values (missing, caused by e.g. equipment malfunction) allowed for hourly, daily, monthly data. Cumulative values are not deemed trustworthy if the number of missing values in a given period is greater than the null_limit.

nature CharField

Nature of the variable, eg. if it represents a one-off value, the average over a period of time or the cumulative value over a period

Attributes¤
is_cumulative: bool property ¤

Return True if the nature of the variable is sum.

Functions¤
__str__() ¤

Return the string representation of the object.

Source code in variable/models.py
165
166
167
def __str__(self) -> str:
    """Return the string representation of the object."""
    return str(self.name)
clean() ¤

Validate the model fields.

Source code in variable/models.py
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
def clean(self) -> None:
    """Validate the model fields."""
    if self.maximum < self.minimum:
        raise ValidationError(
            {
                "maximum": "The maximum value must be greater than the minimum "
                "value."
            }
        )
    if not self.variable_code.isidentifier():
        raise ValidationError(
            {
                "variable_code": "The variable code must be a valid Python "
                "identifier. Only letters, numbers and underscores are allowed, and"
                " it cannot start with a number."
            }
        )
    return super().clean()
get_absolute_url() ¤

Get the absolute URL of the object.

Source code in variable/models.py
169
170
171
def get_absolute_url(self) -> str:
    """Get the absolute URL of the object."""
    return reverse("variable:variable_detail", kwargs={"pk": self.pk})

Functions¤

create_empty_plot() ¤

Creates empty plot

Returns:

Type Description
scatter

px.Scatter: Plot

Source code in measurement/dash_apps/plots.py
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
def create_empty_plot() -> px.scatter:
    """Creates empty plot

    Returns:
        px.Scatter: Plot
    """
    fig = px.scatter(title="No data to plot")
    fig.update_layout(
        autosize=True,
        margin=dict(
            l=50,
            r=20,
            b=0,
            t=50,
        ),
        title_font=dict(
            size=14,
        ),
    )
    return fig

create_report_plot(data, variable_name, station_code, agg='') ¤

Creates plot for Report app

Parameters:

Name Type Description Default
data DataFrame

Data

required
variable_name str

Variable name

required
station_code str

Station code

required
agg str

Aggregation level. Defaults to "".

''

Returns:

Type Description
Figure

go.Figure: Plot

Source code in measurement/dash_apps/plots.py
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
def create_report_plot(
    data: pd.DataFrame,
    variable_name: str,
    station_code: str,
    agg: str = "",
) -> go.Figure:
    """Creates plot for Report app

    Args:
        data (pd.DataFrame): Data
        variable_name (str): Variable name
        station_code (str): Station code
        agg (str, optional): Aggregation level. Defaults to "".

    Returns:
        go.Figure: Plot
    """

    fig = px.scatter(
        data,
        x="time",
        y=["value", "minimum", "maximum"],
        title=f"{station_code} - {variable_name}" + agg,
        labels={
            "time": "Date",
        },
    )

    fig.for_each_trace(
        lambda trace: trace.update(name=trace.name.title()),
    )
    fig.update_traces(marker=dict(size=3))
    fig.update_layout(
        legend=dict(
            title=dict(text="", font=dict(size=12)),
            x=1,
            y=1,
            xanchor="auto",
            yanchor="auto",
        ),
        autosize=True,
        margin=dict(
            l=50,
            r=20,
            b=0,
            t=50,
        ),
        yaxis_title=f"{variable_name}",
        title_font=dict(
            size=14,
        ),
    )

    return fig

download_csv_report(n_clicks, temporality, station, variable, start_time, end_time) ¤

Source code in measurement/dash_apps/data_report.py
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
@app.callback(
    [
        Output("download_csv", "data"),
        Output("csv_alert_div", "children"),
    ],
    Input("csv_button", "n_clicks"),
    [
        State("temporality_drop", "value"),
        State("station_drop", "value"),
        State("variable_drop", "value"),
        State("date_range_picker", "start_date"),
        State("date_range_picker", "end_date"),
    ],
    prevent_initial_call=True,
)
def download_csv_report(
    n_clicks: int,
    temporality: str,
    station: str,
    variable: str,
    start_time: str,
    end_time: str,
):
    if n_clicks and n_clicks > 0:
        try:
            file = (
                get_report_data_from_db(
                    station=station,
                    variable=variable,
                    start_time=start_time,
                    end_time=end_time,
                    report_type=temporality,
                    whole_months=False,
                )
                .drop(columns=["station", "variable", "data_import_id"])
                .dropna(axis=1, how="all")
                .dropna(axis=0)
                .to_csv(index=False)
            )
            return (
                dict(
                    content=file,
                    filename=f"{station}_{variable}_{temporality}_{start_time}-{end_time}.csv",
                ),
                [],
            )
        except Exception as e:
            alert = dbc.Alert(f"Could not export data to CSV: {e}", color="warning")
            return None, [alert]

    return None, []

get_aggregation_level(timeseries, aggregate=False) ¤

Calculates the aggregation level based on the timeseries separation.

Parameters:

Name Type Description Default
timeseries Series

Time data to be aggregated.

required
aggregate bool

Flag indicating if there should be aggregation

False
Return

String indicating the aggregation level as " - LEVEL UNITS aggregation" or an empty string if no aggregation is required.

Source code in measurement/dash_apps/plots.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
def get_aggregation_level(timeseries: pd.Series, aggregate: bool = False) -> str:
    """Calculates the aggregation level based on the timeseries separation.

    Args:
        timeseries: Time data to be aggregated.
        aggregate: Flag indicating if there should be aggregation

    Return:
        String indicating the aggregation level as " - LEVEL UNITS aggregation" or an
        empty string if no aggregation is required.
    """
    if not aggregate:
        return ""

    aggregation = timeseries.diff().dt.seconds.median() / 60
    unit = "minutes"
    if aggregation > 60:
        aggregation = aggregation / 60
        unit = "hours"
        if aggregation > 24:
            aggregation = aggregation / 24
            unit = "days"
    return f" - {aggregation:.1f} {unit} aggregation"

get_date_range(station, variable) ¤

Get the date range covered by a chosen station and variable.

Parameters:

Name Type Description Default
station str

Code for the chosen station

required
variable str

Code for the chosen variable

required

Returns:

Type Description
tuple[str, str]

tuple[str, str]: Start date, end date

Source code in measurement/filters.py
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
def get_date_range(station: str, variable: str) -> tuple[str, str]:
    """Get the date range covered by a chosen station and variable.

    Args:
        station (str): Code for the chosen station
        variable (str): Code for the chosen variable

    Returns:
        tuple[str, str]: Start date, end date
    """
    filter_vals = Measurement.objects.filter(
        station__station_code=station,
        variable__variable_code=variable,
    ).aggregate(
        first_date=Min("time"),
        last_date=Max("time"),
    )

    first_date = to_local_time(filter_vals["first_date"]).strftime("%Y-%m-%d")
    last_date = to_local_time(filter_vals["last_date"]).strftime("%Y-%m-%d")
    return first_date, last_date

get_report_data_from_db(station, variable, start_time, end_time, report_type, whole_months=True) cached ¤

Retrieves the report data from the database.

Time is set to the station timezone and the time range is inclusive of both start and end times.

Parameters:

Name Type Description Default
station str

Station of interest.

required
variable str

Variable of interest.

required
start_time str

Start time.

required
end_time str

End time.

required
report_type str

Type of report to retrieve.

required
whole_months bool

Whether to cover whole months or not.

True

Returns:

Type Description
DataFrame

A dataframe with the report data.

Source code in measurement/reporting.py
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
@lru_cache(1)
def get_report_data_from_db(
    station: str,
    variable: str,
    start_time: str,
    end_time: str,
    report_type: str,
    whole_months: bool = True,
) -> pd.DataFrame:
    """Retrieves the report data from the database.

    Time is set to the station timezone and the time range is inclusive of both
    start and end times.

    Args:
        station: Station of interest.
        variable: Variable of interest.
        start_time: Start time.
        end_time: End time.
        report_type: Type of report to retrieve.
        whole_months: Whether to cover whole months or not.

    Returns:
        A dataframe with the report data.
    """
    start_time_, end_time_ = reformat_dates(start_time, end_time, whole_months)

    if report_type == "measurement":
        data = pd.DataFrame.from_records(
            Measurement.objects.filter(
                station__station_code=station,
                variable__variable_code=variable,
                time__date__range=(start_time_.date(), end_time_.date()),
            ).values()
        )
        raw_cols = [col for col in data.columns if col.startswith("raw_")]
        normal = [col.strip("raw_") for col in raw_cols]
        data = data.drop(columns=normal).rename(columns=dict(zip(raw_cols, normal)))

    elif report_type == "validated":
        data = pd.DataFrame.from_records(
            Measurement.objects.filter(
                station__station_code=station,
                variable__variable_code=variable,
                time__date__range=(start_time_.date(), end_time_.date()),
                is_validated=True,
                is_active=True,
            ).values()
        )
        raw_cols = [col for col in data.columns if col.startswith("raw_")]
        data = data.drop(columns=raw_cols)

    else:
        data = pd.DataFrame.from_records(
            Report.objects.filter(
                station__station_code=station,
                variable__variable_code=variable,
                time__date__range=(start_time_.date(), end_time_.date()),
                report_type=report_type,
            ).values()
        )

    data = data.rename(columns={"station_id": "station", "variable_id": "variable"})

    if data.empty:
        return data

    tz = timezone.get_current_timezone()
    data["time"] = data["time"].dt.tz_convert(tz)
    return data.sort_values("time")

get_station_options(station_codes) ¤

Get valid station options and default value based on permissions and data availability.

Parameters:

Name Type Description Default
station_codes list[str]

List of station codes based on permissions

required

Returns:

Type Description
tuple[list[dict[str, str]], str | None]

tuple[list[dict], str]: Options for the station dropdown, default value

Source code in measurement/filters.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
def get_station_options(
    station_codes: list[str],
) -> tuple[list[dict[str, str]], str | None]:
    """Get valid station options and default value based on permissions and data
    availability.

    Args:
        station_codes (list[str]): List of station codes based on permissions

    Returns:
        tuple[list[dict], str]: Options for the station dropdown, default value
    """
    stations_with_measurements = Station.objects.filter(
        ~Q(variables=""), station_code__in=station_codes
    ).values_list("station_code", flat=True)

    station_options = [
        {"label": station_code, "value": station_code}
        for station_code in stations_with_measurements
    ]
    station_value = station_options[0]["value"] if station_options else None
    return station_options, station_value

get_variable_options(station) ¤

Get valid variable options and default value based on the chosen station.

Parameters:

Name Type Description Default
station str

Code for the chosen station

required

Returns:

Type Description
tuple[list[dict[str, str]], str | None]

tuple[list[dict], str]: Options for the variable dropdown, default value

Source code in measurement/filters.py
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
def get_variable_options(station: str) -> tuple[list[dict[str, str]], str | None]:
    """Get valid variable options and default value based on the chosen station.

    Args:
        station (str): Code for the chosen station

    Returns:
        tuple[list[dict], str]: Options for the variable dropdown, default value
    """
    variable_codes = Station.objects.get(station_code=station).variables_list
    variable_dicts = Variable.objects.filter(variable_code__in=variable_codes).values(
        "name", "variable_code"
    )

    variable_options = [
        {
            "label": variable["name"],
            "value": variable["variable_code"],
        }
        for variable in variable_dicts
    ]

    variable_value = variable_options[0]["value"] if variable_options else None
    return variable_options, variable_value

populate_stations_dropdown(station_codes) ¤

Populate the station dropdown based on the list of station codes.

Source code in measurement/dash_apps/data_report.py
267
268
269
270
271
272
273
274
275
@app.callback(
    [Output("station_drop", "options"), Output("station_drop", "value")],
    Input("stations_list", "children"),
)
def populate_stations_dropdown(
    station_codes: list[str],
) -> tuple[list[dict[str, str]], str | None]:
    """Populate the station dropdown based on the list of station codes."""
    return get_station_options(station_codes)

populate_variable_dropdown(chosen_station) ¤

Populate the variable dropdown based on the chosen station.

Source code in measurement/dash_apps/data_report.py
278
279
280
281
282
283
284
285
286
@app.callback(
    [Output("variable_drop", "options"), Output("variable_drop", "value")],
    Input("station_drop", "value"),
)
def populate_variable_dropdown(
    chosen_station: str,
) -> tuple[list[dict[str, str]], str | None]:
    """Populate the variable dropdown based on the chosen station."""
    return get_variable_options(chosen_station)

set_date_range(chosen_station, chosen_variable) ¤

Set the default date range based on the chosen station and variable.

Source code in measurement/dash_apps/data_report.py
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
@app.callback(
    [
        Output("date_range_picker", "start_date"),
        Output("date_range_picker", "end_date"),
    ],
    [
        Input("station_drop", "value"),
        Input("variable_drop", "value"),
    ],
)
def set_date_range(
    chosen_station, chosen_variable
) -> tuple[
    str,
    str,
]:
    """Set the default date range based on the chosen station and variable."""
    return get_date_range(chosen_station, chosen_variable)

update_alert(temporality, station, variable, start_time, end_time, figure) ¤

Source code in measurement/dash_apps/data_report.py
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
@app.callback(
    [
        Output("data_alert_div", "children"),
        Output("csv_div", "children"),
    ],
    [
        Input("temporality_drop", "value"),
        Input("station_drop", "value"),
        Input("variable_drop", "value"),
        Input("date_range_picker", "start_date"),
        Input("date_range_picker", "end_date"),
    ],
    State("data_report_graph", "figure"),
)
def update_alert(
    temporality: str,
    station: str,
    variable: str,
    start_time: str,
    end_time: str,
    figure: go.Figure,
):
    if figure["layout"]["title"]["text"] == "Data not found":
        alert = dbc.Alert(
            "No data was found with the selected criteria", color="warning"
        )
        return [alert], []
    else:
        download = html.Button("Download CSV", id="csv_button")
        display = html.Button(
            "Display data",
            id="display_button",
            style={"margin-left": "10px"},
            n_clicks=0,
        )
        return [], [download, display]

update_graph(relayout_data, n_clicks, temporality, station, variable, start_time, end_time, figure, callback_context) ¤

Source code in measurement/dash_apps/data_report.py
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
@app.callback(
    Output("data_report_graph", "figure"),
    [
        Input("data_report_graph", "relayoutData"),
        Input("display_button", "n_clicks"),
    ],
    [
        State("temporality_drop", "value"),
        State("station_drop", "value"),
        State("variable_drop", "value"),
        State("date_range_picker", "start_date"),
        State("date_range_picker", "end_date"),
        State("data_report_graph", "figure"),
    ],
)
def update_graph(
    relayout_data: dict,
    n_clicks: int,
    temporality: str,
    station: str,
    variable: str,
    start_time: str,
    end_time: str,
    figure: go.Figure,
    callback_context,
) -> go.Figure:
    ctx = callback_context
    triggered_id = ctx.triggered[0]["prop_id"].split(".")[0] if ctx.triggered else ""

    if not n_clicks:
        # After the first load, n_clicks is always > 0, so zooming works
        return figure

    # This is cached, so it's not a big deal to call it multiple times
    data = get_report_data_from_db(
        station=station,
        variable=variable,
        start_time=start_time,
        end_time=end_time,
        report_type=temporality,
        whole_months=False,
    )
    if data.empty:
        return create_empty_plot()

    if triggered_id == "data_report_graph" and "xaxis.range[0]" in relayout_data:
        start = relayout_data["xaxis.range[0]"]
        end = relayout_data["xaxis.range[1]"]
        data = data[(data["time"] >= start) & (data["time"] <= end)]

    try:
        every = max(1, len(data) // MAX_POINTS)
        resampled = data.iloc[::every]
        agg = get_aggregation_level(resampled["time"], every > 1)

        plot = create_report_plot(
            data=resampled,
            variable_name=Variable.objects.get(variable_code=variable).name,
            station_code=station,
            agg=agg,
        )
        if "xaxis.range[0]" in relayout_data:
            plot["layout"]["xaxis"]["range"] = [
                relayout_data["xaxis.range[0]"],
                relayout_data["xaxis.range[1]"],
            ]
        return plot

    except Exception as e:
        getLogger().error(e)
        return create_empty_plot()