defmodule Obr do
  @moduledoc """
  One billion row challenge.

  Reads a `station;temperature` measurements file, aggregates the minimum,
  mean and maximum temperature per station and renders the result in the
  challenge's `{name=min/mean/max, ...}` output format.

  https://github.com/gunnarmorling/1brc
  """

  require Explorer.DataFrame, as: DF

  @typedoc "Station name followed by its min, mean and max temperature."
  @type station_row :: {String.t(), float(), float(), float()}

  @doc """
  Read the provided file and perform calculations.

  The file is parsed as a header-less CSV with `;` as the delimiter and `\\n`
  as the line terminator; the first column is read as a string (station name)
  and the second as a float (temperature).

  Returns the formatted result string produced by `format_results/1`.
  Loading goes through `Explorer.DataFrame.from_csv!/2`, the raising variant,
  so a file that cannot be loaded raises instead of returning an error tuple.
  """
  @spec process_file(String.t()) :: String.t()
  def process_file(filepath) do
    filepath
    |> DF.from_csv!(
      header: false,
      delimiter: ";",
      eol_delimiter: "\n",
      dtypes: [column_1: :string, column_2: :float]
    )
    |> process_dataframe()
    |> format_results()
  end

  @doc """
  Process the dataframe and return a list of tuples containing the station
  name, min, mean and max temperatures.

  The dataframe must have the station name in `column_1` and the temperature
  in `column_2`. Rows are grouped by station, summarised, and sorted by
  station name before being converted to `{name, min, mean, max}` tuples.
  """
  @spec process_dataframe(Explorer.DataFrame.t()) :: [station_row()]
  def process_dataframe(df) do
    df
    |> DF.group_by("column_1")
    |> DF.summarise(min: min(column_2), mean: mean(column_2), max: max(column_2))
    |> DF.sort_by(column_1)
    |> DF.select(["column_1", "min", "mean", "max"])
    |> DF.to_rows_stream()
    |> Enum.map(fn row -> {row["column_1"], row["min"], row["mean"], row["max"]} end)
  end

  @doc """
  Render a list of `{name, min, mean, max}` tuples as a single string.

  Each tuple is formatted with `format_row/1`; the entries are joined with
  `", "` and wrapped in curly braces. An empty list yields `"{}"`.

  ## Examples

      iex> Obr.format_results([{"A", 1.0, 2.0, 3.0}, {"B", -1.5, 0.0, 1.5}])
      "{A=1.0/2.0/3.0, B=-1.5/0.0/1.5}"

  """
  @spec format_results([station_row()]) :: String.t()
  def format_results(rows) do
    "{#{Enum.map_join(rows, ", ", &format_row/1)}}"
  end

  @doc """
  Render one station as `name=min/mean/max`.

  Every temperature is rounded to one fractional digit, to the nearest tenth
  with ties going toward positive infinity (`12.25` becomes `12.3`, `-0.25`
  becomes `-0.2`).

  ## Examples

      iex> Obr.format_row({"Hamburg", -5.0, 12.3, 34.2})
      "Hamburg=-5.0/12.3/34.2"

  """
  @spec format_row(station_row()) :: String.t()
  def format_row({name, min, mean, max}) do
    temperatures = Enum.map_join([min, mean, max], "/", &round_tenths/1)

    "#{name}=#{temperatures}"
  end

  # Rounds to the nearest tenth, ties toward positive infinity.
  #
  # `Float.ceil/2` is deliberately not used here: it operates on the exact
  # binary value of the float, so a reading such as 12.3 (stored as
  # 12.300000000000000710...) would be pushed up to 12.4.
  defp round_tenths(value) do
    Float.floor(value * 10.0 + 0.5) / 10.0
  end
end