55 lines
1.2 KiB
Elixir
55 lines
1.2 KiB
Elixir
defmodule Obr do
  @moduledoc """
  One billion row challenge.

  https://github.com/gunnarmorling/1brc

  Reads a `;`-delimited measurements file, aggregates the minimum, mean and maximum
  of the second column per distinct value of the first column, and renders the
  result as a single `{name=min/mean/max, ...}` string.
  """

  require Explorer.DataFrame, as: DF

  @typedoc "Aggregated measurements for one station: `{name, min, mean, max}`."
  @type station_row :: {String.t(), float(), float(), float()}

  @doc """
  Read the provided file and perform calculations.

  The file at `filepath` is parsed as header-less CSV with `;` as the delimiter and
  `\\n` as the line terminator. The first column is read as a string and the second
  as a float. The parsed dataframe is aggregated with `process_dataframe/1` and the
  result is rendered with `format_results/1`.

  Returns the formatted summary string. Parsing goes through
  `Explorer.DataFrame.from_csv!/2`, the raising variant of the CSV reader.
  """
  @spec process_file(String.t()) :: String.t()
  def process_file(filepath) do
    filepath
    |> DF.from_csv!(
      header: false,
      delimiter: ";",
      eol_delimiter: "\n",
      dtypes: [column_1: :string, column_2: :float]
    )
    |> process_dataframe()
    |> format_results()
  end

  @doc """
  Process the dataframe and return a list of tuples containing the station name, min,
  mean and max temperatures.

  Rows are grouped by `column_1`, summarised with the min, mean and max of
  `column_2`, and sorted by `column_1`; the returned list keeps that order.
  """
  @spec process_dataframe(DF.t()) :: [station_row()]
  def process_dataframe(df) do
    df
    |> DF.group_by("column_1")
    |> DF.summarise(min: min(column_2), mean: mean(column_2), max: max(column_2))
    |> DF.sort_by(column_1)
    |> DF.select(["column_1", "min", "mean", "max"])
    |> DF.to_rows_stream()
    |> Enum.map(fn row -> {row["column_1"], row["min"], row["mean"], row["max"]} end)
  end

  @doc """
  Render a list of `{name, min, mean, max}` tuples as one summary string.

  Each tuple is formatted with `format_row/1`; the entries are joined with `", "`
  and wrapped in curly braces. An empty list yields `"{}"`.
  """
  @spec format_results([station_row()]) :: String.t()
  def format_results(rows) do
    "{#{Enum.map_join(rows, ", ", &format_row/1)}}"
  end

  @doc """
  Render one `{name, min, mean, max}` tuple as `"name=min/mean/max"`.

  Every value is rounded to one fractional digit, with ties going toward positive
  infinity (`1.25` becomes `1.3`, `-1.25` becomes `-1.2`).
  """
  @spec format_row(station_row()) :: String.t()
  def format_row({name, min, mean, max}) do
    temperatures = Enum.map_join([min, mean, max], "/", &round_to_tenth/1)

    "#{name}=#{temperatures}"
  end

  # Rounds to the nearest tenth, ties toward positive infinity.
  #
  # `Float.ceil(value, 1)` is deliberately not used: it operates on the binary
  # representation of the float, so a value that is already a clean one-decimal
  # number can still be pushed up to the next tenth, and any mean with a non-zero
  # second decimal would be rounded up instead of to the nearest tenth.
  defp round_to_tenth(value), do: Float.floor(value * 10.0 + 0.5) / 10
end
|