56 lines
1.2 KiB
Elixir
56 lines
1.2 KiB
Elixir
|
defmodule Obr do
|
||
|
@moduledoc """
|
||
|
One billion row challenge.
|
||
|
|
||
|
https://github.com/gunnarmorling/1brc
|
||
|
"""
|
||
|
|
||
|
require Explorer.DataFrame, as: DF
|
||
|
|
||
|
@doc """
Loads the measurements file at `filepath` and returns the formatted summary.

The file is parsed as a headerless, `;`-delimited CSV. The first column is
read as a string and the second column as a float. The resulting dataframe
is aggregated by `process_dataframe/1` and the aggregated rows are rendered
into a single string by `format_results/1`.
"""
def process_file(filepath) do
  # Without a header row the columns get the positional names
  # `column_1` / `column_2`, which the dtypes below refer to.
  csv_options = [
    header: false,
    delimiter: ";",
    eol_delimiter: "\n",
    dtypes: [column_1: :string, column_2: :float]
  ]

  dataframe = DF.from_csv!(filepath, csv_options)
  rows = process_dataframe(dataframe)

  format_results(rows)
end
|
||
|
|
||
|
@doc """
Aggregates the measurements in `df` per station.

Rows are grouped by `column_1`, and the minimum, mean and maximum of
`column_2` are computed for every group. The summary is sorted by
`column_1` and returned as a list of `{name, min, mean, max}` tuples.
"""
def process_dataframe(df) do
  grouped = DF.group_by(df, "column_1")

  summary =
    DF.summarise(grouped,
      min: min(column_2),
      mean: mean(column_2),
      max: max(column_2)
    )

  ordered =
    summary
    |> DF.sort_by(column_1)
    |> DF.select(["column_1", "min", "mean", "max"])

  # Each streamed row is a map keyed by column name; flatten it to a tuple.
  for row <- DF.to_rows_stream(ordered) do
    {row["column_1"], row["min"], row["mean"], row["max"]}
  end
end
|
||
|
|
||
|
@doc """
Renders the aggregated `rows` as a single result string.

Every row is formatted with `format_row/1`; the formatted rows are joined
with `", "` and the whole list is wrapped in curly braces. An empty list
yields `"{}"`.
"""
@spec format_results([{String.t(), float(), float(), float()}]) :: String.t()
def format_results(rows) do
  # `Enum.map_join/3` maps and joins in one pass, avoiding the intermediate
  # list that `Enum.map/2 |> Enum.join/2` would build.
  str = Enum.map_join(rows, ", ", &format_row/1)

  "{#{str}}"
end
|
||
|
|
||
|
@doc """
Formats one aggregated row as `name=min/mean/max`.

Each temperature is rounded up to one decimal place with `Float.ceil/2`
before being rendered, so all three values must be floats.

## Examples

    iex> Obr.format_row({"Hamburg", -5.0, 12.25, 30.5})
    "Hamburg=-5.0/12.3/30.5"
"""
@spec format_row({String.t(), float(), float(), float()}) :: String.t()
def format_row({name, min, mean, max}) do
  # `Enum.map_join/3` rounds and joins in one pass instead of building an
  # intermediate list with `Enum.map/2` first.
  temperatures = Enum.map_join([min, mean, max], "/", &Float.ceil(&1, 1))

  "#{name}=#{temperatures}"
end
|
||
|
end
|