x.dataframe #

DataFrame

x.dataframe provides a small experimental tabular data API. It is intended as a foundation for data analysis workflows that need DataFrame-style operations without leaving V.

The module stores cells as strings and provides numeric helpers on Series for common analysis tasks.

import x.dataframe

// Inline CSV sample: one header line followed by two data rows.
const prices = 'symbol,price,qty
AAPL,189.5,2
MSFT,420.25,3
'

// main parses the sample, then prints its shape, the mean of the `price`
// column, and the rows whose `qty` is at least 3.
fn main() {
    // An empty CsvConfig keeps the declared defaults (has_header = true, separator = `,`).
    cfg := dataframe.CsvConfig{}
    table := dataframe.from_csv(prices, cfg)!
    println(table.shape())

    price_column := table.column('price')!
    average := price_column.mean()!
    println(average)

    // Cells are stored as strings, so qty is converted before the comparison.
    at_least_three := fn (row dataframe.Row) bool {
        qty := row.values['qty'].int()
        return qty >= 3
    }
    kept := table.filter(at_least_three)
    println(kept.rows)
}

Features

Load tabular data from CSV strings or files.
Access cells, rows, and columns by name.
Select columns and filter rows.
Sort by string or numeric column values.
Count distinct column values.
Calculate sum, mean, min, max, median, stddev, and describe summaries for numeric columns.

The API is experimental and may change while the module remains in the experimental `x` namespace.

fn empty #

fn empty(columns []string) !DataFrame

empty creates an empty DataFrame with the given columns.

fn from_columns #

fn from_columns(columns map[string][]string) !DataFrame

from_columns creates a DataFrame from a map of columns.

fn from_csv #

fn from_csv(text string, cfg CsvConfig) !DataFrame

from_csv creates a DataFrame from CSV text.

fn from_series #

fn from_series(series []Series) !DataFrame

from_series creates a DataFrame from a list of named Series, one per column.

fn new #

fn new(columns []string, rows [][]string) !DataFrame

new creates a DataFrame from column names and rows.

fn read_csv #

fn read_csv(path string, cfg CsvConfig) !DataFrame

read_csv creates a DataFrame from a CSV file.

fn SortOrder.from #

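fn SortOrder.from[W](input W) !SortOrder

from converts input to the matching SortOrder value and returns an error when nothing matches. It is the static `from` method that V provides for enums.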

fn (x.dataframe.Row) get #

fn (row Row) get(name string) !string

get returns a value from the Row by column name.

enum SortOrder #

// SortOrder is the type of the `order` argument taken by sort_by and sort_by_f64.
enum SortOrder {
	asc
	desc
}

SortOrder controls the direction used by sorting helpers.

struct CsvConfig #

// CsvConfig is the options struct accepted by from_csv and read_csv.
// It is marked @[params], V's attribute for trailing option structs.
@[params]

struct CsvConfig {
pub:
	// Defaults to true.
	// NOTE(review): presumably means the first line supplies the column names — confirm.
	has_header   bool = true
	// Separator byte; defaults to a comma.
	separator    u8   = `,`
	// Comment marker byte; defaults to `#`.
	// NOTE(review): how commented input is treated is not visible here — confirm.
	comment      u8   = `#`
	// No explicit default for the next two fields.
	// NOTE(review): semantics not visible here; presumably forwarded to the csv reader — confirm.
	default_cell string
	empty_cell   string
	// Defaults to csv.endline_cr_len.
	end_line_len int = csv.endline_cr_len
	// Quote byte; defaults to a double quote.
	quote        u8  = `"`
	// No explicit default.
	// NOTE(review): presumably strips quote characters from cells when true — confirm.
	quote_remove bool
}

CsvConfig configures CSV loading for a DataFrame.

struct DataFrame #

// DataFrame holds the column names and the rows of string cells.
struct DataFrame {
	// Declared before `pub:`, so it is not one of the public fields.
	// NOTE(review): presumably maps a column name to its position in `columns` — confirm.
	index map[string]int
pub:
	// Column names.
	columns []string
	// One []string of cells per row.
	rows    [][]string
}

DataFrame stores rectangular tabular data as string cells.

fn (DataFrame) height #

fn (df DataFrame) height() int

height returns the number of rows.

fn (DataFrame) width #

fn (df DataFrame) width() int

width returns the number of columns.

fn (DataFrame) shape #

fn (df DataFrame) shape() (int, int)

shape returns the row count followed by the column count.

fn (DataFrame) cell #

fn (df DataFrame) cell(row_index int, column string) !string

cell returns a single cell by row index and column name.

fn (DataFrame) row #

fn (df DataFrame) row(row_index int) !Row

row returns a named row by index.

fn (DataFrame) column #

fn (df DataFrame) column(name string) !Series

column returns a Series by column name.

fn (DataFrame) select #

fn (df DataFrame) select(names []string) !DataFrame

select returns a DataFrame with only the requested columns.

fn (DataFrame) head #

fn (df DataFrame) head(n int) DataFrame

head returns the first n rows.

fn (DataFrame) tail #

fn (df DataFrame) tail(n int) DataFrame

tail returns the last n rows.

fn (DataFrame) filter #

fn (df DataFrame) filter(predicate fn (Row) bool) DataFrame

filter returns rows accepted by the predicate.

fn (DataFrame) sort_by #

fn (df DataFrame) sort_by(name string, order SortOrder) !DataFrame

sort_by returns a DataFrame whose rows are sorted lexicographically by the values in the named column.

fn (DataFrame) sort_by_f64 #

fn (df DataFrame) sort_by_f64(name string, order SortOrder) !DataFrame

sort_by_f64 returns rows sorted numerically by column.

fn (DataFrame) value_counts #

fn (df DataFrame) value_counts(name string) !map[string]int

value_counts counts unique values in a column.

fn (DataFrame) describe #

fn (df DataFrame) describe(name string) !Summary

describe returns numeric statistics for a named column.

struct Row #

// Row exposes the cells of one DataFrame row by column name.
struct Row {
pub:
	// Cell values keyed by column name, e.g. row.values['qty'].
	values map[string]string
}

Row is a named view of a DataFrame row.

struct Series #

// Series is one named column whose cells are kept as strings.
struct Series {
pub:
	// Name of the Series.
	name   string
	// Cell values as strings; f64s() converts them to f64.
	values []string
}

Series is a single named string column with numeric helpers.

fn (Series) len #

fn (s Series) len() int

len returns the number of values in the Series.

fn (Series) get #

fn (s Series) get(index int) !string

get returns a value from the Series by row index.

fn (Series) f64s #

fn (s Series) f64s() ![]f64

f64s converts every value in the Series to f64.

fn (Series) sum #

fn (s Series) sum() !f64

sum returns the numeric sum of the Series.

fn (Series) mean #

fn (s Series) mean() !f64

mean returns the numeric mean of the Series.

fn (Series) min #

fn (s Series) min() !f64

min returns the smallest numeric value in the Series.

fn (Series) max #

fn (s Series) max() !f64

max returns the largest numeric value in the Series.

fn (Series) median #

fn (s Series) median() !f64

median returns the numeric median of the Series.

fn (Series) stddev #

fn (s Series) stddev() !f64

stddev returns the population standard deviation of the Series.

fn (Series) describe #

fn (s Series) describe() !Summary

describe returns basic numeric statistics for the Series.

struct Summary #

// Summary is the result type of Series.describe and DataFrame.describe.
struct Summary {
pub:
	// NOTE(review): presumably the number of values summarized — confirm.
	count  int
	sum    f64
	mean   f64
	min    f64
	max    f64
	median f64
	// NOTE(review): Series.stddev is documented as the population standard
	// deviation; presumably this field holds the same quantity — confirm.
	stddev f64
}

Summary contains basic numeric statistics for a Series.