/*===========================================================================
*
*                            PUBLIC DOMAIN NOTICE
*               National Center for Biotechnology Information
*
*  This software/database is a "United States Government Work" under the
*  terms of the United States Copyright Act.  It was written as part of
*  the author's official duties as a United States Government employee and
*  thus cannot be copyrighted.  This software/database is freely available
*  to the public for use. The National Library of Medicine and the U.S.
*  Government have not placed any restriction on its use or reproduction.
*
*  Although all reasonable efforts have been taken to ensure the accuracy
*  and reliability of the software and data, the NLM and the U.S.
*  Government do not and cannot warrant the performance or results that
*  may be obtained by using this software or data. The NLM and the U.S.
*  Government disclaim all warranties, express or implied, including
*  warranties of performance, merchantability or fitness for any particular
*  purpose.
*
*  Please cite the author in any work or product based on this material.
*
* ===========================================================================
*
*/

/*==========================================================================
 * VDB built-in functions, formats and types
 */
// schema language version declaration
// NOTE(review): presumably selects version 1 of the VDB schema syntax for
// the declarations that follow - confirm against the VDB schema documentation
version 1;


/*--------------------------------------------------------------------------
 * types
 */

// A row id range consists of two I64 values: row_id_start and row_id_stop
// NOTE(review): whether row_id_stop is inclusive is not stated here - confirm
typedef I64 vdb:row_id_range [ 2 ];


/*--------------------------------------------------------------------------
 * typesets
 */
// signed and unsigned integer types from 8 to 64 bits
typeset integer_set { I8, U8, I16, U16, I32, U32, I64, U64 };
// 32- and 64-bit floating point types
typeset float_set { F32, F64 };
// union of integer_set and float_set
typeset numeric_set { integer_set, float_set };
// text types in any of the listed encodings
typeset text_set { utf8, utf16, utf32, ascii };
// subset of text_set restricted to utf8 and ascii
typeset text8_set { utf8, ascii };
// types accepted by "transpose" and produced by "detranspose"
typeset transpose_set { B8, B16, B32, B64 };


/*--------------------------------------------------------------------------
 * formats
 */
// formats exchanged between the paired functions declared in this file

// result of "merge", input of "split"
fmtdef merged_fmt;
// result of "transpose", input of "detranspose"
fmtdef transposed_fmt;
// result of "delta_average", input of "undelta_average"
fmtdef delta_averaged_fmt;


/*--------------------------------------------------------------------------
 * constants
 */
// values for the U8 "align" parameter of bit_or, telling which edge
// of its two inputs is aligned when their sizes differ
// NOTE(review): trim also takes a U8 "align" - presumably the same values
const U8 ALIGN_LEFT = 0;
const U8 ALIGN_RIGHT = 1;

/*--------------------------------------------------------------------------
 * functions
 */


/* cast
 *  performs a "C++ reinterpret_cast" style cast
 *  rewrites input as required to produce output
 *
 *  legal operations include numeric_set -> numeric_set,
 *  numeric <-> character, etc.
 *
 *  "in" [ DATA ] - data to be cast. input and result are
 *  both declared as "any"
 */
function
any cast #1.0 ( any in )
    = vdb:cast;

/* bit_or
 *  computes the bitwise 'OR' of A and B, byte by byte
 *
 *  A and B do not necessarily have the same size. the resulting row
 *  has the size of B, and the OR is applied only to the portion of A
 *  that overlaps B.
 *
 *  "T" [ TYPE ] - type shared by both inputs and the result
 *
 *  "align" [ CONST ] - when A and B differ in size, tells which edge
 *  of A and B is aligned. possible values: ALIGN_LEFT, ALIGN_RIGHT
 *
 *  "A" [ DATA ], "B" [ DATA ] - rows to be combined
 */
function < type T >
T bit_or #1 < U8 align > ( T A, T B )
    = vdb:bit_or;

/* trim
 *  performs trimming of value "val" from column "A"
 *
 *  "T" [ TYPE ] - type shared by the column, the trimmed value
 *  and the result
 *
 *  "align" [ CONST ] - selects left- or right- trimming
 *  NOTE(review): presumably ALIGN_LEFT / ALIGN_RIGHT - confirm
 *
 *  "val" [ CONST ] - value to be trimmed
 *
 *  "A" [ DATA ] - column to be trimmed
 */
function < type T >
T trim #1 < U8 align, T val > ( T A )
    = vdb:trim;


/* redimension
 *  performs a change of dimension without changing bit pattern
 *
 *  "in" [ DATA ] - data to be redimensioned. input and result are
 *  both declared as "any"
 */
function
any redimension #1.0 ( any in )
    = vdb:redimension;


/* row_id
 *  returns the row id of a request
 *
 *  takes no inputs; the result type is I64
 */
function
I64 row_id #1.0 ()
    = vdb:row_id;


/* row_len
 *  returns the number of elements in a row, as a U32
 *
 *  "in" [ DATA ] - column supplying row. if row does not exist
 *  in column, the resultant length is 0.
 */
function
U32 row_len #1.0 ( any in )
    = vdb:row_len;


/* fixed_row_len
 *  returns non-zero if the entire page
 *  has a uniform row-length, zero otherwise
 *  NOTE(review): the non-zero value is presumably the uniform
 *  row-length itself - confirm against the implementation
 *
 *  "in" [ DATA ] - column to query
 */
function
U32 fixed_row_len #1.0 ( any in )
    = vdb:fixed_row_len;


/* compare
 *  evaluates src [ i ] == cmp [ i ]
 *  causes writing exception if unequal.
 *
 *  For whole types, equality is bitwise equal
 *  for floating point types see below.
 *
 *  "T" [ TYPE ] - base element type to be processed
 *
 *  "sig_bits" [ OPTIONAL CONST >= 1 ] - for floating point types, ignored
 *  otherwise, the number of significant binary digits in the mantissas to
 *  compare such that |x - y| <= 1, for corresponding numbers x (in src) and
 *  y (in cmp) both scaled according to sig_bits and their common magnitude.
 *  "sig_bits" may be an array, if so "sel" is required (see below).
 *  NOTE(review): this signature declares no "sel" input; the reference
 *  appears to be to compare2f, whose declaration is commented out below.
 *
 *  "src" [ DATA ] - standard input data derived from source
 *
 *  "cmp" [ DATA ] - feedback data after being written and re-read
 *
 */
validate function < type T >
void compare #1.0 < * U32 sig_bits > ( T src, T cmp )
    = vdb:compare;

/* no_compare
 *  validation function taking "src" and "cmp" inputs of type "T",
 *  with no "sig_bits" parameter
 *  NOTE(review): presumably accepts its inputs without comparing them -
 *  confirm against the vdb:no_compare implementation
 */
validate function < type T >
void no_compare #1.0 ( T src, T cmp )
    = vdb:no_compare;


/* compare2f
 *  evaluates src [ i ] == cmp [ i ]
 *  causes writing exception if unequal.
 *
 *  "T" [ TYPE ] - base element type to be processed
 *
 *  "sig_bits" [ CONST >= 1 ] - for floating point types, ignored otherwise,
 *  array containing the number of significant binary digits in the mantissas
 *  to compare such that |x - y| <= 1, for corresponding numbers x (in src) and
 *  y (in cmp) both scaled according to sig_bits and their common magnitude.
 *
 *  "src" [ DATA ] - standard input data derived from source
 *
 *  "cmp" [ DATA ] - feedback data after being written and re-read
 *
 *  "sel" [ DATA ] - data to select which element of "sig_bits" to
 *  use for the comparison.  The valid values of "sel" are
 *  [0 .. length sig_bits).
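 *
 *  NOTE: the declaration below sits inside this comment block, so
 *  compare2f is not currently declared.
 *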
validate function < type T >
void compare2f #1.0 < U32 sig_bits > ( float_set src, float_set cmp, T sel )
    = vdb:compare2f;
*/

/* range_validate
 *  passes input through if all values fall between lower and
 *  upper bounds, INCLUSIVE
 *  NOTE(review): what happens when a value falls outside the
 *  bounds is not stated here - confirm against the implementation
 *
 *  "T" [ TYPE ] - type to be validated
 *
 *  "lower" [ CONST ] and "upper" [ CONST ] - inclusive
 *  bounds on input values
 *
 *  "in" [ DATA ] - data to be validated
 */
function < type T >
T range_validate #1.0 < T lower, T upper > ( T in )
    = vdb:range_validate;


/* select
 *  return first non-empty input for id
 *  inputs are taken from first to last
 *
 *  "T" [ TYPE ] - data type of selection
 *
 *  "first" [ DATA ] - first of N inputs
 *
 *  "second" [ DATA ] - second of N inputs
 *
 *  "..." [ DATA, OPTIONAL ] - all other inputs are optional
 *  and must be compatible with type "T"
 */
function < type T >
T select #1.0 ( T first, T second, ... )
    = vdb:select;


/* transpose
 *  transpose a page of unformatted data
 *
 *  for example - convert a simple page of values,
 *  where vertical scale is row id and horizontal element index:
 *
 *        1   2   3
 *      +---+---+---+
 *    1 | a | b | c |
 *      +---+---+---+
 *    2 | d | e | f |
 *      +---+---+---+
 *    3 | g | h | i |
 *      +---+---+---+
 *    4 | j | k | l |
 *      +---+---+---+
 *
 *  into:
 *
 *        1   2   3   4
 *      +---+---+---+---+
 *    1 | a | d | g | j |
 *      +---+---+---+---+
 *    2 | b | e | h | k |
 *      +---+---+---+---+
 *    3 | c | f | i | l |
 *      +---+---+---+---+
 *
 *  variable row-lengths are supported. The output blob is
 *  formatted, meaning that the result can no longer be addressed
 *  as a matrix, but the transposition has been applied to data.
 *
 *  "in" [ DATA ] - unformatted data to be transposed
 */
function
transposed_fmt transpose #1 ( transpose_set in )
    = vdb:transpose;


/* detranspose
 *  pardoning the awful name, apply a transposition on the result
 *  of "transpose" to produce the original blob. "transpose"
 *  itself cannot be reused because of its signature.
 *
 *  "in" [ DATA ] - transposed data, as produced by "transpose"
 */
function
transpose_set detranspose #1 ( transposed_fmt in )
    = vdb:detranspose;

/* delta_average
 *  computes an average representation of the maximum-length row
 *  and deltas every row against it
 *
 *  "in" [ DATA ] - rows to be processed
 */
function
delta_averaged_fmt delta_average #1 ( any in )
    = vdb:delta_average;

/* undelta_average
 *  takes data in delta_averaged_fmt, the format produced by delta_average
 *  NOTE(review): presumably restores the rows that were given to
 *  delta_average - confirm against the implementation
 *
 *  "in" [ DATA ] - delta-averaged data
 */
function
any undelta_average #1 ( delta_averaged_fmt in )
    = vdb:undelta_average;

/* merge
 *  merges all input blobs of any format/type into a single blob
 *
 *  "in" [ DATA ] - first blob to merge
 *
 *  "..." [ DATA ] - any further blobs to merge
 */
function
merged_fmt merge #1.0 ( any in, ... )
    = vdb:merge;


/* split
 *  extracts a single blob from a merged blob by index
 *
 *  "idx" [ CONST ] - blob index
 *  NOTE(review): the index base (0 or 1) is not stated here - confirm
 *
 *  "in" [ DATA ] - merged blob, as produced by "merge"
 */

function
any split #1.0 < U32 idx > ( merged_fmt in )
    = vdb:split;


/* meta:read
 *  reads table metadata node as a row
 * meta:value
 *  reads metadata node as single value,
 *  performing size conversion if necessary,
 *  e.g. I8 TO I64, I32 TO I16
 *
 *  "T" [ TYPE ] - cast data type of metadata node
 *
 *  "node" [ CONST ] - path to metadata node
 *
 *  "deterministic" [ CONST, OPTIONAL ] - boolean flag
 *  NOTE(review): its effect is not described here - TODO confirm
 */
function < type T >
T meta:read #1.0 < ascii node, * bool deterministic > ();

function < type T >
T meta:value #1.0 < ascii node, * bool deterministic > ();


/* meta:write
 *  writes row data to table metadata node
 *
 *  "T" [ TYPE ] - cast data type of metadata node
 *
 *  "node" [ CONST ] - path to metadata node
 *
 *  "in" [ DATA ] - source of row data
 *
 *  NOTE(review): the result type is "T" - presumably the input
 *  passed through; confirm against the implementation
 */
function < type T >
T meta:write #1.0 < ascii node > ( T in );


/* meta:attr:read
 *  reads table metadata attribute as a row of ascii text
 *
 *  "node" [ CONST ] - path to metadata node
 *
 *  "attr" [ CONST ] - attribute name on node
 *
 *  "deterministic" [ CONST, OPTIONAL ] - boolean flag
 *  NOTE(review): its effect is not described here - TODO confirm
 */
function
ascii meta:attr:read #1.0 < ascii node, ascii attr, * bool deterministic > ();


/* meta:attr:write
 *  write row data as table metadata attribute
 *
 *  "node" [ CONST ] - path to metadata node
 *
 *  "attr" [ CONST ] - attribute name on node
 *
 *  "in" [ DATA ] - source of the ascii text to write
 */
function
ascii meta:attr:write #1.0 < ascii node, ascii attr > ( ascii in );


/* parameter:read
 *  reads named cursor parameter text
 *
 *  "name" [ CONST ] - parameter name
 *
 *  "deterministic" [ CONST, OPTIONAL ] - boolean flag
 *  NOTE(review): its effect is not described here - TODO confirm
 */
function
text8_set parameter:read #1.0 < ascii name, * bool deterministic > ();


/* environment:read
 *  reads named environment variable text
 *  the result is declared as text8_set ( utf8 or ascii )
 *
 *  "name" [ CONST ] - environment variable name
 */
function
text8_set environment:read #1.0 < ascii name > ();

// case sensitivity modes, passed as the optional U8 "case_sensitivity"
// parameter of the idx:text functions declared below
// NOTE(review): LOWER / UPPER presumably name the case that text is
// folded to when matching case-insensitively - confirm
const U8 CASE_SENSITIVE         =  0;
const U8 CASE_INSENSITIVE_LOWER =  1;
const U8 CASE_INSENSITIVE_UPPER =  2;

/* idx:text:project
 *  reverse lookup in a text index; when the key is not found,
 *  the substitute is used instead
 *
 *  "index_name" [ CONST ] - name of text index
 *
 *  "case_sensitivity" [ CONST, OPTIONAL ] - added in version 1.1
 *
 *  "substitute" [ DATA, OPTIONAL ] - source of values to substitute
 *  for values not found in the index.
 *
 *  Version 1.1: looks into "substitute" (if available) first and
 *  then into the index
 */
function
text8_set idx:text:project #1.1 < ascii index_name, * U8 case_sensitivity >
    ( * text8_set substitute );


/* idx:text:insert
 *  inserts "key" into the index. "key" is returned if the insertion
 *  failed or when "case_insensitive" is true
 *  NOTE(review): no "case_insensitive" parameter is declared; presumably
 *  this means "case_sensitivity" selects a case-insensitive mode - confirm
 *
 *  "index_name" [ CONST ] - name of the index
 *
 *  "case_sensitivity" [ CONST, OPTIONAL ] - added in version 1.1
 *
 *  "key" [ DATA ] - text to be inserted
 */
function
text8_set idx:text:insert #1.1 < ascii index_name, * U8 case_sensitivity >
    ( text8_set key );


/* idx:text:lookup
 *  performs a lookup in an index and returns the associated
 *  vdb:row_id_range
 *  NOTE(review): what the range is associated with is not stated;
 *  presumably the key obtained via "query_by_name" - confirm
 *
 *  "index_name" [ CONST ] - name of the index
 *
 *  "query_by_name" [ CONST ] - NOTE(review): presumably names the
 *  source of the key to look up - confirm
 *
 *  "case_sensitivity" [ CONST, OPTIONAL ] - added in version 1.1
 */
function
vdb:row_id_range idx:text:lookup #1.1
    < ascii index_name, ascii query_by_name, * U8 case_sensitivity > ();