Reference#

class tca.pipeline.SearchIndex(embeddings, metadata, config)[source]#
Parameters:
  • embeddings

  • metadata

  • config

classmethod from_embeddings(embeddings, metadata, config=None)[source]#
Parameters:
  • embeddings

  • metadata

  • config

Return type:

SearchIndex

search(query, filters=None, *, query_mode='auto')[source]#
Parameters:
  • query

  • filters

  • query_mode

Return type:

SearchResults

search_exact(query, filters=None)[source]#
Parameters:
  • query

  • filters

Return type:

SearchResults

to_manifest()[source]#
Return type:

dict[str, Any]

class tca.config.TurboQuantConfig(bit_width: int = 3, candidate_k: int = 128, rerank_k: int = 20, oversample: int = 2, seed: int = 0, quantizer_kind: str = 'prod', lloyd_max_iter: int = 100, lloyd_tol: float = 1e-06, monte_carlo_samples: int = 20000, store_original_embeddings: bool = True, auto_score_gap_threshold: float = 0.06, auto_score_spread_threshold: float = 0.015, max_candidate_k: int = 2048, max_oversample: int = 8)[source]#
Parameters:
  • bit_width (int)

  • candidate_k (int)

  • rerank_k (int)

  • oversample (int)

  • seed (int)

  • quantizer_kind (str)

  • lloyd_max_iter (int)

  • lloyd_tol (float)

  • monte_carlo_samples (int)

  • store_original_embeddings (bool)

  • auto_score_gap_threshold (float)

  • auto_score_spread_threshold (float)

  • max_candidate_k (int)

  • max_oversample (int)

bit_width: int#
candidate_k: int#
rerank_k: int#
oversample: int#
seed: int#
quantizer_kind: str#
lloyd_max_iter: int#
lloyd_tol: float#
monte_carlo_samples: int#
store_original_embeddings: bool#
auto_score_gap_threshold: float#
auto_score_spread_threshold: float#
max_candidate_k: int#
max_oversample: int#
validate()[source]#
Return type:

TurboQuantConfig

to_dict()[source]#
Return type:

dict[str, Any]

to_json(path)[source]#
Parameters:

path (str | Path)

Return type:

None

classmethod from_json(path)[source]#
Parameters:

path (str | Path)

Return type:

TurboQuantConfig

tca.quantization.fit_scalar_codebook(bit_width, dimension, n_samples=20000, seed=0, max_iter=100, tol=1e-06)[source]#
Parameters:
  • bit_width

  • dimension

  • n_samples

  • seed

  • max_iter

  • tol

Return type:

ndarray[tuple[Any, ...], dtype[float32]]

class tca.quantization.EncodedMSE(indices: IntArray)[source]#
Parameters:

indices (ndarray[tuple[Any, ...], dtype[int32]])

indices: ndarray[tuple[Any, ...], dtype[int32]]#
class tca.quantization.TurboQuantMSE(dimension, bit_width, *, seed=0, monte_carlo_samples=20000, lloyd_max_iter=100, lloyd_tol=1e-06)[source]#
Parameters:
  • dimension (int)

  • bit_width (int)

  • seed (int)

  • monte_carlo_samples (int)

  • lloyd_max_iter (int)

  • lloyd_tol (float)

encode(x)[source]#
Parameters:

x (_Buffer | _SupportsArray[dtype[Any]] | _NestedSequence[_SupportsArray[dtype[Any]]] | complex | bytes | str | _NestedSequence[complex | bytes | str])

Return type:

EncodedMSE

decode(encoded)[source]#
Parameters:

encoded (EncodedMSE | _Buffer | _SupportsArray[dtype[Any]] | _NestedSequence[_SupportsArray[dtype[Any]]] | complex | bytes | str | _NestedSequence[complex | bytes | str])

Return type:

ndarray[tuple[Any, ...], dtype[float32]]

prepare_query(query)[source]#
Parameters:

query (_Buffer | _SupportsArray[dtype[Any]] | _NestedSequence[_SupportsArray[dtype[Any]]] | complex | bytes | str | _NestedSequence[complex | bytes | str])

Return type:

ndarray[tuple[Any, ...], dtype[float32]]

approximate_inner_products(query, encoded)[source]#
Parameters:
  • query

  • encoded

Return type:

ndarray[tuple[Any, ...], dtype[float32]]

class tca.quantization.EncodedProd(indices: IntArray, signs: SignArray, residual_norms: FloatArray)[source]#
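Parameters:
  • indices (ndarray[tuple[Any, ...], dtype[int32]])

  • signs (ndarray[tuple[Any, ...], dtype[int8]])

  • residual_norms (ndarray[tuple[Any, ...], dtype[float32]])
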
indices: ndarray[tuple[Any, ...], dtype[int32]]#
signs: ndarray[tuple[Any, ...], dtype[int8]]#
residual_norms: ndarray[tuple[Any, ...], dtype[float32]]#
class tca.quantization.TurboQuantProd(dimension, bit_width, *, seed=0, monte_carlo_samples=20000, lloyd_max_iter=100, lloyd_tol=1e-06)[source]#
Parameters:
  • dimension (int)

  • bit_width (int)

  • seed (int)

  • monte_carlo_samples (int)

  • lloyd_max_iter (int)

  • lloyd_tol (float)

encode(x)[source]#
Parameters:

x (_Buffer | _SupportsArray[dtype[Any]] | _NestedSequence[_SupportsArray[dtype[Any]]] | complex | bytes | str | _NestedSequence[complex | bytes | str])

Return type:

EncodedProd

decode(encoded)[source]#
Parameters:

encoded (EncodedProd)

Return type:

ndarray[tuple[Any, ...], dtype[float32]]

prepare_query(query)[source]#
Parameters:

query (_Buffer | _SupportsArray[dtype[Any]] | _NestedSequence[_SupportsArray[dtype[Any]]] | complex | bytes | str | _NestedSequence[complex | bytes | str])

Return type:

tuple[ndarray[tuple[Any, ...], dtype[float32]], ndarray[tuple[Any, ...], dtype[float32]]]

approximate_inner_products(query, encoded)[source]#
Parameters:
  • query

  • encoded

Return type:

ndarray[tuple[Any, ...], dtype[float32]]

tca.quantization.exact_topk(query, bank, top_k, ids=None)[source]#
Parameters:
  • query

  • bank

  • top_k

  • ids

Return type:

tuple[ndarray, ndarray]

tca.cli.build_parser()[source]#
Return type:

ArgumentParser

tca.cli.main()[source]#
Return type:

None