{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "BM25 Embedding Function Schema",
  "description": "Schema for the BM25 sparse embedding function configuration",
  "version": "1.0.0",
  "type": "object",
  "properties": {
    "task": {
      "type": "string",
      "enum": ["document", "query"],
      "description": "Task to perform, can be 'document' or 'query'"
    },
    "query_config": {
      "type": "object",
      "description": "Configuration for the query",
      "properties": {
        "task": {
          "type": "string",
          "enum": ["document", "query"],
          "description": "Task to perform for query embedding"
        }
      },
      "additionalProperties": false
    },
    "cache_dir": {
      "type": ["string", "null"],
      "description": "The path to the cache directory"
    },
    "k": {
      "type": ["number", "null"],
      "description": "The k parameter in the BM25 formula. Defines the saturation of the term frequency"
    },
    "b": {
      "type": ["number", "null"],
      "description": "The b parameter in the BM25 formula. Defines the importance of the document length"
    },
    "avg_len": {
      "type": ["number", "null"],
      "description": "The average length of the documents in the corpus"
    },
    "language": {
      "type": ["string", "null"],
      "description": "Specifies the language for the stemmer"
    },
    "token_max_length": {
      "type": ["integer", "null"],
      "description": "The maximum length of the tokens"
    },
    "disable_stemmer": {
      "type": ["boolean", "null"],
      "description": "Disable the stemmer"
    },
    "specific_model_path": {
      "type": ["string", "null"],
      "description": "The path to the specific model"
    },
    "kwargs": {
      "type": "object",
      "description": "Additional arguments to pass to the BM25 model",
      "additionalProperties": {
        "type": ["string", "integer", "number", "boolean", "array", "object"]
      }
    }
  },
  "additionalProperties": false
}