group-wbl/.venv/lib/python3.13/site-packages/schemas/embedding_functions/bm25.json

104 lines
2.9 KiB
JSON
Raw Normal View History

2026-01-09 09:12:25 +08:00
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "BM25 Embedding Function Schema",
"description": "Schema for the BM25 sparse embedding function configuration",
"version": "1.0.0",
"type": "object",
"properties": {
"task": {
"type": "string",
"enum": [
"document",
"query"
],
"description": "Task to perform, can be 'document' or 'query'"
},
"query_config": {
"type": "object",
"description": "Configuration for the query",
"properties": {
"task": {
"type": "string",
"enum": [
"document",
"query"
],
"description": "Task to perform for query embedding"
}
},
"additionalProperties": false
},
"cache_dir": {
"type": [
"string",
"null"
],
"description": "The path to the cache directory"
},
"k": {
"type": [
"number",
"null"
],
"description": "The k parameter in the BM25 formula. Defines the saturation of the term frequency"
},
"b": {
"type": [
"number",
"null"
],
"description": "The b parameter in the BM25 formula. Defines the importance of the document length"
},
"avg_len": {
"type": [
"number",
"null"
],
"description": "The average length of the documents in the corpus"
},
"language": {
"type": [
"string",
"null"
],
"description": "Specifies the language for the stemmer"
},
"token_max_length": {
"type": [
"integer",
"null"
],
"description": "The maximum length of the tokens"
},
"disable_stemmer": {
"type": [
"boolean",
"null"
],
"description": "Disable the stemmer"
},
"specific_model_path": {
"type": [
"string",
"null"
],
"description": "The path to the specific model"
},
"kwargs": {
"type": "object",
"description": "Additional arguments to pass to the BM25 model",
"additionalProperties": {
"type": [
"string",
"integer",
"number",
"boolean",
"array",
"object"
]
}
}
},
"additionalProperties": false
}