104 lines
2.9 KiB
JSON
104 lines
2.9 KiB
JSON
{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "BM25 Embedding Function Schema",
  "description": "Schema for the BM25 sparse embedding function configuration",
  "version": "1.0.0",
  "type": "object",
  "properties": {
    "task": {
      "type": "string",
      "enum": [
        "document",
        "query"
      ],
      "description": "Task to perform, can be 'document' or 'query'"
    },
    "query_config": {
      "type": "object",
      "description": "Configuration for the query",
      "properties": {
        "task": {
          "type": "string",
          "enum": [
            "document",
            "query"
          ],
          "description": "Task to perform for query embedding"
        }
      },
      "additionalProperties": false
    },
    "cache_dir": {
      "type": [
        "string",
        "null"
      ],
      "description": "The path to the cache directory"
    },
    "k": {
      "type": [
        "number",
        "null"
      ],
      "description": "The k parameter in the BM25 formula. Defines the saturation of the term frequency"
    },
    "b": {
      "type": [
        "number",
        "null"
      ],
      "description": "The b parameter in the BM25 formula. Defines the importance of the document length"
    },
    "avg_len": {
      "type": [
        "number",
        "null"
      ],
      "description": "The average length of the documents in the corpus"
    },
    "language": {
      "type": [
        "string",
        "null"
      ],
      "description": "Specifies the language for the stemmer"
    },
    "token_max_length": {
      "type": [
        "integer",
        "null"
      ],
      "description": "The maximum length of the tokens"
    },
    "disable_stemmer": {
      "type": [
        "boolean",
        "null"
      ],
      "description": "Disable the stemmer"
    },
    "specific_model_path": {
      "type": [
        "string",
        "null"
      ],
      "description": "The path to the specific model"
    },
    "kwargs": {
      "type": "object",
      "description": "Additional arguments to pass to the BM25 model",
      "additionalProperties": {
        "type": [
          "string",
          "integer",
          "number",
          "boolean",
          "array",
          "object"
        ]
      }
    }
  },
  "additionalProperties": false
}