50 lines
1.8 KiB
Python
50 lines
1.8 KiB
Python
|
|
import re
|
||
|
|
from typing import Tuple
|
||
|
|
from uuid import UUID
|
||
|
|
|
||
|
|
from chromadb.db.base import SqlDB
|
||
|
|
from chromadb.segment import SegmentManager, VectorReader
|
||
|
|
|
||
|
|
# Pattern for fully-qualified topic names of the form
# persistent://<tenant>/<namespace>/<topic>.
# Each named group uses a greedy `.+`, so if the name contains more than two
# "/" separators after the scheme, the extra ones end up in the earlier groups.
topic_regex = r"persistent:\/\/(?P<tenant>.+)\/(?P<namespace>.+)\/(?P<topic>.+)"
|
||
|
|
|
||
|
|
|
||
|
|
def parse_topic_name(topic_name: str) -> Tuple[str, str, str]:
    """Split a fully-qualified topic name into its three components.

    Returns a ``(tenant, namespace, topic)`` tuple taken from the named groups
    of ``topic_regex``.

    Raises ``ValueError`` when ``topic_name`` does not match ``topic_regex``
    (the pattern is applied from the start of the string).
    """
    parsed = re.match(topic_regex, topic_name)
    if parsed is None:
        raise ValueError(f"Invalid topic name: {topic_name}")

    components = parsed.groupdict()
    return components["tenant"], components["namespace"], components["topic"]
|
||
|
|
|
||
|
|
|
||
|
|
def create_topic_name(tenant: str, namespace: str, collection_id: UUID) -> str:
    """Build the fully-qualified topic name for a collection.

    The result has the form
    ``persistent://<tenant>/<namespace>/<collection_id>``, where the collection
    ID is rendered with ``str()``.
    """
    return f"persistent://{tenant}/{namespace}/{collection_id!s}"
|
||
|
|
|
||
|
|
|
||
|
|
def trigger_vector_segments_max_seq_id_migration(
    db: SqlDB, segment_manager: SegmentManager
) -> None:
    """
    Force unmigrated vector segments to move their max_seq_id into SQLite.

    A vector segment migrates its max_seq_id from the pickled metadata file to
    SQLite automatically when it is initialized, so this helper is meant for
    situations where segments are likely still unmigrated and not yet loaded.

    Persisted HNSW vector segments without a row in "max_seq_id" are looked up,
    and the vector segment of each owning collection is loaded. When no such
    segment exists, nothing is loaded and the call is a no-op.
    """
    unmigrated_segments_query = """
        SELECT collection
        FROM "segments"
        WHERE "id" NOT IN (SELECT "segment_id" FROM "max_seq_id") AND
            "type" = 'urn:chroma:segment/vector/hnsw-local-persisted'
    """
    with db.tx() as cur:
        cur.execute(unmigrated_segments_query)
        pending_collection_ids = [record[0] for record in cur.fetchall()]

    if not pending_collection_ids:
        return

    # The returned segment is not used: loading it is enough, because the
    # segment performs the migration as part of its initialization.
    for raw_collection_id in pending_collection_ids:
        segment_manager.get_segment(UUID(raw_collection_id), VectorReader)
|