group-wbl/.venv/lib/python3.13/site-packages/huggingface_hub/cli/upload.py

295 lines
11 KiB
Python
Raw Permalink Normal View History

2026-01-09 09:48:03 +08:00
# coding=utf-8
# Copyright 2023-present, the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains command to upload a repo or file with the CLI.
Usage:
# Upload file (implicit)
hf upload my-cool-model ./my-cool-model.safetensors
# Upload file (explicit)
hf upload my-cool-model ./my-cool-model.safetensors model.safetensors
# Upload directory (implicit). If `my-cool-model/` is a directory it will be uploaded, otherwise an exception is raised.
hf upload my-cool-model
# Upload directory (explicit)
hf upload my-cool-model ./models/my-cool-model .
# Upload filtered directory (example: tensorboard logs except for the last run)
hf upload my-cool-model ./model/training /logs --include "*.tfevents.*" --exclude "*20230905*"
# Upload with wildcard
hf upload my-cool-model "./model/training/*.safetensors"
# Upload private dataset
hf upload Wauplin/my-cool-dataset ./data . --repo-type=dataset --private
# Upload with token
hf upload Wauplin/my-cool-model --token=hf_****
# Sync local Space with Hub (upload new files, delete removed files)
hf upload Wauplin/space-example --repo-type=space --exclude="/logs/*" --delete="*" --commit-message="Sync local Space with Hub"
# Schedule commits every 30 minutes
hf upload Wauplin/my-cool-model --every=30
"""
import os
import time
import warnings
from typing import Annotated, Optional
import typer
from huggingface_hub import logging
from huggingface_hub._commit_scheduler import CommitScheduler
from huggingface_hub.errors import RevisionNotFoundError
from huggingface_hub.utils import disable_progress_bars, enable_progress_bars
from ._cli_utils import PrivateOpt, RepoIdArg, RepoType, RepoTypeOpt, RevisionOpt, TokenOpt, get_hf_api
logger = logging.get_logger(__name__)
def upload(
repo_id: RepoIdArg,
local_path: Annotated[
Optional[str],
typer.Argument(
help="Local path to the file or folder to upload. Wildcard patterns are supported. Defaults to current directory.",
),
] = None,
path_in_repo: Annotated[
Optional[str],
typer.Argument(
help="Path of the file or folder in the repo. Defaults to the relative path of the file or folder.",
),
] = None,
repo_type: RepoTypeOpt = RepoType.model,
revision: RevisionOpt = None,
private: PrivateOpt = None,
include: Annotated[
Optional[list[str]],
typer.Option(
help="Glob patterns to match files to upload.",
),
] = None,
exclude: Annotated[
Optional[list[str]],
typer.Option(
help="Glob patterns to exclude from files to upload.",
),
] = None,
delete: Annotated[
Optional[list[str]],
typer.Option(
help="Glob patterns for file to be deleted from the repo while committing.",
),
] = None,
commit_message: Annotated[
Optional[str],
typer.Option(
help="The summary / title / first line of the generated commit.",
),
] = None,
commit_description: Annotated[
Optional[str],
typer.Option(
help="The description of the generated commit.",
),
] = None,
create_pr: Annotated[
bool,
typer.Option(
help="Whether to upload content as a new Pull Request.",
),
] = False,
every: Annotated[
Optional[float],
typer.Option(
help="f set, a background job is scheduled to create commits every `every` minutes.",
),
] = None,
token: TokenOpt = None,
quiet: Annotated[
bool,
typer.Option(
help="Disable progress bars and warnings; print only the returned path.",
),
] = False,
) -> None:
"""Upload a file or a folder to the Hub. Recommended for single-commit uploads."""
if every is not None and every <= 0:
raise typer.BadParameter("--every must be a positive value", param_hint="every")
repo_type_str = repo_type.value
api = get_hf_api(token=token)
# Resolve local_path and path_in_repo based on implicit/explicit rules
resolved_local_path, resolved_path_in_repo, resolved_include = _resolve_upload_paths(
repo_id=repo_id, local_path=local_path, path_in_repo=path_in_repo, include=include
)
def run_upload() -> str:
if os.path.isfile(resolved_local_path):
if resolved_include is not None and len(resolved_include) > 0 and isinstance(resolved_include, list):
warnings.warn("Ignoring --include since a single file is uploaded.")
if exclude is not None and len(exclude) > 0:
warnings.warn("Ignoring --exclude since a single file is uploaded.")
if delete is not None and len(delete) > 0:
warnings.warn("Ignoring --delete since a single file is uploaded.")
# Schedule commits if `every` is set
if every is not None:
if os.path.isfile(resolved_local_path):
# If file => watch entire folder + use allow_patterns
folder_path = os.path.dirname(resolved_local_path)
pi = (
resolved_path_in_repo[: -len(resolved_local_path)]
if resolved_path_in_repo.endswith(resolved_local_path)
else resolved_path_in_repo
)
allow_patterns = [resolved_local_path]
ignore_patterns: Optional[list[str]] = []
else:
folder_path = resolved_local_path
pi = resolved_path_in_repo
allow_patterns = (
resolved_include or []
if isinstance(resolved_include, list)
else [resolved_include]
if isinstance(resolved_include, str)
else []
)
ignore_patterns = exclude or []
if delete is not None and len(delete) > 0:
warnings.warn("Ignoring --delete when uploading with scheduled commits.")
scheduler = CommitScheduler(
folder_path=folder_path,
repo_id=repo_id,
repo_type=repo_type_str,
revision=revision,
allow_patterns=allow_patterns,
ignore_patterns=ignore_patterns,
path_in_repo=pi,
private=private,
every=every,
hf_api=api,
)
print(f"Scheduling commits every {every} minutes to {scheduler.repo_id}.")
try:
while True:
time.sleep(100)
except KeyboardInterrupt:
scheduler.stop()
return "Stopped scheduled commits."
# Otherwise, create repo and proceed with the upload
if not os.path.isfile(resolved_local_path) and not os.path.isdir(resolved_local_path):
raise FileNotFoundError(f"No such file or directory: '{resolved_local_path}'.")
created = api.create_repo(
repo_id=repo_id,
repo_type=repo_type_str,
exist_ok=True,
private=private,
space_sdk="gradio" if repo_type_str == "space" else None,
# ^ We don't want it to fail when uploading to a Space => let's set Gradio by default.
# ^ I'd rather not add CLI args to set it explicitly as we already have `hf repo create` for that.
).repo_id
# Check if branch already exists and if not, create it
if revision is not None and not create_pr:
try:
api.repo_info(repo_id=created, repo_type=repo_type_str, revision=revision)
except RevisionNotFoundError:
logger.info(f"Branch '{revision}' not found. Creating it...")
api.create_branch(repo_id=created, repo_type=repo_type_str, branch=revision, exist_ok=True)
# ^ `exist_ok=True` to avoid race concurrency issues
# File-based upload
if os.path.isfile(resolved_local_path):
return api.upload_file(
path_or_fileobj=resolved_local_path,
path_in_repo=resolved_path_in_repo,
repo_id=created,
repo_type=repo_type_str,
revision=revision,
commit_message=commit_message,
commit_description=commit_description,
create_pr=create_pr,
)
# Folder-based upload
return api.upload_folder(
folder_path=resolved_local_path,
path_in_repo=resolved_path_in_repo,
repo_id=created,
repo_type=repo_type_str,
revision=revision,
commit_message=commit_message,
commit_description=commit_description,
create_pr=create_pr,
allow_patterns=(
resolved_include
if isinstance(resolved_include, list)
else [resolved_include]
if isinstance(resolved_include, str)
else None
),
ignore_patterns=exclude,
delete_patterns=delete,
)
if quiet:
disable_progress_bars()
with warnings.catch_warnings():
warnings.simplefilter("ignore")
print(run_upload())
enable_progress_bars()
else:
print(run_upload())
logging.set_verbosity_warning()
def _resolve_upload_paths(
*, repo_id: str, local_path: Optional[str], path_in_repo: Optional[str], include: Optional[list[str]]
) -> tuple[str, str, Optional[list[str]]]:
repo_name = repo_id.split("/")[-1]
resolved_include = include
if local_path is not None and any(c in local_path for c in ["*", "?", "["]):
if include is not None:
raise ValueError("Cannot set --include when local_path contains a wildcard.")
if path_in_repo is not None and path_in_repo != ".":
raise ValueError("Cannot set path_in_repo when local_path contains a wildcard.")
return ".", local_path, ["."] # will be adjusted below; placeholder for type
if local_path is None and os.path.isfile(repo_name):
return repo_name, repo_name, resolved_include
if local_path is None and os.path.isdir(repo_name):
return repo_name, ".", resolved_include
if local_path is None:
raise ValueError(f"'{repo_name}' is not a local file or folder. Please set local_path explicitly.")
if path_in_repo is None and os.path.isfile(local_path):
return local_path, os.path.basename(local_path), resolved_include
if path_in_repo is None:
return local_path, ".", resolved_include
return local_path, path_in_repo, resolved_include