295 lines
11 KiB
Python
295 lines
11 KiB
Python
# coding=utf-8
|
|
# Copyright 2023-present, the HuggingFace Inc. team.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""Contains command to upload a repo or file with the CLI.
|
|
|
|
Usage:
|
|
# Upload file (implicit)
|
|
hf upload my-cool-model ./my-cool-model.safetensors
|
|
|
|
# Upload file (explicit)
|
|
hf upload my-cool-model ./my-cool-model.safetensors model.safetensors
|
|
|
|
# Upload directory (implicit). If `my-cool-model/` is a directory it will be uploaded, otherwise an exception is raised.
|
|
hf upload my-cool-model
|
|
|
|
# Upload directory (explicit)
|
|
hf upload my-cool-model ./models/my-cool-model .
|
|
|
|
# Upload filtered directory (example: tensorboard logs except for the last run)
|
|
hf upload my-cool-model ./model/training /logs --include "*.tfevents.*" --exclude "*20230905*"
|
|
|
|
# Upload with wildcard
|
|
hf upload my-cool-model "./model/training/*.safetensors"
|
|
|
|
# Upload private dataset
|
|
hf upload Wauplin/my-cool-dataset ./data . --repo-type=dataset --private
|
|
|
|
# Upload with token
|
|
hf upload Wauplin/my-cool-model --token=hf_****
|
|
|
|
# Sync local Space with Hub (upload new files, delete removed files)
|
|
hf upload Wauplin/space-example --repo-type=space --exclude="/logs/*" --delete="*" --commit-message="Sync local Space with Hub"
|
|
|
|
# Schedule commits every 30 minutes
|
|
hf upload Wauplin/my-cool-model --every=30
|
|
"""
|
|
|
|
import os
|
|
import time
|
|
import warnings
|
|
from typing import Annotated, Optional
|
|
|
|
import typer
|
|
|
|
from huggingface_hub import logging
|
|
from huggingface_hub._commit_scheduler import CommitScheduler
|
|
from huggingface_hub.errors import RevisionNotFoundError
|
|
from huggingface_hub.utils import disable_progress_bars, enable_progress_bars
|
|
|
|
from ._cli_utils import PrivateOpt, RepoIdArg, RepoType, RepoTypeOpt, RevisionOpt, TokenOpt, get_hf_api
|
|
|
|
|
|
# Module-level logger named after this module; used below to report branch creation.
logger = logging.get_logger(__name__)
|
|
|
|
|
|
def upload(
    repo_id: RepoIdArg,
    local_path: Annotated[
        Optional[str],
        typer.Argument(
            help="Local path to the file or folder to upload. Wildcard patterns are supported. Defaults to current directory.",
        ),
    ] = None,
    path_in_repo: Annotated[
        Optional[str],
        typer.Argument(
            help="Path of the file or folder in the repo. Defaults to the relative path of the file or folder.",
        ),
    ] = None,
    repo_type: RepoTypeOpt = RepoType.model,
    revision: RevisionOpt = None,
    private: PrivateOpt = None,
    include: Annotated[
        Optional[list[str]],
        typer.Option(
            help="Glob patterns to match files to upload.",
        ),
    ] = None,
    exclude: Annotated[
        Optional[list[str]],
        typer.Option(
            help="Glob patterns to exclude from files to upload.",
        ),
    ] = None,
    delete: Annotated[
        Optional[list[str]],
        typer.Option(
            help="Glob patterns for file to be deleted from the repo while committing.",
        ),
    ] = None,
    commit_message: Annotated[
        Optional[str],
        typer.Option(
            help="The summary / title / first line of the generated commit.",
        ),
    ] = None,
    commit_description: Annotated[
        Optional[str],
        typer.Option(
            help="The description of the generated commit.",
        ),
    ] = None,
    create_pr: Annotated[
        bool,
        typer.Option(
            help="Whether to upload content as a new Pull Request.",
        ),
    ] = False,
    every: Annotated[
        Optional[float],
        typer.Option(
            help="If set, a background job is scheduled to create commits every `every` minutes.",
        ),
    ] = None,
    token: TokenOpt = None,
    quiet: Annotated[
        bool,
        typer.Option(
            help="Disable progress bars and warnings; print only the returned path.",
        ),
    ] = False,
) -> None:
    """Upload a file or a folder to the Hub. Recommended for single-commit uploads."""
    # NOTE: the docstring above is deliberately kept to a single line: Typer surfaces a
    # command's docstring as its CLI help text, so the detailed notes live in comments.
    #
    # Flow:
    #   1. Validate `--every` and resolve the local path / path in repo / include patterns.
    #   2. If `--every` is set, start a `CommitScheduler` and block until Ctrl-C.
    #   3. Otherwise create the repo (and the branch if needed), then upload the file or folder.
    # Whatever `run_upload()` returns is printed to stdout.
    #
    # Raises:
    #   typer.BadParameter: if `--every` is zero or negative.
    #   FileNotFoundError: if the resolved local path is neither a file nor a directory
    #       (only checked for one-shot uploads, not for scheduled commits).

    if every is not None and every <= 0:
        raise typer.BadParameter("--every must be a positive value", param_hint="every")

    repo_type_str = repo_type.value

    api = get_hf_api(token=token)

    # Resolve local_path and path_in_repo based on implicit/explicit rules
    resolved_local_path, resolved_path_in_repo, resolved_include = _resolve_upload_paths(
        repo_id=repo_id, local_path=local_path, path_in_repo=path_in_repo, include=include
    )

    def run_upload() -> str:
        # Pattern options only make sense for folders: warn when they are given for a single file.
        if os.path.isfile(resolved_local_path):
            if resolved_include is not None and len(resolved_include) > 0 and isinstance(resolved_include, list):
                warnings.warn("Ignoring --include since a single file is uploaded.")
            if exclude is not None and len(exclude) > 0:
                warnings.warn("Ignoring --exclude since a single file is uploaded.")
            if delete is not None and len(delete) > 0:
                warnings.warn("Ignoring --delete since a single file is uploaded.")

        # Schedule commits if `every` is set
        if every is not None:
            if os.path.isfile(resolved_local_path):
                # If file => watch entire folder + use allow_patterns
                folder_path = os.path.dirname(resolved_local_path)
                # Drop the local filename from the repo path when the repo path ends with it.
                pi = (
                    resolved_path_in_repo[: -len(resolved_local_path)]
                    if resolved_path_in_repo.endswith(resolved_local_path)
                    else resolved_path_in_repo
                )
                allow_patterns = [resolved_local_path]
                ignore_patterns: Optional[list[str]] = []
            else:
                folder_path = resolved_local_path
                pi = resolved_path_in_repo
                # Normalize the include value to a list (a bare string becomes a one-item list).
                allow_patterns = (
                    resolved_include or []
                    if isinstance(resolved_include, list)
                    else [resolved_include]
                    if isinstance(resolved_include, str)
                    else []
                )
                ignore_patterns = exclude or []
                if delete is not None and len(delete) > 0:
                    warnings.warn("Ignoring --delete when uploading with scheduled commits.")

            scheduler = CommitScheduler(
                folder_path=folder_path,
                repo_id=repo_id,
                repo_type=repo_type_str,
                revision=revision,
                allow_patterns=allow_patterns,
                ignore_patterns=ignore_patterns,
                path_in_repo=pi,
                private=private,
                every=every,
                hf_api=api,
            )
            print(f"Scheduling commits every {every} minutes to {scheduler.repo_id}.")
            # Keep the main thread alive until the user interrupts, then stop the scheduler.
            try:
                while True:
                    time.sleep(100)
            except KeyboardInterrupt:
                scheduler.stop()
                return "Stopped scheduled commits."

        # Otherwise, create repo and proceed with the upload
        if not os.path.isfile(resolved_local_path) and not os.path.isdir(resolved_local_path):
            raise FileNotFoundError(f"No such file or directory: '{resolved_local_path}'.")
        created = api.create_repo(
            repo_id=repo_id,
            repo_type=repo_type_str,
            exist_ok=True,
            private=private,
            space_sdk="gradio" if repo_type_str == "space" else None,
            # ^ We don't want it to fail when uploading to a Space => let's set Gradio by default.
            # ^ I'd rather not add CLI args to set it explicitly as we already have `hf repo create` for that.
        ).repo_id

        # Check if branch already exists and if not, create it
        if revision is not None and not create_pr:
            try:
                api.repo_info(repo_id=created, repo_type=repo_type_str, revision=revision)
            except RevisionNotFoundError:
                logger.info(f"Branch '{revision}' not found. Creating it...")
                api.create_branch(repo_id=created, repo_type=repo_type_str, branch=revision, exist_ok=True)
                # ^ `exist_ok=True` to avoid race concurrency issues

        # File-based upload
        if os.path.isfile(resolved_local_path):
            return api.upload_file(
                path_or_fileobj=resolved_local_path,
                path_in_repo=resolved_path_in_repo,
                repo_id=created,
                repo_type=repo_type_str,
                revision=revision,
                commit_message=commit_message,
                commit_description=commit_description,
                create_pr=create_pr,
            )

        # Folder-based upload
        return api.upload_folder(
            folder_path=resolved_local_path,
            path_in_repo=resolved_path_in_repo,
            repo_id=created,
            repo_type=repo_type_str,
            revision=revision,
            commit_message=commit_message,
            commit_description=commit_description,
            create_pr=create_pr,
            allow_patterns=(
                resolved_include
                if isinstance(resolved_include, list)
                else [resolved_include]
                if isinstance(resolved_include, str)
                else None
            ),
            ignore_patterns=exclude,
            delete_patterns=delete,
        )

    if quiet:
        disable_progress_bars()
        try:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                print(run_upload())
        finally:
            # Restore progress bars even when the upload fails, so the global
            # setting does not stay disabled after an exception.
            enable_progress_bars()
    else:
        print(run_upload())
        logging.set_verbosity_warning()
|
|
|
|
|
|
def _resolve_upload_paths(
|
|
*, repo_id: str, local_path: Optional[str], path_in_repo: Optional[str], include: Optional[list[str]]
|
|
) -> tuple[str, str, Optional[list[str]]]:
|
|
repo_name = repo_id.split("/")[-1]
|
|
resolved_include = include
|
|
|
|
if local_path is not None and any(c in local_path for c in ["*", "?", "["]):
|
|
if include is not None:
|
|
raise ValueError("Cannot set --include when local_path contains a wildcard.")
|
|
if path_in_repo is not None and path_in_repo != ".":
|
|
raise ValueError("Cannot set path_in_repo when local_path contains a wildcard.")
|
|
return ".", local_path, ["."] # will be adjusted below; placeholder for type
|
|
|
|
if local_path is None and os.path.isfile(repo_name):
|
|
return repo_name, repo_name, resolved_include
|
|
if local_path is None and os.path.isdir(repo_name):
|
|
return repo_name, ".", resolved_include
|
|
if local_path is None:
|
|
raise ValueError(f"'{repo_name}' is not a local file or folder. Please set local_path explicitly.")
|
|
|
|
if path_in_repo is None and os.path.isfile(local_path):
|
|
return local_path, os.path.basename(local_path), resolved_include
|
|
if path_in_repo is None:
|
|
return local_path, ".", resolved_include
|
|
return local_path, path_in_repo, resolved_include
|