支持后台运行并记录日志

This commit is contained in:
2025-10-23 13:07:04 +08:00
parent 1822aca36b
commit 2fbb741d3f
2 changed files with 5 additions and 5 deletions

View File

@@ -44,12 +44,12 @@ def fed_run():
train_txt = cfg.get("train_txt", "")
if not train_txt:
ds_root = cfg.get("dataset_path", "")
-guess = os.path.join(ds_root, "train.txt") if ds_root else ""
+guess = os.path.join(ds_root, "train2017.txt") if ds_root else ""
train_txt = guess
if not train_txt or not os.path.exists(train_txt):
raise FileNotFoundError(
-f"train.txt not found. Provide --config with 'train_txt' or ensure '{train_txt}' exists."
+f"train2017.txt not found. Provide --config with 'train_txt' or ensure '{train_txt}' exists."
)
split = divide_trainset(
@@ -76,7 +76,7 @@ def fed_run():
# --- build server & optional validation set ---
server = FedYoloServer(client_list=users, model_name=model_name, params=cfg)
-valset = build_valset_if_available(cfg, params=cfg, args=args_cli)
+valset = build_valset_if_available(cfg, params=cfg, args=args_cli, val_name="val2017")
# valset is a Dataset class, not data loader
if valset is not None:
server.load_valset(valset)

View File

@@ -1,2 +1,2 @@
GPUS=$1
-python3 -m torch.distributed.run --nproc_per_node=$GPUS fed_run.py ${@:2}
+nohup python3 -m torch.distributed.run --nproc_per_node=$GPUS fed_run.py ${@:2} > train.log 2>&1 & disown