支持后台运行并记录日志 (Support running in the background and writing output to a log file)

This commit is contained in:
2025-10-23 13:07:04 +08:00
parent 1822aca36b
commit 2fbb741d3f
2 changed files with 5 additions and 5 deletions

View File

@@ -30,7 +30,7 @@ def fed_run():
# For convenience we pass the same `params` dict used by Dataset/model/loss. # For convenience we pass the same `params` dict used by Dataset/model/loss.
# Here we re-use the top-level cfg directly as params. # Here we re-use the top-level cfg directly as params.
# params = dict(cfg) # params = dict(cfg)
if "names" in cfg and isinstance(cfg["names"], dict): if "names" in cfg and isinstance(cfg["names"], dict):
# Convert {0: 'uav', 1: 'car', ...} to list if you prefer list # Convert {0: 'uav', 1: 'car', ...} to list if you prefer list
# but we can leave dict; your utils appear to accept dict # but we can leave dict; your utils appear to accept dict
@@ -44,12 +44,12 @@ def fed_run():
train_txt = cfg.get("train_txt", "") train_txt = cfg.get("train_txt", "")
if not train_txt: if not train_txt:
ds_root = cfg.get("dataset_path", "") ds_root = cfg.get("dataset_path", "")
guess = os.path.join(ds_root, "train.txt") if ds_root else "" guess = os.path.join(ds_root, "train2017.txt") if ds_root else ""
train_txt = guess train_txt = guess
if not train_txt or not os.path.exists(train_txt): if not train_txt or not os.path.exists(train_txt):
raise FileNotFoundError( raise FileNotFoundError(
f"train.txt not found. Provide --config with 'train_txt' or ensure '{train_txt}' exists." f"train2017.txt not found. Provide --config with 'train_txt' or ensure '{train_txt}' exists."
) )
split = divide_trainset( split = divide_trainset(
@@ -76,7 +76,7 @@ def fed_run():
# --- build server & optional validation set --- # --- build server & optional validation set ---
server = FedYoloServer(client_list=users, model_name=model_name, params=cfg) server = FedYoloServer(client_list=users, model_name=model_name, params=cfg)
valset = build_valset_if_available(cfg, params=cfg, args=args_cli) valset = build_valset_if_available(cfg, params=cfg, args=args_cli, val_name="val2017")
# valset is a Dataset class, not data loader # valset is a Dataset class, not data loader
if valset is not None: if valset is not None:
server.load_valset(valset) server.load_valset(valset)

View File

@@ -1,2 +1,2 @@
GPUS=$1 GPUS=$1
python3 -m torch.distributed.run --nproc_per_node=$GPUS fed_run.py ${@:2} nohup python3 -m torch.distributed.run --nproc_per_node=$GPUS fed_run.py ${@:2} > train.log 2>&1 & disown