diff --git a/skyrl-train/docs/examples/mini_swe_agent.rst b/skyrl-train/docs/examples/mini_swe_agent.rst index 31e851f516..afcc79e271 100644 --- a/skyrl-train/docs/examples/mini_swe_agent.rst +++ b/skyrl-train/docs/examples/mini_swe_agent.rst @@ -56,10 +56,10 @@ By running this workflow as a Ray task, we are also able to scale up generation model = get_model(litellm_model_name, sweagent_config.get("model", {})) error = None try: - env = get_sb_environment(sweagent_config, instance, data_source) - agent = DefaultAgent(model, env, **sweagent_config.get("agent", {})) - exit_status, model_patch = agent.run(instance["problem_statement"]) - eval_result = evaluate_trajectory(instance, model_patch, sweagent_config, data_source) + env = get_sb_environment(sweagent_config, instance, data_source) + agent = DefaultAgent(model, env, **sweagent_config.get("agent", {})) + exit_status, model_patch = agent.run(instance["problem_statement"]) + eval_result = evaluate_trajectory(instance, model_patch, sweagent_config, data_source) except Exception as e: error = str(e) return agent.messages, eval_result, error @@ -90,7 +90,7 @@ Training Prerequisites: Ensure that you have the required environment backend installed for generating trajectories with Mini-SWE-Agent. By default, we use `Podman `_. This can be modified in :code_link:`examples/mini_swe_agent/swebench.yaml` -We provide two example scripts: One for Qwen3-8B model and another for the `Qwen/Qwen3-Coder-30B-A3B-Instruct ` model. While the first script for Qwen3-8B requires a single 8xH100 node, the script for the 30B model requires 2 8xH100 nodes for training. +We provide two example scripts: One for Qwen3-8B model and another for the `Qwen/Qwen3-Coder-30B-A3B-Instruct `_ model. While the first script for Qwen3-8B requires a single 8xH100 node, the script for the 30B model requires 2 8xH100 nodes for training. .. 
code-block:: bash diff --git a/skyrl-train/examples/mini_swe_agent/mini_swe_generator.py b/skyrl-train/examples/mini_swe_agent/mini_swe_generator.py index 315742d9e2..7de6eb9974 100644 --- a/skyrl-train/examples/mini_swe_agent/mini_swe_generator.py +++ b/skyrl-train/examples/mini_swe_agent/mini_swe_generator.py @@ -123,10 +123,9 @@ async def minisweagent_agent_loop( ) -> Tuple[List[int], float, str, List[int], List[int], Optional[List[int]]]: sweagent_config = yaml.safe_load(get_config_path(self.generator_cfg.miniswe_config_path).read_text()) - instance: Dict[str, Dict[str, Any]] = env_extras["instance"] # NOTE (sumanthrh): Input `prompt` is not used here because mini-swe-agent uses a similar entry from the `instance` obj messages, reward, error = await init_and_run.remote( - instance, + env_extras["instance"], self.litellm_model_name, sweagent_config, self.generator_cfg, @@ -136,7 +135,7 @@ async def minisweagent_agent_loop( if not len(messages): return None, None, None, None, None, None - # TODO (sumanthrh):This is currently hardcoded for SWEBench with 2 initial messages (system and user). + # TODO (sumanthrh): This is currently hardcoded for SWEBench with 2 initial messages (system and user). 
response_messages = messages[2:] for message in messages[:2]: diff --git a/skyrl-train/examples/mini_swe_agent/mini_swe_utils.py b/skyrl-train/examples/mini_swe_agent/mini_swe_utils.py index 9eb15b7cce..bff7355662 100644 --- a/skyrl-train/examples/mini_swe_agent/mini_swe_utils.py +++ b/skyrl-train/examples/mini_swe_agent/mini_swe_utils.py @@ -53,9 +53,8 @@ def get_docker_image_name(instance: dict, data_source: str) -> str: def evaluate_trajectory( instance: Dict[str, Any], model_patch: str, sweagent_config: dict, data_source: str ) -> MiniSWEEvaluationResult: - instance_id = instance["instance_id"] - ret = MiniSWEEvaluationResult(instance_id=instance_id, resolved=False, eval_error=None) + ret = MiniSWEEvaluationResult(instance_id=instance["instance_id"], resolved=False, eval_error=None) env = None try: diff --git a/skyrl-train/examples/mini_swe_agent/run_mini_swe_30B.sh b/skyrl-train/examples/mini_swe_agent/run_mini_swe_30B.sh index c8533d9b33..94bea6ea75 100644 --- a/skyrl-train/examples/mini_swe_agent/run_mini_swe_30B.sh +++ b/skyrl-train/examples/mini_swe_agent/run_mini_swe_30B.sh @@ -9,7 +9,9 @@ set -x DATA_DIR="$DATA/data/swe_gym_subset" CKPT_PATH="$DATA/ckpts/llm_mini_swe" -# save trajectories here for debugging. + +# Save trajectories here for debugging. 
+# NOTE: For a multi-node cluster, ensure that this directory is on NFS so that all nodes save their trajectories to the same path MINISWE_TRAJ_DIR="$HOME/mini_swe_agent_trajs_32B" NUM_GPUS=8 diff --git a/skyrl-train/examples/mini_swe_agent/run_mini_swe_8B.sh b/skyrl-train/examples/mini_swe_agent/run_mini_swe_8B.sh index b90d25fc6c..bd6e65b280 100644 --- a/skyrl-train/examples/mini_swe_agent/run_mini_swe_8B.sh +++ b/skyrl-train/examples/mini_swe_agent/run_mini_swe_8B.sh @@ -7,7 +7,9 @@ set -x DATA_DIR="$HOME/data/swe_gym_subset" CKPT_PATH="$HOME/ckpts/llm_mini_swe" -# save trajectories here + +# Save trajectories here for debugging. +# NOTE: For a multi-node cluster, ensure that this directory is on NFS so that all nodes save their trajectories to the same path MINISWE_TRAJ_DIR="$HOME/mini_swe_agent_trajs" NUM_GPUS=8