Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,21 @@ export QWEN_CHAT_BASE_URL="http://localhost:8000/v1"
export QWEN_CHAT_MODEL="Qwen/Qwen3.5-4B"
```

`qwen_chat` can also be used as the optimizer backend. When the optimizer and
the target need to point to different local vLLM services, use the
role-specific settings:

```bash
python scripts/train.py \
--config configs/searchqa/default.yaml \
--optimizer_backend qwen_chat \
--target_backend qwen_chat \
--optimizer_model Qwen/Qwen3.5-4B \
--target_model Qwen/Qwen3.5-4B \
--optimizer_qwen_chat_base_url http://localhost:8001/v1 \
--target_qwen_chat_base_url http://localhost:8000/v1
```

#### MiniMax

```bash
Expand Down
13 changes: 13 additions & 0 deletions docs/reference/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@ Complete reference for all SkillOpt configuration parameters.
| `model.optimizer` | str | `gpt-5.5` | Optimizer model (for reflection & slow update) |
| `model.target` | str | `gpt-5.5` | Target model (for rollout execution) |
| `model.reasoning_effort` | str | `medium` | Reasoning effort level |
| `model.optimizer_backend` | str | `openai_chat` | Optimizer backend: `openai_chat` / `claude_chat` / `qwen_chat` / `minimax_chat` |
| `model.target_backend` | str | `openai_chat` | Target backend: a chat backend (e.g. `openai_chat` / `claude_chat` / `qwen_chat`) or an execution harness (e.g. `codex_exec` / `claude_code_exec`) |
| `model.qwen_chat_base_url` | str | `http://localhost:8000/v1` | Shared Qwen/vLLM OpenAI-compatible endpoint |
| `model.qwen_chat_enable_thinking` | bool | `false` | Enable Qwen thinking mode; shared default for optimizer and target |
| `model.optimizer_qwen_chat_base_url` | str | — | Optimizer-specific Qwen/vLLM endpoint; overrides shared `qwen_chat_base_url` |
| `model.target_qwen_chat_base_url` | str | — | Target-specific Qwen/vLLM endpoint; overrides shared `qwen_chat_base_url` |

## Training (`train`)

Expand Down Expand Up @@ -70,3 +76,10 @@ Complete reference for all SkillOpt configuration parameters.
| `AZURE_OPENAI_API_KEY` / `model.azure_openai_api_key` | Azure API key |
| `OPENAI_API_KEY` | OpenAI API key (for `openai_chat` backend) |
| `ANTHROPIC_API_KEY` | Anthropic API key (for `claude_code_exec` backend) |
| `QWEN_CHAT_BASE_URL` | Shared local vLLM endpoint for `qwen_chat` |
| `QWEN_CHAT_MODEL` | Shared served model name for `qwen_chat` |
| `QWEN_CHAT_API_KEY` | Optional API key for the shared Qwen endpoint |
| `OPTIMIZER_QWEN_CHAT_BASE_URL` | Optimizer-specific local vLLM endpoint |
| `OPTIMIZER_QWEN_CHAT_MODEL` | Optimizer-specific served model name |
| `TARGET_QWEN_CHAT_BASE_URL` | Target-specific local vLLM endpoint |
| `TARGET_QWEN_CHAT_MODEL` | Target-specific served model name |
30 changes: 30 additions & 0 deletions scripts/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,18 @@ def parse_args() -> argparse.Namespace:
p.add_argument("--qwen_chat_timeout_seconds", type=float)
p.add_argument("--qwen_chat_max_tokens", type=int)
p.add_argument("--qwen_chat_enable_thinking", type=_BOOL)
p.add_argument("--optimizer_qwen_chat_base_url", type=str)
p.add_argument("--optimizer_qwen_chat_api_key", type=str)
p.add_argument("--optimizer_qwen_chat_temperature", type=float)
p.add_argument("--optimizer_qwen_chat_timeout_seconds", type=float)
p.add_argument("--optimizer_qwen_chat_max_tokens", type=int)
p.add_argument("--optimizer_qwen_chat_enable_thinking", type=_BOOL)
p.add_argument("--target_qwen_chat_base_url", type=str)
p.add_argument("--target_qwen_chat_api_key", type=str)
p.add_argument("--target_qwen_chat_temperature", type=float)
p.add_argument("--target_qwen_chat_timeout_seconds", type=float)
p.add_argument("--target_qwen_chat_max_tokens", type=int)
p.add_argument("--target_qwen_chat_enable_thinking", type=_BOOL)
p.add_argument("--minimax_base_url", type=str)
p.add_argument("--minimax_api_key", type=str)
p.add_argument("--minimax_model", type=str)
Expand Down Expand Up @@ -295,6 +307,18 @@ def parse_args() -> argparse.Namespace:
"qwen_chat_timeout_seconds": "model.qwen_chat_timeout_seconds",
"qwen_chat_max_tokens": "model.qwen_chat_max_tokens",
"qwen_chat_enable_thinking": "model.qwen_chat_enable_thinking",
"optimizer_qwen_chat_base_url": "model.optimizer_qwen_chat_base_url",
"optimizer_qwen_chat_api_key": "model.optimizer_qwen_chat_api_key",
"optimizer_qwen_chat_temperature": "model.optimizer_qwen_chat_temperature",
"optimizer_qwen_chat_timeout_seconds": "model.optimizer_qwen_chat_timeout_seconds",
"optimizer_qwen_chat_max_tokens": "model.optimizer_qwen_chat_max_tokens",
"optimizer_qwen_chat_enable_thinking": "model.optimizer_qwen_chat_enable_thinking",
"target_qwen_chat_base_url": "model.target_qwen_chat_base_url",
"target_qwen_chat_api_key": "model.target_qwen_chat_api_key",
"target_qwen_chat_temperature": "model.target_qwen_chat_temperature",
"target_qwen_chat_timeout_seconds": "model.target_qwen_chat_timeout_seconds",
"target_qwen_chat_max_tokens": "model.target_qwen_chat_max_tokens",
"target_qwen_chat_enable_thinking": "model.target_qwen_chat_enable_thinking",
"minimax_base_url": "model.minimax_base_url",
"minimax_api_key": "model.minimax_api_key",
"minimax_model": "model.minimax_model",
Expand Down Expand Up @@ -431,6 +455,12 @@ def _has_model_override(dotted_key: str, legacy_key: str) -> bool:
and not _has_model_override("model.optimizer", "optimizer_model")
):
flat["optimizer_model"] = default_model_for_backend("claude_chat")
if flat.get("optimizer_backend") == "qwen_chat":
if (
str(flat.get("optimizer_model", "") or "").strip() in _OPENAI_DEFAULT_MODEL_SENTINELS
and not _has_model_override("model.optimizer", "optimizer_model")
):
flat["optimizer_model"] = default_model_for_backend("qwen_chat")
if flat.get("target_backend") == "claude_chat":
if (
str(flat.get("target_model", "") or "").strip() in _OPENAI_DEFAULT_MODEL_SENTINELS
Expand Down
12 changes: 12 additions & 0 deletions skillopt/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,18 @@
"model.qwen_chat_timeout_seconds": "qwen_chat_timeout_seconds",
"model.qwen_chat_max_tokens": "qwen_chat_max_tokens",
"model.qwen_chat_enable_thinking": "qwen_chat_enable_thinking",
"model.optimizer_qwen_chat_base_url": "optimizer_qwen_chat_base_url",
"model.optimizer_qwen_chat_api_key": "optimizer_qwen_chat_api_key",
"model.optimizer_qwen_chat_temperature": "optimizer_qwen_chat_temperature",
"model.optimizer_qwen_chat_timeout_seconds": "optimizer_qwen_chat_timeout_seconds",
"model.optimizer_qwen_chat_max_tokens": "optimizer_qwen_chat_max_tokens",
"model.optimizer_qwen_chat_enable_thinking": "optimizer_qwen_chat_enable_thinking",
"model.target_qwen_chat_base_url": "target_qwen_chat_base_url",
"model.target_qwen_chat_api_key": "target_qwen_chat_api_key",
"model.target_qwen_chat_temperature": "target_qwen_chat_temperature",
"model.target_qwen_chat_timeout_seconds": "target_qwen_chat_timeout_seconds",
"model.target_qwen_chat_max_tokens": "target_qwen_chat_max_tokens",
"model.target_qwen_chat_enable_thinking": "target_qwen_chat_enable_thinking",
"model.minimax_base_url": "minimax_base_url",
"model.minimax_api_key": "minimax_api_key",
"model.minimax_model": "minimax_model",
Expand Down
28 changes: 20 additions & 8 deletions skillopt/engine/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -629,14 +629,26 @@ def _build_eval_env(split: str, env_num: int, seed: int):
effort=cfg.get("claude_code_exec_effort", cfg.get("reasoning_effort", "medium")),
max_thinking_tokens=cfg.get("claude_code_exec_max_thinking_tokens", 16384),
)
configure_qwen_chat(
base_url=cfg.get("qwen_chat_base_url") or None,
api_key=cfg.get("qwen_chat_api_key") or None,
temperature=cfg.get("qwen_chat_temperature"),
timeout_seconds=cfg.get("qwen_chat_timeout_seconds"),
max_tokens=cfg.get("qwen_chat_max_tokens"),
enable_thinking=cfg.get("qwen_chat_enable_thinking"),
)
configure_qwen_chat(
base_url=cfg.get("qwen_chat_base_url") or None,
api_key=cfg.get("qwen_chat_api_key") or None,
temperature=cfg.get("qwen_chat_temperature"),
timeout_seconds=cfg.get("qwen_chat_timeout_seconds"),
max_tokens=cfg.get("qwen_chat_max_tokens"),
enable_thinking=cfg.get("qwen_chat_enable_thinking"),
optimizer_base_url=cfg.get("optimizer_qwen_chat_base_url") or None,
optimizer_api_key=cfg.get("optimizer_qwen_chat_api_key") or None,
optimizer_temperature=cfg.get("optimizer_qwen_chat_temperature"),
optimizer_timeout_seconds=cfg.get("optimizer_qwen_chat_timeout_seconds"),
optimizer_max_tokens=cfg.get("optimizer_qwen_chat_max_tokens"),
optimizer_enable_thinking=cfg.get("optimizer_qwen_chat_enable_thinking"),
target_base_url=cfg.get("target_qwen_chat_base_url") or None,
target_api_key=cfg.get("target_qwen_chat_api_key") or None,
target_temperature=cfg.get("target_qwen_chat_temperature"),
target_timeout_seconds=cfg.get("target_qwen_chat_timeout_seconds"),
target_max_tokens=cfg.get("target_qwen_chat_max_tokens"),
target_enable_thinking=cfg.get("target_qwen_chat_enable_thinking"),
)
configure_minimax_chat(
base_url=cfg.get("minimax_base_url") or None,
api_key=cfg.get("minimax_api_key") or None,
Expand Down
49 changes: 49 additions & 0 deletions skillopt/model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ def get_backend_name() -> str:
target = get_target_backend()
if optimizer == "claude_chat" and target == "claude_chat":
return "claude_chat"
if optimizer == "qwen_chat" and target == "qwen_chat":
return "qwen_chat"
if optimizer == "openai_chat" and target == "openai_chat":
return "azure_openai"
if optimizer == "openai_chat" and target == "codex_exec":
Expand Down Expand Up @@ -93,6 +95,16 @@ def chat_optimizer(
stage=stage,
timeout=timeout,
)
if get_optimizer_backend() == "qwen_chat":
return _qwen.chat_optimizer(
system=system,
user=user,
max_completion_tokens=max_completion_tokens,
retries=retries,
stage=stage,
reasoning_effort=reasoning_effort,
timeout=timeout,
)
return _openai.chat_optimizer(
system=system,
user=user,
Expand Down Expand Up @@ -179,6 +191,18 @@ def chat_optimizer_messages(
return_message=return_message,
timeout=timeout,
)
if get_optimizer_backend() == "qwen_chat":
return _qwen.chat_optimizer_messages(
messages=messages,
max_completion_tokens=max_completion_tokens,
retries=retries,
stage=stage,
reasoning_effort=reasoning_effort,
tools=tools,
tool_choice=tool_choice,
return_message=return_message,
timeout=timeout,
)
return _openai.chat_optimizer_messages(
messages=messages,
max_completion_tokens=max_completion_tokens,
Expand Down Expand Up @@ -414,6 +438,18 @@ def configure_qwen_chat(
timeout_seconds: float | str | None = None,
max_tokens: int | str | None = None,
enable_thinking: bool | str | None = None,
optimizer_base_url: str | None = None,
optimizer_api_key: str | None = None,
optimizer_temperature: float | str | None = None,
optimizer_timeout_seconds: float | str | None = None,
optimizer_max_tokens: int | str | None = None,
optimizer_enable_thinking: bool | str | None = None,
target_base_url: str | None = None,
target_api_key: str | None = None,
target_temperature: float | str | None = None,
target_timeout_seconds: float | str | None = None,
target_max_tokens: int | str | None = None,
target_enable_thinking: bool | str | None = None,
) -> None:
_qwen.configure_qwen_chat(
base_url=base_url,
Expand All @@ -422,6 +458,18 @@ def configure_qwen_chat(
timeout_seconds=timeout_seconds,
max_tokens=max_tokens,
enable_thinking=enable_thinking,
optimizer_base_url=optimizer_base_url,
optimizer_api_key=optimizer_api_key,
optimizer_temperature=optimizer_temperature,
optimizer_timeout_seconds=optimizer_timeout_seconds,
optimizer_max_tokens=optimizer_max_tokens,
optimizer_enable_thinking=optimizer_enable_thinking,
target_base_url=target_base_url,
target_api_key=target_api_key,
target_temperature=target_temperature,
target_timeout_seconds=target_timeout_seconds,
target_max_tokens=target_max_tokens,
target_enable_thinking=target_enable_thinking,
)


Expand Down Expand Up @@ -461,3 +509,4 @@ def set_target_deployment(deployment: str) -> None:
def set_optimizer_deployment(deployment: str) -> None:
    """Forward the optimizer deployment name to every backend module.

    The same ``deployment`` value is handed, in order, to the ``_openai``,
    ``_claude`` and ``_qwen`` backend modules.
    """
    for backend in (_openai, _claude, _qwen):
        backend.set_optimizer_deployment(deployment)
5 changes: 3 additions & 2 deletions skillopt/model/azure_openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,9 +336,10 @@ def get_target_client() -> AzureOpenAI | OpenAI:
from skillopt.model.backend_config import get_target_backend
if get_target_backend() == "qwen_chat":
from skillopt.model import qwen_backend as _qwen
target_config = _qwen.TARGET_CONFIG
_target_client = OpenAI(
base_url=_qwen.BASE_URL,
api_key=_qwen.API_KEY or "dummy",
base_url=target_config.base_url,
api_key=target_config.api_key or "dummy",
)
else:
_target_client = _make_client("target")
Expand Down
6 changes: 3 additions & 3 deletions skillopt/model/backend_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,10 @@ def _parse_int(value: str | None, default: int) -> int:
def set_optimizer_backend(backend: str) -> None:
global OPTIMIZER_BACKEND
OPTIMIZER_BACKEND = normalize_backend_name(backend or "openai_chat")
if OPTIMIZER_BACKEND not in {"openai_chat", "claude_chat", "minimax_chat"}:
if OPTIMIZER_BACKEND not in {"openai_chat", "claude_chat", "qwen_chat", "minimax_chat"}:
raise ValueError(
f"Unsupported optimizer backend: {OPTIMIZER_BACKEND!r}. "
"Supported values are 'openai_chat', 'claude_chat', and 'minimax_chat'."
"Supported values are 'openai_chat', 'claude_chat', 'qwen_chat', and 'minimax_chat'."
)
os.environ["OPTIMIZER_BACKEND"] = OPTIMIZER_BACKEND

Expand Down Expand Up @@ -81,7 +81,7 @@ def is_target_exec_backend() -> bool:


def is_optimizer_chat_backend() -> bool:
return OPTIMIZER_BACKEND in {"openai_chat", "claude_chat", "minimax_chat"}
return OPTIMIZER_BACKEND in {"openai_chat", "claude_chat", "qwen_chat", "minimax_chat"}


def is_target_chat_backend() -> bool:
Expand Down
Loading