Skip to main content

vLLM Parameter Curation

Engine version: 0.19.1
Discovered at: 2026-06-22 16:20:19.046420+00:00
Discovery method: dataclasses.fields(EngineArgs) + msgspec.json.schema(SamplingParams) + config/*.py declarative-constraint overlay

Summary: 37/220 parameters curated (185 engine + 35 sampling discovered)

Engine Parameters

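| Field | Type | Default | Curated? |
| --- | --- | --- | --- |
| `_api_process_count` | `int` | `1` | - |
| `_api_process_rank` | `int` | `0` | - |
| `additional_config` | `dict[str, Any]` | `{}` | - |
| `aggregate_engine_logging` | `bool` | `False` | - |
| `all2all_backend` | `str` | `allgather_reducescatter` | - |
| `allow_deprecated_quantization` | `bool` | `False` | - |
| `allowed_local_media_path` | `str` | `""` | - |
| `allowed_media_domains` | `list[str] \| None` | `null` | - |
| `async_scheduling` | `bool \| None` | `null` | - |
| `attention_backend` | `AttentionBackendEnum \| None` | `null` | - |
| `attention_config` | `unknown` | `null` | - |
| `block_size` | `int \| None` | `null` | yes |
| `calculate_kv_scales` | `bool` | `False` | - |
| `code_revision` | `str \| None` | `null` | yes |
| `collect_detailed_traces` | `list[Literal['model', 'worker', 'all']] \| None` | `null` | - |
| `compilation_config` | `unknown` | `null` | - |
| `config_format` | `str` | `auto` | - |
| `convert` | `str` | `auto` | - |
| `cp_kv_cache_interleave_size` | `int` | `1` | - |
| `cpu_offload_gb` | `float` | `0` | yes |
| `cpu_offload_params` | `set[str]` | `[]` | - |
| `cudagraph_capture_sizes` | `list[int] \| None` | `null` | yes |
| `cudagraph_metrics` | `bool` | `False` | - |
| `data_parallel_address` | `str \| None` | `null` | - |
| `data_parallel_backend` | `str` | `mp` | - |
| `data_parallel_external_lb` | `bool` | `False` | - |
| `data_parallel_hybrid_lb` | `bool` | `False` | - |
| `data_parallel_rank` | `int \| None` | `null` | - |
| `data_parallel_rpc_port` | `int \| None` | `null` | - |
| `data_parallel_size` | `int` | `1` | - |
| `data_parallel_size_local` | `int \| None` | `null` | - |
| `data_parallel_start_rank` | `int \| None` | `null` | - |
| `dbo_decode_token_threshold` | `int` | `32` | - |
| `dbo_prefill_token_threshold` | `int` | `512` | - |
| `dcp_comm_backend` | `str` | `ag_rs` | - |
| `dcp_kv_cache_interleave_size` | `int` | `1` | - |
| `decode_context_parallel_size` | `int` | `1` | - |
| `default_mm_loras` | `dict[str, str] \| None` | `null` | - |
| `disable_cascade_attn` | `bool` | `True` | - |
| `disable_chunked_mm_input` | `bool` | `False` | - |
| `disable_custom_all_reduce` | `bool` | `False` | yes |
| `disable_hybrid_kv_cache_manager` | `bool \| None` | `null` | - |
| `disable_log_stats` | `bool` | `False` | - |
| `disable_nccl_for_dp_synchronization` | `bool \| None` | `null` | - |
| `disable_sliding_window` | `bool` | `False` | - |
| `distributed_executor_backend` | `str \| Literal['ray', 'mp', 'uni', 'external_launcher'] \| type[Any] \| None` | `null` | yes |
| `distributed_timeout_seconds` | `int \| None` | `null` | - |
| `download_dir` | `str \| None` | `null` | - |
| `dtype` | `str` | `auto` | yes |
| `ec_transfer_config` | `unknown` | `null` | - |
| `enable_chunked_prefill` | `bool \| None` | `null` | yes |
| `enable_dbo` | `bool` | `False` | - |
| `enable_elastic_ep` | `bool` | `False` | - |
| `enable_ep_weight_filter` | `bool` | `False` | - |
| `enable_eplb` | `bool` | `False` | - |
| `enable_expert_parallel` | `bool` | `False` | - |
| `enable_flashinfer_autotune` | `bool` | `null` | - |
| `enable_layerwise_nvtx_tracing` | `bool` | `False` | - |
| `enable_logging_iteration_details` | `bool` | `False` | - |
| `enable_lora` | `bool` | `False` | - |
| `enable_mfu_metrics` | `bool` | `False` | - |
| `enable_mm_embeds` | `bool` | `False` | - |
| `enable_mm_processor_stats` | `bool` | `False` | - |
| `enable_prefix_caching` | `bool \| None` | `null` | yes |
| `enable_prompt_embeds` | `bool` | `False` | - |
| `enable_return_routed_experts` | `bool` | `False` | - |
| `enable_sleep_mode` | `bool` | `False` | - |
| `enable_tower_connector_lora` | `bool` | `False` | - |
| `enforce_eager` | `bool` | `False` | yes |
| `eplb_config` | `unknown` | `null` | - |
| `expert_placement_strategy` | `str` | `linear` | - |
| `fail_on_environ_validation` | `bool` | `False` | - |
| `fully_sharded_loras` | `bool` | `False` | - |
| `gdn_prefill_backend` | `str` | `null` | - |
| `generation_config` | `str` | `auto` | - |
| `gpu_memory_utilization` | `float` | `0.9` | yes |
| `hf_config_path` | `str \| None` | `null` | - |
| `hf_overrides` | `dict[str, Any] \| Callable[[typing.Any], Any]` | `{}` | - |
| `hf_token` | `bool \| str \| None` | `null` | - |
| `ignore_patterns` | `str \| list[str]` | `['original/**/*']` | - |
| `interleave_mm_strings` | `bool` | `False` | - |
| `io_processor_plugin` | `str \| None` | `null` | - |
| `kernel_config` | `unknown` | `null` | - |
| `kv_cache_dtype` | `str` | `auto` | yes |
| `kv_cache_dtype_skip_layers` | `list[str]` | `[]` | - |
| `kv_cache_memory_bytes` | `int \| None` | `null` | yes |
| `kv_cache_metrics` | `bool` | `False` | - |
| `kv_cache_metrics_sample` | `float` | `0.01` | - |
| `kv_events_config` | `unknown` | `null` | - |
| `kv_offloading_backend` | `str` | `native` | - |
| `kv_offloading_size` | `float \| None` | `null` | - |
| `kv_sharing_fast_prefill` | `bool` | `False` | - |
| `kv_transfer_config` | `unknown` | `null` | - |
| `language_model_only` | `bool` | `False` | - |
| `limit_mm_per_prompt` | `dict[str, int \| dict[str, int]]` | `{}` | - |
| `load_format` | `str \| Any` | `auto` | - |
| `logits_processors` | `list[str \| type[LogitsProcessor]] \| None` | `null` | - |
| `logprobs_mode` | `str` | `raw_logprobs` | - |
| `long_prefill_token_threshold` | `int` | `0` | - |
| `lora_dtype` | `str \| dtype \| None` | `auto` | - |
| `lora_target_modules` | `list[str] \| None` | `null` | - |
| `mamba_block_size` | `int \| None` | `null` | - |
| `mamba_cache_dtype` | `str` | `auto` | - |
| `mamba_cache_mode` | `str` | `none` | - |
| `mamba_ssm_cache_dtype` | `str` | `auto` | - |
| `master_addr` | `str` | `127.0.0.1` | - |
| `master_port` | `int` | `29501` | - |
| `max_cpu_loras` | `int \| None` | `null` | - |
| `max_cudagraph_capture_size` | `int \| None` | `null` | yes |
| `max_logprobs` | `int` | `20` | - |
| `max_long_partial_prefills` | `int` | `1` | - |
| `max_lora_rank` | `int` | `16` | - |
| `max_loras` | `int` | `1` | - |
| `max_model_len` | `int` | `null` | yes |
| `max_num_batched_tokens` | `int \| None` | `null` | yes |
| `max_num_partial_prefills` | `int` | `1` | - |
| `max_num_seqs` | `int \| None` | `null` | yes |
| `max_parallel_loading_workers` | `int \| None` | `null` | - |
| `media_io_kwargs` | `dict[str, dict[str, Any]]` | `{}` | - |
| `mm_encoder_attn_backend` | `AttentionBackendEnum \| str \| None` | `null` | - |
| `mm_encoder_only` | `bool` | `False` | - |
| `mm_encoder_tp_mode` | `str` | `weights` | - |
| `mm_processor_cache_gb` | `float` | `4` | - |
| `mm_processor_cache_type` | `str` | `lru` | - |
| `mm_processor_kwargs` | `dict[str, Any] \| None` | `null` | - |
| `mm_shm_cache_max_object_size_mb` | `int` | `128` | - |
| `mm_tensor_ipc` | `str` | `direct_rpc` | - |
| `model` | `str` | `Qwen/Qwen3-0.6B` | yes |
| `model_impl` | `str` | `auto` | - |
| `model_loader_extra_config` | `dict` | `{}` | - |
| `model_weights` | `str` | `""` | - |
| `moe_backend` | `str` | `auto` | yes |
| `nnodes` | `int` | `1` | - |
| `node_rank` | `int` | `0` | - |
| `num_gpu_blocks_override` | `int \| None` | `null` | - |
| `offload_backend` | `str` | `auto` | - |
| `offload_group_size` | `int` | `0` | yes |
| `offload_num_in_group` | `int` | `1` | yes |
| `offload_params` | `set[str]` | `[]` | yes |
| `offload_prefetch_step` | `int` | `1` | yes |
| `optimization_level` | `OptimizationLevel` | `2` | - |
| `otlp_traces_endpoint` | `str \| None` | `null` | - |
| `override_attention_dtype` | `str \| None` | `null` | - |
| `override_generation_config` | `dict[str, Any]` | `{}` | - |
| `performance_mode` | `str` | `balanced` | - |
| `pipeline_parallel_size` | `int` | `1` | yes |
| `pooler_config` | `unknown` | `null` | - |
| `prefill_context_parallel_size` | `int` | `1` | - |
| `prefix_caching_hash_algo` | `str` | `sha256` | - |
| `profiler_config` | `unknown` | `null` | - |
| `pt_load_map_location` | `str \| dict[str, str]` | `cpu` | - |
| `quantization` | `Any \| str \| None` | `null` | yes |
| `ray_workers_use_nsight` | `bool` | `False` | - |
| `reasoning_config` | `unknown` | `null` | - |
| `reasoning_parser` | `str` | `""` | - |
| `reasoning_parser_plugin` | `str \| None` | `null` | - |
| `renderer_num_workers` | `int` | `1` | - |
| `revision` | `str \| None` | `null` | yes |
| `runner` | `str` | `auto` | - |
| `safetensors_load_strategy` | `str \| None` | `null` | - |
| `scheduler_cls` | `str \| type[object] \| None` | `null` | - |
| `scheduler_reserve_full_isl` | `bool` | `True` | - |
| `scheduling_policy` | `str` | `fcfs` | - |
| `seed` | `int` | `0` | - |
| `served_model_name` | `str \| list[str] \| None` | `null` | - |
| `show_hidden_metrics_for_version` | `str \| None` | `null` | - |
| `shutdown_timeout` | `int` | `0` | - |
| `skip_mm_profiling` | `bool` | `False` | - |
| `skip_tokenizer_init` | `bool` | `False` | - |
| `specialize_active_lora` | `bool` | `False` | - |
| `speculative_config` | `unknown` | `null` | - |
| `stream_interval` | `int` | `1` | - |
| `structured_outputs_config` | `unknown` | `null` | - |
| `tensor_parallel_size` | `int` | `1` | yes |
| `tokenizer` | `str \| None` | `null` | - |
| `tokenizer_mode` | `Literal['auto', 'hf', 'slow', 'mistral', 'deepseek_v32'] \| str` | `auto` | - |
| `tokenizer_revision` | `str \| None` | `null` | - |
| `tokens_only` | `bool` | `False` | - |
| `trust_remote_code` | `bool` | `False` | - |
| `ubatch_size` | `int` | `0` | - |
| `use_tqdm_on_load` | `bool` | `True` | - |
| `video_pruning_rate` | `float \| None` | `null` | - |
| `weight_transfer_config` | `unknown` | `null` | - |
| `worker_cls` | `str` | `auto` | - |
| `worker_extension_cls` | `str` | `""` | - |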

Sampling Parameters

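| Field | Type | Default | Curated? |
| --- | --- | --- | --- |
| `_all_stop_token_ids` | `array` | `[]` | - |
| `_bad_words_token_ids` | `list[list[int]] \| None` | `null` | - |
| `_eos_token_id` | `int \| None` | `null` | - |
| `allowed_token_ids` | `list[int] \| None` | `null` | - |
| `bad_words` | `list[str] \| None` | `null` | - |
| `detokenize` | `boolean` | `True` | - |
| `extra_args` | `dict[str, Any] \| None` | `null` | - |
| `flat_logprobs` | `boolean` | `False` | - |
| `frequency_penalty` | `number` | `0.0` | yes |
| `ignore_eos` | `boolean` | `False` | yes |
| `include_stop_str_in_output` | `boolean` | `False` | - |
| `logit_bias` | `dict[int, float] \| None` | `null` | - |
| `logprobs` | `int \| None` | `null` | - |
| `max_tokens` | `int \| None` | `16` | - |
| `min_p` | `number` | `0.0` | yes |
| `min_tokens` | `integer` | `0` | yes |
| `n` | `integer` | `1` | yes |
| `output_kind` | `RequestOutputKind` | `0` | - |
| `output_text_buffer_length` | `integer` | `0` | - |
| `presence_penalty` | `number` | `0.0` | yes |
| `prompt_logprobs` | `int \| None` | `null` | - |
| `repetition_detection` | `RepetitionDetectionParams \| None` | `null` | - |
| `repetition_penalty` | `number` | `1.0` | yes |
| `seed` | `int \| None` | `null` | - |
| `skip_clone` | `boolean` | `False` | - |
| `skip_reading_prefix_cache` | `bool \| None` | `null` | - |
| `skip_special_tokens` | `boolean` | `True` | - |
| `spaces_between_special_tokens` | `boolean` | `True` | - |
| `stop` | `str \| list[str] \| None` | `null` | - |
| `stop_token_ids` | `list[int] \| None` | `null` | - |
| `structured_outputs` | `StructuredOutputsParams \| None` | `null` | - |
| `temperature` | `number` | `1.0` | yes |
| `thinking_token_budget` | `int \| None` | `null` | - |
| `top_k` | `integer` | `0` | yes |
| `top_p` | `number` | `1.0` | yes |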