vLLM Parameter Curation
Engine version: 0.7.3
Discovered at: 2026-05-06 22:57:22+02:00
Discovery method: dataclasses.fields(EngineArgs) + msgspec.json.schema(SamplingParams)
Summary: 32 of 135 discovered parameters curated (104 engine parameters + 31 sampling parameters discovered)
Delta vs previous: deferred until first probe-pass cycle.
Engine Parameters
| Field | Type | Default | Curated? |
|---|---|---|---|
additional_config | dict[str, Any] | None | null | - |
allowed_local_media_path | str | `` | - |
block_size | int | None | null | yes |
calculate_kv_scales | bool | None | null | - |
code_revision | str | None | null | - |
collect_detailed_traces | str | None | null | - |
compilation_config | CompilationConfig | None | null | yes |
config_format | ConfigFormat | auto | - |
cpu_offload_gb | float | 0 | yes |
device | str | auto | - |
disable_async_output_proc | bool | False | - |
disable_custom_all_reduce | bool | False | yes |
disable_log_stats | bool | False | - |
disable_logprobs_during_spec_decoding | bool | None | null | - |
disable_mm_preprocessor_cache | bool | False | - |
disable_sliding_window | bool | False | - |
distributed_executor_backend | str | type[ExecutorBase] | None | null | yes |
download_dir | str | None | null | - |
dtype | str | auto | yes |
enable_chunked_prefill | bool | None | null | yes |
enable_lora | bool | False | - |
enable_lora_bias | bool | False | - |
enable_prefix_caching | bool | None | null | yes |
enable_prompt_adapter | bool | False | - |
enable_sleep_mode | bool | False | - |
enforce_eager | bool | None | null | yes |
fully_sharded_loras | bool | False | - |
generation_config | str | None | null | - |
gpu_memory_utilization | float | 0.9 | yes |
guided_decoding_backend | str | xgrammar | - |
hf_overrides | dict[str, Any] | Callable[[PretrainedConfig], PretrainedConfig] | None | null | - |
ignore_patterns | str | list[str] | None | null | - |
kv_cache_dtype | str | auto | yes |
kv_transfer_config | KVTransferConfig | None | null | - |
limit_mm_per_prompt | Mapping[str, int] | None | null | - |
load_format | str | auto | - |
logits_processor_pattern | str | None | null | - |
long_lora_scaling_factors | tuple[float] | None | null | - |
long_prefill_token_threshold | int | None | 0 | - |
lora_dtype | str | dtype | None | auto | - |
lora_extra_vocab_size | int | 256 | - |
max_cpu_loras | int | None | null | - |
max_logprobs | int | 20 | - |
max_long_partial_prefills | int | None | 1 | - |
max_lora_rank | int | 16 | - |
max_loras | int | 1 | - |
max_model_len | int | None | null | yes |
max_num_batched_tokens | int | None | null | yes |
max_num_partial_prefills | int | None | 1 | - |
max_num_seqs | int | None | null | yes |
max_parallel_loading_workers | int | None | null | - |
max_prompt_adapter_token | int | 0 | - |
max_prompt_adapters | int | 1 | - |
max_seq_len_to_capture | int | 8192 | yes |
mm_processor_kwargs | dict[str, Any] | None | null | - |
model | str | facebook/opt-125m | yes |
model_impl | str | auto | - |
model_loader_extra_config | dict | None | null | - |
multi_step_stream_outputs | bool | True | - |
ngram_prompt_lookup_max | int | None | null | - |
ngram_prompt_lookup_min | int | None | null | - |
num_gpu_blocks_override | int | None | null | - |
num_lookahead_slots | int | 0 | - |
num_scheduler_steps | int | 1 | yes |
num_speculative_tokens | int | None | null | yes |
otlp_traces_endpoint | str | None | null | - |
override_generation_config | dict[str, Any] | None | null | - |
override_neuron_config | dict[str, Any] | None | null | - |
override_pooler_config | PoolerConfig | None | null | - |
pipeline_parallel_size | int | 1 | yes |
preemption_mode | str | None | null | - |
qlora_adapter_name_or_path | str | None | null | - |
quantization | str | None | null | yes |
ray_workers_use_nsight | bool | False | - |
revision | str | None | null | - |
rope_scaling | dict[str, Any] | None | null | - |
rope_theta | float | None | null | - |
scheduler_cls | str | type[object] | vllm.core.scheduler.Scheduler | - |
scheduler_delay_factor | float | 0.0 | - |
scheduling_policy | Literal['fcfs', 'priority'] | fcfs | - |
seed | int | 0 | - |
served_model_name | str | list[str] | None | null | - |
skip_tokenizer_init | bool | False | - |
spec_decoding_acceptance_method | str | rejection_sampler | - |
speculative_disable_by_batch_size | int | None | null | - |
speculative_disable_mqa_scorer | bool | None | False | - |
speculative_draft_tensor_parallel_size | int | None | null | - |
speculative_max_model_len | int | None | null | - |
speculative_model | str | None | null | - |
speculative_model_quantization | str | None | null | - |
swap_space | float | 4 | yes |
task | Literal['auto', 'generate', 'embedding', 'embed', 'classify', 'score', 'reward', 'transcription'] | auto | - |
tensor_parallel_size | int | 1 | yes |
tokenizer | str | None | null | - |
tokenizer_mode | str | auto | - |
tokenizer_pool_extra_config | dict[str, Any] | None | null | - |
tokenizer_pool_size | int | 0 | - |
tokenizer_pool_type | str | type[BaseTokenizerGroup] | ray | - |
tokenizer_revision | str | None | null | - |
trust_remote_code | bool | False | - |
typical_acceptance_sampler_posterior_alpha | float | None | null | - |
typical_acceptance_sampler_posterior_threshold | float | None | null | - |
use_v2_block_manager | bool | True | - |
worker_cls | str | auto | - |
Sampling Parameters
| Field | Type | Default | Curated? |
|---|---|---|---|
_all_stop_token_ids | array | [] | - |
_real_n | unknown | null | - |
allowed_token_ids | unknown | null | - |
bad_words | unknown | null | - |
best_of | unknown | null | - |
detokenize | boolean | True | - |
frequency_penalty | number | 0.0 | yes |
guided_decoding | unknown | null | - |
ignore_eos | boolean | False | yes |
include_stop_str_in_output | boolean | False | - |
logit_bias | unknown | null | - |
logits_processors | unknown | null | - |
logprobs | unknown | null | - |
max_tokens | unknown | 16 | - |
min_p | number | 0.0 | yes |
min_tokens | integer | 0 | yes |
n | integer | 1 | yes |
output_kind | unknown | 0 | - |
output_text_buffer_length | integer | 0 | - |
presence_penalty | number | 0.0 | yes |
prompt_logprobs | unknown | null | - |
repetition_penalty | number | 1.0 | yes |
seed | unknown | null | - |
skip_special_tokens | boolean | True | - |
spaces_between_special_tokens | boolean | True | - |
stop | unknown | null | - |
stop_token_ids | unknown | null | - |
temperature | number | 1.0 | yes |
top_k | integer | -1 | yes |
top_p | number | 1.0 | yes |
truncate_prompt_tokens | unknown | null | - |