Skip to main content

vLLM Parameter Curation

Engine version: 0.7.3
Discovered at: 2026-05-06 22:57:22+02:00
Discovery method: dataclasses.fields(EngineArgs) + msgspec.json.schema(SamplingParams)

Summary: 32 of 135 discovered parameters are curated (discovered: 104 engine + 31 sampling; curated: 22 engine + 10 sampling)

Delta vs previous: deferred until first probe-pass cycle.

Engine Parameters

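| Field | Type | Default | Curated? |
| --- | --- | --- | --- |
| `additional_config` | `dict[str, Any] \| None` | `null` | - |
| `allowed_local_media_path` | `str` | `""` | - |
| `block_size` | `int \| None` | `null` | yes |
| `calculate_kv_scales` | `bool \| None` | `null` | - |
| `code_revision` | `str \| None` | `null` | - |
| `collect_detailed_traces` | `str \| None` | `null` | - |
| `compilation_config` | `CompilationConfig \| None` | `null` | yes |
| `config_format` | `ConfigFormat` | `auto` | - |
| `cpu_offload_gb` | `float` | `0` | yes |
| `device` | `str` | `auto` | - |
| `disable_async_output_proc` | `bool` | `False` | - |
| `disable_custom_all_reduce` | `bool` | `False` | yes |
| `disable_log_stats` | `bool` | `False` | - |
| `disable_logprobs_during_spec_decoding` | `bool \| None` | `null` | - |
| `disable_mm_preprocessor_cache` | `bool` | `False` | - |
| `disable_sliding_window` | `bool` | `False` | - |
| `distributed_executor_backend` | `str \| type[ExecutorBase] \| None` | `null` | yes |
| `download_dir` | `str \| None` | `null` | - |
| `dtype` | `str` | `auto` | yes |
| `enable_chunked_prefill` | `bool \| None` | `null` | yes |
| `enable_lora` | `bool` | `False` | - |
| `enable_lora_bias` | `bool` | `False` | - |
| `enable_prefix_caching` | `bool \| None` | `null` | yes |
| `enable_prompt_adapter` | `bool` | `False` | - |
| `enable_sleep_mode` | `bool` | `False` | - |
| `enforce_eager` | `bool \| None` | `null` | yes |
| `fully_sharded_loras` | `bool` | `False` | - |
| `generation_config` | `str \| None` | `null` | - |
| `gpu_memory_utilization` | `float` | `0.9` | yes |
| `guided_decoding_backend` | `str` | `xgrammar` | - |
| `hf_overrides` | `dict[str, Any] \| Callable[[<class 'transformers.configuration_utils.PretrainedConfig'>], PretrainedConfig] \| None` | `null` | - |
| `ignore_patterns` | `str \| list[str] \| None` | `null` | - |
| `kv_cache_dtype` | `str` | `auto` | yes |
| `kv_transfer_config` | `KVTransferConfig \| None` | `null` | - |
| `limit_mm_per_prompt` | `Mapping[str, int] \| None` | `null` | - |
| `load_format` | `str` | `auto` | - |
| `logits_processor_pattern` | `str \| None` | `null` | - |
| `long_lora_scaling_factors` | `tuple[float] \| None` | `null` | - |
| `long_prefill_token_threshold` | `int \| None` | `0` | - |
| `lora_dtype` | `str \| dtype \| None` | `auto` | - |
| `lora_extra_vocab_size` | `int` | `256` | - |
| `max_cpu_loras` | `int \| None` | `null` | - |
| `max_logprobs` | `int` | `20` | - |
| `max_long_partial_prefills` | `int \| None` | `1` | - |
| `max_lora_rank` | `int` | `16` | - |
| `max_loras` | `int` | `1` | - |
| `max_model_len` | `int \| None` | `null` | yes |
| `max_num_batched_tokens` | `int \| None` | `null` | yes |
| `max_num_partial_prefills` | `int \| None` | `1` | - |
| `max_num_seqs` | `int \| None` | `null` | yes |
| `max_parallel_loading_workers` | `int \| None` | `null` | - |
| `max_prompt_adapter_token` | `int` | `0` | - |
| `max_prompt_adapters` | `int` | `1` | - |
| `max_seq_len_to_capture` | `int` | `8192` | yes |
| `mm_processor_kwargs` | `dict[str, Any] \| None` | `null` | - |
| `model` | `str` | `facebook/opt-125m` | yes |
| `model_impl` | `str` | `auto` | - |
| `model_loader_extra_config` | `dict \| None` | `null` | - |
| `multi_step_stream_outputs` | `bool` | `True` | - |
| `ngram_prompt_lookup_max` | `int \| None` | `null` | - |
| `ngram_prompt_lookup_min` | `int \| None` | `null` | - |
| `num_gpu_blocks_override` | `int \| None` | `null` | - |
| `num_lookahead_slots` | `int` | `0` | - |
| `num_scheduler_steps` | `int` | `1` | yes |
| `num_speculative_tokens` | `int \| None` | `null` | yes |
| `otlp_traces_endpoint` | `str \| None` | `null` | - |
| `override_generation_config` | `dict[str, Any] \| None` | `null` | - |
| `override_neuron_config` | `dict[str, Any] \| None` | `null` | - |
| `override_pooler_config` | `PoolerConfig \| None` | `null` | - |
| `pipeline_parallel_size` | `int` | `1` | yes |
| `preemption_mode` | `str \| None` | `null` | - |
| `qlora_adapter_name_or_path` | `str \| None` | `null` | - |
| `quantization` | `str \| None` | `null` | yes |
| `ray_workers_use_nsight` | `bool` | `False` | - |
| `revision` | `str \| None` | `null` | - |
| `rope_scaling` | `dict[str, Any] \| None` | `null` | - |
| `rope_theta` | `float \| None` | `null` | - |
| `scheduler_cls` | `str \| type[object]` | `vllm.core.scheduler.Scheduler` | - |
| `scheduler_delay_factor` | `float` | `0.0` | - |
| `scheduling_policy` | `Literal['fcfs', 'priority']` | `fcfs` | - |
| `seed` | `int` | `0` | - |
| `served_model_name` | `str \| list[str] \| None` | `null` | - |
| `skip_tokenizer_init` | `bool` | `False` | - |
| `spec_decoding_acceptance_method` | `str` | `rejection_sampler` | - |
| `speculative_disable_by_batch_size` | `int \| None` | `null` | - |
| `speculative_disable_mqa_scorer` | `bool \| None` | `False` | - |
| `speculative_draft_tensor_parallel_size` | `int \| None` | `null` | - |
| `speculative_max_model_len` | `int \| None` | `null` | - |
| `speculative_model` | `str \| None` | `null` | - |
| `speculative_model_quantization` | `str \| None` | `null` | - |
| `swap_space` | `float` | `4` | yes |
| `task` | `Literal['auto', 'generate', 'embedding', 'embed', 'classify', 'score', 'reward', 'transcription']` | `auto` | - |
| `tensor_parallel_size` | `int` | `1` | yes |
| `tokenizer` | `str \| None` | `null` | - |
| `tokenizer_mode` | `str` | `auto` | - |
| `tokenizer_pool_extra_config` | `dict[str, Any] \| None` | `null` | - |
| `tokenizer_pool_size` | `int` | `0` | - |
| `tokenizer_pool_type` | `str \| type[ForwardRef('BaseTokenizerGroup')]` | `ray` | - |
| `tokenizer_revision` | `str \| None` | `null` | - |
| `trust_remote_code` | `bool` | `False` | - |
| `typical_acceptance_sampler_posterior_alpha` | `float \| None` | `null` | - |
| `typical_acceptance_sampler_posterior_threshold` | `float \| None` | `null` | - |
| `use_v2_block_manager` | `bool` | `True` | - |
| `worker_cls` | `str` | `auto` | - |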

Sampling Parameters

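| Field | Type | Default | Curated? |
| --- | --- | --- | --- |
| `_all_stop_token_ids` | `array` | `[]` | - |
| `_real_n` | `unknown` | `null` | - |
| `allowed_token_ids` | `unknown` | `null` | - |
| `bad_words` | `unknown` | `null` | - |
| `best_of` | `unknown` | `null` | - |
| `detokenize` | `boolean` | `True` | - |
| `frequency_penalty` | `number` | `0.0` | yes |
| `guided_decoding` | `unknown` | `null` | - |
| `ignore_eos` | `boolean` | `False` | yes |
| `include_stop_str_in_output` | `boolean` | `False` | - |
| `logit_bias` | `unknown` | `null` | - |
| `logits_processors` | `unknown` | `null` | - |
| `logprobs` | `unknown` | `null` | - |
| `max_tokens` | `unknown` | `16` | - |
| `min_p` | `number` | `0.0` | yes |
| `min_tokens` | `integer` | `0` | yes |
| `n` | `integer` | `1` | yes |
| `output_kind` | `unknown` | `0` | - |
| `output_text_buffer_length` | `integer` | `0` | - |
| `presence_penalty` | `number` | `0.0` | yes |
| `prompt_logprobs` | `unknown` | `null` | - |
| `repetition_penalty` | `number` | `1.0` | yes |
| `seed` | `unknown` | `null` | - |
| `skip_special_tokens` | `boolean` | `True` | - |
| `spaces_between_special_tokens` | `boolean` | `True` | - |
| `stop` | `unknown` | `null` | - |
| `stop_token_ids` | `unknown` | `null` | - |
| `temperature` | `number` | `1.0` | yes |
| `top_k` | `integer` | `-1` | yes |
| `top_p` | `number` | `1.0` | yes |
| `truncate_prompt_tokens` | `unknown` | `null` | - |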