TensorRT-LLM Parameter Curation
Engine version: 0.21.0
Discovered at: 2026-05-06 20:20:57+02:00
Discovery method: TrtLlmArgs.model_json_schema() + dataclasses.fields(SamplingParams)
Summary: 18 of 107 parameters curated (60 engine + 47 sampling parameters discovered)
Delta vs previous: deferred until first probe-pass cycle.
Engine Parameters
| Field | Type | Default | Curated? |
|---|---|---|---|
| auto_parallel | boolean | False | - |
| auto_parallel_world_size | integer \| None | null | - |
| backend | string \| None | null | yes |
| batched_logits_processor | Optional[tensorrt_llm.sampling_params.BatchedLogitsProcessor] | null | - |
| batching_type | BatchingType \| None | null | - |
| build_config | Optional[tensorrt_llm.builder.BuildConfig] | null | - |
| cache_transceiver_config | CacheTransceiverConfig \| None | null | - |
| calib_config | CalibConfig \| None | null | - |
| context_parallel_size | integer | 1 | - |
| cp_config | object \| None | null | - |
| decoding_config | Optional[DecodingConfig] | null | - |
| dtype | string | auto | yes |
| embedding_parallel_mode | string | SHARDING_ALONG_VOCAB | - |
| enable_attention_dp | boolean | False | - |
| enable_build_cache | Union[tensorrt_llm.llmapi.build_cache.BuildCacheConfig, bool] | False | - |
| enable_chunked_prefill | boolean | False | - |
| enable_lora | boolean | False | - |
| enable_prompt_adapter | boolean | False | - |
| enable_tqdm | boolean | False | - |
| extended_runtime_perf_knob_config | ExtendedRuntimePerfKnobConfig \| None | null | - |
| fast_build | boolean | False | yes |
| garbage_collection_gen0_threshold | integer | 20000 | - |
| gather_generation_logits | boolean | False | - |
| gpus_per_node | integer \| None | null | - |
| guided_decoding_backend | string \| None | null | - |
| iter_stats_max_iterations | integer \| None | null | - |
| kv_cache_config | KvCacheConfig | null | - |
| load_format | Literal['auto', 'dummy'] | auto | - |
| lora_config | LoraConfig \| None | null | - |
| max_batch_size | integer \| None | null | yes |
| max_beam_width | integer \| None | null | - |
| max_cpu_loras | integer | 4 | - |
| max_input_len | integer \| None | null | yes |
| max_lora_rank | integer \| None | null | - |
| max_loras | integer | 4 | - |
| max_num_tokens | integer \| None | null | yes |
| max_prompt_adapter_token | integer | 0 | - |
| max_seq_len | integer \| None | null | yes |
| model | string | null | - |
| moe_cluster_parallel_size | integer \| None | null | - |
| moe_expert_parallel_size | integer \| None | null | - |
| moe_tensor_parallel_size | integer \| None | null | - |
| normalize_log_probs | boolean | False | - |
| num_postprocess_workers | integer | 0 | - |
| peft_cache_config | PeftCacheConfig \| None | null | - |
| pipeline_parallel_size | integer | 1 | yes |
| postprocess_tokenizer_dir | string \| None | null | - |
| quant_config | QuantConfig \| None | null | - |
| reasoning_parser | string \| None | null | - |
| request_stats_max_iterations | integer \| None | null | - |
| revision | string \| None | null | - |
| scheduler_config | SchedulerConfig | null | - |
| skip_tokenizer_init | boolean | False | - |
| speculative_config | LookaheadDecodingConfig \| MedusaDecodingConfig \| EagleDecodingConfig \| MTPDecodingConfig \| NGramDecodingConfig \| DraftTargetDecodingConfig \| None | null | - |
| tensor_parallel_size | integer | 1 | yes |
| tokenizer | string \| None | null | - |
| tokenizer_mode | Literal['auto', 'slow'] | auto | - |
| tokenizer_revision | string \| None | null | - |
| trust_remote_code | boolean | False | - |
| workspace | string \| None | null | - |
Sampling Parameters
| Field | Type | Default | Curated? |
|---|---|---|---|
| add_special_tokens | bool | True | - |
| additional_model_outputs | list[AdditionalModelOutput] \| None | null | - |
| apply_batched_logits_processor | bool | False | - |
| bad | str \| list[str] \| None | null | - |
| bad_token_ids | list[int] \| None | null | - |
| beam_search_diversity_rate | float \| None | null | - |
| beam_width_array | list[int] \| None | null | - |
| best_of | int \| None | null | - |
| detokenize | bool | True | - |
| early_stopping | int \| None | null | - |
| embedding_bias | Tensor \| None | null | - |
| end_id | int \| None | null | - |
| exclude_input_from_output | bool | True | - |
| frequency_penalty | float \| None | null | - |
| guided_decoding | GuidedDecodingParams \| None | null | - |
| ignore_eos | bool | False | yes |
| include_stop_str_in_output | bool | False | - |
| length_penalty | float \| None | null | - |
| logits_processor | LogitsProcessor \| list[LogitsProcessor] \| None | null | - |
| logprobs | int \| None | null | - |
| lookahead_config | LookaheadDecodingConfig \| None | null | - |
| max_tokens | int | 32 | yes |
| min_p | float \| None | null | yes |
| min_tokens | int \| None | null | yes |
| n | int | 1 | yes |
| no_repeat_ngram_size | int \| None | null | - |
| pad_id | int \| None | null | - |
| presence_penalty | float \| None | null | - |
| prompt_logprobs | int \| None | null | - |
| repetition_penalty | float \| None | null | yes |
| return_context_logits | bool | False | - |
| return_encoder_output | bool | False | - |
| return_generation_logits | bool | False | - |
| return_perf_metrics | bool | False | - |
| seed | int \| None | null | - |
| skip_special_tokens | bool | True | - |
| spaces_between_special_tokens | bool | True | - |
| stop | str \| list[str] \| None | null | - |
| stop_token_ids | list[int] \| None | null | - |
| temperature | float \| None | null | yes |
| top_k | int \| None | null | yes |
| top_p | float \| None | null | yes |
| top_p_decay | float \| None | null | - |
| top_p_min | float \| None | null | - |
| top_p_reset_ids | int \| None | null | - |
| truncate_prompt_tokens | int \| None | null | - |
| use_beam_search | bool | False | - |