# TensorRT-LLM Parameter Curation
- Engine version: 1.0.0
- Discovered at: 2026-06-22 20:11:44.265856+00:00
- Discovery method: `TrtLlmArgs.model_json_schema()` + `dataclasses.fields(SamplingParams)`
- Summary: 20/104 parameters curated (57 engine + 47 sampling discovered)
## Engine Parameters
| Field | Type | Default | Curated? |
|---|---|---|---|
| auto_parallel | boolean | False | - |
| auto_parallel_world_size | integer \| None | null | - |
| backend | string \| None | null | yes |
| batched_logits_processor | Optional[tensorrt_llm.sampling_params.BatchedLogitsProcessor] | null | - |
| batching_type | BatchingType \| None | null | - |
| build_config | unknown | null | - |
| cache_transceiver_config | CacheTransceiverConfig \| None | null | - |
| calib_config | CalibConfig \| None | null | - |
| context_parallel_size | integer | 1 | - |
| cp_config | object \| None | null | - |
| decoding_config | Optional[tensorrt_llm.llmapi.llm_args.DecodingConfig] | null | - |
| dtype | string | auto | yes |
| embedding_parallel_mode | string | SHARDING_ALONG_VOCAB | - |
| enable_attention_dp | boolean | False | - |
| enable_build_cache | Union[tensorrt_llm.llmapi.build_cache.BuildCacheConfig, bool] | False | - |
| enable_chunked_prefill | boolean | False | - |
| enable_lora | boolean | False | - |
| enable_prompt_adapter | boolean | False | - |
| enable_tqdm | boolean | False | - |
| extended_runtime_perf_knob_config | ExtendedRuntimePerfKnobConfig \| None | null | - |
| fail_fast_on_attention_window_too_large | boolean | False | - |
| fast_build | boolean | False | yes |
| gather_generation_logits | boolean | False | - |
| gpus_per_node | integer \| None | null | - |
| guided_decoding_backend | string \| None | null | - |
| iter_stats_max_iterations | integer \| None | null | - |
| kv_cache_config | KvCacheConfig | null | yes |
| load_format | Literal['auto', 'dummy'] | auto | - |
| lora_config | LoraConfig \| None | null | - |
| max_batch_size | integer \| None | null | yes |
| max_beam_width | integer \| None | null | - |
| max_input_len | integer \| None | null | yes |
| max_num_tokens | integer \| None | null | yes |
| max_prompt_adapter_token | integer | 0 | - |
| max_seq_len | integer \| None | null | yes |
| model | string | null | - |
| moe_cluster_parallel_size | integer \| None | null | - |
| moe_expert_parallel_size | integer \| None | null | - |
| moe_tensor_parallel_size | integer \| None | null | - |
| normalize_log_probs | boolean | False | - |
| num_postprocess_workers | integer | 0 | - |
| peft_cache_config | PeftCacheConfig \| None | null | - |
| pipeline_parallel_size | integer | 1 | yes |
| postprocess_tokenizer_dir | string \| None | null | - |
| quant_config | QuantConfig \| None | null | yes |
| reasoning_parser | string \| None | null | - |
| request_stats_max_iterations | integer \| None | null | - |
| revision | string \| None | null | - |
| scheduler_config | SchedulerConfig | null | yes |
| skip_tokenizer_init | boolean | False | - |
| speculative_config | DraftTargetDecodingConfig \| EagleDecodingConfig \| LookaheadDecodingConfig \| MedusaDecodingConfig \| MTPDecodingConfig \| NGramDecodingConfig \| UserProvidedDecodingConfig \| AutoDecodingConfig \| None | null | - |
| tensor_parallel_size | integer | 1 | yes |
| tokenizer | string \| None | null | - |
| tokenizer_mode | Literal['auto', 'slow'] | auto | - |
| tokenizer_revision | string \| None | null | - |
| trust_remote_code | boolean | False | - |
| workspace | string \| None | null | - |
## Sampling Parameters
| Field | Type | Default | Curated? |
|---|---|---|---|
| add_special_tokens | bool | True | - |
| additional_model_outputs | list[AdditionalModelOutput] \| None | null | - |
| apply_batched_logits_processor | bool | False | - |
| bad | str \| list[str] \| None | null | - |
| bad_token_ids | list[int] \| None | null | - |
| beam_search_diversity_rate | float \| None | null | - |
| beam_width_array | list[int] \| None | null | - |
| best_of | int \| None | null | - |
| detokenize | bool | True | - |
| early_stopping | int \| None | null | - |
| embedding_bias | Tensor \| None | null | - |
| end_id | int \| None | null | - |
| exclude_input_from_output | bool | True | - |
| frequency_penalty | float \| None | null | - |
| guided_decoding | GuidedDecodingParams \| None | null | - |
| ignore_eos | bool | False | yes |
| include_stop_str_in_output | bool | False | - |
| length_penalty | float \| None | null | - |
| logits_processor | LogitsProcessor \| list[LogitsProcessor] \| None | null | - |
| logprobs | int \| None | null | - |
| lookahead_config | LookaheadDecodingConfig \| None | null | - |
| max_tokens | int | 32 | - |
| min_p | float \| None | null | yes |
| min_tokens | int \| None | null | yes |
| n | int | 1 | yes |
| no_repeat_ngram_size | int \| None | null | - |
| pad_id | int \| None | null | - |
| presence_penalty | float \| None | null | - |
| prompt_logprobs | int \| None | null | - |
| repetition_penalty | float \| None | null | yes |
| return_context_logits | bool | False | - |
| return_encoder_output | bool | False | - |
| return_generation_logits | bool | False | - |
| return_perf_metrics | bool | False | - |
| seed | int \| None | null | - |
| skip_special_tokens | bool | True | - |
| spaces_between_special_tokens | bool | True | - |
| stop | str \| list[str] \| None | null | - |
| stop_token_ids | list[int] \| None | null | - |
| temperature | float \| None | null | yes |
| top_k | int \| None | null | yes |
| top_p | float \| None | null | yes |
| top_p_decay | float \| None | null | - |
| top_p_min | float \| None | null | - |
| top_p_reset_ids | int \| None | null | - |
| truncate_prompt_tokens | int \| None | null | - |
| use_beam_search | bool | False | - |