Introduction
Inference service
Management service
Compatibility Services
Common endpoints
Get model metadata
This endpoint gives you the metadata of the model currently used by the agent.
Endpoint
Method: GET
Path: /api/v1/agent/{agent_id}/model_metadata
Response
The following is an example response. The tokenizer.chat_template field holds the model's full chat template, which is very long, so it is truncated below:
{
"metadata": {
"general.architecture": "qwen35",
"general.base_model.0.name": "Qwen3.5 0.8B",
"general.base_model.0.organization": "Qwen",
"general.base_model.0.repo_url": "https://huggingface.co/Qwen/Qwen3.5-0.8B",
"general.base_model.count": "1",
"general.basename": "Qwen3.5-0.8B",
"general.file_type": "15",
"general.license": "apache-2.0",
"general.license.link": "https://huggingface.co/Qwen/Qwen3.5-0.8B/blob/main/LICENSE",
"general.name": "Qwen3.5-0.8B",
"general.quantization_version": "2",
"general.quantized_by": "Unsloth",
"general.repo_url": "https://huggingface.co/unsloth",
"general.size_label": "0.8B",
"general.type": "model",
"quantize.imatrix.chunks_count": "80",
"quantize.imatrix.dataset": "unsloth_calibration_Qwen3.5-0.8B.txt",
"quantize.imatrix.entries_count": "186",
"quantize.imatrix.file": "Qwen3.5-0.8B-GGUF/imatrix_unsloth.gguf",
"qwen35.attention.head_count": "8",
"qwen35.attention.head_count_kv": "2",
"qwen35.attention.key_length": "256",
"qwen35.attention.layer_norm_rms_epsilon": "0.000001",
"qwen35.attention.value_length": "256",
"qwen35.block_count": "24",
"qwen35.context_length": "262144",
"qwen35.embedding_length": "1024",
"qwen35.feed_forward_length": "3584",
"qwen35.full_attention_interval": "4",
"qwen35.rope.dimension_count": "64",
"qwen35.rope.freq_base": "10000000.000000",
"qwen35.ssm.conv_kernel": "4",
"qwen35.ssm.group_count": "16",
"qwen35.ssm.inner_size": "2048",
"qwen35.ssm.state_size": "128",
"qwen35.ssm.time_step_rank": "16",
"tokenizer.chat_template": "{%- set image_count = namespace(value=0) %}\n{%- set video_count = namespace(value=0) %}\n... (truncated for brevity)",
"tokenizer.ggml.eos_token_id": "248046",
"tokenizer.ggml.model": "gpt2",
"tokenizer.ggml.padding_token_id": "248055",
"tokenizer.ggml.pre": "qwen35"
}
}