GET
/
info
Info
curl --request GET \
  --url https://serving.app.predibase.com/tenant_id/deployments/v2/llms/deployment_name/info
{
  "docker_label": null,
  "max_batch_total_tokens": 32000,
  "max_best_of": 2,
  "max_concurrent_requests": 128,
  "max_input_length": 1024,
  "max_stop_sequences": 4,
  "max_total_tokens": 2048,
  "max_waiting_tokens": 20,
  "model_device_type": "cuda",
  "model_dtype": "torch.float16",
  "model_id": "bigscience/bloom-560m",
  "model_pipeline_tag": "lorax",
  "model_sha": "e985a63cdc139290c5f700ff1929f0b5942cced2",
  "sha": null,
  "validation_workers": 2,
  "version": "0.5.0",
  "waiting_served_ratio": 1.2
}

Response

200 - application/json

Info Response

max_batch_total_tokens
integer
required
Required range: x >= 0
Example:

"32000"

max_best_of
integer
required
Required range: x >= 0
Example:

"2"

max_concurrent_requests
integer
required

Router Parameters

Required range: x >= 0
Example:

"128"

max_input_length
integer
required
Required range: x >= 0
Example:

"1024"

max_stop_sequences
integer
required
Required range: x >= 0
Example:

"4"

max_total_tokens
integer
required
Required range: x >= 0
Example:

"2048"

max_waiting_tokens
integer
required
Required range: x >= 0
Example:

"20"

model_device_type
string
required
Example:

"cuda"

model_dtype
string
required
Example:

"torch.float16"

model_id
string
required

Model info

Example:

"bigscience/bloom-560m"

validation_workers
integer
required
Required range: x >= 0
Example:

"2"

version
string
required

Router Info

Example:

"0.5.0"

waiting_served_ratio
number
required
Example:

"1.2"

docker_label
string | null
Example:

null

model_pipeline_tag
string | null
Example:

"lorax"

model_sha
string | null
Example:

"e985a63cdc139290c5f700ff1929f0b5942cced2"

sha
string | null
Example:

null