GET /metrics
Metrics
curl --request GET \
  --url https://serving.app.predibase.com/{tenant_id}/deployments/v2/llms/{deployment_name}/metrics
{
  "lorax_request_count": 123,
  "lorax_request_skipped_tokens": 123,
  "lorax_queue_length": 123,
  "lorax_batch_inference_count": 123,
  "lorax_request_max_new_tokens": {},
  "lorax_request_inference_duration": {},
  "lorax_request_mean_time_per_token_duration": {},
  "lorax_request_generated_tokens": {},
  "lorax_request_success": 123,
  "lorax_batch_next_size": {},
  "lorax_request_failure": 123,
  "lorax_request_input_length": {},
  "lorax_batch_current_size": {},
  "lorax_batch_inference_success": 123,
  "lorax_batch_inference_duration": {},
  "lorax_request_queue_duration": {},
  "lorax_request_duration": {},
  "lorax_request_validation_duration": {}
}

Response

200 - text/plain

Metrics Response

lorax_request_count
integer
lorax_request_skipped_tokens
integer
lorax_queue_length
number
lorax_batch_inference_count
integer
lorax_request_max_new_tokens
object
lorax_request_inference_duration
object
lorax_request_mean_time_per_token_duration
object
lorax_request_generated_tokens
object
lorax_request_success
integer
lorax_batch_next_size
object
lorax_request_failure
number
lorax_request_input_length
object
lorax_batch_current_size
object
lorax_batch_inference_success
integer
lorax_batch_inference_duration
object
lorax_request_queue_duration
object
lorax_request_duration
object
lorax_request_validation_duration
object