Prometheus metrics scrape endpoint
curl --request GET \
--url https://serving.app.predibase.com/tenant_id/deployments/v2/llms/deployment_name/metrics
{
"lorax_request_count": 123,
"lorax_request_skipped_tokens": 123,
"lorax_queue_length": 123,
"lorax_batch_inference_count": 123,
"lorax_request_max_new_tokens": {},
"lorax_request_inference_duration": {},
"lorax_request_mean_time_per_token_duration": {},
"lorax_request_generated_tokens": {},
"lorax_request_success": 123,
"lorax_batch_next_size": {},
"lorax_request_failure": 123,
"lorax_request_input_length": {},
"lorax_batch_current_size": {},
"lorax_batch_inference_success": 123,
"lorax_batch_inference_duration": {},
"lorax_request_queue_duration": {},
"lorax_request_duration": {},
"lorax_request_validation_duration": {}
}
Metrics Response
The response is of type object.
curl --request GET \
--url https://serving.app.predibase.com/tenant_id/deployments/v2/llms/deployment_name/metrics
{
"lorax_request_count": 123,
"lorax_request_skipped_tokens": 123,
"lorax_queue_length": 123,
"lorax_batch_inference_count": 123,
"lorax_request_max_new_tokens": {},
"lorax_request_inference_duration": {},
"lorax_request_mean_time_per_token_duration": {},
"lorax_request_generated_tokens": {},
"lorax_request_success": 123,
"lorax_batch_next_size": {},
"lorax_request_failure": 123,
"lorax_request_input_length": {},
"lorax_batch_current_size": {},
"lorax_batch_inference_success": 123,
"lorax_batch_inference_duration": {},
"lorax_request_queue_duration": {},
"lorax_request_duration": {},
"lorax_request_validation_duration": {}
}
Prometheus metrics scrape endpoint
curl --request GET \
--url https://serving.app.predibase.com/tenant_id/deployments/v2/llms/deployment_name/metrics
{
"lorax_request_count": 123,
"lorax_request_skipped_tokens": 123,
"lorax_queue_length": 123,
"lorax_batch_inference_count": 123,
"lorax_request_max_new_tokens": {},
"lorax_request_inference_duration": {},
"lorax_request_mean_time_per_token_duration": {},
"lorax_request_generated_tokens": {},
"lorax_request_success": 123,
"lorax_batch_next_size": {},
"lorax_request_failure": 123,
"lorax_request_input_length": {},
"lorax_batch_current_size": {},
"lorax_batch_inference_success": 123,
"lorax_batch_inference_duration": {},
"lorax_request_queue_duration": {},
"lorax_request_duration": {},
"lorax_request_validation_duration": {}
}
Metrics Response
The response is of type object.
curl --request GET \
--url https://serving.app.predibase.com/tenant_id/deployments/v2/llms/deployment_name/metrics
{
"lorax_request_count": 123,
"lorax_request_skipped_tokens": 123,
"lorax_queue_length": 123,
"lorax_batch_inference_count": 123,
"lorax_request_max_new_tokens": {},
"lorax_request_inference_duration": {},
"lorax_request_mean_time_per_token_duration": {},
"lorax_request_generated_tokens": {},
"lorax_request_success": 123,
"lorax_batch_next_size": {},
"lorax_request_failure": 123,
"lorax_request_input_length": {},
"lorax_batch_current_size": {},
"lorax_batch_inference_success": 123,
"lorax_batch_inference_duration": {},
"lorax_request_queue_duration": {},
"lorax_request_duration": {},
"lorax_request_validation_duration": {}
}