from predibase import Predibase

# Connect to Predibase with an API token.
pb = Predibase(api_token="<PREDIBASE_API_TOKEN>")

# Obtain a client for a private deployment — or use a shared serverless
# endpoint such as "qwen3-8b".
client = pb.deployments.client("my-deployment")

# Single-shot text generation.
result = client.generate(
    "What is machine learning?",
    max_new_tokens=100,
    temperature=0.7,
)
print(result.generated_text)

# Streaming generation: tokens are printed as they arrive.
for chunk in client.generate_stream(
    "Write a story about a robot learning to paint.",
    max_new_tokens=200,
):
    print(chunk.token.text, end="", flush=True)
Function calling allows models to interact with external tools and APIs in a
structured way. To do function calling with Predibase deployments and/or adapters, define your functions and
include them in your requests with the
OpenAI Chat Completions v1 SDK method:
Predibase endpoints allow you to enforce that responses contain only valid JSON
and adhere to a provided schema. The schema can be provided either using JSON Schema
(REST, Python) or Pydantic (Python).
import json

from predibase import Predibase
from pydantic import BaseModel, constr


# Connect to Predibase with an API token.
pb = Predibase(api_token="<PREDIBASE_API_TOKEN>")


class Character(BaseModel):
    # Constraints that the generated JSON must satisfy.
    name: constr(max_length=10)
    age: int
    strength: int


# Client handle for the shared base-LLM deployment.
client = pb.deployments.client("qwen3-8b")

# Request output that is valid JSON conforming to the Character schema.
result = client.generate(
    "Generate a new character for my awesome game. Strength 1-10.",
    response_format={
        "type": "json_object",
        "schema": Character.model_json_schema(),
    },
    max_new_tokens=128,
)

# Parse the JSON text and validate it into a Character instance.
parsed = json.loads(result.generated_text)
my_character = Character(**parsed)