from predibase import AugmentationConfig, Predibase
# Example: generate a synthetic, augmented copy of an existing Predibase
# dataset with an OpenAI model, then print a few details of the result.

# Initialize the Predibase client. Every `pb.*` call below goes through this
# object, so it must exist before the dataset is fetched.
# Replace the placeholder with a real Predibase API token.
pb = Predibase(api_token="your-predibase-api-token")

# Create an augmentation configuration
config = AugmentationConfig(
    base_model="gpt-4-turbo",  # Required: The OpenAI model to use
    num_samples_to_generate=500,  # Optional: Number of synthetic examples to generate
    num_seed_samples=10,  # Optional: Number of seed samples to use
    augmentation_strategy="mixture_of_agents",  # Optional: Augmentation strategy
    task_context="Generate diverse examples for customer service questions",  # Optional: Task context
)

# Get the source dataset
source_dataset = pb.datasets.get("customer_questions")

# Augment the dataset
augmented_dataset = pb.datasets.augment(
    config=config,
    dataset=source_dataset,
    name="augmented_customer_questions",
    openai_api_key="your-openai-api-key",  # Optional: If not set in environment
)

# Print dataset details
print(f"Created augmented dataset: {augmented_dataset.name}")
print(f"Number of rows: {augmented_dataset.num_rows}")
print(f"Created at: {augmented_dataset.created_at}")