Skip to main content
Agents can return structured data in a specific format using Pydantic models, ensuring type safety and validation.

Overview

By default, agents return unstructured text answers:
overview_default.py
# Default behaviour: no response_format is passed, so the answer is plain text.
# `agent` is assumed to have been created earlier (e.g. client.Agent(session=session)).
result = agent.run(task="Find the contact email")
print(result.answer)  # "contact@example.com"
With structured output, define the exact format you want:
overview_structured.py
# NOTE: ContactInfo is not defined in this snippet; it is assumed to be a
# Pydantic model with an `email` field declared elsewhere.
agent = client.Agent(session=session)
# Passing the model class as response_format requests an answer in that shape.
result = agent.run(task="Find contact info", response_format=ContactInfo)
# Only parse when the run succeeded and produced a non-empty answer.
if result.success and result.answer:
    # The answer is parsed from JSON and validated into a typed model instance.
    contact = ContactInfo.model_validate_json(result.answer)
    print(contact.email)  # Type-safe access

Basic Example

Define a Pydantic model and pass it to response_format:
from notte_sdk import NotteClient
from pydantic import BaseModel

client = NotteClient()


# Target schema: the three fields the agent is asked to fill in.
class Product(BaseModel):
    name: str
    price: float
    in_stock: bool


# The session is used as a context manager, so it is cleaned up when the block exits.
with client.Session() as session:
    agent = client.Agent(session=session)

    # response_format takes the model class itself, not an instance.
    result = agent.run(
        task="Extract product information",
        url="https://example.com/product/123",
        response_format=Product,
    )

    # Parse and access
    # Guard first: skip parsing when the run failed or returned an empty answer.
    if result.success and result.answer:
        product = Product.model_validate_json(result.answer)
        print(f"{product.name}: ${product.price}")
        if product.in_stock:
            print("Available!")

Complex Models

Nested Structures

Models can contain nested objects:
nested_structures.py
# Minimal models for this example: Company embeds an Address as a nested object.
# Only the attributes read below are declared; extend them as needed.
class Address(BaseModel):
    city: str


class Company(BaseModel):
    name: str
    address: Address  # Nested model, validated together with its parent


agent = client.Agent(session=session)
result = agent.run(task="Extract company information", response_format=Company)

# Only parse when the run succeeded and produced a non-empty answer.
if result.success and result.answer:
    company = Company.model_validate_json(result.answer)
    print(company.name)
    print(company.address.city)  # Nested fields use plain attribute access

Lists of Objects

Extract multiple items:
lists_of_objects.py
from notte_sdk import NotteClient
from pydantic import BaseModel


# A single extracted review.
class Review(BaseModel):
    author: str
    rating: int
    comment: str


# Wrapper model: the list of reviews is exposed as the `reviews` field,
# so the response format is still a single model class.
class ReviewList(BaseModel):
    reviews: list[Review]


client = NotteClient()
with client.Session() as session:
    agent = client.Agent(session=session)
    result = agent.run(
        task="Extract all product reviews",
        response_format=ReviewList,
    )

    # Iterate over reviews
    # Guard first: skip parsing when the run failed or returned an empty answer.
    if result.success and result.answer:
        data = ReviewList.model_validate_json(result.answer)
        for review in data.reviews:
            print(f"{review.author}: {review.rating}/5")
            print(review.comment)

Optional Fields

Use a T | None union type for fields that the page may not provide:
from pydantic import BaseModel


class Article(BaseModel):
    # Required: validation fails if the answer omits these.
    title: str
    # `str | None` alone only permits a null value; the `= None` default is
    # what allows the key to be left out of the answer entirely.
    author: str | None = None  # May not always be present
    date: str | None = None
    content: str

Use Cases

E-commerce Data Extraction

Extract structured product data:
ecommerce_extraction.py
# Schema for one product listing page.
class ProductListing(BaseModel):
    name: str
    price: float
    # Nullable but has no default, so the key must still appear in the answer
    # (with a null value when there is no original price).
    original_price: float | None
    rating: float
    review_count: int
    availability: str
    seller: str


client = NotteClient()
with client.Session() as session:
    agent = client.Agent(session=session)
    # `result` is not consumed in this snippet; parse result.answer with
    # ProductListing.model_validate_json(...) to use the data.
    result = agent.run(
        task="Extract product listing information",
        url="https://store.example.com/products/laptop",
        response_format=ProductListing,
    )

Lead Generation

Extract structured contact information:
lead_generation.py
# Schema for one business lead taken from a company page.
class BusinessLead(BaseModel):
    company_name: str
    # The `| None` fields accept null but have no default, so each key must
    # still be present in the answer.
    contact_email: str | None
    phone: str | None
    website: str
    industry: str
    employee_count: str | None  # Kept as text rather than a number


client = NotteClient()
with client.Session() as session:
    agent = client.Agent(session=session)
    # `result` is not consumed in this snippet; parse result.answer with
    # BusinessLead.model_validate_json(...) to use the data.
    result = agent.run(
        task="Extract business information from this company page",
        url="https://example.com/about",
        response_format=BusinessLead,
    )

Job Listings

Extract job posting details:
job_listings.py
# Schema for one job posting.
class JobPosting(BaseModel):
    title: str
    company: str
    location: str
    salary_range: str | None  # Nullable, but the key must still be present
    job_type: str  # "Full-time", "Part-time", etc.
    posted_date: str
    requirements: list[str]


client = NotteClient()
with client.Session() as session:
    agent = client.Agent(session=session)
    # No url is passed here, so the agent works from the task description alone.
    result = agent.run(
        task="Extract job posting information",
        response_format=JobPosting,
    )

Social Media Data

Extract social media profiles:
social_media.py
    username: str
    display_name: str
    bio: str | None
    follower_count: int
    following_count: int
    post_count: int
    verified: bool


client = NotteClient()
with client.Session() as session:
    agent = client.Agent(session=session)
    result = agent.run(
        task="Extract social media profile information",
        response_format=SocialProfile,

Field Validation

Use Pydantic validators for data quality:
field_validation.py
from pydantic import BaseModel, Field, field_validator


# Product schema with declarative constraints plus one custom check.
class Product(BaseModel):
    name: str = Field(min_length=1)
    price: float = Field(gt=0)  # Must be positive
    rating: float = Field(ge=0, le=5)  # 0-5 range

    @field_validator("price")
    @classmethod
    def validate_price(cls, v: float) -> float:
        """Reject prices above 10000 as implausible.

        Raises:
            ValueError: if the price exceeds 10000; Pydantic reports it as a
                validation error on the `price` field.
        """
        if v > 10000:
            raise ValueError("Price seems unreasonably high")
        return v


client = NotteClient()
with client.Session() as session:
    agent = client.Agent(session=session)
    # The constraints above are enforced when the answer is parsed with
    # Product.model_validate_json(result.answer).
    result = agent.run(
        task="Extract product",
        response_format=Product,
    )

Best Practices

1. Be Specific in Task Description

Match your task to the response format:
bp_specific_task.py
# Clear - the task names the fields the response format expects
result = agent.run(
    task="Extract the product name, price, and stock status",
    response_format=Product,
)

# Less clear - agent might not fill all fields
result = agent.run(
    task="Tell me about this product",
    response_format=Product,
)

2. Use Appropriate Types

Choose Python types that match the data:
from datetime import date

from pydantic import BaseModel


# Each field uses the Python type that matches the underlying data.
class Event(BaseModel):
    title: str
    # NOTE(review): the field name shadows the imported `date` type. This
    # resolves correctly only while the field has no default value; consider
    # renaming the field or aliasing the import if a default is ever added.
    date: date  # Will be parsed as date
    price: float  # Not str
    attendee_count: int  # Not float

3. Make Optional Fields Explicit

Don’t assume data will always be present:
from pydantic import BaseModel


# Fields without a default are required; `| None = None` marks a field that
# may be absent from the answer.
class Product(BaseModel):
    name: str  # Always required
    price: float  # Always required
    discount: float | None = None  # Might not exist
    rating: float | None = None  # Might not exist

4. Use Field Descriptions

Help the agent understand what you want:
from pydantic import BaseModel, Field


# Descriptions are attached to each field to tell the agent what to extract.
class Product(BaseModel):
    name: str = Field(description="Product title/name")
    price: float = Field(description="Current selling price in USD")
    # default=None makes the field genuinely optional ("if any"); without it
    # the key would be required even though null is an accepted value.
    original_price: float | None = Field(
        default=None,
        description="Original price before discount, if any",
    )

5. Start Simple, Then Expand

Begin with basic models:
from pydantic import BaseModel


# Start with minimal model
class Product(BaseModel):
    name: str
    price: float


# Add fields as needed
# This is a separate model that repeats `name` and `price`; it does not
# inherit from Product.
class DetailedProduct(BaseModel):
    name: str
    price: float
    # Nullable but without a default: the keys must still appear in the answer.
    description: str | None
    specs: dict[str, str] | None

Error Handling

Handle validation errors:
error_handling.py
from pydantic import ValidationError

# Stays None when the run fails, returns no answer, or the answer is invalid.
product = None
try:
    result = agent.run(
        task="Extract product data",
        response_format=Product,
    )
    # Parsing the answer is the step that raises ValidationError when the
    # returned data does not match the Product model.
    if result.success and result.answer:
        product = Product.model_validate_json(result.answer)
except ValidationError as e:
    print(f"Agent returned invalid data: {e}")

Limitations

Not Suitable For

Structured output works best for data extraction, not for:
  • Open-ended creative tasks
  • Tasks requiring explanation or reasoning
  • Cases where you want natural language responses
limitations_not_suitable.py
with client.Session() as session:
    agent = client.Agent(session=session)
    # Do this instead: leave out response_format so the answer stays free-form text
    result = agent.run(task="Explain how this product works")
    print(result.answer)  # Natural language explanation

Complex Relationships

Deeply nested structures can be hard for agents to fill in reliably:
limitations_complex.py
# Leaf model shared by both structures below.
class Product(BaseModel):
    name: str
    price: float


# Difficult for agents
# Three levels of containers (dict -> list -> dict) before reaching a Product.
class ComplexStructure(BaseModel):
    nested: dict[str, list[dict[str, Product]]]


# Better - flatten or simplify
# Two flat lists replace the nested mapping.
class SimplifiedStructure(BaseModel):
    products: list[Product]
    categories: list[str]

Next Steps