> ## Documentation Index
> Fetch the complete documentation index at: https://docs.notte.cc/llms.txt
> Use this file to discover all available pages before exploring further.

# Scrape From Html


## OpenAPI

````yaml https://api.notte.cc/openapi.json post /scrape_from_html
openapi: 3.1.0
info:
  title: Notte API
  description: >-
    Notte API is a REST API that allows you to interact with Notte. It is used
    to create cloud browser sessions, scrape webpages, and run web ai agents to
    act on your behalf on the internet.
  version: 1.4.40
  x-logo:
    url: https://www.notte.cc/images/logo/logo-white.png
servers: []
security: []
tags:
  - name: agents
    description: Web AI agents (start, stop, status, replay, etc.)
  - name: sessions
    description: Session management (start, stop, status, etc.)
  - name: debug
    description: Session debugging tools (replay, logs, recording, etc.)
  - name: page
    description: Page operations within a session (observe, step, scrape, etc.)
  - name: storage
    description: File storage interface (upload, download, list, etc.)
  - name: network
    description: Network requests/responses within a session (intercept, etc.)
  - name: vaults
    description: >-
      Vault & Credentials management (create/delete vaults, create/delete
      credentials, etc.)
  - name: personas
    description: Persona management (create, delete, list emails, list sms, etc.)
  - name: scrape
    description: >-
      Webpage scraping (scrape, screenshot, etc.) with automatic session
      management.
  - name: health
    description: Health check endpoint.
  - name: usage
    description: Usage logs (usage, logs, etc.)
  - name: functions
    description: Functions management (create, delete, list, etc.)
paths:
  /scrape_from_html:
    post:
      tags:
        - scrape
      summary: Scrape From Html
      operationId: scrape_from_html
      parameters:
        - name: x-notte-request-origin
          in: header
          required: false
          schema:
            anyOf:
              - type: string
              - type: 'null'
            title: X-Notte-Request-Origin
        - name: x-notte-sdk-version
          in: header
          required: false
          schema:
            anyOf:
              - type: string
              - type: 'null'
            title: X-Notte-Sdk-Version
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ScrapeFromHtmlRequest'
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ScrapeSchemaResponse'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
      security:
        - OAuth2PasswordBearer: []
components:
  schemas:
    ScrapeFromHtmlRequest:
      properties:
        selector:
          anyOf:
            - type: string
            - type: 'null'
          title: Selector
          description: >-
            Playwright selector to scope the scrape to. Only content inside this
            selector will be scraped.
        scrape_links:
          type: boolean
          title: Scrape Links
          description: Whether to scrape links from the page. Links are scraped by default.
          default: true
        scrape_images:
          type: boolean
          title: Scrape Images
          description: >-
            Whether to scrape images from the page. Images are not scraped by
            default.
          default: false
        ignored_tags:
          anyOf:
            - items:
                type: string
              type: array
            - type: 'null'
          title: Ignored Tags
          description: HTML tags to ignore from the page
        only_main_content:
          type: boolean
          title: Only Main Content
          description: >-
            Whether to only scrape the main content of the page. If True,
            navbars, footers, etc. are excluded.
          default: false
        only_images:
          type: boolean
          title: Only Images
          description: >-
            Whether to only scrape images from the page. If True, the page
            content is excluded.
          default: false
        response_format:
          anyOf:
            - {}
            - type: 'null'
          title: Response Format
          description: >-
            The response format to use for the scrape. You can use a Pydantic
            model or a JSON Schema dict (cf.
            https://docs.pydantic.dev/latest/concepts/json_schema/#generating-json-schema).
        instructions:
          type: string
          title: Instructions
          description: User description as to what needs to be scraped
          default: ''
        use_link_placeholders:
          type: boolean
          title: Use Link Placeholders
          description: >-
            Whether to use link/image placeholders to reduce the number of
            tokens in the prompt and hallucinations. However, this is an
            experimental feature and might not work as expected.
          default: false
        frames:
          items:
            $ref: '#/components/schemas/FrameData'
          type: array
          title: Frames
          description: >-
            Frame data for all frames found in the page. The main frame has to
            be the first one
      additionalProperties: false
      type: object
      title: ScrapeFromHtmlRequest
    ScrapeSchemaResponse:
      properties:
        model_schema:
          $ref: '#/components/schemas/SchemaGenerationResponse'
          description: Output schema generated by the LLM according to the instructions
        scrape:
          anyOf:
            - $ref: '#/components/schemas/StructuredData_BaseModel_'
            - type: 'null'
          description: Output from the scrape, validated against the model schema
      type: object
      required:
        - model_schema
        - scrape
      title: ScrapeSchemaResponse
    HTTPValidationError:
      properties:
        detail:
          items:
            $ref: '#/components/schemas/ValidationError'
          type: array
          title: Detail
      type: object
      title: HTTPValidationError
    FrameData:
      properties:
        frameUrl:
          type: string
          title: Frameurl
          description: URL of the frame
        frameData:
          type: string
          title: Framedata
          description: DOM Content of the frame
      type: object
      required:
        - frameUrl
        - frameData
      title: FrameData
    SchemaGenerationResponse:
      properties:
        success:
          type: boolean
          title: Success
        model_schema:
          anyOf:
            - additionalProperties: true
              type: object
            - type: 'null'
          title: Model Schema
        error:
          anyOf:
            - type: string
            - type: 'null'
          title: Error
      type: object
      required:
        - success
      title: SchemaGenerationResponse
    StructuredData_BaseModel_:
      properties:
        success:
          type: boolean
          title: Success
          description: Whether the data was extracted successfully
          default: true
        error:
          anyOf:
            - type: string
            - type: 'null'
          title: Error
          description: Error message if the data was not extracted successfully
        data:
          anyOf:
            - $ref: '#/components/schemas/BaseModel'
            - $ref: '#/components/schemas/RootModel_Any_'
            - type: 'null'
          title: Data
          description: Structured data extracted from the page in JSON format
      type: object
      title: StructuredData[BaseModel]
    ValidationError:
      properties:
        loc:
          items:
            anyOf:
              - type: string
              - type: integer
          type: array
          title: Location
        msg:
          type: string
          title: Message
        type:
          type: string
          title: Error Type
        input:
          title: Input
        ctx:
          type: object
          title: Context
      type: object
      required:
        - loc
        - msg
        - type
      title: ValidationError
    BaseModel:
      properties: {}
      type: object
      title: BaseModel
    RootModel_Any_:
      title: RootModel[Any]
  securitySchemes:
    OAuth2PasswordBearer:
      type: oauth2
      flows:
        password:
          scopes: {}
          tokenUrl: token

````