Gemini 2.5 Flash TTS 文本转语音

# POST a sample text-to-speech request; replace every <...> placeholder before running.
curl -X POST \
  'https://api.highwayapi.ai/v3/gemini-2.5-flash-tts' \
  -H 'Authorization: <authorization>' \
  -H 'Content-Type: <content-type>' \
  -d '
{
  "contents": {
    "role": "<string>",
    "parts": {
      "text": "<string>"
    }
  },
  "generation_config": {
    "temperature": 123,
    "speech_config": {
      "voice_config": {
        "prebuilt_voice_config": {
          "voice_name": "<string>"
        }
      },
      "language_code": "<string>",
      "multi_speaker_voice_config": {
        "speaker_voice_configs": [
          {
            "speaker": "<string>",
            "voice_config": {
              "prebuilt_voice_config": {
                "voice_name": "<string>"
              }
            }
          }
        ]
      }
    }
  }
}
'

import requests

# Endpoint that receives the text-to-speech request.
url = "https://api.highwayapi.ai/v3/gemini-2.5-flash-tts"

# JSON request body; every "<string>" value is a placeholder to fill in.
payload = {
    "contents": {
        "role": "<string>",
        "parts": { "text": "<string>" }
    },
    "generation_config": {
        "temperature": 123,
        "speech_config": {
            "voice_config": { "prebuilt_voice_config": { "voice_name": "<string>" } },
            "language_code": "<string>",
            "multi_speaker_voice_config": { "speaker_voice_configs": [
                    {
                        "speaker": "<string>",
                        "voice_config": { "prebuilt_voice_config": { "voice_name": "<string>" } }
                    }
                ] }
        }
    }
}
# Header values are placeholders as well.
headers = {
    "Content-Type": "<content-type>",
    "Authorization": "<authorization>"
}

# requests applies no timeout unless one is given, so an unresponsive server
# would block this script indefinitely; cap the wait at 30 seconds.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Print the raw response body.
print(response.text)

// Request body for the text-to-speech call; '<string>' values are placeholders.
const requestBody = {
  contents: {
    role: '<string>',
    parts: {text: '<string>'}
  },
  generation_config: {
    temperature: 123,
    speech_config: {
      voice_config: {
        prebuilt_voice_config: {voice_name: '<string>'}
      },
      language_code: '<string>',
      multi_speaker_voice_config: {
        speaker_voice_configs: [
          {
            speaker: '<string>',
            voice_config: {
              prebuilt_voice_config: {voice_name: '<string>'}
            }
          }
        ]
      }
    }
  }
};

// fetch settings: POST the serialized body with the placeholder headers.
const options = {
  method: 'POST',
  headers: {
    'Content-Type': '<content-type>',
    Authorization: '<authorization>'
  },
  body: JSON.stringify(requestBody)
};

// Parse the reply as JSON and log it; log any failure to stderr.
fetch('https://api.highwayapi.ai/v3/gemini-2.5-flash-tts', options)
  .then((response) => response.json())
  .then((data) => console.log(data))
  .catch((error) => console.error(error));

<?php

// Request body that gets JSON-encoded; every '<string>' value is a placeholder.
$payload = [
  'contents' => [
    'role' => '<string>',
    'parts' => [
      'text' => '<string>',
    ],
  ],
  'generation_config' => [
    'temperature' => 123,
    'speech_config' => [
      'voice_config' => [
        'prebuilt_voice_config' => [
          'voice_name' => '<string>',
        ],
      ],
      'language_code' => '<string>',
      'multi_speaker_voice_config' => [
        'speaker_voice_configs' => [
          [
            'speaker' => '<string>',
            'voice_config' => [
              'prebuilt_voice_config' => [
                'voice_name' => '<string>',
              ],
            ],
          ],
        ],
      ],
    ],
  ],
];

$ch = curl_init();

// Configure a POST that returns the response as a string instead of printing it.
curl_setopt_array($ch, [
  CURLOPT_URL => "https://api.highwayapi.ai/v3/gemini-2.5-flash-tts",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode($payload),
  CURLOPT_HTTPHEADER => [
    "Authorization: <authorization>",
    "Content-Type: <content-type>"
  ],
]);

$response = curl_exec($ch);
$error = curl_error($ch);

curl_close($ch);

// Show the cURL error when there is one, otherwise the raw response body.
echo $error ? ("cURL Error #:" . $error) : $response;

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a sample text-to-speech request to the Gemini 2.5 Flash TTS
// endpoint and prints the raw response body. Every "<...>" value in the
// payload and headers is a placeholder to be replaced by the caller.
func main() {
	url := "https://api.highwayapi.ai/v3/gemini-2.5-flash-tts"

	// Raw string literal keeps the JSON body readable without escapes.
	payload := strings.NewReader(`{
  "contents": {
    "role": "<string>",
    "parts": {
      "text": "<string>"
    }
  },
  "generation_config": {
    "temperature": 123,
    "speech_config": {
      "voice_config": {
        "prebuilt_voice_config": {
          "voice_name": "<string>"
        }
      },
      "language_code": "<string>",
      "multi_speaker_voice_config": {
        "speaker_voice_configs": [
          {
            "speaker": "<string>",
            "voice_config": {
              "prebuilt_voice_config": {
                "voice_name": "<string>"
              }
            }
          }
        ]
      }
    }
  }
}`)

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Content-Type", "<content-type>")
	req.Header.Add("Authorization", "<authorization>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so stop before touching res.Body.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response:", err)
		return
	}

	fmt.Println(string(body))
}

// JSON request body; every "<string>" value is a placeholder.
String requestBody = "{\n  \"contents\": {\n    \"role\": \"<string>\",\n    \"parts\": {\n      \"text\": \"<string>\"\n    }\n  },\n  \"generation_config\": {\n    \"temperature\": 123,\n    \"speech_config\": {\n      \"voice_config\": {\n        \"prebuilt_voice_config\": {\n          \"voice_name\": \"<string>\"\n        }\n      },\n      \"language_code\": \"<string>\",\n      \"multi_speaker_voice_config\": {\n        \"speaker_voice_configs\": [\n          {\n            \"speaker\": \"<string>\",\n            \"voice_config\": {\n              \"prebuilt_voice_config\": {\n                \"voice_name\": \"<string>\"\n              }\n            }\n          }\n        ]\n      }\n    }\n  }\n}";

// POST the body with the placeholder headers and read the reply as a string.
HttpResponse<String> response = Unirest.post("https://api.highwayapi.ai/v3/gemini-2.5-flash-tts")
  .header("Content-Type", "<content-type>")
  .header("Authorization", "<authorization>")
  .body(requestBody)
  .asString();

require 'uri'
require 'net/http'

# Endpoint that receives the text-to-speech request.
endpoint = URI("https://api.highwayapi.ai/v3/gemini-2.5-flash-tts")

# HTTPS client bound to the endpoint's host and port.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# Build the POST; header values and "<string>" fields are placeholders.
post = Net::HTTP::Post.new(endpoint)
{
  "Content-Type" => '<content-type>',
  "Authorization" => '<authorization>'
}.each { |name, value| post[name] = value }
post.body = "{\n  \"contents\": {\n    \"role\": \"<string>\",\n    \"parts\": {\n      \"text\": \"<string>\"\n    }\n  },\n  \"generation_config\": {\n    \"temperature\": 123,\n    \"speech_config\": {\n      \"voice_config\": {\n        \"prebuilt_voice_config\": {\n          \"voice_name\": \"<string>\"\n        }\n      },\n      \"language_code\": \"<string>\",\n      \"multi_speaker_voice_config\": {\n        \"speaker_voice_configs\": [\n          {\n            \"speaker\": \"<string>\",\n            \"voice_config\": {\n              \"prebuilt_voice_config\": {\n                \"voice_name\": \"<string>\"\n              }\n            }\n          }\n        ]\n      }\n    }\n  }\n}"

# Send the request and print the raw response body.
reply = client.request(post)
puts reply.read_body

{
  "audioContent": "<string>",
  "usageMetadata": {
    "totalTokenCount": 123,
    "promptTokenCount": 123,
    "candidatesTokenCount": 123
  }
}

POST

gemini-2.5-flash-tts

Gemini 2.5 Flash TTS 文本转语音

# POST a sample text-to-speech request; replace every <...> placeholder before running.
curl -X POST \
  'https://api.highwayapi.ai/v3/gemini-2.5-flash-tts' \
  -H 'Authorization: <authorization>' \
  -H 'Content-Type: <content-type>' \
  -d '
{
  "contents": {
    "role": "<string>",
    "parts": {
      "text": "<string>"
    }
  },
  "generation_config": {
    "temperature": 123,
    "speech_config": {
      "voice_config": {
        "prebuilt_voice_config": {
          "voice_name": "<string>"
        }
      },
      "language_code": "<string>",
      "multi_speaker_voice_config": {
        "speaker_voice_configs": [
          {
            "speaker": "<string>",
            "voice_config": {
              "prebuilt_voice_config": {
                "voice_name": "<string>"
              }
            }
          }
        ]
      }
    }
  }
}
'

import requests

# Endpoint that receives the text-to-speech request.
url = "https://api.highwayapi.ai/v3/gemini-2.5-flash-tts"

# JSON request body; every "<string>" value is a placeholder to fill in.
payload = {
    "contents": {
        "role": "<string>",
        "parts": { "text": "<string>" }
    },
    "generation_config": {
        "temperature": 123,
        "speech_config": {
            "voice_config": { "prebuilt_voice_config": { "voice_name": "<string>" } },
            "language_code": "<string>",
            "multi_speaker_voice_config": { "speaker_voice_configs": [
                    {
                        "speaker": "<string>",
                        "voice_config": { "prebuilt_voice_config": { "voice_name": "<string>" } }
                    }
                ] }
        }
    }
}
# Header values are placeholders as well.
headers = {
    "Content-Type": "<content-type>",
    "Authorization": "<authorization>"
}

# requests applies no timeout unless one is given, so an unresponsive server
# would block this script indefinitely; cap the wait at 30 seconds.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Print the raw response body.
print(response.text)

// Request body for the text-to-speech call; '<string>' values are placeholders.
const requestBody = {
  contents: {
    role: '<string>',
    parts: {text: '<string>'}
  },
  generation_config: {
    temperature: 123,
    speech_config: {
      voice_config: {
        prebuilt_voice_config: {voice_name: '<string>'}
      },
      language_code: '<string>',
      multi_speaker_voice_config: {
        speaker_voice_configs: [
          {
            speaker: '<string>',
            voice_config: {
              prebuilt_voice_config: {voice_name: '<string>'}
            }
          }
        ]
      }
    }
  }
};

// fetch settings: POST the serialized body with the placeholder headers.
const options = {
  method: 'POST',
  headers: {
    'Content-Type': '<content-type>',
    Authorization: '<authorization>'
  },
  body: JSON.stringify(requestBody)
};

// Parse the reply as JSON and log it; log any failure to stderr.
fetch('https://api.highwayapi.ai/v3/gemini-2.5-flash-tts', options)
  .then((response) => response.json())
  .then((data) => console.log(data))
  .catch((error) => console.error(error));

<?php

// Request body that gets JSON-encoded; every '<string>' value is a placeholder.
$payload = [
  'contents' => [
    'role' => '<string>',
    'parts' => [
      'text' => '<string>',
    ],
  ],
  'generation_config' => [
    'temperature' => 123,
    'speech_config' => [
      'voice_config' => [
        'prebuilt_voice_config' => [
          'voice_name' => '<string>',
        ],
      ],
      'language_code' => '<string>',
      'multi_speaker_voice_config' => [
        'speaker_voice_configs' => [
          [
            'speaker' => '<string>',
            'voice_config' => [
              'prebuilt_voice_config' => [
                'voice_name' => '<string>',
              ],
            ],
          ],
        ],
      ],
    ],
  ],
];

$ch = curl_init();

// Configure a POST that returns the response as a string instead of printing it.
curl_setopt_array($ch, [
  CURLOPT_URL => "https://api.highwayapi.ai/v3/gemini-2.5-flash-tts",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode($payload),
  CURLOPT_HTTPHEADER => [
    "Authorization: <authorization>",
    "Content-Type: <content-type>"
  ],
]);

$response = curl_exec($ch);
$error = curl_error($ch);

curl_close($ch);

// Show the cURL error when there is one, otherwise the raw response body.
echo $error ? ("cURL Error #:" . $error) : $response;

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a sample text-to-speech request to the Gemini 2.5 Flash TTS
// endpoint and prints the raw response body. Every "<...>" value in the
// payload and headers is a placeholder to be replaced by the caller.
func main() {
	url := "https://api.highwayapi.ai/v3/gemini-2.5-flash-tts"

	// Raw string literal keeps the JSON body readable without escapes.
	payload := strings.NewReader(`{
  "contents": {
    "role": "<string>",
    "parts": {
      "text": "<string>"
    }
  },
  "generation_config": {
    "temperature": 123,
    "speech_config": {
      "voice_config": {
        "prebuilt_voice_config": {
          "voice_name": "<string>"
        }
      },
      "language_code": "<string>",
      "multi_speaker_voice_config": {
        "speaker_voice_configs": [
          {
            "speaker": "<string>",
            "voice_config": {
              "prebuilt_voice_config": {
                "voice_name": "<string>"
              }
            }
          }
        ]
      }
    }
  }
}`)

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Content-Type", "<content-type>")
	req.Header.Add("Authorization", "<authorization>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so stop before touching res.Body.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response:", err)
		return
	}

	fmt.Println(string(body))
}

// JSON request body; every "<string>" value is a placeholder.
String requestBody = "{\n  \"contents\": {\n    \"role\": \"<string>\",\n    \"parts\": {\n      \"text\": \"<string>\"\n    }\n  },\n  \"generation_config\": {\n    \"temperature\": 123,\n    \"speech_config\": {\n      \"voice_config\": {\n        \"prebuilt_voice_config\": {\n          \"voice_name\": \"<string>\"\n        }\n      },\n      \"language_code\": \"<string>\",\n      \"multi_speaker_voice_config\": {\n        \"speaker_voice_configs\": [\n          {\n            \"speaker\": \"<string>\",\n            \"voice_config\": {\n              \"prebuilt_voice_config\": {\n                \"voice_name\": \"<string>\"\n              }\n            }\n          }\n        ]\n      }\n    }\n  }\n}";

// POST the body with the placeholder headers and read the reply as a string.
HttpResponse<String> response = Unirest.post("https://api.highwayapi.ai/v3/gemini-2.5-flash-tts")
  .header("Content-Type", "<content-type>")
  .header("Authorization", "<authorization>")
  .body(requestBody)
  .asString();

require 'uri'
require 'net/http'

# Endpoint that receives the text-to-speech request.
endpoint = URI("https://api.highwayapi.ai/v3/gemini-2.5-flash-tts")

# HTTPS client bound to the endpoint's host and port.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# Build the POST; header values and "<string>" fields are placeholders.
post = Net::HTTP::Post.new(endpoint)
{
  "Content-Type" => '<content-type>',
  "Authorization" => '<authorization>'
}.each { |name, value| post[name] = value }
post.body = "{\n  \"contents\": {\n    \"role\": \"<string>\",\n    \"parts\": {\n      \"text\": \"<string>\"\n    }\n  },\n  \"generation_config\": {\n    \"temperature\": 123,\n    \"speech_config\": {\n      \"voice_config\": {\n        \"prebuilt_voice_config\": {\n          \"voice_name\": \"<string>\"\n        }\n      },\n      \"language_code\": \"<string>\",\n      \"multi_speaker_voice_config\": {\n        \"speaker_voice_configs\": [\n          {\n            \"speaker\": \"<string>\",\n            \"voice_config\": {\n              \"prebuilt_voice_config\": {\n                \"voice_name\": \"<string>\"\n              }\n            }\n          }\n        ]\n      }\n    }\n  }\n}"

# Send the request and print the raw response body.
reply = client.request(post)
puts reply.read_body

{
  "audioContent": "<string>",
  "usageMetadata": {
    "totalTokenCount": 123,
    "promptTokenCount": 123,
    "candidatesTokenCount": 123
  }
}

基于 Google Vertex AI generateContent 接口的 Gemini 2.5 Flash TTS。支持同步和流式单人/多人语音合成，通过自然语言提示词精确控制风格、口音、节奏、语调和情感表达。contents 字段最大 8000 字节，输出音频最长约 655 秒。Vertex AI 输出为 LINEAR16 PCM 格式（24kHz, 单声道），不包含 WAV 头。如需其他音频格式需客户端自行转换。

请求头

string

必填

枚举值: application/json

string

必填

Bearer 身份验证格式: Bearer {{API 密钥}}。

请求体

object

必填

隐藏 properties

string

默认值:"user"

必填

角色，固定为 user。可选值：user

object

必填

隐藏 properties

string

必填

要合成为语音的文本内容。Vertex AI API 将提示词和文本合并在一个字段中，格式为 ‘<提示词>: <文本>’，例如 ‘Say the following in a curious way: OK, so… tell me about this AI thing.’。总大小最多 8000 字节，超出 655 秒的音频将被截断。支持内联标记标签：[sigh]、[laughing]、[uhm]、[sarcasm]、[robotic]、[shouting]、[whispering]、[extremely fast]、[short pause]、[medium pause]、[long pause]。长度限制：0 - 8000

object

必填

隐藏 properties

number

默认值:2

温度参数，控制语音生成的随机性和创造性。值越高越有创意和多样性，值越低越可预测和集中。有效范围 (0.0, 2.0]，推荐值为 2.0。取值范围：[0, 2]

object

必填

隐藏 properties

object

单人语音配置。与 multi_speaker_voice_config 二选一

隐藏 properties

object

隐藏 properties

string

预置语音名称（大小写不敏感）。可选的 30 个语音（男女声均有）。可选值：Achernar, Achird, Algenib, Algieba, Alnilam, Aoede, Autonoe, Callirrhoe, Charon, Despina, Enceladus, Erinome, Fenrir, Gacrux, Iapetus, Kore, Laomedeia, Leda, Orus, Pulcherrima, Puck, Rasalgethi, Sadachbia, Sadaltager, Schedar, Sulafat, Umbriel, Vindemiatrix, Zephyr, Zubenelgenubi

string

语言代码（BCP-47 格式，大小写不敏感）。可选字段；不传时将根据输入文本自动识别语言。GA 语言：ar-EG, bn-BD, nl-NL, en-IN, en-US, fr-FR, de-DE, hi-IN, id-ID, it-IT, ja-JP, ko-KR, mr-IN, pl-PL, pt-BR, ro-RO, ru-RU, es-ES, ta-IN, te-IN, th-TH, tr-TR, uk-UA, vi-VN。Preview 语言包括 cmn-CN（中文普通话）等 63 种。可选值：af-ZA, am-ET, ar-001, ar-EG, az-AZ, be-BY, bg-BG, bn-BD, ca-ES, ceb-PH, cmn-CN, cmn-TW, cs-CZ, da-DK, de-DE, el-GR, en-AU, en-GB, en-IN, en-US, es-419, es-ES, es-MX, et-EE, eu-ES, fa-IR, fi-FI, fil-PH, fr-CA, fr-FR, gl-ES, gu-IN, he-IL, hi-IN, hr-HR, ht-HT, hu-HU, hy-AM, id-ID, is-IS, it-IT, ja-JP, jv-JV, ka-GE, kn-IN, ko-KR, kok-IN, la-VA, lb-LU, lo-LA, lt-LT, lv-LV, mai-IN, mg-MG, mk-MK, ml-IN, mn-MN, mr-IN, ms-MY, my-MM, nb-NO, ne-NP, nl-NL, nn-NO, or-IN, pa-IN, pl-PL, ps-AF, pt-BR, pt-PT, ro-RO, ru-RU, sd-IN, si-LK, sk-SK, sl-SI, sq-AL, sr-RS, sv-SE, sw-KE, ta-IN, te-IN, th-TH, tr-TR, uk-UA, ur-PK, vi-VN

object

多人语音配置。与 voice_config 二选一。注意：gemini-2.5-flash-lite-preview-tts 不支持多人合成

隐藏 properties

object[]

说话人语音配置列表

隐藏 properties

string

必填

说话人别名，必须仅由字母数字字符组成，不含空格。需与 contents.parts.text 中的说话人标识一致

object

必填

隐藏 properties

object

隐藏 properties

string

响应信息

string

Base64 编码的音频内容。格式为 LINEAR16 PCM（24kHz, 单声道, 16-bit signed little-endian），不包含 WAV 头。客户端可使用 ffmpeg 转换：ffmpeg -f s16le -ar 24k -ac 1 -i input.raw output.wav

object

隐藏 properties

integer

总 token 数量（promptTokenCount + candidatesTokenCount）

integer

输入文本消耗的 token 数量

integer

输出音频消耗的 token 数量（每秒音频约 25 个 token）

Fish Audio 音频复刻

MiniMax Music

API 基础

大语言

图像

视频

音频

Gemini 2.5 Flash TTS 文本转语音

请求头

请求体

响应信息

​请求头

​请求体

​响应信息

请求头

请求体

响应信息