Skip to main content
POST
/
openai
/
chat
/
completions
Create a chat completion
curl --request POST \
  --url https://api.sup.ai/v1/openai/chat/completions \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '
{
  "messages": [
    {
      "content": "You are a helpful assistant.",
      "role": "system"
    },
    {
      "content": "Hello!",
      "role": "user"
    }
  ],
  "environment": {
    "date": "2024-01-15T10:30:00Z",
    "location": {
      "ip_address": "current"
    },
    "user_name": "John Doe"
  },
  "include_supai_chunks": false,
  "model": "auto",
  "models": null,
  "stream": true,
  "stream_options": {
    "include_usage": true
  }
}
'
{
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "message": {
        "content": "The capital of France is Paris.",
        "role": "assistant"
      }
    }
  ],
  "created": 1705312200,
  "id": "chatcmpl-abc123",
  "model": "auto",
  "object": "chat.completion",
  "usage": {
    "completion_tokens": 150,
    "prompt_tokens": 50,
    "total_tokens": 200
  }
}

Authorizations

Authorization
string
header
required

API key authentication. Use your Supai API key as the bearer token.

Body

application/json
messages
object[]
required

List of messages in the conversation. Must end with a user message.

Minimum array length: 1
Example:
[
  {
    "content": "You are a helpful assistant.",
    "role": "system"
  },
  { "content": "Hello!", "role": "user" }
]
environment
object

User environment context including date, location, and name

include_supai_chunks
boolean
default:false

Whether to include Supai-specific chunk data in the stream

Example:

false

model
enum<string>
default:auto

The mode ID to use for generation, passed in the OpenAI-compatible "model" field. "auto" will automatically select the best mode for the request.

Available options:
auto,
deep-thinking,
expert,
fast,
thinking
Example:

"auto"

models
enum<string>[] | null

Specific model IDs to use. If null, all active models are available.

Available options:
ai21/jamba-large-1.7,
aion-labs/aion-1.0,
aion-labs/aion-1.0-mini,
aion-labs/aion-rp-llama-3.1-8b,
alfredpros/codellama-7b-instruct-solidity,
alibaba/qvq-max,
alibaba/qwen-3-235b,
alibaba/qwen-flash,
alibaba/qwen-flash-character,
alibaba/qwen-max,
alibaba/qwen-mt-flash,
alibaba/qwen-mt-lite,
alibaba/qwen-mt-plus,
alibaba/qwen-mt-turbo,
alibaba/qwen-plus,
alibaba/qwen-plus-character,
alibaba/qwen-plus-character-ja,
alibaba/qwen-plus-thinking,
alibaba/qwen-turbo,
alibaba/qwen-vl-max,
alibaba/qwen-vl-plus,
alibaba/qwen2.5-14b-instruct,
alibaba/qwen2.5-14b-instruct-1m,
alibaba/qwen2.5-32b-instruct,
alibaba/qwen2.5-72b-instruct,
alibaba/qwen2.5-7b-instruct,
alibaba/qwen2.5-7b-instruct-1m,
alibaba/qwen2.5-coder-32b-instruct,
alibaba/qwen2.5-coder-7b-instruct,
alibaba/qwen2.5-vl-32b-instruct,
alibaba/qwen2.5-vl-3b-instruct,
alibaba/qwen2.5-vl-72b-instruct,
alibaba/qwen2.5-vl-7b-instruct,
alibaba/qwen3-0.6b,
alibaba/qwen3-1.7b,
alibaba/qwen3-14b,
alibaba/qwen3-235b-a22b,
alibaba/qwen3-235b-a22b-instruct,
alibaba/qwen3-235b-a22b-instruct-2507,
alibaba/qwen3-235b-a22b-thinking,
alibaba/qwen3-235b-a22b-thinking-2507,
alibaba/qwen3-30b-a3b,
alibaba/qwen3-30b-a3b-instruct,
alibaba/qwen3-30b-a3b-instruct-2507,
alibaba/qwen3-30b-a3b-thinking,
alibaba/qwen3-30b-a3b-thinking-2507,
alibaba/qwen3-32b,
alibaba/qwen3-4b,
alibaba/qwen3-8b,
alibaba/qwen3-coder-30b-a3b,
alibaba/qwen3-coder-30b-a3b-instruct,
alibaba/qwen3-coder-480b-a35b,
alibaba/qwen3-coder-480b-a35b-instruct,
alibaba/qwen3-coder-flash,
alibaba/qwen3-coder-next,
alibaba/qwen3-coder-plus,
alibaba/qwen3-max,
alibaba/qwen3-max-thinking,
alibaba/qwen3-next-80b-a3b-instruct,
alibaba/qwen3-next-80b-a3b-thinking,
alibaba/qwen3-vl-235b-a22b-instruct,
alibaba/qwen3-vl-235b-a22b-thinking,
alibaba/qwen3-vl-30b-a3b-instruct,
alibaba/qwen3-vl-30b-a3b-thinking,
alibaba/qwen3-vl-32b-instruct,
alibaba/qwen3-vl-32b-thinking,
alibaba/qwen3-vl-8b-instruct,
alibaba/qwen3-vl-8b-thinking,
alibaba/qwen3-vl-flash,
alibaba/qwen3-vl-plus,
alibaba/qwen3-vl-thinking,
alibaba/qwen3.5-397b-a17b,
alibaba/qwen3.5-plus,
alibaba/qwq-32b,
alibaba/qwq-plus,
alibaba/tongyi-deepresearch-30b-a3b,
allenai/molmo-2-8b,
allenai/olmo-2-32b-instruct,
allenai/olmo-3-32b-think,
allenai/olmo-3-7b-instruct,
allenai/olmo-3-7b-think,
allenai/olmo-3.1-32b-instruct,
allenai/olmo-3.1-32b-think,
alpindale/goliath-120b,
amazon/nova-2-lite,
amazon/nova-lite,
amazon/nova-micro,
amazon/nova-premier,
amazon/nova-pro,
anthracite/magnum-v4-72b,
anthropic/claude-1.0,
anthropic/claude-1.1,
anthropic/claude-1.2,
anthropic/claude-1.3,
anthropic/claude-2.0,
anthropic/claude-2.1,
anthropic/claude-4.5-haiku,
anthropic/claude-haiku-3,
anthropic/claude-haiku-3.5,
anthropic/claude-instant-1.0,
anthropic/claude-instant-1.1,
anthropic/claude-instant-1.2,
anthropic/claude-opus-3,
anthropic/claude-opus-4,
anthropic/claude-opus-4.1,
anthropic/claude-opus-4.5,
anthropic/claude-opus-4.6,
anthropic/claude-sonnet-3,
anthropic/claude-sonnet-3.5,
anthropic/claude-sonnet-3.7,
anthropic/claude-sonnet-4,
anthropic/claude-sonnet-4.5,
anthropic/claude-sonnet-4.6,
arcee/coder-large,
arcee/maestro-reasoning,
arcee/spotlight,
arcee/trinity-large,
arcee/trinity-mini,
arcee/virtuoso-large,
baidu/ernie-4.5-21b-a3b,
baidu/ernie-4.5-21b-a3b-thinking,
baidu/ernie-4.5-300b-a47b,
baidu/ernie-4.5-vl-28b-a3b,
baidu/ernie-4.5-vl-424b-a47b,
bytedance/seed-1.6,
bytedance/seed-1.6-flash,
bytedance/ui-tars-1.5-7b,
cohere/aya-expanse-32b,
cohere/aya-expanse-8b,
cohere/aya-vision-32b,
cohere/aya-vision-8b,
cohere/command,
cohere/command-a,
cohere/command-a-reasoning,
cohere/command-a-translate,
cohere/command-a-vision,
cohere/command-light,
cohere/command-r,
cohere/command-r-plus,
cohere/command-r7b,
deepcogito/cogito-v2.1-671b,
deepseek/deepseek-r1,
deepseek/deepseek-r1-distill-llama-70b,
deepseek/deepseek-r1-distill-qwen-32b,
deepseek/deepseek-v3,
deepseek/deepseek-v3.1,
deepseek/deepseek-v3.1-terminus,
deepseek/deepseek-v3.2,
deepseek/deepseek-v3.2-exp,
deepseek/deepseek-v3.2-exp-thinking,
deepseek/deepseek-v3.2-speciale,
deepseek/deepseek-v3.2-thinking,
eleutherai/llemma-7b,
essentialai/rnj-1-instruct,
google/gemini-2.0-flash,
google/gemini-2.0-flash-lite,
google/gemini-2.5-flash,
google/gemini-2.5-flash-image,
google/gemini-2.5-flash-lite,
google/gemini-2.5-pro,
google/gemini-3-flash,
google/gemini-3-pro-image,
google/gemini-3-pro-preview,
google/gemini-3.1-pro-preview,
google/gemma-2-27b-it,
google/gemma-2-9b-it,
google/gemma-3-12b-it,
google/gemma-3-27b-it,
google/gemma-3-4b-it,
google/gemma-3n-e2b-it,
google/gemma-3n-e4b-it,
gryphe/mythomax-l2-13b,
ibm/granite-4.0-h-micro,
inception/mercury,
inception/mercury-coder,
inflection/inflection-3-pi,
inflection/inflection-3-productivity,
kwaipilot/kat-coder-pro-v1,
liquid/lfm-2.2-6b,
liquid/lfm-2.5-1.2b-instruct,
liquid/lfm-2.5-1.2b-thinking,
liquid/lfm2-8b-a1b,
meituan/longcat-flash,
meta/llama-3-70b-instruct,
meta/llama-3-8b-instruct,
meta/llama-3.1-405b,
meta/llama-3.1-405b-instruct,
meta/llama-3.1-70b-instruct,
meta/llama-3.1-8b-instruct,
meta/llama-3.2-11b-vision-instruct,
meta/llama-3.2-1b-instruct,
meta/llama-3.2-3b-instruct,
meta/llama-3.3-70b,
meta/llama-4-maverick,
meta/llama-4-scout,
meta/llama-guard-2-8b,
meta/llama-guard-3-8b,
meta/llama-guard-4-12b,
microsoft/phi-4,
microsoft/wizardlm-2-8x22b,
minimax/minimax-01,
minimax/minimax-m1,
minimax/minimax-m2,
minimax/minimax-m2-her,
minimax/minimax-m2.1,
minimax/minimax-m2.1-highspeed,
minimax/minimax-m2.5,
minimax/minimax-m2.5-highspeed,
mistral/codestral,
mistral/devstral,
mistral/devstral-medium,
mistral/devstral-small,
mistral/magistral-medium,
mistral/magistral-small,
mistral/ministral-14b,
mistral/ministral-3b,
mistral/ministral-8b,
mistral/mistral-7b,
mistral/mistral-large,
mistral/mistral-medium,
mistral/mistral-nemo,
mistral/mistral-saba,
mistral/mistral-small,
mistral/mistral-small-creative,
mistral/mistral-tiny,
mistral/mixtral-8x22b,
mistral/mixtral-8x7b,
mistral/pixtral-12b,
mistral/pixtral-large,
mistral/voxtral-mini,
mistral/voxtral-small,
moonshotai/kimi-k2,
moonshotai/kimi-k2-thinking,
moonshotai/kimi-k2-thinking-turbo,
moonshotai/kimi-k2-turbo,
moonshotai/kimi-k2.5,
moonshotai/moonshot-v1-128k,
moonshotai/moonshot-v1-128k-vision,
moonshotai/moonshot-v1-32k,
moonshotai/moonshot-v1-32k-vision,
moonshotai/moonshot-v1-8k,
moonshotai/moonshot-v1-8k-vision,
morph/morph-v3-fast,
morph/morph-v3-large,
neversleep/llama-3.1-lumimaid-8b,
neversleep/noromaid-20b,
nex-agi/deepseek-v3.1-nex-n1,
nousresearch/deephermes-3-mistral-24b,
nousresearch/hermes-2-pro-llama-3-8b,
nousresearch/hermes-3-llama-3.1-405b,
nousresearch/hermes-3-llama-3.1-70b,
nousresearch/hermes-4-405b,
nousresearch/hermes-4-70b,
nvidia/llama-3.1-nemotron-70b-instruct,
nvidia/llama-3.1-nemotron-ultra-253b-v1,
nvidia/llama-3.3-nemotron-super-49b-v1.5,
nvidia/nemotron-3-nano-30b-a3b,
nvidia/nemotron-nano-12b-v2-vl,
nvidia/nemotron-nano-9b-v2,
openai/gpt-5,
openai/gpt-5-mini,
openai/gpt-5-nano,
openai/gpt-5-pro,
openai/gpt-5.1,
openai/gpt-5.1-instant,
openai/gpt-5.1-thinking,
openai/gpt-5.2,
openai/gpt-5.2-pro,
opengvlab/internvl3-78b,
perplexity/sonar,
perplexity/sonar-deep-research,
perplexity/sonar-pro,
perplexity/sonar-pro-search,
perplexity/sonar-reasoning-pro,
prime-intellect/intellect-3,
raifle/sorcererlm-8x22b,
relace/relace-apply-3,
relace/relace-search,
sao10k/l3-euryale-70b,
sao10k/l3-lunaris-8b,
sao10k/l3.1-70b-hanami-x1,
sao10k/l3.1-euryale-70b,
sao10k/l3.3-euryale-70b-v2.3,
stepfun/step-3.5-flash,
tencent/hunyuan-a13b-instruct,
thedrummer/cydonia-24b-v4.1,
thedrummer/rocinante-12b,
thedrummer/skyfall-36b-v2,
thedrummer/unslopnemo-12b,
tngtech/deepseek-r1t-chimera,
tngtech/deepseek-r1t2-chimera,
tngtech/tng-r1t-chimera,
undi95/remm-slerp-l2-13b,
upstage/solar-pro-3,
venice/uncensored,
writer/palmyra-x5,
xai/grok-2-vision,
xai/grok-3,
xai/grok-3-mini,
xai/grok-4,
xai/grok-4-fast-non-reasoning,
xai/grok-4-fast-reasoning,
xai/grok-4.1-fast-non-reasoning,
xai/grok-4.1-fast-reasoning,
xai/grok-code-fast-1,
xiaomi/mimo-v2-flash,
zai/glm-4-32b,
zai/glm-4.5,
zai/glm-4.5-air,
zai/glm-4.5-airx,
zai/glm-4.5-flash,
zai/glm-4.5-x,
zai/glm-4.5v,
zai/glm-4.6,
zai/glm-4.6v,
zai/glm-4.6v-flash,
zai/glm-4.6v-flashx,
zai/glm-4.7,
zai/glm-4.7-flash,
zai/glm-4.7-flashx,
zai/glm-5
Example:

null

stream
boolean
default:false

Whether to stream the response using Server-Sent Events

Example:

true

stream_options
object

Options for streaming responses

Response

Successful chat completion response. Returns JSON for non-streaming or SSE for streaming.

choices
object[]
required

List of generated responses

created
integer
required

Unix timestamp of when the response was created

Example:

1705312200

id
string
required

Unique identifier for the chat completion

Example:

"chatcmpl-abc123"

model
string
required

The mode ID that was used to generate this completion (echoes the request's "model" field, e.g. "auto").

Example:

"auto"

object
enum<string>
required
Available options:
chat.completion
usage
object
required