Nexevo Features
Asynchronous concurrent batch processing (10x throughput)
Use AsyncNexevo with asyncio.gather to process batched requests; throughput is 10x or more compared with serial calls, which makes it well suited to offline tasks such as data annotation and classification.
Python
import asyncio

from nexevo_ai import AsyncNexevo, NexevoError

# Your data loading
PROMPTS = [
    f"Classify this review's sentiment (positive/negative): {text}"
    for text in load_dataset()
]

async def classify(client, prompt: str, idx: int):
    try:
        resp = await client.chat.completions.create(
            model="qwen-turbo",  # A cheap model is enough here ($0.0625/M tokens)
            messages=[{"role": "user", "content": prompt}],
            max_tokens=10,
            temperature=0,
        )
        return idx, resp["choices"][0]["message"]["content"].strip()
    except NexevoError as e:
        if e.is_retryable:
            # Back off for the server-suggested delay, then retry the same prompt
            await asyncio.sleep(e.retry_after or 1)
            return await classify(client, prompt, idx)
        return idx, f"ERROR: {e}"

async def main():
    async with AsyncNexevo() as client:
        # Cap concurrency at 50 with a semaphore so requests stay under your rate limit
        sem = asyncio.Semaphore(50)

        async def run(p, i):
            async with sem:
                return await classify(client, p, i)

        results = await asyncio.gather(*[run(p, i) for i, p in enumerate(PROMPTS)])
        print(f"Completed {len(results)} items")

asyncio.run(main())
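For large batches it also helps to report progress and persist results as they arrive, rather than waiting for the whole gather to finish. The sketch below reuses the classify helper and PROMPTS list from the example above and is only illustrative: main_with_progress and the results.csv output path are names chosen here, not part of the Nexevo SDK.

Python
import asyncio
import csv

from nexevo_ai import AsyncNexevo

async def main_with_progress():
    async with AsyncNexevo() as client:
        sem = asyncio.Semaphore(50)

        async def run(p, i):
            async with sem:
                return await classify(client, p, i)  # classify() defined in the example above

        tasks = [asyncio.create_task(run(p, i)) for i, p in enumerate(PROMPTS)]
        results = []
        # as_completed yields tasks as they finish, which makes progress reporting easy
        for n, task in enumerate(asyncio.as_completed(tasks), start=1):
            results.append(await task)
            if n % 100 == 0:
                print(f"{n}/{len(tasks)} done")

        # Completion order is arbitrary, so sort by the index carried through classify()
        results.sort(key=lambda pair: pair[0])
        with open("results.csv", "w", newline="") as f:  # example output path
            writer = csv.writer(f)
            writer.writerow(["index", "label"])
            writer.writerows(results)

asyncio.run(main_with_progress())

The carried index matters here: asyncio.gather preserves input order, but asyncio.as_completed does not, so sorting by index restores the original row order before writing.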