Nexevo Features
Asynchronous concurrent batch processing (10x throughput)
Use AsyncNexevo with asyncio.gather to process batched requests; throughput is 10x or more compared with serial calls, which makes it well suited to offline tasks such as data annotation and classification.
Python
import asyncio

from nexevo_ai import AsyncNexevo, NexevoError

# Your data loading
PROMPTS = [
    f"Classify this review's sentiment (positive/negative): {text}"
    for text in load_dataset()
]

async def classify(client, prompt: str, idx: int):
    try:
        resp = await client.chat.completions.create(
            model="qwen-turbo",  # A cheap model is enough here ($0.0625/M tokens)
            messages=[{"role": "user", "content": prompt}],
            max_tokens=10,
            temperature=0,
        )
        return idx, resp["choices"][0]["message"]["content"].strip()
    except NexevoError as e:
        if e.is_retryable:
            # Back off for the server-suggested delay, then retry the same prompt
            await asyncio.sleep(e.retry_after or 1)
            return await classify(client, prompt, idx)
        return idx, f"ERROR: {e}"

async def main():
    async with AsyncNexevo() as client:
        # Cap concurrency at 50 with a semaphore so requests stay under your rate limit
        sem = asyncio.Semaphore(50)

        async def run(p, i):
            async with sem:
                return await classify(client, p, i)

        results = await asyncio.gather(*[run(p, i) for i, p in enumerate(PROMPTS)])
        print(f"Completed {len(results)} items")

asyncio.run(main())
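For large batches it also helps to report progress and persist results as they arrive, rather than waiting for the whole gather to finish. The sketch below reuses the classify helper and PROMPTS list from the example above and is only illustrative: main_with_progress and the results.csv output path are names chosen here, not part of the Nexevo SDK.

Python
import asyncio
import csv

from nexevo_ai import AsyncNexevo

async def main_with_progress():
    async with AsyncNexevo() as client:
        sem = asyncio.Semaphore(50)

        async def run(p, i):
            async with sem:
                return await classify(client, p, i)  # classify() defined in the example above

        tasks = [asyncio.create_task(run(p, i)) for i, p in enumerate(PROMPTS)]
        results = []
        # as_completed yields tasks as they finish, which makes progress reporting easy
        for n, task in enumerate(asyncio.as_completed(tasks), start=1):
            results.append(await task)
            if n % 100 == 0:
                print(f"{n}/{len(tasks)} done")

        # Completion order is arbitrary, so sort by the index carried through classify()
        results.sort(key=lambda pair: pair[0])
        with open("results.csv", "w", newline="") as f:  # example output path
            writer = csv.writer(f)
            writer.writerow(["index", "label"])
            writer.writerows(results)

asyncio.run(main_with_progress())

The carried index matters here: asyncio.gather preserves input order, but asyncio.as_completed does not, so sorting by index restores the original row order before writing.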