Trung cấp · Hướng dẫn · Claude API · Nguồn: Anthropic

Usage & Cost API — Theo dõi chi phí Claude API real-time

Minh Tuấn — CTO, Transform Group · Theo dõi

26/03/2026 556 0 7 phút đọc

Nghe bài viết

00:00

1 Công cụ AI sẽ thay đổi cách bạn làm việc: Anthropic cung cấp Usage API tại https://api.anthropic.com/v1/usage cho phép query: Token usage theo ngày, tuần, tháng. Điểm mấu chốt là biết cách đặt prompt đúng để nhận kết quả có thể sử dụng ngay.
2 Góc nhìn thực tế: Bảng giá tham khảo (USD per million tokens) được khai báo trong `MODEL_PRICING` cho từng model, ví dụ "claude-opus-4-5" — check Anthropic.com cho giá mới nhất. Điều quan trọng là hiểu rõ khi nào nên và không nên áp dụng phương pháp này.
3 Không thể bỏ qua: Ngoài Usage API, track từng request ngay trong code (dùng `anthropic`, `sqlite3` và `datetime`) để có granular data. Đây là kiến thức nền tảng mà mọi người làm việc với AI đều cần hiểu rõ.
4 Bước đầu tiên bạn nên làm: thiết lập class `BudgetMonitor(monthly_budget_usd, alert_thresholds)` (kèm `smtplib` và `MIMEText` để gửi email) nhằm nhận cảnh báo khi chi tiêu chạm ngưỡng ngân sách. Áp dụng đúng cách sẽ thấy kết quả rõ rệt từ tuần đầu tiên.
5 Thành thật mà nói: hàm `print_cost_report(tracked_client: TrackedClaudeClient, days: int = 7)` in báo cáo chi phí ra terminal từ số liệu của `get_stats`. Phương pháp này hiệu quả trong hầu hết trường hợp, nhưng bạn cần điều chỉnh cho phù hợp ngữ cảnh riêng.

Khi API usage tăng lên, câu hỏi không còn là "API hoạt động không?" mà là "tôi đang tiêu bao nhiêu và tiêu vào đâu?" Claude Usage API cung cấp granular data về token consumption và cost — đủ để build cost dashboard chuyên nghiệp và thiết lập alerts trước khi bill surprise cuối tháng.

Anthropic Usage API Overview

Anthropic cung cấp Usage API tại https://api.anthropic.com/v1/usage cho phép query:

Token usage theo ngày, tuần, tháng
Breakdown theo model (haiku vs sonnet vs opus)
Input vs output vs cache tokens riêng biệt
Cost estimates dựa trên current pricing

import anthropic
import httpx
import json
from datetime import datetime, timedelta

class UsageTracker:
    """Thin HTTP client that queries token usage from the Anthropic API.

    The API key comes from the ``api_key`` argument or, when that is omitted,
    from the ``ANTHROPIC_API_KEY`` environment variable.

    All date windows are computed in UTC, because the request timestamps are
    sent with a ``Z`` (UTC) suffix.

    NOTE(review): requests go to ``{base_url}/usage`` with
    ``start_time`` / ``end_time`` / ``granularity`` parameters. Anthropic's
    published Usage & Cost Admin API appears to live under
    ``/v1/organizations/usage_report/messages`` and to require an Admin API
    key -- confirm the endpoint, parameters and response shape before relying
    on this in production.
    """

    def __init__(self, api_key: str = None):
        import os  # local import keeps the class self-contained

        self.api_key = api_key or os.environ.get("ANTHROPIC_API_KEY")
        self.base_url = "https://api.anthropic.com/v1"
        self.headers = {
            "x-api-key": self.api_key,
            "anthropic-version": "2023-06-01",
            "content-type": "application/json"
        }

    @staticmethod
    def _utc_now() -> datetime:
        """Return the current moment as a timezone-aware UTC datetime."""
        from datetime import timezone

        return datetime.now(timezone.utc)

    def _query_params(self, start_date: str, end_date: str = None) -> dict:
        """Build the query parameters for a usage request.

        Args:
            start_date: First day of the window, ``YYYY-MM-DD``.
            end_date: Last day of the window, ``YYYY-MM-DD``. Defaults to
                today's date in UTC.

        Returns:
            Dict with ``start_time`` (start of ``start_date``), ``end_time``
            (end of ``end_date``) and ``granularity``.
        """
        end_date = end_date or self._utc_now().strftime("%Y-%m-%d")
        return {
            "start_time": f"{start_date}T00:00:00Z",
            "end_time": f"{end_date}T23:59:59Z",
            "granularity": "daily"  # daily | hourly
        }

    def get_usage(self, start_date: str, end_date: str = None) -> dict:
        """Fetch usage for an inclusive date window.

        Args:
            start_date: First day of the window, ``YYYY-MM-DD``.
            end_date: Last day of the window, ``YYYY-MM-DD``. Defaults to
                today's date in UTC.

        Returns:
            The decoded JSON body of the response.

        Raises:
            httpx.HTTPStatusError: If the API answers with a 4xx/5xx status.
        """
        response = httpx.get(
            f"{self.base_url}/usage",
            headers=self.headers,
            params=self._query_params(start_date, end_date),
        )
        response.raise_for_status()
        return response.json()

    def get_current_month_usage(self) -> dict:
        """Fetch usage from the first day of the current (UTC) month until today."""
        first_of_month = self._utc_now().replace(day=1).strftime("%Y-%m-%d")
        return self.get_usage(first_of_month)

    def get_last_30_days(self) -> dict:
        """Fetch usage for the 30 days ending today (UTC)."""
        end = self._utc_now()
        start = end - timedelta(days=30)
        return self.get_usage(
            start.strftime("%Y-%m-%d"),
            end.strftime("%Y-%m-%d")
        )

Tính toán Chi phí

# Reference pricing in USD per million tokens -- always re-check the official
# Anthropic pricing page, since rates change between model generations.
# "cache_write" is the 5-minute prompt-cache write rate (1.25x base input);
# "cache_read" is the cache-hit rate (0.1x base input).
# NOTE(review): the opus-4-5 and haiku-4-5 rows previously carried
# older-generation rates (15/75 and 0.8/4); confirm the values below against
# the current pricing page before using them for billing decisions.
MODEL_PRICING = {
    "claude-opus-4-5": {
        "input": 5.0,
        "output": 25.0,
        "cache_write": 6.25,
        "cache_read": 0.5
    },
    "claude-sonnet-4-5": {
        "input": 3.0,
        "output": 15.0,
        "cache_write": 3.75,
        "cache_read": 0.3
    },
    "claude-haiku-4-5": {
        "input": 1.0,
        "output": 5.0,
        "cache_write": 1.25,
        "cache_read": 0.1
    }
}

def calculate_cost(usage_data: dict) -> dict:
    """Aggregate token counts and estimated USD cost per model.

    Walks the entries under ``usage_data["data"]`` (treated as empty when the
    key is absent) and prices each one with ``MODEL_PRICING``. Models that are
    not in the pricing table are counted but priced at zero.

    Returns:
        ``{"total_cost_usd": ..., "by_model": {model: {...}}}`` where every
        per-model dict holds the summed ``input_tokens``, ``output_tokens``,
        ``cache_write``, ``cache_read`` and ``cost_usd``. Costs are rounded
        to 4 decimal places.
    """
    zero_rates = {"input": 0, "output": 0, "cache_write": 0, "cache_read": 0}
    per_million = 1_000_000

    running_total = 0
    per_model = {}

    for record in usage_data.get("data", []):
        name = record.get("model", "unknown")
        rates = MODEL_PRICING.get(name, zero_rates)

        n_in = record.get("input_tokens", 0)
        n_out = record.get("output_tokens", 0)
        n_cache_w = record.get("cache_creation_input_tokens", 0)
        n_cache_r = record.get("cache_read_input_tokens", 0)

        # Accumulate in the same left-to-right order as a single sum expression.
        record_cost = (n_in / per_million) * rates["input"]
        record_cost += (n_out / per_million) * rates["output"]
        record_cost += (n_cache_w / per_million) * rates["cache_write"]
        record_cost += (n_cache_r / per_million) * rates["cache_read"]

        bucket = per_model.setdefault(name, {
            "input_tokens": 0, "output_tokens": 0,
            "cache_write": 0, "cache_read": 0, "cost_usd": 0
        })
        bucket["input_tokens"] += n_in
        bucket["output_tokens"] += n_out
        bucket["cache_write"] += n_cache_w
        bucket["cache_read"] += n_cache_r
        bucket["cost_usd"] += record_cost

        running_total += record_cost

    # Round only at the end so per-entry rounding errors do not accumulate.
    by_model = {}
    for name, bucket in per_model.items():
        summary = dict(bucket)
        summary["cost_usd"] = round(bucket["cost_usd"], 4)
        by_model[name] = summary

    return {
        "total_cost_usd": round(running_total, 4),
        "by_model": by_model
    }

Track Usage Per-Request

Ngoài Usage API, track từng request ngay trong code để có granular data:

import anthropic
import sqlite3
from datetime import datetime

# Setup SQLite database
def setup_usage_db(db_path: str = "claude_usage.db"):
    """Open the SQLite usage database and make sure its schema exists.

    Connects to ``db_path``, creates the ``api_calls`` table if it is not
    there yet, commits, and returns the open connection.
    """
    create_table_sql = """
        CREATE TABLE IF NOT EXISTS api_calls (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            timestamp TEXT,
            model TEXT,
            feature TEXT,
            user_id TEXT,
            input_tokens INTEGER,
            output_tokens INTEGER,
            cache_write_tokens INTEGER,
            cache_read_tokens INTEGER,
            cost_usd REAL,
            latency_ms INTEGER,
            success INTEGER
        )
    """
    connection = sqlite3.connect(db_path)
    connection.execute(create_table_sql)
    connection.commit()
    return connection

class TrackedClaudeClient:
    """Wrapper around the Anthropic client that records every call in SQLite.

    Each request made through :meth:`create` is written to the ``api_calls``
    table (token counts, estimated cost, latency, success flag), so spend can
    later be broken down by model and feature with :meth:`get_stats`.
    """

    def __init__(self, db_path: str = "claude_usage.db"):
        self.client = anthropic.Anthropic()
        self.db = setup_usage_db(db_path)

    def close(self):
        """Close the underlying SQLite connection."""
        self.db.close()

    @staticmethod
    def _elapsed_ms(started_at: datetime) -> int:
        """Return whole milliseconds elapsed since ``started_at``."""
        return int((datetime.now() - started_at).total_seconds() * 1000)

    def create(self, feature: str = "unknown", user_id: str = "anonymous", **kwargs) -> "anthropic.types.Message":
        """Drop-in replacement for ``client.messages.create()`` with tracking.

        Args:
            feature: Label of the product feature issuing the call.
            user_id: Identifier of the end user the call is made for.
            **kwargs: Forwarded unchanged to ``client.messages.create()``.

        Returns:
            The response returned by the Anthropic client.

        Raises:
            Exception: Whatever the Anthropic client raises; the failed call
                is logged with ``success=0`` before the exception propagates.
        """
        start_time = datetime.now()
        model = kwargs.get("model", "unknown")

        # Guard only the API call. With the bookkeeping inside the ``try``, a
        # logging error after a successful call would be recorded as an API
        # failure as well.
        try:
            response = self.client.messages.create(**kwargs)
        except Exception:
            self._log_usage(
                model=model,
                feature=feature,
                user_id=user_id,
                input_tokens=0, output_tokens=0,
                cache_write=0, cache_read=0,
                cost_usd=0, latency_ms=self._elapsed_ms(start_time), success=0
            )
            raise

        usage = response.usage
        # Models missing from the pricing table are tracked at zero cost.
        pricing = MODEL_PRICING.get(model, {"input": 0, "output": 0, "cache_write": 0, "cache_read": 0})

        input_tokens = usage.input_tokens
        output_tokens = usage.output_tokens
        # Cache counters may be absent or None on the usage object.
        cache_write = getattr(usage, 'cache_creation_input_tokens', 0) or 0
        cache_read = getattr(usage, 'cache_read_input_tokens', 0) or 0

        cost = (
            (input_tokens / 1_000_000) * pricing["input"] +
            (output_tokens / 1_000_000) * pricing["output"] +
            (cache_write / 1_000_000) * pricing["cache_write"] +
            (cache_read / 1_000_000) * pricing["cache_read"]
        )

        self._log_usage(
            model=model,
            feature=feature,
            user_id=user_id,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            cache_write=cache_write,
            cache_read=cache_read,
            cost_usd=cost,
            latency_ms=self._elapsed_ms(start_time),
            success=1
        )

        return response

    def _log_usage(self, **kwargs):
        """Insert one row into ``api_calls`` and commit.

        Expects the keys ``model``, ``feature``, ``user_id``,
        ``input_tokens``, ``output_tokens``, ``cache_write``, ``cache_read``,
        ``cost_usd``, ``latency_ms`` and ``success``. The row is stamped with
        the current local time in ISO format.
        """
        self.db.execute("""
            INSERT INTO api_calls
            (timestamp, model, feature, user_id, input_tokens, output_tokens,
             cache_write_tokens, cache_read_tokens, cost_usd, latency_ms, success)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, (
            datetime.now().isoformat(),
            kwargs["model"], kwargs["feature"], kwargs["user_id"],
            kwargs["input_tokens"], kwargs["output_tokens"],
            kwargs["cache_write"], kwargs["cache_read"],
            kwargs["cost_usd"], kwargs["latency_ms"], kwargs["success"]
        ))
        self.db.commit()

    def get_stats(self, days: int = 7) -> list:
        """Summarise the locally tracked calls of the last ``days`` days.

        Returns:
            A list of dicts, one per ``(model, feature)`` pair, ordered by
            total cost descending. Keys: ``model``, ``feature``, ``calls``,
            ``total_input``, ``total_output``, ``total_cost``,
            ``avg_latency`` and ``errors``.
        """
        # Timestamps are stored as ISO strings, so a string comparison against
        # an ISO cutoff selects the rows newer than the cutoff.
        since = (datetime.now() - timedelta(days=days)).isoformat()
        cursor = self.db.execute("""
            SELECT
                model,
                feature,
                COUNT(*) as calls,
                SUM(input_tokens) as total_input,
                SUM(output_tokens) as total_output,
                SUM(cost_usd) as total_cost,
                AVG(latency_ms) as avg_latency,
                SUM(CASE WHEN success=0 THEN 1 ELSE 0 END) as errors
            FROM api_calls
            WHERE timestamp > ?
            GROUP BY model, feature
            ORDER BY total_cost DESC
        """, (since,))
        columns = [col[0] for col in cursor.description]
        return [dict(zip(columns, row)) for row in cursor.fetchall()]

Budget Alerts System

import smtplib
from email.mime.text import MIMEText

class BudgetMonitor:
    """Compare month-to-date Claude API spend with a monthly budget.

    Spend is obtained from ``UsageTracker`` and priced with
    ``calculate_cost``. Each alert threshold (a fraction of the budget) fires
    at most once per calendar month.
    """

    def __init__(self, monthly_budget_usd: float, alert_thresholds: list = None):
        self.monthly_budget = monthly_budget_usd
        self.thresholds = alert_thresholds or [0.5, 0.75, 0.9, 1.0]  # 50%, 75%, 90%, 100%
        self.tracker = UsageTracker()
        self.alerted_thresholds = set()
        # (year, month) the entries in ``alerted_thresholds`` belong to.
        self._alert_period = None

    def _month_to_date_spend(self) -> float:
        """Fetch this month's usage and return its estimated cost in USD."""
        usage = self.tracker.get_current_month_usage()
        return calculate_cost(usage)["total_cost_usd"]

    def _evaluate_spend(self, current_spend: float, now: datetime) -> dict:
        """Fire any newly crossed thresholds and build the status report.

        Args:
            current_spend: Month-to-date spend in USD.
            now: Moment of the check; its year/month decide when the set of
                already-fired thresholds is reset.
        """
        # Forget last month's alerts, otherwise a long-running monitor would
        # stay silent in every month after the first.
        period = (now.year, now.month)
        if period != self._alert_period:
            self.alerted_thresholds.clear()
            self._alert_period = period

        if self.monthly_budget > 0:
            spend_ratio = current_spend / self.monthly_budget
        else:
            # A zero (or negative) budget is exhausted by any spend at all;
            # avoids a ZeroDivisionError.
            spend_ratio = float("inf") if current_spend > 0 else 0.0

        print(f"Current spend: ${current_spend:.2f} / ${self.monthly_budget:.2f} ({spend_ratio*100:.1f}%)")

        for threshold in self.thresholds:
            if spend_ratio >= threshold and threshold not in self.alerted_thresholds:
                self._send_alert(current_spend, threshold)
                self.alerted_thresholds.add(threshold)

        return {
            "current_spend": current_spend,
            "budget": self.monthly_budget,
            "remaining": self.monthly_budget - current_spend,
            "percentage_used": round(spend_ratio * 100, 1)
        }

    def check_and_alert(self):
        """Check current spending and send an alert for each new threshold.

        Returns:
            Dict with ``current_spend``, ``budget``, ``remaining`` and
            ``percentage_used``.
        """
        return self._evaluate_spend(self._month_to_date_spend(), datetime.now())

    def _send_alert(self, current_spend: float, threshold: float):
        """Emit a budget alert (currently printed; plug notifications in here)."""
        message = f"""
CLAUDE API BUDGET ALERT

Spending has reached {threshold*100:.0f}% of monthly budget.

Current: ${current_spend:.2f}
Budget: ${self.monthly_budget:.2f}
Remaining: ${self.monthly_budget - current_spend:.2f}

Please review API usage at: https://console.anthropic.com
"""
        print(f"ALERT: {message}")
        # Implement email/Slack/webhook notification here

    def _project(self, current_spend: float, today: datetime) -> dict:
        """Project end-of-month spend from the average daily rate so far.

        Args:
            current_spend: Month-to-date spend in USD.
            today: Date the projection is made on.
        """
        import calendar  # local import keeps the class self-contained

        days_elapsed = today.day  # always >= 1
        # Real month length instead of a fixed 30-day approximation.
        days_in_month = calendar.monthrange(today.year, today.month)[1]

        daily_rate = current_spend / days_elapsed
        projected_month_total = daily_rate * days_in_month

        if daily_rate > 0:
            # Already over budget means zero days left, not a negative count.
            remaining = max(self.monthly_budget - current_spend, 0)
            days_until_exhausted = round(remaining / daily_rate, 1)
        else:
            # Nothing is being spent, so the budget is never exhausted.
            days_until_exhausted = None

        return {
            "daily_burn_rate": round(daily_rate, 4),
            "projected_month_total": round(projected_month_total, 2),
            "over_budget": projected_month_total > self.monthly_budget,
            "days_until_exhausted": days_until_exhausted
        }

    def get_burn_rate_projection(self) -> dict:
        """Forecast end-of-month cost from the current burn rate.

        Returns:
            Dict with ``daily_burn_rate``, ``projected_month_total``,
            ``over_budget`` and ``days_until_exhausted`` (``None`` when
            nothing has been spent yet).
        """
        return self._project(self._month_to_date_spend(), datetime.now())

Cost Dashboard (Terminal)

def print_cost_report(tracked_client: "TrackedClaudeClient", days: int = 7):
    """Print a cost report for the last ``days`` days to the terminal.

    Args:
        tracked_client: Object whose ``get_stats(days)`` returns a list of
            dicts with the keys ``feature``, ``model``, ``calls``,
            ``total_cost``, ``avg_latency`` and ``errors``.
        days: Size of the reporting window in days.

    The first row of ``get_stats`` is reported as the top cost driver, so the
    rows are expected to arrive ordered by cost, highest first.
    """
    stats = tracked_client.get_stats(days)
    rule = "=" * 60

    print(f"\n{rule}")
    print(f"CLAUDE API USAGE REPORT — Last {days} days")
    print(rule)

    total_cost = sum(s["total_cost"] for s in stats)
    total_calls = sum(s["calls"] for s in stats)
    total_errors = sum(s["errors"] for s in stats)

    print(f"Total Cost: ${total_cost:.4f}")
    print(f"Total API Calls: {total_calls:,}")
    if total_calls > 0:
        print(f"Error Rate: {(total_errors/total_calls*100):.1f}%")
    else:
        # Avoid dividing by zero when nothing was tracked in the window.
        print("No calls")

    print(f"\n{'Feature':<20} {'Model':<20} {'Calls':>8} {'Cost':>10} {'Avg Latency':>12}")
    print("-" * 72)

    for stat in stats[:20]:  # Top 20
        print(
            f"{stat['feature']:<20} "
            f"{stat['model']:<20} "
            f"{stat['calls']:>8,} "
            f"${stat['total_cost']:>9.4f} "
            f"{stat['avg_latency']:>10.0f}ms"
        )

    print(f"\nTop cost driver: {stats[0]['feature'] if stats else 'N/A'}")

# Example usage
client = TrackedClaudeClient()

# Call it like the regular client; feature and user_id are the tracking labels,
# every other keyword is forwarded to messages.create().
response = client.create(
    model="claude-haiku-4-5",
    max_tokens=1000,
    messages=[
        {
            "role": "user",
            "content": "Write a short blog post about Vietnam tech scene",
        },
    ],
    feature="blog_generation",
    user_id="user_123",
)

print_cost_report(client, 7)

Tối ưu chi phí dựa trên Usage Data

Sau khi có usage data, đây là những optimizations phổ biến nhất:

Model right-sizing — Nếu feature X dùng Opus nhưng chỉ cần summarization đơn giản, switch sang Haiku. Tiết kiệm 10-20x.
Prompt Caching — System prompts dài được gọi nhiều lần? Enable cache để giảm 90% input token cost.
Output length control — Add max_tokens phù hợp. Nhiều features không cần 4000 tokens.
Batch processing — Thay vì N individual calls, dùng Batch API (50% discount) cho non-urgent tasks.
Input compression — Summarize long documents trước khi gửi thay vì gửi toàn bộ.

Tổng kết

Cost visibility là bước đầu tiên để tối ưu API spending. Với Usage API + per-request tracking + budget alerts, bạn luôn biết tiền đang đi đâu và có thể action ngay khi spending tăng bất thường.

Xem thêm: Prompt Caching và Speculative Caching — hai kỹ thuật giảm cost hiệu quả nhất.