> ## Documentation Index
> Fetch the complete documentation index at: https://docs.cekura.ai/llms.txt
> Use this file to discover all available pages before exploring further.

# Building High-Fidelity Metrics

> Learn how to build and optimize custom metrics for AI agents through an iterative process of definition, testing, feedback, and optimization

export const CopyPageButton = () => {
  // Mounts a "Copy page" split button with a dropdown ("Copy page", "View as Markdown",
  // "Open in Claude") at the top of the docs content area. The component renders nothing
  // itself and always returns null; the toolbar is built imperatively with DOM APIs.
  // Without a `window` global (e.g. server-side rendering) there is nothing to mount.
  if (typeof window === 'undefined') return null;
  // Mounting is deferred by 50 ms — presumably so the page content exists first (TODO confirm).
  setTimeout(() => {
    // Idempotence guard: never mount a second toolbar (or a second pair of document listeners).
    if (document.getElementById('ck-tools')) return;
    const anchor = document.getElementById('content-area') || document.querySelector('.mdx-content');
    if (!anchor) return;
    if (!document.getElementById('ck-style')) {
      const styleEl = document.createElement('style');
      styleEl.id = 'ck-style';
      styleEl.textContent = [
        '#ck-tools{position:absolute;top:6px;right:0;z-index:100;font-family:inherit;}',
        '.ck-row{display:inline-flex;align-items:stretch;border:1px solid rgba(0,0,0,0.15);border-radius:8px;overflow:hidden;background:#fff;}',
        ':root.dark .ck-row{background:rgba(255,255,255,0.06);border-color:rgba(255,255,255,0.12);}',
        '.ck-btn{padding:5px 12px;border:none;background:none;cursor:pointer;font-size:13px;font-weight:500;font-family:inherit;color:#374151;}',
        ':root.dark .ck-btn{color:#d1d5db;}',
        '.ck-btn:hover{background:rgba(0,0,0,0.04);}',
        ':root.dark .ck-btn:hover{background:rgba(255,255,255,0.06);}',
        '.ck-chevron{padding:5px 8px;border:none;background:none;cursor:pointer;font-size:14px;font-family:inherit;color:#374151;}',
        ':root.dark .ck-chevron{color:#d1d5db;}',
        '.ck-chevron:hover{background:rgba(0,0,0,0.04);}',
        ':root.dark .ck-chevron:hover{background:rgba(255,255,255,0.06);}',
        '.ck-divider{width:1px;background:rgba(0,0,0,0.12);flex-shrink:0;}',
        ':root.dark .ck-divider{background:rgba(255,255,255,0.12);}',
        '.ck-dd{position:absolute;top:calc(100% + 4px);right:0;min-width:180px;background:#fff;border:1px solid rgba(0,0,0,0.12);border-radius:8px;box-shadow:0 4px 12px rgba(0,0,0,0.1);padding:4px;display:none;z-index:200;}',
        ':root.dark .ck-dd{background:#1f2937;border-color:rgba(255,255,255,0.1);box-shadow:0 4px 16px rgba(0,0,0,0.35);}',
        '.ck-item{display:block;width:100%;padding:7px 12px;border:none;background:none;border-radius:6px;cursor:pointer;font-size:13px;font-family:inherit;text-align:left;color:#374151;}',
        ':root.dark .ck-item{color:#d1d5db;}',
        '.ck-item:hover{background:rgba(0,0,0,0.05);}',
        ':root.dark .ck-item:hover{background:rgba(255,255,255,0.07);}',
      ].join('');
      document.head.appendChild(styleEl);
    }
    const wrap = document.createElement('div');
    wrap.id = 'ck-tools';
    const row = document.createElement('div');
    row.className = 'ck-row';
    const mainBtn = document.createElement('button');
    mainBtn.className = 'ck-btn';
    mainBtn.textContent = 'Copy page';
    const divider = document.createElement('span');
    divider.className = 'ck-divider';
    const chevron = document.createElement('button');
    chevron.className = 'ck-chevron';
    chevron.textContent = '▾';
    const dd = document.createElement('div');
    dd.className = 'ck-dd';
    const closeDD = () => {
      dd.style.display = 'none';
    };
    const openDD = () => {
      dd.style.display = 'block';
    };
    chevron.onclick = (e) => {
      // Keep this click away from the document-level click-outside handler below.
      e.stopPropagation();
      if (dd.style.display === 'block') {
        closeDD();
      } else {
        openDD();
      }
    };
    document.addEventListener('click', (e) => {
      // Event targets are not guaranteed to be Elements, so check for closest() before
      // calling it; anything that is not inside the toolbar counts as an outside click.
      const target = e.target;
      const isInside = Boolean(target) && typeof target.closest === 'function' && Boolean(target.closest('#ck-tools'));
      if (!isInside) closeDD();
    });
    document.addEventListener('keydown', (e) => {
      if (e.key === 'Escape') closeDD();
    });
    // Builds one dropdown entry that runs `fn` and then closes the dropdown.
    const makeItem = (label, fn) => {
      const item = document.createElement('button');
      item.className = 'ck-item';
      item.textContent = label;
      item.onclick = () => {
        fn();
        closeDD();
      };
      return item;
    };
    const SKIPPED_TAGS = ['script', 'style', 'svg', 'noscript', 'button', 'iframe'];
    // Returns the Markdown prefix for an <li>: "N. " inside an <ol> (honouring a numeric
    // `start` property when the parent exposes one), "- " everywhere else.
    const listMarker = (li) => {
      const parent = li.parentNode;
      if (!parent || !parent.tagName || parent.tagName.toLowerCase() !== 'ol') return '- ';
      const siblings = Array.from(parent.childNodes).filter((c) => c.nodeType === 1 && c.tagName.toLowerCase() === 'li');
      const first = typeof parent.start === 'number' ? parent.start : 1;
      return (first + siblings.indexOf(li)) + '. ';
    };
    // Recursively converts a DOM subtree into a rough Markdown string.
    const walk = (node) => {
      if (!node) return '';
      if (node.nodeType === 3) return node.textContent || '';
      if (node.nodeType !== 1) return '';
      const tag = node.tagName.toLowerCase();
      if (SKIPPED_TAGS.includes(tag)) return '';
      // The toolbar itself must never end up in the copied text.
      if (node.id === 'ck-tools') return '';
      const inner = Array.from(node.childNodes).map((child) => walk(child)).join('');
      const heading = /^h([1-6])$/.exec(tag);
      if (heading) return '\n' + '#'.repeat(Number(heading[1])) + ' ' + inner.trim() + '\n\n';
      if (tag === 'p') return '\n' + inner.trim() + '\n\n';
      // Code blocks use the raw text so nested <code> is not wrapped in backticks again.
      if (tag === 'pre') return '\n```\n' + node.textContent.trim() + '\n```\n\n';
      if (tag === 'li') return listMarker(node) + inner.trim() + '\n';
      if (tag === 'code') return '`' + inner.trim() + '`';
      return inner;
    };
    // Serialises the page content to Markdown, collapsing runs of 3+ newlines to one blank line.
    const getMarkdown = () => {
      const content = document.querySelector('.mdx-content') || document.getElementById('content-area') || document.body;
      return walk(content).replace(/\n\n\n+/g, '\n\n').trim();
    };
    // Shows a temporary status on the main button, then restores the default label after 2 s.
    const flashLabel = (label) => {
      mainBtn.textContent = label;
      setTimeout(() => {
        mainBtn.textContent = 'Copy page';
      }, 2000);
    };
    const copyMd = () => {
      const md = getMarkdown();
      // The async Clipboard API is only exposed in secure contexts and writeText() can
      // reject (e.g. permission denied); report both cases instead of throwing or
      // leaving an unhandled rejection.
      const clipboard = typeof navigator !== 'undefined' ? navigator.clipboard : undefined;
      if (!clipboard || typeof clipboard.writeText !== 'function') {
        flashLabel('Copy failed');
        return;
      }
      clipboard.writeText(md).then(
        () => flashLabel('Copied!'),
        () => flashLabel('Copy failed'),
      );
    };
    const viewMd = () => {
      // Escape the Markdown so it is displayed as literal text in the generated HTML page.
      const safe = getMarkdown().replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
      const html = '<!DOCTYPE html><html><head><meta charset="utf-8"><style>body{font-family:monospace;max-width:860px;margin:40px auto;padding:0 24px;line-height:1.7;white-space:pre-wrap;word-wrap:break-word}</style></head><body>' + safe + '</body></html>';
      // NOTE(review): the object URL is never revoked; revoking it would break reloading the opened tab.
      const blob = new Blob([html], { type: 'text/html' });
      window.open(URL.createObjectURL(blob), '_blank');
    };
    const openClaude = () => {
      const prompt = 'Can you read this Cekura docs page ' + window.location.href + ' so I can ask you questions?';
      // `noopener` keeps the third-party tab from reaching back through window.opener.
      window.open('https://claude.ai/new?q=' + encodeURIComponent(prompt), '_blank', 'noopener');
    };
    mainBtn.onclick = copyMd;
    dd.appendChild(makeItem('Copy page', copyMd));
    dd.appendChild(makeItem('View as Markdown', viewMd));
    dd.appendChild(makeItem('Open in Claude', openClaude));
    row.appendChild(mainBtn);
    row.appendChild(divider);
    row.appendChild(chevron);
    wrap.appendChild(row);
    wrap.appendChild(dd);
    // The toolbar is absolutely positioned, so the anchor must be its positioning context.
    anchor.style.position = 'relative';
    anchor.insertBefore(wrap, anchor.firstChild);
  }, 50);
  return null;
};

<CopyPageButton />

<Note>
  **Scope of this guide.** This guide covers building and iterating on individual metric definitions — naming, typing, setting triggers, and optimizing via the feedback loop. For configuring project-level success criteria — pass/fail thresholds, AND/OR logic across multiple metrics, and which metrics determine overall evaluation success — see [Rubric Configuration](/documentation/key-concepts/metrics/rubric).
</Note>

Creating effective metrics for AI agents requires more than just defining a rule; it requires an iterative process of definition, testing, feedback, and optimization. This guide outlines the workflow to create custom metrics that accurately track specific behaviors (e.g., instruction following, tool call hallucination or proper call termination).

<iframe src="https://www.tella.tv/video/building-good-metrics-on-cekura-0taa/embed" allowfullscreen width="100%" height="360" frameborder="0" />

## Prerequisites

Before building, clarify exactly what you are tracking. Understand the terminology used during monitoring:

* **Main Agent:** Your AI agent (the one being tested).
* **Testing Agent:** The simulated user interacting with your agent.

## Step 1: Metric Definition

Navigate to the **Metrics** section and select **Create Metric**.

1. **Name & Type:** Give your metric a descriptive name (e.g., `Correct End Call by Main Agent`). Select the **Metric Type** (usually **Boolean** for pass/fail checks).
2. **Success Impact:** Toggle **Affects Call Success** to `True` if this metric is critical (i.e., if this fails, the entire call is considered a failure).
3. **Description (The Prompt):** Write a natural language description of what constitutes success.

<Note>
  The toggle above marks this individual metric as critical. To configure project-level success criteria — defining pass/fail thresholds, combining multiple metrics with AND/OR logic, and controlling which metrics determine overall evaluation success — see [Rubric Configuration](/documentation/key-concepts/metrics/rubric).
</Note>

<Tip>
  Use context variables to make the metric dynamic. For example, use `metadata['instructions']` to reference specific scenario steps the agent was supposed to follow.

  You will see a list of context variables in the dashboard when creating a metric.
</Tip>

**Example Description:**

```
Check if the Main Agent ended the call only after all steps in
metadata['instructions'] were completed by the Testing Agent.
```

## Step 2: Set Triggers

Define when the metric should run under the **Evaluation Trigger** section.

* **Always:** Runs on every call (default).
* **Custom:** Use logic to run metrics only in specific scenarios (e.g., `return True` only if the agent is attempting to book an appointment).

## Step 3: Initial Validation (Test Metric)

Before saving, validate your logic immediately within the builder.

<Steps>
  <Step title="Click Test Metric">
    Navigate to the test section within the metric builder.
  </Step>

  <Step title="Select Call IDs">
    Select a few past **Call IDs** from the list to test against.
  </Step>

  <Step title="Run the Test">
    Run the test to see if the metric passes/fails as expected on historical data.
  </Step>

  <Step title="Create Metric">
    If satisfied with the results, click **Create Metric** to save.
  </Step>
</Steps>

## Step 4: The Feedback Loop (Observability)

This is the most critical step for accuracy. You must "teach" the metric by providing ground-truth data.

<Steps>
  <Step title="Navigate to Observability">
    Go to the **Observability** tab in your dashboard.
  </Step>

  <Step title="Run Metric on Calls">
    Select a batch of calls and run your new metric on them using the **Re-evaluate Metrics** button in the top right.
  </Step>

  <Step title="Review Results">
    Look for false positives or false negatives in the metric results.
  </Step>

  <Step title="Provide Feedback">
    For calls where the metric verdict was incorrect:

    1. Click on the call
    2. Click on 👎🏻 next to the metric of concern
    3. **Write an Explanation:** In the feedback box, detail *why* the metric was wrong

    **Example:**

    ```
    The Main Agent correctly ended the call because the Testing Agent
    refused to proceed, which is a valid termination case.
    ```

    4. Click **Add to Lab**
  </Step>
</Steps>

<Tip>
  With Slack integration, you can submit feedback directly from Slack alerts. When a metric fails, click the 👎 button next to **Go to call** to open a feedback modal. Explain why the metric evaluation was incorrect, and it will be added to the Metric Optimizer for refining your metric.
</Tip>

<Note>
  **Best Practice:** Repeat this process for at least **6 calls** to create a robust dataset for optimization.
</Note>

## Step 5: Optimization (Labs)

Once you have annotated data (feedback), use the **Labs** feature to auto-optimize the metric.

<Steps>
  <Step title="Navigate to Labs">
    Navigate to **Labs** and select your metric.
  </Step>

  <Step title="Review Current Performance">
    You will see your annotated examples and the current "Overall Score" against your human feedback.
  </Step>

  <Step title="Auto Improve">
    Click **Auto Improve**.

    The system will use your feedback and explanations to rewrite the metric's internal logic/prompt to handle the edge cases you identified.
  </Step>

  <Step title="Verify & Save">
    1. Once optimization is complete, review the **View Changes** screen to see the old vs. new logic
    2. Check the new score (e.g., improving from 0/6 to 6/6)
    3. Click **Save** to push the optimized metric to production
  </Step>
</Steps>

## Summary of Workflow

The complete workflow for building high-fidelity metrics follows this iterative process:

1. **Draft:** Create a basic description and logic.
2. **Test:** Run on historical calls.
3. **Annotate:** Correct mistakes manually and explain the *why*.
4. **Optimize:** Use "Auto Improve" to let the system refine the prompt based on your annotations.

By following this iterative approach, you can create metrics that accurately evaluate your AI agent's performance and continuously improve their accuracy over time.

## Next Steps

* Learn about [custom metrics](/documentation/key-concepts/metrics/custom-metrics)
* Explore [predefined metrics](/documentation/key-concepts/metrics/pre-defined-metrics)
* Set up [instruction following metric](/documentation/key-concepts/metrics/instruction-following-metric)
* Use [Metric Lab](/documentation/guides/metric-lab) to optimize your metrics