Source code for ai.analysis.dataset_usage_analyzer

import dataclasses
import logging
from typing import List

from ai.analysis.money.currency import Currency
from ai.analysis.run.abstract_analyzer import AbstractAnalyzer
from ai.analysis.run.analysis_result import AnalysisResult
from ai.analysis.run.assistant_run import AssistantRun
from ai.analysis.run.assistant_runs import AssistantRunsAnalyzer
from ai.assistant.assistant import Assistant



[docs]
@dataclasses.dataclass
class FormattedAnalysis:
    """One display-ready row of a cost summary table.

    Every value is stored as an already-formatted string, so an instance
    can be handed to a table renderer without further conversion.

    Attributes:
        assistant (str): Label of the assistant the row describes.
        model (str): Identifier of the model used by that assistant.
        prompt_cost (str): Cost attributed to prompt tokens, as text.
        completions_cost (str): Cost attributed to completion tokens, as text.
        total_cost (str): Overall cost for the row, as text.
        share (str): The row's portion of the overall cost, as text.
    """

    # Field order is part of the interface: it defines the positional
    # constructor signature and the order seen by dataclasses.fields().
    assistant: str
    model: str
    prompt_cost: str
    completions_cost: str
    total_cost: str
    share: str




[docs]
def transform_field_names(cls) -> List[str]:
    """Build display labels from the field names of a dataclass.

    Each field name has its underscores turned into spaces and is then
    passed through ``str.capitalize``: the first character is upper-cased
    and every other character is lower-cased.

    Args:
        cls (Type): Dataclass (anything accepted by ``dataclasses.fields``)
            whose field names are converted.

    Returns:
        List[str]: One label per field, in field-declaration order.
    """
    labels: List[str] = []
    for spec in dataclasses.fields(cls):
        spaced_name = spec.name.replace("_", " ")
        labels.append(spaced_name.capitalize())
    return labels




[docs]
class DatasetUsageAnalyzer(AbstractAnalyzer):
    """Aggregates and summarizes cost analyses across multiple assistant runs.

    Runs are registered one at a time with :meth:`add_run` and grouped by the
    assistant that produced them. Costs can then be reported either as a
    single combined result or as a per-assistant table.

    Attributes:
        _currency (Currency): Target currency for formatting costs.
        _logger (logging.Logger): Logger for warnings and debug messages.
        _runs_by_assistant (dict[Assistant, list[AssistantRun]]): Mapping of
            assistants to their runs, in the order the runs were added.
    """

    def __init__(self, currency: Currency):
        """Initialize the analyzer with a currency.

        Args:
            currency (Currency): Currency in which to report costs.
        """
        self._currency = currency
        self._logger = logging.getLogger(__name__)
        self._runs_by_assistant: dict[Assistant, list[AssistantRun]] = {}

    def get_cost_analysis(self) -> AnalysisResult:
        """Compute the combined AnalysisResult for all recorded runs.

        Returns:
            AnalysisResult: Aggregated cost analysis across all assistants,
            as produced by ``AssistantRunsAnalyzer`` (not converted to the
            target currency here).
        """
        return AssistantRunsAnalyzer(self._all_runs).get_cost_analysis()

    def add_run(self, assistant_run: AssistantRun) -> None:
        """Record a new AssistantRun under its assistant.

        Args:
            assistant_run (AssistantRun): The run to add. It is filed under
                ``assistant_run.assistant``.
        """
        self._runs_by_assistant.setdefault(
            assistant_run.assistant, []
        ).append(assistant_run)

    def generate_cost_summary(self) -> List[FormattedAnalysis]:
        """Generate a summary table of costs per assistant, plus a total row.

        Per-assistant rows are ordered by total cost, highest first, and are
        followed by a single ``"Total"`` row.

        Returns:
            List[FormattedAnalysis]: Formatted cost entries for each assistant
            and the grand total.
        """
        total_cost = self.get_cost_analysis().total_cost
        sorted_analysis = self._get_sorted_converted_analyses()

        # NOTE(review): each row is converted to the target currency, but the
        # share is computed against the unconverted total. Confirm that
        # get_share() handles mixed currencies as intended.
        cost_table = [
            FormattedAnalysis(
                assistant=analysis_result.assistant.name.value,
                model=str(analysis_result.model),
                prompt_cost=analysis_result.prompts_cost.in_formatted(4),
                completions_cost=analysis_result.completions_cost.in_formatted(4),
                total_cost=analysis_result.total_cost.in_formatted(4),
                share=f"{round(analysis_result.get_share(total_cost) * 100)}%",
            )
            for analysis_result in sorted_analysis
        ]

        # NOTE(review): the total row calls in_formatted() with no argument,
        # while the rows above pass 4. Confirm the difference is deliberate.
        cost_table.append(
            FormattedAnalysis(
                assistant="Total",
                model="",
                prompt_cost="",
                completions_cost="",
                total_cost=total_cost.convert_to(self._currency).in_formatted(),
                share="100%",
            )
        )

        return cost_table

    def _get_sorted_converted_analyses(self) -> List[AnalysisResult]:
        """Convert and sort each assistant's AnalysisResult by total cost descending.

        Returns:
            List[AnalysisResult]: Per-assistant results converted to the
            target currency, highest total cost first.
        """
        analyses = [
            self._create_assistant_analysis(assistant)
            for assistant in self._assistants
        ]
        converted = [
            analysis_result.convert_to(self._currency)
            for analysis_result in analyses
        ]
        return sorted(converted, key=lambda x: x.total_cost, reverse=True)

    @property
    def _all_runs(self) -> List[AssistantRun]:
        """List of all AssistantRun instances across all assistants."""
        # Flatten in a single pass. sum(lists, []) would copy the growing
        # accumulator once per assistant, which is quadratic in total runs.
        return [
            run
            for runs in self._runs_by_assistant.values()
            for run in runs
        ]

    @property
    def _assistants(self):
        """Iterable of Assistant keys for which runs have been recorded."""
        return self._runs_by_assistant.keys()

    def _create_assistant_analysis(self, assistant: Assistant) -> AnalysisResult:
        """Compute aggregated AnalysisResult for a single assistant.

        Args:
            assistant (Assistant): The assistant whose runs to aggregate.

        Returns:
            AnalysisResult: Aggregated cost analysis for the given assistant.

        Raises:
            KeyError: If no run has been recorded for ``assistant``.
        """
        runs = self._runs_by_assistant[assistant]
        return AssistantRunsAnalyzer(runs).get_cost_analysis()