Source code for rdfsolve.schema_models.report

"""Mining analytics report models."""

from __future__ import annotations

from typing import Any

from pydantic import BaseModel, ConfigDict, Field


class QueryStats(BaseModel):
    """Cumulative statistics for one query category."""

    # Unknown keys are rejected at validation time instead of being kept.
    model_config = ConfigDict(extra="forbid")

    # Every counter starts at zero and is constrained to be non-negative
    # (``ge=0``). No cross-field relation (e.g. failed <= sent) is enforced
    # by this model.
    sent: int = Field(default=0, ge=0, description="Queries sent")
    failed: int = Field(default=0, ge=0, description="Queries that failed")
    total_time_s: float = Field(
        default=0.0,
        ge=0,
        description="Wall-clock seconds for this category",
    )
class OneShotQueryResult(BaseModel):
    """Outcome of a single unbounded SELECT against a SPARQL endpoint.

    Used to record the raw performance of an unguarded one-shot query
    so it can be compared against the fallback-chain result.
    """

    # Unknown keys are rejected at validation time instead of being kept.
    model_config = ConfigDict(extra="forbid")

    # --- Required: what was asked and whether it worked ---
    # ``query_type`` is a free-form string; the values listed in the
    # description are not enforced by the model.
    query_type: str = Field(
        ...,
        description="Pattern type queried: 'typed-object', 'literal', or 'untyped-uri'",
    )
    success: bool = Field(
        ...,
        description="True if the endpoint returned a result set",
    )

    # --- Optional measurements: default to None, non-negative when set ---
    duration_s: float | None = Field(
        default=None,
        ge=0,
        description="Wall-clock seconds for the single HTTP call",
    )
    row_count: int | None = Field(
        default=None,
        ge=0,
        description="Number of result rows returned",
    )

    # --- Optional failure detail ---
    error: str | None = Field(
        default=None,
        description="Exception message if the query failed",
    )
class PhaseReport(BaseModel):
    """Timing and outcome for one mining phase."""

    # Unknown keys are rejected at validation time instead of being kept.
    model_config = ConfigDict(extra="forbid")

    # The only required field.
    name: str = Field(..., description="Phase identifier")

    # Timestamps are typed as plain strings: the model itself does not
    # validate that they are well-formed ISO-8601.
    started_at: str | None = Field(
        default=None,
        description="ISO-8601 start time",
    )
    finished_at: str | None = Field(
        default=None,
        description="ISO-8601 finish time",
    )

    # Stored independently of the two timestamps above; nothing here
    # derives it from them or checks it against them.
    duration_s: float | None = Field(
        default=None,
        ge=0,
        description="Wall-clock seconds",
    )

    items_discovered: int = Field(
        default=0,
        ge=0,
        description="Number of items produced by this phase",
    )
    error: str | None = Field(
        default=None,
        description="Error message if the phase failed",
    )
class MiningReport(BaseModel):
    """Analytical metadata collected during a mining run.

    Designed to be written to disk incrementally (after each phase
    completes) so that partial data is preserved even if mining
    crashes midway.
    """

    # Unlike the other report models in this module, which forbid extras,
    # unknown keys are accepted here and retained on the instance.
    model_config = ConfigDict(extra="allow")

    # ------------------------------------------------------------------
    # Identification
    # ------------------------------------------------------------------
    dataset_name: str | None = Field(
        default=None,
        description="Human-readable name of the mined dataset",
    )
    endpoint_url: str = Field(..., description="SPARQL endpoint URL")
    graph_uris: list[str] | None = Field(
        default=None,
        description="Named-graph URIs (if any)",
    )
    # Free-form string; the values named in the description are not enforced.
    strategy: str = Field(
        default="unknown",
        description="Mining strategy: 'miner' or 'miner/two-phase'",
    )

    # ------------------------------------------------------------------
    # Versions & environment
    # ------------------------------------------------------------------
    rdfsolve_version: str = Field(..., description="Package version string")
    python_version: str = Field(
        ...,
        description="Python interpreter version",
    )
    qlever_version: dict[str, str] | None = Field(
        default=None,
        description=(
            "QLever build info fetched from the endpoint's "
            '?cmd=stats: {"git_hash_server": str, '
            '"git_hash_index": str}'
        ),
    )

    # ------------------------------------------------------------------
    # Timing
    # ------------------------------------------------------------------
    # Timestamps are plain strings; ISO-8601 well-formedness is not
    # validated by the model.
    started_at: str = Field(
        ...,
        description="ISO-8601 timestamp when mining started",
    )
    finished_at: str | None = Field(
        default=None,
        description="ISO-8601 timestamp when mining finished",
    )
    total_duration_s: float | None = Field(
        default=None,
        ge=0,
        description="Total wall-clock seconds",
    )

    # ------------------------------------------------------------------
    # Query statistics
    # ------------------------------------------------------------------
    query_stats: dict[str, QueryStats] = Field(
        default_factory=dict,
        description="Per-purpose query statistics.",
    )
    # NOTE(review): presumably aggregates over ``query_stats``; the model
    # does not compute or cross-check these totals -- confirm with the writer.
    total_queries_sent: int = Field(default=0, ge=0)
    total_queries_failed: int = Field(default=0, ge=0)

    # ------------------------------------------------------------------
    # Phase breakdown
    # ------------------------------------------------------------------
    phases: list[PhaseReport] = Field(default_factory=list)

    # ------------------------------------------------------------------
    # Results summary
    # ------------------------------------------------------------------
    abort_reason: str | None = None
    pattern_count: int = Field(default=0, ge=0)
    class_count: int = Field(default=0, ge=0)
    property_count: int = Field(default=0, ge=0)
    unique_uris_labelled: int = Field(default=0, ge=0)

    # ------------------------------------------------------------------
    # Configuration snapshot
    # ------------------------------------------------------------------
    config: dict[str, Any] = Field(default_factory=dict)

    # ------------------------------------------------------------------
    # Benchmark / resource usage
    # ------------------------------------------------------------------
    machine: dict[str, Any] | None = None
    benchmark: dict[str, Any] | None = None

    # ------------------------------------------------------------------
    # One-shot baseline
    # ------------------------------------------------------------------
    one_shot_results: list[OneShotQueryResult] | None = None

    # ------------------------------------------------------------------
    # Author provenance
    # ------------------------------------------------------------------
    authors: list[dict[str, str]] | None = None

    # ------------------------------------------------------------------
    # Captured endpoint metadata
    # ------------------------------------------------------------------
    dataset_metadata: dict[str, Any] | None = None

    # ------------------------------------------------------------------
    # Canonical URI
    # ------------------------------------------------------------------
    report_uri: str | None = None