homebox_lens/validate_semantics.py

# [FILE] validate_semantics.py
# [PURPOSE] This script provides a CLI tool to validate a given Kotlin source file against the Semantic Enrichment Protocol.
# [SEMANTICS] validation, cli, code_quality, python

import re
import sys
import logging
from pathlib import Path

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(message)s')

# [ANCHOR:SEMANTIC_TAXONOMY:Constant]
# [PURPOSE] Defines the allowed keywords for the [SEMANTICS] header, mirroring semantic_enrichment_protocol.md.
# Taxonomy from semantic_enrichment_protocol.md
SEMANTIC_TAXONOMY = {
    # Architectural layer the file belongs to.
    "Layer": [
        "ui",
        "domain",
        "data",
        "presentation",
    ],
    # Kind of component the file implements.
    "Component": [
        "viewmodel", "usecase", "repository", "service",
        "screen", "component", "dialog", "model",
        "entity", "activity", "application", "nav_host",
        "controller", "navigation_drawer", "scaffold", "dashboard",
        "item", "label", "location", "setup",
        "theme", "dependencies", "custom_field", "statistics",
        "image", "attachment", "item_creation", "item_detailed",
        "item_summary", "item_update", "summary", "update",
    ],
    # Cross-cutting concern or feature area the file deals with.
    # Some keywords (e.g. "dialog", "item_creation") also appear under
    # "Component"; the validator flattens all categories into one set.
    "Concern": [
        "networking", "database", "caching", "authentication",
        "validation", "parsing", "state_management", "navigation",
        "di", "testing", "entrypoint", "hilt",
        "timber", "compose", "actions", "routes",
        "common", "color_selection", "loading", "list",
        "details", "edit", "label_management", "labels_list",
        "dialog_management", "locations", "sealed_state", "parallel_data_loading",
        "timber_logging", "dialog", "color", "typography",
        "build", "data_transfer_object", "dto", "api",
        "item_creation", "item_detailed", "item_summary", "item_update",
        "create", "mapper", "count", "user_setup",
        "authentication_flow",
    ],
    # Language-level construct the file is built around.
    "LanguageConstruct": [
        "sealed_class",
        "sealed_interface",
    ],
    # Design pattern / role keywords.
    "Pattern": [
        "ui_logic",
        "ui_state",
        "data_model",
        "immutable",
    ],
}
# [END_ANCHOR:SEMANTIC_TAXONOMY]

# [ANCHOR:ENTITY_TYPES:Constant]
# [PURPOSE] Defines the allowed entity types for [ANCHOR:id:type] definitions.
ENTITY_TYPES = [
    # Structural Kotlin declarations.
    "Module",
    "Class",
    "Interface",
    "Object",
    "DataClass",
    "SealedInterface",
    "EnumClass",
    "Function",
    # Architectural roles.
    "UseCase",
    "ViewModel",
    "Repository",
    # Data and integration entities.
    "DataStructure",
    "DatabaseTable",
    "ApiEndpoint",
]
# [END_ANCHOR:ENTITY_TYPES]

# [ANCHOR:SemanticValidator:Class]
# [PURPOSE] Encapsulates the logic for validating a single file against all semantic rules.
class SemanticValidator:
    # [ANCHOR:SemanticValidator.__init__:Method]
    # [CONTRACT:SemanticValidator.__init__]
    # [PURPOSE] Initializes the validator with the file path and reads its content.
    # [PARAM:file_path:str] The path to the file to be validated.
    # [POST] self.file_path is a Path object.
    # [POST] self.lines contains the file content as a list of strings.
    # [END_CONTRACT:SemanticValidator.__init__]
    def __init__(self, file_path):
        self.file_path = Path(file_path)
        self.lines = self.file_path.read_text().splitlines()
        self.errors = []
        self.filename = self.file_path.name
        logging.info("[INFO][SemanticValidator.__init__][STATE] Initialized for file '%s'.", self.filename)
    # [END_ANCHOR:SemanticValidator.__init__]

    # [ANCHOR:SemanticValidator.validate:Method]
    # [CONTRACT:SemanticValidator.validate]
    # [PURPOSE] Runs all individual validation checks and returns a list of errors.
    # [RETURN:list] A list of formatted error strings. Empty if validation is successful.
    # [END_CONTRACT:SemanticValidator.validate]
    def validate(self):
        """Run every rule check in a fixed order and return the collected errors.

        Returns the same list object as ``self.errors``; it is empty when the
        file passed all checks.
        """
        logging.info("[INFO][SemanticValidator.validate][START] Starting validation for %s", self.filename)

        # Order matters only for the order of the reported errors.
        rule_checks = (
            self.check_file_header,
            self.check_semantic_taxonomy,
            self.check_anchors,
            self.check_file_termination,
            self.check_no_stray_comments,
            self.check_contracts_and_implementation,
            self.check_ai_friendly_logging,
        )
        for run_check in rule_checks:
            run_check()

        if self.errors:
            logging.info("[INFO][SemanticValidator.validate][FAILURE] Validation failed with %d errors.", len(self.errors))
        else:
            logging.info("[INFO][SemanticValidator.validate][SUCCESS] Validation passed.")
        return self.errors
    # [END_ANCHOR:SemanticValidator.validate]

    # [ANCHOR:SemanticValidator.add_error:Method]
    # [CONTRACT:SemanticValidator.add_error]
    # [PURPOSE] A helper method to format and append a new error to the errors list.
    # [PARAM:line_num:int] The line number where the error occurred.
    # [PARAM:message:str] The error message.
    # [POST] A new error string is appended to self.errors.
    # [END_CONTRACT:SemanticValidator.add_error]
    def add_error(self, line_num, message):
        """Record one violation in ``self.errors`` as ``L<line_num>: <message>``."""
        formatted = "L{}: {}".format(line_num, message)
        self.errors.append(formatted)
    # [END_ANCHOR:SemanticValidator.add_error]

    # [ANCHOR:SemanticValidator.check_file_header:Method]
    # [CONTRACT:SemanticValidator.check_file_header]
    # [PURPOSE] Validates Rule 1: FileHeaderIntegrity.
    # [POST] Errors are added to self.errors if the header is incorrect.
    # [END_CONTRACT:SemanticValidator.check_file_header]
    def check_file_header(self):
        """Validate Rule 1 (FileHeaderIntegrity).

        Line 1 must start with ``// [FILE] <filename>`` and line 2 must start
        with ``// [SEMANTICS]``. A missing line is treated as an empty string,
        so empty or one-line files produce header errors instead of raising
        IndexError.
        """
        first_line = self.lines[0] if self.lines else ""
        second_line = self.lines[1] if len(self.lines) > 1 else ""

        if not first_line.startswith(f"// [FILE] {self.filename}"):
            self.add_error(1, f"FileHeaderIntegrity: File must start with '// [FILE] {self.filename}'.")
        if not second_line.startswith("// [SEMANTICS]"):
            self.add_error(2, "FileHeaderIntegrity: Second line must start with '// [SEMANTICS]'.")
    # [END_ANCHOR:SemanticValidator.check_file_header]

    # [ANCHOR:SemanticValidator.check_semantic_taxonomy:Method]
    # [CONTRACT:SemanticValidator.check_semantic_taxonomy]
    # [PURPOSE] Validates Rule 2: SemanticKeywordTaxonomy.
    # [POST] Errors are added to self.errors if invalid keywords are found.
    # [END_CONTRACT:SemanticValidator.check_semantic_taxonomy]
    def check_semantic_taxonomy(self):
        """Validate Rule 2 (SemanticKeywordTaxonomy).

        Only runs when line 2 is a ``// [SEMANTICS]`` header (a missing header
        is reported by the file-header check, not here). The header must list
        at least one keyword, and every comma-separated keyword must appear in
        some category of SEMANTIC_TAXONOMY. All errors are reported on line 2.
        """
        marker = "// [SEMANTICS]"
        if len(self.lines) < 2:
            return
        header = self.lines[1]
        if not header.startswith(marker):
            return

        declared = header.replace(marker, "").strip()
        if declared == "":
            self.add_error(2, "SemanticKeywordTaxonomy: [SEMANTICS] anchor cannot be empty.")
            return

        # Categories are irrelevant for validation: flatten them into one set.
        known_keywords = {kw for group in SEMANTIC_TAXONOMY.values() for kw in group}
        for raw_keyword in declared.split(','):
            keyword = raw_keyword.strip()
            if keyword not in known_keywords:
                self.add_error(2, f"SemanticKeywordTaxonomy: Invalid keyword '{keyword}'.")
    # [END_ANCHOR:SemanticValidator.check_semantic_taxonomy]

    # [ANCHOR:SemanticValidator.check_anchors:Method]
    # [CONTRACT:SemanticValidator.check_anchors]
    # [PURPOSE] Validates Rule 3: Anchors. Checks for pairing and valid types.
    # [POST] Errors are added for mismatched or invalid anchors.
    # [END_CONTRACT:SemanticValidator.check_anchors]
    def check_anchors(self):
        """Validate Rule 3 (Anchors): entity types and open/close pairing.

        Reports:
          * an ``[ANCHOR:id:type]`` whose type is not in ENTITY_TYPES,
          * an anchor id opened again while it is still open,
          * an ``[END_ANCHOR:id]`` with no currently open anchor of that id,
          * every anchor still open at the end of the file.

        Leading whitespace is ignored, so anchors indented inside class or
        function bodies are validated as well (the comment must still be the
        first thing on its line).
        """
        anchor_pattern = re.compile(r"// \[ANCHOR:(\w+):(\w+)\]")
        end_anchor_pattern = re.compile(r"// \[END_ANCHOR:(\w+)\]")
        # anchor id -> 1-based line number of its opening comment
        open_anchors = {}

        for i, raw_line in enumerate(self.lines, 1):
            # Strip indentation first: pattern.match() only matches at the
            # start of the string and would otherwise skip nested anchors.
            line = raw_line.lstrip()

            match = anchor_pattern.match(line)
            if match:
                anchor_id, anchor_type = match.groups()
                if anchor_type not in ENTITY_TYPES:
                    self.add_error(i, f"Anchor Error: Invalid entity type '{anchor_type}' for anchor '{anchor_id}'.")
                if anchor_id in open_anchors:
                    self.add_error(i, f"Anchor Error: Duplicate anchor ID '{anchor_id}' found.")
                else:
                    open_anchors[anchor_id] = i
                # A line that starts with an opening anchor cannot also start
                # with a closing one.
                continue

            end_match = end_anchor_pattern.match(line)
            if end_match:
                anchor_id = end_match.group(1)
                if anchor_id not in open_anchors:
                    self.add_error(i, f"Anchor Error: Found closing anchor '// [END_ANCHOR:{anchor_id}]' without a matching opening anchor.")
                else:
                    del open_anchors[anchor_id]

        # Whatever is left was opened but never closed.
        for anchor_id, line_num in open_anchors.items():
            self.add_error(line_num, f"Anchor Error: Opening anchor '// [ANCHOR:{anchor_id}:...]' at line {line_num} has no matching closing anchor.")
    # [END_ANCHOR:SemanticValidator.check_anchors]

    # [ANCHOR:SemanticValidator.check_file_termination:Method]
    # [CONTRACT:SemanticValidator.check_file_termination]
    # [PURPOSE] Validates Rule 5: FileTermination.
    # [POST] An error is added if the file does not have the correct termination anchor.
    # [END_CONTRACT:SemanticValidator.check_file_termination]
    def check_file_termination(self):
        """Validate Rule 5 (FileTermination).

        The last line, ignoring surrounding whitespace, must be exactly
        ``// [END_FILE_<filename>]``. An empty file has no last line and is
        reported as a violation on line 1 instead of raising IndexError.
        """
        expected = f"// [END_FILE_{self.filename}]"
        last_line = self.lines[-1].strip() if self.lines else ""
        if last_line != expected:
            # Report on the last existing line; use line 1 for an empty file.
            self.add_error(max(len(self.lines), 1), f"FileTermination: File must end with '// [END_FILE_{self.filename}]'.")
    # [END_ANCHOR:SemanticValidator.check_file_termination]

    # [ANCHOR:SemanticValidator.check_no_stray_comments:Method]
    # [CONTRACT:SemanticValidator.check_no_stray_comments]
    # [PURPOSE] Validates Rule 6: NoStrayComments.
    # [POST] Errors are added for any non-structured comments.
    # [END_CONTRACT:SemanticValidator.check_no_stray_comments]
    def check_no_stray_comments(self):
        """Validate Rule 6 (NoStrayComments).

        A line whose first non-blank characters are ``//`` is accepted only
        when it begins with ``// [`` or with one of the known structured tags
        (which may follow ``//`` with any amount of whitespace, including
        none). Comments trailing code on the same line are not inspected.
        """
        tag_pattern = re.compile(
            r"//\s*\[(AI_NOTE|CONTRACT|PURPOSE|PRE|POST|PARAM|RETURN|TEST|THROW|RELATION)]"
        )
        for line_no, raw in enumerate(self.lines, 1):
            text = raw.strip()
            if not text.startswith('//'):
                continue
            is_bracketed = text.startswith(('// [', '// [END_'))
            if is_bracketed or tag_pattern.match(text):
                continue
            self.add_error(line_no, "NoStrayComments: Stray comment found. Only structured comments are allowed.")
    # [END_ANCHOR:SemanticValidator.check_no_stray_comments]

    # [ANCHOR:SemanticValidator.check_contracts_and_implementation:Method]
    # [CONTRACT:SemanticValidator.check_contracts_and_implementation]
    # [PURPOSE] Validates Principle B: DesignByContract. Ensures PRE/POST conditions are implemented.
    # [POST] Errors are added if contract implementations are missing.
    # [END_CONTRACT:SemanticValidator.check_contracts_and_implementation]
    def check_contracts_and_implementation(self):
        """Validate Principle B (DesignByContract) with a text-based heuristic.

        For every ``// [CONTRACT:id]`` ... ``// [END_CONTRACT:id]`` block, the
        ``// [PRE]`` texts are collected (double quotes removed). The first
        single-line ``fun name(...) {`` header after the block is treated as
        the contracted function. Its body, up to and including the line with
        the matching closing brace, must contain a ``require(`` / ``require{``
        call and the literal text of every PRE condition.

        This is not a parser: the condition text must appear verbatim, and
        functions with multi-line signatures or expression bodies are not
        recognised as the contract's target.

        Errors reported:
          * no function header found after the contract (on the END_CONTRACT line),
          * PRE conditions declared but no ``require`` call in the body (once),
          * a PRE condition whose text does not occur in the body (once each).
        """
        contract_pattern = re.compile(r"// \[CONTRACT:(\w+)\]")
        end_contract_pattern = re.compile(r"// \[END_CONTRACT:(\w+)\]")
        pre_pattern = re.compile(r'// \[PRE\](.*)')
        fun_pattern = re.compile(r"fun\s+\w+\(.*\)\s*\{")

        in_contract = False
        contract_id = None
        pre_conditions = []

        for i, line in enumerate(self.lines, 1):
            contract_match = contract_pattern.search(line)
            if contract_match:
                in_contract = True
                contract_id = contract_match.group(1)
                pre_conditions = []

            if not in_contract:
                continue

            pre_match = pre_pattern.search(line)
            if pre_match:
                # Simplistic extraction: the condition is matched as plain text.
                pre_conditions.append(pre_match.group(1).strip().replace('"', ''))

            if not end_contract_pattern.search(line):
                continue
            in_contract = False

            # `i` is the 1-based number of the END_CONTRACT line, i.e. the
            # 0-based index of the line right after it.
            fun_idx = next(
                (j for j in range(i, len(self.lines)) if fun_pattern.search(self.lines[j])),
                None,
            )
            if fun_idx is None:
                self.add_error(i, f"DesignByContract: Could not find function/method body for contract '{contract_id}'.")
                continue

            # Include the closing-brace line itself; otherwise a one-line
            # function (`fun f() { require(...) }`) would yield an empty body.
            body_end = self.find_scope_end(fun_idx)
            function_body_text = " ".join(self.lines[fun_idx:body_end + 1])

            if not pre_conditions:
                continue

            has_require = 'require{' in function_body_text or 'require(' in function_body_text
            if not has_require:
                self.add_error(fun_idx + 1, f"DesignByContract: No `require` calls found for contract '{contract_id}' with PRE conditions.")
                continue

            for pre in pre_conditions:
                if pre not in function_body_text:
                    self.add_error(fun_idx + 1, f"DesignByContract: Missing `require` implementation for PRE condition: '{pre}' in contract '{contract_id}'.")

    def find_scope_end(self, start_line_idx):
        """Return the 0-based index of the line where the brace depth returns to zero.

        Scanning starts at ``start_line_idx`` and counts every ``{`` and ``}``
        character on each line (braces inside strings or comments count too).
        If the depth never returns to zero, the index of the last line is
        returned as a fallback.
        """
        depth = 0
        idx = start_line_idx
        total = len(self.lines)
        while idx < total:
            text = self.lines[idx]
            depth += text.count('{') - text.count('}')
            if depth == 0:
                return idx
            idx += 1
        return total - 1  # unbalanced braces: fall back to the last line
    # [END_ANCHOR:SemanticValidator.check_contracts_and_implementation]

    # [ANCHOR:SemanticValidator.check_ai_friendly_logging:Method]
    # [CONTRACT:SemanticValidator.check_ai_friendly_logging]
    # [PURPOSE] Validates Principle A: AIFriendlyLogging.
    # [POST] Errors are added for logs that use string interpolation or have an invalid format.
    # [END_CONTRACT:SemanticValidator.check_ai_friendly_logging]
    def check_ai_friendly_logging(self):
        """Validate Principle A (AIFriendlyLogging) for single-line Timber calls.

        Only log-emitting calls made directly on ``Timber``
        (``v``/``d``/``i``/``w``/``e``/``wtf``/``log``) are inspected, so
        set-up calls such as ``Timber.plant(...)`` are not mistaken for log
        statements. The log message is the first string literal in the
        argument list, which lets ``Timber.e(throwable, "message")`` be checked
        on its message rather than on the throwable. When the call has no
        string literal, the first argument is used as the message.

        Two errors can be reported per call:
          * the message contains ``$`` (Kotlin string interpolation),
          * the message is neither fully bracketed (``[...]``) nor contains a
            ``[LEVEL][ANCHOR]`` style prefix.
        """
        logging_pattern = re.compile(r"Timber\.(?:v|d|i|w|e|wtf|log)\((.*)\)")
        # A double-quoted literal, allowing backslash-escaped characters inside.
        string_literal_pattern = re.compile(r'"((?:[^"\\]|\\.)*)"')
        structured_pattern = re.compile(r"\[\w+\]\[\w+\]")

        for i, line in enumerate(self.lines, 1):
            match = logging_pattern.search(line)
            if not match:
                continue

            log_content = match.group(1)
            literal = string_literal_pattern.search(log_content)
            if literal:
                log_message = literal.group(1)
            else:
                # No literal (e.g. a variable is logged): check the first argument.
                log_message = log_content.split(',')[0].strip()

            # 1. Data must be passed as arguments, not interpolated.
            if '$' in log_message:
                self.add_error(i, "AIFriendlyLogging: String interpolation with '$' is forbidden in log messages. Pass data as arguments.")

            # 2. Basic structured-format check.
            fully_bracketed = log_message.startswith('[') and log_message.endswith(']')
            if not fully_bracketed and not structured_pattern.search(log_message):
                self.add_error(i, f"AIFriendlyLogging: Log message '{log_message}' does not appear to follow the structured format '[LEVEL][ANCHOR]...'.")
    # [END_ANCHOR:SemanticValidator.check_ai_friendly_logging]
# [END_ANCHOR:SemanticValidator]


# [ANCHOR:main_execution:Block]
# [CONTRACT:main_execution]
# [PURPOSE] Main execution block. Parses CLI arguments and runs the validator.
# [PRE] The script must be run with exactly one argument: the file path.
# [POST] Prints validation results to stdout.
# [POST] Exits with code 1 on validation failure or incorrect usage.
# [POST] Exits with code 0 on validation success.
# [END_CONTRACT:main_execution]
def main(argv=None):
    """Run the validator from the command line and return the process exit code.

    Args:
        argv: Full argument vector including the program name; defaults to
            ``sys.argv``. Exactly one argument (the file path) is expected.

    Returns:
        0 when validation passes; 1 on incorrect usage, when the file cannot
        be read or decoded, or when validation reports errors.
    """
    args = sys.argv if argv is None else argv
    if len(args) != 2:
        logging.error("[ERROR][main_execution][FATAL] Incorrect number of arguments provided.")
        print("Usage: python validate_semantics.py <file_path>")
        return 1

    file_to_validate = args[1]
    try:
        validator = SemanticValidator(file_to_validate)
    except (OSError, UnicodeDecodeError) as exc:
        # Missing/unreadable/undecodable input is a usage problem, not a crash:
        # report it and exit with the failure code instead of a traceback.
        logging.error("[ERROR][main_execution][FATAL] Could not read '%s': %s", file_to_validate, exc)
        return 1

    errors = validator.validate()
    if errors:
        print(f"Semantic validation failed for {file_to_validate}:")
        for error in errors:
            print(f"- {error}")
        return 1

    print(f"Semantic validation passed for {file_to_validate}.")
    return 0


if __name__ == "__main__":
    sys.exit(main())
# [END_ANCHOR:main_execution]

# [END_FILE_validate_semantics.py]