# [FILE] validate_semantics.py
# [PURPOSE] This script provides a CLI tool to validate a given Kotlin source file against the Semantic Enrichment Protocol.
# [SEMANTICS] validation, cli, code_quality, python

import re
import sys
import logging
from pathlib import Path

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(message)s')

# [ANCHOR:SEMANTIC_TAXONOMY:Constant]
# [PURPOSE] Defines the allowed keywords for the [SEMANTICS] header, mirroring semantic_enrichment_protocol.md.
# Taxonomy from semantic_enrichment_protocol.md
SEMANTIC_TAXONOMY = {
    "Layer": ["ui", "domain", "data", "presentation"],
    "Component": [
        "viewmodel", "usecase", "repository", "service", "screen", "component", "dialog", "model", "entity",
        "activity", "application", "nav_host", "controller", "navigation_drawer", "scaffold", "dashboard",
        "item", "label", "location", "setup", "theme", "dependencies", "custom_field", "statistics",
        "image", "attachment", "item_creation", "item_detailed", "item_summary", "item_update",
        "summary", "update"
    ],
    "Concern": [
        "networking", "database", "caching", "authentication", "validation", "parsing", "state_management",
        "navigation", "di", "testing", "entrypoint", "hilt", "timber", "compose", "actions", "routes",
        "common", "color_selection", "loading", "list", "details", "edit", "label_management",
        "labels_list", "dialog_management", "locations", "sealed_state", "parallel_data_loading",
        "timber_logging", "dialog", "color", "typography", "build", "data_transfer_object", "dto", "api",
        "item_creation", "item_detailed", "item_summary", "item_update", "create", "mapper", "count",
        "user_setup", "authentication_flow"
    ],
    "LanguageConstruct": ["sealed_class", "sealed_interface"],
    "Pattern": ["ui_logic", "ui_state", "data_model", "immutable"]
}
# [END_ANCHOR:SEMANTIC_TAXONOMY]

# [ANCHOR:ENTITY_TYPES:Constant]
# [PURPOSE] Defines the allowed entity types for [ANCHOR:id:type] definitions.
ENTITY_TYPES = [
    "Module", "Class", "Interface", "Object", "DataClass", "SealedInterface", "EnumClass",
    "Function", "UseCase", "ViewModel", "Repository", "DataStructure", "DatabaseTable", "ApiEndpoint"
]
# [END_ANCHOR:ENTITY_TYPES]


# [ANCHOR:SemanticValidator:Class]
# [PURPOSE] Encapsulates the logic for validating a single file against all semantic rules.
class SemanticValidator:
    """Validate one source file against the semantic rules checked below.

    Errors are accumulated in ``self.errors`` as strings of the form
    ``"L<line>: <message>"``; individual ``check_*`` methods only append.
    """

    # [ANCHOR:SemanticValidator.__init__:Method]
    # [CONTRACT:SemanticValidator.__init__]
    # [PURPOSE] Initializes the validator with the file path and reads its content.
    # [PARAM:file_path:str] The path to the file to be validated.
    # [POST] self.file_path is a Path object.
    # [POST] self.lines contains the file content as a list of strings.
    # [THROW] OSError / UnicodeDecodeError if the file cannot be read as UTF-8 text.
    # [END_CONTRACT:SemanticValidator.__init__]
    def __init__(self, file_path):
        self.file_path = Path(file_path)
        # Explicit encoding so results do not depend on the platform's locale default.
        self.lines = self.file_path.read_text(encoding="utf-8").splitlines()
        self.errors = []
        self.filename = self.file_path.name
        logging.info("[INFO][SemanticValidator.__init__][STATE] Initialized for file '%s'.", self.filename)
    # [END_ANCHOR:SemanticValidator.__init__]

    # [ANCHOR:SemanticValidator.validate:Method]
    # [CONTRACT:SemanticValidator.validate]
    # [PURPOSE] Runs all individual validation checks and returns a list of errors.
    # [RETURN:list] A list of formatted error strings. Empty if validation is successful.
    # [END_CONTRACT:SemanticValidator.validate]
    def validate(self):
        logging.info("[INFO][SemanticValidator.validate][START] Starting validation for %s", self.filename)
        self.check_file_header()
        self.check_semantic_taxonomy()
        self.check_anchors()
        self.check_file_termination()
        self.check_no_stray_comments()
        self.check_contracts_and_implementation()
        self.check_ai_friendly_logging()

        if not self.errors:
            logging.info("[INFO][SemanticValidator.validate][SUCCESS] Validation passed.")
        else:
            logging.info(
                "[INFO][SemanticValidator.validate][FAILURE] Validation failed with %d errors.",
                len(self.errors),
            )
        return self.errors
    # [END_ANCHOR:SemanticValidator.validate]

    # [ANCHOR:SemanticValidator.add_error:Method]
    # [CONTRACT:SemanticValidator.add_error]
    # [PURPOSE] A helper method to format and append a new error to the errors list.
    # [PARAM:line_num:int] The line number where the error occurred.
    # [PARAM:message:str] The error message.
    # [POST] A new error string is appended to self.errors.
    # [END_CONTRACT:SemanticValidator.add_error]
    def add_error(self, line_num, message):
        self.errors.append(f"L{line_num}: {message}")
    # [END_ANCHOR:SemanticValidator.add_error]

    # [ANCHOR:SemanticValidator.check_file_header:Method]
    # [CONTRACT:SemanticValidator.check_file_header]
    # [PURPOSE] Validates Rule 1: FileHeaderIntegrity.
    # [POST] Errors are added to self.errors if the header is incorrect.
    # [POST] A file with fewer than two lines is reported as an error instead of raising IndexError.
    # [END_CONTRACT:SemanticValidator.check_file_header]
    def check_file_header(self):
        # Missing lines are treated as empty strings so short/empty files fail the
        # rule cleanly rather than crashing the validator.
        first_line = self.lines[0] if self.lines else ""
        second_line = self.lines[1] if len(self.lines) > 1 else ""

        if not first_line.startswith(f"// [FILE] {self.filename}"):
            self.add_error(1, f"FileHeaderIntegrity: File must start with '// [FILE] {self.filename}'.")
        if not second_line.startswith("// [SEMANTICS]"):
            self.add_error(2, "FileHeaderIntegrity: Second line must start with '// [SEMANTICS]'.")
    # [END_ANCHOR:SemanticValidator.check_file_header]

    # [ANCHOR:SemanticValidator.check_semantic_taxonomy:Method]
    # [CONTRACT:SemanticValidator.check_semantic_taxonomy]
    # [PURPOSE] Validates Rule 2: SemanticKeywordTaxonomy.
    # [POST] Errors are added to self.errors if invalid keywords are found.
    # [END_CONTRACT:SemanticValidator.check_semantic_taxonomy]
    def check_semantic_taxonomy(self):
        # A missing/malformed [SEMANTICS] line is reported by check_file_header; nothing to do here.
        if len(self.lines) < 2 or not self.lines[1].startswith("// [SEMANTICS]"):
            return

        semantics_str = self.lines[1].replace("// [SEMANTICS]", "").strip()
        if not semantics_str:
            self.add_error(2, "SemanticKeywordTaxonomy: [SEMANTICS] anchor cannot be empty.")
            return

        # Flatten every taxonomy category into one lookup set.
        all_valid_keywords = set().union(*SEMANTIC_TAXONOMY.values())
        for keyword in (part.strip() for part in semantics_str.split(',')):
            if keyword not in all_valid_keywords:
                self.add_error(2, f"SemanticKeywordTaxonomy: Invalid keyword '{keyword}'.")
    # [END_ANCHOR:SemanticValidator.check_semantic_taxonomy]

    # [ANCHOR:SemanticValidator.check_anchors:Method]
    # [CONTRACT:SemanticValidator.check_anchors]
    # [PURPOSE] Validates Rule 3: Anchors. Checks for pairing and valid types.
    # [POST] Errors are added for mismatched or invalid anchors.
    # [END_CONTRACT:SemanticValidator.check_anchors]
    def check_anchors(self):
        # NOTE(review): match() only recognises anchors that start at column 0 and whose
        # id consists of \w characters, so indented or dotted anchors are silently
        # ignored here -- confirm against the protocol whether that is intended.
        anchor_pattern = re.compile(r"// \[ANCHOR:(\w+):(\w+)\]")
        end_anchor_pattern = re.compile(r"// \[END_ANCHOR:(\w+)\]")

        # Maps anchor id -> line number of its still-unclosed opening anchor.
        open_anchors = {}

        for i, line in enumerate(self.lines, 1):
            # Check entity type in ANCHOR
            match = anchor_pattern.match(line)
            if match:
                anchor_id, anchor_type = match.groups()
                if anchor_type not in ENTITY_TYPES:
                    self.add_error(i, f"Anchor Error: Invalid entity type '{anchor_type}' for anchor '{anchor_id}'.")
                if anchor_id in open_anchors:
                    self.add_error(i, f"Anchor Error: Duplicate anchor ID '{anchor_id}' found.")
                else:
                    open_anchors[anchor_id] = i

            # Check for matching END_ANCHOR
            end_match = end_anchor_pattern.match(line)
            if end_match:
                anchor_id = end_match.group(1)
                if anchor_id not in open_anchors:
                    self.add_error(i, f"Anchor Error: Found closing anchor '// [END_ANCHOR:{anchor_id}]' without a matching opening anchor.")
                else:
                    del open_anchors[anchor_id]

        # Whatever is left was opened but never closed.
        for anchor_id, line_num in open_anchors.items():
            self.add_error(line_num, f"Anchor Error: Opening anchor '// [ANCHOR:{anchor_id}:...]' at line {line_num} has no matching closing anchor.")
    # [END_ANCHOR:SemanticValidator.check_anchors]

    # [ANCHOR:SemanticValidator.check_file_termination:Method]
    # [CONTRACT:SemanticValidator.check_file_termination]
    # [PURPOSE] Validates Rule 5: FileTermination.
    # [POST] An error is added if the file does not have the correct termination anchor.
    # [POST] An empty file is reported (at line 1) instead of raising IndexError.
    # [END_CONTRACT:SemanticValidator.check_file_termination]
    def check_file_termination(self):
        expected = f"// [END_FILE_{self.filename}]"
        if not self.lines or self.lines[-1].strip() != expected:
            # max(..., 1) keeps the reported line number meaningful for an empty file.
            self.add_error(max(len(self.lines), 1), f"FileTermination: File must end with '{expected}'.")
    # [END_ANCHOR:SemanticValidator.check_file_termination]

    # [ANCHOR:SemanticValidator.check_no_stray_comments:Method]
    # [CONTRACT:SemanticValidator.check_no_stray_comments]
    # [PURPOSE] Validates Rule 6: NoStrayComments.
    # [POST] Errors are added for any non-structured comments.
    # [END_CONTRACT:SemanticValidator.check_no_stray_comments]
    def check_no_stray_comments(self):
        # Tags that are also accepted when the spacing after '//' is not exactly one blank.
        tagged_comment = re.compile(
            r"//\s*\[(AI_NOTE|CONTRACT|PURPOSE|PRE|POST|PARAM|RETURN|TEST|THROW|RELATION)]"
        )
        for i, line in enumerate(self.lines, 1):
            stripped_line = line.strip()
            if not stripped_line.startswith('//'):
                continue
            # '// [' covers every structured comment, including '// [END_...' markers.
            if stripped_line.startswith('// [') or tagged_comment.match(stripped_line):
                continue
            self.add_error(i, "NoStrayComments: Stray comment found. Only structured comments are allowed.")
    # [END_ANCHOR:SemanticValidator.check_no_stray_comments]

    # [ANCHOR:SemanticValidator.check_contracts_and_implementation:Method]
    # [CONTRACT:SemanticValidator.check_contracts_and_implementation]
    # [PURPOSE] Validates Principle B: DesignByContract. Ensures PRE/POST conditions are implemented.
    # [POST] Errors are added if contract implementations are missing.
    # [END_CONTRACT:SemanticValidator.check_contracts_and_implementation]
    def check_contracts_and_implementation(self):
        # This is a simplified check. A full implementation would require a proper parser.
        # It finds contract blocks and checks for corresponding require calls in the
        # first function that follows each contract.
        contract_pattern = re.compile(r"// \[CONTRACT:(\w+)\]")
        end_contract_pattern = re.compile(r"// \[END_CONTRACT:(\w+)\]")
        pre_pattern = re.compile(r'// \[PRE\](.*)')
        fun_pattern = re.compile(r"fun\s+\w+\(.*\)\s*\{")

        in_contract = False
        contract_id = None
        pre_conditions = []

        for i, line in enumerate(self.lines, 1):
            contract_match = contract_pattern.search(line)
            if contract_match:
                in_contract = True
                contract_id = contract_match.group(1)
                pre_conditions = []

            if not in_contract:
                continue

            pre_match = pre_pattern.search(line)
            if pre_match:
                # simplistic extraction of the condition text
                pre_conditions.append(pre_match.group(1).strip().replace('"', ''))

            if not end_contract_pattern.search(line):
                continue

            in_contract = False
            # i is 1-based, so range(i, ...) starts at the line after END_CONTRACT.
            fun_idx = next(
                (j for j in range(i, len(self.lines)) if fun_pattern.search(self.lines[j])),
                None,
            )
            if fun_idx is None:
                self.add_error(i, f"DesignByContract: Could not find function/method body for contract '{contract_id}'.")
            else:
                self._check_preconditions(fun_idx, contract_id, pre_conditions)

    def _check_preconditions(self, fun_idx, contract_id, pre_conditions):
        """Report PRE conditions whose text is absent from the function starting at fun_idx (0-based)."""
        if not pre_conditions:
            return

        body_end = self.find_scope_end(fun_idx)
        # Include the closing-brace line: for a one-line function it *is* the whole body.
        function_body_text = " ".join(self.lines[fun_idx:body_end + 1])

        # This check is basic. It just looks for the presence of the text.
        # A robust solution needs code parsing.
        if 'require{' not in function_body_text and 'require(' not in function_body_text:
            # Reported once per contract, not once per PRE condition.
            self.add_error(fun_idx + 1, f"DesignByContract: No `require` calls found for contract '{contract_id}' with PRE conditions.")
            return

        for pre in pre_conditions:
            if pre not in function_body_text:
                self.add_error(fun_idx + 1, f"DesignByContract: Missing `require` implementation for PRE condition: '{pre}' in contract '{contract_id}'.")

    def find_scope_end(self, start_line_idx):
        """Finds the line index of the closing brace for a scope starting at start_line_idx.

        Braces are counted textually (including any inside strings or comments).
        Returns the 0-based index of the first line at which the running brace
        balance returns to zero, or the last line index if it never does.
        """
        open_braces = 0
        for i in range(start_line_idx, len(self.lines)):
            line = self.lines[i]
            open_braces += line.count('{')
            open_braces -= line.count('}')
            if open_braces == 0:
                return i
        return len(self.lines) - 1  # fallback
    # [END_ANCHOR:SemanticValidator.check_contracts_and_implementation]

    # [ANCHOR:SemanticValidator.check_ai_friendly_logging:Method]
    # [CONTRACT:SemanticValidator.check_ai_friendly_logging]
    # [PURPOSE] Validates Principle A: AIFriendlyLogging.
    # [POST] Errors are added for logs that use string interpolation or have an invalid format.
    # [END_CONTRACT:SemanticValidator.check_ai_friendly_logging]
    def check_ai_friendly_logging(self):
        logging_pattern = re.compile(r"Timber\.\w+\((.*)\)")
        structured_prefix = re.compile(r"\[\w+\]\[\w+\]")

        for i, line in enumerate(self.lines, 1):
            match = logging_pattern.search(line)
            if not match:
                continue

            # Everything before the first comma is taken to be the message argument.
            first_argument = match.group(1).split(',')[0]

            # 1. Check for string interpolation
            if '$' in first_argument:
                self.add_error(i, "AIFriendlyLogging: String interpolation with '$' is forbidden in log messages. Pass data as arguments.")

            # 2. Check for structured message format (basic check)
            log_message = first_argument.strip().replace('"', '')
            fully_bracketed = log_message.startswith('[') and log_message.endswith(']')
            if not fully_bracketed and not structured_prefix.search(log_message):
                self.add_error(i, f"AIFriendlyLogging: Log message '{log_message}' does not appear to follow the structured format '[LEVEL][ANCHOR]...'.")
    # [END_ANCHOR:SemanticValidator.check_ai_friendly_logging]
# [END_ANCHOR:SemanticValidator]


# [ANCHOR:main_execution:Block]
# [CONTRACT:main_execution]
# [PURPOSE] Main execution block. Parses CLI arguments and runs the validator.
# [PRE] The script must be run with exactly one argument: the file path.
# [POST] Prints validation results to stdout.
# [POST] Exits with code 1 on validation failure, unreadable file or incorrect usage.
# [POST] Exits with code 0 on validation success.
# [END_CONTRACT:main_execution]
def main(argv=None):
    """Run the validator for the file named in argv and return the process exit code.

    argv defaults to sys.argv; argv[0] is the program name and argv[1] the file path.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) != 2:
        logging.error("[ERROR][main_execution][FATAL] Incorrect number of arguments provided.")
        print("Usage: python validate_semantics.py FILE_PATH")
        return 1

    file_to_validate = argv[1]
    try:
        validator = SemanticValidator(file_to_validate)
    except (OSError, UnicodeDecodeError) as exc:
        # Report unreadable input as a normal failure instead of a traceback.
        logging.error("[ERROR][main_execution][FATAL] Cannot read file '%s': %s", file_to_validate, exc)
        return 1

    errors = validator.validate()
    if errors:
        print(f"Semantic validation failed for {file_to_validate}:")
        for error in errors:
            print(f"- {error}")
        return 1

    print(f"Semantic validation passed for {file_to_validate}.")
    return 0


if __name__ == "__main__":
    sys.exit(main())
# [END_ANCHOR:main_execution]
# [END_FILE_validate_semantics.py]