# [FILE] validate_semantics.py
# [PURPOSE] This script provides a CLI tool to validate a given Kotlin source file against the Semantic Enrichment Protocol.
# [SEMANTICS] validation, cli, code_quality, python
import argparse
import logging
import re
import sys
from pathlib import Path

# Configure logging: messages are emitted bare (no level/timestamp prefix) because
# every message in this script already carries its own "[LEVEL][ANCHOR][STATE]" prefix.
logging.basicConfig(level=logging.INFO, format='%(message)s')

# [ANCHOR:SEMANTIC_TAXONOMY:Constant]
# [PURPOSE] Defines the allowed keywords for the [SEMANTICS] header, mirroring semantic_enrichment_protocol.md.
# Maps a taxonomy category name to the list of keywords accepted for it.
# Some keywords (e.g. "dialog", "item_creation") intentionally appear in more than one category.
SEMANTIC_TAXONOMY = {
    "Layer": ["ui", "domain", "data", "presentation"],
    "Component": [
        "viewmodel", "usecase", "repository", "service", "screen", "component",
        "dialog", "model", "entity", "activity", "application", "nav_host",
        "controller", "navigation_drawer", "scaffold", "dashboard", "item",
        "label", "location", "setup", "theme", "dependencies", "custom_field",
        "statistics", "image", "attachment", "item_creation", "item_detailed",
        "item_summary", "item_update", "summary", "update", "feature_module",
    ],
    "Concern": [
        "networking", "database", "caching", "authentication", "validation",
        "parsing", "state_management", "navigation", "di", "testing",
        "entrypoint", "hilt", "timber", "compose", "actions", "routes",
        "common", "color_selection", "loading", "list", "details", "edit",
        "label_management", "labels_list", "dialog_management", "locations",
        "sealed_state", "parallel_data_loading", "timber_logging", "dialog",
        "color", "typography", "build", "data_transfer_object", "dto", "api",
        "item_creation", "item_detailed", "item_summary", "item_update",
        "create", "mapper", "count", "user_setup", "authentication_flow",
    ],
    "LanguageConstruct": ["sealed_class", "sealed_interface"],
    "Pattern": ["ui_logic", "ui_state", "data_model", "immutable"],
}
# [END_ANCHOR:SEMANTIC_TAXONOMY]

# [ANCHOR:ENTITY_TYPES:Constant]
# [PURPOSE] Defines the allowed entity types for [ANCHOR:id:type] definitions.
ENTITY_TYPES = [
    "Module", "Class", "Interface", "Object", "DataClass", "SealedInterface",
    "EnumClass", "Function", "UseCase", "ViewModel", "Repository",
    "DataStructure", "DatabaseTable", "ApiEndpoint",
]
# [END_ANCHOR:ENTITY_TYPES]


# [ANCHOR:SemanticValidator:Class]
# [PURPOSE] Encapsulates the logic for validating a single file against all semantic rules.
class SemanticValidator:
    # Number of lines above a `fun` declaration that are searched for its
    # ANCHOR / CONTRACT comment (the declaration line itself is searched too).
    LOOKBACK_LINES = 5

    # [ANCHOR:SemanticValidator.__init__:Method]
    # [CONTRACT:SemanticValidator.__init__]
    # [PURPOSE] Initializes the validator with the file path and reads its content.
    # [PARAM:file_path:str] The path to the file to be validated.
    # [POST] self.file_path is a Path object.
    # [POST] self.lines contains the file content as a list of strings.
    # [THROW] OSError / UnicodeDecodeError if the file cannot be read as UTF-8.
    # [END_CONTRACT:SemanticValidator.__init__]
    def __init__(self, file_path):
        self.file_path = Path(file_path)
        # Read explicitly as UTF-8 so the result does not depend on the
        # platform's locale encoding.
        self.lines = self.file_path.read_text(encoding="utf-8").splitlines()
        self.errors = []
        self.filename = self.file_path.name
        logging.info("[INFO][SemanticValidator.__init__][STATE] Initialized for file '%s'.", self.filename)
    # [END_ANCHOR:SemanticValidator.__init__]

    # [ANCHOR:SemanticValidator.validate:Method]
    # [CONTRACT:SemanticValidator.validate]
    # [PURPOSE] Runs all individual validation checks and returns a list of errors.
    # [RETURN:list] A list of formatted error strings. Empty if validation is successful.
    # [END_CONTRACT:SemanticValidator.validate]
    def validate(self):
        logging.info("[INFO][SemanticValidator.validate][START] Starting validation for %s", self.filename)
        self.check_file_header()
        self.check_semantic_taxonomy()
        self.check_anchors()
        self.check_function_anchors()
        self.check_file_termination()
        self.check_no_stray_comments()
        # check_function_contracts() is deliberately not called here: the
        # method below already performs the same "missing CONTRACT" check.
        self.check_contracts_and_implementation()
        self.check_relation_triplets()
        self.check_ai_friendly_logging()
        if not self.errors:
            logging.info("[INFO][SemanticValidator.validate][SUCCESS] Validation passed.")
        else:
            logging.info("[INFO][SemanticValidator.validate][FAILURE] Validation failed with %d errors.", len(self.errors))
        return self.errors
    # [END_ANCHOR:SemanticValidator.validate]

    # [ANCHOR:SemanticValidator.add_error:Method]
    # [CONTRACT:SemanticValidator.add_error]
    # [PURPOSE] A helper method to format and append a new error to the errors list.
    # [PARAM:line_num:int] The 1-based line number where the error occurred.
    # [PARAM:message:str] The error message.
    # [POST] A new error string is appended to self.errors.
    # [END_CONTRACT:SemanticValidator.add_error]
    def add_error(self, line_num, message):
        self.errors.append(f"L{line_num}: {message}")
    # [END_ANCHOR:SemanticValidator.add_error]

    # [ANCHOR:SemanticValidator._lookback_indices:Method]
    # [CONTRACT:SemanticValidator._lookback_indices]
    # [PURPOSE] Yields the 0-based indices searched for a comment belonging to a declaration.
    # [PARAM:line_idx:int] The 0-based index of the declaration line.
    # [RETURN:range] Indices of up to LOOKBACK_LINES preceding lines plus line_idx itself.
    # [END_CONTRACT:SemanticValidator._lookback_indices]
    def _lookback_indices(self, line_idx):
        return range(max(0, line_idx - self.LOOKBACK_LINES), line_idx + 1)
    # [END_ANCHOR:SemanticValidator._lookback_indices]

    # [ANCHOR:SemanticValidator.check_file_header:Method]
    # [CONTRACT:SemanticValidator.check_file_header]
    # [PURPOSE] Validates Rule 1: FileHeaderIntegrity.
    # [POST] Errors are added to self.errors if the header is incorrect.
    # [POST] Files shorter than two lines produce errors instead of raising IndexError.
    # [END_CONTRACT:SemanticValidator.check_file_header]
    def check_file_header(self):
        expected_header = f"// [FILE] {self.file_path.as_posix()}"
        # A missing line is treated as an empty one so short files are
        # reported as invalid rather than crashing the validator.
        first_line = self.lines[0] if self.lines else ""
        second_line = self.lines[1] if len(self.lines) > 1 else ""
        if not first_line.startswith(expected_header):
            self.add_error(1, f"FileHeaderIntegrity: File must start with '{expected_header}'.")
        if not second_line.startswith("// [SEMANTICS]"):
            self.add_error(2, "FileHeaderIntegrity: Second line must start with '// [SEMANTICS]'.")
    # [END_ANCHOR:SemanticValidator.check_file_header]

    # [ANCHOR:SemanticValidator.check_semantic_taxonomy:Method]
    # [CONTRACT:SemanticValidator.check_semantic_taxonomy]
    # [PURPOSE] Validates Rule 2: SemanticKeywordTaxonomy.
    # [POST] Errors are added to self.errors if invalid keywords are found.
    # [END_CONTRACT:SemanticValidator.check_semantic_taxonomy]
    def check_semantic_taxonomy(self):
        # A missing/incorrect [SEMANTICS] line is reported by check_file_header.
        if len(self.lines) < 2 or not self.lines[1].startswith("// [SEMANTICS]"):
            return
        semantics_str = self.lines[1].replace("// [SEMANTICS]", "").strip()
        if not semantics_str:
            self.add_error(2, "SemanticKeywordTaxonomy: [SEMANTICS] anchor cannot be empty.")
            return
        all_valid_keywords = set().union(*SEMANTIC_TAXONOMY.values())
        for keyword in (part.strip() for part in semantics_str.split(',')):
            if keyword not in all_valid_keywords:
                self.add_error(2, f"SemanticKeywordTaxonomy: Invalid keyword '{keyword}'.")
    # [END_ANCHOR:SemanticValidator.check_semantic_taxonomy]

    # [ANCHOR:SemanticValidator.check_anchors:Method]
    # [CONTRACT:SemanticValidator.check_anchors]
    # [PURPOSE] Validates Rule 3: Anchors. Checks for pairing and valid types.
    # [POST] Errors are added for mismatched or invalid anchors.
    # [POST] Anchors on indented lines are checked as well.
    # [END_CONTRACT:SemanticValidator.check_anchors]
    def check_anchors(self):
        anchor_pattern = re.compile(r"// \[ANCHOR:(\w+):(\w+)\]")
        end_anchor_pattern = re.compile(r"// \[END_ANCHOR:(\w+)\]")
        open_anchors = {}  # anchor id -> 1-based line number of its opening comment
        for i, line in enumerate(self.lines, 1):
            # Match against the stripped line: anchors nested inside a class
            # body are indented and would otherwise never be seen.
            stripped = line.strip()

            match = anchor_pattern.match(stripped)
            if match:
                anchor_id, anchor_type = match.groups()
                if anchor_type not in ENTITY_TYPES:
                    self.add_error(i, f"Anchor Error: Invalid entity type '{anchor_type}' for anchor '{anchor_id}'.")
                if anchor_id in open_anchors:
                    self.add_error(i, f"Anchor Error: Duplicate anchor ID '{anchor_id}' found.")
                else:
                    open_anchors[anchor_id] = i

            end_match = end_anchor_pattern.match(stripped)
            if end_match:
                anchor_id = end_match.group(1)
                if anchor_id not in open_anchors:
                    self.add_error(i, f"Anchor Error: Found closing anchor '// [END_ANCHOR:{anchor_id}]' without a matching opening anchor.")
                else:
                    del open_anchors[anchor_id]

        # Whatever is still open at end of file was never closed.
        for anchor_id, line_num in open_anchors.items():
            self.add_error(line_num, f"Anchor Error: Opening anchor '// [ANCHOR:{anchor_id}:...]' at line {line_num} has no matching closing anchor.")
    # [END_ANCHOR:SemanticValidator.check_anchors]

    # [ANCHOR:SemanticValidator.check_function_anchors:Method]
    # [CONTRACT:SemanticValidator.check_function_anchors]
    # [PURPOSE] Validates that all functions have a corresponding ANCHOR.
    # [POST] Errors are added if a function is found without an ANCHOR.
    # [END_CONTRACT:SemanticValidator.check_function_anchors]
    def check_function_anchors(self):
        fun_pattern = re.compile(r"fun\s+(.+?)\s*\(")
        anchor_pattern = re.compile(r"// \[ANCHOR:(\w+):Function\]")
        for i, line in enumerate(self.lines, 1):
            fun_match = fun_pattern.search(line)
            if not fun_match:
                continue
            function_name = fun_match.group(1).strip()
            found_anchor = False
            for j in self._lookback_indices(i - 1):
                anchor_match = anchor_pattern.search(self.lines[j])
                if anchor_match and anchor_match.group(1) == function_name:
                    found_anchor = True
                    break
            if not found_anchor:
                self.add_error(i, f"Function Anchor Error: Function '{function_name}' is missing a corresponding '// [ANCHOR:{function_name}:Function]' comment.")
    # [END_ANCHOR:SemanticValidator.check_function_anchors]

    # [ANCHOR:SemanticValidator.check_file_termination:Method]
    # [CONTRACT:SemanticValidator.check_file_termination]
    # [PURPOSE] Validates Rule 5: FileTermination.
    # [POST] An error is added if the file does not have the correct termination anchor.
    # [POST] An empty file produces an error (reported at line 1) instead of raising IndexError.
    # [END_CONTRACT:SemanticValidator.check_file_termination]
    def check_file_termination(self):
        expected_footer = f"// [END_FILE_{self.file_path.as_posix()}]"
        last_line = self.lines[-1].strip() if self.lines else ""
        if last_line != expected_footer:
            self.add_error(max(len(self.lines), 1), f"FileTermination: File must end with '{expected_footer}'.")
    # [END_ANCHOR:SemanticValidator.check_file_termination]

    # [ANCHOR:SemanticValidator.check_no_stray_comments:Method]
    # [CONTRACT:SemanticValidator.check_no_stray_comments]
    # [PURPOSE] Validates Rule 6: NoStrayComments.
    # [POST] Errors are added for any non-structured comments.
    # [END_CONTRACT:SemanticValidator.check_no_stray_comments]
    def check_no_stray_comments(self):
        # Accepts the bare tags even when the space after '//' is missing or repeated.
        structured_tag = re.compile(r"//\s*\[(AI_NOTE|CONTRACT|PURPOSE|PRE|POST|PARAM|RETURN|TEST|THROW|RELATION)]")
        for i, line in enumerate(self.lines, 1):
            stripped_line = line.strip()
            if not stripped_line.startswith('//'):
                continue
            # Any comment of the form '// [...' counts as structured; this
            # also covers the '// [END_...' closers.
            if stripped_line.startswith('// [') or structured_tag.match(stripped_line):
                continue
            self.add_error(i, "NoStrayComments: Stray comment found. Only structured comments are allowed.")
    # [END_ANCHOR:SemanticValidator.check_no_stray_comments]

    # [ANCHOR:SemanticValidator.check_contracts_and_implementation:Method]
    # [CONTRACT:SemanticValidator.check_contracts_and_implementation]
    # [PURPOSE] Validates Principle B: DesignByContract. Ensures PRE conditions are implemented.
    # [POST] Errors are added if a function has no matching CONTRACT comment above it.
    # [POST] Errors are added if a contract's PRE conditions are not found in `require` calls.
    # [END_CONTRACT:SemanticValidator.check_contracts_and_implementation]
    def check_contracts_and_implementation(self):
        # This is a simplified, text-based check. A full implementation would
        # require a proper parser.
        contract_pattern = re.compile(r"// \[CONTRACT:(\w+)\]")
        end_contract_pattern = re.compile(r"// \[END_CONTRACT:(\w+)\]")
        pre_pattern = re.compile(r'// \[PRE\](.*)')
        fun_pattern = re.compile(r"fun\s+(.+?)\(.*\)\s*\{")

        in_contract = False
        contract_id = None
        pre_conditions = []

        for i, line in enumerate(self.lines, 1):
            fun_match = fun_pattern.search(line)
            if fun_match:
                self._check_contract_precedes_function(i, fun_match.group(1).strip(), contract_pattern)

            contract_match = contract_pattern.search(line)
            if contract_match:
                in_contract = True
                contract_id = contract_match.group(1)
                pre_conditions = []

            if not in_contract:
                continue

            pre_match = pre_pattern.search(line)
            if pre_match:
                # Simplistic extraction of the condition text.
                pre_conditions.append(pre_match.group(1).strip().replace('"', ''))

            if end_contract_pattern.search(line):
                in_contract = False
                self._check_require_calls(i, contract_id, pre_conditions, fun_pattern)
    # [END_ANCHOR:SemanticValidator.check_contracts_and_implementation]

    # [ANCHOR:SemanticValidator._check_contract_precedes_function:Method]
    # [CONTRACT:SemanticValidator._check_contract_precedes_function]
    # [PURPOSE] Checks that a CONTRACT comment whose ID equals the function name sits just above it.
    # [PARAM:line_num:int] The 1-based line number of the function declaration.
    # [PARAM:function_name:str] The name extracted from the declaration.
    # [PARAM:contract_pattern:re.Pattern] The compiled pattern matching CONTRACT comments.
    # [POST] A mismatch error is added (at the comment's own line) for each non-matching CONTRACT seen before a match.
    # [POST] A missing-contract error is added at line_num if no matching CONTRACT is found.
    # [END_CONTRACT:SemanticValidator._check_contract_precedes_function]
    def _check_contract_precedes_function(self, line_num, function_name, contract_pattern):
        for j in self._lookback_indices(line_num - 1):
            contract_match = contract_pattern.search(self.lines[j])
            if not contract_match:
                continue
            found_id = contract_match.group(1)
            if found_id == function_name:
                return
            # j is a 0-based index; error line numbers are 1-based.
            self.add_error(j + 1, f"Function Contract Error: Contract ID '{found_id}' does not match function name '{function_name}'.")
        self.add_error(line_num, f"Function Contract Error: Function '{function_name}' is missing a corresponding '// [CONTRACT:{function_name}]' comment.")
    # [END_ANCHOR:SemanticValidator._check_contract_precedes_function]

    # [ANCHOR:SemanticValidator._check_require_calls:Method]
    # [CONTRACT:SemanticValidator._check_require_calls]
    # [PURPOSE] Checks that the first function after a contract implements its PRE conditions via `require`.
    # [PARAM:end_line_num:int] The 1-based line number of the END_CONTRACT comment.
    # [PARAM:contract_id:str] The ID of the contract being checked.
    # [PARAM:pre_conditions:list] The PRE condition texts collected from the contract.
    # [PARAM:fun_pattern:re.Pattern] The compiled pattern matching function declarations.
    # [POST] Errors are added for each PRE condition whose text is absent from the function text.
    # [POST] An error is added if no function follows the contract.
    # [END_CONTRACT:SemanticValidator._check_require_calls]
    def _check_require_calls(self, end_line_num, contract_id, pre_conditions, fun_pattern):
        # end_line_num is 1-based, so used as an index it is the line right
        # after the END_CONTRACT comment.
        for j in range(end_line_num, len(self.lines)):
            if not fun_pattern.search(self.lines[j]):
                continue
            body_end = self.find_scope_end(j)
            # Include the closing-brace line so single-line functions are not
            # reduced to an empty body.
            function_body_text = " ".join(self.lines[j:body_end + 1])
            has_require = 'require{' in function_body_text or 'require(' in function_body_text
            for pre in pre_conditions:
                # This check is basic: it only looks for the presence of the
                # condition text. A robust solution needs code parsing.
                if not has_require:
                    self.add_error(j + 1, f"DesignByContract: No `require` calls found for contract '{contract_id}' with PRE conditions.")
                elif pre not in function_body_text:
                    self.add_error(j + 1, f"DesignByContract: Missing `require` implementation for PRE condition: '{pre}' in contract '{contract_id}'.")
            return  # Only the first function after the contract is checked.
        self.add_error(end_line_num, f"DesignByContract: Could not find function/method body for contract '{contract_id}'.")
    # [END_ANCHOR:SemanticValidator._check_require_calls]

    # [ANCHOR:SemanticValidator.find_scope_end:Method]
    # [CONTRACT:SemanticValidator.find_scope_end]
    # [PURPOSE] Finds the line index of the closing brace for a scope starting at start_line_idx.
    # [PARAM:start_line_idx:int] The 0-based index of the line where the scope begins.
    # [RETURN:int] The 0-based index of the line on which the braces balance again,
    #              or the last line index if they never do.
    # [END_CONTRACT:SemanticValidator.find_scope_end]
    def find_scope_end(self, start_line_idx):
        # Braces are counted textually, so braces inside string literals or
        # comments are counted too.
        open_braces = 0
        seen_opening_brace = False
        for i in range(start_line_idx, len(self.lines)):
            line = self.lines[i]
            opened = line.count('{')
            open_braces += opened - line.count('}')
            seen_opening_brace = seen_opening_brace or opened > 0
            # Do not report the scope as closed before it has been opened
            # (e.g. when the opening brace is on the line after the header).
            if seen_opening_brace and open_braces <= 0:
                return i
        return len(self.lines) - 1  # fallback
    # [END_ANCHOR:SemanticValidator.find_scope_end]

    # [ANCHOR:SemanticValidator.check_function_contracts:Method]
    # [CONTRACT:SemanticValidator.check_function_contracts]
    # [PURPOSE] Validates that all functions have a corresponding CONTRACT.
    # [POST] Errors are added if a function is found without a CONTRACT.
    # [END_CONTRACT:SemanticValidator.check_function_contracts]
    def check_function_contracts(self):
        fun_pattern = re.compile(r"fun\s+(.+?)\(.*\)\s*\{")
        contract_pattern = re.compile(r"// \[CONTRACT:(\w+)\]")
        for i, line in enumerate(self.lines, 1):
            fun_match = fun_pattern.search(line)
            if not fun_match:
                continue
            function_name = fun_match.group(1).strip()
            found_contract = False
            for j in self._lookback_indices(i - 1):
                contract_match = contract_pattern.search(self.lines[j])
                if contract_match and contract_match.group(1) == function_name:
                    found_contract = True
                    break
            if not found_contract:
                self.add_error(i, f"Function Contract Error: Function '{function_name}' is missing a corresponding '// [CONTRACT:{function_name}]' comment.")
    # [END_ANCHOR:SemanticValidator.check_function_contracts]

    # [ANCHOR:SemanticValidator.check_relation_triplets:Method]
    # [CONTRACT:SemanticValidator.check_relation_triplets]
    # [PURPOSE] Validates the predicate of every '// [RELATION:predicate:object]' comment.
    # [POST] Errors are added for relation comments whose predicate is unknown.
    # [END_CONTRACT:SemanticValidator.check_relation_triplets]
    def check_relation_triplets(self):
        relation_pattern = re.compile(r"// \[RELATION:(\w+):(\w+)\]")
        valid_predicates = {
            "CALLS", "CREATES_INSTANCE_OF", "INHERITS_FROM", "IMPLEMENTS",
            "READS_FROM", "WRITES_TO", "MODIFIES_STATE_OF", "DEPENDS_ON",
            "DISPATCHES_EVENT", "OBSERVES", "TRIGGERS", "EMITS_STATE",
            "CONSUMES_STATE",
        }
        for i, line in enumerate(self.lines, 1):
            match = relation_pattern.search(line)
            if match and match.group(1) not in valid_predicates:
                self.add_error(i, f"Relation Triplet Error: Invalid predicate '{match.group(1)}' in relation triplet.")
    # [END_ANCHOR:SemanticValidator.check_relation_triplets]

    # [ANCHOR:SemanticValidator.check_ai_friendly_logging:Method]
    # [CONTRACT:SemanticValidator.check_ai_friendly_logging]
    # [PURPOSE] Validates Principle A: AIFriendlyLogging.
    # [POST] Errors are added for logs that use string interpolation or have an invalid format.
    # [END_CONTRACT:SemanticValidator.check_ai_friendly_logging]
    def check_ai_friendly_logging(self):
        logging_pattern = re.compile(r"Timber\.\w+\((.*)\)")
        structured_prefix = re.compile(r"\[\w+\]\[\w+\]")
        for i, line in enumerate(self.lines, 1):
            match = logging_pattern.search(line)
            if not match:
                continue
            # Only the text before the first comma (the message argument) is inspected.
            first_argument = match.group(1).split(',')[0]

            # 1. Check for string interpolation
            if '$' in first_argument:
                self.add_error(i, "AIFriendlyLogging: String interpolation with '$' is forbidden in log messages. Pass data as arguments.")

            # 2. Check for structured message format (basic check)
            log_message = first_argument.strip().replace('"', '')
            fully_bracketed = log_message.startswith('[') and log_message.endswith(']')
            if not fully_bracketed and not structured_prefix.search(log_message):
                self.add_error(i, f"AIFriendlyLogging: Log message '{log_message}' does not appear to follow the structured format '[LEVEL][ANCHOR]...'.")
    # [END_ANCHOR:SemanticValidator.check_ai_friendly_logging]
# [END_ANCHOR:SemanticValidator]


# [ANCHOR:main_execution:Block]
# [CONTRACT:main_execution]
# [PURPOSE] Main execution block.
Parses CLI arguments and runs the validator. # [PRE] The script must be run with exactly one argument: the file path. # [POST] Prints validation results to stdout. # [POST] Exits with code 1 on validation failure or incorrect usage. # [POST] Exits with code 0 on validation success. # [END_CONTRACT:main_execution] if __name__ == "__main__": parser = argparse.ArgumentParser(description="Validate Kotlin source files against the Semantic Enrichment Protocol.") parser.add_argument("paths", nargs="+", help="One or more file paths or directory paths to validate.") args = parser.parse_args() all_files_to_validate = [] for path_str in args.paths: path = Path(path_str) if path.is_file() and path.suffix == ".kt": all_files_to_validate.append(path) elif path.is_dir(): for kt_file in path.rglob("*.kt"): all_files_to_validate.append(kt_file) else: logging.warning("[WARNING][main_execution][SKIP] Skipping invalid path: %s (not a .kt file or directory).", path_str) if not all_files_to_validate: logging.error("[ERROR][main_execution][FATAL] No .kt files found to validate.") sys.exit(1) overall_success = True for file_path in all_files_to_validate: logging.info("\n[INFO][main_execution][START] Validating file: %s", file_path) validator = SemanticValidator(file_path) errors = validator.validate() if errors: overall_success = False print(f"Semantic validation failed for {file_path}:") for error in errors: print(f"- {error}") else: print(f"Semantic validation passed for {file_path}.") if not overall_success: print("\n[ERROR][main_execution][SUMMARY] One or more files failed semantic validation.") sys.exit(1) else: print("\n[INFO][main_execution][SUMMARY] All specified .kt files passed semantic validation.") # [END_ANCHOR:main_execution] # [END_FILE_validate_semantics.py]