309 lines
16 KiB
Python
309 lines
16 KiB
Python
# [FILE] validate_semantics.py
|
|
# [PURPOSE] This script provides a CLI tool to validate a given Kotlin source file against the Semantic Enrichment Protocol.
|
|
# [SEMANTICS] validation, cli, code_quality, python
|
|
|
|
import re
|
|
import sys
|
|
import logging
|
|
from pathlib import Path
|
|
|
|
# Configure logging
|
|
logging.basicConfig(level=logging.INFO, format='%(message)s')
|
|
|
|
# [ANCHOR:SEMANTIC_TAXONOMY:Constant]
|
|
# [PURPOSE] Defines the allowed keywords for the [SEMANTICS] header, mirroring semantic_enrichment_protocol.md.
|
|
# Taxonomy from semantic_enrichment_protocol.md
|
|
SEMANTIC_TAXONOMY = {
    # Architectural layer the file belongs to.
    "Layer": ["ui", "domain", "data", "presentation"],
    # Kind of component implemented by the file.
    "Component": [
        "viewmodel", "usecase", "repository", "service",
        "screen", "component", "dialog", "model",
        "entity", "activity", "application", "nav_host",
        "controller", "navigation_drawer", "scaffold", "dashboard",
        "item", "label", "location", "setup",
        "theme", "dependencies", "custom_field", "statistics",
        "image", "attachment", "item_creation", "item_detailed",
        "item_summary", "item_update", "summary", "update",
    ],
    # Technical or functional concern the file addresses.
    "Concern": [
        "networking", "database", "caching", "authentication", "validation",
        "parsing", "state_management", "navigation", "di", "testing",
        "entrypoint", "hilt", "timber", "compose", "actions",
        "routes", "common", "color_selection", "loading", "list",
        "details", "edit", "label_management", "labels_list", "dialog_management",
        "locations", "sealed_state", "parallel_data_loading", "timber_logging", "dialog",
        "color", "typography", "build", "data_transfer_object", "dto",
        "api", "item_creation", "item_detailed", "item_summary", "item_update",
        "create", "mapper", "count", "user_setup", "authentication_flow",
    ],
    # Notable language constructs used by the file.
    "LanguageConstruct": ["sealed_class", "sealed_interface"],
    # Design patterns the file follows.
    "Pattern": ["ui_logic", "ui_state", "data_model", "immutable"],
}
|
|
# [END_ANCHOR:SEMANTIC_TAXONOMY]
|
|
|
|
# [ANCHOR:ENTITY_TYPES:Constant]
|
|
# [PURPOSE] Defines the allowed entity types for [ANCHOR:id:type] definitions.
|
|
ENTITY_TYPES = [
    "Module",
    "Class",
    "Interface",
    "Object",
    "DataClass",
    "SealedInterface",
    "EnumClass",
    "Function",
    "UseCase",
    "ViewModel",
    "Repository",
    "DataStructure",
    "DatabaseTable",
    "ApiEndpoint",
]
|
|
# [END_ANCHOR:ENTITY_TYPES]
|
|
|
|
# [ANCHOR:SemanticValidator:Class]
|
|
# [PURPOSE] Encapsulates the logic for validating a single file against all semantic rules.
|
|
class SemanticValidator:
    """Runs the semantic-protocol checks against the lines of a single file.

    Each check appends ``"L<line>: <message>"`` strings to ``self.errors``;
    ``validate()`` runs every check in order and returns that list.
    """

    # [ANCHOR:SemanticValidator.__init__:Method]
    # [CONTRACT:SemanticValidator.__init__]
    # [PURPOSE] Initializes the validator with the file path and reads its content.
    # [PARAM:file_path:str] The path to the file to be validated.
    # [POST] self.file_path is a Path object.
    # [POST] self.lines contains the file content as a list of strings.
    # [POST] self.errors is an empty list and self.filename is the file's base name.
    # [THROW] OSError / UnicodeDecodeError propagate if the file cannot be read or decoded.
    # [END_CONTRACT:SemanticValidator.__init__]
    def __init__(self, file_path):
        self.file_path = Path(file_path)
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's default encoding.
        self.lines = self.file_path.read_text(encoding="utf-8").splitlines()
        self.errors = []
        self.filename = self.file_path.name
        logging.info("[INFO][SemanticValidator.__init__][STATE] Initialized for file '%s'.", self.filename)
    # [END_ANCHOR:SemanticValidator.__init__]

    # [ANCHOR:SemanticValidator.validate:Method]
    # [CONTRACT:SemanticValidator.validate]
    # [PURPOSE] Runs all individual validation checks and returns a list of errors.
    # [RETURN:list] A list of formatted error strings. Empty if validation is successful.
    # [END_CONTRACT:SemanticValidator.validate]
    def validate(self):
        logging.info("[INFO][SemanticValidator.validate][START] Starting validation for %s", self.filename)
        self.check_file_header()
        self.check_semantic_taxonomy()
        self.check_anchors()
        self.check_file_termination()
        self.check_no_stray_comments()
        self.check_contracts_and_implementation()
        self.check_ai_friendly_logging()
        if not self.errors:
            logging.info("[INFO][SemanticValidator.validate][SUCCESS] Validation passed.")
        else:
            logging.info("[INFO][SemanticValidator.validate][FAILURE] Validation failed with %d errors.", len(self.errors))
        return self.errors
    # [END_ANCHOR:SemanticValidator.validate]

    # [ANCHOR:SemanticValidator.add_error:Method]
    # [CONTRACT:SemanticValidator.add_error]
    # [PURPOSE] A helper method to format and append a new error to the errors list.
    # [PARAM:line_num:int] The line number where the error occurred.
    # [PARAM:message:str] The error message.
    # [POST] A new error string is appended to self.errors.
    # [END_CONTRACT:SemanticValidator.add_error]
    def add_error(self, line_num, message):
        self.errors.append(f"L{line_num}: {message}")
    # [END_ANCHOR:SemanticValidator.add_error]

    # [ANCHOR:SemanticValidator.check_file_header:Method]
    # [CONTRACT:SemanticValidator.check_file_header]
    # [PURPOSE] Validates Rule 1: FileHeaderIntegrity.
    # [POST] Errors are added to self.errors if the header is incorrect.
    # [POST] A missing first or second line is reported as an incorrect header.
    # [END_CONTRACT:SemanticValidator.check_file_header]
    def check_file_header(self):
        # Treat absent lines as empty so that empty or one-line files get a
        # validation error instead of raising IndexError.
        first_line = self.lines[0] if self.lines else ""
        second_line = self.lines[1] if len(self.lines) > 1 else ""
        if not first_line.startswith(f"// [FILE] {self.filename}"):
            self.add_error(1, f"FileHeaderIntegrity: File must start with '// [FILE] {self.filename}'.")
        if not second_line.startswith("// [SEMANTICS]"):
            self.add_error(2, "FileHeaderIntegrity: Second line must start with '// [SEMANTICS]'.")
    # [END_ANCHOR:SemanticValidator.check_file_header]

    # [ANCHOR:SemanticValidator.check_semantic_taxonomy:Method]
    # [CONTRACT:SemanticValidator.check_semantic_taxonomy]
    # [PURPOSE] Validates Rule 2: SemanticKeywordTaxonomy.
    # [POST] Errors are added to self.errors if invalid keywords are found.
    # [END_CONTRACT:SemanticValidator.check_semantic_taxonomy]
    def check_semantic_taxonomy(self):
        # A missing or malformed [SEMANTICS] line is reported by
        # check_file_header; there is nothing to check here in that case.
        if len(self.lines) < 2 or not self.lines[1].startswith("// [SEMANTICS]"):
            return

        semantics_str = self.lines[1].replace("// [SEMANTICS]", "").strip()
        if not semantics_str:
            self.add_error(2, "SemanticKeywordTaxonomy: [SEMANTICS] anchor cannot be empty.")
            return

        # Keywords are valid if they appear in any taxonomy category.
        all_valid_keywords = set().union(*SEMANTIC_TAXONOMY.values())
        for keyword in (part.strip() for part in semantics_str.split(',')):
            if keyword not in all_valid_keywords:
                self.add_error(2, f"SemanticKeywordTaxonomy: Invalid keyword '{keyword}'.")
    # [END_ANCHOR:SemanticValidator.check_semantic_taxonomy]

    # [ANCHOR:SemanticValidator.check_anchors:Method]
    # [CONTRACT:SemanticValidator.check_anchors]
    # [PURPOSE] Validates Rule 3: Anchors. Checks for pairing and valid types.
    # [POST] Errors are added for mismatched or invalid anchors.
    # [END_CONTRACT:SemanticValidator.check_anchors]
    def check_anchors(self):
        anchor_pattern = re.compile(r"// \[ANCHOR:(\w+):(\w+)\]")
        end_anchor_pattern = re.compile(r"// \[END_ANCHOR:(\w+)\]")
        # Maps the ID of every currently open anchor to its opening line number.
        open_anchors = {}

        for i, raw_line in enumerate(self.lines, 1):
            # Strip first: anchors on indented declarations must be checked
            # too, and .match() only matches at the start of the string.
            line = raw_line.strip()

            # Check entity type in ANCHOR
            match = anchor_pattern.match(line)
            if match:
                anchor_id, anchor_type = match.groups()
                if anchor_type not in ENTITY_TYPES:
                    self.add_error(i, f"Anchor Error: Invalid entity type '{anchor_type}' for anchor '{anchor_id}'.")
                if anchor_id in open_anchors:
                    self.add_error(i, f"Anchor Error: Duplicate anchor ID '{anchor_id}' found.")
                else:
                    open_anchors[anchor_id] = i

            # Check for matching END_ANCHOR
            end_match = end_anchor_pattern.match(line)
            if end_match:
                anchor_id = end_match.group(1)
                if anchor_id not in open_anchors:
                    self.add_error(i, f"Anchor Error: Found closing anchor '// [END_ANCHOR:{anchor_id}]' without a matching opening anchor.")
                else:
                    del open_anchors[anchor_id]

        # Whatever is still open at end of file was never closed.
        for anchor_id, line_num in open_anchors.items():
            self.add_error(line_num, f"Anchor Error: Opening anchor '// [ANCHOR:{anchor_id}:...]' at line {line_num} has no matching closing anchor.")
    # [END_ANCHOR:SemanticValidator.check_anchors]

    # [ANCHOR:SemanticValidator.check_file_termination:Method]
    # [CONTRACT:SemanticValidator.check_file_termination]
    # [PURPOSE] Validates Rule 5: FileTermination.
    # [POST] An error is added if the file does not have the correct termination anchor.
    # [POST] An empty file is reported as missing the termination anchor (at line 1).
    # [END_CONTRACT:SemanticValidator.check_file_termination]
    def check_file_termination(self):
        # Guard against an empty file, where self.lines[-1] would raise IndexError.
        last_line = self.lines[-1].strip() if self.lines else ""
        if last_line != f"// [END_FILE_{self.filename}]":
            self.add_error(max(len(self.lines), 1), f"FileTermination: File must end with '// [END_FILE_{self.filename}]'.")
    # [END_ANCHOR:SemanticValidator.check_file_termination]

    # [ANCHOR:SemanticValidator.check_no_stray_comments:Method]
    # [CONTRACT:SemanticValidator.check_no_stray_comments]
    # [PURPOSE] Validates Rule 6: NoStrayComments.
    # [POST] Errors are added for any non-structured comments.
    # [END_CONTRACT:SemanticValidator.check_no_stray_comments]
    def check_no_stray_comments(self):
        # Known tags are accepted with any (or no) whitespace after '//'.
        tagged_comment = re.compile(
            r"//\s*\[(AI_NOTE|CONTRACT|PURPOSE|PRE|POST|PARAM|RETURN|TEST|THROW|RELATION)]"
        )
        for i, line in enumerate(self.lines, 1):
            stripped_line = line.strip()
            # Only whole-line '//' comments are inspected.
            if not stripped_line.startswith('//'):
                continue
            # Any comment of the form '// [...' counts as structured.
            if stripped_line.startswith('// [') or tagged_comment.match(stripped_line):
                continue
            self.add_error(i, "NoStrayComments: Stray comment found. Only structured comments are allowed.")
    # [END_ANCHOR:SemanticValidator.check_no_stray_comments]

    # [ANCHOR:SemanticValidator.check_contracts_and_implementation:Method]
    # [CONTRACT:SemanticValidator.check_contracts_and_implementation]
    # [PURPOSE] Validates Principle B: DesignByContract. Ensures PRE conditions are implemented.
    # [POST] Errors are added if contract implementations are missing.
    # [END_CONTRACT:SemanticValidator.check_contracts_and_implementation]
    def check_contracts_and_implementation(self):
        # This is a simplified, text-based check; a full implementation
        # would require a proper parser. It collects the [PRE] lines of each
        # contract block and looks for them in the first function that follows.
        contract_pattern = re.compile(r"// \[CONTRACT:(\w+)\]")
        end_contract_pattern = re.compile(r"// \[END_CONTRACT:(\w+)\]")
        pre_pattern = re.compile(r'// \[PRE\](.*)')
        fun_pattern = re.compile(r"fun\s+\w+\(.*\)\s*\{")

        in_contract = False
        contract_id = None
        pre_conditions = []

        for i, line in enumerate(self.lines, 1):
            contract_match = contract_pattern.search(line)
            if contract_match:
                in_contract = True
                contract_id = contract_match.group(1)
                pre_conditions = []

            if not in_contract:
                continue

            pre_match = pre_pattern.search(line)
            if pre_match:
                # Simplistic extraction of the condition text.
                pre_conditions.append(pre_match.group(1).strip().replace('"', ''))

            if not end_contract_pattern.search(line):
                continue

            in_contract = False
            # i is 1-based, so used as a 0-based index it already points at
            # the line after END_CONTRACT. Only the first function found is checked.
            fun_idx = next(
                (j for j in range(i, len(self.lines)) if fun_pattern.search(self.lines[j])),
                None,
            )
            if fun_idx is None:
                self.add_error(i, f"DesignByContract: Could not find function/method body for contract '{contract_id}'.")
            else:
                self._check_preconditions(fun_idx, contract_id, pre_conditions)

    def _check_preconditions(self, fun_idx, contract_id, pre_conditions):
        """Report PRE conditions that are not reflected in the function at fun_idx.

        fun_idx is the 0-based index of the function's declaration line;
        errors are reported against that line (1-based).
        """
        if not pre_conditions:
            return

        body_end = self.find_scope_end(fun_idx)
        # Include the closing-brace line so one-line functions are not seen
        # as having an empty body.
        function_body_text = " ".join(self.lines[fun_idx:body_end + 1])

        # Report a body without any require call once, not once per PRE.
        if 'require{' not in function_body_text and 'require(' not in function_body_text:
            self.add_error(fun_idx + 1, f"DesignByContract: No `require` calls found for contract '{contract_id}' with PRE conditions.")
            return

        for pre in pre_conditions:
            # Basic check: the PRE text must appear verbatim somewhere in the body.
            if pre not in function_body_text:
                self.add_error(fun_idx + 1, f"DesignByContract: Missing `require` implementation for PRE condition: '{pre}' in contract '{contract_id}'.")

    def find_scope_end(self, start_line_idx):
        """Finds the line index of the closing brace for a scope starting at start_line_idx.

        Braces are counted textually, line by line, so braces inside string
        literals or comments are counted as well. Returns start_line_idx
        itself when that line's braces already balance, and the last line
        index when the braces never balance.
        """
        open_braces = 0
        for i in range(start_line_idx, len(self.lines)):
            line = self.lines[i]
            open_braces += line.count('{') - line.count('}')
            if open_braces == 0:
                return i
        return len(self.lines) - 1  # fallback
    # [END_ANCHOR:SemanticValidator.check_contracts_and_implementation]

    # [ANCHOR:SemanticValidator.check_ai_friendly_logging:Method]
    # [CONTRACT:SemanticValidator.check_ai_friendly_logging]
    # [PURPOSE] Validates Principle A: AIFriendlyLogging.
    # [POST] Errors are added for logs that use string interpolation or have an invalid format.
    # [END_CONTRACT:SemanticValidator.check_ai_friendly_logging]
    def check_ai_friendly_logging(self):
        logging_pattern = re.compile(r"Timber\.\w+\((.*)\)")
        tag_pair_pattern = re.compile(r"\[\w+\]\[\w+\]")
        for i, line in enumerate(self.lines, 1):
            match = logging_pattern.search(line)
            if not match:
                continue

            # Everything before the first comma is taken to be the message.
            first_argument = match.group(1).split(',')[0]

            # 1. Check for string interpolation
            if '$' in first_argument:
                self.add_error(i, "AIFriendlyLogging: String interpolation with '$' is forbidden in log messages. Pass data as arguments.")

            # 2. Check for structured message format (basic check)
            log_message = first_argument.strip().replace('"', '')
            fully_bracketed = log_message.startswith('[') and log_message.endswith(']')
            if not fully_bracketed and not tag_pair_pattern.search(log_message):
                self.add_error(i, f"AIFriendlyLogging: Log message '{log_message}' does not appear to follow the structured format '[LEVEL][ANCHOR]...'.")
    # [END_ANCHOR:SemanticValidator.check_ai_friendly_logging]
|
|
# [END_ANCHOR:SemanticValidator]
|
|
|
|
|
|
# [ANCHOR:main_execution:Block]
|
|
# [CONTRACT:main_execution]
|
|
# [PURPOSE] Main execution block. Parses CLI arguments and runs the validator.
|
|
# [PRE] The script must be run with exactly one argument: the file path.
|
|
# [POST] Prints validation results to stdout.
|
|
# [POST] Exits with code 1 on validation failure or incorrect usage.
|
|
# [POST] Exits with code 0 on validation success.
|
|
# [END_CONTRACT:main_execution]
|
|
if __name__ == "__main__":
    # Exactly one positional argument (the file to validate) is expected.
    if len(sys.argv) != 2:
        logging.error("[ERROR][main_execution][FATAL] Incorrect number of arguments provided.")
        print("Usage: python validate_semantics.py <file_path>")
        sys.exit(1)

    file_to_validate = sys.argv[1]
    try:
        validator = SemanticValidator(file_to_validate)
    except (OSError, UnicodeDecodeError) as exc:
        # A missing or unreadable file is a usage-level failure: report it
        # and exit with code 1 instead of surfacing a raw traceback.
        logging.error("[ERROR][main_execution][FATAL] Cannot read file '%s': %s", file_to_validate, exc)
        sys.exit(1)

    errors = validator.validate()

    if errors:
        print(f"Semantic validation failed for {file_to_validate}:")
        for error in errors:
            print(f"- {error}")
        sys.exit(1)
    else:
        print(f"Semantic validation passed for {file_to_validate}.")
|
|
# [END_ANCHOR:main_execution]
|
|
|
|
# [END_FILE_validate_semantics.py] |