semantic checker script update
This commit is contained in:
@@ -13,7 +13,14 @@ import os
|
||||
import re
|
||||
import json
|
||||
import datetime
|
||||
from typing import Dict, List, Optional, Any, Pattern, Tuple
|
||||
import fnmatch
|
||||
from typing import Dict, List, Optional, Any, Pattern, Tuple, Set
|
||||
|
||||
# Mock belief_scope for the script itself to avoid import issues
|
||||
class belief_scope:
    """Local no-op stand-in for the project's ``belief_scope`` context manager.

    Defined here so the script can wrap its own functions in
    ``with belief_scope(...)`` without importing anything from the project.
    It only remembers the scope name; entering and leaving do nothing.
    """

    def __init__(self, name):
        self.name = name

    def __enter__(self):
        return self

    def __exit__(self, *args):
        # A falsy return value lets any in-flight exception propagate.
        return None
|
||||
# [/SECTION]
|
||||
|
||||
# [SECTION: CONFIGURATION]
|
||||
@@ -30,9 +37,9 @@ OUTPUT_COMPRESSED_MD = "specs/project_map.md"
|
||||
REPORTS_DIR = "semantics/reports"
|
||||
|
||||
# Tags every entity of a given type must carry to be considered compliant.
# Each key appears exactly once; the earlier, shorter lists for "Module",
# "Component" and "Function" were shadowed duplicates and have been dropped.
MANDATORY_TAGS = {
    "Module": ["PURPOSE", "LAYER", "SEMANTICS"],
    "Component": ["PURPOSE", "LAYER", "SEMANTICS"],
    "Function": ["PURPOSE", "PRE", "POST"],
    "Class": ["PURPOSE"],
}
|
||||
# [/SECTION]
|
||||
@@ -41,108 +48,133 @@ MANDATORY_TAGS = {
|
||||
# @PURPOSE: Represents a code entity (Module, Function, Component) found during parsing.
|
||||
# @INVARIANT: start_line is always set; end_line is set upon closure.
|
||||
class SemanticEntity:
    """One anchored code entity plus the tags, relations and children found inside it.

    Instances form a tree through ``children`` / ``parent``. ``validate()``
    fills ``compliance_issues`` and ``get_score()`` turns tag coverage into a
    number between 0.0 and 1.0.
    """

    # [DEF:__init__:Function]
    # @PURPOSE: Initializes a new SemanticEntity instance.
    # @PRE: name, type_, start_line, file_path are provided.
    # @POST: Instance is initialized with default values.
    def __init__(self, name: str, type_: str, start_line: int, file_path: str):
        with belief_scope("__init__"):
            self.name = name
            self.type = type_
            self.start_line = start_line
            # Remains None until the entity is closed (see validate()).
            self.end_line: Optional[int] = None
            self.file_path = file_path
            self.tags: Dict[str, str] = {}
            self.relations: List[Dict[str, str]] = []
            self.children: List['SemanticEntity'] = []
            self.parent: Optional['SemanticEntity'] = None
            self.compliance_issues: List[str] = []
            # The parser flips this to True when it sees a belief_scope block
            # inside the entity; defaulting it here means validate() never
            # depends on the attribute having been created elsewhere.
            self.has_belief_scope: bool = False
    # [/DEF:__init__:Function]

    # [DEF:to_dict:Function]
    # @PURPOSE: Serializes the entity to a dictionary for JSON output.
    # @PRE: Entity is fully populated.
    # @POST: Returns a dictionary representation.
    # @RETURN: Dict representation of the entity.
    def to_dict(self) -> Dict[str, Any]:
        with belief_scope("to_dict"):
            return {
                "name": self.name,
                "type": self.type,
                "start_line": self.start_line,
                "end_line": self.end_line,
                "tags": self.tags,
                "relations": self.relations,
                # Children are serialized recursively, depth first.
                "children": [c.to_dict() for c in self.children],
                "compliance": {
                    "valid": len(self.compliance_issues) == 0,
                    "issues": self.compliance_issues
                }
            }
    # [/DEF:to_dict:Function]

    # [DEF:validate:Function]
    # @PURPOSE: Checks for semantic compliance (closure, mandatory tags, belief state).
    # @PRE: Entity structure is complete.
    # @POST: Populates self.compliance_issues.
    def validate(self):
        with belief_scope("validate"):
            found_issues: List[str] = []

            # 1. Check Closure
            if self.end_line is None:
                found_issues.append(f"Unclosed Anchor: [DEF:{self.name}:{self.type}] started at line {self.start_line}")

            # 2. Check Mandatory Tags (tag names are compared case-insensitively)
            present = {existing_tag.upper() for existing_tag in self.tags}
            for req_tag in MANDATORY_TAGS.get(self.type, []):
                if req_tag not in present:
                    found_issues.append(f"Missing Mandatory Tag: @{req_tag}")

            # 3. Check for Belief State Logging (Python only)
            if self.type == "Function" and self.file_path.endswith(".py"):
                if not self.has_belief_scope:
                    found_issues.append("Missing Belief State Logging: Function should use belief_scope context manager.")

            # This method recurses into children below, so an entity can be
            # validated again when a caller also walks the tree itself. Only
            # messages not already recorded are appended, which keeps repeated
            # runs from duplicating issues.
            for issue in found_issues:
                if issue not in self.compliance_issues:
                    self.compliance_issues.append(issue)

            # Recursive validation
            for child in self.children:
                child.validate()
    # [/DEF:validate:Function]

    # [DEF:get_score:Function]
    # @PURPOSE: Calculates a compliance score (0.0 to 1.0).
    # @PRE: validate() has been called.
    # @POST: Returns a float score.
    # @RETURN: Float score.
    def get_score(self) -> float:
        with belief_scope("get_score"):
            # An entity that was never closed scores zero regardless of tags.
            if self.end_line is None:
                return 0.0

            score = 1.0
            required = MANDATORY_TAGS.get(self.type, [])
            if required:
                present = {existing_tag.upper() for existing_tag in self.tags}
                found_count = sum(1 for req_tag in required if req_tag in present)
                if found_count < len(required):
                    # Penalty proportional to missing tags (at most 0.5)
                    score -= 0.5 * (1 - (found_count / len(required)))

            return max(0.0, score)
    # [/DEF:get_score:Function]
|
||||
# [/DEF:SemanticEntity:Class]
|
||||
|
||||
|
||||
# [DEF:get_patterns:Function]
|
||||
# @PURPOSE: Returns regex patterns for a specific language.
|
||||
# @PRE: lang is either 'python' or 'svelte_js'.
|
||||
# @POST: Returns a dictionary of compiled regex patterns.
|
||||
# @PARAM: lang (str) - 'python' or 'svelte_js'
|
||||
# @RETURN: Dict containing compiled regex patterns.
|
||||
def get_patterns(lang: str) -> Dict[str, Pattern]:
    """Build the compiled regular expressions used to scan one source language.

    Args:
        lang: ``"python"`` selects the ``#``-comment patterns; any other value
            (the contract names ``"svelte_js"``) selects the HTML/JS patterns.

    Returns:
        A fresh dict mapping pattern names to compiled regexes. Both sets
        include ``func_def``; only the Python set includes ``belief_scope``.
    """
    # Only one body is kept: the earlier copy returned from both branches
    # before the set containing "func_def" / "belief_scope" could be reached.
    with belief_scope("get_patterns"):
        if lang == "python":
            return {
                "anchor_start": re.compile(r"#\s*\[DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]"),
                "anchor_end": re.compile(r"#\s*\[/DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]"),
                "tag": re.compile(r"#\s*@(?P<tag>[A-Z_]+):\s*(?P<value>.*)"),
                "relation": re.compile(r"#\s*@RELATION:\s*(?P<type>\w+)\s*->\s*(?P<target>.*)"),
                "func_def": re.compile(r"^\s*(async\s+)?def\s+(?P<name>\w+)"),
                "belief_scope": re.compile(r"with\s+belief_scope\("),
            }
        else:
            return {
                "html_anchor_start": re.compile(r"<!--\s*\[DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]\s*-->"),
                "html_anchor_end": re.compile(r"<!--\s*\[/DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]\s*-->"),
                "js_anchor_start": re.compile(r"//\s*\[DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]"),
                "js_anchor_end": re.compile(r"//\s*\[/DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]"),
                "html_tag": re.compile(r"@(?P<tag>[A-Z_]+):\s*(?P<value>.*)"),
                "jsdoc_tag": re.compile(r"\*\s*@(?P<tag>[a-zA-Z]+)\s+(?P<value>.*)"),
                "relation": re.compile(r"//\s*@RELATION:\s*(?P<type>\w+)\s*->\s*(?P<target>.*)"),
                "func_def": re.compile(r"^\s*(export\s+)?(async\s+)?function\s+(?P<name>\w+)"),
            }
|
||||
# [/DEF:get_patterns:Function]
|
||||
|
||||
|
||||
@@ -213,7 +245,22 @@ def parse_file(full_path: str, rel_path: str, lang: str) -> Tuple[List[SemanticE
|
||||
issues.append(f"{rel_path}:{lineno} Mismatched closing anchor. Expected [/DEF:{top.name}:{top.type}], found [/DEF:{name}:{type_}].")
|
||||
continue
|
||||
|
||||
# 3. Check for Tags/Relations
|
||||
# 3. Check for Naked Functions (Missing Contracts)
|
||||
if "func_def" in patterns:
|
||||
match_func = patterns["func_def"].search(line)
|
||||
if match_func:
|
||||
func_name = match_func.group("name")
|
||||
is_covered = False
|
||||
if stack:
|
||||
current = stack[-1]
|
||||
# Check if we are inside a Function anchor that matches the name
|
||||
if current.type == "Function" and current.name == func_name:
|
||||
is_covered = True
|
||||
|
||||
if not is_covered:
|
||||
issues.append(f"{rel_path}:{lineno} Function '{func_name}' implementation found without matching [DEF:{func_name}:Function] contract.")
|
||||
|
||||
# 4. Check for Tags/Relations
|
||||
if stack:
|
||||
current = stack[-1]
|
||||
|
||||
@@ -238,6 +285,11 @@ def parse_file(full_path: str, rel_path: str, lang: str) -> Tuple[List[SemanticE
|
||||
tag_value = match_tag.group("value").strip()
|
||||
current.tags[tag_name] = tag_value
|
||||
|
||||
# Check for belief scope in implementation
|
||||
if lang == "python" and "belief_scope" in patterns:
|
||||
if patterns["belief_scope"].search(line):
|
||||
current.has_belief_scope = True
|
||||
|
||||
# End of file check
|
||||
if stack:
|
||||
for unclosed in stack:
|
||||
@@ -252,14 +304,79 @@ def parse_file(full_path: str, rel_path: str, lang: str) -> Tuple[List[SemanticE
|
||||
# [DEF:SemanticMapGenerator:Class]
|
||||
# @PURPOSE: Orchestrates the mapping process.
|
||||
class SemanticMapGenerator:
|
||||
# [DEF:__init__:Function]
|
||||
# @PURPOSE: Initializes the generator with a root directory.
|
||||
# @PRE: root_dir is a valid path string.
|
||||
# @POST: Generator instance is ready.
|
||||
def __init__(self, root_dir: str):
|
||||
self.root_dir = root_dir
|
||||
self.entities: List[SemanticEntity] = []
|
||||
self.file_scores: Dict[str, float] = {}
|
||||
self.global_issues: List[str] = []
|
||||
self.ignored_patterns = self._load_gitignore()
|
||||
# [/DEF:__init__:Function]
|
||||
|
||||
# [DEF:_load_gitignore:Function]
|
||||
# @PURPOSE: Loads patterns from .gitignore file.
|
||||
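# @PRE: self.root_dir is set.
# @POST: Returns the non-blank, non-comment lines of <root_dir>/.gitignore, or an empty set if the file does not exist.
# @RETURN: Set of patterns to ignore.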
|
||||
def _load_gitignore(self) -> Set[str]:
|
||||
patterns = set()
|
||||
ignore_file = os.path.join(self.root_dir, ".gitignore")
|
||||
if os.path.exists(ignore_file):
|
||||
with open(ignore_file, 'r') as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if line and not line.startswith("#"):
|
||||
patterns.add(line)
|
||||
return patterns
|
||||
# [/DEF:_load_gitignore:Function]
|
||||
|
||||
# [DEF:_is_ignored:Function]
|
||||
# @PURPOSE: Checks if a path should be ignored based on .gitignore or hardcoded defaults.
|
||||
# @PRE: rel_path is a valid relative path string.
|
||||
# @POST: Returns True if the path should be ignored.
|
||||
# @PARAM: rel_path (str) - Path relative to root.
|
||||
# @RETURN: bool - True if ignored.
|
||||
def _is_ignored(self, rel_path: str) -> bool:
    """Decide whether a path relative to the root should be skipped.

    A path is ignored when any component is in ``IGNORE_DIRS``, when its base
    name is in ``IGNORE_FILES``, or when it matches one of the patterns loaded
    from ``.gitignore``. Pattern handling is an ``fnmatch``-based
    approximation of gitignore rules:

    * ``name/`` matches a directory called ``name`` at any depth;
    * ``/path`` is anchored to the root (``/path/`` only matches directories);
    * ``!pattern`` negations cannot be honoured here and are skipped;
    * anything else is tried against the full path, the base name, each
      component, and the path with a leading ``frontend/`` or ``backend/``
      removed.

    Args:
        rel_path: Path relative to the root. A trailing ``/`` marks the path
            itself as a directory.

    Returns:
        True if the path should be ignored.
    """
    # Normalize path for matching
    rel_path = rel_path.replace(os.sep, '/')
    parts = rel_path.split('/')

    # Check hardcoded defaults
    if any(part in IGNORE_DIRS for part in parts):
        return True
    if os.path.basename(rel_path) in IGNORE_FILES:
        return True

    is_dir = rel_path.endswith('/')
    # Non-empty components; a trailing slash otherwise leaves an empty last part.
    components = [part for part in parts if part]
    # Components that name directories: all of them for a directory path,
    # everything but the file name otherwise.
    dir_components = components if is_dir else components[:-1]

    # Check gitignore patterns
    for pattern in self.ignored_patterns:
        # Negation would require ordered evaluation, which a set of patterns
        # cannot provide; never treat "!x" as a positive match.
        if pattern.startswith('!'):
            continue

        # Root-anchored patterns like '/build' or '/docs/*.md': compare only
        # the leading components of the path, as many as the pattern has.
        if pattern.startswith('/'):
            anchored = pattern.strip('/')
            if not anchored:
                continue
            depth = anchored.count('/') + 1
            if len(components) < depth:
                continue
            head_is_dir = is_dir or len(components) > depth
            if pattern.endswith('/') and not head_is_dir:
                continue
            if fnmatch.fnmatch('/'.join(components[:depth]), anchored):
                return True
            continue

        # Handle directory patterns like 'node_modules/'
        if pattern.endswith('/'):
            dir_pattern = pattern.rstrip('/')
            if rel_path == dir_pattern or rel_path.startswith(pattern):
                return True
            # Without an inner slash the pattern names a directory at any
            # depth, not only directly under the root.
            if '/' not in dir_pattern and any(
                fnmatch.fnmatch(component, dir_pattern) for component in dir_components
            ):
                return True

        # Check for patterns in frontend/ or backend/
        if rel_path.startswith("frontend/") and fnmatch.fnmatch(rel_path[9:], pattern):
            return True
        if rel_path.startswith("backend/") and fnmatch.fnmatch(rel_path[8:], pattern):
            return True

        # Use fnmatch for glob patterns
        if fnmatch.fnmatch(rel_path, pattern) or \
           fnmatch.fnmatch(os.path.basename(rel_path), pattern) or \
           any(fnmatch.fnmatch(part, pattern) for part in parts):
            return True

    return False
|
||||
# [/DEF:_is_ignored:Function]
|
||||
|
||||
# [DEF:run:Function]
|
||||
# @PURPOSE: Main execution flow.
|
||||
# @PRE: Generator is initialized.
|
||||
# @POST: Semantic map and reports are generated.
|
||||
# @RELATION: CALLS -> _walk_and_parse
|
||||
# @RELATION: CALLS -> _generate_artifacts
|
||||
def run(self):
|
||||
@@ -271,17 +388,20 @@ class SemanticMapGenerator:
|
||||
|
||||
# [DEF:_walk_and_parse:Function]
|
||||
# @PURPOSE: Recursively walks directories and triggers parsing.
|
||||
# @PRE: root_dir exists.
|
||||
# @POST: All files are scanned and entities extracted.
|
||||
def _walk_and_parse(self):
|
||||
for root, dirs, files in os.walk(self.root_dir):
|
||||
dirs[:] = [d for d in dirs if d not in IGNORE_DIRS]
|
||||
# Optimization: don't enter ignored directories
|
||||
dirs[:] = [d for d in dirs if not self._is_ignored(os.path.relpath(os.path.join(root, d), self.root_dir) + "/")]
|
||||
|
||||
for file in files:
|
||||
if file in IGNORE_FILES:
|
||||
continue
|
||||
|
||||
file_path = os.path.join(root, file)
|
||||
rel_path = os.path.relpath(file_path, self.root_dir)
|
||||
|
||||
if self._is_ignored(rel_path):
|
||||
continue
|
||||
|
||||
lang = None
|
||||
if file.endswith(".py"):
|
||||
lang = "python"
|
||||
@@ -298,10 +418,16 @@ class SemanticMapGenerator:
|
||||
|
||||
# [DEF:_process_file_results:Function]
|
||||
# @PURPOSE: Validates entities and calculates file scores.
|
||||
# @PRE: Entities have been parsed from the file.
|
||||
# @POST: File score is calculated and issues collected.
|
||||
def _process_file_results(self, rel_path: str, entities: List[SemanticEntity]):
|
||||
total_score = 0
|
||||
count = 0
|
||||
|
||||
# [DEF:validate_recursive:Function]
|
||||
# @PURPOSE: Recursively validates a list of entities.
|
||||
# @PRE: ent_list is a list of SemanticEntity objects.
|
||||
# @POST: All entities and their children are validated.
|
||||
def validate_recursive(ent_list):
|
||||
nonlocal total_score, count
|
||||
for e in ent_list:
|
||||
@@ -309,6 +435,7 @@ class SemanticMapGenerator:
|
||||
total_score += e.get_score()
|
||||
count += 1
|
||||
validate_recursive(e.children)
|
||||
# [/DEF:validate_recursive:Function]
|
||||
|
||||
validate_recursive(entities)
|
||||
|
||||
@@ -318,6 +445,8 @@ class SemanticMapGenerator:
|
||||
|
||||
# [DEF:_generate_artifacts:Function]
|
||||
# @PURPOSE: Writes output files.
|
||||
# @PRE: Parsing and validation are complete.
|
||||
# @POST: JSON and Markdown artifacts are written to disk.
|
||||
def _generate_artifacts(self):
|
||||
# 1. Full JSON Map
|
||||
full_map = {
|
||||
@@ -340,6 +469,8 @@ class SemanticMapGenerator:
|
||||
|
||||
# [DEF:_generate_report:Function]
|
||||
# @PURPOSE: Generates the Markdown compliance report.
|
||||
# @PRE: File scores and issues are available.
|
||||
# @POST: Markdown report is created in reports directory.
|
||||
def _generate_report(self):
|
||||
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
report_path = os.path.join(REPORTS_DIR, f"semantic_report_{timestamp}.md")
|
||||
@@ -379,6 +510,8 @@ class SemanticMapGenerator:
|
||||
|
||||
# [DEF:_collect_issues:Function]
|
||||
# @PURPOSE: Helper to collect issues for a specific file from the entity tree.
|
||||
# @PRE: entities list and file_path are valid.
|
||||
# @POST: issues list is populated with compliance issues.
|
||||
def _collect_issues(self, entities: List[SemanticEntity], file_path: str, issues: List[str]):
|
||||
for e in entities:
|
||||
if e.file_path == file_path:
|
||||
@@ -388,6 +521,8 @@ class SemanticMapGenerator:
|
||||
|
||||
# [DEF:_generate_compressed_map:Function]
|
||||
# @PURPOSE: Generates the token-optimized project map.
|
||||
# @PRE: Entities have been processed.
|
||||
# @POST: Markdown project map is written.
|
||||
def _generate_compressed_map(self):
|
||||
os.makedirs(os.path.dirname(OUTPUT_COMPRESSED_MD), exist_ok=True)
|
||||
|
||||
@@ -403,6 +538,8 @@ class SemanticMapGenerator:
|
||||
|
||||
# [DEF:_write_entity_md:Function]
|
||||
# @PURPOSE: Recursive helper to write entity tree to Markdown.
|
||||
# @PRE: f is an open file handle, entity is valid.
|
||||
# @POST: Entity details are written to the file.
|
||||
def _write_entity_md(self, f, entity: SemanticEntity, level: int):
|
||||
indent = " " * level
|
||||
|
||||
|
||||
Reference in New Issue
Block a user