semantic checker script update

2026-01-13 17:33:57 +03:00
parent b2529973eb
commit 11c59fb420
11 changed files with 3013 additions and 1044 deletions
--- a/generate_semantic_map.py
+++ b/generate_semantic_map.py
@@ -13,7 +13,14 @@ import os
 import re
 import json
 import datetime
-from typing import Dict, List, Optional, Any, Pattern, Tuple
+import fnmatch
+from typing import Dict, List, Optional, Any, Pattern, Tuple, Set
+
+# No-op stand-in for belief_scope so this script runs standalone without importing the real implementation
+class belief_scope:
+    def __init__(self, name): self.name = name
+    def __enter__(self): return self
+    def __exit__(self, *args): pass
 # [/SECTION]

 # [SECTION: CONFIGURATION]
@@ -30,9 +37,9 @@ OUTPUT_COMPRESSED_MD = "specs/project_map.md"
 REPORTS_DIR = "semantics/reports"

 MANDATORY_TAGS = {
-    "Module": ["PURPOSE", "LAYER"],
-    "Component": ["PURPOSE", "LAYER"],
-    "Function": ["PURPOSE"],
+    "Module": ["PURPOSE", "LAYER", "SEMANTICS"],
+    "Component": ["PURPOSE", "LAYER", "SEMANTICS"],
+    "Function": ["PURPOSE", "PRE", "POST"],
    "Class": ["PURPOSE"]
 }
 # [/SECTION]
@@ -41,108 +48,133 @@ MANDATORY_TAGS = {
 # @PURPOSE:   Represents a code entity (Module, Function, Component) found during parsing.
 # @INVARIANT: start_line is always set; end_line is set upon closure.
 class SemanticEntity:
+    # [DEF:__init__:Function]
+    # @PURPOSE: Initializes a new SemanticEntity instance.
+    # @PRE:     name, type_, start_line, file_path are provided.
+    # @POST:    Instance is initialized with default values.
    def __init__(self, name: str, type_: str, start_line: int, file_path: str):
-        self.name = name
-        self.type = type_
-        self.start_line = start_line
-        self.end_line: Optional[int] = None
-        self.file_path = file_path
-        self.tags: Dict[str, str] = {}
-        self.relations: List[Dict[str, str]] = []
-        self.children: List['SemanticEntity'] = []
-        self.parent: Optional['SemanticEntity'] = None
-        self.compliance_issues: List[str] = []
+        with belief_scope("__init__"):
+            self.name = name
+            self.type = type_
+            self.start_line = start_line
+            self.end_line: Optional[int] = None
+            self.file_path = file_path
+            self.tags: Dict[str, str] = {}
+            self.relations: List[Dict[str, str]] = []
+            self.children: List['SemanticEntity'] = []
+            self.parent: Optional['SemanticEntity'] = None
+            self.compliance_issues: List[str] = []
+    # [/DEF:__init__:Function]

    # [DEF:to_dict:Function]
    # @PURPOSE: Serializes the entity to a dictionary for JSON output.
+    # @PRE:     Entity is fully populated.
+    # @POST:    Returns a dictionary representation.
    # @RETURN:  Dict representation of the entity.
    def to_dict(self) -> Dict[str, Any]:
-        return {
-            "name": self.name,
-            "type": self.type,
-            "start_line": self.start_line,
-            "end_line": self.end_line,
-            "tags": self.tags,
-            "relations": self.relations,
-            "children": [c.to_dict() for c in self.children],
-            "compliance": {
-                "valid": len(self.compliance_issues) == 0,
-                "issues": self.compliance_issues
+        with belief_scope("to_dict"):
+            return {
+                "name": self.name,
+                "type": self.type,
+                "start_line": self.start_line,
+                "end_line": self.end_line,
+                "tags": self.tags,
+                "relations": self.relations,
+                "children": [c.to_dict() for c in self.children],
+                "compliance": {
+                    "valid": len(self.compliance_issues) == 0,
+                    "issues": self.compliance_issues
+                }
            }
-        }
    # [/DEF:to_dict:Function]

    # [DEF:validate:Function]
-    # @PURPOSE: Checks for semantic compliance (closure, mandatory tags).
+    # @PURPOSE: Checks for semantic compliance (closure, mandatory tags, belief state).
+    # @PRE:     Entity structure is complete.
    # @POST:    Populates self.compliance_issues.
    def validate(self):
-        # 1. Check Closure
-        if self.end_line is None:
-            self.compliance_issues.append(f"Unclosed Anchor: [DEF:{self.name}:{self.type}] started at line {self.start_line}")
-        
-        # 2. Check Mandatory Tags
-        required = MANDATORY_TAGS.get(self.type, [])
-        for req_tag in required:
-            found = False
-            for existing_tag in self.tags:
-                if existing_tag.upper() == req_tag:
-                    found = True
-                    break
-            if not found:
-                self.compliance_issues.append(f"Missing Mandatory Tag: @{req_tag}")
+        with belief_scope("validate"):
+            # 1. Check Closure
+            if self.end_line is None:
+                self.compliance_issues.append(f"Unclosed Anchor: [DEF:{self.name}:{self.type}] started at line {self.start_line}")
+            
+            # 2. Check Mandatory Tags
+            required = MANDATORY_TAGS.get(self.type, [])
+            for req_tag in required:
+                found = False
+                for existing_tag in self.tags:
+                    if existing_tag.upper() == req_tag:
+                        found = True
+                        break
+                if not found:
+                    self.compliance_issues.append(f"Missing Mandatory Tag: @{req_tag}")

-        # Recursive validation
-        for child in self.children:
-            child.validate()
+            # 3. Check for Belief State Logging (Python only)
+            if self.type == "Function" and self.file_path.endswith(".py"):
+                if not getattr(self, 'has_belief_scope', False):
+                    self.compliance_issues.append("Missing Belief State Logging: Function should use belief_scope context manager.")
+
+            # Recursive validation
+            for child in self.children:
+                child.validate()
    # [/DEF:validate:Function]

    # [DEF:get_score:Function]
    # @PURPOSE: Calculates a compliance score (0.0 to 1.0).
+    # @PRE:     end_line and tags have been populated by parsing (validate() is not required).
+    # @POST:    Returns a float score.
    # @RETURN:  Float score.
    def get_score(self) -> float:
-        if self.end_line is None:
-            return 0.0
-        
-        score = 1.0
-        required = MANDATORY_TAGS.get(self.type, [])
-        if required:
-            found_count = 0
-            for req_tag in required:
-                 for existing_tag in self.tags:
-                    if existing_tag.upper() == req_tag:
-                        found_count += 1
-                        break
-            if found_count < len(required):
-                # Penalty proportional to missing tags
-                score -= 0.5 * (1 - (found_count / len(required)))
-        
-        return max(0.0, score)
+        with belief_scope("get_score"):
+            if self.end_line is None:
+                return 0.0
+            
+            score = 1.0
+            required = MANDATORY_TAGS.get(self.type, [])
+            if required:
+                found_count = 0
+                for req_tag in required:
+                     for existing_tag in self.tags:
+                        if existing_tag.upper() == req_tag:
+                            found_count += 1
+                            break
+                if found_count < len(required):
+                    # Penalty proportional to missing tags
+                    score -= 0.5 * (1 - (found_count / len(required)))
+            
+            return max(0.0, score)
    # [/DEF:get_score:Function]
 # [/DEF:SemanticEntity:Class]


 # [DEF:get_patterns:Function]
 # @PURPOSE: Returns regex patterns for a specific language.
+# @PRE:     lang is either 'python' or 'svelte_js'.
+# @POST:    Returns a dictionary of compiled regex patterns.
 # @PARAM:   lang (str) - 'python' or 'svelte_js'
 # @RETURN:  Dict containing compiled regex patterns.
 def get_patterns(lang: str) -> Dict[str, Pattern]:
-    if lang == "python":
-        return {
-            "anchor_start": re.compile(r"#\s*\[DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]"),
-            "anchor_end": re.compile(r"#\s*\[/DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]"),
-            "tag": re.compile(r"#\s*@(?P<tag>[A-Z_]+):\s*(?P<value>.*)"),
-            "relation": re.compile(r"#\s*@RELATION:\s*(?P<type>\w+)\s*->\s*(?P<target>.*)"),
-        }
-    else:
-        return {
-            "html_anchor_start": re.compile(r"<!--\s*\[DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]\s*-->"),
-            "html_anchor_end": re.compile(r"<!--\s*\[/DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]\s*-->"),
-            "js_anchor_start": re.compile(r"//\s*\[DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]"),
-            "js_anchor_end": re.compile(r"//\s*\[/DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]"),
-            "html_tag": re.compile(r"@(?P<tag>[A-Z_]+):\s*(?P<value>.*)"),
-            "jsdoc_tag": re.compile(r"\*\s*@(?P<tag>[a-zA-Z]+)\s+(?P<value>.*)"),
-            "relation": re.compile(r"//\s*@RELATION:\s*(?P<type>\w+)\s*->\s*(?P<target>.*)"),
-        }
+    with belief_scope("get_patterns"):
+        if lang == "python":
+            return {
+                "anchor_start": re.compile(r"#\s*\[DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]"),
+                "anchor_end": re.compile(r"#\s*\[/DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]"),
+                "tag": re.compile(r"#\s*@(?P<tag>[A-Z_]+):\s*(?P<value>.*)"),
+                "relation": re.compile(r"#\s*@RELATION:\s*(?P<type>\w+)\s*->\s*(?P<target>.*)"),
+                "func_def": re.compile(r"^\s*(async\s+)?def\s+(?P<name>\w+)"),
+                "belief_scope": re.compile(r"with\s+belief_scope\("),
+            }
+        else:
+            return {
+                "html_anchor_start": re.compile(r"<!--\s*\[DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]\s*-->"),
+                "html_anchor_end": re.compile(r"<!--\s*\[/DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]\s*-->"),
+                "js_anchor_start": re.compile(r"//\s*\[DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]"),
+                "js_anchor_end": re.compile(r"//\s*\[/DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]"),
+                "html_tag": re.compile(r"@(?P<tag>[A-Z_]+):\s*(?P<value>.*)"),
+                "jsdoc_tag": re.compile(r"\*\s*@(?P<tag>[a-zA-Z]+)\s+(?P<value>.*)"),
+                "relation": re.compile(r"//\s*@RELATION:\s*(?P<type>\w+)\s*->\s*(?P<target>.*)"),
+                "func_def": re.compile(r"^\s*(export\s+)?(async\s+)?function\s+(?P<name>\w+)"),
+            }
 # [/DEF:get_patterns:Function]


@@ -213,7 +245,22 @@ def parse_file(full_path: str, rel_path: str, lang: str) -> Tuple[List[SemanticE
                issues.append(f"{rel_path}:{lineno} Mismatched closing anchor. Expected [/DEF:{top.name}:{top.type}], found [/DEF:{name}:{type_}].")
            continue

-        # 3. Check for Tags/Relations
+        # 3. Check for Naked Functions (Missing Contracts)
+        if "func_def" in patterns:
+            match_func = patterns["func_def"].search(line)
+            if match_func:
+                func_name = match_func.group("name")
+                is_covered = False
+                if stack:
+                    current = stack[-1]
+                    # Check if we are inside a Function anchor that matches the name
+                    if current.type == "Function" and current.name == func_name:
+                        is_covered = True
+                
+                if not is_covered:
+                    issues.append(f"{rel_path}:{lineno} Function '{func_name}' implementation found without matching [DEF:{func_name}:Function] contract.")
+
+        # 4. Check for Tags/Relations
        if stack:
            current = stack[-1]
            
@@ -238,6 +285,11 @@ def parse_file(full_path: str, rel_path: str, lang: str) -> Tuple[List[SemanticE
                tag_value = match_tag.group("value").strip()
                current.tags[tag_name] = tag_value

+            # Check for belief scope in implementation
+            if lang == "python" and "belief_scope" in patterns:
+                if patterns["belief_scope"].search(line):
+                    current.has_belief_scope = True
+
    # End of file check
    if stack:
        for unclosed in stack:
@@ -252,14 +304,79 @@ def parse_file(full_path: str, rel_path: str, lang: str) -> Tuple[List[SemanticE
 # [DEF:SemanticMapGenerator:Class]
 # @PURPOSE: Orchestrates the mapping process.
 class SemanticMapGenerator:
+    # [DEF:__init__:Function]
+    # @PURPOSE: Initializes the generator with a root directory.
+    # @PRE:     root_dir is a valid path string.
+    # @POST:    Generator instance is ready.
    def __init__(self, root_dir: str):
        self.root_dir = root_dir
        self.entities: List[SemanticEntity] = []
        self.file_scores: Dict[str, float] = {}
        self.global_issues: List[str] = []
+        self.ignored_patterns = self._load_gitignore()
+    # [/DEF:__init__:Function]
+
+    # [DEF:_load_gitignore:Function]
+    # @PURPOSE: Loads the non-blank, non-comment lines of <root_dir>/.gitignore verbatim as ignore patterns.
+    # @RETURN: Set of patterns to ignore.
+    def _load_gitignore(self) -> Set[str]:
+        patterns = set()
+        ignore_file = os.path.join(self.root_dir, ".gitignore")
+        if os.path.exists(ignore_file):
+            with open(ignore_file, 'r') as f:
+                for line in f:
+                    line = line.strip()
+                    if line and not line.startswith("#"):
+                        patterns.add(line)
+        return patterns
+    # [/DEF:_load_gitignore:Function]
+
+    # [DEF:_is_ignored:Function]
+    # @PURPOSE: Checks if a path should be ignored based on .gitignore or hardcoded defaults.
+    # @PRE:     rel_path is a valid relative path string.
+    # @POST:    Returns True if the path should be ignored.
+    # @PARAM:   rel_path (str) - Path relative to root.
+    # @RETURN:  bool - True if ignored.
+    def _is_ignored(self, rel_path: str) -> bool:
+        # Normalize path for matching
+        rel_path = rel_path.replace(os.sep, '/')
+        
+        # Check hardcoded defaults
+        parts = rel_path.split('/')
+        for part in parts:
+            if part in IGNORE_DIRS:
+                return True
+        
+        if os.path.basename(rel_path) in IGNORE_FILES:
+            return True
+
+        # Check gitignore patterns
+        for pattern in self.ignored_patterns:
+            # Handle directory patterns like 'node_modules/'
+            if pattern.endswith('/'):
+                dir_pattern = pattern.rstrip('/')
+                if rel_path == dir_pattern or rel_path.startswith(pattern):
+                    return True
+            
+            # Also match the pattern against the path with its leading frontend/ or backend/ prefix stripped
+            if rel_path.startswith("frontend/") and fnmatch.fnmatch(rel_path[9:], pattern):
+                return True
+            if rel_path.startswith("backend/") and fnmatch.fnmatch(rel_path[8:], pattern):
+                return True
+
+            # Glob-match the pattern against the full path, the basename, and each path component
+            if fnmatch.fnmatch(rel_path, pattern) or \
+               fnmatch.fnmatch(os.path.basename(rel_path), pattern) or \
+               any(fnmatch.fnmatch(part, pattern) for part in parts):
+                return True
+        
+        return False
+    # [/DEF:_is_ignored:Function]

    # [DEF:run:Function]
    # @PURPOSE: Main execution flow.
+    # @PRE:     Generator is initialized.
+    # @POST:    Semantic map and reports are generated.
    # @RELATION: CALLS -> _walk_and_parse
    # @RELATION: CALLS -> _generate_artifacts
    def run(self):
@@ -271,17 +388,20 @@ class SemanticMapGenerator:

    # [DEF:_walk_and_parse:Function]
    # @PURPOSE: Recursively walks directories and triggers parsing.
+    # @PRE:     root_dir exists.
+    # @POST:    All files are scanned and entities extracted.
    def _walk_and_parse(self):
        for root, dirs, files in os.walk(self.root_dir):
-            dirs[:] = [d for d in dirs if d not in IGNORE_DIRS]
+            # Prune ignored directories in place so os.walk skips them; the trailing "/" lets directory patterns like 'node_modules/' match
+            dirs[:] = [d for d in dirs if not self._is_ignored(os.path.relpath(os.path.join(root, d), self.root_dir) + "/")]
            
            for file in files:
-                if file in IGNORE_FILES:
-                    continue
-                
                file_path = os.path.join(root, file)
                rel_path = os.path.relpath(file_path, self.root_dir)
                
+                if self._is_ignored(rel_path):
+                    continue
+                
                lang = None
                if file.endswith(".py"):
                    lang = "python"
@@ -298,10 +418,16 @@ class SemanticMapGenerator:

    # [DEF:_process_file_results:Function]
    # @PURPOSE: Validates entities and calculates file scores.
+    # @PRE:     Entities have been parsed from the file.
+    # @POST:    File score is calculated and issues collected.
    def _process_file_results(self, rel_path: str, entities: List[SemanticEntity]):
        total_score = 0
        count = 0
        
+        # [DEF:validate_recursive:Function]
+        # @PURPOSE: Recursively validates a list of entities.
+        # @PRE:     ent_list is a list of SemanticEntity objects.
+        # @POST:    All entities and their children are validated.
        def validate_recursive(ent_list):
            nonlocal total_score, count
            for e in ent_list:
@@ -309,6 +435,7 @@ class SemanticMapGenerator:
                total_score += e.get_score()
                count += 1
                validate_recursive(e.children)
+        # [/DEF:validate_recursive:Function]

        validate_recursive(entities)
        
@@ -318,6 +445,8 @@ class SemanticMapGenerator:

    # [DEF:_generate_artifacts:Function]
    # @PURPOSE: Writes output files.
+    # @PRE:     Parsing and validation are complete.
+    # @POST:    JSON and Markdown artifacts are written to disk.
    def _generate_artifacts(self):
        # 1. Full JSON Map
        full_map = {
@@ -340,6 +469,8 @@ class SemanticMapGenerator:

    # [DEF:_generate_report:Function]
    # @PURPOSE: Generates the Markdown compliance report.
+    # @PRE:     File scores and issues are available.
+    # @POST:    Markdown report is created in reports directory.
    def _generate_report(self):
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        report_path = os.path.join(REPORTS_DIR, f"semantic_report_{timestamp}.md")
@@ -379,6 +510,8 @@ class SemanticMapGenerator:

    # [DEF:_collect_issues:Function]
    # @PURPOSE: Helper to collect issues for a specific file from the entity tree.
+    # @PRE:     entities list and file_path are valid.
+    # @POST:    issues list is populated with compliance issues.
    def _collect_issues(self, entities: List[SemanticEntity], file_path: str, issues: List[str]):
        for e in entities:
            if e.file_path == file_path:
@@ -388,6 +521,8 @@ class SemanticMapGenerator:

    # [DEF:_generate_compressed_map:Function]
    # @PURPOSE: Generates the token-optimized project map.
+    # @PRE:     Entities have been processed.
+    # @POST:    Markdown project map is written.
    def _generate_compressed_map(self):
        os.makedirs(os.path.dirname(OUTPUT_COMPRESSED_MD), exist_ok=True)
        
@@ -403,6 +538,8 @@ class SemanticMapGenerator:

    # [DEF:_write_entity_md:Function]
    # @PURPOSE: Recursive helper to write entity tree to Markdown.
+    # @PRE:     f is an open file handle, entity is valid.
+    # @POST:    Entity details are written to the file.
    def _write_entity_md(self, f, entity: SemanticEntity, level: int):
        indent = "  " * level