
# [DEF:generate_semantic_map:Module]
#
# @SEMANTICS: semantic_analysis, parser, map_generator, compliance_checker
# @PURPOSE: Scans the codebase to generate a Semantic Map and Compliance Report based on the System Standard.
# @LAYER: DevOps/Tooling
# @RELATION: READS -> FileSystem
# @RELATION: PRODUCES -> semantics/semantic_map.json
# @RELATION: PRODUCES -> specs/project_map.md
# @RELATION: PRODUCES -> semantics/reports/semantic_report_*.md
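#
# For illustration, a compliant annotated entity in a scanned Python file
# looks like this (hypothetical names):
#
#   # [DEF:load_config:Function]
#   # @PURPOSE: Loads the application configuration.
#   # @PRE: Config file exists.
#   # @POST: Returns a populated dict.
#   def load_config():
#       with belief_scope("load_config"):
#           ...
#   # [/DEF:load_config:Function]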
# [SECTION: IMPORTS]
import os
import re
import json
import datetime
import fnmatch
from typing import Dict, List, Optional, Any, Pattern, Tuple, Set
# Minimal stand-in for belief_scope so this standalone tool has no runtime dependency on the real implementation
class belief_scope:
# [DEF:__init__:Function]
# @PURPOSE: Mock init.
# @PRE: name is a string.
# @POST: Instance initialized.
def __init__(self, name):
self.name = name
# [/DEF:__init__:Function]
# [DEF:__enter__:Function]
# @PURPOSE: Mock enter.
# @PRE: Instance initialized.
# @POST: Returns self.
def __enter__(self):
return self
# [/DEF:__enter__:Function]
# [DEF:__exit__:Function]
# @PURPOSE: Mock exit.
# @PRE: Context entered.
# @POST: Context exited.
def __exit__(self, *args):
pass
# [/DEF:__exit__:Function]
# [/SECTION]
# [SECTION: CONFIGURATION]
PROJECT_ROOT = "."
IGNORE_DIRS = {
".git", "__pycache__", "node_modules", "venv", ".pytest_cache",
".kilocode", "backups", "logs", "semantics", "specs"
}
IGNORE_FILES = {
"package-lock.json", "poetry.lock", "yarn.lock"
}
OUTPUT_JSON = "semantics/semantic_map.json"
OUTPUT_COMPRESSED_MD = "specs/project_map.md"
REPORTS_DIR = "semantics/reports"
MANDATORY_TAGS = {
"Module": ["PURPOSE", "LAYER", "SEMANTICS"],
"Component": ["PURPOSE", "LAYER", "SEMANTICS"],
"Function": ["PURPOSE", "PRE", "POST"],
"Class": ["PURPOSE"]
}
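# Example: a Module anchor must carry @PURPOSE, @LAYER and @SEMANTICS to be
# fully compliant, while a bare @PURPOSE is enough for a Class.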
# [/SECTION]
# [DEF:SemanticEntity:Class]
# @PURPOSE: Represents a code entity (Module, Function, Component) found during parsing.
# @INVARIANT: start_line is always set; end_line is set upon closure.
class SemanticEntity:
# [DEF:__init__:Function]
# @PURPOSE: Initializes a new SemanticEntity instance.
# @PRE: name, type_, start_line, file_path are provided.
# @POST: Instance is initialized with default values.
def __init__(self, name: str, type_: str, start_line: int, file_path: str):
with belief_scope("__init__"):
self.name = name
self.type = type_
self.start_line = start_line
self.end_line: Optional[int] = None
self.file_path = file_path
self.tags: Dict[str, str] = {}
self.relations: List[Dict[str, str]] = []
self.children: List['SemanticEntity'] = []
self.parent: Optional['SemanticEntity'] = None
            self.compliance_issues: List[str] = []
            self.has_belief_scope: bool = False  # set by parse_file when a belief_scope call is seen
# [/DEF:__init__:Function]
# [DEF:to_dict:Function]
# @PURPOSE: Serializes the entity to a dictionary for JSON output.
# @PRE: Entity is fully populated.
# @POST: Returns a dictionary representation.
# @RETURN: Dict representation of the entity.
def to_dict(self) -> Dict[str, Any]:
with belief_scope("to_dict"):
return {
"name": self.name,
"type": self.type,
"start_line": self.start_line,
"end_line": self.end_line,
"tags": self.tags,
"relations": self.relations,
"children": [c.to_dict() for c in self.children],
"compliance": {
"valid": len(self.compliance_issues) == 0,
"issues": self.compliance_issues
}
}
# [/DEF:to_dict:Function]
# [DEF:validate:Function]
# @PURPOSE: Checks for semantic compliance (closure, mandatory tags, belief state).
# @PRE: Entity structure is complete.
# @POST: Populates self.compliance_issues.
def validate(self):
with belief_scope("validate"):
# 1. Check Closure
if self.end_line is None:
self.compliance_issues.append(f"Unclosed Anchor: [DEF:{self.name}:{self.type}] started at line {self.start_line}")
# 2. Check Mandatory Tags
required = MANDATORY_TAGS.get(self.type, [])
for req_tag in required:
found = False
for existing_tag in self.tags:
if existing_tag.upper() == req_tag:
found = True
break
if not found:
self.compliance_issues.append(f"Missing Mandatory Tag: @{req_tag}")
# 3. Check for Belief State Logging (Python only)
if self.type == "Function" and self.file_path.endswith(".py"):
                if not self.has_belief_scope:
self.compliance_issues.append("Missing Belief State Logging: Function should use belief_scope context manager.")
# Recursive validation
for child in self.children:
child.validate()
# [/DEF:validate:Function]
# [DEF:get_score:Function]
# @PURPOSE: Calculates a compliance score (0.0 to 1.0).
# @PRE: validate() has been called.
# @POST: Returns a float score.
# @RETURN: Float score.
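    # Example: an unclosed anchor scores 0.0; a closed Function missing one of
    # its three mandatory tags scores 1.0 - 0.5 * (1 - 2/3) ≈ 0.83.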
def get_score(self) -> float:
with belief_scope("get_score"):
if self.end_line is None:
return 0.0
score = 1.0
required = MANDATORY_TAGS.get(self.type, [])
if required:
found_count = 0
for req_tag in required:
for existing_tag in self.tags:
if existing_tag.upper() == req_tag:
found_count += 1
break
if found_count < len(required):
# Penalty proportional to missing tags
score -= 0.5 * (1 - (found_count / len(required)))
return max(0.0, score)
# [/DEF:get_score:Function]
# [/DEF:SemanticEntity:Class]
# [DEF:get_patterns:Function]
# @PURPOSE: Returns regex patterns for a specific language.
# @PRE: lang is either 'python' or 'svelte_js'.
# @POST: Returns a dictionary of compiled regex patterns.
# @PARAM: lang (str) - 'python' or 'svelte_js'
# @RETURN: Dict containing compiled regex patterns.
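# Example lines each mode matches (illustrative names):
#   python:    "# [DEF:parse_file:Function]", "# @PURPOSE: ..."
#   svelte_js: "<!-- [DEF:App:Component] -->", "// [DEF:handleClick:Function]"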
def get_patterns(lang: str) -> Dict[str, Pattern]:
with belief_scope("get_patterns"):
if lang == "python":
return {
"anchor_start": re.compile(r"#\s*\[DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]"),
"anchor_end": re.compile(r"#\s*\[/DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]"),
"tag": re.compile(r"#\s*@(?P<tag>[A-Z_]+):\s*(?P<value>.*)"),
"relation": re.compile(r"#\s*@RELATION:\s*(?P<type>\w+)\s*->\s*(?P<target>.*)"),
"func_def": re.compile(r"^\s*(async\s+)?def\s+(?P<name>\w+)"),
"belief_scope": re.compile(r"with\s+(\w+\.)?belief_scope\("),
}
else:
return {
"html_anchor_start": re.compile(r"<!--\s*\[DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]\s*-->"),
"html_anchor_end": re.compile(r"<!--\s*\[/DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]\s*-->"),
"js_anchor_start": re.compile(r"//\s*\[DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]"),
"js_anchor_end": re.compile(r"//\s*\[/DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]"),
"html_tag": re.compile(r"@(?P<tag>[A-Z_]+):\s*(?P<value>.*)"),
"jsdoc_tag": re.compile(r"\*\s*@(?P<tag>[a-zA-Z]+)\s+(?P<value>.*)"),
"relation": re.compile(r"//\s*@RELATION:\s*(?P<type>\w+)\s*->\s*(?P<target>.*)"),
"func_def": re.compile(r"^\s*(export\s+)?(async\s+)?function\s+(?P<name>\w+)"),
}
# [/DEF:get_patterns:Function]
# [DEF:parse_file:Function]
# @PURPOSE: Parses a single file to extract semantic entities.
# @PRE: full_path, rel_path, lang are valid strings.
# @POST: Returns extracted entities and list of issues.
# @PARAM: full_path - Absolute path to file.
# @PARAM: rel_path - Relative path from project root.
# @PARAM: lang - Language identifier.
# @RETURN: Tuple[List[SemanticEntity], List[str]] - Entities found and global issues.
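# Usage example (hypothetical path):
#   entities, issues = parse_file("./app/main.py", "app/main.py", "python")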
def parse_file(full_path: str, rel_path: str, lang: str) -> Tuple[List[SemanticEntity], List[str]]:
with belief_scope("parse_file"):
issues: List[str] = []
try:
with open(full_path, 'r', encoding='utf-8') as f:
lines = f.readlines()
except Exception as e:
return [], [f"Could not read file {rel_path}: {e}"]
stack: List[SemanticEntity] = []
file_entities: List[SemanticEntity] = []
patterns = get_patterns(lang)
for i, line in enumerate(lines):
lineno = i + 1
line = line.strip()
# 1. Check for Anchor Start
match_start = None
if lang == "python":
match_start = patterns["anchor_start"].search(line)
else:
match_start = patterns["html_anchor_start"].search(line) or patterns["js_anchor_start"].search(line)
if match_start:
name = match_start.group("name")
type_ = match_start.group("type")
entity = SemanticEntity(name, type_, lineno, rel_path)
if stack:
parent = stack[-1]
parent.children.append(entity)
entity.parent = parent
else:
file_entities.append(entity)
stack.append(entity)
continue
# 2. Check for Anchor End
match_end = None
if lang == "python":
match_end = patterns["anchor_end"].search(line)
else:
match_end = patterns["html_anchor_end"].search(line) or patterns["js_anchor_end"].search(line)
if match_end:
name = match_end.group("name")
type_ = match_end.group("type")
if not stack:
issues.append(f"{rel_path}:{lineno} Found closing anchor [/DEF:{name}:{type_}] without opening anchor.")
continue
top = stack[-1]
if top.name == name and top.type == type_:
top.end_line = lineno
stack.pop()
else:
issues.append(f"{rel_path}:{lineno} Mismatched closing anchor. Expected [/DEF:{top.name}:{top.type}], found [/DEF:{name}:{type_}].")
continue
# 3. Check for Naked Functions (Missing Contracts)
if "func_def" in patterns:
match_func = patterns["func_def"].search(line)
if match_func:
func_name = match_func.group("name")
is_covered = False
if stack:
current = stack[-1]
# Check if we are inside a Function anchor that matches the name
if current.type == "Function" and current.name == func_name:
is_covered = True
if not is_covered:
issues.append(f"{rel_path}:{lineno} Function '{func_name}' implementation found without matching [DEF:{func_name}:Function] contract.")
# 4. Check for Tags/Relations
if stack:
current = stack[-1]
match_rel = patterns["relation"].search(line)
if match_rel:
current.relations.append({
"type": match_rel.group("type"),
"target": match_rel.group("target")
})
continue
match_tag = None
if lang == "python":
match_tag = patterns["tag"].search(line)
elif lang == "svelte_js":
match_tag = patterns["html_tag"].search(line)
                    if not match_tag and ("*" in line or "//" in line):
match_tag = patterns["jsdoc_tag"].search(line)
if match_tag:
tag_name = match_tag.group("tag").upper()
tag_value = match_tag.group("value").strip()
current.tags[tag_name] = tag_value
# Check for belief scope in implementation
if lang == "python" and "belief_scope" in patterns:
if patterns["belief_scope"].search(line):
current.has_belief_scope = True
# End of file check
if stack:
for unclosed in stack:
unclosed.compliance_issues.append(f"Unclosed Anchor at end of file (started line {unclosed.start_line})")
if unclosed.parent is None and unclosed not in file_entities:
file_entities.append(unclosed)
return file_entities, issues
# [/DEF:parse_file:Function]
# [DEF:SemanticMapGenerator:Class]
# @PURPOSE: Orchestrates the full mapping run: directory walk, parsing, validation, and artifact generation.
class SemanticMapGenerator:
# [DEF:__init__:Function]
# @PURPOSE: Initializes the generator with a root directory.
# @PRE: root_dir is a valid path string.
# @POST: Generator instance is ready.
def __init__(self, root_dir: str):
with belief_scope("__init__"):
self.root_dir = root_dir
self.entities: List[SemanticEntity] = []
self.file_scores: Dict[str, float] = {}
self.global_issues: List[str] = []
self.ignored_patterns = self._load_gitignore()
# [/DEF:__init__:Function]
# [DEF:_load_gitignore:Function]
# @PURPOSE: Loads patterns from .gitignore file.
# @PRE: .gitignore exists in root_dir.
# @POST: Returns set of ignore patterns.
# @RETURN: Set of patterns to ignore.
def _load_gitignore(self) -> Set[str]:
with belief_scope("_load_gitignore"):
patterns = set()
ignore_file = os.path.join(self.root_dir, ".gitignore")
if os.path.exists(ignore_file):
with open(ignore_file, 'r') as f:
for line in f:
line = line.strip()
if line and not line.startswith("#"):
patterns.add(line)
return patterns
# [/DEF:_load_gitignore:Function]
# [DEF:_is_ignored:Function]
# @PURPOSE: Checks if a path should be ignored based on .gitignore or hardcoded defaults.
# @PRE: rel_path is a valid relative path string.
# @POST: Returns True if the path should be ignored.
# @PARAM: rel_path (str) - Path relative to root.
# @RETURN: bool - True if ignored.
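    # Examples: "node_modules/pkg/index.js" is ignored via IGNORE_DIRS,
    # "package-lock.json" via IGNORE_FILES, and e.g. "*.log" via .gitignore patterns.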
def _is_ignored(self, rel_path: str) -> bool:
with belief_scope("_is_ignored"):
# Normalize path for matching
rel_path = rel_path.replace(os.sep, '/')
# Check hardcoded defaults
parts = rel_path.split('/')
for part in parts:
if part in IGNORE_DIRS:
return True
if os.path.basename(rel_path) in IGNORE_FILES:
return True
# Check gitignore patterns
for pattern in self.ignored_patterns:
# Handle directory patterns like 'node_modules/'
if pattern.endswith('/'):
dir_pattern = pattern.rstrip('/')
if rel_path == dir_pattern or rel_path.startswith(pattern):
return True
                # Also match against paths relative to the frontend/ and backend/ sub-roots
                for prefix in ("frontend/", "backend/"):
                    if rel_path.startswith(prefix) and fnmatch.fnmatch(rel_path[len(prefix):], pattern):
                        return True
# Use fnmatch for glob patterns
if fnmatch.fnmatch(rel_path, pattern) or \
fnmatch.fnmatch(os.path.basename(rel_path), pattern) or \
any(fnmatch.fnmatch(part, pattern) for part in parts):
return True
return False
# [/DEF:_is_ignored:Function]
# [DEF:run:Function]
# @PURPOSE: Main execution flow.
# @PRE: Generator is initialized.
# @POST: Semantic map and reports are generated.
# @RELATION: CALLS -> _walk_and_parse
# @RELATION: CALLS -> _generate_artifacts
def run(self):
with belief_scope("run"):
print(f"Starting Semantic Map Generation in {self.root_dir}...")
self._walk_and_parse()
self._generate_artifacts()
print("Done.")
# [/DEF:run:Function]
# [DEF:_walk_and_parse:Function]
# @PURPOSE: Recursively walks directories and triggers parsing.
# @PRE: root_dir exists.
# @POST: All files are scanned and entities extracted.
def _walk_and_parse(self):
with belief_scope("_walk_and_parse"):
for root, dirs, files in os.walk(self.root_dir):
# Optimization: don't enter ignored directories
dirs[:] = [d for d in dirs if not self._is_ignored(os.path.relpath(os.path.join(root, d), self.root_dir) + "/")]
for file in files:
file_path = os.path.join(root, file)
rel_path = os.path.relpath(file_path, self.root_dir)
if self._is_ignored(rel_path):
continue
lang = None
if file.endswith(".py"):
lang = "python"
elif file.endswith((".svelte", ".js", ".ts")):
lang = "svelte_js"
if lang:
entities, issues = parse_file(file_path, rel_path, lang)
self.global_issues.extend(issues)
if entities:
self._process_file_results(rel_path, entities)
# [/DEF:_walk_and_parse:Function]
# [DEF:_process_file_results:Function]
# @PURPOSE: Validates entities and calculates file scores.
# @PRE: Entities have been parsed from the file.
# @POST: File score is calculated and issues collected.
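    # Example: a file whose two entities score 1.0 and 0.5 gets a file score of 0.75.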
def _process_file_results(self, rel_path: str, entities: List[SemanticEntity]):
with belief_scope("_process_file_results"):
            total_score = 0.0
count = 0
# [DEF:validate_recursive:Function]
# @PURPOSE: Recursively validates a list of entities.
# @PRE: ent_list is a list of SemanticEntity objects.
# @POST: All entities and their children are validated.
def validate_recursive(ent_list):
with belief_scope("validate_recursive"):
nonlocal total_score, count
for e in ent_list:
e.validate()
total_score += e.get_score()
count += 1
validate_recursive(e.children)
# [/DEF:validate_recursive:Function]
validate_recursive(entities)
self.entities.extend(entities)
self.file_scores[rel_path] = (total_score / count) if count > 0 else 0.0
# [/DEF:_process_file_results:Function]
# [DEF:_generate_artifacts:Function]
# @PURPOSE: Writes output files.
# @PRE: Parsing and validation are complete.
# @POST: JSON and Markdown artifacts are written to disk.
def _generate_artifacts(self):
with belief_scope("_generate_artifacts"):
# 1. Full JSON Map
full_map = {
"project_root": self.root_dir,
"generated_at": datetime.datetime.now().isoformat(),
"modules": [e.to_dict() for e in self.entities]
}
os.makedirs(os.path.dirname(OUTPUT_JSON), exist_ok=True)
with open(OUTPUT_JSON, 'w', encoding='utf-8') as f:
json.dump(full_map, f, indent=2)
print(f"Generated {OUTPUT_JSON}")
# 2. Compliance Report
self._generate_report()
# 3. Compressed Map (Markdown)
self._generate_compressed_map()
# [/DEF:_generate_artifacts:Function]
# [DEF:_generate_report:Function]
# @PURPOSE: Generates the Markdown compliance report.
# @PRE: File scores and issues are available.
# @POST: Markdown report is created in reports directory.
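    # Report path pattern: semantics/reports/semantic_report_YYYYMMDD_HHMMSS.md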
def _generate_report(self):
with belief_scope("_generate_report"):
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
report_path = os.path.join(REPORTS_DIR, f"semantic_report_{timestamp}.md")
os.makedirs(REPORTS_DIR, exist_ok=True)
total_files = len(self.file_scores)
            avg_score = sum(self.file_scores.values()) / total_files if total_files > 0 else 0.0
            with open(report_path, 'w', encoding='utf-8') as f:
                f.write("# Semantic Compliance Report\n\n")
f.write(f"**Generated At:** {datetime.datetime.now().isoformat()}\n")
f.write(f"**Global Compliance Score:** {avg_score:.1%}\n")
f.write(f"**Scanned Files:** {total_files}\n\n")
if self.global_issues:
f.write("## Critical Parsing Errors\n")
for issue in self.global_issues:
f.write(f"- 🔴 {issue}\n")
f.write("\n")
f.write("## File Compliance Status\n")
f.write("| File | Score | Issues |\n")
f.write("|------|-------|--------|\n")
sorted_files = sorted(self.file_scores.items(), key=lambda x: x[1])
for file_path, score in sorted_files:
issues = []
self._collect_issues(self.entities, file_path, issues)
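                    # Thresholds: 🟢 = fully compliant (1.0), 🟡 = above 50%, 🔴 = 50% or below.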
status_icon = "🟢" if score == 1.0 else "🟡" if score > 0.5 else "🔴"
issue_text = "<br>".join(issues) if issues else "OK"
f.write(f"| {file_path} | {status_icon} {score:.0%} | {issue_text} |\n")
print(f"Generated {report_path}")
# [/DEF:_generate_report:Function]
# [DEF:_collect_issues:Function]
# @PURPOSE: Helper to collect issues for a specific file from the entity tree.
# @PRE: entities list and file_path are valid.
# @POST: issues list is populated with compliance issues.
def _collect_issues(self, entities: List[SemanticEntity], file_path: str, issues: List[str]):
with belief_scope("_collect_issues"):
for e in entities:
if e.file_path == file_path:
issues.extend([f"[{e.name}] {i}" for i in e.compliance_issues])
self._collect_issues(e.children, file_path, issues)
# [/DEF:_collect_issues:Function]
# [DEF:_generate_compressed_map:Function]
# @PURPOSE: Generates the token-optimized project map.
# @PRE: Entities have been processed.
# @POST: Markdown project map is written.
def _generate_compressed_map(self):
with belief_scope("_generate_compressed_map"):
os.makedirs(os.path.dirname(OUTPUT_COMPRESSED_MD), exist_ok=True)
with open(OUTPUT_COMPRESSED_MD, 'w', encoding='utf-8') as f:
f.write("# Project Semantic Map\n\n")
f.write("> Compressed view for AI Context. Generated automatically.\n\n")
for entity in self.entities:
self._write_entity_md(f, entity, level=0)
print(f"Generated {OUTPUT_COMPRESSED_MD}")
# [/DEF:_generate_compressed_map:Function]
# [DEF:_write_entity_md:Function]
# @PURPOSE: Recursive helper to write entity tree to Markdown.
# @PRE: f is an open file handle, entity is valid.
# @POST: Entity details are written to the file.
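    # Example rendered line: "- 📦 **generate_semantic_map** (`Module`)"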
def _write_entity_md(self, f, entity: SemanticEntity, level: int):
with belief_scope("_write_entity_md"):
indent = " " * level
icon = "📦"
if entity.type == "Component": icon = "🧩"
elif entity.type == "Function": icon = "ƒ"
elif entity.type == "Class": icon = ""
f.write(f"{indent}- {icon} **{entity.name}** (`{entity.type}`)\n")
purpose = entity.tags.get("PURPOSE") or entity.tags.get("purpose")
layer = entity.tags.get("LAYER") or entity.tags.get("layer")
if purpose:
f.write(f"{indent} - 📝 {purpose}\n")
if layer:
f.write(f"{indent} - 🏗️ Layer: {layer}\n")
for rel in entity.relations:
if rel['type'] in ['DEPENDS_ON', 'CALLS', 'INHERITS_FROM']:
f.write(f"{indent} - 🔗 {rel['type']} -> `{rel['target']}`\n")
if level < 2:
for child in entity.children:
self._write_entity_md(f, child, level + 1)
# [/DEF:_write_entity_md:Function]
# [/DEF:SemanticMapGenerator:Class]
if __name__ == "__main__":
generator = SemanticMapGenerator(PROJECT_ROOT)
generator.run()
# [/DEF:generate_semantic_map:Module]