# [DEF:generate_semantic_map:Module]
#
# @SEMANTICS: semantic_analysis, parser, map_generator, compliance_checker
# @PURPOSE: Scans the codebase to generate a Semantic Map and Compliance Report based on the System Standard.
# @LAYER: DevOps/Tooling
# @RELATION: READS -> FileSystem
# @RELATION: PRODUCES -> semantics/semantic_map.json
# @RELATION: PRODUCES -> specs/project_map.md
# @RELATION: PRODUCES -> semantics/reports/semantic_report_*.md
# [SECTION: IMPORTS]
import os
import re
import json
import datetime
from typing import Dict, List, Optional, Any, Pattern, Tuple
# [/SECTION]
# [SECTION: CONFIGURATION]
PROJECT_ROOT = "."
IGNORE_DIRS = {
    ".git", "__pycache__", "node_modules", "venv", ".pytest_cache",
    ".kilocode", "backups", "logs", "semantics", "specs"
}
IGNORE_FILES = {
    "package-lock.json", "poetry.lock", "yarn.lock"
}
OUTPUT_JSON = "semantics/semantic_map.json"
OUTPUT_COMPRESSED_MD = "specs/project_map.md"
REPORTS_DIR = "semantics/reports"
MANDATORY_TAGS = {
    "Module": ["PURPOSE", "LAYER"],
    "Component": ["PURPOSE", "LAYER"],
    "Function": ["PURPOSE"],
    "Class": ["PURPOSE"]
}
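# For example, under these rules a Module or Component block must declare both
# a PURPOSE and a LAYER tag, while Function and Class blocks only need PURPOSE.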
# [/SECTION]
# [DEF:SemanticEntity:Class]
# @PURPOSE: Represents a code entity (Module, Function, Component) found during parsing.
# @INVARIANT: start_line is always set; end_line is set upon closure.
class SemanticEntity:
    def __init__(self, name: str, type_: str, start_line: int, file_path: str):
        self.name = name
        self.type = type_
        self.start_line = start_line
        self.end_line: Optional[int] = None
        self.file_path = file_path
        self.tags: Dict[str, str] = {}
        self.relations: List[Dict[str, str]] = []
        self.children: List['SemanticEntity'] = []
        self.parent: Optional['SemanticEntity'] = None
        self.compliance_issues: List[str] = []

    # [DEF:to_dict:Function]
    # @PURPOSE: Serializes the entity to a dictionary for JSON output.
    # @RETURN: Dict representation of the entity.
    def to_dict(self) -> Dict[str, Any]:
        return {
            "name": self.name,
            "type": self.type,
            "start_line": self.start_line,
            "end_line": self.end_line,
            "tags": self.tags,
            "relations": self.relations,
            "children": [c.to_dict() for c in self.children],
            "compliance": {
                "valid": len(self.compliance_issues) == 0,
                "issues": self.compliance_issues
            }
        }
    # [/DEF:to_dict:Function]
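    # For reference, the serialized JSON shape is (illustrative values only):
    #   {"name": "...", "type": "Function", "start_line": 10, "end_line": 42,
    #    "tags": {...}, "relations": [...], "children": [...],
    #    "compliance": {"valid": true, "issues": []}}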
    # [DEF:validate:Function]
    # @PURPOSE: Checks for semantic compliance (closure, mandatory tags).
    # @POST: Populates self.compliance_issues.
    def validate(self):
        # 1. Check Closure
        if self.end_line is None:
            self.compliance_issues.append(f"Unclosed Anchor: [DEF:{self.name}:{self.type}] started at line {self.start_line}")
        # 2. Check Mandatory Tags
        required = MANDATORY_TAGS.get(self.type, [])
        for req_tag in required:
            found = False
            for existing_tag in self.tags:
                if existing_tag.upper() == req_tag:
                    found = True
                    break
            if not found:
                self.compliance_issues.append(f"Missing Mandatory Tag: @{req_tag}")
        # Recursive validation
        for child in self.children:
            child.validate()
    # [/DEF:validate:Function]
    # [DEF:get_score:Function]
    # @PURPOSE: Calculates a compliance score (0.0 to 1.0).
    # @RETURN: Float score.
    def get_score(self) -> float:
        if self.end_line is None:
            return 0.0
        score = 1.0
        required = MANDATORY_TAGS.get(self.type, [])
        if required:
            found_count = 0
            for req_tag in required:
                for existing_tag in self.tags:
                    if existing_tag.upper() == req_tag:
                        found_count += 1
                        break
            if found_count < len(required):
                # Penalty proportional to missing tags
                score -= 0.5 * (1 - (found_count / len(required)))
        return max(0.0, score)
    # [/DEF:get_score:Function]
# [/DEF:SemanticEntity:Class]
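# Worked example of the scoring above (illustrative only): a closed Module that
# declares PURPOSE but omits LAYER finds 1 of 2 required tags, so its score is
# 1.0 - 0.5 * (1 - 1/2) = 0.75; an unclosed entity always scores 0.0.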
# [DEF:get_patterns:Function]
# @PURPOSE: Returns regex patterns for a specific language.
# @PARAM: lang (str) - 'python' or 'svelte_js'
# @RETURN: Dict containing compiled regex patterns.
def get_patterns(lang: str) -> Dict[str, Pattern]:
if lang == "python":
return {
"anchor_start": re.compile(r"#\s*\[DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]"),
"anchor_end": re.compile(r"#\s*\[/DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]"),
"tag": re.compile(r"#\s*@(?P<tag>[A-Z_]+):\s*(?P<value>.*)"),
"relation": re.compile(r"#\s*@RELATION:\s*(?P<type>\w+)\s*->\s*(?P<target>.*)"),
}
else:
return {
"html_anchor_start": re.compile(r"<!--\s*\[DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]\s*-->"),
"html_anchor_end": re.compile(r"<!--\s*\[/DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]\s*-->"),
"js_anchor_start": re.compile(r"//\s*\[DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]"),
"js_anchor_end": re.compile(r"//\s*\[/DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]"),
"html_tag": re.compile(r"@(?P<tag>[A-Z_]+):\s*(?P<value>.*)"),
"jsdoc_tag": re.compile(r"\*\s*@(?P<tag>[a-zA-Z]+)\s+(?P<value>.*)"),
"relation": re.compile(r"//\s*@RELATION:\s*(?P<type>\w+)\s*->\s*(?P<target>.*)"),
}
# [/DEF:get_patterns:Function]
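# Illustration (hypothetical names; the anchor text below is shown without a
# leading comment marker so this note is not itself picked up by the scanner):
# Python sources carry anchors in hash comments, e.g. [DEF:load_config:Function]
# paired with [/DEF:load_config:Function]; Svelte/JS sources may use either
# HTML comments or // line comments for anchors, and JSDoc-style "* @tag value"
# lines are also harvested as tags.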
# [DEF:parse_file:Function]
# @PURPOSE: Parses a single file to extract semantic entities.
# @PARAM: full_path - Absolute path to file.
# @PARAM: rel_path - Relative path from project root.
# @PARAM: lang - Language identifier.
# @RETURN: Tuple[List[SemanticEntity], List[str]] - Entities found and global issues.
def parse_file(full_path: str, rel_path: str, lang: str) -> Tuple[List[SemanticEntity], List[str]]:
    issues: List[str] = []
    try:
        with open(full_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
    except Exception as e:
        return [], [f"Could not read file {rel_path}: {e}"]
    stack: List[SemanticEntity] = []
    file_entities: List[SemanticEntity] = []
    patterns = get_patterns(lang)
    for i, line in enumerate(lines):
        lineno = i + 1
        line = line.strip()
        # 1. Check for Anchor Start
        match_start = None
        if lang == "python":
            match_start = patterns["anchor_start"].search(line)
        else:
            match_start = patterns["html_anchor_start"].search(line) or patterns["js_anchor_start"].search(line)
        if match_start:
            name = match_start.group("name")
            type_ = match_start.group("type")
            entity = SemanticEntity(name, type_, lineno, rel_path)
            if stack:
                parent = stack[-1]
                parent.children.append(entity)
                entity.parent = parent
            else:
                file_entities.append(entity)
            stack.append(entity)
            continue
        # 2. Check for Anchor End
        match_end = None
        if lang == "python":
            match_end = patterns["anchor_end"].search(line)
        else:
            match_end = patterns["html_anchor_end"].search(line) or patterns["js_anchor_end"].search(line)
        if match_end:
            name = match_end.group("name")
            type_ = match_end.group("type")
            if not stack:
                issues.append(f"{rel_path}:{lineno} Found closing anchor [/DEF:{name}:{type_}] without opening anchor.")
                continue
            top = stack[-1]
            if top.name == name and top.type == type_:
                top.end_line = lineno
                stack.pop()
            else:
                issues.append(f"{rel_path}:{lineno} Mismatched closing anchor. Expected [/DEF:{top.name}:{top.type}], found [/DEF:{name}:{type_}].")
            continue
        # 3. Check for Tags/Relations
        if stack:
            current = stack[-1]
            match_rel = patterns["relation"].search(line)
            if match_rel:
                current.relations.append({
                    "type": match_rel.group("type"),
                    "target": match_rel.group("target")
                })
                continue
            match_tag = None
            if lang == "python":
                match_tag = patterns["tag"].search(line)
            elif lang == "svelte_js":
                match_tag = patterns["html_tag"].search(line)
                # jsdoc_tag only exists in the svelte_js pattern set, so this
                # fallback must stay inside the svelte_js branch.
                if not match_tag and ("/*" in line or "*" in line or "//" in line):
                    match_tag = patterns["jsdoc_tag"].search(line)
            if match_tag:
                tag_name = match_tag.group("tag").upper()
                tag_value = match_tag.group("value").strip()
                current.tags[tag_name] = tag_value
    # End of file check
    if stack:
        for unclosed in stack:
            unclosed.compliance_issues.append(f"Unclosed Anchor at end of file (started line {unclosed.start_line})")
            if unclosed.parent is None and unclosed not in file_entities:
                file_entities.append(unclosed)
    return file_entities, issues
# [/DEF:parse_file:Function]
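# For example, a Function anchor opened while a Class anchor is still on the
# stack is attached as a child of that class; a closer that does not match the
# top of the stack is reported as a mismatch rather than silently re-paired.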
# [DEF:SemanticMapGenerator:Class]
# @PURPOSE: Orchestrates the mapping process.
class SemanticMapGenerator:
    def __init__(self, root_dir: str):
        self.root_dir = root_dir
        self.entities: List[SemanticEntity] = []
        self.file_scores: Dict[str, float] = {}
        self.global_issues: List[str] = []

    # [DEF:run:Function]
    # @PURPOSE: Main execution flow.
    # @RELATION: CALLS -> _walk_and_parse
    # @RELATION: CALLS -> _generate_artifacts
    def run(self):
        print(f"Starting Semantic Map Generation in {self.root_dir}...")
        self._walk_and_parse()
        self._generate_artifacts()
        print("Done.")
    # [/DEF:run:Function]
    # [DEF:_walk_and_parse:Function]
    # @PURPOSE: Recursively walks directories and triggers parsing.
    def _walk_and_parse(self):
        for root, dirs, files in os.walk(self.root_dir):
            dirs[:] = [d for d in dirs if d not in IGNORE_DIRS]
            for file in files:
                if file in IGNORE_FILES:
                    continue
                file_path = os.path.join(root, file)
                rel_path = os.path.relpath(file_path, self.root_dir)
                lang = None
                if file.endswith(".py"):
                    lang = "python"
                elif file.endswith((".svelte", ".js", ".ts")):
                    lang = "svelte_js"
                if lang:
                    entities, issues = parse_file(file_path, rel_path, lang)
                    self.global_issues.extend(issues)
                    if entities:
                        self._process_file_results(rel_path, entities)
    # [/DEF:_walk_and_parse:Function]
    # [DEF:_process_file_results:Function]
    # @PURPOSE: Validates entities and calculates file scores.
    def _process_file_results(self, rel_path: str, entities: List[SemanticEntity]):
        total_score = 0.0
        count = 0

        def validate_recursive(ent_list):
            nonlocal total_score, count
            for e in ent_list:
                e.validate()
                total_score += e.get_score()
                count += 1
                validate_recursive(e.children)

        validate_recursive(entities)
        self.entities.extend(entities)
        self.file_scores[rel_path] = (total_score / count) if count > 0 else 0.0
    # [/DEF:_process_file_results:Function]
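    # Illustrative arithmetic: a file whose two entities score 1.0 and 0.5
    # gets a file score of (1.0 + 0.5) / 2 = 0.75.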
    # [DEF:_generate_artifacts:Function]
    # @PURPOSE: Writes output files.
    def _generate_artifacts(self):
        # 1. Full JSON Map
        full_map = {
            "project_root": self.root_dir,
            "generated_at": datetime.datetime.now().isoformat(),
            "modules": [e.to_dict() for e in self.entities]
        }
        os.makedirs(os.path.dirname(OUTPUT_JSON), exist_ok=True)
        with open(OUTPUT_JSON, 'w', encoding='utf-8') as f:
            json.dump(full_map, f, indent=2)
        print(f"Generated {OUTPUT_JSON}")
        # 2. Compliance Report
        self._generate_report()
        # 3. Compressed Map (Markdown)
        self._generate_compressed_map()
    # [/DEF:_generate_artifacts:Function]
    # [DEF:_generate_report:Function]
    # @PURPOSE: Generates the Markdown compliance report.
    def _generate_report(self):
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        report_path = os.path.join(REPORTS_DIR, f"semantic_report_{timestamp}.md")
        os.makedirs(REPORTS_DIR, exist_ok=True)
        total_files = len(self.file_scores)
        avg_score = sum(self.file_scores.values()) / total_files if total_files > 0 else 0
        with open(report_path, 'w', encoding='utf-8') as f:
            f.write("# Semantic Compliance Report\n\n")
            f.write(f"**Generated At:** {datetime.datetime.now().isoformat()}\n")
            f.write(f"**Global Compliance Score:** {avg_score:.1%}\n")
            f.write(f"**Scanned Files:** {total_files}\n\n")
            if self.global_issues:
                f.write("## Critical Parsing Errors\n")
                for issue in self.global_issues:
                    f.write(f"- 🔴 {issue}\n")
                f.write("\n")
            f.write("## File Compliance Status\n")
            f.write("| File | Score | Issues |\n")
            f.write("|------|-------|--------|\n")
            sorted_files = sorted(self.file_scores.items(), key=lambda x: x[1])
            for file_path, score in sorted_files:
                issues = []
                self._collect_issues(self.entities, file_path, issues)
                status_icon = "🟢" if score == 1.0 else "🟡" if score > 0.5 else "🔴"
                issue_text = "<br>".join(issues) if issues else "OK"
                f.write(f"| {file_path} | {status_icon} {score:.0%} | {issue_text} |\n")
        print(f"Generated {report_path}")
    # [/DEF:_generate_report:Function]
    # [DEF:_collect_issues:Function]
    # @PURPOSE: Helper to collect issues for a specific file from the entity tree.
    def _collect_issues(self, entities: List[SemanticEntity], file_path: str, issues: List[str]):
        for e in entities:
            if e.file_path == file_path:
                issues.extend([f"[{e.name}] {i}" for i in e.compliance_issues])
            self._collect_issues(e.children, file_path, issues)
    # [/DEF:_collect_issues:Function]
    # [DEF:_generate_compressed_map:Function]
    # @PURPOSE: Generates the token-optimized project map.
    def _generate_compressed_map(self):
        os.makedirs(os.path.dirname(OUTPUT_COMPRESSED_MD), exist_ok=True)
        with open(OUTPUT_COMPRESSED_MD, 'w', encoding='utf-8') as f:
            f.write("# Project Semantic Map\n\n")
            f.write("> Compressed view for AI Context. Generated automatically.\n\n")
            for entity in self.entities:
                self._write_entity_md(f, entity, level=0)
        print(f"Generated {OUTPUT_COMPRESSED_MD}")
    # [/DEF:_generate_compressed_map:Function]
    # [DEF:_write_entity_md:Function]
    # @PURPOSE: Recursive helper to write entity tree to Markdown.
    def _write_entity_md(self, f, entity: SemanticEntity, level: int):
        # Two-space indent per level so nested Markdown list items render correctly.
        indent = "  " * level
        icon = "📦"
        if entity.type == "Component":
            icon = "🧩"
        elif entity.type == "Function":
            icon = "ƒ"
        elif entity.type == "Class":
            icon = ""
        f.write(f"{indent}- {icon} **{entity.name}** (`{entity.type}`)\n")
        purpose = entity.tags.get("PURPOSE") or entity.tags.get("purpose")
        layer = entity.tags.get("LAYER") or entity.tags.get("layer")
        if purpose:
            f.write(f"{indent}  - 📝 {purpose}\n")
        if layer:
            f.write(f"{indent}  - 🏗️ Layer: {layer}\n")
        for rel in entity.relations:
            if rel['type'] in ['DEPENDS_ON', 'CALLS', 'INHERITS_FROM']:
                f.write(f"{indent}  - 🔗 {rel['type']} -> `{rel['target']}`\n")
        if level < 2:
            for child in entity.children:
                self._write_entity_md(f, child, level + 1)
    # [/DEF:_write_entity_md:Function]
# [/DEF:SemanticMapGenerator:Class]
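# Usage note: the output paths above are relative, so this script is intended
# to be run from the project root, e.g. `python ss-tools/generate_semantic_map.py`;
# run elsewhere it would create semantics/ and specs/ trees in the current
# working directory.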
if __name__ == "__main__":
generator = SemanticMapGenerator(PROJECT_ROOT)
generator.run()
# [/DEF:generate_semantic_map:Module]