project map script | semantic parser
This commit is contained in:
439
generate_semantic_map.py
Normal file
439
generate_semantic_map.py
Normal file
@@ -0,0 +1,439 @@
|
||||
# [DEF:generate_semantic_map:Module]
|
||||
#
|
||||
# @SEMANTICS: semantic_analysis, parser, map_generator, compliance_checker
|
||||
# @PURPOSE: Scans the codebase to generate a Semantic Map and Compliance Report based on the System Standard.
|
||||
# @LAYER: DevOps/Tooling
|
||||
# @RELATION: READS -> FileSystem
|
||||
# @RELATION: PRODUCES -> semantics/semantic_map.json
|
||||
# @RELATION: PRODUCES -> specs/project_map.md
|
||||
# @RELATION: PRODUCES -> semantics/reports/semantic_report_*.md
|
||||
|
||||
# [SECTION: IMPORTS]
|
||||
import os
|
||||
import re
|
||||
import json
|
||||
import datetime
|
||||
from typing import Dict, List, Optional, Any, Pattern, Tuple
|
||||
# [/SECTION]
|
||||
|
||||
# [SECTION: CONFIGURATION]
# Root directory handed to SemanticMapGenerator when the module runs as a script.
PROJECT_ROOT = "."

# Directory names pruned from the directory walk.
IGNORE_DIRS = {
    ".git",
    "__pycache__",
    "node_modules",
    "venv",
    ".pytest_cache",
    ".kilocode",
    "backups",
    "logs",
    "semantics",
    "specs",
}

# File names that are never parsed.
IGNORE_FILES = {
    "package-lock.json",
    "poetry.lock",
    "yarn.lock",
}

# Output locations; relative paths, so they resolve against the working directory.
OUTPUT_JSON = "semantics/semantic_map.json"
OUTPUT_COMPRESSED_MD = "specs/project_map.md"
REPORTS_DIR = "semantics/reports"

# Tags every entity of a given type must carry to be fully compliant.
MANDATORY_TAGS = {
    "Module": ["PURPOSE", "LAYER"],
    "Component": ["PURPOSE", "LAYER"],
    "Function": ["PURPOSE"],
    "Class": ["PURPOSE"],
}
# [/SECTION]
|
||||
|
||||
# [DEF:SemanticEntity:Class]
# @PURPOSE: Represents a code entity (Module, Function, Component) found during parsing.
# @INVARIANT: start_line is always set; end_line is set upon closure.
class SemanticEntity:
    """One anchored region of a source file, together with its nested regions.

    An entity is created when an opening anchor is seen and is considered
    closed once ``end_line`` has been assigned. Tags and relations found
    inside the region are stored on the entity; nested anchors become
    ``children``.
    """

    def __init__(self, name: str, type_: str, start_line: int, file_path: str):
        self.name = name
        self.type = type_
        self.start_line = start_line
        # Stays None until the matching closing anchor is found.
        self.end_line: Optional[int] = None
        self.file_path = file_path
        self.tags: Dict[str, str] = {}
        self.relations: List[Dict[str, str]] = []
        self.children: List['SemanticEntity'] = []
        self.parent: Optional['SemanticEntity'] = None
        self.compliance_issues: List[str] = []

    # [DEF:to_dict:Function]
    # @PURPOSE: Serializes the entity to a dictionary for JSON output.
    # @RETURN: Dict representation of the entity.
    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dict of this entity and, recursively, its children.

        ``parent`` and ``file_path`` are not part of the output.
        """
        return {
            "name": self.name,
            "type": self.type,
            "start_line": self.start_line,
            "end_line": self.end_line,
            "tags": self.tags,
            "relations": self.relations,
            "children": [c.to_dict() for c in self.children],
            "compliance": {
                "valid": len(self.compliance_issues) == 0,
                "issues": self.compliance_issues
            }
        }
    # [/DEF:to_dict:Function]

    # [DEF:_missing_tags:Function]
    # @PURPOSE: Lists the mandatory tags of this entity type that are absent.
    # @RETURN: Missing tag names, in the order MANDATORY_TAGS declares them.
    def _missing_tags(self) -> List[str]:
        """Return the mandatory tags not present on this entity (case-insensitive)."""
        present = {tag.upper() for tag in self.tags}
        return [tag for tag in MANDATORY_TAGS.get(self.type, []) if tag not in present]
    # [/DEF:_missing_tags:Function]

    # [DEF:validate:Function]
    # @PURPOSE: Checks for semantic compliance (closure, mandatory tags).
    # @POST: Populates self.compliance_issues.
    def validate(self) -> None:
        """Record compliance issues for this entity and all of its descendants.

        The method is idempotent: an issue that is already recorded is not
        appended again. This matters because a node may be validated more than
        once (directly and again through its parent's recursive call), which
        previously produced duplicate entries.
        """
        detected: List[str] = []

        # 1. Check Closure
        if self.end_line is None:
            detected.append(f"Unclosed Anchor: [DEF:{self.name}:{self.type}] started at line {self.start_line}")

        # 2. Check Mandatory Tags
        detected.extend(f"Missing Mandatory Tag: @{tag}" for tag in self._missing_tags())

        for issue in detected:
            if issue not in self.compliance_issues:
                self.compliance_issues.append(issue)

        # Recursive validation
        for child in self.children:
            child.validate()
    # [/DEF:validate:Function]

    # [DEF:get_score:Function]
    # @PURPOSE: Calculates a compliance score (0.0 to 1.0).
    # @RETURN: Float score.
    def get_score(self) -> float:
        """Return the compliance score of this entity alone (children are not included).

        An unclosed entity scores 0.0. Otherwise the score starts at 1.0 and
        loses up to 0.5, proportionally to the share of mandatory tags missing.
        """
        if self.end_line is None:
            return 0.0

        required = MANDATORY_TAGS.get(self.type, [])
        if not required:
            return 1.0

        found_count = len(required) - len(self._missing_tags())
        # Penalty proportional to missing tags
        score = 1.0 - 0.5 * (1 - (found_count / len(required)))
        return max(0.0, score)
    # [/DEF:get_score:Function]
# [/DEF:SemanticEntity:Class]
|
||||
|
||||
|
||||
# [DEF:get_patterns:Function]
# @PURPOSE: Returns regex patterns for a specific language.
# @PARAM: lang (str) - 'python' selects the Python set; any other value selects the Svelte/JS set.
# @RETURN: Dict containing compiled regex patterns.
def get_patterns(lang: str) -> Dict[str, Pattern]:
    """Build the compiled anchor, tag and relation patterns for ``lang``.

    The Python set has the keys ``anchor_start``, ``anchor_end``, ``tag`` and
    ``relation``. The other set distinguishes HTML-comment and ``//`` anchors
    and adds a JSDoc-style tag pattern.
    """
    if lang == "python":
        expressions = {
            "anchor_start": r"#\s*\[DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]",
            "anchor_end": r"#\s*\[/DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]",
            "tag": r"#\s*@(?P<tag>[A-Z_]+):\s*(?P<value>.*)",
            "relation": r"#\s*@RELATION:\s*(?P<type>\w+)\s*->\s*(?P<target>.*)",
        }
    else:
        expressions = {
            "html_anchor_start": r"<!--\s*\[DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]\s*-->",
            "html_anchor_end": r"<!--\s*\[/DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]\s*-->",
            "js_anchor_start": r"//\s*\[DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]",
            "js_anchor_end": r"//\s*\[/DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]",
            "html_tag": r"@(?P<tag>[A-Z_]+):\s*(?P<value>.*)",
            "jsdoc_tag": r"\*\s*@(?P<tag>[a-zA-Z]+)\s+(?P<value>.*)",
            "relation": r"//\s*@RELATION:\s*(?P<type>\w+)\s*->\s*(?P<target>.*)",
        }

    return {key: re.compile(expression) for key, expression in expressions.items()}
# [/DEF:get_patterns:Function]
|
||||
|
||||
|
||||
# [DEF:parse_file:Function]
# @PURPOSE: Parses a single file to extract semantic entities.
# @PARAM: full_path - Path used to open the file.
# @PARAM: rel_path - Path stored on the entities and used in issue messages.
# @PARAM: lang - Language identifier passed to get_patterns.
# @RETURN: Tuple[List[SemanticEntity], List[str]] - Entities found and global issues.
def parse_file(full_path: str, rel_path: str, lang: str) -> Tuple[List[SemanticEntity], List[str]]:
    """Scan a file line by line and build its tree of anchored entities.

    Opening anchors push a new entity on a stack, nested under the entity that
    is currently open. A closing anchor pops the stack only when it names the
    innermost open entity; otherwise a global issue is recorded and the stack
    is left untouched. Tags and relations attach to the innermost open entity.

    A file that cannot be read yields no entities and a single issue.
    """
    try:
        with open(full_path, 'r', encoding='utf-8') as handle:
            raw_lines = handle.readlines()
    except Exception as e:
        return [], [f"Could not read file {rel_path}: {e}"]

    problems: List[str] = []
    open_entities: List[SemanticEntity] = []
    roots: List[SemanticEntity] = []
    patterns = get_patterns(lang)
    is_python = lang == "python"

    # Pattern keys to try, in order, for opening and closing anchors.
    if is_python:
        opener_keys = ("anchor_start",)
        closer_keys = ("anchor_end",)
    else:
        opener_keys = ("html_anchor_start", "js_anchor_start")
        closer_keys = ("html_anchor_end", "js_anchor_end")

    def first_match(keys, text):
        # Return the first successful search among the given pattern keys.
        for key in keys:
            found = patterns[key].search(text)
            if found:
                return found
        return None

    for lineno, raw in enumerate(raw_lines, start=1):
        text = raw.strip()

        # 1. Check for Anchor Start
        opened = first_match(opener_keys, text)
        if opened:
            entity = SemanticEntity(opened.group("name"), opened.group("type"), lineno, rel_path)
            if open_entities:
                enclosing = open_entities[-1]
                enclosing.children.append(entity)
                entity.parent = enclosing
            else:
                roots.append(entity)
            open_entities.append(entity)
            continue

        # 2. Check for Anchor End
        closed = first_match(closer_keys, text)
        if closed:
            name = closed.group("name")
            type_ = closed.group("type")

            if not open_entities:
                problems.append(f"{rel_path}:{lineno} Found closing anchor [/DEF:{name}:{type_}] without opening anchor.")
                continue

            innermost = open_entities[-1]
            if innermost.name == name and innermost.type == type_:
                innermost.end_line = lineno
                open_entities.pop()
            else:
                problems.append(f"{rel_path}:{lineno} Mismatched closing anchor. Expected [/DEF:{innermost.name}:{innermost.type}], found [/DEF:{name}:{type_}].")
            continue

        # 3. Check for Tags/Relations (only meaningful inside an open entity)
        if not open_entities:
            continue
        owner = open_entities[-1]

        # Relations are tested first because the tag patterns would also match them.
        relation = patterns["relation"].search(text)
        if relation:
            owner.relations.append({
                "type": relation.group("type"),
                "target": relation.group("target")
            })
            continue

        tag = None
        if is_python:
            tag = patterns["tag"].search(text)
        elif lang == "svelte_js":
            tag = patterns["html_tag"].search(text)
            if not tag and any(marker in text for marker in ("/*", "*", "//")):
                tag = patterns["jsdoc_tag"].search(text)

        if tag:
            owner.tags[tag.group("tag").upper()] = tag.group("value").strip()

    # End of file check: whatever is still on the stack was never closed.
    for dangling in open_entities:
        dangling.compliance_issues.append(f"Unclosed Anchor at end of file (started line {dangling.start_line})")
        # Defensive: parentless entities are registered as roots when they are opened.
        if dangling.parent is None and dangling not in roots:
            roots.append(dangling)

    return roots, problems
# [/DEF:parse_file:Function]
|
||||
|
||||
|
||||
# [DEF:SemanticMapGenerator:Class]
# @PURPOSE: Orchestrates the mapping process.
class SemanticMapGenerator:
    """Walk a project tree, parse every supported file and write the artifacts.

    Artifacts are the full JSON map (OUTPUT_JSON), a timestamped Markdown
    compliance report under REPORTS_DIR and the compressed Markdown map
    (OUTPUT_COMPRESSED_MD).
    """

    def __init__(self, root_dir: str):
        self.root_dir = root_dir
        # Top-level entities of every parsed file, in discovery order.
        self.entities: List[SemanticEntity] = []
        # rel_path -> mean score over all entities (nested ones included) of that file.
        self.file_scores: Dict[str, float] = {}
        # Issues reported by the parser itself (unreadable files, stray closing anchors).
        self.global_issues: List[str] = []

    # [DEF:run:Function]
    # @PURPOSE: Main execution flow.
    # @RELATION: CALLS -> _walk_and_parse
    # @RELATION: CALLS -> _generate_artifacts
    def run(self):
        """Parse the tree under ``root_dir`` and then write all artifacts."""
        print(f"Starting Semantic Map Generation in {self.root_dir}...")
        self._walk_and_parse()
        self._generate_artifacts()
        print("Done.")
    # [/DEF:run:Function]

    # [DEF:_walk_and_parse:Function]
    # @PURPOSE: Recursively walks directories and triggers parsing.
    def _walk_and_parse(self):
        """Parse every ``.py``, ``.svelte``, ``.js`` and ``.ts`` file below ``root_dir``.

        Directories in IGNORE_DIRS and files in IGNORE_FILES are skipped.
        Directory and file names are visited in sorted order so that the
        generated artifacts do not depend on the order in which the file system
        lists entries.
        """
        for root, dirs, files in os.walk(self.root_dir):
            # Assigning in place prunes the ignored directories from the walk itself.
            dirs[:] = sorted(d for d in dirs if d not in IGNORE_DIRS)

            for file in sorted(files):
                if file in IGNORE_FILES:
                    continue

                if file.endswith(".py"):
                    lang = "python"
                elif file.endswith((".svelte", ".js", ".ts")):
                    lang = "svelte_js"
                else:
                    continue

                file_path = os.path.join(root, file)
                rel_path = os.path.relpath(file_path, self.root_dir)

                entities, issues = parse_file(file_path, rel_path, lang)
                self.global_issues.extend(issues)

                # Files without any anchor are neither scored nor reported.
                if entities:
                    self._process_file_results(rel_path, entities)
    # [/DEF:_walk_and_parse:Function]

    # [DEF:_process_file_results:Function]
    # @PURPOSE: Validates entities and calculates file scores.
    def _process_file_results(self, rel_path: str, entities: List[SemanticEntity]):
        """Validate a file's entity trees and store the file's mean score.

        ``entities`` are the top-level entities of one file. The score stored
        in ``file_scores[rel_path]`` is the mean of ``get_score()`` over every
        entity in those trees.
        """
        # validate() descends into children on its own, so one call per root
        # validates each node exactly once and avoids duplicated issues.
        for top_level in entities:
            top_level.validate()

        def walk(nodes):
            # Pre-order traversal of the entity trees.
            for node in nodes:
                yield node
                yield from walk(node.children)

        scores = [entity.get_score() for entity in walk(entities)]

        self.entities.extend(entities)
        self.file_scores[rel_path] = (sum(scores) / len(scores)) if scores else 0.0
    # [/DEF:_process_file_results:Function]

    # [DEF:_ensure_parent_dir:Function]
    # @PURPOSE: Creates the directory part of an output path when there is one.
    @staticmethod
    def _ensure_parent_dir(path: str) -> None:
        """Create the parent directory of ``path`` if the path has a directory part."""
        parent = os.path.dirname(path)
        # os.makedirs("") raises, so a bare file name needs no directory creation.
        if parent:
            os.makedirs(parent, exist_ok=True)
    # [/DEF:_ensure_parent_dir:Function]

    # [DEF:_generate_artifacts:Function]
    # @PURPOSE: Writes output files.
    def _generate_artifacts(self):
        """Write the JSON map, then the compliance report, then the compressed map."""
        # 1. Full JSON Map
        full_map = {
            "project_root": self.root_dir,
            "generated_at": datetime.datetime.now().isoformat(),
            "modules": [e.to_dict() for e in self.entities]
        }

        self._ensure_parent_dir(OUTPUT_JSON)
        with open(OUTPUT_JSON, 'w', encoding='utf-8') as f:
            json.dump(full_map, f, indent=2)
        print(f"Generated {OUTPUT_JSON}")

        # 2. Compliance Report
        self._generate_report()

        # 3. Compressed Map (Markdown)
        self._generate_compressed_map()
    # [/DEF:_generate_artifacts:Function]

    # [DEF:_generate_report:Function]
    # @PURPOSE: Generates the Markdown compliance report.
    def _generate_report(self):
        """Write ``semantic_report_<timestamp>.md`` into REPORTS_DIR.

        The report holds the global score (mean of the file scores), the parser
        issues if any, and one table row per scored file, lowest score first.
        """
        now = datetime.datetime.now()
        report_path = os.path.join(REPORTS_DIR, f"semantic_report_{now.strftime('%Y%m%d_%H%M%S')}.md")
        os.makedirs(REPORTS_DIR, exist_ok=True)

        total_files = len(self.file_scores)
        avg_score = sum(self.file_scores.values()) / total_files if total_files > 0 else 0

        # Group the top-level entities by file once, instead of rescanning every
        # entity for every table row. Nested entities are created with the same
        # file path as their root, so the per-file result is unchanged.
        roots_by_file: Dict[str, List[SemanticEntity]] = {}
        for entity in self.entities:
            roots_by_file.setdefault(entity.file_path, []).append(entity)

        with open(report_path, 'w', encoding='utf-8') as f:
            f.write("# Semantic Compliance Report\n\n")
            f.write(f"**Generated At:** {now.isoformat()}\n")
            f.write(f"**Global Compliance Score:** {avg_score:.1%}\n")
            f.write(f"**Scanned Files:** {total_files}\n\n")

            if self.global_issues:
                f.write("## Critical Parsing Errors\n")
                for issue in self.global_issues:
                    f.write(f"- 🔴 {issue}\n")
                f.write("\n")

            f.write("## File Compliance Status\n")
            f.write("| File | Score | Issues |\n")
            f.write("|------|-------|--------|\n")

            # Worst files first.
            sorted_files = sorted(self.file_scores.items(), key=lambda x: x[1])

            for file_path, score in sorted_files:
                issues: List[str] = []
                self._collect_issues(roots_by_file.get(file_path, []), file_path, issues)

                status_icon = "🟢" if score == 1.0 else "🟡" if score > 0.5 else "🔴"
                issue_text = "<br>".join(issues) if issues else "OK"
                f.write(f"| {file_path} | {status_icon} {score:.0%} | {issue_text} |\n")

        print(f"Generated {report_path}")
    # [/DEF:_generate_report:Function]

    # [DEF:_collect_issues:Function]
    # @PURPOSE: Helper to collect issues for a specific file from the entity tree.
    def _collect_issues(self, entities: List[SemanticEntity], file_path: str, issues: List[str]):
        """Append to ``issues`` the compliance issues of entities belonging to ``file_path``.

        Each issue is prefixed with the entity name in brackets. Children are
        always visited, whether or not their parent matched.
        """
        for e in entities:
            if e.file_path == file_path:
                issues.extend([f"[{e.name}] {i}" for i in e.compliance_issues])
            self._collect_issues(e.children, file_path, issues)
    # [/DEF:_collect_issues:Function]

    # [DEF:_generate_compressed_map:Function]
    # @PURPOSE: Generates the token-optimized project map.
    def _generate_compressed_map(self):
        """Write the Markdown outline of all entities to OUTPUT_COMPRESSED_MD."""
        self._ensure_parent_dir(OUTPUT_COMPRESSED_MD)

        with open(OUTPUT_COMPRESSED_MD, 'w', encoding='utf-8') as f:
            f.write("# Project Semantic Map\n\n")
            f.write("> Compressed view for AI Context. Generated automatically.\n\n")

            for entity in self.entities:
                self._write_entity_md(f, entity, level=0)

        print(f"Generated {OUTPUT_COMPRESSED_MD}")
    # [/DEF:_generate_compressed_map:Function]

    # [DEF:_write_entity_md:Function]
    # @PURPOSE: Recursive helper to write entity tree to Markdown.
    def _write_entity_md(self, f, entity: SemanticEntity, level: int):
        """Write one entity as a bullet, followed by its purpose, layer and key relations.

        Only DEPENDS_ON, CALLS and INHERITS_FROM relations are listed. Children
        are written only while ``level`` is below 2, which caps the outline at
        three levels.
        """
        indent = " " * level

        # Types without a dedicated icon fall back to the package icon.
        icon = {"Component": "🧩", "Function": "ƒ", "Class": "ℂ"}.get(entity.type, "📦")

        f.write(f"{indent}- {icon} **{entity.name}** (`{entity.type}`)\n")

        purpose = entity.tags.get("PURPOSE") or entity.tags.get("purpose")
        layer = entity.tags.get("LAYER") or entity.tags.get("layer")

        if purpose:
            f.write(f"{indent} - 📝 {purpose}\n")
        if layer:
            f.write(f"{indent} - 🏗️ Layer: {layer}\n")

        for rel in entity.relations:
            if rel['type'] in ['DEPENDS_ON', 'CALLS', 'INHERITS_FROM']:
                f.write(f"{indent} - 🔗 {rel['type']} -> `{rel['target']}`\n")

        if level < 2:
            for child in entity.children:
                self._write_entity_md(f, child, level + 1)
    # [/DEF:_write_entity_md:Function]

# [/DEF:SemanticMapGenerator:Class]
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: scan from PROJECT_ROOT and write every artifact.
    SemanticMapGenerator(PROJECT_ROOT).run()
|
||||
|
||||
# [/DEF:generate_semantic_map:Module]
|
||||
Reference in New Issue
Block a user