# [DEF:generate_semantic_map:Module]
#
# @SEMANTICS: semantic_analysis, parser, map_generator, compliance_checker
# @PURPOSE:   Scans the codebase to generate a Semantic Map and Compliance Report based on the System Standard.
# @LAYER:     DevOps/Tooling
# @RELATION:  READS -> FileSystem
# @RELATION:  PRODUCES -> semantics/semantic_map.json
# @RELATION:  PRODUCES -> specs/project_map.md
# @RELATION:  PRODUCES -> semantics/reports/semantic_report_*.md

# [SECTION: IMPORTS]
import os
import re
import json
import datetime
import fnmatch
from typing import Dict, List, Optional, Any, Pattern, Tuple, Set

# Mock belief_scope for the script itself to avoid import issues
class belief_scope:
    """No-op stand-in for the project's belief_scope context manager.

    It only remembers the scope name. Entering yields the instance itself and
    leaving does nothing, so exceptions raised inside the managed block
    propagate unchanged.
    """

    def __init__(self, name):
        self.name = name

    def __enter__(self):
        return self

    def __exit__(self, *args):
        # A None result tells Python not to suppress any in-flight exception.
        return None
# [/SECTION]

# [SECTION: CONFIGURATION]
# Directory that is scanned when the script is executed directly.
PROJECT_ROOT = "."

# A path is skipped as soon as any of its components appears in this set.
IGNORE_DIRS = {
    ".git",
    "__pycache__",
    "node_modules",
    "venv",
    ".pytest_cache",
    ".kilocode",
    "backups",
    "logs",
    "semantics",
    "specs",
}

# File names that are skipped wherever they are found.
IGNORE_FILES = {
    "package-lock.json",
    "poetry.lock",
    "yarn.lock",
}

# Output locations (interpreted relative to the current working directory).
OUTPUT_JSON = "semantics/semantic_map.json"
OUTPUT_COMPRESSED_MD = "specs/project_map.md"
REPORTS_DIR = "semantics/reports"

# Tags every entity of a given type must carry; the list order is the order
# in which missing tags are reported.
MANDATORY_TAGS = {
    "Module": ["PURPOSE", "LAYER", "SEMANTICS"],
    "Component": ["PURPOSE", "LAYER", "SEMANTICS"],
    "Function": ["PURPOSE", "PRE", "POST"],
    "Class": ["PURPOSE"],
}
# [/SECTION]

# [DEF:SemanticEntity:Class]
# @PURPOSE:   Represents a code entity (e.g. Module, Component, Class, Function) found during parsing.
# @INVARIANT: start_line is always set; end_line is set upon closure.
class SemanticEntity:
    # [DEF:__init__:Function]
    # @PURPOSE: Initializes a new SemanticEntity instance.
    # @PRE:     name, type_, start_line, file_path are provided.
    # @POST:    Instance is initialized with default values.
    # @PARAM:   name (str) - Identifier taken from the opening anchor.
    # @PARAM:   type_ (str) - Entity type taken from the opening anchor.
    # @PARAM:   start_line (int) - Line number of the opening anchor.
    # @PARAM:   file_path (str) - Path of the file containing the anchor.
    def __init__(self, name: str, type_: str, start_line: int, file_path: str):
        with belief_scope("__init__"):
            self.name = name
            self.type = type_
            self.start_line = start_line
            # Remains None until the matching closing anchor is parsed.
            self.end_line: Optional[int] = None
            self.file_path = file_path
            # Upper-cased tag name -> tag value.
            self.tags: Dict[str, str] = {}
            # Each relation is {"type": ..., "target": ...}.
            self.relations: List[Dict[str, str]] = []
            self.children: List['SemanticEntity'] = []
            self.parent: Optional['SemanticEntity'] = None
            self.compliance_issues: List[str] = []
            # The parser flips this to True when it sees a belief_scope block
            # inside the anchor; declared here so every instance has it.
            self.has_belief_scope: bool = False
    # [/DEF:__init__:Function]

    # [DEF:to_dict:Function]
    # @PURPOSE: Serializes the entity to a dictionary for JSON output.
    # @PRE:     Entity is fully populated.
    # @POST:    Returns a dictionary representation.
    # @RETURN:  Dict representation of the entity (children serialized recursively).
    def to_dict(self) -> Dict[str, Any]:
        with belief_scope("to_dict"):
            return {
                "name": self.name,
                "type": self.type,
                "start_line": self.start_line,
                "end_line": self.end_line,
                "tags": self.tags,
                "relations": self.relations,
                "children": [c.to_dict() for c in self.children],
                "compliance": {
                    "valid": len(self.compliance_issues) == 0,
                    "issues": self.compliance_issues
                }
            }
    # [/DEF:to_dict:Function]

    # [DEF:_missing_tags:Function]
    # @PURPOSE: Lists the mandatory tags of this entity type that are absent.
    # @PRE:     self.tags is populated.
    # @POST:    Returns missing tag names in MANDATORY_TAGS order; the entity is not modified.
    # @RETURN:  List[str] - Empty when nothing is missing or the type has no mandatory tags.
    def _missing_tags(self) -> List[str]:
        with belief_scope("_missing_tags"):
            # Tag names are compared case-insensitively.
            present = {tag.upper() for tag in self.tags}
            return [req for req in MANDATORY_TAGS.get(self.type, []) if req not in present]
    # [/DEF:_missing_tags:Function]

    # [DEF:_add_issue:Function]
    # @PURPOSE: Records a compliance issue unless the same message is already recorded.
    # @PRE:     message is a non-empty string.
    # @POST:    message appears exactly once in self.compliance_issues.
    def _add_issue(self, message: str) -> None:
        with belief_scope("_add_issue"):
            if message not in self.compliance_issues:
                self.compliance_issues.append(message)
    # [/DEF:_add_issue:Function]

    # [DEF:validate:Function]
    # @PURPOSE: Checks for semantic compliance (closure, mandatory tags, belief state).
    # @PRE:     Entity structure is complete.
    # @POST:    Populates self.compliance_issues of this entity and all descendants; repeated calls add no duplicates.
    def validate(self):
        with belief_scope("validate"):
            # 1. Check Closure
            if self.end_line is None:
                self._add_issue(f"Unclosed Anchor: [DEF:{self.name}:{self.type}] started at line {self.start_line}")

            # 2. Check Mandatory Tags
            for req_tag in self._missing_tags():
                self._add_issue(f"Missing Mandatory Tag: @{req_tag}")

            # 3. Check for Belief State Logging (Python only)
            if self.type == "Function" and self.file_path.endswith(".py"):
                if not self.has_belief_scope:
                    self._add_issue("Missing Belief State Logging: Function should use belief_scope context manager.")

            # Recursive validation. Callers may also validate children
            # directly; _add_issue keeps that from duplicating messages.
            for child in self.children:
                child.validate()
    # [/DEF:validate:Function]

    # [DEF:get_score:Function]
    # @PURPOSE: Calculates a compliance score (0.0 to 1.0).
    # @PRE:     validate() has been called.
    # @POST:    Returns a float score.
    # @RETURN:  Float score: 0.0 for an unclosed anchor, otherwise 1.0 minus up to 0.5 in proportion to missing mandatory tags.
    def get_score(self) -> float:
        with belief_scope("get_score"):
            if self.end_line is None:
                return 0.0

            required = MANDATORY_TAGS.get(self.type, [])
            if not required:
                return 1.0

            score = 1.0
            found_count = len(required) - len(self._missing_tags())
            if found_count < len(required):
                # Penalty proportional to missing tags
                score -= 0.5 * (1 - (found_count / len(required)))

            return max(0.0, score)
    # [/DEF:get_score:Function]
# [/DEF:SemanticEntity:Class]


# [DEF:get_patterns:Function]
# @PURPOSE: Returns regex patterns for a specific language.
# @PRE:     lang is either 'python' or 'svelte_js'.
# @POST:    Returns a dictionary of compiled regex patterns.
# @PARAM:   lang (str) - 'python' or 'svelte_js'
# @RETURN:  Dict containing compiled regex patterns.
def get_patterns(lang: str) -> Dict[str, Pattern]:
    with belief_scope("get_patterns"):
        # Regex sources for Python files: anchors, tags and relations all live
        # in hash comments.
        python_sources = {
            "anchor_start": r"#\s*\[DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]",
            "anchor_end": r"#\s*\[/DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]",
            "tag": r"#\s*@(?P<tag>[A-Z_]+):\s*(?P<value>.*)",
            "relation": r"#\s*@RELATION:\s*(?P<type>\w+)\s*->\s*(?P<target>.*)",
            "func_def": r"^\s*(async\s+)?def\s+(?P<name>\w+)",
            "belief_scope": r"with\s+belief_scope\(",
        }
        # Regex sources for every other language value: anchors may sit in
        # HTML comments or in double-slash comments.
        markup_sources = {
            "html_anchor_start": r"<!--\s*\[DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]\s*-->",
            "html_anchor_end": r"<!--\s*\[/DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]\s*-->",
            "js_anchor_start": r"//\s*\[DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]",
            "js_anchor_end": r"//\s*\[/DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]",
            "html_tag": r"@(?P<tag>[A-Z_]+):\s*(?P<value>.*)",
            "jsdoc_tag": r"\*\s*@(?P<tag>[a-zA-Z]+)\s+(?P<value>.*)",
            "relation": r"//\s*@RELATION:\s*(?P<type>\w+)\s*->\s*(?P<target>.*)",
            "func_def": r"^\s*(export\s+)?(async\s+)?function\s+(?P<name>\w+)",
        }

        chosen = python_sources if lang == "python" else markup_sources
        # Only the selected table is compiled; key order follows the table.
        return {key: re.compile(source) for key, source in chosen.items()}
# [/DEF:get_patterns:Function]


# [DEF:parse_file:Function]
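# @PURPOSE: Parses a single file to extract semantic entities.
# @PRE:     lang is 'python' or 'svelte_js'.
# @POST:    Returns the file's top-level entities (children nested inside them) and file-level issues; an unreadable file yields ([], [error message]).
# @PARAM:   full_path - Path used to open the file (need not be absolute).
# @PARAM:   rel_path - Path relative to the project root; stored on each entity and used in issue messages.
# @PARAM:   lang - Language identifier ('python' or 'svelte_js') selecting the regex set.
# @RETURN:  Tuple[List[SemanticEntity], List[str]] - Entities found and global issues.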
def parse_file(full_path: str, rel_path: str, lang: str) -> Tuple[List[SemanticEntity], List[str]]:
    with belief_scope("parse_file"):
        try:
            with open(full_path, 'r', encoding='utf-8') as f:
                lines = f.readlines()
        except (OSError, ValueError) as e:
            # OSError: the file is missing or unreadable. ValueError includes
            # UnicodeDecodeError for content that is not valid UTF-8.
            return [], [f"Could not read file {rel_path}: {e}"]

        issues: List[str] = []
        stack: List[SemanticEntity] = []          # open anchors, innermost last
        file_entities: List[SemanticEntity] = []  # anchors opened at top level
        patterns = get_patterns(lang)

        # Select the language-specific patterns once instead of on every line.
        if lang == "python":
            start_patterns = (patterns["anchor_start"],)
            end_patterns = (patterns["anchor_end"],)
        else:
            start_patterns = (patterns["html_anchor_start"], patterns["js_anchor_start"])
            end_patterns = (patterns["html_anchor_end"], patterns["js_anchor_end"])
        func_def = patterns.get("func_def")
        belief = patterns.get("belief_scope") if lang == "python" else None

        for lineno, raw_line in enumerate(lines, start=1):
            line = raw_line.strip()

            # 1. Anchor start: open a new entity below the innermost open one.
            match_start = next(filter(None, (p.search(line) for p in start_patterns)), None)
            if match_start:
                entity = SemanticEntity(match_start.group("name"), match_start.group("type"), lineno, rel_path)
                if stack:
                    entity.parent = stack[-1]
                    stack[-1].children.append(entity)
                else:
                    file_entities.append(entity)
                stack.append(entity)
                continue

            # 2. Anchor end: it must close the innermost open anchor.
            match_end = next(filter(None, (p.search(line) for p in end_patterns)), None)
            if match_end:
                name = match_end.group("name")
                type_ = match_end.group("type")

                if not stack:
                    issues.append(f"{rel_path}:{lineno} Found closing anchor [/DEF:{name}:{type_}] without opening anchor.")
                    continue

                top = stack[-1]
                if top.name == name and top.type == type_:
                    top.end_line = lineno
                    stack.pop()
                else:
                    # The stack is left untouched: the mismatch is reported and
                    # the innermost anchor stays open.
                    issues.append(f"{rel_path}:{lineno} Mismatched closing anchor. Expected [/DEF:{top.name}:{top.type}], found [/DEF:{name}:{type_}].")
                continue

            # 3. Naked functions: a definition must sit directly inside a
            #    Function anchor carrying the same name.
            if func_def is not None:
                match_func = func_def.search(line)
                if match_func:
                    func_name = match_func.group("name")
                    is_covered = bool(stack) and stack[-1].type == "Function" and stack[-1].name == func_name
                    if not is_covered:
                        issues.append(f"{rel_path}:{lineno} Function '{func_name}' implementation found without matching [DEF:{func_name}:Function] contract.")

            # 4. Tags, relations and belief scopes only count inside an anchor.
            if not stack:
                continue
            current = stack[-1]

            # Relations are tried first so they are not stored as plain tags.
            match_rel = patterns["relation"].search(line)
            if match_rel:
                current.relations.append({
                    "type": match_rel.group("type"),
                    "target": match_rel.group("target")
                })
                continue

            match_tag = None
            if lang == "python":
                match_tag = patterns["tag"].search(line)
            elif lang == "svelte_js":
                match_tag = patterns["html_tag"].search(line)
                # JSDoc-style tags are only tried on lines that look like comments.
                if not match_tag and ("*" in line or "//" in line):
                    match_tag = patterns["jsdoc_tag"].search(line)

            if match_tag:
                current.tags[match_tag.group("tag").upper()] = match_tag.group("value").strip()

            # Credit the innermost open anchor when its body opens a belief scope.
            if belief is not None and belief.search(line):
                current.has_belief_scope = True

        # Anything still open at end of file never received its closing anchor.
        for unclosed in stack:
            unclosed.compliance_issues.append(f"Unclosed Anchor at end of file (started line {unclosed.start_line})")

        return file_entities, issues
# [/DEF:parse_file:Function]


# [DEF:SemanticMapGenerator:Class]
# @PURPOSE: Orchestrates the mapping process.
class SemanticMapGenerator:
    # [DEF:__init__:Function]
    # @PURPOSE: Initializes the generator with a root directory.
    # @PRE:     root_dir is a valid path string.
    # @POST:    Generator instance is ready.
    # @PARAM:   root_dir (str) - Directory to scan; .gitignore is read from it.
    def __init__(self, root_dir: str):
        with belief_scope("__init__"):
            self.root_dir = root_dir
            # Top-level entities of every parsed file, in scan order.
            self.entities: List[SemanticEntity] = []
            # Relative file path -> mean entity score for that file.
            self.file_scores: Dict[str, float] = {}
            # Parser-level problems that are not attached to a single entity.
            self.global_issues: List[str] = []
            self.ignored_patterns = self._load_gitignore()
    # [/DEF:__init__:Function]

    # [DEF:_load_gitignore:Function]
    # @PURPOSE: Loads patterns from .gitignore file.
    # @PRE:     self.root_dir is set.
    # @POST:    Returns the non-blank, non-comment lines of root_dir/.gitignore; empty set when the file is absent.
    # @RETURN:  Set of patterns to ignore.
    def _load_gitignore(self) -> Set[str]:
        with belief_scope("_load_gitignore"):
            ignore_file = os.path.join(self.root_dir, ".gitignore")
            if not os.path.exists(ignore_file):
                return set()
            # Explicit encoding keeps the result independent of the platform
            # locale; undecodable bytes are replaced rather than aborting.
            with open(ignore_file, 'r', encoding='utf-8', errors='replace') as f:
                stripped = (line.strip() for line in f)
                return {line for line in stripped if line and not line.startswith("#")}
    # [/DEF:_load_gitignore:Function]

    # [DEF:_is_ignored:Function]
    # @PURPOSE: Checks if a path should be ignored based on .gitignore or hardcoded defaults.
    # @PRE:     rel_path is a valid relative path string.
    # @POST:    Returns True if the path should be ignored.
    # @PARAM:   rel_path (str) - Path relative to root; directories may carry a trailing slash.
    # @RETURN:  bool - True if ignored.
    def _is_ignored(self, rel_path: str) -> bool:
        with belief_scope("_is_ignored"):
            # Normalize path for matching
            rel_path = rel_path.replace(os.sep, '/')
            parts = rel_path.split('/')
            basename = os.path.basename(rel_path)

            # Check hardcoded defaults
            if any(part in IGNORE_DIRS for part in parts):
                return True
            if basename in IGNORE_FILES:
                return True

            # Strings a glob pattern is tried against: the whole path, the file
            # name, every component, and the path below frontend/ or backend/.
            candidates = [rel_path, basename, *parts]
            for prefix in ("frontend/", "backend/"):
                if rel_path.startswith(prefix):
                    candidates.append(rel_path[len(prefix):])

            # For a file, every component but the last is a directory; for a
            # directory passed with a trailing slash the last component is empty.
            dir_parts = parts[:-1]

            for pattern in self.ignored_patterns:
                # Handle directory patterns like 'node_modules/'
                if pattern.endswith('/'):
                    dir_pattern = pattern.rstrip('/')
                    if rel_path == dir_pattern or rel_path.startswith(pattern):
                        return True
                    # A directory pattern without an inner slash applies at any
                    # depth in gitignore, not only directly under the root.
                    if dir_pattern and '/' not in dir_pattern and \
                       any(fnmatch.fnmatch(part, dir_pattern) for part in dir_parts):
                        return True

                # Use fnmatch for glob patterns
                if any(fnmatch.fnmatch(candidate, pattern) for candidate in candidates):
                    return True

            return False
    # [/DEF:_is_ignored:Function]

    # [DEF:run:Function]
    # @PURPOSE: Main execution flow.
    # @PRE:     Generator is initialized.
    # @POST:    Semantic map and reports are generated.
    # @RELATION: CALLS -> _walk_and_parse
    # @RELATION: CALLS -> _generate_artifacts
    def run(self):
        with belief_scope("run"):
            print(f"Starting Semantic Map Generation in {self.root_dir}...")
            self._walk_and_parse()
            self._generate_artifacts()
            print("Done.")
    # [/DEF:run:Function]

    # [DEF:_walk_and_parse:Function]
    # @PURPOSE: Recursively walks directories and triggers parsing.
    # @PRE:     root_dir exists.
    # @POST:    All files are scanned and entities extracted.
    def _walk_and_parse(self):
        with belief_scope("_walk_and_parse"):
            for root, dirs, files in os.walk(self.root_dir):
                # Prune in place so os.walk never enters ignored directories;
                # the trailing slash lets directory-only patterns match.
                # Sorting makes the traversal, and so the output, repeatable.
                dirs[:] = sorted(
                    d for d in dirs
                    if not self._is_ignored(os.path.relpath(os.path.join(root, d), self.root_dir) + "/")
                )

                for file in sorted(files):
                    if file.endswith(".py"):
                        lang = "python"
                    elif file.endswith((".svelte", ".js", ".ts")):
                        lang = "svelte_js"
                    else:
                        continue

                    file_path = os.path.join(root, file)
                    rel_path = os.path.relpath(file_path, self.root_dir)
                    if self._is_ignored(rel_path):
                        continue

                    entities, issues = parse_file(file_path, rel_path, lang)
                    self.global_issues.extend(issues)

                    # Files without any anchor are not scored.
                    if entities:
                        self._process_file_results(rel_path, entities)
    # [/DEF:_walk_and_parse:Function]

    # [DEF:_process_file_results:Function]
    # @PURPOSE: Validates entities and calculates file scores.
    # @PRE:     Entities have been parsed from the file.
    # @POST:    File score is calculated and issues collected.
    # @PARAM:   rel_path (str) - Key under which the file score is stored.
    # @PARAM:   entities (List[SemanticEntity]) - Top-level entities of the file.
    def _process_file_results(self, rel_path: str, entities: List[SemanticEntity]):
        with belief_scope("_process_file_results"):
            # validate() recurses into children itself, so it runs once per
            # top-level entity; running it per node would repeat every child's
            # issues once for each of its ancestors.
            for entity in entities:
                entity.validate()

            # Pre-order walk over the whole tree to average the scores.
            total_score = 0.0
            count = 0
            pending = list(reversed(entities))
            while pending:
                node = pending.pop()
                total_score += node.get_score()
                count += 1
                pending.extend(reversed(node.children))

            self.entities.extend(entities)
            self.file_scores[rel_path] = (total_score / count) if count > 0 else 0.0
    # [/DEF:_process_file_results:Function]

    # [DEF:_generate_artifacts:Function]
    # @PURPOSE: Writes output files.
    # @PRE:     Parsing and validation are complete.
    # @POST:    JSON and Markdown artifacts are written to disk.
    def _generate_artifacts(self):
        with belief_scope("_generate_artifacts"):
            # 1. Full JSON Map
            full_map = {
                "project_root": self.root_dir,
                "generated_at": datetime.datetime.now().isoformat(),
                "modules": [e.to_dict() for e in self.entities]
            }

            os.makedirs(os.path.dirname(OUTPUT_JSON), exist_ok=True)
            with open(OUTPUT_JSON, 'w', encoding='utf-8') as f:
                json.dump(full_map, f, indent=2)
            print(f"Generated {OUTPUT_JSON}")

            # 2. Compliance Report
            self._generate_report()

            # 3. Compressed Map (Markdown)
            self._generate_compressed_map()
    # [/DEF:_generate_artifacts:Function]

    # [DEF:_generate_report:Function]
    # @PURPOSE: Generates the Markdown compliance report.
    # @PRE:     File scores and issues are available.
    # @POST:    Markdown report is created in reports directory.
    def _generate_report(self):
        with belief_scope("_generate_report"):
            # One clock reading feeds both the file name and the header.
            now = datetime.datetime.now()
            timestamp = now.strftime("%Y%m%d_%H%M%S")
            report_path = os.path.join(REPORTS_DIR, f"semantic_report_{timestamp}.md")
            os.makedirs(REPORTS_DIR, exist_ok=True)

            total_files = len(self.file_scores)
            avg_score = sum(self.file_scores.values()) / total_files if total_files > 0 else 0

            # Group top-level entities by file once so each table row walks
            # only its own trees instead of every entity in the project.
            roots_by_file: Dict[str, List[SemanticEntity]] = {}
            for entity in self.entities:
                roots_by_file.setdefault(entity.file_path, []).append(entity)

            with open(report_path, 'w', encoding='utf-8') as f:
                f.write("# Semantic Compliance Report\n\n")
                f.write(f"**Generated At:** {now.isoformat()}\n")
                f.write(f"**Global Compliance Score:** {avg_score:.1%}\n")
                f.write(f"**Scanned Files:** {total_files}\n\n")

                if self.global_issues:
                    f.write("## Critical Parsing Errors\n")
                    for issue in self.global_issues:
                        f.write(f"- 🔴 {issue}\n")
                    f.write("\n")

                f.write("## File Compliance Status\n")
                f.write("| File | Score | Issues |\n")
                f.write("|------|-------|--------|\n")

                # Worst files first; ties keep scan order (sorted is stable).
                for file_path, score in sorted(self.file_scores.items(), key=lambda item: item[1]):
                    issues: List[str] = []
                    self._collect_issues(roots_by_file.get(file_path, []), file_path, issues)

                    status_icon = "🟢" if score == 1.0 else "🟡" if score > 0.5 else "🔴"
                    issue_text = "<br>".join(issues) if issues else "OK"
                    f.write(f"| {file_path} | {status_icon} {score:.0%} | {issue_text} |\n")

            print(f"Generated {report_path}")
    # [/DEF:_generate_report:Function]

    # [DEF:_collect_issues:Function]
    # @PURPOSE: Helper to collect issues for a specific file from the entity tree.
    # @PRE:     entities list and file_path are valid.
    # @POST:    issues list is populated with compliance issues.
    # @PARAM:   entities (List[SemanticEntity]) - Trees to walk in pre-order.
    # @PARAM:   file_path (str) - Only entities recorded for this path contribute.
    # @PARAM:   issues (List[str]) - Output list, extended in place.
    def _collect_issues(self, entities: List[SemanticEntity], file_path: str, issues: List[str]):
        with belief_scope("_collect_issues"):
            for e in entities:
                if e.file_path == file_path:
                    issues.extend(f"[{e.name}] {i}" for i in e.compliance_issues)
                self._collect_issues(e.children, file_path, issues)
    # [/DEF:_collect_issues:Function]

    # [DEF:_generate_compressed_map:Function]
    # @PURPOSE: Generates the token-optimized project map.
    # @PRE:     Entities have been processed.
    # @POST:    Markdown project map is written.
    def _generate_compressed_map(self):
        with belief_scope("_generate_compressed_map"):
            os.makedirs(os.path.dirname(OUTPUT_COMPRESSED_MD), exist_ok=True)

            with open(OUTPUT_COMPRESSED_MD, 'w', encoding='utf-8') as f:
                f.write("# Project Semantic Map\n\n")
                f.write("> Compressed view for AI Context. Generated automatically.\n\n")

                for entity in self.entities:
                    self._write_entity_md(f, entity, level=0)

            print(f"Generated {OUTPUT_COMPRESSED_MD}")
    # [/DEF:_generate_compressed_map:Function]

    # [DEF:_write_entity_md:Function]
    # @PURPOSE: Recursive helper to write entity tree to Markdown.
    # @PRE:     f is an open file handle, entity is valid.
    # @POST:    Entity details are written to the file.
    # @PARAM:   f - Writable text stream.
    # @PARAM:   entity (SemanticEntity) - Entity to render.
    # @PARAM:   level (int) - Nesting depth; children are rendered only while level is below 2.
    def _write_entity_md(self, f, entity: SemanticEntity, level: int):
        with belief_scope("_write_entity_md"):
            indent = "  " * level

            # Any type without a dedicated icon uses the package icon.
            icon = {"Component": "🧩", "Function": "ƒ", "Class": "ℂ"}.get(entity.type, "📦")

            f.write(f"{indent}- {icon} **{entity.name}** (`{entity.type}`)\n")

            purpose = entity.tags.get("PURPOSE") or entity.tags.get("purpose")
            layer = entity.tags.get("LAYER") or entity.tags.get("layer")

            if purpose:
                f.write(f"{indent}  - 📝 {purpose}\n")
            if layer:
                f.write(f"{indent}  - 🏗️ Layer: {layer}\n")

            # Only structural relations are shown in the compressed view.
            for rel in entity.relations:
                if rel['type'] in ('DEPENDS_ON', 'CALLS', 'INHERITS_FROM'):
                    f.write(f"{indent}  - 🔗 {rel['type']} -> `{rel['target']}`\n")

            if level < 2:
                for child in entity.children:
                    self._write_entity_md(f, child, level + 1)
    # [/DEF:_write_entity_md:Function]

# [/DEF:SemanticMapGenerator:Class]

if __name__ == "__main__":
    # Script entry point: scan the configured root and write every artifact.
    SemanticMapGenerator(PROJECT_ROOT).run()

# [/DEF:generate_semantic_map:Module]