# print_hej/generate_gantt_charts.py

#!/usr/bin/env python3
"""Generate Gantt diagrams for each language showing time breakdown."""

import os
from pathlib import Path

# Languages grouped by how they are executed.
COMPILED = [
    "Assembly",
    "C",
    "C++",
    "Rust",
    "Go",
    "Nim",
    "Odin",
    "Fortran",
    "Swift",
    "Crystal",
    "Zig",
    "D",
    "Haskell",
    "Objective-C",
]
JIT = [
    "Java",
    "CSharp",
    "Kotlin",
    "Julia",
    "Dart",
    "Scala",
]
INTERPRETED = [
    "Python",
    "Perl",
    "PHP",
    "Ruby",
    "JavaScript",
    "TypeScript",
    "Lua",
    "Bash",
    "Brainfuck",
    "Elixir",
    "Erlang",
    "R",
]

# Directory name -> name shown in chart titles.
NAME_MAP = {"CSharp": "C#", "C++": "C++"}

def get_display_name(lang):
    """Return the display name for *lang*, or *lang* itself if unmapped."""
    if lang in NAME_MAP:
        return NAME_MAP[lang]
    return lang

def get_lang_type(lang):
    """Classify *lang* as "Compiled", "JIT" or "Interpreted"."""
    for members, label in ((COMPILED, "Compiled"), (JIT, "JIT")):
        if lang in members:
            return label
    # Anything not listed as compiled or JIT is treated as interpreted.
    return "Interpreted"

def read_timeline(lang):
    """Load the first recorded run for *lang* from ``timelines/<lang>/run_1.tsv``.

    Returns ``None`` when the language directory or the run file does not
    exist. Otherwise returns a list of ``(elapsed, memory, cpu)`` tuples
    (``int``, ``int``, ``float``); lines with fewer than three
    whitespace-separated fields, or whose fields are not numeric, are skipped.
    """
    lang_dir = Path(f"timelines/{lang}")
    run_file = lang_dir / "run_1.tsv"  # only the first run is used
    if not lang_dir.exists() or not run_file.exists():
        return None

    samples = []
    with run_file.open('r') as handle:
        for raw in handle:
            fields = raw.split()
            if len(fields) < 3:
                continue
            try:
                row = (int(fields[0]), int(fields[1]), float(fields[2]))
            except ValueError:
                # Header or otherwise non-numeric line.
                continue
            samples.append(row)
    return samples

def estimate_time_breakdown(total_time, lang_type):
    """Estimate how *total_time* splits into startup, calculation and I/O.

    The split is a heuristic chosen per language type, not a measurement:

    * ``"Compiled"``: startup ~10% (at least 1), I/O 1
    * ``"JIT"``: startup ~33% (at least 20), I/O 2
    * anything else (interpreted): startup ~25% (at least 10), I/O 3

    Whatever is left over is attributed to calculation.

    Args:
        total_time: Total run time in milliseconds. Negative values are
            treated as 0.
        lang_type: ``"Compiled"``, ``"JIT"`` or any other value for
            interpreted languages.

    Returns:
        A dict with keys ``'startup'``, ``'calculation'``, ``'io'`` and
        ``'total'``. All phases are non-negative and always sum to
        ``'total'``.
    """
    # (minimum startup, startup divisor, fixed I/O cost) per language type.
    if lang_type == "Compiled":
        min_startup, divisor, io_cost = 1, 10, 1
    elif lang_type == "JIT":
        min_startup, divisor, io_cost = 20, 3, 2
    else:
        min_startup, divisor, io_cost = 10, 4, 3

    total_time = max(0, total_time)

    # For very short runs the fixed minimums can exceed the total. Clamp each
    # phase to the time still available so that no phase goes negative (which
    # would yield a Gantt bar ending before it starts) while the phases still
    # add up to the total. For runs long enough to fit the minimums this is
    # identical to the unclamped estimate.
    startup = min(max(min_startup, total_time // divisor), total_time)
    io = min(io_cost, total_time - startup)
    calculation = total_time - startup - io

    return {
        'startup': startup,
        'calculation': calculation,
        'io': io,
        'total': total_time
    }

def generate_gantt_chart(lang, breakdown):
    """Render a Mermaid Gantt chart and a textual summary for one language.

    Args:
        lang: Language key; mapped through ``get_display_name`` for the title.
        breakdown: Mapping with ``'startup'``, ``'calculation'``, ``'io'`` and
            ``'total'`` entries (milliseconds).

    Returns:
        A Markdown snippet containing a fenced ``mermaid`` Gantt block with
        three consecutive bars (startup, calculation, I/O) followed by a
        bullet list giving each phase in ms and as a percentage of the total.
    """
    display_name = get_display_name(lang)

    total = breakdown['total']
    startup = breakdown['startup']
    calculation = breakdown['calculation']
    io = breakdown['io']

    def percent(part):
        # A run shorter than 1 ms can arrive here with total == 0; report 0%
        # instead of raising ZeroDivisionError.
        return (part / total) * 100 if total else 0.0

    startup_pct = percent(startup)
    calc_pct = percent(calculation)
    io_pct = percent(io)

    # Calculation ends where the I/O bar begins.
    calc_end = startup + calculation

    # The bar positions are fed to Mermaid as plain numbers via
    # ``dateFormat X``. ``axisFormat %s`` prints that same number back on the
    # axis; the d3 specifier ``%m`` would print a month number instead.
    gantt = f"""
```mermaid
gantt
    title {display_name} - Execution Time Breakdown
    dateFormat X
    axisFormat %s

    section Startup
    Runtime Init    :0, {startup}

    section Calculation
    π Calculation    :{startup}, {calc_end}

    section I/O
    Output          :{calc_end}, {total}
```

**Time Breakdown:**
- **Startup**: {startup} ms ({startup_pct:.1f}%)
- **Calculation**: {calculation} ms ({calc_pct:.1f}%)
- **I/O**: {io} ms ({io_pct:.1f}%)
- **Total**: {total} ms
"""

    return gantt

def generate_all_gantt_charts():
    """Build a Gantt chart for every language that has timeline data.

    Returns a dict mapping language key to its chart snippet. Languages whose
    timeline is missing or contains no samples are left out.
    """
    charts = {}

    for lang in COMPILED + JIT + INTERPRETED:
        samples = read_timeline(lang)
        if not samples:
            continue

        # Mean of the elapsed column, truncated to an int, is used as the
        # total time for the estimate.
        mean_elapsed = sum(row[0] for row in samples) / len(samples)

        breakdown = estimate_time_breakdown(int(mean_elapsed), get_lang_type(lang))
        charts[lang] = generate_gantt_chart(lang, breakdown)

    return charts

def update_reports_with_gantt():
    """Insert a Gantt-chart section into each ``reports/*_decimals.md`` file.

    For every report in the fixed list below:

    * a missing file is reported and skipped;
    * a file that already contains the Gantt heading is left untouched;
    * otherwise the section is inserted immediately before the first
      ``## Key Findings`` heading, or appended if that heading is absent.

    The same section is written to every report. It contains the charts for
    the first 10 compiled languages plus all JIT and interpreted languages
    for which a chart could be generated.

    Reports are read and written as UTF-8: the charts contain non-ASCII text
    ("π"), and relying on the locale's default encoding could fail during the
    write, after the file has already been truncated.
    """
    gantt_charts = generate_all_gantt_charts()

    # (report path, number of decimals the report covers)
    reports = [
        ("reports/1_decimals.md", 1),
        ("reports/2_decimals.md", 2),
        ("reports/5_decimals.md", 5),
        ("reports/10_decimals.md", 10),
        ("reports/100_decimals.md", 100),
        ("reports/1000_decimals.md", 1000),
        ("reports/2000_decimals.md", 2000),
    ]

    section_heading = "## Execution Time Breakdown (Gantt Charts)"
    findings_heading = "## Key Findings"

    # The section does not depend on the individual report, so build it once.
    selected_langs = COMPILED[:10] + JIT[:6] + INTERPRETED[:12]
    gantt_section = (
        f"\n{section_heading}\n\n"
        "The following Gantt charts show the execution time breakdown for each language:\n\n"
        + "".join(
            gantt_charts[lang] for lang in selected_langs if lang in gantt_charts
        )
    )

    for filename, _decimals in reports:
        if not os.path.exists(filename):
            print(f"File not found: {filename}")
            continue

        with open(filename, 'r', encoding="utf-8") as f:
            content = f.read()

        # Keep the update idempotent: never insert the section twice.
        if section_heading in content:
            print(f"Gantt section already exists in {filename}")
            continue

        before, marker, after = content.partition(findings_heading)
        if marker:
            updated_content = before + gantt_section + marker + after
        else:
            updated_content = content + gantt_section

        with open(filename, 'w', encoding="utf-8") as f:
            f.write(updated_content)

        print(f"Updated {filename} with Gantt charts")

if __name__ == "__main__":
    # Script entry point: update the reports, then print a final confirmation.
    # The confirmation is printed even if individual reports were skipped.
    update_reports_with_gantt()
    print("\nAll reports updated with Gantt charts!")