Add comprehensive performance metrics and data collection

- Added instructions, cycles, and IPC metrics to all reports
- Created CSV data files for each language with detailed metrics
- Added timeline data (memory and CPU over time) for each run
- Updated all reports with new metrics
- Created a script to analyze the collected data
- Generated reports for all decimal levels (1, 2, 5, 10, 100, 1000, 2000)

Key findings:
- D has the highest IPC (4.00) - most efficient CPU usage
- Crystal is the fastest (22 ms) - faster than C and C++
- Assembly is the most memory-efficient (1.4 MB)
- Rust and Fortran have an IPC of 3.11 - good optimization
This commit is contained in:
Ein Anderssono
2026-04-23 14:39:13 +02:00
parent 443172606b
commit a2e13a70a1
303 changed files with 2281 additions and 1810 deletions
+119 -67
View File
@@ -37,37 +37,45 @@ verify() {
fi
}
# Function to get memory usage using /usr/bin/time (more reliable for fast programs)
get_memory_with_time() {
# Function to get metrics from /usr/bin/time output
get_time_metrics() {
local output_file=$1
# Use /usr/bin/time -l (macOS) or -v (Linux) to get maximum resident set size
# This works even for very fast programs
if command -v /usr/bin/time >/dev/null 2>&1; then
# Try macOS format first (time -l)
if grep -q "maximum resident set size" "$output_file" 2>/dev/null; then
# macOS format: "1622016 maximum resident set size" (already in bytes)
grep "maximum resident set size" "$output_file" 2>/dev/null | \
awk '{print $1}' || echo "0"
# Try Linux format (time -v)
elif grep -q "Maximum resident set size" "$output_file" 2>/dev/null; then
# Linux format: "Maximum resident set size (kbytes): 1622" (in KB)
grep "Maximum resident set size" "$output_file" 2>/dev/null | \
awk '{print $NF}' | \
awk '{print $1 * 1024}' || echo "0"
else
echo "0"
fi
else
local metric=$2
if [ ! -f "$output_file" ]; then
echo "0"
return
fi
case "$metric" in
"real")
grep "real" "$output_file" 2>/dev/null | awk '{print $1}' || echo "0"
;;
"user")
grep "real" "$output_file" 2>/dev/null | awk '{print $3}' || echo "0"
;;
"sys")
grep "real" "$output_file" 2>/dev/null | awk '{print $5}' || echo "0"
;;
"memory")
grep "maximum resident set size" "$output_file" 2>/dev/null | awk '{print $1}' || echo "0"
;;
"instructions")
grep "instructions retired" "$output_file" 2>/dev/null | awk '{print $1}' || echo "0"
;;
"cycles")
grep "cycles elapsed" "$output_file" 2>/dev/null | awk '{print $1}' || echo "0"
;;
*)
echo "0"
;;
esac
}
# Function to profile memory and CPU during execution (fallback for slower programs)
# Function to profile memory and CPU during execution (optimized for macOS)
profile_resources() {
local pid=$1
local mem_output_file=$2
local cpu_output_file=$3
local timeline_file=$4
local csv_file=$2
local peak_mem=0
local peak_cpu=0
local current_mem
@@ -75,46 +83,47 @@ profile_resources() {
local start_time=$(date +%s%N)
local sample_count=0
# Clear output files
> "$mem_output_file"
> "$cpu_output_file"
> "$timeline_file"
# Create CSV header
echo "timestamp_ms,memory_bytes,cpu_percent" > "$csv_file"
# Sample resources every 1ms while process is running
# Sample resources as fast as possible using ps
# Use a single ps call with multiple iterations to reduce overhead
while kill -0 "$pid" 2>/dev/null; do
# Use ps to get RSS (resident set size) in KB, then convert to bytes
local kb=$(ps -o rss= -p "$pid" 2>/dev/null || echo "0")
# Ensure kb is a valid number
if ! [[ "$kb" =~ ^[0-9]+$ ]]; then
kb=0
# Use ps with -o pid,rss,%cpu for faster parsing
local ps_output=$(ps -o pid,rss,%cpu -p "$pid" 2>/dev/null | tail -n 1)
if [ -n "$ps_output" ]; then
# Parse ps output: "1234 5678 12.3" -> PID=1234, RSS=5678, CPU=12.3
local kb=$(echo "$ps_output" | awk '{print $2}')
local cpu_raw=$(echo "$ps_output" | awk '{print $3}' | awk -F'.' '{print $1}')
# Ensure kb is a valid number
if ! [[ "$kb" =~ ^[0-9]+$ ]]; then
kb=0
fi
current_mem=$((kb * 1024))
# Ensure cpu is a valid number
if ! [[ "$cpu_raw" =~ ^[0-9]+$ ]]; then
cpu_raw=0
fi
current_cpu=$cpu_raw
current_time=$(date +%s%N)
elapsed_ms=$(( (current_time - start_time) / 1000000 ))
if [ "$current_mem" -gt "$peak_mem" ]; then
peak_mem=$current_mem
fi
if [ "$current_cpu" -gt "$peak_cpu" ]; then
peak_cpu=$current_cpu
fi
# Store time-series data in CSV format
echo "$elapsed_ms,$current_mem,$current_cpu" >> "$csv_file"
sample_count=$((sample_count + 1))
fi
current_mem=$((kb * 1024))
# Use ps to get CPU percentage
local cpu_raw=$(ps -o %cpu= -p "$pid" 2>/dev/null | awk '{print int($1)}' || echo "0")
# Ensure cpu is a valid number
if ! [[ "$cpu_raw" =~ ^[0-9]+$ ]]; then
cpu_raw=0
fi
current_cpu=$cpu_raw
current_time=$(date +%s%N)
elapsed_ms=$(( (current_time - start_time) / 1000000 ))
if [ "$current_mem" -gt "$peak_mem" ]; then
peak_mem=$current_mem
fi
if [ "$current_cpu" -gt "$peak_cpu" ]; then
peak_cpu=$current_cpu
fi
# Store time-series data
echo "$elapsed_ms $current_mem" >> "$mem_output_file"
echo "$elapsed_ms $current_cpu" >> "$cpu_output_file"
echo "$elapsed_ms $current_mem $current_cpu" >> "$timeline_file"
sample_count=$((sample_count + 1))
sleep 0.001 2>/dev/null || sleep 0.01
done
echo "$peak_mem $peak_cpu $sample_count"
@@ -136,18 +145,26 @@ run_program() {
local total_time=0
local total_memory=0
local total_cpu=0
local total_real_time=0
local total_user_time=0
local total_sys_time=0
local total_instructions=0
local total_cycles=0
local success_count=0
local result
local peak_memory=0
local peak_cpu=0
local timeline_dir="timelines/$name"
local data_dir="data/$name"
# Create timeline directory
# Create directories
mkdir -p "$timeline_dir"
mkdir -p "$data_dir"
# Run 4 times, discard first run (warmup)
for i in 1 2 3 4; do
local timeline_file="$timeline_dir/run_$i.tsv"
local csv_file="$data_dir/run_$i.csv"
local time_output_file="/tmp/time_output_$$_$i.txt"
local start=$(date +%s%N)
@@ -171,7 +188,7 @@ run_program() {
fi
# Profile resources in background (for CPU and timeline)
local resources=$(profile_resources "$pid" "/dev/null" "/dev/null" "$timeline_file")
local resources=$(profile_resources "$pid" "$csv_file")
local peak_mem_ps=$(echo "$resources" | awk '{print $1}')
local peak_cpu_val=$(echo "$resources" | awk '{print $2}')
@@ -182,8 +199,13 @@ run_program() {
local end=$(date +%s%N)
local elapsed=$(( (end - start) / 1000000 ))
# Get memory from /usr/bin/time -v output (more reliable for fast programs)
local peak_mem_time=$(get_memory_with_time "$time_output_file")
# Get all metrics from /usr/bin/time output
local real_time=$(get_time_metrics "$time_output_file" "real")
local user_time=$(get_time_metrics "$time_output_file" "user")
local sys_time=$(get_time_metrics "$time_output_file" "sys")
local peak_mem_time=$(get_time_metrics "$time_output_file" "memory")
local instructions=$(get_time_metrics "$time_output_file" "instructions")
local cycles=$(get_time_metrics "$time_output_file" "cycles")
# Use the larger of the two memory measurements (time -v is more reliable)
local peak_mem=$peak_mem_time
@@ -203,6 +225,11 @@ run_program() {
total_time=$((total_time + elapsed))
total_memory=$((total_memory + peak_mem))
total_cpu=$((total_cpu + peak_cpu_val))
total_real_time=$(echo "$total_real_time + $real_time" | bc)
total_user_time=$(echo "$total_user_time + $user_time" | bc)
total_sys_time=$(echo "$total_sys_time + $sys_time" | bc)
total_instructions=$((total_instructions + instructions))
total_cycles=$((total_cycles + cycles))
if [ "$peak_mem" -gt "$peak_memory" ]; then
peak_memory=$peak_mem
@@ -226,13 +253,38 @@ run_program() {
local avg_time=$((total_time / 3))
local avg_memory=$((total_memory / 3))
local avg_cpu=$((total_cpu / 3))
local avg_real_time=$(echo "scale=3; $total_real_time / 3" | bc)
local avg_user_time=$(echo "scale=3; $total_user_time / 3" | bc)
local avg_sys_time=$(echo "scale=3; $total_sys_time / 3" | bc)
local avg_instructions=$((total_instructions / 3))
local avg_cycles=$((total_cycles / 3))
local ipc=$(echo "scale=2; $avg_instructions / $avg_cycles" | bc)
# Save summary to CSV
local summary_file="$data_dir/summary.csv"
echo "metric,value" > "$summary_file"
echo "time_ms,$avg_time" >> "$summary_file"
echo "memory_bytes,$avg_memory" >> "$summary_file"
echo "peak_memory_bytes,$peak_memory" >> "$summary_file"
echo "real_time_s,$avg_real_time" >> "$summary_file"
echo "user_time_s,$avg_user_time" >> "$summary_file"
echo "sys_time_s,$avg_sys_time" >> "$summary_file"
echo "instructions,$avg_instructions" >> "$summary_file"
echo "cycles,$avg_cycles" >> "$summary_file"
echo "ipc,$ipc" >> "$summary_file"
if [ $success_count -eq 3 ]; then
echo -e "${GREEN}SUCCESS${NC} $avg_time ms, ${BLUE}${avg_memory} bytes avg / ${YELLOW}${peak_memory} bytes peak, ${YELLOW}${avg_cpu}% CPU avg / ${peak_cpu}% CPU peak${NC}"
results+=("$avg_time $name SUCCESS $avg_memory $peak_memory $avg_cpu $peak_cpu")
echo " Real: ${avg_real_time}s, User: ${avg_user_time}s, Sys: ${avg_sys_time}s"
echo " Instructions: ${avg_instructions}, Cycles: ${avg_cycles}, IPC: $ipc"
echo " Data saved to: $data_dir/"
results+=("$avg_time $name SUCCESS $avg_memory $peak_memory $avg_cpu $peak_cpu $avg_real_time $avg_user_time $avg_sys_time $avg_instructions $avg_cycles")
else
echo -e "${RED}FAILED${NC} $avg_time ms, ${BLUE}${avg_memory} bytes avg / ${YELLOW}${peak_memory} bytes peak, ${YELLOW}${avg_cpu}% CPU avg / ${peak_cpu}% CPU peak${NC}"
results+=("$avg_time $name FAILED $avg_memory $peak_memory $avg_cpu $peak_cpu")
echo " Real: ${avg_real_time}s, User: ${avg_user_time}s, Sys: ${avg_sys_time}s"
echo " Instructions: ${avg_instructions}, Cycles: ${avg_cycles}, IPC: $ipc"
echo " Data saved to: $data_dir/"
results+=("$avg_time $name FAILED $avg_memory $peak_memory $avg_cpu $peak_cpu $avg_real_time $avg_user_time $avg_sys_time $avg_instructions $avg_cycles")
fi
}