#!/usr/bin/env bash
set -euo pipefail

# ============================================================
# Organize Barcelona EDSS project safely
#   - Creates a timestamped backup first
#   - Creates a cleaner folder structure
#   - Moves files conservatively (never overwrites a destination)
#   - Does NOT delete anything (only empty directories are removed)
#
# Usage: run from the project root (the directory containing
#        README.md). All moves use paths relative to that directory.
# Requires: bash, rsync; git is used only when .git/ is present.
# ============================================================

PROJECT_ROOT="$(pwd)"
TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
BACKUP_PARENT="${PROJECT_ROOT}/../Barcelona_backups"
BACKUP_DIR="${BACKUP_PARENT}/Barcelona_backup_${TIMESTAMP}"
readonly PROJECT_ROOT TIMESTAMP BACKUP_PARENT BACKUP_DIR

# ------------------------------------------------------------
# Helper functions
# ------------------------------------------------------------

#######################################
# Ask a yes/no question on the terminal.
# Arguments: $1 - prompt text
# Returns:   0 if the answer is y/Y/yes/YES, 1 otherwise (including EOF)
#######################################
confirm() {
  local answer=""
  # A failed read (e.g. EOF on stdin) is treated as "no".
  read -r -p "$1" answer || true
  case "$answer" in
    y|Y|yes|YES) return 0 ;;
    *) return 1 ;;
  esac
}

#######################################
# Test whether a directory has no entries (dotfiles included).
# Arguments: $1 - directory path
# Returns:   0 if `ls -A` prints nothing for it, 1 otherwise
#######################################
dir_is_empty() {
  local listing
  listing="$(ls -A -- "$1")"
  [[ -z "$listing" ]]
}

#######################################
# Move a single file or directory unless the destination already exists.
# Does nothing when the source is missing.
# Arguments: $1 - source path
#            $2 - destination path (its parent directory is created)
# Outputs:   a MOVE or SKIP line on stdout
#######################################
move_if_exists() {
  local src="$1"
  local dest="$2"

  if [[ ! -e "$src" ]]; then
    return 0
  fi

  mkdir -p -- "$(dirname -- "$dest")"
  if [[ -e "$dest" ]]; then
    echo "SKIP: destination exists: $dest"
  else
    echo "MOVE: $src -> $dest"
    mv -- "$src" "$dest"
  fi
}

#######################################
# Move every path matching a glob pattern into a directory, skipping any
# entry whose destination already exists.
# Arguments: $1 - glob pattern (expanded by the shell)
#            $2 - destination directory (created if missing)
# Outputs:   a MOVE or SKIP line per matched path on stdout
#######################################
move_glob_if_exists() {
  local pattern="$1"
  local dest_dir="$2"
  local -a files=()
  local f dest
  # Empty IFS: the unquoted expansion below must glob but not word-split,
  # so patterns containing spaces stay in one piece.
  local IFS=''

  mkdir -p -- "$dest_dir"

  shopt -s nullglob
  # shellcheck disable=SC2206 # intentional glob expansion of the pattern
  files=( $pattern )
  shopt -u nullglob

  # The length guard avoids an "unbound variable" error for an empty array
  # under `set -u` on older bash releases.
  if (( ${#files[@]} == 0 )); then
    return 0
  fi

  for f in "${files[@]}"; do
    dest="${dest_dir}/${f##*/}"
    if [[ -e "$dest" ]]; then
      echo "SKIP: destination exists: $dest"
    else
      echo "MOVE: $f -> $dest"
      mv -- "$f" "$dest"
    fi
  done
}

#######################################
# Dispose of a directory after its known contents were moved out:
# remove it when empty, otherwise move what is left into the archive.
# Does nothing when the directory is missing.
# Arguments: $1 - directory to dispose of
#            $2 - archive destination used when the directory is not empty
#######################################
archive_leftover_dir() {
  local dir="$1"
  local archive_dest="$2"

  if [[ ! -d "$dir" ]]; then
    return 0
  fi

  if dir_is_empty "$dir"; then
    rmdir -- "$dir"
  else
    move_if_exists "$dir" "$archive_dest"
  fi
}

echo "Project root: ${PROJECT_ROOT}"
echo "Backup dir:   ${BACKUP_DIR}"
echo

# ------------------------------------------------------------
# Safety checks
# ------------------------------------------------------------

# Fail before asking anything if the backup tool is unavailable.
if ! command -v rsync >/dev/null 2>&1; then
  echo "ERROR: rsync is required to create the backup but was not found." >&2
  exit 1
fi

if [[ ! -f "${PROJECT_ROOT}/README.md" ]]; then
  echo "WARNING: README.md not found. Are you sure you are in the project root?" >&2
  echo "Current directory: ${PROJECT_ROOT}" >&2
  if ! confirm "Continue anyway? [y/N] "; then
    echo "Aborted."
    exit 1
  fi
fi

if [[ -d "${PROJECT_ROOT}/.git" ]]; then
  # Only tracked changes (unstaged or staged) are checked here.
  if ! git diff --quiet || ! git diff --cached --quiet; then
    echo "ERROR: Git working tree is not clean." >&2
    echo "Please commit or stash changes before organizing." >&2
    exit 1
  fi
fi

echo "This script will:"
echo "1. Create a full backup."
echo "2. Create organized folders."
echo "3. Move files into data/, prompts/, scripts/, results/, archive/."
echo "4. Keep your original files in the backup."
echo

if ! confirm "Proceed? [y/N] "; then
  echo "Aborted."
  exit 1
fi

# ------------------------------------------------------------
# Backup
# ------------------------------------------------------------

mkdir -p -- "${BACKUP_PARENT}"

echo
echo "Creating backup..."

# Virtual environments and bytecode caches are excluded from the backup.
rsync -a \
  --exclude "enarcelona/" \
  --exclude "env/" \
  --exclude ".venv/" \
  --exclude "__pycache__/" \
  "${PROJECT_ROOT}/" "${BACKUP_DIR}/"

echo "Backup created at:"
echo "${BACKUP_DIR}"

# ------------------------------------------------------------
# Create target structure
# ------------------------------------------------------------

echo
echo "Creating new directory structure..."

mkdir -p \
  data/raw \
  data/processed \
  data/ground_truth \
  data/external \
  prompts \
  scripts \
  results/benchmark_runs \
  results/final_results/model_outputs \
  results/figures \
  results/tables \
  results/logs \
  manuscript/figures \
  manuscript/tables \
  archive/old_scripts \
  archive/old_results \
  archive/tmp \
  archive/old_data \
  archive/old_project_files

# ------------------------------------------------------------
# Move prompts / attached instruction files
# ------------------------------------------------------------

echo
echo "Moving prompt and instruction files..."

move_if_exists "attach/Komplett.txt" "prompts/Komplett.txt"
move_if_exists "attach/just_edss_schema.gbnf" "prompts/just_edss_schema.gbnf"
move_if_exists "attach/just_edss_text.txt" "prompts/just_edss_text.txt"

# Remove the leftover attach folder if empty, otherwise archive it.
archive_leftover_dir "attach" "archive/old_project_files/attach"

# ------------------------------------------------------------
# Move important data files
# ------------------------------------------------------------

echo
echo "Moving data files..."

move_if_exists "Data/MS_Briefe_400_with_unique_id_SHA3_explore_cleaned.csv" \
  "data/processed/MS_Briefe_400_with_unique_id_SHA3_explore_cleaned.csv"

move_if_exists "Data/MS_Briefe_400_with_unique_id_SHA3_explore_cleaned_unique.csv" \
  "data/processed/MS_Briefe_400_with_unique_id_SHA3_explore_cleaned_unique.csv"

move_if_exists "Data/Join_edssandsub.tsv" \
  "data/ground_truth/Join_edssandsub.tsv"

move_if_exists "Data/GT_Numbers.csv" \
  "data/ground_truth/GT_Numbers.csv"

move_if_exists "Data/Annika1.csv" \
  "data/ground_truth/Annika1.csv"

move_if_exists "Data/comparison.tsv" \
  "data/ground_truth/comparison.tsv"

move_if_exists "Data/edss_distribution_summary.csv" \
  "data/processed/edss_distribution_summary.csv"

move_if_exists "Data/empirical_confidence_table.csv" \
  "data/processed/empirical_confidence_table.csv"

move_if_exists "Data/functional_system_colors.json" \
  "data/processed/functional_system_colors.json"

move_if_exists "Data/Test.csv" \
  "archive/tmp/Test.csv"

move_if_exists "Data/Hernan" \
  "data/external/Hernan"

move_if_exists "Data/iteration" \
  "archive/old_data/iteration"

# Old generated JSON/results from Data folder
move_glob_if_exists "Data/*results*.json" "archive/old_results"
move_glob_if_exists "Data/join_*.tsv" "archive/old_results"

# Dispose of the remaining Data folder. On a case-insensitive filesystem
# "Data" and "data" are the same directory; archiving it there would move
# the freshly organized data/ tree away, so it is left in place.
if [[ -d "Data" && "Data" -ef "data" ]]; then
  echo "NOTE: Data/ and data/ are the same directory here; leaving remaining files in place."
else
  archive_leftover_dir "Data" "archive/old_data/Data_remaining"
fi

# ------------------------------------------------------------
# Move benchmark results
# ------------------------------------------------------------

echo
echo "Moving benchmark results..."

if [[ -d "results_edss_benchmark" ]]; then
  move_glob_if_exists "results_edss_benchmark/run_*" "results/benchmark_runs"

  # results/final_results/model_outputs was already created above, so moving
  # the endresults directory onto it would always be skipped as "destination
  # exists". Move its contents into the existing directory instead.
  if [[ -d "results_edss_benchmark/endresults" ]]; then
    move_glob_if_exists "results_edss_benchmark/endresults/*" \
      "results/final_results/model_outputs"
    # Anything that could not be moved (name collisions, dotfiles) stays and
    # is archived together with the rest of results_edss_benchmark below.
    if dir_is_empty "results_edss_benchmark/endresults"; then
      rmdir -- "results_edss_benchmark/endresults"
    fi
  fi

  move_if_exists "results_edss_benchmark/confusion_matrices" \
    "results/figures/confusion_matrices"

  archive_leftover_dir "results_edss_benchmark" \
    "archive/old_results/results_edss_benchmark_remaining"
fi

# ------------------------------------------------------------
# Move old/general results
# ------------------------------------------------------------

echo
echo "Moving existing results files..."

if [[ -d "results" ]]; then
  # Figures
  move_glob_if_exists "results/*.png" "results/figures"
  move_glob_if_exists "results/*.PNG" "results/figures"
  move_glob_if_exists "results/*.jpg" "results/figures"
  move_glob_if_exists "results/*.jpeg" "results/figures"
  move_glob_if_exists "results/*.svg" "results/figures"

  # Tables
  move_glob_if_exists "results/*.csv" "results/tables"
  move_glob_if_exists "results/*.tsv" "results/tables"
  move_glob_if_exists "results/*.xlsx" "results/tables"

  # Subfolders that look like old results
  move_if_exists "results/Jan_visual" "archive/old_results/Jan_visual"
  move_if_exists "results/Lab_meeting" "archive/old_results/Lab_meeting"
  move_if_exists "results/just_edss" "archive/old_results/just_edss"
fi

# Root-level result tables
move_if_exists "edss_distribution_summary.csv" \
  "results/tables/edss_distribution_summary.csv"

# Logs
move_if_exists "edss_benchmark_terminal.log" \
  "results/logs/edss_benchmark_terminal.log"

# ------------------------------------------------------------
# Move scripts
# ------------------------------------------------------------

echo
echo "Moving scripts..."

move_if_exists "audit.py" "scripts/audit_outputs.py"
move_if_exists "certainty.py" "scripts/analyze_certainty.py"
move_if_exists "certainty_show.py" "scripts/certainty_show.py"
move_if_exists "figure1.py" "scripts/figure1.py"
move_if_exists "show_plots.py" "scripts/show_plots.py"
move_if_exists "show_plots.py.orig" "archive/old_scripts/show_plots.py.orig"

# Apps / old entry points
move_if_exists "app.py" "archive/old_scripts/app.py"
move_if_exists "total_app.py" "archive/old_scripts/total_app.py"

# Existing project visuals folder
move_if_exists "project/visuals" "results/figures/project_visuals"

archive_leftover_dir "project" "archive/old_project_files/project"

# ------------------------------------------------------------
# Environment folder
# ------------------------------------------------------------

echo
echo "Handling virtual environment..."

if [[ -d "enarcelona" ]]; then
  echo "Leaving virtual environment in place: enarcelona/"
  echo "It should remain ignored by .gitignore."
fi

# ------------------------------------------------------------
# Create README notes
# ------------------------------------------------------------

echo
echo "Writing organization notes..."

# Quoted delimiter: the Markdown below is written literally (backticks and
# any `$` are not expanded by the shell).
cat > "PROJECT_STRUCTURE.md" <<'EOF'
# Project Structure

This project was reorganized into:

- `data/`
  - `raw/`: original raw data, if retained locally
  - `processed/`: cleaned or derived input data
  - `ground_truth/`: manually annotated reference data
  - `external/`: externally provided data

- `prompts/`
  - EDSS instructions and prompt/schema assets

- `scripts/`
  - runnable analysis and plotting scripts

- `results/`
  - `benchmark_runs/`: full model benchmark runs
  - `final_results/`: final selected model outputs
  - `figures/`: generated figures
  - `tables/`: generated tables
  - `logs/`: terminal logs

- `manuscript/`
  - final figures and tables for paper/thesis writing

- `archive/`
  - old scripts, old results, temporary files, and unclear legacy files

Important:
The reorganization was performed after creating a full timestamped backup.
EOF

# ------------------------------------------------------------
# Final checks
# ------------------------------------------------------------

echo
echo "Organization complete."
echo
echo "Backup is here:"
echo "${BACKUP_DIR}"
echo
echo "New top-level structure:"
find . -maxdepth 2 -type d | sort

echo
if [[ -d ".git" ]]; then
  echo "Git status:"
  git status --short
fi

echo
echo "Next recommended commands:"
echo "  git status"
echo "  git add ."
echo "  git commit -m \"Reorganize project structure\""