New Organised one

This commit is contained in:
2026-05-19 10:03:52 +02:00
parent 69f6e76bfe
commit 98df7c70f1
13 changed files with 1377 additions and 3282 deletions
+31
View File
@@ -0,0 +1,31 @@
# Project Structure
This project was reorganized into:
- `data/`
  - `raw/`: original raw data, if retained locally
  - `processed/`: cleaned or derived input data
  - `ground_truth/`: manually annotated reference data
  - `external/`: externally provided data
- `prompts/`
  - EDSS instructions and prompt/schema assets
- `scripts/`
  - runnable analysis and plotting scripts
- `results/`
  - `benchmark_runs/`: full model benchmark runs
  - `final_results/`: final selected model outputs
  - `figures/`: generated figures
  - `tables/`: generated tables
  - `logs/`: terminal logs
- `manuscript/`
  - final figures and tables for paper/thesis writing
- `archive/`
  - old scripts, old results, temporary files, and unclear legacy files

Important:
The reorganization was performed after creating a full timestamped backup.
+384
View File
@@ -0,0 +1,384 @@
#!/usr/bin/env bash
set -euo pipefail

# ============================================================
# Barcelona EDSS project organizer
#   1. takes a timestamped backup before touching anything
#   2. builds a cleaner folder layout
#   3. relocates files conservatively
#   4. deletes no files (only empty leftover folders are removed)
# ============================================================

# The directory the script is started from is treated as the project root;
# backups are written to a sibling folder one level above it.
PROJECT_ROOT=$(pwd)
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
BACKUP_PARENT="${PROJECT_ROOT}/../Barcelona_backups"
BACKUP_DIR="${BACKUP_PARENT}/Barcelona_backup_${TIMESTAMP}"

# Two informational lines followed by one empty line.
printf '%s\n' \
  "Project root: ${PROJECT_ROOT}" \
  "Backup dir: ${BACKUP_DIR}" \
  ""
# ------------------------------------------------------------
# Pre-flight checks and user confirmation
# ------------------------------------------------------------

# A missing README.md suggests the script was started from the wrong
# directory; let the user decide whether to carry on.
if [[ ! -f "${PROJECT_ROOT}/README.md" ]]; then
  echo "WARNING: README.md not found. Are you sure you are in the project root?"
  echo "Current directory: ${PROJECT_ROOT}"
  read -r -p "Continue anyway? [y/N] " answer
  if [[ ! "$answer" =~ ^(y|Y|yes|YES)$ ]]; then
    echo "Aborted."
    exit 1
  fi
fi

# Inside a git checkout, refuse to run while tracked files have unstaged or
# staged modifications.
if [[ -d "${PROJECT_ROOT}/.git" ]]; then
  if ! { git diff --quiet && git diff --cached --quiet; }; then
    echo "ERROR: Git working tree is not clean."
    echo "Please commit or stash changes before organizing."
    exit 1
  fi
fi

# Summarize the plan (the heredoc ends with one empty line).
cat <<'EOF'
This script will:
1. Create a full backup.
2. Create organized folders.
3. Move files into data/, prompts/, scripts/, results/, archive/.
4. Keep your original files in the backup.

EOF

# Final go/no-go: only y, Y, yes or YES continues.
read -r -p "Proceed? [y/N] " answer
if [[ ! "$answer" =~ ^(y|Y|yes|YES)$ ]]; then
  echo "Aborted."
  exit 1
fi
# ------------------------------------------------------------
# Backup
# ------------------------------------------------------------
mkdir -p "${BACKUP_PARENT}"

printf '\n%s\n' "Creating backup..."

# Folders left out of the copy (enarcelona/ is the project's virtual
# environment, see the environment section further down).
backup_excludes=("enarcelona/" "env/" ".venv/" "__pycache__/")
rsync_args=(-a)
for excluded in "${backup_excludes[@]}"; do
  rsync_args+=(--exclude "${excluded}")
done

# Trailing slashes: copy the *contents* of the project root into BACKUP_DIR.
rsync "${rsync_args[@]}" "${PROJECT_ROOT}/" "${BACKUP_DIR}/"

echo "Backup created at:"
echo "${BACKUP_DIR}"
# ------------------------------------------------------------
# Create target structure
# ------------------------------------------------------------
printf '\n%s\n' "Creating new directory structure..."

# Brace expansion spells out every leaf folder; mkdir -p creates the parents
# and is a no-op for folders that already exist.
target_dirs=(
  data/{raw,processed,ground_truth,external}
  prompts
  scripts
  results/{benchmark_runs,final_results/model_outputs,figures,tables,logs}
  manuscript/{figures,tables}
  archive/{old_scripts,old_results,tmp,old_data,old_project_files}
)
mkdir -p "${target_dirs[@]}"
# ------------------------------------------------------------
# Helper move functions
# ------------------------------------------------------------
#######################################
# Move one file or directory, never overwriting an existing destination.
# Arguments:
#   $1 - source path
#   $2 - full destination path (its parent directory is created if needed)
# Outputs:
#   "MOVE: <src> -> <dest>" or "SKIP: destination exists: <dest>" on stdout;
#   nothing when the source does not exist.
# Returns:
#   0 when there was nothing to do or the move succeeded, otherwise the
#   status of the failing command.
#######################################
move_if_exists() {
  # Locals, so the caller's own src/dest variables are not clobbered.
  local src="$1"
  local dest="$2"

  # -e follows symlinks and reports a dangling link as missing; -L makes sure
  # such links are still relocated instead of being silently left behind.
  if [[ ! -e "$src" && ! -L "$src" ]]; then
    return 0
  fi

  mkdir -p -- "$(dirname -- "$dest")"

  # A dangling symlink at the destination also counts as "taken".
  if [[ -e "$dest" || -L "$dest" ]]; then
    echo "SKIP: destination exists: $dest"
  else
    echo "MOVE: $src -> $dest"
    # "--" keeps names that start with a dash from being parsed as options.
    mv -- "$src" "$dest"
  fi
}
#######################################
# Move every path matching a glob pattern into a directory, never
# overwriting an existing destination entry.
# Arguments:
#   $1 - glob pattern (may contain spaces; matched relative to the cwd)
#   $2 - destination directory (created if needed)
# Outputs:
#   One "MOVE: ..." or "SKIP: destination exists: ..." line per match.
# Returns:
#   0 when nothing matched; otherwise the status of the last loop command.
#######################################
move_glob_if_exists() {
  local pattern="$1"
  local dest_dir="$2"
  local -a matches=()
  local f dest
  local nullglob_was_set=0

  mkdir -p -- "$dest_dir"

  # Turn nullglob on only for the expansion and put the caller's setting back
  # afterwards instead of unconditionally switching it off.
  if shopt -q nullglob; then
    nullglob_was_set=1
  fi
  shopt -s nullglob

  # With an empty IFS the unquoted pattern is glob-expanded but NOT
  # word-split, so patterns such as "my dir/*.csv" work as intended.
  local IFS=
  # shellcheck disable=SC2206  # intentional pathname expansion
  matches=( $pattern )

  if [[ "$nullglob_was_set" -eq 0 ]]; then
    shopt -u nullglob
  fi

  # Leave before expanding an empty array: "${matches[@]}" counts as an unset
  # variable under `set -u` on bash versions older than 4.4.
  if [[ "${#matches[@]}" -eq 0 ]]; then
    return 0
  fi

  for f in "${matches[@]}"; do
    dest="${dest_dir}/$(basename -- "$f")"
    if [[ -e "$dest" || -L "$dest" ]]; then
      echo "SKIP: destination exists: $dest"
    else
      echo "MOVE: $f -> $dest"
      mv -- "$f" "$dest"
    fi
  done
}
# ------------------------------------------------------------
# Move prompts / attached instruction files
# ------------------------------------------------------------
printf '\n%s\n' "Moving prompt and instruction files..."

# Each asset keeps its file name and goes from attach/ to prompts/.
for prompt_asset in Komplett.txt just_edss_schema.gbnf just_edss_text.txt; do
  move_if_exists "attach/${prompt_asset}" "prompts/${prompt_asset}"
done

# Whatever is left of attach/: archive it when it still has content,
# remove it when it is empty.
if [[ -d attach ]]; then
  if [[ -n "$(ls -A attach)" ]]; then
    move_if_exists "attach" "archive/old_project_files/attach"
  else
    rmdir attach
  fi
fi
# ------------------------------------------------------------
# Move important data files
# ------------------------------------------------------------
echo
echo "Moving data files..."

# "<target folder>:<entry inside Data/>" pairs. Every entry keeps its name and
# the list is processed top to bottom.
data_moves=(
  "data/processed:MS_Briefe_400_with_unique_id_SHA3_explore_cleaned.csv"
  "data/processed:MS_Briefe_400_with_unique_id_SHA3_explore_cleaned_unique.csv"
  "data/ground_truth:Join_edssandsub.tsv"
  "data/ground_truth:GT_Numbers.csv"
  "data/ground_truth:Annika1.csv"
  "data/ground_truth:comparison.tsv"
  "data/processed:edss_distribution_summary.csv"
  "data/processed:empirical_confidence_table.csv"
  "data/processed:functional_system_colors.json"
  "archive/tmp:Test.csv"
  "data/external:Hernan"
  "archive/old_data:iteration"
)
for data_move in "${data_moves[@]}"; do
  data_target="${data_move%%:*}"
  data_name="${data_move#*:}"
  move_if_exists "Data/${data_name}" "${data_target}/${data_name}"
done

# Old generated JSON/results from Data folder
move_glob_if_exists "Data/*results*.json" "archive/old_results"
move_glob_if_exists "Data/join_*.tsv" "archive/old_results"

# Deal with whatever is left in Data/.
if [[ -d Data ]]; then
  if [[ Data -ef data ]]; then
    # Case-insensitive filesystems resolve Data/ and data/ to the same
    # directory. Archiving "Data" here would carry the freshly organized
    # data/ tree off into the archive, so leave it where it is.
    echo "SKIP: Data/ and data/ are the same directory on this filesystem; leaving it in place"
  elif [[ -z "$(ls -A Data)" ]]; then
    rmdir Data
  else
    move_if_exists "Data" "archive/old_data/Data_remaining"
  fi
fi
# ------------------------------------------------------------
# Move benchmark results
# ------------------------------------------------------------
echo
echo "Moving benchmark results..."

if [[ -d results_edss_benchmark ]]; then
  move_glob_if_exists "results_edss_benchmark/run_*" "results/benchmark_runs"

  # results/final_results/model_outputs is created together with the target
  # structure, so moving the endresults directory onto that path would always
  # be skipped as "destination exists" and the final outputs would end up in
  # the archive. Move the entries *into* the folder instead.
  if [[ -d results_edss_benchmark/endresults ]]; then
    move_glob_if_exists "results_edss_benchmark/endresults/*" \
      "results/final_results/model_outputs"
    # Hidden or skipped entries keep the folder non-empty; in that case it is
    # archived below with the rest of results_edss_benchmark.
    if [[ -z "$(ls -A results_edss_benchmark/endresults)" ]]; then
      rmdir results_edss_benchmark/endresults
    fi
  fi

  move_if_exists "results_edss_benchmark/confusion_matrices" \
    "results/figures/confusion_matrices"

  # Remove the old folder when empty, otherwise archive what remains.
  if [[ -z "$(ls -A results_edss_benchmark 2>/dev/null || true)" ]]; then
    rmdir results_edss_benchmark
  else
    move_if_exists "results_edss_benchmark" \
      "archive/old_results/results_edss_benchmark_remaining"
  fi
fi
# ------------------------------------------------------------
# Move old/general results
# ------------------------------------------------------------
printf '\n%s\n' "Moving existing results files..."

if [[ -d results ]]; then
  # Image files directly inside results/ are figures.
  for figure_ext in png PNG jpg jpeg svg; do
    move_glob_if_exists "results/*.${figure_ext}" "results/figures"
  done

  # Tabular files directly inside results/ are tables.
  for table_ext in csv tsv xlsx; do
    move_glob_if_exists "results/*.${table_ext}" "results/tables"
  done

  # Subfolders that look like old results go to the archive under the same name.
  for legacy_dir in Jan_visual Lab_meeting just_edss; do
    move_if_exists "results/${legacy_dir}" "archive/old_results/${legacy_dir}"
  done
fi

# Result table and terminal log that sit in the project root.
move_if_exists "edss_distribution_summary.csv" "results/tables/edss_distribution_summary.csv"
move_if_exists "edss_benchmark_terminal.log" "results/logs/edss_benchmark_terminal.log"
# ------------------------------------------------------------
# Move scripts
# ------------------------------------------------------------
printf '\n%s\n' "Moving scripts..."

# "<source>:<destination>" pairs, handled in order. Analysis/plotting scripts
# go to scripts/ (two of them under a new name); the .orig copy and the
# apps / old entry points go to archive/old_scripts/.
script_moves=(
  "audit.py:scripts/audit_outputs.py"
  "certainty.py:scripts/analyze_certainty.py"
  "certainty_show.py:scripts/certainty_show.py"
  "figure1.py:scripts/figure1.py"
  "show_plots.py:scripts/show_plots.py"
  "show_plots.py.orig:archive/old_scripts/show_plots.py.orig"
  "app.py:archive/old_scripts/app.py"
  "total_app.py:archive/old_scripts/total_app.py"
)
for script_move in "${script_moves[@]}"; do
  move_if_exists "${script_move%%:*}" "${script_move#*:}"
done

# Existing project visuals folder
move_if_exists "project/visuals" "results/figures/project_visuals"

# Rest of project/: archive it when it still has content, remove it when empty.
if [[ -d project ]]; then
  if [[ -n "$(ls -A project)" ]]; then
    move_if_exists "project" "archive/old_project_files/project"
  else
    rmdir project
  fi
fi
# ------------------------------------------------------------
# Environment folder
# ------------------------------------------------------------
printf '\n%s\n' "Handling virtual environment..."

# The environment folder is only reported, never moved.
if [[ -d enarcelona ]]; then
  printf '%s\n' \
    "Leaving virtual environment in place: enarcelona/" \
    "It should remain ignored by .gitignore."
fi
# ------------------------------------------------------------
# Create README notes
# ------------------------------------------------------------
echo
echo "Writing organization notes..."

# Quoted delimiter: the Markdown is written literally, so the backticks are
# not treated as command substitutions. Sub-folders are indented under their
# parent so the rendered list mirrors the directory tree, and the blank line
# before "Important:" keeps the closing note out of the last list item.
cat > "PROJECT_STRUCTURE.md" <<'EOF'
# Project Structure
This project was reorganized into:
- `data/`
  - `raw/`: original raw data, if retained locally
  - `processed/`: cleaned or derived input data
  - `ground_truth/`: manually annotated reference data
  - `external/`: externally provided data
- `prompts/`
  - EDSS instructions and prompt/schema assets
- `scripts/`
  - runnable analysis and plotting scripts
- `results/`
  - `benchmark_runs/`: full model benchmark runs
  - `final_results/`: final selected model outputs
  - `figures/`: generated figures
  - `tables/`: generated tables
  - `logs/`: terminal logs
- `manuscript/`
  - final figures and tables for paper/thesis writing
- `archive/`
  - old scripts, old results, temporary files, and unclear legacy files

Important:
The reorganization was performed after creating a full timestamped backup.
EOF
# ------------------------------------------------------------
# Final checks
# ------------------------------------------------------------
printf '\n%s\n\n' "Organization complete."

echo "Backup is here:"
echo "${BACKUP_DIR}"

# Directories up to two levels below the current directory, sorted.
printf '\n%s\n' "New top-level structure:"
find . -maxdepth 2 -type d | sort
echo

# Inside a git checkout, show what the reorganization changed.
if [[ -d .git ]]; then
  echo "Git status:"
  git status --short
fi
echo

# Literal heredoc: printed exactly as written.
cat <<'EOF'
Next recommended commands:
 git status
 git add .
 git commit -m "Reorganize project structure"
EOF
File diff suppressed because it is too large Load Diff
View File
-2320
View File
File diff suppressed because it is too large Load Diff