adjusting the script with new paths
This commit is contained in:
@@ -1,384 +0,0 @@
|
|||||||
#!/usr/bin/env bash
|
|
||||||
|
|
||||||
set -euo pipefail
|
|
||||||
|
|
||||||
# ============================================================
|
|
||||||
# Organize Barcelona EDSS project safely
|
|
||||||
# - Creates a timestamped backup first
|
|
||||||
# - Creates a cleaner folder structure
|
|
||||||
# - Moves files conservatively
|
|
||||||
# - Does NOT delete anything
|
|
||||||
# ============================================================
|
|
||||||
|
|
||||||
PROJECT_ROOT="$(pwd)"
|
|
||||||
TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
|
|
||||||
BACKUP_PARENT="${PROJECT_ROOT}/../Barcelona_backups"
|
|
||||||
BACKUP_DIR="${BACKUP_PARENT}/Barcelona_backup_${TIMESTAMP}"
|
|
||||||
|
|
||||||
echo "Project root: ${PROJECT_ROOT}"
|
|
||||||
echo "Backup dir: ${BACKUP_DIR}"
|
|
||||||
echo
|
|
||||||
|
|
||||||
# ------------------------------------------------------------
|
|
||||||
# Safety checks
|
|
||||||
# ------------------------------------------------------------
|
|
||||||
|
|
||||||
if [ ! -f "${PROJECT_ROOT}/README.md" ]; then
|
|
||||||
echo "WARNING: README.md not found. Are you sure you are in the project root?"
|
|
||||||
echo "Current directory: ${PROJECT_ROOT}"
|
|
||||||
read -r -p "Continue anyway? [y/N] " answer
|
|
||||||
case "$answer" in
|
|
||||||
y|Y|yes|YES) ;;
|
|
||||||
*) echo "Aborted."; exit 1 ;;
|
|
||||||
esac
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ -d "${PROJECT_ROOT}/.git" ]; then
|
|
||||||
if ! git diff --quiet || ! git diff --cached --quiet; then
|
|
||||||
echo "ERROR: Git working tree is not clean."
|
|
||||||
echo "Please commit or stash changes before organizing."
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "This script will:"
|
|
||||||
echo "1. Create a full backup."
|
|
||||||
echo "2. Create organized folders."
|
|
||||||
echo "3. Move files into data/, prompts/, scripts/, results/, archive/."
|
|
||||||
echo "4. Keep your original files in the backup."
|
|
||||||
echo
|
|
||||||
read -r -p "Proceed? [y/N] " answer
|
|
||||||
case "$answer" in
|
|
||||||
y|Y|yes|YES) ;;
|
|
||||||
*) echo "Aborted."; exit 1 ;;
|
|
||||||
esac
|
|
||||||
|
|
||||||
# ------------------------------------------------------------
|
|
||||||
# Backup
|
|
||||||
# ------------------------------------------------------------
|
|
||||||
|
|
||||||
mkdir -p "${BACKUP_PARENT}"
|
|
||||||
|
|
||||||
echo
|
|
||||||
echo "Creating backup..."
|
|
||||||
rsync -a \
|
|
||||||
--exclude "enarcelona/" \
|
|
||||||
--exclude "env/" \
|
|
||||||
--exclude ".venv/" \
|
|
||||||
--exclude "__pycache__/" \
|
|
||||||
"${PROJECT_ROOT}/" "${BACKUP_DIR}/"
|
|
||||||
|
|
||||||
echo "Backup created at:"
|
|
||||||
echo "${BACKUP_DIR}"
|
|
||||||
|
|
||||||
# ------------------------------------------------------------
|
|
||||||
# Create target structure
|
|
||||||
# ------------------------------------------------------------
|
|
||||||
|
|
||||||
echo
|
|
||||||
echo "Creating new directory structure..."
|
|
||||||
|
|
||||||
mkdir -p \
|
|
||||||
data/raw \
|
|
||||||
data/processed \
|
|
||||||
data/ground_truth \
|
|
||||||
data/external \
|
|
||||||
prompts \
|
|
||||||
scripts \
|
|
||||||
results/benchmark_runs \
|
|
||||||
results/final_results/model_outputs \
|
|
||||||
results/figures \
|
|
||||||
results/tables \
|
|
||||||
results/logs \
|
|
||||||
manuscript/figures \
|
|
||||||
manuscript/tables \
|
|
||||||
archive/old_scripts \
|
|
||||||
archive/old_results \
|
|
||||||
archive/tmp \
|
|
||||||
archive/old_data \
|
|
||||||
archive/old_project_files
|
|
||||||
|
|
||||||
# ------------------------------------------------------------
|
|
||||||
# Helper move functions
|
|
||||||
# ------------------------------------------------------------
|
|
||||||
|
|
||||||
#######################################
# Move a single file or directory to an exact destination path,
# never overwriting anything that is already there.
# Arguments:
#   $1 - source path (file, directory, or symlink)
#   $2 - full destination path (not just the destination directory)
# Outputs:
#   Writes "MOVE: <src> -> <dest>" or "SKIP: destination exists: <dest>"
#   to stdout. Prints nothing when the source does not exist.
# Returns:
#   0 when the source is absent, the move was skipped, or the move
#   succeeded; otherwise the failing status of mkdir/mv.
#######################################
move_if_exists() {
  # Locals so that callers' own $src / $dest are not clobbered.
  local src="$1"
  local dest="$2"

  # -e follows symlinks and is false for a dangling link, so also test -L:
  # a broken symlink is still something that should be moved along.
  if [ ! -e "$src" ] && [ ! -L "$src" ]; then
    return 0
  fi

  # The parent directory is created only once we know there is something
  # to move, so missing sources never leave empty directories behind.
  mkdir -p -- "$(dirname -- "$dest")"

  # Same -e/-L pairing: a dangling symlink at the destination would
  # otherwise be silently replaced by mv.
  if [ -e "$dest" ] || [ -L "$dest" ]; then
    echo "SKIP: destination exists: $dest"
  else
    echo "MOVE: $src -> $dest"
    mv -- "$src" "$dest"
  fi
}
|
|
||||||
|
|
||||||
#######################################
# Move every path matching one glob pattern into a destination
# directory, keeping each entry's base name and never overwriting.
# Arguments:
#   $1 - a single glob pattern (may contain spaces in directory names)
#   $2 - destination directory (created even if nothing matches)
# Outputs:
#   One "MOVE: ..." or "SKIP: destination exists: ..." line per match
#   on stdout.
# Returns:
#   0 when nothing matched or all matches were handled; otherwise the
#   failing status of mkdir/mv.
#######################################
move_glob_if_exists() {
  local pattern="$1"
  local dest_dir="$2"
  local -a files=()
  local f base dest
  local restore_nullglob=0

  mkdir -p -- "$dest_dir"

  # Enable nullglob only for the expansion and restore the caller's
  # setting afterwards instead of unconditionally switching it off.
  if ! shopt -q nullglob; then
    shopt -s nullglob
    restore_nullglob=1
  fi

  # An empty IFS disables word splitting while still allowing pathname
  # expansion, so "My Data/*.csv" is treated as ONE pattern. IFS is local
  # and therefore restored automatically when the function returns.
  local IFS=''
  # shellcheck disable=SC2206 # intentional unquoted expansion: we want globbing
  files=( $pattern )

  if [ "$restore_nullglob" -eq 1 ]; then
    shopt -u nullglob
  fi

  # Expanding "${files[@]}" on an empty array is an "unbound variable"
  # error under `set -u` on bash < 4.4, so bail out before the loop.
  if [ "${#files[@]}" -eq 0 ]; then
    return 0
  fi

  for f in "${files[@]}"; do
    base="$(basename -- "$f")"
    dest="${dest_dir}/${base}"

    # -L catches dangling symlinks, which -e reports as non-existent.
    if [ -e "$dest" ] || [ -L "$dest" ]; then
      echo "SKIP: destination exists: $dest"
    else
      echo "MOVE: $f -> $dest"
      mv -- "$f" "$dest"
    fi
  done
}
|
|
||||||
|
|
||||||
# ------------------------------------------------------------
|
|
||||||
# Move prompts / attached instruction files
|
|
||||||
# ------------------------------------------------------------
|
|
||||||
|
|
||||||
echo
|
|
||||||
echo "Moving prompt and instruction files..."
|
|
||||||
|
|
||||||
move_if_exists "attach/Komplett.txt" "prompts/Komplett.txt"
|
|
||||||
move_if_exists "attach/just_edss_schema.gbnf" "prompts/just_edss_schema.gbnf"
|
|
||||||
move_if_exists "attach/just_edss_text.txt" "prompts/just_edss_text.txt"
|
|
||||||
|
|
||||||
# Remove the leftover attach folder if it is empty; otherwise archive it
|
|
||||||
if [ -d "attach" ]; then
|
|
||||||
if [ -z "$(ls -A attach)" ]; then
|
|
||||||
rmdir attach
|
|
||||||
else
|
|
||||||
move_if_exists "attach" "archive/old_project_files/attach"
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
# ------------------------------------------------------------
|
|
||||||
# Move important data files
|
|
||||||
# ------------------------------------------------------------
|
|
||||||
|
|
||||||
echo
|
|
||||||
echo "Moving data files..."
|
|
||||||
|
|
||||||
move_if_exists "Data/MS_Briefe_400_with_unique_id_SHA3_explore_cleaned.csv" \
|
|
||||||
"data/processed/MS_Briefe_400_with_unique_id_SHA3_explore_cleaned.csv"
|
|
||||||
|
|
||||||
move_if_exists "Data/MS_Briefe_400_with_unique_id_SHA3_explore_cleaned_unique.csv" \
|
|
||||||
"data/processed/MS_Briefe_400_with_unique_id_SHA3_explore_cleaned_unique.csv"
|
|
||||||
|
|
||||||
move_if_exists "Data/Join_edssandsub.tsv" \
|
|
||||||
"data/ground_truth/Join_edssandsub.tsv"
|
|
||||||
|
|
||||||
move_if_exists "Data/GT_Numbers.csv" \
|
|
||||||
"data/ground_truth/GT_Numbers.csv"
|
|
||||||
|
|
||||||
move_if_exists "Data/Annika1.csv" \
|
|
||||||
"data/ground_truth/Annika1.csv"
|
|
||||||
|
|
||||||
move_if_exists "Data/comparison.tsv" \
|
|
||||||
"data/ground_truth/comparison.tsv"
|
|
||||||
|
|
||||||
move_if_exists "Data/edss_distribution_summary.csv" \
|
|
||||||
"data/processed/edss_distribution_summary.csv"
|
|
||||||
|
|
||||||
move_if_exists "Data/empirical_confidence_table.csv" \
|
|
||||||
"data/processed/empirical_confidence_table.csv"
|
|
||||||
|
|
||||||
move_if_exists "Data/functional_system_colors.json" \
|
|
||||||
"data/processed/functional_system_colors.json"
|
|
||||||
|
|
||||||
move_if_exists "Data/Test.csv" \
|
|
||||||
"archive/tmp/Test.csv"
|
|
||||||
|
|
||||||
move_if_exists "Data/Hernan" \
|
|
||||||
"data/external/Hernan"
|
|
||||||
|
|
||||||
move_if_exists "Data/iteration" \
|
|
||||||
"archive/old_data/iteration"
|
|
||||||
|
|
||||||
# Old generated JSON/results from Data folder
|
|
||||||
move_glob_if_exists "Data/*results*.json" "archive/old_results"
|
|
||||||
move_glob_if_exists "Data/join_*.tsv" "archive/old_results"
|
|
||||||
|
|
||||||
# Move remaining Data folder if anything left
|
|
||||||
if [ -d "Data" ]; then
|
|
||||||
if [ -z "$(ls -A Data)" ]; then
|
|
||||||
rmdir Data
|
|
||||||
else
|
|
||||||
move_if_exists "Data" "archive/old_data/Data_remaining"
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
# ------------------------------------------------------------
|
|
||||||
# Move benchmark results
|
|
||||||
# ------------------------------------------------------------
|
|
||||||
|
|
||||||
echo
|
|
||||||
echo "Moving benchmark results..."
|
|
||||||
|
|
||||||
# Relocate the legacy results_edss_benchmark/ tree into results/.
# Anything that cannot be placed is archived rather than deleted.
if [ -d "results_edss_benchmark" ]; then
  move_glob_if_exists "results_edss_benchmark/run_*" "results/benchmark_runs"

  # results/final_results/model_outputs is already created by the mkdir -p
  # that builds the target structure, so moving the endresults directory
  # itself onto that path would always be skipped as "destination exists".
  # Move its contents into the existing directory instead.
  if [ -d "results_edss_benchmark/endresults" ]; then
    move_glob_if_exists "results_edss_benchmark/endresults/*" \
      "results/final_results/model_outputs"

    # Intentionally best-effort: rmdir fails when skipped or hidden files
    # remain, in which case the directory is archived with the leftovers
    # further down.
    rmdir "results_edss_benchmark/endresults" 2>/dev/null || true
  fi

  move_if_exists "results_edss_benchmark/confusion_matrices" \
    "results/figures/confusion_matrices"

  # Remove the legacy directory when empty; otherwise keep whatever is left
  # under archive/ so nothing is lost.
  if [ -z "$(ls -A results_edss_benchmark 2>/dev/null || true)" ]; then
    rmdir results_edss_benchmark
  else
    move_if_exists "results_edss_benchmark" \
      "archive/old_results/results_edss_benchmark_remaining"
  fi
fi
|
|
||||||
|
|
||||||
# ------------------------------------------------------------
|
|
||||||
# Move old/general results
|
|
||||||
# ------------------------------------------------------------
|
|
||||||
|
|
||||||
echo
|
|
||||||
echo "Moving existing results files..."
|
|
||||||
|
|
||||||
if [ -d "results" ]; then
|
|
||||||
# Figures
|
|
||||||
move_glob_if_exists "results/*.png" "results/figures"
|
|
||||||
move_glob_if_exists "results/*.PNG" "results/figures"
|
|
||||||
move_glob_if_exists "results/*.jpg" "results/figures"
|
|
||||||
move_glob_if_exists "results/*.jpeg" "results/figures"
|
|
||||||
move_glob_if_exists "results/*.svg" "results/figures"
|
|
||||||
|
|
||||||
# Tables
|
|
||||||
move_glob_if_exists "results/*.csv" "results/tables"
|
|
||||||
move_glob_if_exists "results/*.tsv" "results/tables"
|
|
||||||
move_glob_if_exists "results/*.xlsx" "results/tables"
|
|
||||||
|
|
||||||
# Subfolders that look like old results
|
|
||||||
move_if_exists "results/Jan_visual" "archive/old_results/Jan_visual"
|
|
||||||
move_if_exists "results/Lab_meeting" "archive/old_results/Lab_meeting"
|
|
||||||
move_if_exists "results/just_edss" "archive/old_results/just_edss"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Root-level result tables
|
|
||||||
move_if_exists "edss_distribution_summary.csv" \
|
|
||||||
"results/tables/edss_distribution_summary.csv"
|
|
||||||
|
|
||||||
# Logs
|
|
||||||
move_if_exists "edss_benchmark_terminal.log" \
|
|
||||||
"results/logs/edss_benchmark_terminal.log"
|
|
||||||
|
|
||||||
# ------------------------------------------------------------
|
|
||||||
# Move scripts
|
|
||||||
# ------------------------------------------------------------
|
|
||||||
|
|
||||||
echo
|
|
||||||
echo "Moving scripts..."
|
|
||||||
|
|
||||||
move_if_exists "audit.py" "scripts/audit_outputs.py"
|
|
||||||
move_if_exists "certainty.py" "scripts/analyze_certainty.py"
|
|
||||||
move_if_exists "certainty_show.py" "scripts/certainty_show.py"
|
|
||||||
move_if_exists "figure1.py" "scripts/figure1.py"
|
|
||||||
move_if_exists "show_plots.py" "scripts/show_plots.py"
|
|
||||||
|
|
||||||
move_if_exists "show_plots.py.orig" "archive/old_scripts/show_plots.py.orig"
|
|
||||||
|
|
||||||
# Apps / old entry points
|
|
||||||
move_if_exists "app.py" "archive/old_scripts/app.py"
|
|
||||||
move_if_exists "total_app.py" "archive/old_scripts/total_app.py"
|
|
||||||
|
|
||||||
# Existing project visuals folder
|
|
||||||
move_if_exists "project/visuals" "results/figures/project_visuals"
|
|
||||||
|
|
||||||
if [ -d "project" ]; then
|
|
||||||
if [ -z "$(ls -A project)" ]; then
|
|
||||||
rmdir project
|
|
||||||
else
|
|
||||||
move_if_exists "project" "archive/old_project_files/project"
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
# ------------------------------------------------------------
|
|
||||||
# Environment folder
|
|
||||||
# ------------------------------------------------------------
|
|
||||||
|
|
||||||
echo
|
|
||||||
echo "Handling virtual environment..."
|
|
||||||
|
|
||||||
if [ -d "enarcelona" ]; then
|
|
||||||
echo "Leaving virtual environment in place: enarcelona/"
|
|
||||||
echo "It should remain ignored by .gitignore."
|
|
||||||
fi
|
|
||||||
|
|
||||||
# ------------------------------------------------------------
|
|
||||||
# Create README notes
|
|
||||||
# ------------------------------------------------------------
|
|
||||||
|
|
||||||
echo
|
|
||||||
echo "Writing organization notes..."
|
|
||||||
|
|
||||||
cat > "PROJECT_STRUCTURE.md" <<'EOF'
|
|
||||||
# Project Structure
|
|
||||||
|
|
||||||
This project was reorganized into:
|
|
||||||
|
|
||||||
- `data/`
|
|
||||||
- `raw/`: original raw data, if retained locally
|
|
||||||
- `processed/`: cleaned or derived input data
|
|
||||||
- `ground_truth/`: manually annotated reference data
|
|
||||||
- `external/`: externally provided data
|
|
||||||
|
|
||||||
- `prompts/`
|
|
||||||
- EDSS instructions and prompt/schema assets
|
|
||||||
|
|
||||||
- `scripts/`
|
|
||||||
- runnable analysis and plotting scripts
|
|
||||||
|
|
||||||
- `results/`
|
|
||||||
- `benchmark_runs/`: full model benchmark runs
|
|
||||||
- `final_results/`: final selected model outputs
|
|
||||||
- `figures/`: generated figures
|
|
||||||
- `tables/`: generated tables
|
|
||||||
- `logs/`: terminal logs
|
|
||||||
|
|
||||||
- `manuscript/`
|
|
||||||
- final figures and tables for paper/thesis writing
|
|
||||||
|
|
||||||
- `archive/`
|
|
||||||
- old scripts, old results, temporary files, and unclear legacy files
|
|
||||||
|
|
||||||
Important:
|
|
||||||
The reorganization was performed after creating a full timestamped backup.
|
|
||||||
EOF
|
|
||||||
|
|
||||||
# ------------------------------------------------------------
|
|
||||||
# Final checks
|
|
||||||
# ------------------------------------------------------------
|
|
||||||
|
|
||||||
echo
|
|
||||||
echo "Organization complete."
|
|
||||||
echo
|
|
||||||
echo "Backup is here:"
|
|
||||||
echo "${BACKUP_DIR}"
|
|
||||||
echo
|
|
||||||
echo "New top-level structure:"
|
|
||||||
find . -maxdepth 2 -type d | sort
|
|
||||||
echo
|
|
||||||
|
|
||||||
if [ -d ".git" ]; then
|
|
||||||
echo "Git status:"
|
|
||||||
git status --short
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo
|
|
||||||
echo "Next recommended commands:"
|
|
||||||
echo " git status"
|
|
||||||
echo " git add ."
|
|
||||||
echo " git commit -m \"Reorganize project structure\""
|
|
||||||
@@ -1258,7 +1258,7 @@
|
|||||||
|
|
||||||
|
|
||||||
# %% API call - Multi-model, multi-iteration EDSS + timing/resource benchmark
|
# %% API call - Multi-model, multi-iteration EDSS + timing/resource benchmark
|
||||||
#
|
|
||||||
#import time
|
#import time
|
||||||
#import json
|
#import json
|
||||||
#import os
|
#import os
|
||||||
@@ -1279,9 +1279,9 @@
|
|||||||
# print("Install with: pip install psutil")
|
# print("Install with: pip install psutil")
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
## =========================
|
# =========================
|
||||||
## CONFIGURATION
|
# CONFIGURATION
|
||||||
## =========================
|
# =========================
|
||||||
#
|
#
|
||||||
#load_dotenv()
|
#load_dotenv()
|
||||||
#
|
#
|
||||||
@@ -1289,21 +1289,21 @@
|
|||||||
#OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL")
|
#OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL")
|
||||||
#
|
#
|
||||||
#MODEL_CONFIGS = [
|
#MODEL_CONFIGS = [
|
||||||
## {
|
# {
|
||||||
## "model_name": "qwen3.6-35b-a3b",
|
# "model_name": "qwen3.6-35b-a3b",
|
||||||
## "use_response_format": False,
|
# "use_response_format": False,
|
||||||
## "temperature": 0.0,
|
# "temperature": 0.0,
|
||||||
## "max_tokens": 4096,
|
# "max_tokens": 4096,
|
||||||
##
|
#
|
||||||
## # If your backend is vLLM / Qwen chat-template compatible,
|
# # If your backend is vLLM / Qwen chat-template compatible,
|
||||||
## # this may reduce long hidden reasoning and JSON truncation.
|
# # this may reduce long hidden reasoning and JSON truncation.
|
||||||
## # If your server errors because of extra_body, set this to None.
|
# # If your server errors because of extra_body, set this to None.
|
||||||
## "extra_body": {
|
# "extra_body": {
|
||||||
## "chat_template_kwargs": {
|
# "chat_template_kwargs": {
|
||||||
## "enable_thinking": False
|
# "enable_thinking": False
|
||||||
## }
|
# }
|
||||||
## },
|
# },
|
||||||
## },
|
# },
|
||||||
# {
|
# {
|
||||||
# "model_name": "gemma-4-31B-it",
|
# "model_name": "gemma-4-31B-it",
|
||||||
# "use_response_format": False,
|
# "use_response_format": False,
|
||||||
@@ -1328,10 +1328,10 @@
|
|||||||
#NUM_ITERATIONS = 10
|
#NUM_ITERATIONS = 10
|
||||||
#STOP_ON_FIRST_ERROR = False
|
#STOP_ON_FIRST_ERROR = False
|
||||||
#
|
#
|
||||||
## For testing, set to e.g. 2.
|
# For testing, set to e.g. 2.
|
||||||
## For full run, set to None.
|
# For full run, set to None.
|
||||||
|
#MAX_ROWS = 2
|
||||||
# MAX_ROWS = 2
|
# MAX_ROWS = 2
|
||||||
## MAX_ROWS = 2
|
|
||||||
#
|
#
|
||||||
#MAX_TOKENS = 4096
|
#MAX_TOKENS = 4096
|
||||||
#TEMPERATURE = 0.0
|
#TEMPERATURE = 0.0
|
||||||
@@ -1340,14 +1340,14 @@
|
|||||||
#
|
#
|
||||||
#SAVE_EVERY_N_ROWS = 1
|
#SAVE_EVERY_N_ROWS = 1
|
||||||
#
|
#
|
||||||
## Retries for invalid JSON / truncated JSON
|
# Retries for invalid JSON / truncated JSON
|
||||||
#MAX_JSON_RETRIES = 2
|
#MAX_JSON_RETRIES = 2
|
||||||
#RETRY_SLEEP_SEC = 2
|
#RETRY_SLEEP_SEC = 2
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
## =========================
|
# =========================
|
||||||
## CLIENT
|
# CLIENT
|
||||||
## =========================
|
# =========================
|
||||||
#
|
#
|
||||||
#client = OpenAI(
|
#client = OpenAI(
|
||||||
# api_key=OPENAI_API_KEY,
|
# api_key=OPENAI_API_KEY,
|
||||||
@@ -1355,9 +1355,9 @@
|
|||||||
#)
|
#)
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
## =========================
|
# =========================
|
||||||
## HELPERS
|
# HELPERS
|
||||||
## =========================
|
# =========================
|
||||||
#
|
#
|
||||||
#def safe_dir_name(name: str) -> str:
|
#def safe_dir_name(name: str) -> str:
|
||||||
# name = str(name).strip()
|
# name = str(name).strip()
|
||||||
@@ -1433,9 +1433,9 @@
|
|||||||
# return max(self.samples_mb)
|
# return max(self.samples_mb)
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
## =========================
|
# =========================
|
||||||
## JSON EXTRACTION
|
# JSON EXTRACTION
|
||||||
## =========================
|
# =========================
|
||||||
#
|
#
|
||||||
#def extract_json_from_text(text):
|
#def extract_json_from_text(text):
|
||||||
# if text is None:
|
# if text is None:
|
||||||
@@ -1587,17 +1587,17 @@
|
|||||||
# return None
|
# return None
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
## =========================
|
# =========================
|
||||||
## READ INSTRUCTIONS
|
# READ INSTRUCTIONS
|
||||||
## =========================
|
# =========================
|
||||||
#
|
#
|
||||||
#with open(EDSS_INSTRUCTIONS_PATH, "r", encoding="utf-8") as f:
|
#with open(EDSS_INSTRUCTIONS_PATH, "r", encoding="utf-8") as f:
|
||||||
# EDSS_INSTRUCTIONS = f.read().strip()
|
# EDSS_INSTRUCTIONS = f.read().strip()
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
## =========================
|
# =========================
|
||||||
## PROMPT
|
# PROMPT
|
||||||
## =========================
|
# =========================
|
||||||
#
|
#
|
||||||
#def build_prompt(patient_text):
|
#def build_prompt(patient_text):
|
||||||
# return f'''Du bist ein medizinischer Assistent für EDSS-Extraktion aus klinischen Berichten.
|
# return f'''Du bist ein medizinischer Assistent für EDSS-Extraktion aus klinischen Berichten.
|
||||||
@@ -1676,9 +1676,9 @@
|
|||||||
#'''
|
#'''
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
## =========================
|
# =========================
|
||||||
## VALIDATION / NORMALIZATION
|
# VALIDATION / NORMALIZATION
|
||||||
## =========================
|
# =========================
|
||||||
#
|
#
|
||||||
#def normalize_model_output(parsed):
|
#def normalize_model_output(parsed):
|
||||||
# if not isinstance(parsed, dict):
|
# if not isinstance(parsed, dict):
|
||||||
@@ -1755,9 +1755,9 @@
|
|||||||
# return parsed
|
# return parsed
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
## =========================
|
# =========================
|
||||||
## API CALL
|
# API CALL
|
||||||
## =========================
|
# =========================
|
||||||
#
|
#
|
||||||
#def make_chat_completion(model_config, prompt):
|
#def make_chat_completion(model_config, prompt):
|
||||||
# model_name = model_config["model_name"]
|
# model_name = model_config["model_name"]
|
||||||
@@ -1794,9 +1794,9 @@
|
|||||||
# return client.chat.completions.create(**kwargs)
|
# return client.chat.completions.create(**kwargs)
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
## =========================
|
# =========================
|
||||||
## INFERENCE FUNCTION WITH RETRIES
|
# INFERENCE FUNCTION WITH RETRIES
|
||||||
## =========================
|
# =========================
|
||||||
#
|
#
|
||||||
#def run_inference(patient_text, model_config):
|
#def run_inference(patient_text, model_config):
|
||||||
# model_name = model_config["model_name"]
|
# model_name = model_config["model_name"]
|
||||||
@@ -1912,9 +1912,9 @@
|
|||||||
# }
|
# }
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
## =========================
|
# =========================
|
||||||
## BUILD PATIENT TEXT
|
# BUILD PATIENT TEXT
|
||||||
## =========================
|
# =========================
|
||||||
#
|
#
|
||||||
#def build_patient_text(row):
|
#def build_patient_text(row):
|
||||||
# return (
|
# return (
|
||||||
@@ -1925,9 +1925,9 @@
|
|||||||
# )
|
# )
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
## =========================
|
# =========================
|
||||||
## FLATTEN RESULTS FOR CSV
|
# FLATTEN RESULTS FOR CSV
|
||||||
## =========================
|
# =========================
|
||||||
#
|
#
|
||||||
#def flatten_result(record):
|
#def flatten_result(record):
|
||||||
# flat = {
|
# flat = {
|
||||||
@@ -2012,9 +2012,9 @@
|
|||||||
# return pd.DataFrame([summary])
|
# return pd.DataFrame([summary])
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
## =========================
|
# =========================
|
||||||
## INCREMENTAL SAVE HELPERS
|
# INCREMENTAL SAVE HELPERS
|
||||||
## =========================
|
# =========================
|
||||||
#
|
#
|
||||||
#def append_jsonl(path, record):
|
#def append_jsonl(path, record):
|
||||||
# with open(path, "a", encoding="utf-8") as f:
|
# with open(path, "a", encoding="utf-8") as f:
|
||||||
@@ -2030,9 +2030,9 @@
|
|||||||
# df_one.to_csv(path, mode="a", header=not file_exists, index=False)
|
# df_one.to_csv(path, mode="a", header=not file_exists, index=False)
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
## =========================
|
# =========================
|
||||||
## MAIN LOOP
|
# MAIN LOOP
|
||||||
## =========================
|
# =========================
|
||||||
#
|
#
|
||||||
#if __name__ == "__main__":
|
#if __name__ == "__main__":
|
||||||
#
|
#
|
||||||
@@ -2277,26 +2277,26 @@ MODEL_CONFIGS = [
|
|||||||
"max_tokens": 4096,
|
"max_tokens": 4096,
|
||||||
"extra_body": None,
|
"extra_body": None,
|
||||||
},
|
},
|
||||||
# {
|
{
|
||||||
# "model_name": "GPT-OSS-120B",
|
"model_name": "GPT-OSS-120B",
|
||||||
# "use_response_format": True,
|
"use_response_format": True,
|
||||||
# "temperature": 0.0,
|
"temperature": 0.0,
|
||||||
# "max_tokens": 4096,
|
"max_tokens": 4096,
|
||||||
# "extra_body": None,
|
"extra_body": None,
|
||||||
# },
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
INPUT_CSV = "/home/shahin/Lab/Doktorarbeit/Barcelona/Data/MS_Briefe_400_with_unique_id_SHA3_explore_cleaned_unique.csv"
|
INPUT_CSV ="/home/shahin/Lab/Doktorarbeit/Barcelona/data/processed/MS_Briefe_400_with_unique_id_SHA3_explore_cleaned_unique.csv"
|
||||||
EDSS_INSTRUCTIONS_PATH = "/home/shahin/Lab/Doktorarbeit/Barcelona/attach/Komplett.txt"
|
EDSS_INSTRUCTIONS_PATH = "/home/shahin/Lab/Doktorarbeit/Barcelona/prompts/Komplett.txt"
|
||||||
|
|
||||||
RESULTS_ROOT = "/home/shahin/Lab/Doktorarbeit/Barcelona/results_edss_benchmark"
|
RESULTS_ROOT = "/home/shahin/Lab/Doktorarbeit/Barcelona/results/benchmark_runs"
|
||||||
|
|
||||||
NUM_ITERATIONS = 10
|
NUM_ITERATIONS = 2
|
||||||
STOP_ON_FIRST_ERROR = False
|
STOP_ON_FIRST_ERROR = False
|
||||||
|
|
||||||
# For testing, set to e.g. 2.
|
# For testing, set to e.g. 2.
|
||||||
# For full run, set to None.
|
# For full run, set to None.
|
||||||
MAX_ROWS = None
|
MAX_ROWS = 2
|
||||||
# MAX_ROWS = 2
|
# MAX_ROWS = 2
|
||||||
|
|
||||||
MAX_TOKENS = 4096
|
MAX_TOKENS = 4096
|
||||||
|
|||||||
Reference in New Issue
Block a user