Adding Date to Dashboard

2026-01-20 13:46:53 +01:00
16 changed files with 973 additions and 12362 deletions
@@ -1,88 +1,16 @@
-# =========================
+# 1. Broad Ignores
-# Python
+/Data/*
-# =========================
+/attach/*
-__pycache__/
+/results/*
-*.py[cod]
+/enarcelona/*
 *$py.class
 .ipynb_checkpoints/
 # =========================
 # Virtual environments
 # =========================
 env/
 env*/
 venv/
 .venv/
 enarcelona/
 # =========================
 # Secrets
 # =========================
 .env
-*.env
+__pycache__/
 *.pyc
-# =========================
+# 2. Ignore virtual environments COMPLETELY
-# Patient data / sensitive data
+# This must come BEFORE the unignore rule
-# =========================
+env*/
 Data/
 data/raw/
 data/processed/
 data/ground_truth/
 reference/
-# =========================
+# 3. The "Unignore" rule (Whitelisting)
-# Generated results and logs
+# We only unignore .py files that aren't already blocked by the rules above
-# =========================
+!**/*.py
 results/
 results_edss_benchmark/
 *.log
 # =========================
 # Large/generated file types
 # =========================
 *.csv
 *.tsv
 *.json
 *.jsonl
 *.xlsx
 *.xls
 *.png
 *.PNG
 *.jpg
 *.jpeg
 *.svg
 *.pdf
 # =========================
 # Temporary / backup files
 # =========================
 *.tmp
 *.bak
 *.orig
 .DS_Store
 # =========================
 # Keep important code/config/docs
 # =========================
 !README.md
 !requirements.txt
 !*.py
 !*.md
 !*.yml
 !*.yaml
 !*.toml
 # Keep prompt templates / schemas if safe to publish
 !prompts/
 !prompts/**
 !attach/
 !attach/*.gbnf
 !attach/just_edss_text.txt
 !attach/Komplett.txt
 # Keep example/synthetic data only
 !data/
 !data/example/
 !data/example/**
 !Data/example/
 !Data/example/**
@@ -0,0 +1,748 @@
 # %% Scatter
 import pandas as pd
 import matplotlib.pyplot as plt
 import numpy as np
 # Read the joined ground-truth / LLM results table (tab-separated).
 file_path = '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/join_MS_Briefe_400_with_unique_id_SHA3_explore_cleaned_results+MS_Briefe_400_with_unique_id_SHA3_explore_cleaned.tsv'
 df = pd.read_csv(file_path, sep='\t')
 # Both score columns may use a decimal comma: swap it for a dot, then coerce
 # to numbers so that entries which still do not parse become NaN.
 for score_col in ('GT_EDSS', 'LLM_Results'):
    dotted = df[score_col].astype(str).str.replace(',', '.')
    df[score_col] = pd.to_numeric(dotted, errors='coerce')
 # Keep only the rows where both scores are available.
 df_clean = df.dropna(subset=['GT_EDSS', 'LLM_Results'])
 # Scatter of ground truth (x axis) against the LLM output (y axis).
 plt.figure(figsize=(8, 6))
 plt.scatter(df_clean['GT_EDSS'], df_clean['LLM_Results'], alpha=0.7, color='blue')
 plt.xlabel('GT_EDSS')
 plt.ylabel('LLM_Results')
 plt.title('Comparison of GT_EDSS vs LLM_Results')
 # Identity line from the origin up to the largest ground-truth score; points
 # on it are cases where both scores are equal.
 gt_max = max(df_clean['GT_EDSS'])
 plt.plot([0, gt_max], [0, gt_max], color='red', linestyle='--', label='Perfect Prediction')
 plt.legend()
 # Final layout and display.
 plt.grid(True)
 plt.tight_layout()
 plt.show()
 ##
 # %% Bland-Altman
 import pandas as pd
 import matplotlib.pyplot as plt
 import numpy as np
 import statsmodels.api as sm
 # Read the joined ground-truth / LLM results table (tab-separated).
 file_path = '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/join_MS_Briefe_400_with_unique_id_SHA3_explore_cleaned_results+MS_Briefe_400_with_unique_id_SHA3_explore_cleaned.tsv'
 df = pd.read_csv(file_path, sep='\t')
 # Decimal commas -> dots, then numeric coercion (unparsable entries -> NaN).
 for score_col in ('GT_EDSS', 'LLM_Results'):
    dotted = df[score_col].astype(str).str.replace(',', '.')
    df[score_col] = pd.to_numeric(dotted, errors='coerce')
 # Only rows with both scores can be compared.
 df_clean = df.dropna(subset=['GT_EDSS', 'LLM_Results'])
 # Mean-difference (Bland-Altman) plot, drawn by statsmodels onto our own axes.
 f, ax = plt.subplots(1, figsize=(8, 5))
 sm.graphics.mean_diff_plot(df_clean['GT_EDSS'], df_clean['LLM_Results'], ax=ax)
 ax.set_title('Bland-Altman Plot: GT_EDSS vs LLM_Results')
 ax.set_xlabel('Mean of GT_EDSS and LLM_Results')
 ax.set_ylabel('Difference between GT_EDSS and LLM_Results')
 plt.tight_layout()
 plt.show()
 # Summary statistics of the paired differences (GT minus LLM).
 paired_diff = df_clean['GT_EDSS'] - df_clean['LLM_Results']
 mean_diff = np.mean(paired_diff)
 std_diff = np.std(paired_diff)
 loa_low = mean_diff - 1.96*std_diff
 loa_high = mean_diff + 1.96*std_diff
 print(f"Mean difference: {mean_diff:.3f}")
 print(f"Standard deviation of differences: {std_diff:.3f}")
 print(f"95% Limits of Agreement: [{loa_low:.3f}, {loa_high:.3f}]")
 ##
 # %%  Confusion matrix
 import pandas as pd
 import matplotlib.pyplot as plt
 import numpy as np
 from sklearn.metrics import confusion_matrix, classification_report
 import seaborn as sns
 # Read the joined EDSS / sub-score table (tab-separated).
 file_path = '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv'
 df = pd.read_csv(file_path, sep='\t')
 # Decimal commas -> dots, then numeric coercion (unparsable entries -> NaN).
 for score_col in ('GT.EDSS', 'result.EDSS'):
    dotted = df[score_col].astype(str).str.replace(',', '.')
    df[score_col] = pd.to_numeric(dotted, errors='coerce')
 # Only rows with both scores can be compared.
 df_clean = df.dropna(subset=['GT.EDSS', 'result.EDSS'])
 # For confusion matrix, we need to categorize the values
 # Let's create categories up to 10 (0-1, 1-2, 2-3, ..., 9-10)
 def categorize_edss(value):
    """Map a numeric score to its one-point-wide bucket label.

    Each bucket is closed on the right: a value ``<= 1.0`` (including
    anything below zero) is '0-1', a value in (1, 2] is '1-2', and so on up
    to '9-10'. Anything above 10 is labelled '10+'.

    Args:
        value: Numeric score, or a missing value.

    Returns:
        The bucket label as a string, or ``np.nan`` when ``value`` is missing.
    """
    if pd.isna(value):
        return np.nan
    # Walk the upper bounds 1..10 and stop at the first one that fits.
    for upper in range(1, 11):
        if value <= float(upper):
            return f'{upper - 1}-{upper}'
    return '10+'
 # Bucket labels shared by the matrix and both heatmap axes. '10+' is not in
 # this list, so rows categorised as '10+' are not counted in the matrix.
 edss_labels = ['0-1', '1-2', '2-3', '3-4', '4-5', '5-6', '6-7', '7-8', '8-9', '9-10']
 # df_clean was produced by dropna(); take an explicit copy before adding
 # columns so pandas does not raise SettingWithCopyWarning.
 df_clean = df_clean.copy()
 # Create categorical versions
 df_clean['GT.EDSS_cat'] = df_clean['GT.EDSS'].apply(categorize_edss)
 df_clean['result.EDSS_cat'] = df_clean['result.EDSS'].apply(categorize_edss)
 # Remove any NaN categories
 df_clean = df_clean.dropna(subset=['GT.EDSS_cat', 'result.EDSS_cat'])
 # Confusion matrix: rows are ground truth, columns are the LLM result.
 cm = confusion_matrix(df_clean['GT.EDSS_cat'], df_clean['result.EDSS_cat'],
                     labels=edss_labels)
 # Plot confusion matrix
 plt.figure(figsize=(10, 8))
 sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=edss_labels,
            yticklabels=edss_labels)
 plt.title('Confusion Matrix: Ground truth EDSS vs interferred EDSS (Categorized 0-10)')
 plt.xlabel('LLM Generated EDSS')
 plt.ylabel('Ground Truth EDSS')
 plt.tight_layout()
 plt.show()
 # Print classification report (computed over whatever labels occur in the
 # data, so its rows may differ from the matrix labels above).
 print("Classification Report:")
 print(classification_report(df_clean['GT.EDSS_cat'], df_clean['result.EDSS_cat']))
 # Print raw counts
 print("\nConfusion Matrix (Raw Counts):")
 print(cm)
 ##
 # %% Classification 
 import pandas as pd
 import matplotlib.pyplot as plt
 import seaborn as sns
 from sklearn.metrics import confusion_matrix
 import numpy as np
 # Load the joined EDSS / sub-score table (tab-separated) for this cell.
 file_path ='/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv'
 df = pd.read_csv(file_path, sep='\t')
 # Diagnostic output only: shape, a preview of the first rows, and every
 # column name on its own line, so the column checks further down in this cell
 # can be verified by eye.
 print("Data shape:", df.shape)
 print("First few rows:")
 print(df.head())
 print("\nColumn names:")
 for col in df.columns:
    print(f"  {col}")
 # Function to safely convert to boolean
 def safe_bool_convert(series):
    '''Convert a Series of truthy/falsy tokens to a boolean Series.

    Values are stringified, stripped and lower-cased before matching.
    'true', '1', 'yes' and 'y' become True. Everything else becomes False:
    the explicit negatives ('false', '0', 'no', 'n') as well as any
    unrecognised token, e.g. 'nan' or '1.0'.

    Args:
        series: pandas Series holding the raw values.

    Returns:
        A Series of dtype bool with the same index as ``series``.
    '''
    truthy_tokens = {'true', '1', 'yes', 'y'}
    normalized = series.astype(str).str.strip().str.lower()
    # isin() always yields a bool Series. The previous map() + fillna(False)
    # produced an object Series whenever a token was unmapped, which made the
    # resulting dtype depend on pandas' version-specific downcasting.
    return normalized.isin(truthy_tokens)
 # Normalise both label columns to booleans, printing before/after diagnostics
 # for each column that is present.
 label_columns = ['result.klassifizierbar', 'GT.klassifizierbar']
 for label_col in label_columns:
    if label_col not in df.columns:
        continue
    print(f"\n{label_col} column info:")
    print(df[label_col].head(10))
    print("Unique values:", df[label_col].unique())
    df[label_col] = safe_bool_convert(df[label_col])
    print("After conversion:")
    print(df[label_col].value_counts())
 # The comparison plots need both columns.
 have_both_labels = all(name in df.columns for name in label_columns)
 if have_both_labels:
    # Bar chart of how many rows are True in each column.
    llm_true_count = df['result.klassifizierbar'].sum()
    gt_true_count = df['GT.klassifizierbar'].sum()
    fig, ax = plt.subplots(figsize=(8, 6))
    x = np.arange(2)
    width = 0.35
    # Bars are centred on their tick positions so that the tick labels and the
    # value annotations (both placed at x[0] / x[1]) line up with the bars.
    bars1 = ax.bar(x[0], llm_true_count, width, label='LLM', color='skyblue', alpha=0.8)
    bars2 = ax.bar(x[1], gt_true_count, width, label='GT', color='lightcoral', alpha=0.8)
    # Value label just above each bar.
    for x_pos, true_count in zip(x, (llm_true_count, gt_true_count)):
        ax.annotate(f'{true_count}',
                    xy=(x_pos, true_count),
                    xytext=(0, 3),
                    textcoords="offset points",
                    ha='center', va='bottom')
    ax.set_xlabel('Classification Status (klassifizierbar)')
    ax.set_ylabel('Count')
    ax.set_title('True Values Comparison: LLM vs GT for "klassifizierbar"')
    ax.set_xticks(x)
    ax.set_xticklabels(['LLM', 'GT'])
    ax.legend()
    plt.tight_layout()
    plt.show()
    # Confusion matrix of GT labels (rows) against LLM predictions (columns).
    # Best-effort: a failure here is reported and the script carries on.
    try:
        llm_bool = df['result.klassifizierbar'].fillna(False).astype(bool)
        gt_bool = df['GT.klassifizierbar'].fillna(False).astype(bool)
        # Fix the label order explicitly so the matrix is always 2x2 and
        # matches the False/True tick labels, even if only one class occurs.
        cm = confusion_matrix(gt_bool, llm_bool, labels=[False, True])
        fig, ax = plt.subplots(figsize=(8, 6))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                   xticklabels=['False ', 'True '],
                   yticklabels=['False', 'True '],
                   ax=ax)
        ax.set_xlabel('LLM Predictions ')
        ax.set_ylabel('GT Labels ')
        ax.set_title('Confusion Matrix: LLM vs GT for "klassifizierbar"')
        plt.tight_layout()
        plt.show()
        print("Confusion Matrix:")
        print(cm)
    except Exception as e:
        print(f"Error creating confusion matrix: {e}")
 # Show final data info. Only the label columns that exist are selected, so a
 # missing column no longer raises KeyError here. info() prints its report
 # itself and returns None, so it is not wrapped in print().
 print("\nFinal DataFrame info:")
 present_labels = [name for name in label_columns if name in df.columns]
 df[present_labels].info()
 ##
 # %% Boxplot
 import pandas as pd
 import matplotlib.pyplot as plt
 import seaborn as sns
 import numpy as np
 # Load the joined results table (tab-separated).
 file_path = '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/join_results_unique.tsv'
 df = pd.read_csv(file_path, sep='\t')
 # Decimal commas -> dots, then numeric coercion (unparsable entries -> NaN).
 for score_col in ('GT.EDSS', 'result.EDSS'):
    dotted = df[score_col].astype(str).str.replace(',', '.')
    df[score_col] = pd.to_numeric(dotted, errors='coerce')
 # Drop rows where either score is missing. The explicit copy() lets the
 # category column be added below without a SettingWithCopyWarning.
 df_clean = df.dropna(subset=['GT.EDSS', 'result.EDSS']).copy()
 # 1. DEFINE CATEGORY ORDER
 # An ordered categorical keeps the x axis in numeric order ('0-1' before '1-2').
 category_order = ['0-1', '1-2', '2-3', '3-4', '4-5', '5-6', '6-7', '7-8', '8-9', '9-10', '10+']
 # NOTE: categorize_edss is defined in the confusion-matrix cell of this file,
 # so that cell must have been executed before this one.
 df_clean['GT.EDSS_cat'] = pd.Categorical(df_clean['GT.EDSS'].apply(categorize_edss),
                                         categories=category_order,
                                         ordered=True)
 plt.figure(figsize=(14, 8))
 # 2. ADD HUE FOR LEGEND
 # The x variable is also passed as hue so seaborn can build a legend.
 box_plot = sns.boxplot(
    data=df_clean,
    x='GT.EDSS_cat',
    y='result.EDSS',
    hue='GT.EDSS_cat',
    palette='viridis',
    linewidth=1.5,
    legend=True
 )
 # 3. CUSTOMIZE PLOT
 plt.title('Distribution of result.EDSS by GT.EDSS Category', fontsize=18, pad=20)
 plt.xlabel('Ground Truth EDSS Category', fontsize=14)
 plt.ylabel('LLM Predicted EDSS', fontsize=14)
 # Place the legend outside the axes, to the right.
 plt.legend(title="EDSS Categories", bbox_to_anchor=(1.05, 1), loc='upper left')
 plt.xticks(rotation=45, ha='right', fontsize=10)
 plt.grid(True, axis='y', alpha=0.3)
 plt.tight_layout()
 plt.show()
 ##
 # %% Postprocessing Column names
 import pandas as pd
 # Read the TSV file
 file_path = '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv'
 df = pd.read_csv(file_path, sep='\t')
 # Mapping from the original column names to the 'GT.'-prefixed names.
 # NOTE(review): the key 'Sensibiliät' looks like a misspelling of
 # 'Sensibilität'; it is kept as-is because it must match the header in the
 # data file - confirm against the file.
 column_mapping = {
    'EDSS':'GT.EDSS',
    'klassifizierbar': 'GT.klassifizierbar',
    'Sehvermögen': 'GT.VISUAL_OPTIC_FUNCTIONS',
    'Cerebellum': 'GT.CEREBELLAR_FUNCTIONS',
    'Hirnstamm': 'GT.BRAINSTEM_FUNCTIONS',
    'Sensibiliät': 'GT.SENSORY_FUNCTIONS',
    'Pyramidalmotorik': 'GT.PYRAMIDAL_FUNCTIONS',
    'Ambulation': 'GT.AMBULATION',
    'Cerebrale_Funktion': 'GT.CEREBRAL_FUNCTIONS',
    'Blasen-_und_Mastdarmfunktion': 'GT.BOWEL_AND_BLADDER_FUNCTIONS'
 }
 # Record which source columns exist BEFORE renaming. Once rename() has run,
 # the old names are gone from df.columns, so checking afterwards (as the
 # previous version did) never reported anything.
 applied_renames = {
    old_name: new_name
    for old_name, new_name in column_mapping.items()
    if old_name in df.columns
 }
 # Rename columns (mapping keys that are not present are ignored by pandas).
 df = df.rename(columns=column_mapping)
 # Save the modified dataframe back to the same TSV file (overwrites it).
 df.to_csv(file_path, sep='\t', index=False)
 print("Columns have been successfully renamed!")
 print("Renamed columns:")
 for old_name, new_name in applied_renames.items():
    print(f"  {old_name} -> {new_name}")
 ##
 # %% Styled table
 import pandas as pd
 import numpy as np
 import seaborn as sns
 import matplotlib.pyplot as plt
 import dataframe_image as dfi
 # Load the joined EDSS / sub-score table (tab-separated).
 df = pd.read_csv("/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv", sep='\t')
 # 1. Identify all GT and result columns by their name prefix
 gt_columns = [col for col in df.columns if col.startswith('GT.')]
 result_columns = [col for col in df.columns if col.startswith('result.')]
 print("GT Columns found:", gt_columns)
 print("Result Columns found:", result_columns)
 # 2. Pair every GT column with a result column.
 # Several spellings of the GT base name are tried in order (spaces vs.
 # underscores, lower/upper case); the first one that exists wins. GT columns
 # without any partner are simply left out of the mapping.
 column_mapping = {}
 for gt_col in gt_columns:
    base_name = gt_col.replace('GT.', '')
    # Candidate result-column names, in priority order
    candidates = [
        f'result.{base_name}',  # Exact match
        f'result.{base_name.replace(" ", "_")}',  # With underscores
        f'result.{base_name.replace("_", " ")}',  # With spaces
        f'result.{base_name.replace(" ", "")}',   # No spaces
        f'result.{base_name.replace("_", "")}'    # No underscores
    ]
    # All-lowercase and all-uppercase spellings of the base name
    candidates.append(f'result.{base_name.lower()}')
    candidates.append(f'result.{base_name.upper()}')
    # Take the first candidate that is an existing result column
    matched = False
    for candidate in candidates:
        if candidate in result_columns:
            column_mapping[gt_col] = candidate
            matched = True
            break
    # Fallback: compare the names case-insensitively after dropping every
    # character that is not alphanumeric, an underscore or a space
    if not matched:
        base_clean = ''.join(e for e in base_name if e.isalnum() or e in ['_', ' '])
        for result_col in result_columns:
            result_base = result_col.replace('result.', '')
            result_clean = ''.join(e for e in result_base if e.isalnum() or e in ['_', ' '])
            if base_clean.lower() == result_clean.lower():
                column_mapping[gt_col] = result_col
                matched = True
                break
 print("Column mapping:", column_mapping)
 # 3. Per-pair agreement, computed column-wise
 data_list = []
 for gt_col, result_col in column_mapping.items():
    print(f"Processing {gt_col} vs {result_col}")
    # Convert to numeric, forcing errors to NaN.
    # NOTE(review): unlike the EDSS cells of this file, decimal commas are not
    # replaced by dots here, so a value such as "1,5" would become NaN and
    # drop out of the comparison - confirm this is intended.
    s1 = pd.to_numeric(df[gt_col], errors='coerce').astype(float)
    s2 = pd.to_numeric(df[result_col], errors='coerce').astype(float)
    # Calculate matches (abs difference <= 0.5); NaN differences never match
    diff = np.abs(s1 - s2)
    matches = (diff <= 0.5).sum()
    # Denominator: rows where both values are numeric
    valid_count = diff.notna().sum()
    if valid_count > 0:
        percentage = (matches / valid_count) * 100
    else:
        # No comparable rows at all: report 0 instead of dividing by zero
        percentage = 0
    # Extract clean base name for display
    base_name = gt_col.replace('GT.', '')
    data_list.append({
        'GT': base_name,
        'Match %': round(percentage, 1)
    })
 # 4. Prepare Data
 match_df = pd.DataFrame(data_list)
 # Clean up labels: replace underscores with spaces and title-case them
 match_df['GT'] = match_df['GT'].str.replace('_', ' ').str.title()
 # Best agreement first
 match_df = match_df.sort_values('Match %', ascending=False)
 # 5. Create a "Beautiful" Table using Seaborn Heatmap
 def create_luxury_table(df, output_file="edss_agreement.png"):
    """Render the agreement table as a one-column annotated heatmap and save it.

    The figure is left open as the current matplotlib figure after saving.

    Args:
        df: DataFrame with a 'GT' column (used as row labels) and a
            'Match %' column (the values shown and colour-coded).
        output_file: Path passed to ``savefig``; written at 300 dpi.
    """
    sns.set_theme(style="white", font="sans-serif")
    # One value column, indexed by the row label.
    table_values = df.set_index('GT')[['Match %']]
    # Figure height grows with the number of rows.
    fig, ax = plt.subplots(figsize=(8, len(df) * 0.6))
    # Custom diverging colour map instead of a stock one.
    palette = sns.diverging_palette(15, 135, s=80, l=55, as_cmap=True)
    annotation_style = {"size": 14, "weight": "bold", "family": "sans-serif"}
    sns.heatmap(
        table_values,
        annot=True,
        fmt=".1f",
        cmap=palette,
        center=85,          # value at which the colour transition is centred
        vmin=50, vmax=100,  # range covered by the gradient
        linewidths=2,
        linecolor='white',
        cbar=False,         # no colour bar, for a "table" look
        annot_kws=annotation_style,
        ax=ax
    )
    # Make the heatmap read like a table: no axis titles, header on top.
    ax.set_xlabel("")
    ax.set_ylabel("")
    ax.xaxis.tick_top()
    ax.set_xticklabels(['Agreement (%)'], fontsize=14, fontweight='bold', color='#2c3e50')
    ax.tick_params(axis='y', labelsize=12, labelcolor='#2c3e50', length=0)
    # Thin light border around the table.
    for spine in ax.spines.values():
        spine.set_visible(True)
        spine.set_color('#ecf0f1')
    ax.set_title('EDSS Subcategory Consistency Analysis', fontsize=16, pad=40, fontweight='bold', color='#2c3e50')
    # Footer stating the matching tolerance.
    fig.text(0.5, 0.0, "Tolerance: ±0.5 points",
             wrap=True, horizontalalignment='center', fontsize=10, color='gray', style='italic')
    # Save with high resolution
    fig.tight_layout()
    plt.savefig(output_file, dpi=300, bbox_inches='tight')
    print(f"Beautiful table saved as {output_file}")
 # Render the table and save it as PNG.
 # The former `save_styled_table(match_df)` call was removed: no function of
 # that name is defined in the visible file, so it raised NameError before the
 # SVG export below could run.
 create_luxury_table(match_df)
 # 6. Save as SVG (writes whatever figure is current at this point)
 plt.savefig("agreement_table.svg", format='svg', dpi=300, bbox_inches='tight')
 print("Successfully saved agreement_table.svg")
 # Show plot if running in a GUI environment
 plt.show()
 ##
 # %% Time Plot
 import numpy as np
 import matplotlib.pyplot as plt
 import pandas as pd
 from scipy import stats
 # Load the TSV file
 file_path = '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv'
 df = pd.read_csv(file_path, sep='\t')
 # Inference times with missing entries removed.
 inference_times = df['inference_time_sec'].dropna()
 # Summary statistics used for the fit and the marker lines.
 mean_time = inference_times.mean()
 std_time = inference_times.std()
 median_time = np.median(inference_times)
 fig, ax = plt.subplots(figsize=(10, 6))
 # One-second-wide bins spanning the observed range.
 min_time = int(inference_times.min())
 max_time = int(inference_times.max()) + 1
 bins = np.arange(min_time, max_time + 1, 1)
 # Histogram of raw counts (not a probability density).
 n, bins, patches = ax.hist(inference_times, bins=bins, color='lightblue', alpha=0.7, edgecolor='black', linewidth=0.5)
 # Normal curve with the sample mean/std, scaled from density to counts
 # (density * number of samples * bin width) so it overlays the histogram.
 x = np.linspace(inference_times.min(), inference_times.max(), 100)
 bin_width = bins[1] - bins[0]
 gaussian_counts = stats.norm.pdf(x, mean_time, std_time) * len(inference_times) * bin_width
 ax.plot(x, gaussian_counts, color='red', linewidth=2, label=f'Gaussian Fit (μ={mean_time:.1f}s, σ={std_time:.1f}s)')
 # Vertical markers for mean and median.
 ax.axvline(mean_time, color='blue', linestyle='--', linewidth=2, label=f'Mean = {mean_time:.1f}s')
 ax.axvline(median_time, color='green', linestyle='--', linewidth=2, label=f'Median = {median_time:.1f}s')
 # Vertical markers one standard deviation above and below the mean.
 for sign, offset in (('+', std_time), ('-', -std_time)):
    sigma_pos = mean_time + offset
    ax.axvline(sigma_pos, color='saddlebrown', linestyle=':', linewidth=1, alpha=0.7, label=f'{sign}1σ = {sigma_pos:.1f}s')
 ax.set_xlabel('Inference Time (seconds)')
 ax.set_ylabel('Frequency')
 ax.set_title('Inference Time Distribution with Gaussian Fit')
 ax.legend()
 ax.grid(True, alpha=0.3)
 plt.tight_layout()
 plt.show()
 ##
 # %% Dashboard 
 import pandas as pd
 import matplotlib.pyplot as plt
 import seaborn as sns
 from datetime import datetime
 import numpy as np
 # Load the data
 file_path = '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv'
 df = pd.read_csv(file_path, sep='\t')
 # Strip the 'result.' prefix from column names and replace spaces in the
 # remaining name with underscores.
 column_mapping = {
    col: col.replace('result.', '').replace(' ', '_')
    for col in df.columns
    if col.startswith('result.')
 }
 df = df.rename(columns=column_mapping)
 # Convert MedDatum to datetime so it can be used as the time axis
 df['MedDatum'] = pd.to_datetime(df['MedDatum'])
 # Check what columns actually exist in the dataset
 print("Available columns:")
 print(df.columns.tolist())
 print("\nFirst few rows:")
 print(df.head())
 # Hardcode specific patient names (only the first entry is plotted)
 patient_names = ['bc55b1b2']
 # Define the functional systems (columns to plot) - adjust based on actual column names
 functional_systems = ['EDSS', 'Visual', 'Sensory', 'Motor', 'Brainstem', 'Cerebellar', 'Autonomic', 'Bladder', 'Intellectual']
 # Grid with two columns and as many rows as needed
 num_plots = len(functional_systems)
 num_cols = 2
 num_rows = (num_plots + num_cols - 1) // num_cols  # Ceiling division
 # squeeze=False always returns a 2-D array of axes, so one flatten() call is
 # correct for every grid shape (the previous special-casing wrapped the axes
 # array in a list for a single plot and had a no-op branch for a single row).
 fig, axes = plt.subplots(num_rows, num_cols, figsize=(15, 4*num_rows), sharex=False, squeeze=False)
 axes = axes.flatten()
 # Rows of the selected patient in chronological order. This does not depend
 # on the functional system, so it is computed once rather than per panel.
 patient_data = df[df['unique_id'] == patient_names[0]].sort_values('MedDatum')
 # One panel per functional system
 for i, system in enumerate(functional_systems):
    # Check if patient data exists
    if patient_data.empty:
        print(f"No data found for patient: {patient_names[0]}")
        continue
    panel = axes[i]
    panel.set_ylabel('Score')
    panel.grid(True, alpha=0.3)
    if system in patient_data.columns:
        plot_col = system
        panel_title = f'Functional System: {system}'
    else:
        # Fall back to the first column whose name contains the system name
        # (case-insensitive).
        found_column = next((col for col in df.columns if system.lower() in col.lower()), None)
        if found_column is None:
            panel.set_title(f'Functional System: {system} (Column not found)')
            continue
        print(f"Found similar column: {found_column}")
        plot_col = found_column
        panel_title = f'Functional System: {system} (found as: {found_column})'
    # A column holding only missing values gets a labelled, empty panel. This
    # now also covers columns found via the fallback, which previously left a
    # blank panel without any title.
    if patient_data[plot_col].isna().all():
        panel.set_title(f'Functional System: {system} (No data)')
        continue
    panel.plot(patient_data['MedDatum'], patient_data[plot_col], marker='o', linewidth=2, label=plot_col)
    panel.set_title(panel_title)
    panel.legend()
 # Hide empty subplots
 for i in range(len(functional_systems), len(axes)):
    axes[i].set_visible(False)
 # Set x-axis label for the last row only
 for i in range(len(functional_systems)):
    if i >= len(axes) - num_cols:  # Last row
        axes[i].set_xlabel('Date')
 # Force date formatting on all axes: ISO dates, one major tick per month
 for ax in axes:
    ax.tick_params(axis='x', rotation=45)
    ax.xaxis.set_major_formatter(plt.matplotlib.dates.DateFormatter('%Y-%m-%d'))
    ax.xaxis.set_major_locator(plt.matplotlib.dates.MonthLocator())
 # Automatically format x-axis dates
 plt.gcf().autofmt_xdate()
 plt.tight_layout()
 plt.show()
 ##
@@ -0,0 +1,135 @@
 import pandas as pd
 import numpy as np
 import seaborn as sns
 import matplotlib.pyplot as plt
 import dataframe_image as dfi
 # Load the joined EDSS / sub-score table (tab-separated).
 df = pd.read_csv("/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv", sep='\t')
 # 1. Split the columns by prefix.
 gt_columns = [col for col in df.columns if col.startswith('GT.')]
 result_columns = [col for col in df.columns if col.startswith('result.')]
 print("GT Columns found:", gt_columns)
 print("Result Columns found:", result_columns)
 # 2. Pair every GT column with a result column. GT columns that find no
 # partner are left out of the mapping.
 column_mapping = {}
 for gt_col in gt_columns:
    stem = gt_col.replace('GT.', '')
    # Spellings to try, in priority order: exact, spaces->underscores,
    # underscores->spaces, spaces removed, underscores removed, lower, upper.
    spellings = (
        stem,
        stem.replace(" ", "_"),
        stem.replace("_", " "),
        stem.replace(" ", ""),
        stem.replace("_", ""),
        stem.lower(),
        stem.upper(),
    )
    partner = next(
        (f'result.{spelling}' for spelling in spellings if f'result.{spelling}' in result_columns),
        None,
    )
    if partner is None:
        # Fallback: case-insensitive comparison after dropping every
        # character that is not alphanumeric, an underscore or a space.
        wanted = ''.join(ch for ch in stem if ch.isalnum() or ch in ['_', ' ']).lower()
        for other_col in result_columns:
            other_stem = other_col.replace('result.', '')
            other_clean = ''.join(ch for ch in other_stem if ch.isalnum() or ch in ['_', ' '])
            if wanted == other_clean.lower():
                partner = other_col
                break
    if partner is not None:
        column_mapping[gt_col] = partner
 print("Column mapping:", column_mapping)
 # 3. Agreement per column pair, computed column-wise.
 data_list = []
 for gt_col, result_col in column_mapping.items():
    print(f"Processing {gt_col} vs {result_col}")
    # Non-numeric entries become NaN.
    gt_values = pd.to_numeric(df[gt_col], errors='coerce').astype(float)
    result_values = pd.to_numeric(df[result_col], errors='coerce').astype(float)
    abs_diff = np.abs(gt_values - result_values)
    # A row matches when the values differ by at most 0.5.
    n_matches = (abs_diff <= 0.5).sum()
    # Only rows where both values are numeric count towards the denominator;
    # with no such rows the percentage is reported as 0.
    n_valid = abs_diff.notna().sum()
    percentage = (n_matches / n_valid) * 100 if n_valid > 0 else 0
    data_list.append({
        'GT': gt_col.replace('GT.', ''),
        'Match %': round(percentage, 1)
    })
 # 4. Assemble the result table, best agreement first.
 match_df = pd.DataFrame(data_list)
 match_df = match_df.sort_values('Match %', ascending=False)
 # 5. Create the Styled Gradient Table
 def style_agreement_table(df):
    return (df.style
        .format({'Match %': '{:.1f}%'}) # Add % sign
        .background_gradient(cmap='RdYlGn', subset=['Match %'], vmin=50, vmax=100) # Red to Green gradient
        .set_properties(**{
            'text-align': 'center',
            'font-size': '12pt',
            'border-collapse': 'collapse',
            'border': '1px solid #D3D3D3'
        })
        .set_table_styles([
            # Style the header
            {'selector': 'th', 'props': [
                ('background-color', '#404040'), 
                ('color', 'white'),
                ('font-weight', 'bold'),
                ('text-transform', 'uppercase'),
                ('padding', '10px')
            ]},
            # Add hover effect
            {'selector': 'tr:hover', 'props': [('background-color', '#f5f5f5')]}
        ])
        .set_caption("EDSS Agreement Analysis: Ground Truth vs. Results (Tolerance ±0.5)")
    )
 # Build the styled table from the agreement results.
 styled_table = style_agreement_table(match_df)
 # Bare expression: only has a visible effect as the last expression of a
 # notebook cell, where the table is rendered; in a plain script it does nothing.
 styled_table
 # Export the styled table to a PNG image via dataframe_image.
 dfi.export(styled_table, "styled_table.png")
 #styled_table.to_html("agreement_report.html")
 # 6. Save as SVG (currently disabled)
 #plt.savefig("agreement_table.svg", format='svg', dpi=300, bbox_inches='tight')
 #print("Successfully saved agreement_table.svg")
 # Show plot if running in a GUI environment
 plt.show()
@@ -0,0 +1,74 @@
 import pandas as pd
 import numpy as np
 import seaborn as sns
 # Sample data (replace with your actual df)
 df = pd.read_csv("/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv", sep='\t')
 # Split the columns into ground-truth and model-result columns by prefix
 gt_columns = [name for name in df.columns if name.startswith('GT.')]
 result_columns = [name for name in df.columns if name.startswith('result.')]
 # Pair every GT column with the result column of the same base name;
 # GT columns without a counterpart are left out of the mapping
 candidate_pairs = ((gt, f"result.{gt.replace('GT.', '')}") for gt in gt_columns)
 column_mapping = {gt: res for gt, res in candidate_pairs if res in result_columns}
 # Function to compute match percentage for each GT-Result pair
 def compute_match_percentages(df, column_mapping):
    """Compute the agreement rate for every ground-truth/result column pair.

    A row counts as a match when both values are numeric and differ by at
    most 0.5. Rows in which either value is missing or cannot be converted
    to ``float`` are excluded from BOTH the match count and the denominator,
    so the percentage is taken over valid comparisons only.

    Args:
        df: DataFrame containing all columns named in ``column_mapping``.
        column_mapping: dict mapping a ground-truth column name to its
            result column name.

    Returns:
        DataFrame with one row per pair and the columns ``GT_Column``,
        ``Result_Column`` and ``Match_Percentage`` (rounded to one decimal;
        0 when a pair has no valid comparison at all).
    """
    percentages = []
    for gt_col, result_col in column_mapping.items():
        matches = 0
        valid = 0
        # Walk the two columns in lockstep; cheaper than DataFrame.iterrows()
        for gt_val, result_val in zip(df[gt_col], df[result_col]):
            # Missing on either side: nothing to compare
            if pd.isna(gt_val) or pd.isna(result_val):
                continue
            try:
                gt_float = float(gt_val)
                result_float = float(result_val)
            except (ValueError, TypeError):
                # Non-numeric entry: not a valid comparison
                continue
            valid += 1
            # Check if values are within 0.5 tolerance
            if abs(gt_float - result_float) <= 0.5:
                matches += 1
        # Guard the division: an empty frame or an all-invalid pair yields 0
        percentage = (matches / valid) * 100 if valid else 0
        percentages.append({
            'GT_Column': gt_col,
            'Result_Column': result_col,
            'Match_Percentage': round(percentage, 1)
        })
    return pd.DataFrame(percentages)
 # Compute match percentages (one row per GT/result column pair)
 match_df = compute_match_percentages(df, column_mapping)
 # Create a pivot table for gradient display (optional but helpful):
 # GT columns become the rows and result columns the table columns. Each GT column
 # is mapped to exactly one result column, so every other cell is the filler 0.
 pivot_table = match_df.set_index(['GT_Column', 'Result_Column'])['Match_Percentage'].unstack(fill_value=0)
 # Apply gradient background; axis=None scales the colors over the whole table
 # rather than per row or per column
 cm = sns.light_palette("green", as_cmap=True)
 styled_table = pivot_table.style.background_gradient(cmap=cm, axis=None)
 # Display result
 print("Agreement Percentage Table (with gradient):")
 # Bare expression: renders only as the last statement of a notebook/interactive
 # cell; it does nothing when this file runs as a plain script
 styled_table
 # Save the styled table to a file
 styled_table.to_html("agreement_report.html")
 print("Report saved to agreement_report.html")
@@ -1,31 +0,0 @@
 # Project Structure
 This project was reorganized into:
 - `data/`
  - `raw/`: original raw data, if retained locally
  - `processed/`: cleaned or derived input data
  - `ground_truth/`: manually annotated reference data
  - `external/`: externally provided data
 - `prompts/`
  - EDSS instructions and prompt/schema assets
 - `scripts/`
  - runnable analysis and plotting scripts
 - `results/`
  - `benchmark_runs/`: full model benchmark runs
  - `final_results/`: final selected model outputs
  - `figures/`: generated figures
  - `tables/`: generated tables
  - `logs/`: terminal logs
 - `manuscript/`
  - final figures and tables for paper/thesis writing
 - `archive/`
  - old scripts, old results, temporary files, and unclear legacy files
 Important:
 The reorganization was performed after creating a full timestamped backup.
@@ -216,3 +216,6 @@ if __name__ == "__main__":
 # %% name
 eXXXXXXXX
 ##
@@ -263,120 +263,3 @@ plt.legend(frameon=False, loc='upper center', bbox_to_anchor=(0.5, -0.05))
 plt.tight_layout()
 plt.show()
 ##
 # %% name
 import matplotlib.pyplot as plt
 # Data: number of patients ('patient_count') for each number of visits ('Visit')
 data = {
    'Visit': [9, 8, 7, 6, 5, 4, 3, 2, 1],
    'patient_count': [2, 3, 3, 6, 13, 17, 28, 24, 32]
 }
 # The source lists are in descending visit order; sort by visit count so the
 # x-axis reads 1, 2, ..., 9 from left to right.
 sorted_indices = sorted(range(len(data['Visit'])), key=lambda i: data['Visit'][i])
 visit_sorted = [data['Visit'][i] for i in sorted_indices]
 count_sorted = [data['patient_count'][i] for i in sorted_indices]
 # Create figure and axis, then draw the bars once. (An earlier version plotted
 # the unsorted data first and wiped it with ax.clear(); that first pass never
 # reached the final figure.)
 fig, ax = plt.subplots(figsize=(10, 6))
 bars = ax.bar(visit_sorted, count_sorted, color='darkblue', label='Patients by Visit Count')
 # Axis labels (the title is intentionally left off)
 ax.set_xlabel('Number of Visits', fontsize=12)
 ax.set_ylabel('Number of Unique Patients', fontsize=12)
 #ax.set_title('Number of Patients by Visit Number', fontsize=14)
 # Add legend
 ax.legend()
 # Improve layout and grid
 ax.grid(axis='y', linestyle='--', alpha=0.7)
 plt.xticks(visit_sorted)  # Ensure all integer visit numbers are shown
 # Show the plot
 plt.tight_layout()
 plt.show()
 ##
 # %% Patientjourney Bubble chart
 import matplotlib.pyplot as plt
 import numpy as np
 import matplotlib as mpl
 # Global font settings for this figure
 mpl.rcParams["font.family"] = "DejaVu Sans"   # or "Arial", "Calibri", "Times New Roman", ...
 mpl.rcParams["font.size"] = 12                # default size for text
 mpl.rcParams["axes.titlesize"] = 14
 mpl.rcParams["axes.titleweight"] = "bold"
 # Data (your counts): patient_count[i] patients have exactly visits[i] visits
 visits = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])
 patient_count = np.array([32, 24, 28, 17, 13, 6, 3, 3, 2])
 # "Remaining" = patients with >= that many visits (cumulative from the right)
 remaining = np.array([patient_count[i:].sum() for i in range(len(patient_count))])
 # --- Plot ---
 fig, ax = plt.subplots(figsize=(12, 3))
 y = 0.0  # all bubbles on one horizontal line
 # Horizontal line
 ax.hlines(y, visits.min() - 0.4, visits.max() + 0.4, color="#1f77b4", linewidth=3)
 # Bubble sizes (scale as needed)
 # (Matplotlib scatter uses area in points^2)
 sizes = patient_count * 35  # tweak this multiplier if you want bigger/smaller bubbles
 ax.scatter(visits, np.full_like(visits, y), s=sizes, color="#1f77b4", zorder=3)
 # Title
 #ax.set_title("Patient Journey by Visit Count", fontsize=14, pad=18)
 # Top labels: "1 visit", "2 visits", ...
 for x in visits:
    label = f"{x} visit" if x == 1 else f"{x} visits"
    ax.text(x, y + 0.18, label, ha="center", va="bottom", fontsize=10)
 # Bottom labels: "X patients" and "Y remaining"
 for x, pc, rem in zip(visits, patient_count, remaining):
    ax.text(x, y - 0.20, f"{pc} patients", ha="center", va="top", fontsize=9)
    ax.text(x, y - 0.32, f"{rem} remaining", ha="center", va="top", fontsize=9)
 # Cosmetics: remove axes, keep spacing nice
 ax.set_xlim(visits.min() - 0.6, visits.max() + 0.6)
 ax.set_ylim(-0.5, 0.35)
 ax.set_xticks([])
 ax.set_yticks([])
 for spine in ax.spines.values():
    spine.set_visible(False)
 plt.tight_layout()
 # Save BEFORE showing: once plt.show() returns the figure may already be closed,
 # and a later plt.savefig() would then write an empty canvas. Saving through the
 # figure object also makes explicit which figure is written.
 fig.savefig("patient_journey.svg", format="svg", bbox_inches="tight")
 plt.show()
 ##
@@ -1,384 +0,0 @@
 #!/usr/bin/env bash
 set -euo pipefail
 # ============================================================
 # Organize Barcelona EDSS project safely
 # - Creates a timestamped backup first
 # - Creates a cleaner folder structure
 # - Moves files conservatively
 # - Does NOT delete anything
 # ============================================================
 PROJECT_ROOT="$(pwd)"
 TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
 BACKUP_PARENT="${PROJECT_ROOT}/../Barcelona_backups"
 BACKUP_DIR="${BACKUP_PARENT}/Barcelona_backup_${TIMESTAMP}"
 echo "Project root: ${PROJECT_ROOT}"
 echo "Backup dir:   ${BACKUP_DIR}"
 echo
 # ------------------------------------------------------------
 # Safety checks
 # ------------------------------------------------------------
 if [ ! -f "${PROJECT_ROOT}/README.md" ]; then
    echo "WARNING: README.md not found. Are you sure you are in the project root?"
    echo "Current directory: ${PROJECT_ROOT}"
    read -r -p "Continue anyway? [y/N] " answer
    case "$answer" in
        y|Y|yes|YES) ;;
        *) echo "Aborted."; exit 1 ;;
    esac
 fi
 if [ -d "${PROJECT_ROOT}/.git" ]; then
    if ! git diff --quiet || ! git diff --cached --quiet; then
        echo "ERROR: Git working tree is not clean."
        echo "Please commit or stash changes before organizing."
        exit 1
    fi
 fi
 echo "This script will:"
 echo "1. Create a full backup."
 echo "2. Create organized folders."
 echo "3. Move files into data/, prompts/, scripts/, results/, archive/."
 echo "4. Keep your original files in the backup."
 echo
 read -r -p "Proceed? [y/N] " answer
 case "$answer" in
    y|Y|yes|YES) ;;
    *) echo "Aborted."; exit 1 ;;
 esac
 # ------------------------------------------------------------
 # Backup
 # ------------------------------------------------------------
 mkdir -p "${BACKUP_PARENT}"
 echo
 echo "Creating backup..."
 rsync -a \
    --exclude "enarcelona/" \
    --exclude "env/" \
    --exclude ".venv/" \
    --exclude "__pycache__/" \
    "${PROJECT_ROOT}/" "${BACKUP_DIR}/"
 echo "Backup created at:"
 echo "${BACKUP_DIR}"
 # ------------------------------------------------------------
 # Create target structure
 # ------------------------------------------------------------
 echo
 echo "Creating new directory structure..."
 mkdir -p \
    data/raw \
    data/processed \
    data/ground_truth \
    data/external \
    prompts \
    scripts \
    results/benchmark_runs \
    results/final_results/model_outputs \
    results/figures \
    results/tables \
    results/logs \
    manuscript/figures \
    manuscript/tables \
    archive/old_scripts \
    archive/old_results \
    archive/tmp \
    archive/old_data \
    archive/old_project_files
 # ------------------------------------------------------------
 # Helper move functions
 # ------------------------------------------------------------
 move_if_exists() {
    # Move SRC to DEST if SRC exists; never overwrite.
    #   $1  source path (file or directory)
    #   $2  destination path (its parent directory is created on demand)
    # Prints a MOVE or SKIP line; does nothing when SRC does not exist.
    # Variables are local so they cannot clobber names used by the caller
    # or by move_glob_if_exists.
    local src="$1"
    local dest="$2"
    if [ ! -e "$src" ]; then
        return 0
    fi
    mkdir -p "$(dirname "$dest")"
    if [ -e "$dest" ]; then
        echo "SKIP: destination exists: $dest"
    else
        echo "MOVE: $src -> $dest"
        # "--" keeps a path starting with "-" from being parsed as an option
        mv -- "$src" "$dest"
    fi
 }
 move_glob_if_exists() {
    # Move every path matching a glob pattern into DEST_DIR; never overwrite.
    #   $1  glob pattern (expanded here, so pass it quoted)
    #   $2  destination directory (created if missing)
    # Prints a MOVE or SKIP line per match; a pattern without matches is a no-op.
    local pattern="$1"
    local dest_dir="$2"
    local f base dest
    local had_nullglob=0
    mkdir -p "$dest_dir"
    # Remember the caller's nullglob setting so it can be restored afterwards
    if shopt -q nullglob; then
        had_nullglob=1
    fi
    shopt -s nullglob
    # $pattern is intentionally unquoted so the shell expands the glob. Looping
    # over the expansion directly (no intermediate array) avoids the "unbound
    # variable" error that an empty array triggers under `set -u` on bash < 4.4.
    for f in $pattern; do
        base="$(basename "$f")"
        dest="${dest_dir}/${base}"
        if [ -e "$dest" ]; then
            echo "SKIP: destination exists: $dest"
        else
            echo "MOVE: $f -> $dest"
            mv -- "$f" "$dest"
        fi
    done
    if [ "$had_nullglob" -eq 0 ]; then
        shopt -u nullglob
    fi
 }
 # ------------------------------------------------------------
 # Move prompts / attached instruction files
 # ------------------------------------------------------------
 echo
 echo "Moving prompt and instruction files..."
 move_if_exists "attach/Komplett.txt" "prompts/Komplett.txt"
 move_if_exists "attach/just_edss_schema.gbnf" "prompts/just_edss_schema.gbnf"
 move_if_exists "attach/just_edss_text.txt" "prompts/just_edss_text.txt"
 # Move leftover attach folder if empty or archive it
 if [ -d "attach" ]; then
    if [ -z "$(ls -A attach)" ]; then
        rmdir attach
    else
        move_if_exists "attach" "archive/old_project_files/attach"
    fi
 fi
 # ------------------------------------------------------------
 # Move important data files
 # ------------------------------------------------------------
 echo
 echo "Moving data files..."
 move_if_exists "Data/MS_Briefe_400_with_unique_id_SHA3_explore_cleaned.csv" \
    "data/processed/MS_Briefe_400_with_unique_id_SHA3_explore_cleaned.csv"
 move_if_exists "Data/MS_Briefe_400_with_unique_id_SHA3_explore_cleaned_unique.csv" \
    "data/processed/MS_Briefe_400_with_unique_id_SHA3_explore_cleaned_unique.csv"
 move_if_exists "Data/Join_edssandsub.tsv" \
    "data/ground_truth/Join_edssandsub.tsv"
 move_if_exists "Data/GT_Numbers.csv" \
    "data/ground_truth/GT_Numbers.csv"
 move_if_exists "Data/Annika1.csv" \
    "data/ground_truth/Annika1.csv"
 move_if_exists "Data/comparison.tsv" \
    "data/ground_truth/comparison.tsv"
 move_if_exists "Data/edss_distribution_summary.csv" \
    "data/processed/edss_distribution_summary.csv"
 move_if_exists "Data/empirical_confidence_table.csv" \
    "data/processed/empirical_confidence_table.csv"
 move_if_exists "Data/functional_system_colors.json" \
    "data/processed/functional_system_colors.json"
 move_if_exists "Data/Test.csv" \
    "archive/tmp/Test.csv"
 move_if_exists "Data/Hernan" \
    "data/external/Hernan"
 move_if_exists "Data/iteration" \
    "archive/old_data/iteration"
 # Old generated JSON/results from Data folder
 move_glob_if_exists "Data/*results*.json" "archive/old_results"
 move_glob_if_exists "Data/join_*.tsv" "archive/old_results"
 # Move remaining Data folder if anything left
 if [ -d "Data" ]; then
    if [ -z "$(ls -A Data)" ]; then
        rmdir Data
    else
        move_if_exists "Data" "archive/old_data/Data_remaining"
    fi
 fi
 # ------------------------------------------------------------
 # Move benchmark results
 # ------------------------------------------------------------
 echo
 echo "Moving benchmark results..."
 if [ -d "results_edss_benchmark" ]; then
    # Individual benchmark runs
    move_glob_if_exists "results_edss_benchmark/run_*" "results/benchmark_runs"
    # Final model outputs: results/final_results/model_outputs was already created
    # by the mkdir -p above, so moving the endresults folder itself would always be
    # skipped as "destination exists". Move its contents into that folder instead.
    if [ -d "results_edss_benchmark/endresults" ]; then
        move_glob_if_exists "results_edss_benchmark/endresults/*" \
            "results/final_results/model_outputs"
        # Drop the source folder once empty; anything left behind (name clashes,
        # hidden files) is archived together with the parent folder below.
        if [ -z "$(ls -A results_edss_benchmark/endresults)" ]; then
            rmdir results_edss_benchmark/endresults
        fi
    fi
    move_if_exists "results_edss_benchmark/confusion_matrices" \
        "results/figures/confusion_matrices"
    # Remove the old folder if nothing is left, otherwise archive the remainder
    if [ -z "$(ls -A results_edss_benchmark 2>/dev/null || true)" ]; then
        rmdir results_edss_benchmark
    else
        move_if_exists "results_edss_benchmark" \
            "archive/old_results/results_edss_benchmark_remaining"
    fi
 fi
 # ------------------------------------------------------------
 # Move old/general results
 # ------------------------------------------------------------
 echo
 echo "Moving existing results files..."
 if [ -d "results" ]; then
    # Figures
    move_glob_if_exists "results/*.png" "results/figures"
    move_glob_if_exists "results/*.PNG" "results/figures"
    move_glob_if_exists "results/*.jpg" "results/figures"
    move_glob_if_exists "results/*.jpeg" "results/figures"
    move_glob_if_exists "results/*.svg" "results/figures"
    # Tables
    move_glob_if_exists "results/*.csv" "results/tables"
    move_glob_if_exists "results/*.tsv" "results/tables"
    move_glob_if_exists "results/*.xlsx" "results/tables"
    # Subfolders that look like old results
    move_if_exists "results/Jan_visual" "archive/old_results/Jan_visual"
    move_if_exists "results/Lab_meeting" "archive/old_results/Lab_meeting"
    move_if_exists "results/just_edss" "archive/old_results/just_edss"
 fi
 # Root-level result tables
 move_if_exists "edss_distribution_summary.csv" \
    "results/tables/edss_distribution_summary.csv"
 # Logs
 move_if_exists "edss_benchmark_terminal.log" \
    "results/logs/edss_benchmark_terminal.log"
 # ------------------------------------------------------------
 # Move scripts
 # ------------------------------------------------------------
 echo
 echo "Moving scripts..."
 move_if_exists "audit.py" "scripts/audit_outputs.py"
 move_if_exists "certainty.py" "scripts/analyze_certainty.py"
 move_if_exists "certainty_show.py" "scripts/certainty_show.py"
 move_if_exists "figure1.py" "scripts/figure1.py"
 move_if_exists "show_plots.py" "scripts/show_plots.py"
 move_if_exists "show_plots.py.orig" "archive/old_scripts/show_plots.py.orig"
 # Apps / old entry points
 move_if_exists "app.py" "archive/old_scripts/app.py"
 move_if_exists "total_app.py" "archive/old_scripts/total_app.py"
 # Existing project visuals folder
 move_if_exists "project/visuals" "results/figures/project_visuals"
 if [ -d "project" ]; then
    if [ -z "$(ls -A project)" ]; then
        rmdir project
    else
        move_if_exists "project" "archive/old_project_files/project"
    fi
 fi
 # ------------------------------------------------------------
 # Environment folder
 # ------------------------------------------------------------
 echo
 echo "Handling virtual environment..."
 if [ -d "enarcelona" ]; then
    echo "Leaving virtual environment in place: enarcelona/"
    echo "It should remain ignored by .gitignore."
 fi
 # ------------------------------------------------------------
 # Create README notes
 # ------------------------------------------------------------
 echo
 echo "Writing organization notes..."
 cat > "PROJECT_STRUCTURE.md" <<'EOF'
 # Project Structure
 This project was reorganized into:
 - `data/`
  - `raw/`: original raw data, if retained locally
  - `processed/`: cleaned or derived input data
  - `ground_truth/`: manually annotated reference data
  - `external/`: externally provided data
 - `prompts/`
  - EDSS instructions and prompt/schema assets
 - `scripts/`
  - runnable analysis and plotting scripts
 - `results/`
  - `benchmark_runs/`: full model benchmark runs
  - `final_results/`: final selected model outputs
  - `figures/`: generated figures
  - `tables/`: generated tables
  - `logs/`: terminal logs
 - `manuscript/`
  - final figures and tables for paper/thesis writing
 - `archive/`
  - old scripts, old results, temporary files, and unclear legacy files
 Important:
 The reorganization was performed after creating a full timestamped backup.
 EOF
 # ------------------------------------------------------------
 # Final checks
 # ------------------------------------------------------------
 echo
 echo "Organization complete."
 echo
 echo "Backup is here:"
 echo "${BACKUP_DIR}"
 echo
 echo "New top-level structure:"
 find . -maxdepth 2 -type d | sort
 echo
 if [ -d ".git" ]; then
    echo "Git status:"
    git status --short
 fi
 echo
 echo "Next recommended commands:"
 echo "  git status"
 echo "  git add ."
 echo "  git commit -m \"Reorganize project structure\""
@@ -1,481 +0,0 @@
 1 VISUAL OPTIC FUNCTIONS
 VISUAL ACUITY
 The visual acuity score is based on the line in the Snellen chart at 20 feet (5 meters)
 for which the patient makes no more than one error, using best available correction.
 Alternatively, best corrected near vision can be assessed, but this should be noted and
 consistently performed during follow-up examinations. Switching from near to distance
 visual acuity measurements should be avoided in follow-up examinations.
 VISUAL FIELDS
 0 normal
 1 signs only: deficits present only on formal (confrontational) testing
 2 moderate: patient aware of deficit, but incomplete hemianopsia on examination
 3 marked: complete homonymous hemianopsia or equivalent
 SCOTOMA
 0 none
 1 small: detectable only on formal (confrontational) testing
 2 large: spontaneously reported by patient
 * DISC PALLOR
 0 not present
 1 present
 NOTE
 When determining the EDSS step, the Visual FS score must be converted to a lower
 score as follows:
 Visual FS Score:           6 5 4 3 2 1
 Converted Visual FS Score: 4 3 3 2 2 1
 FUNCTIONAL SYSTEM SCORE
 0 normal
 1 disc pallor and/or small scotoma and/or visual acuity (corrected) of worse eye less than 20/20 (1.0) but better than 20/30 (0.67)
 2 worse eye with maximal visual acuity (corrected) of 20/30 to 20/59 (0.67 – 0.34)
 3 worse eye with large scotoma and/or moderate decrease in fields and/or maximal visual acuity (corrected) of 20/60 to 20/99 (0.33 – 0.21)
 4 worse eye with marked decrease of fields and/or maximal visual acuity (corrected) of 20/100 to 20/200 (0.2 – 0.1); grade 3 plus maximal acuity of better eye of 20/60 (0.33) or less
 5 worse eye with maximal visual acuity (corrected) less than 20/200 (0.1); grade 4 plus maximal acuity of better eye of 20/60 (0.33) or less
 6 grade 5 plus maximal acuity of better eye of 20/60 (0.33) or less
 * = optional part of the examination
 ### BRAINSTEM FUNCTIONS
 **DYSARTHRIA**
 - **0**: None
 - **1**: Signs only
 - **2**: Mild: Clinically detectable, patient is aware
 - **3**: Moderate: Obvious during conversation, impairs comprehension
 - **4**: Marked: Incomprehensible speech
 - **5**: Inability to speak
 **DYSPHAGIA**
 - **0**: None
 - **1**: Signs only
 - **2**: Mild: Difficulty with thin liquids
 - **3**: Moderate: Difficulty with liquids and solid food
 - **4**: Marked: Sustained difficulty, requires pureed diet
 - **5**: Inability to swallow
 **OTHER CRANIAL NERVE FUNCTIONS**
 - **0**: Normal
 - **1**: Signs only
 - **2**: Mild disability: Clinically detectable deficit, patient is usually aware
 - **3**: Moderate disability
 - **4**: Marked disability
 **EXTRAOCULAR MOVEMENTS (EOM) IMPAIRMENT**
 - **0**: None
 - **1**: Signs only: Subtle EOM weakness, no complaints of vision issues
 - **2**: Mild: Subtle EOM weakness or obvious incomplete paralysis not noticed by patient
 - **3**: Moderate: Obvious incomplete paralysis noticed by patient or complete loss in one direction
 - **4**: Marked: Complete loss in more than one direction
 **NYSTAGMUS**
 - **0**: None
 - **1**: Signs only or mild: Gaze-evoked nystagmus below moderate limits (equivalent to Brainstem FS score of 1)
 - **2**: Moderate: Sustained nystagmus on horizontal/vertical gaze at 30 degrees, patient may not notice
 - **3**: Severe: Nystagmus in primary position or coarse persistent nystagmus interfering with vision; complete internuclear ophthalmoplegia; oscillopsia
 **TRIGEMINAL DAMAGE**
 - **0**: None
 - **1**: Signs only
 - **2**: Mild: Clinically detectable numbness, patient is aware
 - **3**: Moderate: Impaired sharp/dull discrimination in one to three branches or trigeminal neuralgia (at least one recent attack)
 - **4**: Marked: Unable to discriminate between sharp/dull or complete loss of sensation in one or both nerves
 **FACIAL WEAKNESS**
 - **0**: None
 - **1**: Signs only
 - **2**: Mild: Clinically detectable weakness, patient is aware
 - **3**: Moderate: Incomplete facial palsy (e.g., eye closure requires patching, drooling)
 - **4**: Marked: Complete unilateral or bilateral facial palsy with lagophthalmus or difficulty with liquids
 **HEARING LOSS**
 - **0**: None
 - **1**: Signs only: Hears finger rub less on one/both sides, lateralized Weber test but no complaints
 - **2**: Mild: As in 1, aware of hearing problem
 - **3**: Moderate: Does not hear finger rub on one/both sides, misses several whispered numbers
 - **4**: Marked: Misses all or nearly all whispered numbers
 **FUNCTIONAL SYSTEM SCORE**
 - **0**: Normal
 - **1**: Signs only
 - **2**: Moderate nystagmus/EOM impairment/other mild disability
 - **3**: Severe nystagmus/marked EOM impairment/moderate other cranial nerve disability
 - **4**: Marked dysarthria/other marked disability
 - **5**: Inability to swallow or speak
 ### PYRAMIDAL FUNCTIONS
 #### REFLEXES
 - **0**: Absent
 - **1**: Diminished
 - **2**: Normal
 - **3**: Exaggerated
 - **4**: Nonsustained clonus (a few beats of clonus)
 - **5**: Sustained clonus
 ##### Cutaneous Reflexes
 - **0**: Normal
 - **1**: Weak
 - **2**: Absent
 ###### Palmomental Reflex
 - **0**: Absent
 - **1**: Present
 ###### Plantar Response
 - **0**: Flexor
 - **1**: Neutral or equivocal
 - **2**: Extensor
 #### LIMB STRENGTH
 The weakest muscle in each group defines the score for that muscle group. Optional functional tests (hopping on one foot and walking on heels/toes) are recommended for BMRC grades 3–5.
 ##### BMRC Rating Scale
 - **0**: No muscle contraction detected
 - **1**: Visible contraction without visible joint movement
 - **2**: Visible movement only on the plane of gravity
 - **3**: Active movement against gravity, but not against resistance
 - **4**: Active movement against resistance, but not full strength
 - **5**: Normal strength
 #### FUNCTIONAL TESTS
 ##### Pronator Drift (Upper Extremities)
 Pronation and downward drift:
 - **0**: None
 - **1**: Mild
 - **2**: Evident
 ##### Position Test (Lower Extremities)
 Ask patient to lift both legs together, with legs fully extended at the knee. Sinking:
 - **0**: None
 - **1**: Mild
 - **2**: Evident
 - **3**: Able to lift only one leg at a time (grade from the horizontal position at the hip joints in degrees)
 - **4**: Unable to lift one leg at a time
 ##### Walking on Heels/Toes
 - **0**: Normal
 - **1**: Impaired
 - **2**: Not possible
 ##### Hopping on One Foot
 - **0**: Normal
 - **1**: 6–10 times
 - **2**: 1–5 times
 - **3**: Not possible
 #### LIMB SPASTICITY (AFTER RAPID FLEXION OF THE EXTREMITY)
 - **0**: None
 - **1**: Mild: barely increased muscle tone
 - **2**: Moderate: moderately increased muscle tone that can be overcome; full range of motion is possible
 - **3**: Severe: severely increased muscle tone that is extremely difficult to overcome; full range of motion is not possible
 - **4**: Contracted
 #### GAIT SPASTICITY
 - **0**: None
 - **1**: Barely perceptible
 - **2**: Evident: minor interference with function
 - **3**: Permanent shuffling: major interference with function
 #### OVERALL MOTOR PERFORMANCE
 - **0**: Normal
 - **1**: Abnormal weakness (as compared to peers) in performing more demanding tasks, e.g., walking longer distances; no reduction in limb strength on formal testing
 - **2**: Reduction in strength of individual muscle groups at confrontational testing
 #### FUNCTIONAL SYSTEM SCORE
 - **0**: Normal
 - **1**: Abnormal signs without disability
 - **2**: Minimal disability: patient complains of motor-fatigability or reduced performance in strenuous motor tasks (motor performance grade 1) and/or BMRC grade 4 in one or two muscle groups
 - **3**: Mild to moderate paraparesis or hemiparesis: usually BMRC grade 4 in more than two muscle groups; and/or BMRC grade 3 in one or two muscle groups (movements against gravity
 are possible); and/or severe monoparesis: BMRC grade 2 or less in one muscle group
 - **4**: Marked paraparesis or hemiparesis: usually BMRC grade 2 in two limbs or monoplegia with BMRC grade 0 or 1 in one limb; and/or moderate tetraparesis: BMRC grade 3 in three or more limbs
 - **5**: Paraplegia: BMRC grade 0 or 1 in all muscle groups of the lower limbs; and/or marked tetraparesis: BMRC grade 2 or less in three or more limbs; and/or hemiplegia
 - **6**: Tetraplegia: BMRC grade 0 or 1 in all muscle groups of the upper and lower limbs
 ### CEREBELLAR FUNCTIONS
 #### HEAD TREMOR
 - **0**: none
 - **1**: mild
 - **2**: moderate
 - **3**: severe
 #### TRUNCAL ATAXIA
 - **0**: none
 - **1**: signs only
 - **2**: mild (swaying with eyes closed)
 - **3**: moderate (swaying with eyes open)
 - **4**: severe (unable to sit without assistance)
 #### LIMB ATAXIA (TREMOR / DYSMETRIA AND RAPID ALTERNATING MOVEMENTS)
 - **0**: none
 - **1**: signs only
 - **2**: mild (tremor or clumsy movements easily seen, minor interference with function)
 - **3**: moderate (tremor or clumsy movements interfere with function in all spheres)
 - **4**: severe (most functions are very difficult)
 #### TANDEM (STRAIGHT LINE) WALKING
 - **0**: normal
 - **1**: impaired
 - **2**: not possible
 #### GAIT ATAXIA
 - **0**: none
 - **1**: signs only
 - **2**: mild (problems with balance realized by patient and/or significant other)
 - **3**: moderate (abnormal balance with ordinary walking)
 - **4**: severe (unable to walk more than a few steps unassisted or requires a walking aid or assistance due to ataxia)
 #### ROMBERG TEST
 - **0**: normal
 - **1**: mild (mild instability with eyes closed)
 - **2**: moderate (not stable with eyes closed)
 - **3**: severe (not stable with eyes open)
 #### OTHER CEREBELLAR TESTS
 - **0**: normal
 - **1**: mild abnormality
 - **2**: moderate abnormality
 - **3**: severe abnormality
 **NOTE:**
 - The presence of severe gait and/or truncal ataxia alone (without severe ataxia in three or four limbs) results in a Cerebellar FS score of 3.
 - If weakness or sensory deficits interfere with the testing of ataxia, score the patient’s actual performance. Indicate the possible role of weakness by marking an "X" after the
 affected subsystems and Cerebellar FS score.
 #### FUNCTIONAL SYSTEM SCORE
 - **0**: normal
 - **1**: abnormal signs without disability
 - **2**: mild ataxia and/or moderate station ataxia (Romberg) and/or tandem walking not possible
 - **3**: moderate limb ataxia and/or moderate or severe gait/truncal ataxia
 - **4**: severe gait/truncal ataxia and severe ataxia in three or four limbs
 - **5**: unable to perform coordinated movements due to ataxia
 - **X**: pyramidal weakness (BMRC grade 3 or worse in limb strength) or sensory deficits interfere with cerebellar testing
 ### SENSORY FUNCTIONS
 #### SUPERFICIAL SENSATION (LIGHT TOUCH AND PAIN)
 - **0**: normal
 - **1**: signs only (slightly diminished sensation on formal testing, patient not aware)
 - **2**: mild (patient aware of impaired light touch or pain but can discriminate sharp/dull)
 - **3**: moderate (impaired discrimination of sharp/dull)
 - **4**: marked (unable to discriminate between sharp/dull and/or unable to feel light touch)
 - **5**: complete loss (anesthesia)
 #### VIBRATION SENSE (AT THE MOST DISTAL JOINT)
 - **0**: normal
 - **1**: mild (graded tuning fork 5–7 of 8; detects more than 10 seconds but less than examiner)
 - **2**: moderate (graded tuning fork 1–4 of 8; detects between 2 and 10 sec.)
 - **3**: marked (complete loss of vibration sense)
 #### POSITION SENSE
 - **0**: normal
 - **1**: mild (1–2 incorrect responses, only distal joints affected)
 - **2**: moderate (misses many movements of fingers or toes; proximal joints affected)
 - **3**: marked (no perception of movement, astasia)
 * **LHERMITTE’S SIGN** (does not contribute to the Sensory FS score)
  - **0**: negative
  - **1**: positive
 * **PARAESTHESIAE (TINGLING)** (does not contribute to the Sensory FS score)
  - **0**: none
  - **1**: present
 #### FUNCTIONAL SYSTEM SCORE
 - **0**: normal
 - **1**: impaired superficial sensation in one or two limbs
 - **2**: mild impairment in more than two limbs, no major proprioceptive deficits
 - **3**: moderate impairment in more than two limbs with minor proprioceptive deficits
 - **4**: severe impairment in more than two limbs with significant proprioceptive deficits
 - **5**: loss of sensation in one or two limbs, significant proprioceptive deficits in most of the body below the head
 - **6**: essentially no sensation below the head
 ### BOWEL AND BLADDER FUNCTIONS
 #### URINARY HESITANCY AND RETENTION
 - **0**: none
 - **1**: mild (no major impact on lifestyle)
 - **2**: moderate (urinary retention; frequent urinary tract infections)
 - **3**: severe (requires catheterization)
 - **4**: loss of function (overflow incontinence)
 #### URINARY URGENCY AND INCONTINENCE
 - **0**: none
 - **1**: mild (no major impact on lifestyle)
 - **2**: moderate (rare incontinence occurring no more than once a week; must wear pads)
 - **3**: severe (frequent incontinence occurring from several times a week to more than once a day; must wear urinal or pads)
 - **4**: loss of function (loss of bladder control)
 #### BLADDER CATHETERIZATION
 - **0**: none
 - **1**: intermittent self-catheterization
 - **2**: constant catheterization
 #### BOWEL DYSFUNCTION
 - **0**: none
 - **1**: mild (no incontinence, no major impact on lifestyle, mild constipation)
 - **2**: moderate (must wear pads or alter lifestyle to be near lavatory)
 - **3**: severe (in need of enemas or manual measures to evacuate bowels)
 - **4**: complete loss of function
 #### SEXUAL DYSFUNCTION
 **Male**
 - **0**: none
 - **1**: mild (difficulty maintaining erection during intercourse, but achieves erection and still has intercourse)
 - **2**: moderate (difficulty achieving erection, decreased libido, still has intercourse and reaches orgasm)
 - **3**: severe (marked decrease in libido, inability to achieve full erection, intercourse with difficulty, hypoorgasmia)
 - **4**: loss of function
 **Female**
 - **0**: none
 - **1**: mild (mild lack of lubrication, still sexually active and reaches orgasm)
 - **2**: moderate (dyspareunia, hypoorgasmia, decrease in sexual activity)
 - **3**: severe (marked decrease in sexual activity, anorgasmia)
 - **4**: loss of function
 **NOTE**
 When determining the EDSS step, the Bowel and Bladder FS score must be converted as follows (scores of 4 and above are lowered by one step; scores of 3 and below remain unchanged):
 - Bowel and Bladder FS Score: 6 → Converted Bowel and Bladder FS Score: 5
 - Bowel and Bladder FS Score: 5 → Converted Bowel and Bladder FS Score: 4
 - Bowel and Bladder FS Score: 4 → Converted Bowel and Bladder FS Score: 3
 - Bowel and Bladder FS Score: 3 → Converted Bowel and Bladder FS Score: 3
 - Bowel and Bladder FS Score: 2 → Converted Bowel and Bladder FS Score: 2
 - Bowel and Bladder FS Score: 1 → Converted Bowel and Bladder FS Score: 1
 Sexual dysfunction can be documented but generally does not impact the FS score due to assessment difficulties by examining physicians.
 ### FUNCTIONAL SYSTEM SCORE
 - **0**: normal
 - **1**: mild urinary hesitancy, urgency, and/or constipation
 - **2**: moderate urinary hesitancy/retention and/or moderate urinary urgency/incontinence and/or moderate bowel dysfunction
 - **3**: frequent urinary incontinence or intermittent self-catheterization; needs enemas or manual measures to evacuate bowels
 - **4**: in need of almost constant catheterization
 - **5**: loss of bladder or bowel function (external or indwelling catheter)
 - **6**: loss of bowel and bladder function
 ### CEREBRAL FUNCTIONS
 #### DEPRESSION AND EUPHORIA
 - **0**: none
 - **1**: present (Patient complains of depression or is considered depressed or euphoric by the investigator or significant other.)
 **Note**: Depression and Euphoria are documented on the scoring sheet but are not taken into consideration for FS and EDSS calculation.
 #### DECREASE IN MENTATION
 - **0**: none
 - **1**: signs only (not apparent to patient and/or significant other)
 - **2**: mild (Patient and/or significant other report mild changes in mentation. Examples include: impaired ability to follow a rapid course of association or survey complex matters;
 impaired judgment in certain demanding situations; capable of handling routine daily activities, but unable to tolerate additional stressors; intermittently symptomatic even with
 normal levels of stress; reduced performance; tendency toward negligence due to obliviousness or fatigue.)
 - **3**: moderate (Definite abnormalities on brief mental status testing, but still oriented to person, place, and time)
 - **4**: marked (Not oriented in one or two spheres (person, place, or time); marked effect on lifestyle)
 - **5**: dementia, confusion, and/or complete disorientation
 #### FATIGUE
 - **0**: none
 - **1**: mild (Does not usually interfere with daily activities)
 - **2**: moderate (Interferes with daily activities but does not limit them by more than 50%)
 - **3**: severe (Significant limitation in daily activities (> 50% reduction))
 **Note**: Because fatigue is difficult to evaluate objectively, in some studies it does not contribute to the Cerebral FS score or EDSS step. Please adhere to the study’s specific
 instructions.
 ### FUNCTIONAL SYSTEM SCORE
 - **0**: normal
 - **1**: signs only in decrease in mentation; mild fatigue
 - **2**: mild decrease in mentation; moderate or severe fatigue
 - **3**: moderate decrease in mentation
 - **4**: marked decrease in mentation
 - **5**: dementia
 ### AMBULATION
 **Unrestricted Ambulation**
 - The patient can walk a normal distance without assistance, comparable to healthy individuals of similar age and physical condition.
 - EDSS step can range from 0 to 5.0, depending on the Functional System (FS) scores.
 **Fully Ambulatory**
 - At least 500 meters of ambulation without assistance, but not unrestricted.
 - EDSS step can range from 2.0 to 5.0, depending on FS scores.
 - The Pyramidal and/or Cerebellar FS must be ≥ 2 to reflect this restriction in ambulation.
 **Ambulation < 500 Meters**
 - If the walking distance is less than 500 meters, the EDSS step must be ≥ 4.5, depending on the walking ranges provided by the ambulation score and combination of FS scores.
 - EDSS steps 5.5 to 8.0 are exclusively defined by the ability to ambulate and type of assistance required, or the ability to use a wheelchair.
 **Assistance Needed**
 - Definitions for EDSS steps 6.0 or 6.5 include both the type of assistance required when walking and the walking range.
 - Assistance by another person is equivalent to bilateral assistance.
 **Note:**
 - The ambulation score represents both the walking range and the type of assistance required.
 - This score replaces several checkboxes used previously on the scoring sheet but does not introduce new definitions.
 - Use of a wheelchair can now be scored on the scoring sheet.
 - Indicate the reported distance and time for the patient in the appropriate field on the scoring sheet, followed by the type of assistance and walking distance measured during assessment.
 ### DISTANCE AND TIME REPORTED BY PATIENT
 **Maximal Unassisted Walking Distance**
 - Maximal unassisted walking distance reported by the patient (in meters) without rest or assistance.
 - Time required to walk the maximum distance according to the patient (in minutes).
 **Assistance**
 0. Without help or assistance (allowing use of an ankle-foot orthotic device, but no other assistive devices).
 1. Unilateral assistance: one stick/crutch/brace.
 2. Bilateral assistance: two sticks/crutches/braces or assistance by another person.
 3. Wheelchair.
 **Distance**
 - Measure the distance the patient can walk in meters.
  - **Unassisted:** Observe walking for a minimum of 500 meters and measure time needed, if possible.
  - **Assisted:** Observe walking with assistive devices or help from another person for a minimum of 130 meters, if possible.
 ---
 ### AMBULATION SCORE
 0. Unrestricted
 1. Fully ambulatory
 2. ≥ 300 meters but < 500 meters, without help or assistance (EDSS 4.5 or 5.0)
 3. ≥ 200 meters but < 300 meters, without help or assistance (EDSS 5.0)
 4. ≥ 100 meters but < 200 meters, without help or assistance (EDSS 5.5)
 5. Walking range < 100 meters without assistance (EDSS 6.0)
 6. Unilateral assistance, ≥ 50 meters (EDSS 6.0)
 7. Bilateral assistance, ≥ 120 meters (EDSS 6.0)
 8. Unilateral assistance, < 50 meters (EDSS 6.5)
 9. Bilateral assistance, ≥ 5 meters but < 120 meters (EDSS 6.5)
 10. Uses wheelchair without help; unable to walk 5 meters even with aid, essentially restricted to wheelchair; wheels self and transfers alone; up and about in wheelchair for some 12 hours a day (EDSS 7.0)
 11. Uses wheelchair with help; unable to take more than a few steps; restricted to wheelchair; may need some help in transferring and wheeling self (EDSS 7.5)
 12. Essentially restricted to bed or chair or perambulated in wheelchair, but out of bed most of the day; retains many self-care functions; generally has effective use of arms (EDSS 8.0)
 Expanded Disability Status Scale (EDSS)
 0   - Normal neurological exam (all Functional Systems [FS] grade 0)
 1.0 - No disability, minimal signs in one FS (one FS grade 1)
 1.5 - No disability, minimal signs in more than one FS (more than one FS grade 1)
 2.0 - Minimal disability in one FS (one FS grade 2, others 0 or 1)
 2.5 - Minimal disability in two FS (two FS grades 2, others 0 or 1)
 3.0 - Moderate disability in one FS (one FS grade 3, others 0 or 1) though fully ambulatory;
 or mild disability in three or four FS (three/four FS grades 2, others 0 or 1) though fully ambulatory
 3.5 - Fully ambulatory but with moderate disability in one FS (one FS grade 3) and mild disability in one or two FS (one/two FS grade 2) and others 0 or 1;
 or fully ambulatory with two FS grades 3 (others 0 or 1);
 or fully ambulatory with five FS grades 2 (others 0 or 1)
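 4.0 - Fully ambulatory without aid; self-sufficient; up and about some 12 hours a day despite relatively severe disability; able to walk about 500 meters without aid or rest
 4.5 - Fully ambulatory without aid; up and about much of the day; able to work a full day, but may otherwise have some limitation of full activity or require minimal assistance; able to walk about 300 meters without aid or rest
 5.0 - Ambulatory without aid or rest for about 200 meters; disability severe enough to impair full daily activities
 5.5 - Ambulatory without aid or rest for about 100 meters; disability severe enough to preclude full daily activities
 6.0 - Intermittent or unilateral constant assistance (cane, crutch, or brace) required to walk about 100 meters with or without resting
 6.5 - Constant bilateral assistance (canes, crutches, or braces) required to walk about 20 meters without resting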
 7.0 - Unable to walk 5 meters even with aid, essentially restricted to wheelchair; wheels self and transfers alone; up and about in wheelchair some 12 hours a day
 7.5 - Unable to take more than a few steps; restricted to wheelchair; may need some help in transferring and in wheeling self
 8.0 - Essentially restricted to bed or chair or perambulated in wheelchair, but out of bed most of the day; retains many self-care functions; generally has effective use of arms
 8.5 - Essentially restricted to bed much of the day; has some effective use of arm(s); retains some self-care functions
 9.0 - Helpless bed patient; can communicate and eat
 9.5 - Totally helpless bed patient; unable to communicate effectively or eat/swallow
 10  - Death due to MS
@@ -1,11 +0,0 @@
 # GBNF grammar that constrains generated output to a single JSON object of the form
 #   {"Reason": <string>, "nicht_klassifizierbar": <boolean>[, "EDSS": <number>]}
 # Members must appear in exactly this order; only the trailing "EDSS" member is optional (see `root`).
 # "EDSS" member: the literal key, a colon, and a JSON-style number.
 EDSS-kv ::= "\"EDSS\"" space ":" space number
 # Value of "Reason": a double-quoted string of at most 400 `char` units.
 Reason ::= "\"" char{0,400} "\"" space
 # "Reason" member: the literal key, a colon, and the bounded string above.
 Reason-kv ::= "\"Reason\"" space ":" space Reason
 # JSON boolean literal.
 boolean ::= ("true" | "false") space
 # One string unit: any character except '"', '\', DEL (0x7F) and control characters 0x00-0x1F,
 # or a backslash escape (\" \\ \b \f \n \r \t, or \u followed by four hex digits).
 char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
 # Fractional digits of a number: 1 to 16 digits.
 decimal-part ::= [0-9]{1,16}
 # Integer digits without leading zeros: "0", or a non-zero digit followed by up to 15 digits.
 integral-part ::= [0] | [1-9] [0-9]{0,15}
 # "nicht_klassifizierbar" member (German: "not classifiable"): the literal key, a colon, and a boolean.
 nicht-klassifizierbar-kv ::= "\"nicht_klassifizierbar\"" space ":" space boolean
 # Generic JSON number: optional minus sign, integer part, optional fraction, optional exponent.
 # NOTE(review): this accepts negative values, exponents and arbitrary magnitudes, so the grammar
 # itself does not restrict "EDSS" to the 0-10 scale in 0.5 steps; presumably that is validated
 # by the consumer of the output - confirm.
 number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
 # Top-level object: "Reason" first, then "nicht_klassifizierbar", then optionally "EDSS".
 root ::= "{" space Reason-kv "," space nicht-klassifizierbar-kv ( "," space ( EDSS-kv ) )? "}" space
 # Optional whitespace: nothing, a single space, or one or two newlines followed by up to 20 spaces/tabs.
 space ::= | " " | "\n"{1,2} [ \t]{0,20}
@@ -1,25 +0,0 @@
 Expanded Disability Status Scale (EDSS)
 0   - Normal neurological exam (all Functional Systems [FS] grade 0)
 1.0 - No disability, minimal signs in one FS (one FS grade 1)
 1.5 - No disability, minimal signs in more than one FS (more than one FS grade 1)
 2.0 - Minimal disability in one FS (one FS grade 2, others 0 or 1)
 2.5 - Minimal disability in two FS (two FS grades 2, others 0 or 1)
 3.0 - Moderate disability in one FS (one FS grade 3, others 0 or 1) though fully ambulatory;
 or mild disability in three or four FS (three/four FS grades 2, others 0 or 1) though fully ambulatory
 3.5 - Fully ambulatory but with moderate disability in one FS (one FS grade 3) and mild disability in one or two FS (one/two FS grade 2) and others 0 or 1;
 or fully ambulatory with two FS grades 3 (others 0 or 1);
 or fully ambulatory with five FS grades 2 (others 0 or 1)
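 4.0 - Fully ambulatory without aid; self-sufficient; up and about some 12 hours a day despite relatively severe disability; able to walk about 500 meters without aid or rest
 4.5 - Fully ambulatory without aid; up and about much of the day; able to work a full day, but may otherwise have some limitation of full activity or require minimal assistance; able to walk about 300 meters without aid or rest
 5.0 - Ambulatory without aid or rest for about 200 meters; disability severe enough to impair full daily activities
 5.5 - Ambulatory without aid or rest for about 100 meters; disability severe enough to preclude full daily activities
 6.0 - Intermittent or unilateral constant assistance (cane, crutch, or brace) required to walk about 100 meters with or without resting
 6.5 - Constant bilateral assistance (canes, crutches, or braces) required to walk about 20 meters without resting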
 7.0 - Unable to walk 5 meters even with aid, essentially restricted to wheelchair; wheels self and transfers alone; up and about in wheelchair some 12 hours a day
 7.5 - Unable to take more than a few steps; restricted to wheelchair; may need some help in transferring and in wheeling self
 8.0 - Essentially restricted to bed or chair or perambulated in wheelchair, but out of bed most of the day; retains many self-care functions; generally has effective use of arms
 8.5 - Essentially restricted to bed much of the day; has some effective use of arm(s); retains some self-care functions
 9.0 - Helpless bed patient; can communicate and eat
 9.5 - Totally helpless bed patient; unable to communicate effectively or eat/swallow
 10  - Death due to MS