From c986ab92c580a6c7aa40517ec8b64952ca3976ff Mon Sep 17 00:00:00 2001
From: Shahin Ramezanzadeh <shahin.rmz@tutanota.com>
Date: Mon, 26 Jan 2026 02:03:08 +0100
Subject: [PATCH] Remove unimportant scripts (style2.py, styled_tables.py)

---
 Data/style2.py        | 135 ------------------------------------------
 Data/styled_tables.py |  74 -----------------------
 2 files changed, 209 deletions(-)
 delete mode 100644 Data/style2.py
 delete mode 100644 Data/styled_tables.py

diff --git a/Data/style2.py b/Data/style2.py
deleted file mode 100644
index ef26d04..0000000
--- a/Data/style2.py
+++ /dev/null
@@ -1,135 +0,0 @@
-import pandas as pd
-import numpy as np
-import seaborn as sns
-import matplotlib.pyplot as plt
-import dataframe_image as dfi
-# Load data
-df = pd.read_csv("/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv", sep='\t')
-
-# 1. Identify all GT and result columns
-gt_columns = [col for col in df.columns if col.startswith('GT.')]
-result_columns = [col for col in df.columns if col.startswith('result.')]
-
-print("GT Columns found:", gt_columns)
-print("Result Columns found:", result_columns)
-
-# 2. Create proper mapping between GT and result columns
-# Handle various naming conventions (spaces, underscores, etc.)
-column_mapping = {}
-
-for gt_col in gt_columns:
-    base_name = gt_col.replace('GT.', '')
-
-    # Clean the base name for matching - remove spaces, underscores, etc.
-    # Try different matching approaches
-    candidates = [
-        f'result.{base_name}',  # Exact match
-        f'result.{base_name.replace(" ", "_")}',  # With underscores
-        f'result.{base_name.replace("_", " ")}',  # With spaces
-        f'result.{base_name.replace(" ", "")}',   # No spaces
-        f'result.{base_name.replace("_", "")}'    # No underscores
-    ]
-
-    # Also try case-insensitive matching
-    candidates.append(f'result.{base_name.lower()}')
-    candidates.append(f'result.{base_name.upper()}')
-
-    # Try to find matching result column
-    matched = False
-    for candidate in candidates:
-        if candidate in result_columns:
-            column_mapping[gt_col] = candidate
-            matched = True
-            break
-
-    # If no exact match found, try partial matching
-    if not matched:
-        # Try to match by removing special characters and comparing
-        base_clean = ''.join(e for e in base_name if e.isalnum() or e in ['_', ' '])
-        for result_col in result_columns:
-            result_base = result_col.replace('result.', '')
-            result_clean = ''.join(e for e in result_base if e.isalnum() or e in ['_', ' '])
-            if base_clean.lower() == result_clean.lower():
-                column_mapping[gt_col] = result_col
-                matched = True
-                break
-
-print("Column mapping:", column_mapping)
-
-# 3. Faster, vectorized computation using the corrected mapping
-data_list = []
-
-for gt_col, result_col in column_mapping.items():
-    print(f"Processing {gt_col} vs {result_col}")
-
-    # Convert to numeric, forcing errors to NaN
-    s1 = pd.to_numeric(df[gt_col], errors='coerce').astype(float)
-    s2 = pd.to_numeric(df[result_col], errors='coerce').astype(float)
-
-    # Calculate matches (abs difference <= 0.5)
-    diff = np.abs(s1 - s2)
-    matches = (diff <= 0.5).sum()
-
-    # Determine the denominator (total valid comparisons)
-    valid_count = diff.notna().sum()
-
-    if valid_count > 0:
-        percentage = (matches / valid_count) * 100
-    else:
-        percentage = 0
-
-    # Extract clean base name for display
-    base_name = gt_col.replace('GT.', '')
-
-    data_list.append({
-        'GT': base_name,
-        'Match %': round(percentage, 1)
-    })
-
-
-
-# 4. Prepare Data for Plotting
-match_df = pd.DataFrame(data_list)
-match_df = match_df.sort_values('Match %', ascending=False) # Sort for better visual flow
-
-# 5. Create the Styled Gradient Table
-def style_agreement_table(df):
-    return (df.style
-        .format({'Match %': '{:.1f}%'}) # Add % sign
-        .background_gradient(cmap='RdYlGn', subset=['Match %'], vmin=50, vmax=100) # Red to Green gradient
-        .set_properties(**{
-            'text-align': 'center',
-            'font-size': '12pt',
-            'border-collapse': 'collapse',
-            'border': '1px solid #D3D3D3'
-        })
-        .set_table_styles([
-            # Style the header
-            {'selector': 'th', 'props': [
-                ('background-color', '#404040'), 
-                ('color', 'white'),
-                ('font-weight', 'bold'),
-                ('text-transform', 'uppercase'),
-                ('padding', '10px')
-            ]},
-            # Add hover effect
-            {'selector': 'tr:hover', 'props': [('background-color', '#f5f5f5')]}
-        ])
-        .set_caption("EDSS Agreement Analysis: Ground Truth vs. Results (Tolerance ±0.5)")
-    )
-
-# To display in a Jupyter Notebook:
-styled_table = style_agreement_table(match_df)
-styled_table
-
-dfi.export(styled_table, "styled_table.png")
-#styled_table.to_html("agreement_report.html")
-# 6. Save as SVG
-
-#plt.savefig("agreement_table.svg", format='svg', dpi=300, bbox_inches='tight')
-#print("Successfully saved agreement_table.svg")
-
-# Show plot if running in a GUI environment
-plt.show()
-
-
diff --git a/Data/styled_tables.py b/Data/styled_tables.py
deleted file mode 100644
index 4c3ebc5..0000000
--- a/Data/styled_tables.py
+++ /dev/null
@@ -1,74 +0,0 @@
-import pandas as pd
-import numpy as np
-import seaborn as sns
-
-# Sample data (replace with your actual df)
-df = pd.read_csv("/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv", sep='\t')
-
-# Identify GT and Result columns
-gt_columns = [col for col in df.columns if col.startswith('GT.')]
-result_columns = [col for col in df.columns if col.startswith('result.')]
-
-# Create mapping
-column_mapping = {}
-for gt_col in gt_columns:
-    base_name = gt_col.replace('GT.', '')
-    result_col = f'result.{base_name}'
-    if result_col in result_columns:
-        column_mapping[gt_col] = result_col
-
-# Function to compute match percentage for each GT-Result pair
-def compute_match_percentages(df, column_mapping):
-    percentages = []
-    for gt_col, result_col in column_mapping.items():
-        count = 0
-        total = len(df)
-
-        for _, row in df.iterrows():
-            gt_val = row[gt_col]
-            result_val = row[result_col]
-
-            # Handle NaN values
-            if pd.isna(gt_val) or pd.isna(result_val):
-                continue
-
-            # Handle non-numeric values
-            try:
-                gt_float = float(gt_val)
-                result_float = float(result_val)
-            except (ValueError, TypeError):
-                # Skip rows with non-numeric values
-                continue
-
-            # Check if values are within 0.5 tolerance
-            if abs(gt_float - result_float) <= 0.5:
-                count += 1
-
-        percentage = (count / total) * 100
-        percentages.append({
-            'GT_Column': gt_col,
-            'Result_Column': result_col,
-            'Match_Percentage': round(percentage, 1)
-        })
-
-    return pd.DataFrame(percentages)
-
-# Compute match percentages
-match_df = compute_match_percentages(df, column_mapping)
-
-# Create a pivot table for gradient display (optional but helpful)
-pivot_table = match_df.set_index(['GT_Column', 'Result_Column'])['Match_Percentage'].unstack(fill_value=0)
-
-# Apply gradient background
-cm = sns.light_palette("green", as_cmap=True)
-styled_table = pivot_table.style.background_gradient(cmap=cm, axis=None)
-
-# Display result
-print("Agreement Percentage Table (with gradient):")
-styled_table
-
-
-
-# Save the styled table to a file
-styled_table.to_html("agreement_report.html")
-print("Report saved to agreement_report.html")