import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import dataframe_image as dfi
# Read the tab-separated input table into a DataFrame
DATA_PATH = "/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv"
df = pd.read_csv(DATA_PATH, sep="\t")

# 1. Identify all GT and result columns
gt_columns = [col for col in df.columns if col.startswith('GT.')]
result_columns = [col for col in df.columns if col.startswith('result.')]

print("GT Columns found:", gt_columns)
print("Result Columns found:", result_columns)


# 2. Create proper mapping between GT and result columns
def _find_result_column(gt_col, result_columns):
    """Return the result column that pairs with *gt_col*, or None.

    Matching is attempted from strictest to loosest:
      1. literal candidates built from the GT base name (exact, spaces <->
         underscores, separators removed, all-lower, all-upper);
      2. case-insensitive comparison after dropping every character that is
         not alphanumeric, '_' or ' ';
      3. case-insensitive comparison on alphanumeric characters only, so that
         case and separator differences may occur together
         (e.g. 'GT.Foo Bar' vs 'result.foo_bar').

    Args:
        gt_col: Column name starting with 'GT.'.
        result_columns: Column names starting with 'result.', in the order
            they should be preferred when several would fit.

    Returns:
        The matching name from *result_columns*, or None if nothing fits.
    """
    gt_prefix, result_prefix = 'GT.', 'result.'

    # Strip only the leading prefix. str.replace would also delete the token
    # wherever it re-occurs inside the name (e.g. 'GT.WEIGHT.x' -> 'WEIGHx').
    base_name = gt_col[len(gt_prefix):] if gt_col.startswith(gt_prefix) else gt_col

    candidates = [
        f'result.{base_name}',                     # Exact match
        f'result.{base_name.replace(" ", "_")}',   # With underscores
        f'result.{base_name.replace("_", " ")}',   # With spaces
        f'result.{base_name.replace(" ", "")}',    # No spaces
        f'result.{base_name.replace("_", "")}',    # No underscores
        f'result.{base_name.lower()}',             # All lower case
        f'result.{base_name.upper()}',             # All upper case
    ]
    available = set(result_columns)
    for candidate in candidates:
        if candidate in available:
            return candidate

    def _keep_separators(name):
        # Drop special characters but keep '_' and ' ' significant.
        return ''.join(ch for ch in name if ch.isalnum() or ch in ('_', ' ')).lower()

    def _alnum_only(name):
        # Loosest key: letters and digits only, case-insensitive.
        return ''.join(ch for ch in name if ch.isalnum()).lower()

    for normalise in (_keep_separators, _alnum_only):
        target = normalise(base_name)
        if not target:
            # A name with no usable characters must not pair with another
            # equally empty name.
            continue
        for result_col in result_columns:
            if result_col.startswith(result_prefix):
                result_base = result_col[len(result_prefix):]
            else:
                result_base = result_col
            if normalise(result_base) == target:
                return result_col

    return None


column_mapping = {}
unmatched_gt_columns = []

for gt_col in gt_columns:
    matched_col = _find_result_column(gt_col, result_columns)
    if matched_col is None:
        # Keep track of GT columns that will be absent from the analysis
        # instead of dropping them silently.
        unmatched_gt_columns.append(gt_col)
    else:
        column_mapping[gt_col] = matched_col

print("Column mapping:", column_mapping)
if unmatched_gt_columns:
    print("GT columns without a matching result column:", unmatched_gt_columns)

# 3. Vectorized agreement computation for every mapped GT/result pair
data_list = []

for gt_col, result_col in column_mapping.items():
    print(f"Processing {gt_col} vs {result_col}")

    # Convert to numeric; entries that cannot be parsed become NaN
    s1 = pd.to_numeric(df[gt_col], errors='coerce').astype(float)
    s2 = pd.to_numeric(df[result_col], errors='coerce').astype(float)

    # A row counts as a match when the absolute difference is <= 0.5.
    # Rows where either side is NaN give a NaN difference, which compares
    # False and is therefore never counted as a match.
    diff = np.abs(s1 - s2)
    matches = (diff <= 0.5).sum()

    # Denominator: rows where both values were numeric
    valid_count = diff.notna().sum()

    # With no comparable rows, report 0 instead of dividing by zero
    percentage = (matches / valid_count) * 100 if valid_count > 0 else 0

    # Display name: strip only the leading 'GT.' prefix. str.replace would
    # also remove the token if it re-occurs inside the name.
    base_name = gt_col[len('GT.'):] if gt_col.startswith('GT.') else gt_col

    data_list.append({
        'GT': base_name,
        'Match %': round(percentage, 1)
    })


# 4. Prepare Data for Plotting
# Passing the columns explicitly keeps 'Match %' present even when no column
# pair was mapped; otherwise the empty frame has no columns and sort_values
# raises KeyError.
match_df = pd.DataFrame(data_list, columns=['GT', 'Match %'])
match_df = match_df.sort_values('Match %', ascending=False)  # Best agreement first

# 5. Create the Styled Gradient Table
def style_agreement_table(df):
    return (df.style
        .format({'Match %': '{:.1f}%'}) # Add % sign
        .background_gradient(cmap='RdYlGn', subset=['Match %'], vmin=50, vmax=100) # Red to Green gradient
        .set_properties(**{
            'text-align': 'center',
            'font-size': '12pt',
            'border-collapse': 'collapse',
            'border': '1px solid #D3D3D3'
        })
        .set_table_styles([
            # Style the header
            {'selector': 'th', 'props': [
                ('background-color', '#404040'), 
                ('color', 'white'),
                ('font-weight', 'bold'),
                ('text-transform', 'uppercase'),
                ('padding', '10px')
            ]},
            # Add hover effect
            {'selector': 'tr:hover', 'props': [('background-color', '#f5f5f5')]}
        ])
        .set_caption("EDSS Agreement Analysis: Ground Truth vs. Results (Tolerance ±0.5)")
    )

# Build the styled table from the agreement results.
styled_table = style_agreement_table(match_df)
# Bare expression: a Jupyter notebook displays the table only when this is the
# last statement of a cell; when run as a plain script it has no effect.
styled_table

# Export the styled table via dataframe_image to "styled_table.png"
# (relative path, so it is resolved against the current working directory).
dfi.export(styled_table, "styled_table.png")
# Disabled alternative: write the styled table as an HTML report instead.
#styled_table.to_html("agreement_report.html")
# 6. Save as SVG (disabled: no matplotlib figure is created in the code shown here)

#plt.savefig("agreement_table.svg", format='svg', dpi=300, bbox_inches='tight')
#print("Successfully saved agreement_table.svg")

# Show any open matplotlib figures when running in a GUI environment.
# NOTE(review): nothing visible in this script creates a figure, so this call
# presumably has no effect - confirm before relying on it.
plt.show()