From c986ab92c580a6c7aa40517ec8b64952ca3976ff Mon Sep 17 00:00:00 2001 From: Shahin Ramezanzadeh Date: Mon, 26 Jan 2026 02:03:08 +0100 Subject: [PATCH] deleting not important scripts --- Data/style2.py | 135 ------------------------------------------ Data/styled_tables.py | 74 ----------------------- 2 files changed, 209 deletions(-) delete mode 100644 Data/style2.py delete mode 100644 Data/styled_tables.py diff --git a/Data/style2.py b/Data/style2.py deleted file mode 100644 index ef26d04..0000000 --- a/Data/style2.py +++ /dev/null @@ -1,135 +0,0 @@ -import pandas as pd -import numpy as np -import seaborn as sns -import matplotlib.pyplot as plt -import dataframe_image as dfi -# Load data -df = pd.read_csv("/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv", sep='\t') - -# 1. Identify all GT and result columns -gt_columns = [col for col in df.columns if col.startswith('GT.')] -result_columns = [col for col in df.columns if col.startswith('result.')] - -print("GT Columns found:", gt_columns) -print("Result Columns found:", result_columns) - -# 2. Create proper mapping between GT and result columns -# Handle various naming conventions (spaces, underscores, etc.) -column_mapping = {} - -for gt_col in gt_columns: - base_name = gt_col.replace('GT.', '') - - # Clean the base name for matching - remove spaces, underscores, etc. - # Try different matching approaches - candidates = [ - f'result.{base_name}', # Exact match - f'result.{base_name.replace(" ", "_")}', # With underscores - f'result.{base_name.replace("_", " ")}', # With spaces - f'result.{base_name.replace(" ", "")}', # No spaces - f'result.{base_name.replace("_", "")}' # No underscores - ] - - # Also try case-insensitive matching - candidates.append(f'result.{base_name.lower()}') - candidates.append(f'result.{base_name.upper()}') - - # Try to find matching result column - matched = False - for candidate in candidates: - if candidate in result_columns: - column_mapping[gt_col] = candidate - matched = True - break - - # If no exact match found, try partial matching - if not matched: - # Try to match by removing special characters and comparing - base_clean = ''.join(e for e in base_name if e.isalnum() or e in ['_', ' ']) - for result_col in result_columns: - result_base = result_col.replace('result.', '') - result_clean = ''.join(e for e in result_base if e.isalnum() or e in ['_', ' ']) - if base_clean.lower() == result_clean.lower(): - column_mapping[gt_col] = result_col - matched = True - break - -print("Column mapping:", column_mapping) - -# 3. Faster, vectorized computation using the corrected mapping -data_list = [] - -for gt_col, result_col in column_mapping.items(): - print(f"Processing {gt_col} vs {result_col}") - - # Convert to numeric, forcing errors to NaN - s1 = pd.to_numeric(df[gt_col], errors='coerce').astype(float) - s2 = pd.to_numeric(df[result_col], errors='coerce').astype(float) - - # Calculate matches (abs difference <= 0.5) - diff = np.abs(s1 - s2) - matches = (diff <= 0.5).sum() - - # Determine the denominator (total valid comparisons) - valid_count = diff.notna().sum() - - if valid_count > 0: - percentage = (matches / valid_count) * 100 - else: - percentage = 0 - - # Extract clean base name for display - base_name = gt_col.replace('GT.', '') - - data_list.append({ - 'GT': base_name, - 'Match %': round(percentage, 1) - }) - - - -# 4. Prepare Data for Plotting -match_df = pd.DataFrame(data_list) -match_df = match_df.sort_values('Match %', ascending=False) # Sort for better visual flow - -# 5. Create the Styled Gradient Table -def style_agreement_table(df): - return (df.style - .format({'Match %': '{:.1f}%'}) # Add % sign - .background_gradient(cmap='RdYlGn', subset=['Match %'], vmin=50, vmax=100) # Red to Green gradient - .set_properties(**{ - 'text-align': 'center', - 'font-size': '12pt', - 'border-collapse': 'collapse', - 'border': '1px solid #D3D3D3' - }) - .set_table_styles([ - # Style the header - {'selector': 'th', 'props': [ - ('background-color', '#404040'), - ('color', 'white'), - ('font-weight', 'bold'), - ('text-transform', 'uppercase'), - ('padding', '10px') - ]}, - # Add hover effect - {'selector': 'tr:hover', 'props': [('background-color', '#f5f5f5')]} - ]) - .set_caption("EDSS Agreement Analysis: Ground Truth vs. Results (Tolerance ±0.5)") - ) - -# To display in a Jupyter Notebook: -styled_table = style_agreement_table(match_df) -styled_table - -dfi.export(styled_table, "styled_table.png") -#styled_table.to_html("agreement_report.html") -# 6. Save as SVG - -#plt.savefig("agreement_table.svg", format='svg', dpi=300, bbox_inches='tight') -#print("Successfully saved agreement_table.svg") - -# Show plot if running in a GUI environment -plt.show() - - diff --git a/Data/styled_tables.py b/Data/styled_tables.py deleted file mode 100644 index 4c3ebc5..0000000 --- a/Data/styled_tables.py +++ /dev/null @@ -1,74 +0,0 @@ -import pandas as pd -import numpy as np -import seaborn as sns - -# Sample data (replace with your actual df) -df = pd.read_csv("/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv", sep='\t') - -# Identify GT and Result columns -gt_columns = [col for col in df.columns if col.startswith('GT.')] -result_columns = [col for col in df.columns if col.startswith('result.')] - -# Create mapping -column_mapping = {} -for gt_col in gt_columns: - base_name = gt_col.replace('GT.', '') - result_col = f'result.{base_name}' - if result_col in result_columns: - column_mapping[gt_col] = result_col - -# Function to compute match percentage for each GT-Result pair -def compute_match_percentages(df, column_mapping): - percentages = [] - for gt_col, result_col in column_mapping.items(): - count = 0 - total = len(df) - - for _, row in df.iterrows(): - gt_val = row[gt_col] - result_val = row[result_col] - - # Handle NaN values - if pd.isna(gt_val) or pd.isna(result_val): - continue - - # Handle non-numeric values - try: - gt_float = float(gt_val) - result_float = float(result_val) - except (ValueError, TypeError): - # Skip rows with non-numeric values - continue - - # Check if values are within 0.5 tolerance - if abs(gt_float - result_float) <= 0.5: - count += 1 - - percentage = (count / total) * 100 - percentages.append({ - 'GT_Column': gt_col, - 'Result_Column': result_col, - 'Match_Percentage': round(percentage, 1) - }) - - return pd.DataFrame(percentages) - -# Compute match percentages -match_df = compute_match_percentages(df, column_mapping) - -# Create a pivot table for gradient display (optional but helpful) -pivot_table = match_df.set_index(['GT_Column', 'Result_Column'])['Match_Percentage'].unstack(fill_value=0) - -# Apply gradient background -cm = sns.light_palette("green", as_cmap=True) -styled_table = pivot_table.style.background_gradient(cmap=cm, axis=None) - -# Display result -print("Agreement Percentage Table (with gradient):") -styled_table - - - -# Save the styled table to a file -styled_table.to_html("agreement_report.html") -print("Report saved to agreement_report.html")