Adjust the script to use the new paths

This commit is contained in:
2026-05-19 10:13:29 +02:00
parent 98df7c70f1
commit bb9fcf20ae
2 changed files with 71 additions and 455 deletions
+71 -71
View File
@@ -1258,7 +1258,7 @@
# %% API call - Multi-model, multi-iteration EDSS + timing/resource benchmark
#
#import time
#import json
#import os
@@ -1279,9 +1279,9 @@
# print("Install with: pip install psutil")
#
#
## =========================
## CONFIGURATION
## =========================
# =========================
# CONFIGURATION
# =========================
#
#load_dotenv()
#
@@ -1289,21 +1289,21 @@
#OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL")
#
#MODEL_CONFIGS = [
## {
## "model_name": "qwen3.6-35b-a3b",
## "use_response_format": False,
## "temperature": 0.0,
## "max_tokens": 4096,
##
## # If your backend is vLLM / Qwen chat-template compatible,
## # this may reduce long hidden reasoning and JSON truncation.
## # If your server errors because of extra_body, set this to None.
## "extra_body": {
## "chat_template_kwargs": {
## "enable_thinking": False
## }
## },
## },
# {
# "model_name": "qwen3.6-35b-a3b",
# "use_response_format": False,
# "temperature": 0.0,
# "max_tokens": 4096,
#
# # If your backend is vLLM / Qwen chat-template compatible,
# # this may reduce long hidden reasoning and JSON truncation.
# # If your server errors because of extra_body, set this to None.
# "extra_body": {
# "chat_template_kwargs": {
# "enable_thinking": False
# }
# },
# },
# {
# "model_name": "gemma-4-31B-it",
# "use_response_format": False,
@@ -1328,10 +1328,10 @@
#NUM_ITERATIONS = 10
#STOP_ON_FIRST_ERROR = False
#
## For testing, set to e.g. 2.
## For full run, set to None.
# For testing, set to e.g. 2.
# For full run, set to None.
#MAX_ROWS = 2
## MAX_ROWS = 2
# MAX_ROWS = 2
#
#MAX_TOKENS = 4096
#TEMPERATURE = 0.0
@@ -1340,14 +1340,14 @@
#
#SAVE_EVERY_N_ROWS = 1
#
## Retries for invalid JSON / truncated JSON
# Retries for invalid JSON / truncated JSON
#MAX_JSON_RETRIES = 2
#RETRY_SLEEP_SEC = 2
#
#
## =========================
## CLIENT
## =========================
# =========================
# CLIENT
# =========================
#
#client = OpenAI(
# api_key=OPENAI_API_KEY,
@@ -1355,9 +1355,9 @@
#)
#
#
## =========================
## HELPERS
## =========================
# =========================
# HELPERS
# =========================
#
#def safe_dir_name(name: str) -> str:
# name = str(name).strip()
@@ -1433,9 +1433,9 @@
# return max(self.samples_mb)
#
#
## =========================
## JSON EXTRACTION
## =========================
# =========================
# JSON EXTRACTION
# =========================
#
#def extract_json_from_text(text):
# if text is None:
@@ -1587,17 +1587,17 @@
# return None
#
#
## =========================
## READ INSTRUCTIONS
## =========================
# =========================
# READ INSTRUCTIONS
# =========================
#
#with open(EDSS_INSTRUCTIONS_PATH, "r", encoding="utf-8") as f:
# EDSS_INSTRUCTIONS = f.read().strip()
#
#
## =========================
## PROMPT
## =========================
# =========================
# PROMPT
# =========================
#
#def build_prompt(patient_text):
# return f'''Du bist ein medizinischer Assistent für EDSS-Extraktion aus klinischen Berichten.
@@ -1676,9 +1676,9 @@
#'''
#
#
## =========================
## VALIDATION / NORMALIZATION
## =========================
# =========================
# VALIDATION / NORMALIZATION
# =========================
#
#def normalize_model_output(parsed):
# if not isinstance(parsed, dict):
@@ -1755,9 +1755,9 @@
# return parsed
#
#
## =========================
## API CALL
## =========================
# =========================
# API CALL
# =========================
#
#def make_chat_completion(model_config, prompt):
# model_name = model_config["model_name"]
@@ -1794,9 +1794,9 @@
# return client.chat.completions.create(**kwargs)
#
#
## =========================
## INFERENCE FUNCTION WITH RETRIES
## =========================
# =========================
# INFERENCE FUNCTION WITH RETRIES
# =========================
#
#def run_inference(patient_text, model_config):
# model_name = model_config["model_name"]
@@ -1912,9 +1912,9 @@
# }
#
#
## =========================
## BUILD PATIENT TEXT
## =========================
# =========================
# BUILD PATIENT TEXT
# =========================
#
#def build_patient_text(row):
# return (
@@ -1925,9 +1925,9 @@
# )
#
#
## =========================
## FLATTEN RESULTS FOR CSV
## =========================
# =========================
# FLATTEN RESULTS FOR CSV
# =========================
#
#def flatten_result(record):
# flat = {
@@ -2012,9 +2012,9 @@
# return pd.DataFrame([summary])
#
#
## =========================
## INCREMENTAL SAVE HELPERS
## =========================
# =========================
# INCREMENTAL SAVE HELPERS
# =========================
#
#def append_jsonl(path, record):
# with open(path, "a", encoding="utf-8") as f:
@@ -2030,9 +2030,9 @@
# df_one.to_csv(path, mode="a", header=not file_exists, index=False)
#
#
## =========================
## MAIN LOOP
## =========================
# =========================
# MAIN LOOP
# =========================
#
#if __name__ == "__main__":
#
@@ -2277,26 +2277,26 @@ MODEL_CONFIGS = [
"max_tokens": 4096,
"extra_body": None,
},
# {
# "model_name": "GPT-OSS-120B",
# "use_response_format": True,
# "temperature": 0.0,
# "max_tokens": 4096,
# "extra_body": None,
# },
{
"model_name": "GPT-OSS-120B",
"use_response_format": True,
"temperature": 0.0,
"max_tokens": 4096,
"extra_body": None,
},
]
INPUT_CSV = "/home/shahin/Lab/Doktorarbeit/Barcelona/Data/MS_Briefe_400_with_unique_id_SHA3_explore_cleaned_unique.csv"
EDSS_INSTRUCTIONS_PATH = "/home/shahin/Lab/Doktorarbeit/Barcelona/attach/Komplett.txt"
INPUT_CSV ="/home/shahin/Lab/Doktorarbeit/Barcelona/data/processed/MS_Briefe_400_with_unique_id_SHA3_explore_cleaned_unique.csv"
EDSS_INSTRUCTIONS_PATH = "/home/shahin/Lab/Doktorarbeit/Barcelona/prompts/Komplett.txt"
RESULTS_ROOT = "/home/shahin/Lab/Doktorarbeit/Barcelona/results_edss_benchmark"
RESULTS_ROOT = "/home/shahin/Lab/Doktorarbeit/Barcelona/results/benchmark_runs"
NUM_ITERATIONS = 10
NUM_ITERATIONS = 2
STOP_ON_FIRST_ERROR = False
# For testing, set to e.g. 2.
# For full run, set to None.
MAX_ROWS = None
MAX_ROWS = 2
# MAX_ROWS = 2
MAX_TOKENS = 4096