diff --git a/.gitignore b/.gitignore
index afe2020..9cefb48 100644
--- a/.gitignore
+++ b/.gitignore
@@ -49,7 +49,7 @@ Thumbs.db
 # ======================
 # Experimental scripts
 # ======================
-2moodle.py
-app.old_inside_pipeline.py
-feedback_bewertung.py
+#2moodle.py
+#app.old_inside_pipeline.py
+#feedback_bewertung.py
 
diff --git a/exp/2moodle.py b/exp/2moodle.py
new file mode 100644
index 0000000..c77e03b
--- /dev/null
+++ b/exp/2moodle.py
@@ -0,0 +1,1539 @@
+# %% Prompt
+
+import openai
+import os
+from pathlib import Path
+import datetime
+import time
+import json as json_module
+import csv
+from pydantic import BaseModel
+
+# Initialize the OpenAI-compatible client; the key comes from OPENAI_API_KEY so no secret is stored in the repository
+client = openai.OpenAI(
+    api_key=os.environ["OPENAI_API_KEY"],  # raises KeyError early if unset; the previously committed key must be rotated
+    base_url="http://pluto/v1"
+)
+
+# Enhanced prompt with detailed criteria and structured output requirements
+EVAL_PROMPT = """
+SYSTEM:
+Du bist ein Expert:in für medizinische Lehre und Feedback-Didaktik an einer medizinischen Fakultät. 
+Bewerte das folgende Tutor-Feedback gemäß den offiziellen Feedback-Prinzipien der Medizinischen Fakultät Dresden.
+
+KRITERIEN:
+A1 PERSPEKTIVE (Ich-Botschaften)
+"A feedback ... wird in „Ich-Botschaften“ ausgedrückt."
+Bewertung: Wird subjektive Wahrnehmung in Ich-Formulierungen dargestellt?
+
+A2 RESPEKT & WERTFREIHEIT
+"Ein Feedback ... ist nicht (ab)wertend."
+Bewertung: Wird respektvoll und wertfrei kommuniziert?
+
+B1 KONKRETHEIT
+"Das Feedback sollte so konkret wie möglich sein. Die Wiedergabe beobachteter Beispiele ist hilfreich."
+Bewertung: Enthält das Feedback beobachtbare Beispiele statt Verallgemeinerungen?
+
+B2 TRENNUNG VON BEOBACHTUNG UND INTERPRETATION
+"Ein Feedback ... gibt erst nach der Äußerung von sinnlich Wahrnehmbarem die Möglichkeit zu Interpretationen, Annahmen und Schlussfolgerungen."
+Bewertung: Wird zwischen beobachtbaren Fakten und Interpretationen unterschieden?
+
+C1 STRUKTURIERTE LOGIK (WWW/BEB-Prinzip)
+WWW: "1. Wahrnehmung: Ich habe gesehen ... 2. Wirkung: ... das hat mich nervös gemacht. 3. Wunsch: Ich wünsche mir ..."
+BEB: "1. Beobachtung: Ich habe gesehen ... 2. Empfehlung: Ich empfehle ... 3. Begründung: Auf diese Weise vermeiden Sie ..."
+Bewertung: Folgt das Feedback einer klaren Struktur (WWW oder BEB)?
+
+D1 ZUKUNGSORIENTIERTE EMPFEHLUNG
+"Ein Feedback ... endet mit einer wertschätzenden Anregung für zukünftige Verbesserungen."
+Bewertung: Gibt es konkrete, zukunftsorientierte Handlungsempfehlungen?
+
+D2 WERTSCHÄTZENDER ABSCHLUSS
+"Ein Feedback ... endet mit einer wertschätzenden Anregung für zukünftige Verbesserungen."
+Bewertung: Schließt das Feedback wertschätzend ab?
+
+E1 KOMMUNIKATIONSEBENEN
+"Vier Seiten einer Nachricht: Sachinhalt, Selbstoffenbarung, Beziehung, Appell"
+Bewertung: Berücksichtigt das Feedback die verschiedenen Kommunikationsebenen?
+
+F1 FÖRDERUNG VON REFLEXION
+"Feedback ... ist eines der einflussreichsten Faktoren für den Lernerfolg."
+Bewertung: Fördert das Feedback die Reflexion und das Lernen?
+
+SCORING:
+Bewerte jedes Kriterium mit:
+0 = nicht erfüllt
+1 = teilweise erfüllt
+2 = vollständig erfüllt
+
+AUFGABE:
+1. Bewerte jedes Kriterium mit einer Punktzahl (0-2)
+2. Gib eine kurze Begründung für jede Bewertung mit Zitaten oder Paraphrasierungen aus dem Feedback
+3. Berechne die Gesamtpunktzahl (max. 18)
+4. Weise eine qualitative Bewertungsstufe zu
+5. Gib 3 konkrete Verbesserungsvorschläge
+
+OUTPUT FORMAT (JSON):
+{
+  "scores": {
+    "A1": {"score": 0-2, "justification": "..."},
+    "A2": {"score": 0-2, "justification": "..."},
+    "B1": {"score": 0-2, "justification": "..."},
+    "B2": {"score": 0-2, "justification": "..."},
+    "C1": {"score": 0-2, "justification": "..."},
+    "D1": {"score": 0-2, "justification": "..."},
+    "D2": {"score": 0-2, "justification": "..."},
+    "E1": {"score": 0-2, "justification": "..."},
+    "F1": {"score": 0-2, "justification": "..."}
+  },
+  "total_score": 0,
+  "quality_level": "",
+  "strengths": [],
+  "weaknesses": [],
+  "improvement_suggestions": []
+}
+
+TUTOR FEEDBACK:
+"""
+
+# Pydantic models for structured output validation
+class ScoreItem(BaseModel):
+    score: int  # 0-2 (0=not fulfilled, 2=fully fulfilled)
+    justification: str
+
+class EvaluationResult(BaseModel):
+    scores: dict[str, ScoreItem]
+    total_score: int
+    quality_level: str
+    strengths: list[str]
+    weaknesses: list[str]
+    improvement_suggestions: list[str]
+##
+
+# %% Main
+input_dir = "./cruscloud/Teil3/Transkripte/"
+# Hardcoded output directory - CHANGE THIS PATH AS NEEDED
+output_dir = "./cruscloud/Teil3/Evaluations_moodle2"
+Path(output_dir).mkdir(parents=True, exist_ok=True)
+
+# Create timing log file
+timing_log_path = Path(output_dir) / "evaluation_timing.log"
+with open(timing_log_path, "w", encoding="utf-8") as log:
+    log.write(f"FEEDBACK EVALUATION TIMING LOG - {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
+    log.write("="*80 + "\n\n")
+
+# Create CSV timing file with headers
+csv_timing_path = Path(output_dir) / "evaluation_timings.csv"
+with open(csv_timing_path, "w", encoding="utf-8", newline="") as csv_file:
+    csv_writer = csv.writer(csv_file, delimiter=",")
+    # Write CSV header
+    csv_writer.writerow([
+        "Filename", 
+        "Total_Time_sec", 
+        "API_Evaluation_Time_sec",
+        "Reasoning_Time_sec",
+        "Start_Time",
+        "End_Time",
+        "Status",
+        "Total_Score",
+        "Quality_Level",
+        "A1_Score",
+        "A2_Score",
+        "B1_Score",
+        "B2_Score",
+        "C1_Score",
+        "D1_Score",
+        "D2_Score",
+        "E1_Score",
+        "F1_Score"
+    ])
+
+files = list(Path(input_dir).glob("*.txt"))
+results = {}
+total_start = time.time()
+
+for f in files:
+    file_start = time.time()
+    start_time_str = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+    print(f"\n{'='*50}")
+    print(f"Beginne Bewertung: {f.name}")
+    print(f"Startzeit: {datetime.datetime.now().strftime('%H:%M:%S')}")
+    
+    # Read input text
+    text = f.read_text(encoding="utf-8")
+    
+    # Per-file evaluation state, reset on every iteration before the API call
+    criterion_timings = {}
+    status, reasoning_time = "Success", 0  # reset so a failed call cannot write the previous file's reasoning time to the CSV
+    
+    try:
+        # Time the complete API evaluation process
+        eval_start = time.time()
+        
+        # Use reasoning model with specified parameters
+        response = client.chat.completions.create(
+            model="GPT-OSS-120B",
+            messages=[
+                {"role": "system", "content": EVAL_PROMPT},
+                {"role": "user", "content": text},
+            ],
+            response_format={"type": "json_object"},
+            temperature=0.1,
+            max_completion_tokens=1024,
+            reasoning_effort="medium",  # Using the reasoning model capabilities
+            extra_body={"allowed_openai_params": ["reasoning_effort"]}
+        )
+        
+        # Measure reasoning time separately if available
+        reasoning_time = 0
+        if hasattr(response.choices[0].message, 'reasoning_content') and response.choices[0].message.reasoning_content:
+            reasoning_time = time.time() - eval_start
+            criterion_timings["Reasoning"] = reasoning_time
+            print(f"  • Reasoning: {reasoning_time:.2f} sec")
+        
+        eval_duration = time.time() - eval_start
+        criterion_timings["Gesamtbewertung"] = eval_duration
+        print(f"  • Gesamtbewertung: {eval_duration:.2f} sec")
+        
+        # Parse the JSON response
+        try:
+            parsed_response = json_module.loads(response.choices[0].message.content)
+            
+            # Validate structure before passing to Pydantic
+            required_keys = ["scores", "total_score", "quality_level", 
+                            "strengths", "weaknesses", "improvement_suggestions"]
+            
+            if not all(key in parsed_response for key in required_keys):
+                print(f"  ! Warnung: Ungewöhnliche Antwortstruktur erkannt. Versuche Konvertierung...")
+                status = "Partial Structure"
+            
+            # Create evaluation object
+            evaluation = EvaluationResult(**parsed_response)
+            results[f.name] = evaluation
+            
+        except json_module.JSONDecodeError as e:
+            print(f"  ! JSON-Decoding-Fehler: {e}")
+            print(f"  ! Antwortinhalt: {response.choices[0].message.content[:200]}...")
+            status = f"JSON Error: {str(e)}"
+            
+            # Create a default evaluation with error messages
+            evaluation = EvaluationResult(
+                scores={
+                    "A1": ScoreItem(score=0, justification="FEHLER: Ungültige JSON-Antwort vom API"),
+                    "A2": ScoreItem(score=0, justification="FEHLER: Ungültige JSON-Antwort vom API"),
+                    "B1": ScoreItem(score=0, justification="FEHLER: Ungültige JSON-Antwort vom API"),
+                    "B2": ScoreItem(score=0, justification="FEHLER: Ungültige JSON-Antwort vom API"),
+                    "C1": ScoreItem(score=0, justification="FEHLER: Ungültige JSON-Antwort vom API"),
+                    "D1": ScoreItem(score=0, justification="FEHLER: Ungültige JSON-Antwort vom API"),
+                    "D2": ScoreItem(score=0, justification="FEHLER: Ungültige JSON-Antwort vom API"),
+                    "E1": ScoreItem(score=0, justification="FEHLER: Ungültige JSON-Antwort vom API"),
+                    "F1": ScoreItem(score=0, justification="FEHLER: Ungültige JSON-Antwort vom API")
+                },
+                total_score=0,
+                quality_level="Fehlerhaft",
+                strengths=["Bewertung fehlgeschlagen"],
+                weaknesses=["Keine Bewertung möglich"],
+                improvement_suggestions=["Korrigieren Sie die Feedback-Struktur"]
+            )
+            results[f.name] = evaluation
+    
+    except Exception as e:
+        print(f"  ! Unerwarteter Fehler: {str(e)}")
+        status = f"API Error: {str(e)}"
+        
+        # Create a default evaluation with error messages
+        evaluation = EvaluationResult(
+            scores={
+                "A1": ScoreItem(score=0, justification=f"FEHLER: Bewertung fehlgeschlagen. Details: {str(e)}"),
+                "A2": ScoreItem(score=0, justification=f"FEHLER: Bewertung fehlgeschlagen. Details: {str(e)}"),
+                "B1": ScoreItem(score=0, justification=f"FEHLER: Bewertung fehlgeschlagen. Details: {str(e)}"),
+                "B2": ScoreItem(score=0, justification=f"FEHLER: Bewertung fehlgeschlagen. Details: {str(e)}"),
+                "C1": ScoreItem(score=0, justification=f"FEHLER: Bewertung fehlgeschlagen. Details: {str(e)}"),
+                "D1": ScoreItem(score=0, justification=f"FEHLER: Bewertung fehlgeschlagen. Details: {str(e)}"),
+                "D2": ScoreItem(score=0, justification=f"FEHLER: Bewertung fehlgeschlagen. Details: {str(e)}"),
+                "E1": ScoreItem(score=0, justification=f"FEHLER: Bewertung fehlgeschlagen. Details: {str(e)}"),
+                "F1": ScoreItem(score=0, justification=f"FEHLER: Bewertung fehlgeschlagen. Details: {str(e)}")
+            },
+            total_score=0,
+            quality_level="Fehlerhaft",
+            strengths=["Bewertung fehlgeschlagen"],
+            weaknesses=["Keine Bewertung möglich"],
+            improvement_suggestions=["Korrigieren Sie die Feedback-Struktur"]
+        )
+        results[f.name] = evaluation
+        eval_duration = time.time() - eval_start
+        criterion_timings["Gesamtbewertung"] = eval_duration
+
+    # Generate detailed text report with timing
+    report = f"""FEEDBACK-EVALUATION BERICHT
+============================
+Eingabedatei: {f.name}
+Erstellungsdatum: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
+Kursreferenz: "Feedback in der Lehre: Basics" (Hochschulmedizin Dresden)
+
+VERARBEITUNGSZEITEN
+----------------------------------------
+Gesamtverarbeitung: {time.time() - file_start:.2f} Sekunden
+"""
+    
+    # Add timing for evaluation
+    for criterion, duration in criterion_timings.items():
+        report += f"  • {criterion}: {duration:.2f} Sekunden\n"
+    
+    # Add evaluation results
+    report += f"""
+
+KRITERIENBEWERTUNG
+----------------------------------------
+A1 PERSPEKTIVE (Ich-Botschaften): {evaluation.scores['A1'].score}/2
+Begründung: {evaluation.scores['A1'].justification}
+
+A2 RESPEKT & WERTFREIHEIT: {evaluation.scores['A2'].score}/2
+Begründung: {evaluation.scores['A2'].justification}
+
+B1 KONKRETHEIT: {evaluation.scores['B1'].score}/2
+Begründung: {evaluation.scores['B1'].justification}
+
+B2 TRENNUNG VON BEOBACHTUNG UND INTERPRETATION: {evaluation.scores['B2'].score}/2
+Begründung: {evaluation.scores['B2'].justification}
+
+C1 STRUKTURIERTE LOGIK (WWW/BEB): {evaluation.scores['C1'].score}/2
+Begründung: {evaluation.scores['C1'].justification}
+
+D1 ZUKUNGSORIENTIERTE EMPFEHLUNG: {evaluation.scores['D1'].score}/2
+Begründung: {evaluation.scores['D1'].justification}
+
+D2 WERTSCHÄTZENDER ABSCHLUSS: {evaluation.scores['D2'].score}/2
+Begründung: {evaluation.scores['D2'].justification}
+
+E1 KOMMUNIKATIONSEBENEN: {evaluation.scores['E1'].score}/2
+Begründung: {evaluation.scores['E1'].justification}
+
+F1 FÖRDERUNG VON REFLEXION: {evaluation.scores['F1'].score}/2
+Begründung: {evaluation.scores['F1'].justification}
+
+GESAMTBEWERTUNG
+----------------------------------------
+Gesamtpunktzahl: {evaluation.total_score}/18
+
+Qualitätsstufe: {evaluation.quality_level}
+
+Stärken:
+"""
+    for strength in evaluation.strengths:
+        report += f"- {strength}\n"
+        
+    report += "\nSchwächen:\n"
+    for weakness in evaluation.weaknesses:
+        report += f"- {weakness}\n"
+        
+    report += "\nVerbesserungsvorschläge:\n"
+    for suggestion in evaluation.improvement_suggestions:
+        report += f"- {suggestion}\n"
+    
+    # Save report to output directory
+    output_path = Path(output_dir) / f"{f.stem}_evaluation.txt"
+    with open(output_path, "w", encoding="utf-8") as out_file:
+        out_file.write(report)
+    
+    # Write timing data to CSV
+    with open(csv_timing_path, "a", encoding="utf-8", newline="") as csv_file:
+        csv_writer = csv.writer(csv_file, delimiter=",")
+        csv_writer.writerow([
+            f.name,
+            f"{time.time() - file_start:.2f}",
+            f"{eval_duration:.2f}",
+            f"{reasoning_time:.2f}" if 'reasoning_time' in locals() else "0.00",
+            start_time_str,
+            datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
+            status,
+            evaluation.total_score,
+            evaluation.quality_level,
+            evaluation.scores['A1'].score,
+            evaluation.scores['A2'].score,
+            evaluation.scores['B1'].score,
+            evaluation.scores['B2'].score,
+            evaluation.scores['C1'].score,
+            evaluation.scores['D1'].score,
+            evaluation.scores['D2'].score,
+            evaluation.scores['E1'].score,
+            evaluation.scores['F1'].score
+        ])
+
+    # Append this file's timing to the central log; "Start" is derived from file_start, not from the time of writing
+    with open(timing_log_path, "a", encoding="utf-8") as log:
+        log.write(f"Datei: {f.name}\n")
+        log.write(f"Start: {datetime.datetime.fromtimestamp(file_start).strftime('%H:%M:%S')}\n")
+        log.write(f"Dauer: {time.time() - file_start:.2f} Sekunden\n")
+        log.write("Detailierte Zeiten:\n")
+        for criterion, duration in criterion_timings.items():
+            log.write(f"  • {criterion}: {duration:.2f} Sekunden\n")
+        log.write("-"*50 + "\n\n")
+    
+    print(f"\nBewertungsbericht erstellt: {output_path}")
+    print(f"Gesamtzeit für {f.name}: {time.time() - file_start:.2f} Sekunden")
+    print(f"{'='*50}")
+
+total_duration = time.time() - total_start
+print(f"\n{'='*50}")
+print("ALLE BEWERTUNGEN ABGESCHLOSSEN")
+print(f"Gesamtverarbeitungszeit: {total_duration:.2f} Sekunden für {len(files)} Dateien")
+print(f"Durchschnittliche Zeit pro Datei: {total_duration / max(len(files), 1):.2f} Sekunden")  # max() avoids ZeroDivisionError when no *.txt inputs were found
+print(f"Bewertungsberichte gespeichert in: {output_dir}")
+print(f"Timing-Log aktualisiert: {timing_log_path}")
+print(f"CSV-Timing-Datei erstellt: {csv_timing_path}")
+print(f"{'='*50}")
+
+
+
+##
+
+
+
+
+
+
+# %% Feedback_Bewertung
+import openai
+import os
+from pathlib import Path
+import datetime
+import time
+import json as json_module
+import csv
+from pydantic import BaseModel
+
+client = openai.OpenAI(
+    api_key=os.environ["OPENAI_API_KEY"],  # read from the environment; never commit credentials (the previously committed key must be rotated)
+    base_url="http://pluto/v1"
+)
+
+EVAL_PROMPT = '''
+Du bist ein strenger, objektiver Bewertender für medizinische Lehre.
+Bewerte das folgende Feedback anhand der Kursinhalte "Feedback in der Lehre: Basics".
+
+KRITERIEN (basierend auf Kursmaterial):
+A1 PERSPEKTIVE (Ich-Botschaften)
+"A feedback ... wird in „Ich-Botschaften" ausgedrückt."
+Bewertung: Wird subjektive Wahrnehmung in Ich-Formulierungen dargestellt?
+
+A2 RESPEKT & WERTFREIHEIT
+"Ein Feedback ... ist nicht (ab)wertend."
+Bewertung: Wird respektvoll und wertfrei kommuniziert?
+
+B1 KONKRETHEIT
+"Das Feedback sollte so konkret wie möglich sein. Die Wiedergabe beobachteter Beispiele ist hilfreich."
+Bewertung: Enthält das Feedback beobachtbare Beispiele statt Verallgemeinerungen?
+
+B2 TRENNUNG VON BEOBACHTUNG UND INTERPRETATION
+"Ein Feedback ... gibt erst nach der Äußerung von sinnlich Wahrnehmbarem die Möglichkeit zu Interpretationen, Annahmen und Schlussfolgerungen."
+Bewertung: Wird zwischen beobachtbaren Fakten und Interpretationen unterschieden?
+
+C1 STRUKTURIERTE LOGIK (WWW/BEB-Prinzip)
+WWW: "1. Wahrnehmung: Ich habe gesehen ... 2. Wirkung: ... das hat mich nervös gemacht. 3. Wunsch: Ich wünsche mir ..."
+BEB: "1. Beobachtung: Ich habe gesehen ... 2. Empfehlung: Ich empfehle ... 3. Begründung: Auf diese Weise vermeiden Sie ..."
+Bewertung: Folgt das Feedback einer klaren Struktur (WWW oder BEB)?
+
+D1 ZUKUNGSORIENTIERTE EMPFEHLUNG
+"Ein Feedback ... endet mit einer wertschätzenden Anregung für zukünftige Verbesserungen."
+Bewertung: Gibt es konkrete, zukunftsorientierte Handlungsempfehlungen?
+
+D2 WERTSCHÄTZENDER ABSCHLUSS
+"Ein Feedback ... endet mit einer wertschätzenden Anregung für zukünftige Verbesserungen."
+Bewertung: Schließt das Feedback wertschätzend ab?
+
+E1 KOMMUNIKATIONSEBENEN
+"Vier Seiten einer Nachricht: Sachinhalt, Selbstoffenbarung, Beziehung, Appell"
+Bewertung: Berücksichtigt das Feedback die verschiedenen Kommunikationsebenen?
+
+F1 FÖRDERUNG VON REFLEXION
+"Feedback ... ist eines der einflussreichsten Faktoren für den Lernerfolg."
+Bewertung: Fördert das Feedback die Reflexion und das Lernen?
+
+SCORING-ANLEITUNG:
+1 = Vollständige Umsetzung (exzellentes Beispiel)
+2 = Gute Umsetzung mit minimalen Lücken
+3 = Grundlegende Umsetzung mit signifikanten Mängeln
+4 = Unzureichende Umsetzung (wichtige Elemente fehlen)
+5 = Keine erkennbare Umsetzung (kriterienwidrig)
+
+WICHTIG: Gib die Ergebnisse AUSSCHLIESSLICH als JSON mit EXAKT folgender Struktur zurück:
+{
+  "scores": {
+    "A1": {"score": 1, "justification": "Begründung hier"},
+    "A2": {"score": 1, "justification": "Begründung hier"},
+    "B1": {"score": 1, "justification": "Begründung hier"},
+    "B2": {"score": 1, "justification": "Begründung hier"},
+    "C1": {"score": 1, "justification": "Begründung hier"},
+    "D1": {"score": 1, "justification": "Begründung hier"},
+    "D2": {"score": 1, "justification": "Begründung hier"},
+    "E1": {"score": 1, "justification": "Begründung hier"},
+    "F1": {"score": 1, "justification": "Begründung hier"}
+  },
+  "total_score": 0,
+  "quality_level": "Beispiel-Qualitätsstufe",
+  "strengths": ["Stärke 1", "Stärke 2"],
+  "weaknesses": ["Schwäche 1", "Schwäche 2"],
+  "improvement_suggestions": ["Vorschlag 1", "Vorschlag 2", "Vorschlag 3"]
+}
+'''
+
+class ScoreItem(BaseModel):
+    score: int  # 1-5 (1=excellent, 5=failed)
+    justification: str
+
+class EvaluationResult(BaseModel):
+    scores: dict[str, ScoreItem]
+    total_score: int
+    quality_level: str
+    strengths: list[str]
+    weaknesses: list[str]
+    improvement_suggestions: list[str]
+##
+
+# %% Main
+input_dir = "./cruscloud/Teil3/Transkripte/"
+# Hardcoded output directory - CHANGE THIS PATH AS NEEDED
+output_dir = "./cruscloud/Teil3/Evaluations_moodle3"
+Path(output_dir).mkdir(parents=True, exist_ok=True)
+
+# Create timing log file
+timing_log_path = Path(output_dir) / "evaluation_timing.log"
+with open(timing_log_path, "w", encoding="utf-8") as log:
+    log.write(f"FEEDBACK EVALUATION TIMING LOG - {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
+    log.write("="*80 + "\n\n")
+
+# Create CSV timing file with headers
+csv_timing_path = Path(output_dir) / "evaluation_timings.csv"
+with open(csv_timing_path, "w", encoding="utf-8", newline="") as csv_file:
+    csv_writer = csv.writer(csv_file, delimiter=",")
+    # Write CSV header
+    csv_writer.writerow([
+        "Filename",
+        "Total_Time_sec",
+        "API_Evaluation_Time_sec",
+        "Start_Time",
+        "End_Time",
+        "Status",
+        "Average_Score",
+        "Quality_Level",
+        "A1_Score",
+        "A2_Score",
+        "B1_Score",
+        "B2_Score",
+        "C1_Score",
+        "D1_Score",
+        "D2_Score",
+        "E1_Score",
+        "F1_Score"
+    ])
+
+files = list(Path(input_dir).glob("*.txt"))
+results = {}
+total_start = time.time()
+
+for f in files:
+    file_start = time.time()
+    start_time_str = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+    print(f"\n{'='*50}")
+    print(f"Beginne Bewertung: {f.name}")
+    print(f"Startzeit: {datetime.datetime.now().strftime('%H:%M:%S')}")
+
+    # Read input text
+    text = f.read_text(encoding="utf-8")
+
+    # Get AI evaluation with timing
+    criterion_timings = {}
+    status = "Success"
+
+    # We'll evaluate all criteria in one call with strict JSON structure
+    try:
+        eval_start = time.time()
+        response = client.chat.completions.create(
+            model="GPT-OSS-120B",
+            messages=[
+                {"role": "system", "content": EVAL_PROMPT},
+                {"role": "user", "content": text},
+            ],
+            response_format={"type": "json_object"},
+            temperature=0.1
+        )
+        eval_duration = time.time() - eval_start
+        criterion_timings["Gesamtbewertung"] = eval_duration
+        print(f"  • Gesamtbewertung: {eval_duration:.2f} sec")
+
+        # Parse the JSON response
+        try:
+            parsed_response = json_module.loads(response.choices[0].message.content)
+
+            # Validate structure before passing to Pydantic
+            required_keys = ["scores", "total_score", "quality_level",
+                            "strengths", "weaknesses", "improvement_suggestions"]
+
+            # If the response has a different structure, try to fix it
+            if not all(key in parsed_response for key in required_keys):
+                print(f"  ! Warnung: Ungewöhnliche Antwortstruktur erkannt. Versuche Konvertierung...")
+                status = "Partial Structure"
+
+                # Create a properly structured response
+                fixed_response = {
+                    "scores": {
+                        "A1": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "A2": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "B1": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "B2": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "C1": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "D1": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "D2": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "E1": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "F1": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"}
+                    },
+                    "total_score": 0,
+                    "quality_level": "Fehlerhaft",
+                    "strengths": ["Strukturfehler in der Bewertung"],
+                    "weaknesses": ["Antwortstruktur nicht korrekt"],
+                    "improvement_suggestions": ["Überprüfen Sie die Feedback-Struktur"]
+                }
+
+                # Try to populate with available data
+                if "scores" in parsed_response:
+                    for key in ["A1", "A2", "B1", "B2", "C1", "D1", "D2", "E1", "F1"]:
+                        if key in parsed_response["scores"]:
+                            fixed_response["scores"][key] = parsed_response["scores"][key]
+
+                if "total_score" in parsed_response:
+                    fixed_response["total_score"] = parsed_response["total_score"]
+
+                if "quality_level" in parsed_response and parsed_response["quality_level"]:
+                    fixed_response["quality_level"] = parsed_response["quality_level"]
+
+                if "strengths" in parsed_response and isinstance(parsed_response["strengths"], list):
+                    fixed_response["strengths"] = parsed_response["strengths"]
+
+                if "weaknesses" in parsed_response and isinstance(parsed_response["weaknesses"], list):
+                    fixed_response["weaknesses"] = parsed_response["weaknesses"]
+
+                if "improvement_suggestions" in parsed_response and isinstance(parsed_response["improvement_suggestions"], list):
+                    fixed_response["improvement_suggestions"] = parsed_response["improvement_suggestions"]
+
+                parsed_response = fixed_response
+
+            # Create evaluation object
+            evaluation = EvaluationResult(**parsed_response)
+            results[f.name] = evaluation
+
+        except json_module.JSONDecodeError as e:
+            print(f"  ! JSON-Decoding-Fehler: {e}")
+            print(f"  ! Antwortinhalt: {response.choices[0].message.content[:200]}...")
+            status = f"JSON Error: {str(e)}"
+
+            # Create a default evaluation with error messages
+            error_explanation = f"FEHLER: Ungültige JSON-Antwort vom API. Details: {str(e)}"
+            default_scores = {
+                key: ScoreItem(score=5, justification=error_explanation)
+                for key in ["A1", "A2", "B1", "B2", "C1", "D1", "D2", "E1", "F1"]
+            }
+
+            evaluation = EvaluationResult(
+                scores=default_scores,
+                total_score=0,
+                quality_level="Fehlerhaft",
+                strengths=["Bewertung fehlgeschlagen"],
+                weaknesses=["Ungültiges JSON-Format"],
+                improvement_suggestions=["Überprüfen Sie die Feedback-Struktur"]
+            )
+            results[f.name] = evaluation
+
+    except Exception as e:
+        print(f"  ! Unerwarteter Fehler: {str(e)}")
+        status = f"API Error: {str(e)}"
+
+        # Create a default evaluation with error messages
+        error_explanation = f"FEHLER: Bewertung fehlgeschlagen. Details: {str(e)}"
+        default_scores = {
+            key: ScoreItem(score=5, justification=error_explanation)
+            for key in ["A1", "A2", "B1", "B2", "C1", "D1", "D2", "E1", "F1"]
+        }
+
+        evaluation = EvaluationResult(
+            scores=default_scores,
+            total_score=0,
+            quality_level="Systemfehler",
+            strengths=["Bewertung fehlgeschlagen"],
+            weaknesses=[f"Technischer Fehler: {str(e)}"],
+            improvement_suggestions=["Kontaktieren Sie den Support"]
+        )
+        results[f.name] = evaluation
+        eval_duration = time.time() - eval_start
+        criterion_timings["Gesamtbewertung"] = eval_duration
+
+    # Calculate the AVERAGE score (not sum)
+    all_scores = [evaluation.scores[key].score for key in evaluation.scores.keys()]
+    valid_scores = [s for s in all_scores if isinstance(s, int)]
+    average_score = sum(valid_scores) / len(valid_scores) if valid_scores else 5.0
+
+    # Generate detailed text report with timing
+    report = f'''FEEDBACK-EVALUATION BERICHT
+============================
+Eingabedatei: {f.name}
+Erstellungsdatum: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
+Kursreferenz: "Feedback in der Lehre: Basics" (Hochschulmedizin Dresden)
+
+VERARBEITUNGSZEITEN
+----------------------------------------
+Gesamtverarbeitung: {time.time() - file_start:.2f} Sekunden
+'''
+
+    # Add timing for evaluation
+    for criterion, duration in criterion_timings.items():
+        report += f"  • {criterion}: {duration:.2f} Sekunden\n"
+
+    # Add evaluation results
+    report += f'''
+
+KRITERIENBEWERTUNG
+----------------------------------------
+A1 PERSPEKTIVE (Ich-Botschaften): {evaluation.scores['A1'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['A1'].justification}
+
+A2 RESPEKT & WERTFREIHEIT: {evaluation.scores['A2'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['A2'].justification}
+
+B1 KONKRETHEIT: {evaluation.scores['B1'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['B1'].justification}
+
+B2 TRENNUNG VON BEOBACHTUNG UND INTERPRETATION: {evaluation.scores['B2'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['B2'].justification}
+
+C1 STRUKTURIERTE LOGIK (WWW/BEB): {evaluation.scores['C1'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['C1'].justification}
+
+D1 ZUKUNGSORIENTIERTE EMPFEHLUNG: {evaluation.scores['D1'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['D1'].justification}
+
+D2 WERTSCHÄTZENDER ABSCHLUSS: {evaluation.scores['D2'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['D2'].justification}
+
+E1 KOMMUNIKATIONSEBENEN: {evaluation.scores['E1'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['E1'].justification}
+
+F1 FÖRDERUNG VON REFLEXION: {evaluation.scores['F1'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['F1'].justification}
+
+GESAMTBEWERTUNG
+----------------------------------------
+Durchschnittliche Bewertung: {average_score:.1f}/5
+(1=exzellent, 5=nicht bestanden)
+
+Qualitätsstufe: {evaluation.quality_level}
+
+Stärken:
+'''
+    for strength in evaluation.strengths:
+        report += f"- {strength}\n"
+
+    report += "\nSchwächen:\n"
+    for weakness in evaluation.weaknesses:
+        report += f"- {weakness}\n"
+
+    report += "\nVerbesserungsvorschläge:\n"
+    for suggestion in evaluation.improvement_suggestions:
+        report += f"- {suggestion}\n"
+
+    # Save report to output directory
+    output_path = Path(output_dir) / f"{f.stem}_evaluation.txt"
+    with open(output_path, "w", encoding="utf-8") as out_file:
+        out_file.write(report)
+
+    # Write timing data to CSV
+    with open(csv_timing_path, "a", encoding="utf-8", newline="") as csv_file:
+        csv_writer = csv.writer(csv_file, delimiter=",")
+        csv_writer.writerow([
+            f.name,
+            f"{time.time() - file_start:.2f}",
+            f"{eval_duration:.2f}",
+            start_time_str,
+            datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
+            status,
+            f"{average_score:.1f}",  # Using the calculated average score
+            evaluation.quality_level,
+            evaluation.scores['A1'].score,
+            evaluation.scores['A2'].score,
+            evaluation.scores['B1'].score,
+            evaluation.scores['B2'].score,
+            evaluation.scores['C1'].score,
+            evaluation.scores['D1'].score,
+            evaluation.scores['D2'].score,
+            evaluation.scores['E1'].score,
+            evaluation.scores['F1'].score
+        ])
+
+    # Log timing to central log file
+    with open(timing_log_path, "a", encoding="utf-8") as log:
+        log.write(f"Datei: {f.name}\n")
+        log.write(f"Start: {datetime.datetime.now().strftime('%H:%M:%S')}\n")
+        log.write(f"Dauer: {time.time() - file_start:.2f} Sekunden\n")
+        log.write("Detailierte Zeiten:\n")
+        for criterion, duration in criterion_timings.items():
+            log.write(f"  • {criterion}: {duration:.2f} Sekunden\n")
+        log.write("-"*50 + "\n\n")
+
+    print(f"\nBewertungsbericht erstellt: {output_path}")
+    print(f"Gesamtzeit für {f.name}: {time.time() - file_start:.2f} Sekunden")
+    print(f"{'='*50}")
+
+total_duration = time.time() - total_start  # wall-clock time spent on the whole batch
+print(f"\n{'='*50}")
+print("ALLE BEWERTUNGEN ABGESCHLOSSEN")
+print(f"Gesamtverarbeitungszeit: {total_duration:.2f} Sekunden für {len(files)} Dateien")
+print(f"Durchschnittliche Zeit pro Datei: {total_duration/max(len(files), 1):.2f} Sekunden")  # max() avoids ZeroDivisionError when `files` is empty
+print(f"Bewertungsberichte gespeichert in: {output_dir}")
+print(f"Timing-Log aktualisiert: {timing_log_path}")
+print(f"CSV-Timing-Datei erstellt: {csv_timing_path}")
+print(f"{'='*50}")
+##
+
+
+
+
+# %% Isabella
+
+
+import openai
+import os
+from pathlib import Path
+import datetime
+import time
+import json as json_module
+import csv
+from pydantic import BaseModel
+import math # Importiere das math Modul für floor/ceil/round falls benötigt, aber Python's round() reicht hier
+
+client = openai.OpenAI(  # client used for every evaluation request in this cell
+    api_key=os.environ.get("PLUTO_API_KEY", "sk--T3QiY4gBE67o9oSxEOqxw"),  # NOTE(review): key is committed to VCS; set PLUTO_API_KEY, then rotate and drop this fallback
+    base_url="http://pluto/v1"
+)
+
+EVAL_PROMPT = '''
+ Du bist ein strenger, objektiver Bewertender für medizinische Lehre. Bewerte das folgende Feedback anhand der Kursinhalte "Feedback in der Lehre: Basics". KRITERIEN (basierend auf Kursmaterial): A1 PERSPEKTIVE (Ich-Botschaften) Bewertung: Wird subjektive Wahrnehmung in Ich-Formulierungen dargestellt? A2 RESPEKT & WERTFREIHEIT Bewertung: Wird respektvoll und wertfrei kommuniziert? B1 KONKRETHEIT "Das Feedback sollte so konkret wie möglich sein. Die Wiedergabe beobachteter Beispiele ist hilfreich." Bewertung: Enthält das Feedback beobachtbare Beispiele statt Verallgemeinerungen? B2 TRENNUNG VON BEOBACHTUNG UND INTERPRETATION Bewertung: Wird zwischen beobachtbaren Fakten und Interpretationen unterschieden? C1 STRUKTURIERTE LOGIK (WWW/BEB-Prinzip) WWW: "1. Wahrnehmung: Ich habe gesehen ... 2. Wirkung: ... das hat mich nervös gemacht. 3. Wunsch: Ich wünsche mir ..." BEB: "1. Beobachtung: Ich habe gesehen ... 2. Empfehlung: Ich empfehle ... 3. Begründung: Auf diese Weise vermeiden Sie ..." Bewertung: Folgt das Feedback einer klaren Struktur (WWW oder BEB)? D1 ZUKUNGSORIENTIERTE EMPFEHLUNG Bewertung: Gibt es konkrete, zukunftsorientierte Handlungsempfehlungen, die wertschätzend formuliert sind? D2 WERTSCHÄTZENDER ABSCHLUSS Bewertung: Schließt das Feedback wertschätzend ab? E1 KOMMUNIKATIONSEBENEN "Vier Seiten einer Nachricht: Sachinhalt, Selbstoffenbarung, Beziehung, Appell" Bewertung: Berücksichtigt das Feedback die verschiedenen Kommunikationsebenen? F1 FÖRDERUNG VON REFLEXION Bewertung: Fördert das Feedback die Reflexion und das Lernen?
+
+SCORING-ANLEITUNG:
+1 = Vollständige Umsetzung (exzellentes Beispiel)
+2 = Gute Umsetzung mit minimalen Lücken
+3 = Grundlegende Umsetzung mit signifikanten Mängeln
+4 = Unzureichende Umsetzung (wichtige Elemente fehlen)
+5 = Keine erkennbare Umsetzung (kriterienwidrig)
+
+WICHTIG: Gib die Ergebnisse AUSSCHLIESSLICH als JSON mit EXAKT folgender Struktur zurück:
+{
+  "scores": {
+    "A1": {"score": 1, "justification": "Begründung hier"},
+    "A2": {"score": 1, "justification": "Begründung hier"},
+    "B1": {"score": 1, "justification": "Begründung hier"},
+    "B2": {"score": 1, "justification": "Begründung hier"},
+    "C1": {"score": 1, "justification": "Begründung hier"},
+    "D1": {"score": 1, "justification": "Begründung hier"},
+    "D2": {"score": 1, "justification": "Begründung hier"},
+    "E1": {"score": 1, "justification": "Begründung hier"},
+    "F1": {"score": 1, "justification": "Begründung hier"}
+  },
+  "total_score": 0,
+  "quality_level": "Beispiel-Qualitätsstufe",
+  "strengths": ["Stärke 1", "Stärke 2"],
+  "weaknesses": ["Schwäche 1", "Schwäche 2"],
+  "improvement_suggestions": ["Vorschlag 1", "Vorschlag 2", "Vorschlag 3"]
+}
+'''
+
+class ScoreItem(BaseModel):  # One criterion's grade together with its justification text
+    score: int  # 1-5 (1=excellent, 5=failed)
+    justification: str  # reasoning for the score ("Begründung" in the prompt's JSON template)
+
+class EvaluationResult(BaseModel):  # Parsed evaluation; fields mirror the JSON structure requested in EVAL_PROMPT
+    scores: dict[str, ScoreItem]  # keyed by criterion code (A1, A2, B1, B2, C1, D1, D2, E1, F1)
+    total_score: int  # value reported in the response; the error fallbacks in this file set it to 0
+    quality_level: str  # textual quality label
+    strengths: list[str]  # rendered as a bullet list in the text report
+    weaknesses: list[str]  # rendered as a bullet list in the text report
+    improvement_suggestions: list[str]  # rendered as a bullet list in the text report
+##
+
+
+
+# %% Main
+input_dir = "./cruscloud/Teil3/Transkripte/"
+# Hardcoded output directory - CHANGE THIS PATH AS NEEDED
+output_dir = "./cruscloud/Teil3/Evaluations_moodle3"
+Path(output_dir).mkdir(parents=True, exist_ok=True)
+
+# Create timing log file
+timing_log_path = Path(output_dir) / "evaluation_timing.log"
+with open(timing_log_path, "w", encoding="utf-8") as log:
+    log.write(f"FEEDBACK EVALUATION TIMING LOG - {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
+    log.write("="*80 + "\n\n")
+
+# Create CSV timing file with headers
+csv_timing_path = Path(output_dir) / "evaluation_timings.csv"
+with open(csv_timing_path, "w", encoding="utf-8", newline="") as csv_file:
+    csv_writer = csv.writer(csv_file, delimiter=",")
+    # Write CSV header
+    csv_writer.writerow([
+        "Filename",
+        "Total_Time_sec",
+        "API_Evaluation_Time_sec",
+        "Start_Time",
+        "End_Time",
+        "Status",
+        "Average_Score",
+        "Quality_Level",
+        "A1_Score",
+        "A2_Score",
+        "B1_Score",
+        "B2_Score",
+        "C1_Score",
+        "D1_Score",
+        "D2_Score",
+        "E1_Score",
+        "F1_Score"
+    ])
+
+files = list(Path(input_dir).glob("*.txt"))
+results = {}
+total_start = time.time()
+
+for f in files:
+    file_start = time.time()
+    start_time_str = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+    print(f"\n{'='*50}")
+    print(f"Beginne Bewertung: {f.name}")
+    print(f"Startzeit: {datetime.datetime.now().strftime('%H:%M:%S')}")
+
+    # Read input text
+    text = f.read_text(encoding="utf-8")
+
+    # Get AI evaluation with timing
+    status = "Success"
+    eval_duration = 0.0
+    eval_end = file_start # Initialisierung
+
+    # We'll evaluate all criteria in one call with strict JSON structure
+    try:
+        eval_start_api = time.time()
+        response = client.chat.completions.create(
+            model="GPT-OSS-120B",
+            messages=[
+                {"role": "system", "content": EVAL_PROMPT},
+                {"role": "user", "content": text},
+            ],
+            response_format={"type": "json_object"},
+            temperature=0.1
+        )
+        eval_duration = time.time() - eval_start_api # API-Zeit gemessen
+        
+        print(f"  • Gesamtbewertung (API-Laufzeit): {eval_duration:.2f} sec")
+
+        # Parse the JSON response
+        try:
+            parsed_response = json_module.loads(response.choices[0].message.content)
+            eval_end = time.time() # Zeitpunkt nach JSON-Parsing
+
+            # Validate structure before passing to Pydantic
+            required_keys = ["scores", "total_score", "quality_level",
+                             "strengths", "weaknesses", "improvement_suggestions"]
+            criteria_keys = ["A1", "A2", "B1", "B2", "C1", "D1", "D2", "E1", "F1"]
+            # Repair if a top-level key OR any criterion is missing: the report indexes scores['A1']..['F1'] directly
+            if not all(key in parsed_response for key in required_keys) or not all(key in parsed_response["scores"] for key in criteria_keys):
+                print(f"  ! Warnung: Ungewöhnliche Antwortstruktur erkannt. Versuche Konvertierung...")
+                status = "Partial Structure"
+
+                # Start from a skeleton where every criterion is graded 5 with a placeholder text
+                fixed_response = {
+                    "scores": {
+                        "A1": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "A2": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "B1": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "B2": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "C1": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "D1": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "D2": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "E1": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "F1": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"}
+                    },
+                    "total_score": 0,
+                    "quality_level": "Fehlerhaft",
+                    "strengths": ["Strukturfehler in der Bewertung"],
+                    "weaknesses": ["Antwortstruktur nicht korrekt"],
+                    "improvement_suggestions": ["Überprüfen Sie die Feedback-Struktur"]
+                }
+
+                # Copy over whatever usable data the response did contain
+                if "scores" in parsed_response:
+                    for key in criteria_keys:
+                        if key in parsed_response["scores"]:
+                            fixed_response["scores"][key] = parsed_response["scores"][key]
+
+                if "total_score" in parsed_response:
+                    fixed_response["total_score"] = parsed_response["total_score"]
+
+                if "quality_level" in parsed_response and parsed_response["quality_level"]:
+                    fixed_response["quality_level"] = parsed_response["quality_level"]
+
+                if "strengths" in parsed_response and isinstance(parsed_response["strengths"], list):
+                    fixed_response["strengths"] = parsed_response["strengths"]
+
+                if "weaknesses" in parsed_response and isinstance(parsed_response["weaknesses"], list):
+                    fixed_response["weaknesses"] = parsed_response["weaknesses"]
+
+                if "improvement_suggestions" in parsed_response and isinstance(parsed_response["improvement_suggestions"], list):
+                    fixed_response["improvement_suggestions"] = parsed_response["improvement_suggestions"]
+
+                parsed_response = fixed_response
+
+            # Create evaluation object
+            evaluation = EvaluationResult(**parsed_response)
+            results[f.name] = evaluation
+
+        except json_module.JSONDecodeError as e:
+            print(f"  ! JSON-Decoding-Fehler: {e}")
+            print(f"  ! Antwortinhalt: {response.choices[0].message.content[:200]}...")
+            status = f"JSON Error: {str(e)}"
+            eval_end = time.time() # Zeitpunkt nach Fehler
+
+            # Create a default evaluation with error messages
+            error_explanation = f"FEHLER: Ungültige JSON-Antwort vom API. Details: {str(e)}"
+            default_scores = {
+                key: ScoreItem(score=5, justification=error_explanation)
+                for key in ["A1", "A2", "B1", "B2", "C1", "D1", "D2", "E1", "F1"]
+            }
+
+            evaluation = EvaluationResult(
+                scores=default_scores,
+                total_score=0,
+                quality_level="Fehlerhaft",
+                strengths=["Bewertung fehlgeschlagen"],
+                weaknesses=["Ungültiges JSON-Format"],
+                improvement_suggestions=["Überprüfen Sie die Feedback-Struktur"]
+            )
+            results[f.name] = evaluation
+
+    except Exception as e:
+        print(f"  ! Unerwarteter Fehler: {str(e)}")
+        status = f"API Error: {str(e)}"
+        
+        # Create a default evaluation with error messages
+        error_explanation = f"FEHLER: Bewertung fehlgeschlagen. Details: {str(e)}"
+        default_scores = {
+            key: ScoreItem(score=5, justification=error_explanation)
+            for key in ["A1", "A2", "B1", "B2", "C1", "D1", "D2", "E1", "F1"]
+        }
+
+        evaluation = EvaluationResult(
+            scores=default_scores,
+            total_score=0,
+            quality_level="Systemfehler",
+            strengths=["Bewertung fehlgeschlagen"],
+            weaknesses=[f"Technischer Fehler: {str(e)}"],
+            improvement_suggestions=["Kontaktieren Sie den Support"]
+        )
+        results[f.name] = evaluation
+        eval_end = time.time() # Zeitpunkt nach API-Fehler
+
+    # Calculate the AVERAGE score (not sum)
+    all_scores = [evaluation.scores[key].score for key in evaluation.scores.keys()]
+    valid_scores = [s for s in all_scores if isinstance(s, int)]
+    average_score = sum(valid_scores) / len(valid_scores) if valid_scores else 5.0
+    
+    # Runden auf die nächste ganze Zahl (natürliche Zahl-Format)
+    rounded_average_score = int(round(average_score))
+
+    # --- Zeitmessungs-Korrektur (Neu) ---
+    # Gesamtzeit für die Datei (bis zum Ende der Verarbeitung)
+    total_file_duration = time.time() - file_start
+    # Zeit für lokale Verarbeitung: Alles, was nach dem Start bis zum Ende der API/JSON-Verarbeitung (eval_end) passiert ist,
+    # abzüglich der reinen API-Wartezeit (eval_duration).
+    # Eine einfachere und präzisere Methode ist: Gesamtzeit minus API-Zeit.
+    local_processing_time = total_file_duration - eval_duration
+    if local_processing_time < 0: # Sicherstellen, dass die Zeit nicht negativ wird, falls die API-Messung ungenau ist
+        local_processing_time = 0.0
+    # -------------------------------------
+
+    # Generate detailed text report with timing
+    report = f'''FEEDBACK-EVALUATION BERICHT
+============================
+Eingabedatei: {f.name}
+Erstellungsdatum: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
+Kursreferenz: "Feedback in der Lehre: Basics" (Hochschulmedizin Dresden)
+
+VERARBEITUNGSZEITEN
+----------------------------------------
+Gesamtverarbeitung: {total_file_duration:.2f} Sekunden
+  • API-Bewertungszeit: {eval_duration:.2f} Sekunden
+  • Lokale Verarbeitungszeit (Lesen, JSON, Bericht): {local_processing_time:.2f} Sekunden 
+'''
+
+    # Add evaluation results
+    report += f'''
+
+KRITERIENBEWERTUNG
+----------------------------------------
+A1 PERSPEKTIVE (Ich-Botschaften): {evaluation.scores['A1'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['A1'].justification}
+
+A2 RESPEKT & WERTFREIHEIT: {evaluation.scores['A2'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['A2'].justification}
+
+B1 KONKRETHEIT: {evaluation.scores['B1'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['B1'].justification}
+
+B2 TRENNUNG VON BEOBACHTUNG UND INTERPRETATION: {evaluation.scores['B2'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['B2'].justification}
+
+C1 STRUKTURIERTE LOGIK (WWW/BEB): {evaluation.scores['C1'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['C1'].justification}
+
+D1 ZUKUNGSORIENTIERTE EMPFEHLUNG: {evaluation.scores['D1'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['D1'].justification}
+
+D2 WERTSCHÄTZENDER ABSCHLUSS: {evaluation.scores['D2'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['D2'].justification}
+
+E1 KOMMUNIKATIONSEBENEN: {evaluation.scores['E1'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['E1'].justification}
+
+F1 FÖRDERUNG VON REFLEXION: {evaluation.scores['F1'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['F1'].justification}
+
+GESAMTBEWERTUNG
+----------------------------------------
+Durchschnittliche Bewertung: {rounded_average_score}/5
+(1=exzellent, 5=nicht bestanden)
+
+Qualitätsstufe: {evaluation.quality_level}
+
+Stärken:
+'''
+    for strength in evaluation.strengths:
+        report += f"- {strength}\n"
+
+    report += "\nSchwächen:\n"
+    for weakness in evaluation.weaknesses:
+        report += f"- {weakness}\n"
+
+    report += "\nVerbesserungsvorschläge:\n"
+    for suggestion in evaluation.improvement_suggestions:
+        report += f"- {suggestion}\n"
+
+    # Save report to output directory
+    output_path = Path(output_dir) / f"{f.stem}_evaluation.txt"
+    with open(output_path, "w", encoding="utf-8") as out_file:
+        out_file.write(report)
+
+    # Write timing data to CSV
+    with open(csv_timing_path, "a", encoding="utf-8", newline="") as csv_file:
+        csv_writer = csv.writer(csv_file, delimiter=",")
+        csv_writer.writerow([
+            f.name,
+            f"{total_file_duration:.2f}", 
+            f"{eval_duration:.2f}",
+            start_time_str,
+            datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
+            status,
+            rounded_average_score, 
+            evaluation.quality_level,
+            evaluation.scores['A1'].score,
+            evaluation.scores['A2'].score,
+            evaluation.scores['B1'].score,
+            evaluation.scores['B2'].score,
+            evaluation.scores['C1'].score,
+            evaluation.scores['D1'].score,
+            evaluation.scores['D2'].score,
+            evaluation.scores['E1'].score,
+            evaluation.scores['F1'].score
+        ])
+
+    # Append this file's timing summary to the central log file
+    with open(timing_log_path, "a", encoding="utf-8") as log:
+        log.write(f"Datei: {f.name}\n")
+        log.write(f"Start: {datetime.datetime.fromtimestamp(file_start).strftime('%H:%M:%S')}\n")  # actual start of this file; previously the time of writing was logged
+        log.write(f"Dauer: {total_file_duration:.2f} Sekunden\n")
+        log.write("Detailierte Zeiten:\n")
+        log.write(f"  • API-Bewertung: {eval_duration:.2f} Sekunden\n")
+        log.write(f"  • Lokale Verarbeitung: {local_processing_time:.2f} Sekunden\n")
+        log.write("-"*50 + "\n\n")
+
+    print(f"\nBewertungsbericht erstellt: {output_path}")
+    print(f"Gesamtzeit für {f.name}: {total_file_duration:.2f} Sekunden (API: {eval_duration:.2f}, Lokal: {local_processing_time:.2f})")
+    print(f"{'='*50}")
+
+total_duration = time.time() - total_start  # wall-clock time spent on the whole batch
+print(f"\n{'='*50}")
+print("ALLE BEWERTUNGEN ABGESCHLOSSEN")
+print(f"Gesamtverarbeitungszeit: {total_duration:.2f} Sekunden für {len(files)} Dateien")
+print(f"Durchschnittliche Zeit pro Datei: {total_duration/max(len(files), 1):.2f} Sekunden")  # max() avoids ZeroDivisionError when no *.txt input matched
+print(f"Bewertungsberichte gespeichert in: {output_dir}")
+print(f"Timing-Log aktualisiert: {timing_log_path}")
+print(f"CSV-Timing-Datei erstellt: {csv_timing_path}")
+print(f"{'='*50}")
+
+##
+
+
+
+
+# %% Main
+
+QUALITY_LEVEL_MAP = {  # label looked up by the rounded average score (1 = best, 5 = worst)
+    1: "Exzellent (1)",
+    2: "Gut (2)",
+    3: "Befriedigend (3)",
+    4: "Ausreichend (4)",
+    5: "Mangelhaft/Ungenügend (5)",
+    0: "Fehlerhaft/Unbekannt"  # same text as the .get() default used for unmapped values
+}
+
+
+input_dir = "./cruscloud/Teil3/Transkripte/"
+# Hardcoded output directory - CHANGE THIS PATH AS NEEDED
+output_dir = "./cruscloud/Teil3/Evaluations_moodle_isabella2"
+Path(output_dir).mkdir(parents=True, exist_ok=True)
+
+# Create timing log file
+timing_log_path = Path(output_dir) / "evaluation_timing.log"
+with open(timing_log_path, "w", encoding="utf-8") as log:
+    log.write(f"FEEDBACK EVALUATION TIMING LOG - {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
+    log.write("="*80 + "\n\n")
+
+# Create CSV timing file with headers
+csv_timing_path = Path(output_dir) / "evaluation_timings.csv"
+with open(csv_timing_path, "w", encoding="utf-8", newline="") as csv_file:
+    csv_writer = csv.writer(csv_file, delimiter=",")
+    # Write CSV header
+    csv_writer.writerow([
+        "Filename",
+        "Total_Time_sec",
+        "API_Evaluation_Time_sec",
+        "Start_Time",
+        "End_Time",
+        "Status",
+        "Average_Score",
+        "Quality_Level",
+        "A1_Score",
+        "A2_Score",
+        "B1_Score",
+        "B2_Score",
+        "C1_Score",
+        "D1_Score",
+        "D2_Score",
+        "E1_Score",
+        "F1_Score"
+    ])
+
+files = list(Path(input_dir).glob("*.txt"))
+results = {}
+total_start = time.time()
+
+for f in files:
+    file_start = time.time()
+    start_time_str = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+    print(f"\n{'='*50}")
+    print(f"Beginne Bewertung: {f.name}")
+    print(f"Startzeit: {datetime.datetime.now().strftime('%H:%M:%S')}")
+
+    # Read input text
+    text = f.read_text(encoding="utf-8")
+
+    # Get AI evaluation with timing
+    status = "Success"
+    eval_duration = 0.0
+
+    # We'll evaluate all criteria in one call with strict JSON structure
+    try:
+        eval_start_api = time.time()
+        response = client.chat.completions.create(
+            model="GPT-OSS-120B",
+            messages=[
+                {"role": "system", "content": EVAL_PROMPT},
+                {"role": "user", "content": text},
+            ],
+            response_format={"type": "json_object"},
+            temperature=0.1
+        )
+        eval_duration = time.time() - eval_start_api # API-Zeit gemessen
+
+        print(f"  • Gesamtbewertung (API-Laufzeit): {eval_duration:.2f} sec")
+
+        # Parse the JSON response
+        try:
+            parsed_response = json_module.loads(response.choices[0].message.content)
+
+            # Validate structure before passing to Pydantic
+            required_keys = ["scores", "total_score", "quality_level",
+                             "strengths", "weaknesses", "improvement_suggestions"]
+            criteria_keys = ["A1", "A2", "B1", "B2", "C1", "D1", "D2", "E1", "F1"]
+            # Repair if a top-level key OR any criterion is missing: the report indexes scores['A1']..['F1'] directly
+            if not all(key in parsed_response for key in required_keys) or not all(key in parsed_response["scores"] for key in criteria_keys):
+                print(f"  ! Warnung: Ungewöhnliche Antwortstruktur erkannt. Versuche Konvertierung...")
+                status = "Partial Structure"
+
+                # Start from a skeleton where every criterion is graded 5 with a placeholder text
+                fixed_response = {
+                    "scores": {
+                        "A1": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "A2": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "B1": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "B2": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "C1": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "D1": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "D2": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "E1": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "F1": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"}
+                    },
+                    "total_score": 0,
+                    "quality_level": "Fehlerhaft",
+                    "strengths": ["Strukturfehler in der Bewertung"],
+                    "weaknesses": ["Antwortstruktur nicht korrekt"],
+                    "improvement_suggestions": ["Überprüfen Sie die Feedback-Struktur"]
+                }
+
+                # Copy over whatever usable data the response did contain
+                if "scores" in parsed_response:
+                    for key in criteria_keys:
+                        if key in parsed_response["scores"]:
+                            fixed_response["scores"][key] = parsed_response["scores"][key]
+
+                if "total_score" in parsed_response:
+                    fixed_response["total_score"] = parsed_response["total_score"]
+
+                if "quality_level" in parsed_response and parsed_response["quality_level"]:
+                    fixed_response["quality_level"] = parsed_response["quality_level"]
+
+                if "strengths" in parsed_response and isinstance(parsed_response["strengths"], list):
+                    fixed_response["strengths"] = parsed_response["strengths"]
+
+                if "weaknesses" in parsed_response and isinstance(parsed_response["weaknesses"], list):
+                    fixed_response["weaknesses"] = parsed_response["weaknesses"]
+
+                if "improvement_suggestions" in parsed_response and isinstance(parsed_response["improvement_suggestions"], list):
+                    fixed_response["improvement_suggestions"] = parsed_response["improvement_suggestions"]
+
+                parsed_response = fixed_response
+
+            # Create evaluation object
+            evaluation = EvaluationResult(**parsed_response)
+            results[f.name] = evaluation
+
+        except json_module.JSONDecodeError as e:
+            print(f"  ! JSON-Decoding-Fehler: {e}")
+            print(f"  ! Antwortinhalt: {response.choices[0].message.content[:200]}...")
+            status = f"JSON Error: {str(e)}"
+
+            # Create a default evaluation with error messages
+            error_explanation = f"FEHLER: Ungültige JSON-Antwort vom API. Details: {str(e)}"
+            default_scores = {
+                key: ScoreItem(score=5, justification=error_explanation)
+                for key in ["A1", "A2", "B1", "B2", "C1", "D1", "D2", "E1", "F1"]
+            }
+
+            evaluation = EvaluationResult(
+                scores=default_scores,
+                total_score=0,
+                quality_level="Fehlerhaft",
+                strengths=["Bewertung fehlgeschlagen"],
+                weaknesses=["Ungültiges JSON-Format"],
+                improvement_suggestions=["Überprüfen Sie die Feedback-Struktur"]
+            )
+            results[f.name] = evaluation
+
+    except Exception as e:
+        print(f"  ! Unerwarteter Fehler: {str(e)}")
+        status = f"API Error: {str(e)}"
+
+        # Create a default evaluation with error messages
+        error_explanation = f"FEHLER: Bewertung fehlgeschlagen. Details: {str(e)}"
+        default_scores = {
+            key: ScoreItem(score=5, justification=error_explanation)
+            for key in ["A1", "A2", "B1", "B2", "C1", "D1", "D2", "E1", "F1"]
+        }
+
+        evaluation = EvaluationResult(
+            scores=default_scores,
+            total_score=0,
+            quality_level="Systemfehler",
+            strengths=["Bewertung fehlgeschlagen"],
+            weaknesses=[f"Technischer Fehler: {str(e)}"],
+            improvement_suggestions=["Kontaktieren Sie den Support"]
+        )
+        results[f.name] = evaluation
+
+    # Calculate the AVERAGE score (not sum)
+    all_scores = [evaluation.scores[key].score for key in evaluation.scores.keys()]
+    valid_scores = [s for s in all_scores if isinstance(s, int)]
+    average_score = sum(valid_scores) / len(valid_scores) if valid_scores else 5.0
+
+    # Runden auf die nächste ganze Zahl (natürliche Zahl-Format)
+    rounded_average_score = int(round(average_score))
+
+    # --- Assign the final quality level ---
+    if status.startswith(("JSON Error", "API Error", "Systemfehler", "Partial Structure")):  # JSON/API statuses carry a ": <details>" suffix, so the former exact-match list never matched them
+        final_quality_level = evaluation.quality_level  # keep the error label set during evaluation
+    else:
+        # Map the rounded average to its predefined quality label
+        final_quality_level = QUALITY_LEVEL_MAP.get(rounded_average_score, "Fehlerhaft/Unbekannt")
+
+    # Overwrite the value on the evaluation object so the report and CSV below use it
+    evaluation.quality_level = final_quality_level
+    # ----------------------------------------------------------------
+
+    # --- Zeitmessungs-Korrektur (Überprüfung) ---
+    # Gesamtzeit für die Datei (bis zum Ende der Verarbeitung)
+    total_file_duration = time.time() - file_start
+    # Lokale Verarbeitungszeit: Gesamtzeit minus der reinen API-Wartezeit.
+    local_processing_time = total_file_duration - eval_duration
+    if local_processing_time < 0: # Sicherstellen, dass die Zeit nicht negativ wird
+        local_processing_time = 0.0
+    # -------------------------------------
+
+    # Generate detailed text report with timing
+    report = f'''FEEDBACK-EVALUATION BERICHT
+============================
+Eingabedatei: {f.name}
+Erstellungsdatum: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
+Kursreferenz: "Feedback in der Lehre: Basics" (Hochschulmedizin Dresden)
+
+VERARBEITUNGSZEITEN
+----------------------------------------
+Gesamtverarbeitung: {total_file_duration:.2f} Sekunden
+  • API-Bewertungszeit: {eval_duration:.2f} Sekunden
+  • Lokale Verarbeitungszeit (Lesen, JSON, Bericht): {local_processing_time:.2f} Sekunden
+'''
+
+    # Add evaluation results
+    report += f'''
+
+KRITERIENBEWERTUNG
+----------------------------------------
+A1 PERSPEKTIVE (Ich-Botschaften): {evaluation.scores['A1'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['A1'].justification}
+
+A2 RESPEKT & WERTFREIHEIT: {evaluation.scores['A2'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['A2'].justification}
+
+B1 KONKRETHEIT: {evaluation.scores['B1'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['B1'].justification}
+
+B2 TRENNUNG VON BEOBACHTUNG UND INTERPRETATION: {evaluation.scores['B2'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['B2'].justification}
+
+C1 STRUKTURIERTE LOGIK (WWW/BEB): {evaluation.scores['C1'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['C1'].justification}
+
+D1 ZUKUNGSORIENTIERTE EMPFEHLUNG: {evaluation.scores['D1'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['D1'].justification}
+
+D2 WERTSCHÄTZENDER ABSCHLUSS: {evaluation.scores['D2'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['D2'].justification}
+
+E1 KOMMUNIKATIONSEBENEN: {evaluation.scores['E1'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['E1'].justification}
+
+F1 FÖRDERUNG VON REFLEXION: {evaluation.scores['F1'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['F1'].justification}
+
+GESAMTBEWERTUNG
+----------------------------------------
+Durchschnittliche Bewertung: {rounded_average_score}/5
+(1=exzellent, 5=nicht bestanden)
+
+Qualitätsstufe: {evaluation.quality_level}
+
+Stärken:
+'''
+    for strength in evaluation.strengths:
+        report += f"- {strength}\n"
+
+    report += "\nSchwächen:\n"
+    for weakness in evaluation.weaknesses:
+        report += f"- {weakness}\n"
+
+    report += "\nVerbesserungsvorschläge:\n"
+    for suggestion in evaluation.improvement_suggestions:
+        report += f"- {suggestion}\n"
+
+    # Save report to output directory
+    output_path = Path(output_dir) / f"{f.stem}_evaluation.txt"
+    with open(output_path, "w", encoding="utf-8") as out_file:
+        out_file.write(report)
+
+    # Write timing data to CSV
+    with open(csv_timing_path, "a", encoding="utf-8", newline="") as csv_file:
+        csv_writer = csv.writer(csv_file, delimiter=",")
+        csv_writer.writerow([
+            f.name,
+            f"{total_file_duration:.2f}",
+            f"{eval_duration:.2f}",
+            start_time_str,
+            datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
+            status,
+            rounded_average_score,
+            evaluation.quality_level,
+            evaluation.scores['A1'].score,
+            evaluation.scores['A2'].score,
+            evaluation.scores['B1'].score,
+            evaluation.scores['B2'].score,
+            evaluation.scores['C1'].score,
+            evaluation.scores['D1'].score,
+            evaluation.scores['D2'].score,
+            evaluation.scores['E1'].score,
+            evaluation.scores['F1'].score
+        ])
+
+    # Append this file's timing summary to the central log file
+    with open(timing_log_path, "a", encoding="utf-8") as log:
+        log.write(f"Datei: {f.name}\n")
+        log.write(f"Start: {datetime.datetime.fromtimestamp(file_start).strftime('%H:%M:%S')}\n")  # actual start of this file; previously the time of writing was logged
+        log.write(f"Dauer: {total_file_duration:.2f} Sekunden\n")
+        log.write("Detailierte Zeiten:\n")
+        log.write(f"  • API-Bewertung: {eval_duration:.2f} Sekunden\n")
+        log.write(f"  • Lokale Verarbeitung: {local_processing_time:.2f} Sekunden\n")
+        log.write("-"*50 + "\n\n")
+
+    print(f"\nBewertungsbericht erstellt: {output_path}")
+    print(f"Gesamtzeit für {f.name}: {total_file_duration:.2f} Sekunden (API: {eval_duration:.2f}, Lokal: {local_processing_time:.2f})")
+    print(f"{'='*50}")
+
+total_duration = time.time() - total_start  # wall-clock time spent on the whole batch
+print(f"\n{'='*50}")
+print("ALLE BEWERTUNGEN ABGESCHLOSSEN")
+print(f"Gesamtverarbeitungszeit: {total_duration:.2f} Sekunden für {len(files)} Dateien")
+print(f"Durchschnittliche Zeit pro Datei: {total_duration/max(len(files), 1):.2f} Sekunden")  # max() avoids ZeroDivisionError when no *.txt input matched
+print(f"Bewertungsberichte gespeichert in: {output_dir}")
+print(f"Timing-Log aktualisiert: {timing_log_path}")
+print(f"CSV-Timing-Datei erstellt: {csv_timing_path}")
+print(f"{'='*50}")
+##
diff --git a/exp/app.old_inside_pipeline.py b/exp/app.old_inside_pipeline.py
new file mode 100644
index 0000000..c77e03b
--- /dev/null
+++ b/exp/app.old_inside_pipeline.py
@@ -0,0 +1,1539 @@
+# %% Prompt
+
+import openai
+import os
+from pathlib import Path
+import datetime
+import time
+import json as json_module
+import csv
+from pydantic import BaseModel
+
+# OpenAI-compatible client; PLUTO_API_KEY overrides the committed fallback key (TODO: rotate that key and drop the fallback)
+client = openai.OpenAI(
+    api_key=os.environ.get("PLUTO_API_KEY", "sk--T3QiY4gBE67o9oSxEOqxw"),
+    base_url="http://pluto/v1"
+)
+
+# Enhanced prompt with detailed criteria and structured output requirements
+EVAL_PROMPT = """
+SYSTEM:
+Du bist ein Expert:in für medizinische Lehre und Feedback-Didaktik an einer medizinischen Fakultät. 
+Bewerte das folgende Tutor-Feedback gemäß den offiziellen Feedback-Prinzipien der Medizinischen Fakultät Dresden.
+
+KRITERIEN:
+A1 PERSPEKTIVE (Ich-Botschaften)
+"A feedback ... wird in „Ich-Botschaften“ ausgedrückt."
+Bewertung: Wird subjektive Wahrnehmung in Ich-Formulierungen dargestellt?
+
+A2 RESPEKT & WERTFREIHEIT
+"Ein Feedback ... ist nicht (ab)wertend."
+Bewertung: Wird respektvoll und wertfrei kommuniziert?
+
+B1 KONKRETHEIT
+"Das Feedback sollte so konkret wie möglich sein. Die Wiedergabe beobachteter Beispiele ist hilfreich."
+Bewertung: Enthält das Feedback beobachtbare Beispiele statt Verallgemeinerungen?
+
+B2 TRENNUNG VON BEOBACHTUNG UND INTERPRETATION
+"Ein Feedback ... gibt erst nach der Äußerung von sinnlich Wahrnehmbarem die Möglichkeit zu Interpretationen, Annahmen und Schlussfolgerungen."
+Bewertung: Wird zwischen beobachtbaren Fakten und Interpretationen unterschieden?
+
+C1 STRUKTURIERTE LOGIK (WWW/BEB-Prinzip)
+WWW: "1. Wahrnehmung: Ich habe gesehen ... 2. Wirkung: ... das hat mich nervös gemacht. 3. Wunsch: Ich wünsche mir ..."
+BEB: "1. Beobachtung: Ich habe gesehen ... 2. Empfehlung: Ich empfehle ... 3. Begründung: Auf diese Weise vermeiden Sie ..."
+Bewertung: Folgt das Feedback einer klaren Struktur (WWW oder BEB)?
+
+D1 ZUKUNGSORIENTIERTE EMPFEHLUNG
+"Ein Feedback ... endet mit einer wertschätzenden Anregung für zukünftige Verbesserungen."
+Bewertung: Gibt es konkrete, zukunftsorientierte Handlungsempfehlungen?
+
+D2 WERTSCHÄTZENDER ABSCHLUSS
+"Ein Feedback ... endet mit einer wertschätzenden Anregung für zukünftige Verbesserungen."
+Bewertung: Schließt das Feedback wertschätzend ab?
+
+E1 KOMMUNIKATIONSEBENEN
+"Vier Seiten einer Nachricht: Sachinhalt, Selbstoffenbarung, Beziehung, Appell"
+Bewertung: Berücksichtigt das Feedback die verschiedenen Kommunikationsebenen?
+
+F1 FÖRDERUNG VON REFLEXION
+"Feedback ... ist eines der einflussreichsten Faktoren für den Lernerfolg."
+Bewertung: Fördert das Feedback die Reflexion und das Lernen?
+
+SCORING:
+Bewerte jedes Kriterium mit:
+0 = nicht erfüllt
+1 = teilweise erfüllt
+2 = vollständig erfüllt
+
+AUFGABE:
+1. Bewerte jedes Kriterium mit einer Punktzahl (0-2)
+2. Gib eine kurze Begründung für jede Bewertung mit Zitaten oder Paraphrasierungen aus dem Feedback
+3. Berechne die Gesamtpunktzahl (max. 18)
+4. Weise eine qualitative Bewertungsstufe zu
+5. Gib 3 konkrete Verbesserungsvorschläge
+
+OUTPUT FORMAT (JSON):
+{
+  "scores": {
+    "A1": {"score": 0-2, "justification": "..."},
+    "A2": {"score": 0-2, "justification": "..."},
+    "B1": {"score": 0-2, "justification": "..."},
+    "B2": {"score": 0-2, "justification": "..."},
+    "C1": {"score": 0-2, "justification": "..."},
+    "D1": {"score": 0-2, "justification": "..."},
+    "D2": {"score": 0-2, "justification": "..."},
+    "E1": {"score": 0-2, "justification": "..."},
+    "F1": {"score": 0-2, "justification": "..."}
+  },
+  "total_score": 0,
+  "quality_level": "",
+  "strengths": [],
+  "weaknesses": [],
+  "improvement_suggestions": []
+}
+
+TUTOR FEEDBACK:
+"""
+
+# Pydantic models the parsed JSON reply of the evaluator model is validated against
+class ScoreItem(BaseModel):
+    score: int  # the prompt asks for 0-2 (0=not fulfilled, 2=fully fulfilled); the range is not enforced here
+    justification: str  # the evaluator's reasoning for this score
+
+class EvaluationResult(BaseModel):
+    scores: dict[str, ScoreItem]  # keyed by criterion code (the report reads A1..F1); which keys are present is not checked
+    total_score: int  # used as returned by the model; not recomputed from `scores`
+    quality_level: str
+    strengths: list[str]
+    weaknesses: list[str]
+    improvement_suggestions: list[str]
+##
+
+# %% Main
+# Transcripts are read from input_dir; everything this run produces goes below output_dir
+input_dir = "./cruscloud/Teil3/Transkripte/"
+output_dir = "./cruscloud/Teil3/Evaluations_moodle2"  # hard-coded; change as needed
+Path(output_dir).mkdir(parents=True, exist_ok=True)
+
+# Start a fresh timing log (mode "w" truncates): dated title, a rule of 80 '=', one blank line
+timing_log_path = Path(output_dir) / "evaluation_timing.log"
+with open(timing_log_path, "w", encoding="utf-8") as log:
+    log.write(
+        f"FEEDBACK EVALUATION TIMING LOG - {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n"
+        + "=" * 80
+        + "\n\n"
+    )
+
+# Start a fresh timing CSV that holds only the header row; one row per file is appended later
+csv_timing_path = Path(output_dir) / "evaluation_timings.csv"
+with open(csv_timing_path, "w", encoding="utf-8", newline="") as csv_file:
+    csv_writer = csv.writer(csv_file, delimiter=",")
+    csv_writer.writerow(
+        [
+            "Filename",
+            "Total_Time_sec",
+            "API_Evaluation_Time_sec",
+            "Reasoning_Time_sec",
+            "Start_Time",
+            "End_Time",
+            "Status",
+            "Total_Score",
+            "Quality_Level",
+        ]
+        + [
+            f"{code}_Score"
+            for code in ("A1", "A2", "B1", "B2", "C1", "D1", "D2", "E1", "F1")
+        ]
+    )
+
+# All *.txt transcripts of input_dir; results collects one evaluation per file name
+files = list(Path(input_dir).glob("*.txt"))
+results = {}
+total_start = time.time()
+
+for f in files:
+    # Per-file bookkeeping: start of the duration measurement and the start stamp written to the CSV row
+    file_start = time.time()
+    start_time_str = f"{datetime.datetime.now():%Y-%m-%d %H:%M:%S}"
+    print("\n" + "=" * 50)
+    print(f"Beginne Bewertung: {f.name}")
+    print(f"Startzeit: {datetime.datetime.now():%H:%M:%S}")
+
+    # The whole transcript is the text to be evaluated
+    text = f.read_text(encoding="utf-8")
+    # Reset the per-file timing table and assume success until a handler says otherwise
+    criterion_timings = {}
+    status = "Success"
+    
+    try:  # one request + parse per file; every failure path below still leaves an `evaluation` for the report
+        # eval_start anchors both timing figures below and the duration recorded in the outer except branch
+        eval_start = time.time()
+        
+        # Single chat-completion request: the rubric is the system message, the transcript the user message
+        response = client.chat.completions.create(
+            model="GPT-OSS-120B",
+            messages=[
+                {"role": "system", "content": EVAL_PROMPT},
+                {"role": "user", "content": text},
+            ],
+            response_format={"type": "json_object"},
+            temperature=0.1,
+            max_completion_tokens=1024,
+            reasoning_effort="medium",  # Using the reasoning model capabilities
+            extra_body={"allowed_openai_params": ["reasoning_effort"]}
+        )
+        
+        # NOTE: taken after the call has returned, so "Reasoning" is the elapsed request time, not a separately measured phase; recorded only when the reply carries reasoning_content
+        reasoning_time = 0
+        if hasattr(response.choices[0].message, 'reasoning_content') and response.choices[0].message.reasoning_content:
+            reasoning_time = time.time() - eval_start
+            criterion_timings["Reasoning"] = reasoning_time
+            print(f"  • Reasoning: {reasoning_time:.2f} sec")
+        
+        eval_duration = time.time() - eval_start
+        criterion_timings["Gesamtbewertung"] = eval_duration
+        print(f"  • Gesamtbewertung: {eval_duration:.2f} sec")
+        
+        # Parse the JSON response
+        try:
+            parsed_response = json_module.loads(response.choices[0].message.content)
+            
+            # Only flags missing top-level keys; nothing is converted here, so EvaluationResult(...) below raises on an incomplete reply and the outer except overwrites status with "API Error: ..."
+            required_keys = ["scores", "total_score", "quality_level", 
+                            "strengths", "weaknesses", "improvement_suggestions"]
+            
+            if not all(key in parsed_response for key in required_keys):
+                print(f"  ! Warnung: Ungewöhnliche Antwortstruktur erkannt. Versuche Konvertierung...")
+                status = "Partial Structure"
+            
+            # Validate against the Pydantic schema and keep the result under the file name
+            evaluation = EvaluationResult(**parsed_response)
+            results[f.name] = evaluation
+            
+        except json_module.JSONDecodeError as e:
+            print(f"  ! JSON-Decoding-Fehler: {e}")
+            print(f"  ! Antwortinhalt: {response.choices[0].message.content[:200]}...")
+            status = f"JSON Error: {str(e)}"
+            
+            # Placeholder evaluation (every score 0) so the report and CSV row can still be written
+            evaluation = EvaluationResult(
+                scores={
+                    "A1": ScoreItem(score=0, justification="FEHLER: Ungültige JSON-Antwort vom API"),
+                    "A2": ScoreItem(score=0, justification="FEHLER: Ungültige JSON-Antwort vom API"),
+                    "B1": ScoreItem(score=0, justification="FEHLER: Ungültige JSON-Antwort vom API"),
+                    "B2": ScoreItem(score=0, justification="FEHLER: Ungültige JSON-Antwort vom API"),
+                    "C1": ScoreItem(score=0, justification="FEHLER: Ungültige JSON-Antwort vom API"),
+                    "D1": ScoreItem(score=0, justification="FEHLER: Ungültige JSON-Antwort vom API"),
+                    "D2": ScoreItem(score=0, justification="FEHLER: Ungültige JSON-Antwort vom API"),
+                    "E1": ScoreItem(score=0, justification="FEHLER: Ungültige JSON-Antwort vom API"),
+                    "F1": ScoreItem(score=0, justification="FEHLER: Ungültige JSON-Antwort vom API")
+                },
+                total_score=0,
+                quality_level="Fehlerhaft",
+                strengths=["Bewertung fehlgeschlagen"],
+                weaknesses=["Keine Bewertung möglich"],
+                improvement_suggestions=["Korrigieren Sie die Feedback-Struktur"]
+            )
+            results[f.name] = evaluation
+    
+    except Exception as e:
+        print(f"  ! Unerwarteter Fehler: {str(e)}")
+        status = f"API Error: {str(e)}"
+        
+        # Placeholder evaluation (every score 0) carrying the error text; reached for any other exception raised inside the try
+        evaluation = EvaluationResult(
+            scores={
+                "A1": ScoreItem(score=0, justification=f"FEHLER: Bewertung fehlgeschlagen. Details: {str(e)}"),
+                "A2": ScoreItem(score=0, justification=f"FEHLER: Bewertung fehlgeschlagen. Details: {str(e)}"),
+                "B1": ScoreItem(score=0, justification=f"FEHLER: Bewertung fehlgeschlagen. Details: {str(e)}"),
+                "B2": ScoreItem(score=0, justification=f"FEHLER: Bewertung fehlgeschlagen. Details: {str(e)}"),
+                "C1": ScoreItem(score=0, justification=f"FEHLER: Bewertung fehlgeschlagen. Details: {str(e)}"),
+                "D1": ScoreItem(score=0, justification=f"FEHLER: Bewertung fehlgeschlagen. Details: {str(e)}"),
+                "D2": ScoreItem(score=0, justification=f"FEHLER: Bewertung fehlgeschlagen. Details: {str(e)}"),
+                "E1": ScoreItem(score=0, justification=f"FEHLER: Bewertung fehlgeschlagen. Details: {str(e)}"),
+                "F1": ScoreItem(score=0, justification=f"FEHLER: Bewertung fehlgeschlagen. Details: {str(e)}")
+            },
+            total_score=0,
+            quality_level="Fehlerhaft",
+            strengths=["Bewertung fehlgeschlagen"],
+            weaknesses=["Keine Bewertung möglich"],
+            improvement_suggestions=["Korrigieren Sie die Feedback-Struktur"]
+        )
+        results[f.name] = evaluation
+        eval_duration = time.time() - eval_start
+        criterion_timings["Gesamtbewertung"] = eval_duration
+
+    # Build the plain-text report: header with timings, per-criterion scores, summary lists
+    report = f"""FEEDBACK-EVALUATION BERICHT
+============================
+Eingabedatei: {f.name}
+Erstellungsdatum: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
+Kursreferenz: "Feedback in der Lehre: Basics" (Hochschulmedizin Dresden)
+
+VERARBEITUNGSZEITEN
+----------------------------------------
+Gesamtverarbeitung: {time.time() - file_start:.2f} Sekunden
+"""
+    for criterion, duration in criterion_timings.items():
+        report += f"  • {criterion}: {duration:.2f} Sekunden\n"
+
+    # A reply can validate and still lack criteria; add placeholders so the fixed-key lookups below and in the CSV row cannot abort the batch with a KeyError
+    for code in ("A1", "A2", "B1", "B2", "C1", "D1", "D2", "E1", "F1"):
+        evaluation.scores.setdefault(code, ScoreItem(score=0, justification="FEHLER: Kriterium nicht bewertet"))
+    report += f"""
+
+KRITERIENBEWERTUNG
+----------------------------------------
+A1 PERSPEKTIVE (Ich-Botschaften): {evaluation.scores['A1'].score}/2
+Begründung: {evaluation.scores['A1'].justification}
+
+A2 RESPEKT & WERTFREIHEIT: {evaluation.scores['A2'].score}/2
+Begründung: {evaluation.scores['A2'].justification}
+
+B1 KONKRETHEIT: {evaluation.scores['B1'].score}/2
+Begründung: {evaluation.scores['B1'].justification}
+
+B2 TRENNUNG VON BEOBACHTUNG UND INTERPRETATION: {evaluation.scores['B2'].score}/2
+Begründung: {evaluation.scores['B2'].justification}
+
+C1 STRUKTURIERTE LOGIK (WWW/BEB): {evaluation.scores['C1'].score}/2
+Begründung: {evaluation.scores['C1'].justification}
+
+D1 ZUKUNGSORIENTIERTE EMPFEHLUNG: {evaluation.scores['D1'].score}/2
+Begründung: {evaluation.scores['D1'].justification}
+
+D2 WERTSCHÄTZENDER ABSCHLUSS: {evaluation.scores['D2'].score}/2
+Begründung: {evaluation.scores['D2'].justification}
+
+E1 KOMMUNIKATIONSEBENEN: {evaluation.scores['E1'].score}/2
+Begründung: {evaluation.scores['E1'].justification}
+
+F1 FÖRDERUNG VON REFLEXION: {evaluation.scores['F1'].score}/2
+Begründung: {evaluation.scores['F1'].justification}
+
+GESAMTBEWERTUNG
+----------------------------------------
+Gesamtpunktzahl: {evaluation.total_score}/18
+
+Qualitätsstufe: {evaluation.quality_level}
+
+Stärken:
+"""
+    for strength in evaluation.strengths:
+        report += f"- {strength}\n"
+        
+    report += "\nSchwächen:\n"
+    for weakness in evaluation.weaknesses:
+        report += f"- {weakness}\n"
+        
+    report += "\nVerbesserungsvorschläge:\n"
+    for suggestion in evaluation.improvement_suggestions:
+        report += f"- {suggestion}\n"
+    
+    # Write the report next to the other outputs as <input stem>_evaluation.txt
+    output_path = Path(output_dir) / f"{f.stem}_evaluation.txt"
+    with open(output_path, "w", encoding="utf-8") as out_file:
+        out_file.write(report)
+    
+    # Append this file's row to the timing CSV (same column order as the header row)
+    with open(csv_timing_path, "a", encoding="utf-8", newline="") as csv_file:
+        csv_writer = csv.writer(csv_file, delimiter=",")
+        csv_writer.writerow([
+            f.name,
+            f"{time.time() - file_start:.2f}",
+            f"{eval_duration:.2f}",
+            f"{criterion_timings.get('Reasoning', 0):.2f}",  # per-file dict, so a failed request cannot inherit the previous file's value
+            start_time_str,
+            datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
+            status,
+            evaluation.total_score,
+            evaluation.quality_level,
+            evaluation.scores['A1'].score,
+            evaluation.scores['A2'].score,
+            evaluation.scores['B1'].score,
+            evaluation.scores['B2'].score,
+            evaluation.scores['C1'].score,
+            evaluation.scores['D1'].score,
+            evaluation.scores['D2'].score,
+            evaluation.scores['E1'].score,
+            evaluation.scores['F1'].score
+        ])
+
+    # Append this file's timing summary to the shared timing log, then echo it on the console
+    with open(timing_log_path, "a", encoding="utf-8") as log:
+        log.write(f"Datei: {f.name}\n")
+        log.write(f"Start: {start_time_str.split()[1]}\n")  # HH:MM:SS of the stamp taken when this file started, not the time of writing
+        log.write(f"Dauer: {time.time() - file_start:.2f} Sekunden\n")
+        log.write("Detailierte Zeiten:\n")
+        for criterion, duration in criterion_timings.items():
+            log.write(f"  • {criterion}: {duration:.2f} Sekunden\n")
+        log.write("-"*50 + "\n\n")
+    
+    print(f"\nBewertungsbericht erstellt: {output_path}")
+    print(f"Gesamtzeit für {f.name}: {time.time() - file_start:.2f} Sekunden")
+    print(f"{'='*50}")
+
+total_duration = time.time() - total_start  # seconds elapsed since the batch started
+print(f"\n{'='*50}")
+print("ALLE BEWERTUNGEN ABGESCHLOSSEN")
+print(f"Gesamtverarbeitungszeit: {total_duration:.2f} Sekunden für {len(files)} Dateien")
+print(f"Durchschnittliche Zeit pro Datei: {(total_duration/len(files) if files else 0.0):.2f} Sekunden")  # no ZeroDivisionError when the glob matched no file
+print(f"Bewertungsberichte gespeichert in: {output_dir}")
+print(f"Timing-Log aktualisiert: {timing_log_path}")
+print(f"CSV-Timing-Datei erstellt: {csv_timing_path}")
+print(f"{'='*50}")
+
+
+
+##
+
+
+
+
+
+
+# %% Feedback_Bewertung
+import openai
+import os
+from pathlib import Path
+import datetime
+import time
+import json as json_module
+import csv
+from pydantic import BaseModel
+
+client = openai.OpenAI(
+    api_key=os.environ.get("PLUTO_API_KEY", "sk--T3QiY4gBE67o9oSxEOqxw"),  # env override; TODO: rotate the committed key and drop this fallback
+    base_url="http://pluto/v1"
+)
+
+EVAL_PROMPT = '''
+Du bist ein strenger, objektiver Bewertender für medizinische Lehre.
+Bewerte das folgende Feedback anhand der Kursinhalte "Feedback in der Lehre: Basics".
+
+KRITERIEN (basierend auf Kursmaterial):
+A1 PERSPEKTIVE (Ich-Botschaften)
+"A feedback ... wird in „Ich-Botschaften" ausgedrückt."
+Bewertung: Wird subjektive Wahrnehmung in Ich-Formulierungen dargestellt?
+
+A2 RESPEKT & WERTFREIHEIT
+"Ein Feedback ... ist nicht (ab)wertend."
+Bewertung: Wird respektvoll und wertfrei kommuniziert?
+
+B1 KONKRETHEIT
+"Das Feedback sollte so konkret wie möglich sein. Die Wiedergabe beobachteter Beispiele ist hilfreich."
+Bewertung: Enthält das Feedback beobachtbare Beispiele statt Verallgemeinerungen?
+
+B2 TRENNUNG VON BEOBACHTUNG UND INTERPRETATION
+"Ein Feedback ... gibt erst nach der Äußerung von sinnlich Wahrnehmbarem die Möglichkeit zu Interpretationen, Annahmen und Schlussfolgerungen."
+Bewertung: Wird zwischen beobachtbaren Fakten und Interpretationen unterschieden?
+
+C1 STRUKTURIERTE LOGIK (WWW/BEB-Prinzip)
+WWW: "1. Wahrnehmung: Ich habe gesehen ... 2. Wirkung: ... das hat mich nervös gemacht. 3. Wunsch: Ich wünsche mir ..."
+BEB: "1. Beobachtung: Ich habe gesehen ... 2. Empfehlung: Ich empfehle ... 3. Begründung: Auf diese Weise vermeiden Sie ..."
+Bewertung: Folgt das Feedback einer klaren Struktur (WWW oder BEB)?
+
+D1 ZUKUNGSORIENTIERTE EMPFEHLUNG
+"Ein Feedback ... endet mit einer wertschätzenden Anregung für zukünftige Verbesserungen."
+Bewertung: Gibt es konkrete, zukunftsorientierte Handlungsempfehlungen?
+
+D2 WERTSCHÄTZENDER ABSCHLUSS
+"Ein Feedback ... endet mit einer wertschätzenden Anregung für zukünftige Verbesserungen."
+Bewertung: Schließt das Feedback wertschätzend ab?
+
+E1 KOMMUNIKATIONSEBENEN
+"Vier Seiten einer Nachricht: Sachinhalt, Selbstoffenbarung, Beziehung, Appell"
+Bewertung: Berücksichtigt das Feedback die verschiedenen Kommunikationsebenen?
+
+F1 FÖRDERUNG VON REFLEXION
+"Feedback ... ist eines der einflussreichsten Faktoren für den Lernerfolg."
+Bewertung: Fördert das Feedback die Reflexion und das Lernen?
+
+SCORING-ANLEITUNG:
+1 = Vollständige Umsetzung (exzellentes Beispiel)
+2 = Gute Umsetzung mit minimalen Lücken
+3 = Grundlegende Umsetzung mit signifikanten Mängeln
+4 = Unzureichende Umsetzung (wichtige Elemente fehlen)
+5 = Keine erkennbare Umsetzung (kriterienwidrig)
+
+WICHTIG: Gib die Ergebnisse AUSSCHLIESSLICH als JSON mit EXAKT folgender Struktur zurück:
+{
+  "scores": {
+    "A1": {"score": 1, "justification": "Begründung hier"},
+    "A2": {"score": 1, "justification": "Begründung hier"},
+    "B1": {"score": 1, "justification": "Begründung hier"},
+    "B2": {"score": 1, "justification": "Begründung hier"},
+    "C1": {"score": 1, "justification": "Begründung hier"},
+    "D1": {"score": 1, "justification": "Begründung hier"},
+    "D2": {"score": 1, "justification": "Begründung hier"},
+    "E1": {"score": 1, "justification": "Begründung hier"},
+    "F1": {"score": 1, "justification": "Begründung hier"}
+  },
+  "total_score": 0,
+  "quality_level": "Beispiel-Qualitätsstufe",
+  "strengths": ["Stärke 1", "Stärke 2"],
+  "weaknesses": ["Schwäche 1", "Schwäche 2"],
+  "improvement_suggestions": ["Vorschlag 1", "Vorschlag 2", "Vorschlag 3"]
+}
+'''
+
+class ScoreItem(BaseModel):
+    score: int  # the prompt asks for 1-5 (1=excellent, 5=failed); the range is not enforced here
+    justification: str  # the evaluator's reasoning for this score
+
+class EvaluationResult(BaseModel):
+    scores: dict[str, ScoreItem]  # keyed by criterion code (the report reads A1..F1); which keys are present is not checked
+    total_score: int  # required by the schema; this cell's report shows a computed average instead
+    quality_level: str
+    strengths: list[str]
+    weaknesses: list[str]
+    improvement_suggestions: list[str]
+##
+
+# %% Main
+# Transcripts are read from input_dir; everything this run produces goes below output_dir
+input_dir = "./cruscloud/Teil3/Transkripte/"
+output_dir = "./cruscloud/Teil3/Evaluations_moodle3"  # hard-coded; change as needed
+Path(output_dir).mkdir(parents=True, exist_ok=True)
+
+# Start a fresh timing log (mode "w" truncates): dated title, a rule of 80 '=', one blank line
+timing_log_path = Path(output_dir) / "evaluation_timing.log"
+with open(timing_log_path, "w", encoding="utf-8") as log:
+    log.write(
+        f"FEEDBACK EVALUATION TIMING LOG - {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n"
+        + "=" * 80
+        + "\n\n"
+    )
+
+# Start a fresh timing CSV that holds only the header row; one row per file is appended later
+csv_timing_path = Path(output_dir) / "evaluation_timings.csv"
+with open(csv_timing_path, "w", encoding="utf-8", newline="") as csv_file:
+    csv_writer = csv.writer(csv_file, delimiter=",")
+    csv_writer.writerow(
+        [
+            "Filename",
+            "Total_Time_sec",
+            "API_Evaluation_Time_sec",
+            "Start_Time",
+            "End_Time",
+            "Status",
+            "Average_Score",
+            "Quality_Level",
+        ]
+        + [
+            f"{code}_Score"
+            for code in ("A1", "A2", "B1", "B2", "C1", "D1", "D2", "E1", "F1")
+        ]
+    )
+
+# All *.txt transcripts of input_dir; results collects one evaluation per file name
+files = list(Path(input_dir).glob("*.txt"))
+results = {}
+total_start = time.time()
+
+for f in files:
+    # Per-file bookkeeping: start of the duration measurement and the start stamp written to the CSV row
+    file_start = time.time()
+    start_time_str = f"{datetime.datetime.now():%Y-%m-%d %H:%M:%S}"
+    print("\n" + "=" * 50)
+    print(f"Beginne Bewertung: {f.name}")
+    print(f"Startzeit: {datetime.datetime.now():%H:%M:%S}")
+
+    # The whole transcript is the text to be evaluated
+    text = f.read_text(encoding="utf-8")
+    # Reset the per-file timing table and assume success until a handler says otherwise
+    criterion_timings = {}
+    status = "Success"
+
+    # One request per file scores all nine criteria; every failure path below still leaves an `evaluation` for the report
+    try:
+        eval_start = time.time()
+        response = client.chat.completions.create(
+            model="GPT-OSS-120B",
+            messages=[
+                {"role": "system", "content": EVAL_PROMPT},
+                {"role": "user", "content": text},
+            ],
+            response_format={"type": "json_object"},
+            temperature=0.1
+        )
+        eval_duration = time.time() - eval_start
+        criterion_timings["Gesamtbewertung"] = eval_duration
+        print(f"  • Gesamtbewertung: {eval_duration:.2f} sec")
+
+        # Parse the JSON response
+        try:
+            parsed_response = json_module.loads(response.choices[0].message.content)
+
+            # Top-level keys the schema requires
+            required_keys = ["scores", "total_score", "quality_level",
+                            "strengths", "weaknesses", "improvement_suggestions"]
+
+            # Fix-up runs only when a top-level key is missing; an incomplete `scores` object inside an otherwise complete reply is passed through unchanged
+            if not all(key in parsed_response for key in required_keys):
+                print(f"  ! Warnung: Ungewöhnliche Antwortstruktur erkannt. Versuche Konvertierung...")
+                status = "Partial Structure"
+
+                # Start from worst-score placeholders (5) for every criterion and error texts for the summary fields
+                fixed_response = {
+                    "scores": {
+                        "A1": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "A2": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "B1": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "B2": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "C1": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "D1": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "D2": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "E1": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "F1": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"}
+                    },
+                    "total_score": 0,
+                    "quality_level": "Fehlerhaft",
+                    "strengths": ["Strukturfehler in der Bewertung"],
+                    "weaknesses": ["Antwortstruktur nicht korrekt"],
+                    "improvement_suggestions": ["Überprüfen Sie die Feedback-Struktur"]
+                }
+
+                # Overlay whatever the reply did provide; copied values are not checked here, EvaluationResult(...) below validates them
+                if "scores" in parsed_response:
+                    for key in ["A1", "A2", "B1", "B2", "C1", "D1", "D2", "E1", "F1"]:
+                        if key in parsed_response["scores"]:
+                            fixed_response["scores"][key] = parsed_response["scores"][key]
+
+                if "total_score" in parsed_response:
+                    fixed_response["total_score"] = parsed_response["total_score"]
+
+                if "quality_level" in parsed_response and parsed_response["quality_level"]:
+                    fixed_response["quality_level"] = parsed_response["quality_level"]
+
+                if "strengths" in parsed_response and isinstance(parsed_response["strengths"], list):
+                    fixed_response["strengths"] = parsed_response["strengths"]
+
+                if "weaknesses" in parsed_response and isinstance(parsed_response["weaknesses"], list):
+                    fixed_response["weaknesses"] = parsed_response["weaknesses"]
+
+                if "improvement_suggestions" in parsed_response and isinstance(parsed_response["improvement_suggestions"], list):
+                    fixed_response["improvement_suggestions"] = parsed_response["improvement_suggestions"]
+
+                parsed_response = fixed_response
+
+            # Validate against the Pydantic schema; a validation failure is handled by the outer except (status becomes "API Error: ...")
+            evaluation = EvaluationResult(**parsed_response)
+            results[f.name] = evaluation
+
+        except json_module.JSONDecodeError as e:
+            print(f"  ! JSON-Decoding-Fehler: {e}")
+            print(f"  ! Antwortinhalt: {response.choices[0].message.content[:200]}...")
+            status = f"JSON Error: {str(e)}"
+
+            # Placeholder evaluation (every score 5) so the report and CSV row can still be written
+            error_explanation = f"FEHLER: Ungültige JSON-Antwort vom API. Details: {str(e)}"
+            default_scores = {
+                key: ScoreItem(score=5, justification=error_explanation)
+                for key in ["A1", "A2", "B1", "B2", "C1", "D1", "D2", "E1", "F1"]
+            }
+
+            evaluation = EvaluationResult(
+                scores=default_scores,
+                total_score=0,
+                quality_level="Fehlerhaft",
+                strengths=["Bewertung fehlgeschlagen"],
+                weaknesses=["Ungültiges JSON-Format"],
+                improvement_suggestions=["Überprüfen Sie die Feedback-Struktur"]
+            )
+            results[f.name] = evaluation
+
+    except Exception as e:
+        print(f"  ! Unerwarteter Fehler: {str(e)}")
+        status = f"API Error: {str(e)}"
+
+        # Placeholder evaluation (every score 5) carrying the error text; reached for any other exception raised inside the try
+        error_explanation = f"FEHLER: Bewertung fehlgeschlagen. Details: {str(e)}"
+        default_scores = {
+            key: ScoreItem(score=5, justification=error_explanation)
+            for key in ["A1", "A2", "B1", "B2", "C1", "D1", "D2", "E1", "F1"]
+        }
+
+        evaluation = EvaluationResult(
+            scores=default_scores,
+            total_score=0,
+            quality_level="Systemfehler",
+            strengths=["Bewertung fehlgeschlagen"],
+            weaknesses=[f"Technischer Fehler: {str(e)}"],
+            improvement_suggestions=["Kontaktieren Sie den Support"]
+        )
+        results[f.name] = evaluation
+        eval_duration = time.time() - eval_start
+        criterion_timings["Gesamtbewertung"] = eval_duration
+
+    # A reply can validate and still lack criteria; add worst-score placeholders so the fixed-key lookups below and in the CSV row cannot abort the batch with a KeyError
+    for code in ("A1", "A2", "B1", "B2", "C1", "D1", "D2", "E1", "F1"):
+        evaluation.scores.setdefault(code, ScoreItem(score=5, justification="FEHLER: Kriterium nicht bewertet"))
+
+    # Mean of the criterion scores (1 = best, 5 = worst), not their sum; 5.0 when there are none
+    all_scores = [item.score for item in evaluation.scores.values()]
+    valid_scores = [s for s in all_scores if isinstance(s, int)]
+    average_score = sum(valid_scores) / len(valid_scores) if valid_scores else 5.0
+
+    # Build the plain-text report: header with timings, per-criterion scores, summary lists
+    report = f'''FEEDBACK-EVALUATION BERICHT
+============================
+Eingabedatei: {f.name}
+Erstellungsdatum: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
+Kursreferenz: "Feedback in der Lehre: Basics" (Hochschulmedizin Dresden)
+
+VERARBEITUNGSZEITEN
+----------------------------------------
+Gesamtverarbeitung: {time.time() - file_start:.2f} Sekunden
+'''
+    for criterion, duration in criterion_timings.items():
+        report += f"  • {criterion}: {duration:.2f} Sekunden\n"
+    report += f'''
+
+KRITERIENBEWERTUNG
+----------------------------------------
+A1 PERSPEKTIVE (Ich-Botschaften): {evaluation.scores['A1'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['A1'].justification}
+
+A2 RESPEKT & WERTFREIHEIT: {evaluation.scores['A2'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['A2'].justification}
+
+B1 KONKRETHEIT: {evaluation.scores['B1'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['B1'].justification}
+
+B2 TRENNUNG VON BEOBACHTUNG UND INTERPRETATION: {evaluation.scores['B2'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['B2'].justification}
+
+C1 STRUKTURIERTE LOGIK (WWW/BEB): {evaluation.scores['C1'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['C1'].justification}
+
+D1 ZUKUNGSORIENTIERTE EMPFEHLUNG: {evaluation.scores['D1'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['D1'].justification}
+
+D2 WERTSCHÄTZENDER ABSCHLUSS: {evaluation.scores['D2'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['D2'].justification}
+
+E1 KOMMUNIKATIONSEBENEN: {evaluation.scores['E1'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['E1'].justification}
+
+F1 FÖRDERUNG VON REFLEXION: {evaluation.scores['F1'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['F1'].justification}
+
+GESAMTBEWERTUNG
+----------------------------------------
+Durchschnittliche Bewertung: {average_score:.1f}/5
+(1=exzellent, 5=nicht bestanden)
+
+Qualitätsstufe: {evaluation.quality_level}
+
+Stärken:
+'''
+    for strength in evaluation.strengths:
+        report += f"- {strength}\n"
+
+    report += "\nSchwächen:\n"
+    for weakness in evaluation.weaknesses:
+        report += f"- {weakness}\n"
+
+    report += "\nVerbesserungsvorschläge:\n"
+    for suggestion in evaluation.improvement_suggestions:
+        report += f"- {suggestion}\n"
+
+    # Write the report next to the other outputs as <input stem>_evaluation.txt
+    output_path = Path(output_dir) / f"{f.stem}_evaluation.txt"
+    with open(output_path, "w", encoding="utf-8") as out_file:
+        out_file.write(report)
+
+    # Append this file's row to the timing CSV; the order follows the header row
+    with open(csv_timing_path, "a", encoding="utf-8", newline="") as csv_file:
+        timing_row = [
+            # identification and timings
+            f.name,
+            format(time.time() - file_start, ".2f"),
+            format(eval_duration, ".2f"),
+            start_time_str,
+            f"{datetime.datetime.now():%Y-%m-%d %H:%M:%S}",
+            # outcome
+            status,
+            format(average_score, ".1f"),  # mean of the criterion scores computed above
+            evaluation.quality_level,
+        ]
+        # per-criterion scores, A1 through F1
+        timing_row.extend(
+            evaluation.scores[code].score
+            for code in ("A1", "A2", "B1", "B2", "C1", "D1", "D2", "E1", "F1")
+        )
+
+        csv_writer = csv.writer(csv_file, delimiter=",")
+        csv_writer.writerow(timing_row)
+
+    # Append this file's timing summary to the shared timing log, then echo it on the console
+    with open(timing_log_path, "a", encoding="utf-8") as log:
+        log.write(f"Datei: {f.name}\n")
+        log.write(f"Start: {start_time_str.split()[1]}\n")  # HH:MM:SS of the stamp taken when this file started, not the time of writing
+        log.write(f"Dauer: {time.time() - file_start:.2f} Sekunden\n")
+        log.write("Detailierte Zeiten:\n")
+        for criterion, duration in criterion_timings.items():
+            log.write(f"  • {criterion}: {duration:.2f} Sekunden\n")
+        log.write("-"*50 + "\n\n")
+
+    print(f"\nBewertungsbericht erstellt: {output_path}")
+    print(f"Gesamtzeit für {f.name}: {time.time() - file_start:.2f} Sekunden")
+    print(f"{'='*50}")
+
+total_duration = time.time() - total_start  # wall-clock time for the whole batch
+print(f"\n{'='*50}")
+print("ALLE BEWERTUNGEN ABGESCHLOSSEN")
+print(f"Gesamtverarbeitungszeit: {total_duration:.2f} Sekunden für {len(files)} Dateien")
+print(f"Durchschnittliche Zeit pro Datei: {(total_duration/len(files) if files else 0.0):.2f} Sekunden")  # guard: no input files must not raise ZeroDivisionError
+print(f"Bewertungsberichte gespeichert in: {output_dir}")
+print(f"Timing-Log aktualisiert: {timing_log_path}")
+print(f"CSV-Timing-Datei erstellt: {csv_timing_path}")
+print(f"{'='*50}")
+##
+
+
+
+
+# %% Isabella
+
+
+import openai
+import os
+from pathlib import Path
+import datetime
+import time
+import json as json_module
+import csv
+from pydantic import BaseModel
+import math  # kept in case floor/ceil are needed; the built-in round() is sufficient for the rounding below
+
+client = openai.OpenAI(  # client for the OpenAI-compatible endpoint configured via base_url
+    api_key=os.environ.get("OPENAI_API_KEY"),  # secret comes from the environment; do not commit credentials
+    base_url="http://pluto/v1"
+)
+
+EVAL_PROMPT = '''
+ Du bist ein strenger, objektiver Bewertender für medizinische Lehre. Bewerte das folgende Feedback anhand der Kursinhalte "Feedback in der Lehre: Basics". KRITERIEN (basierend auf Kursmaterial): A1 PERSPEKTIVE (Ich-Botschaften) Bewertung: Wird subjektive Wahrnehmung in Ich-Formulierungen dargestellt? A2 RESPEKT & WERTFREIHEIT Bewertung: Wird respektvoll und wertfrei kommuniziert? B1 KONKRETHEIT "Das Feedback sollte so konkret wie möglich sein. Die Wiedergabe beobachteter Beispiele ist hilfreich." Bewertung: Enthält das Feedback beobachtbare Beispiele statt Verallgemeinerungen? B2 TRENNUNG VON BEOBACHTUNG UND INTERPRETATION Bewertung: Wird zwischen beobachtbaren Fakten und Interpretationen unterschieden? C1 STRUKTURIERTE LOGIK (WWW/BEB-Prinzip) WWW: "1. Wahrnehmung: Ich habe gesehen ... 2. Wirkung: ... das hat mich nervös gemacht. 3. Wunsch: Ich wünsche mir ..." BEB: "1. Beobachtung: Ich habe gesehen ... 2. Empfehlung: Ich empfehle ... 3. Begründung: Auf diese Weise vermeiden Sie ..." Bewertung: Folgt das Feedback einer klaren Struktur (WWW oder BEB)? D1 ZUKUNGSORIENTIERTE EMPFEHLUNG Bewertung: Gibt es konkrete, zukunftsorientierte Handlungsempfehlungen, die wertschätzend formuliert sind? D2 WERTSCHÄTZENDER ABSCHLUSS Bewertung: Schließt das Feedback wertschätzend ab? E1 KOMMUNIKATIONSEBENEN "Vier Seiten einer Nachricht: Sachinhalt, Selbstoffenbarung, Beziehung, Appell" Bewertung: Berücksichtigt das Feedback die verschiedenen Kommunikationsebenen? F1 FÖRDERUNG VON REFLEXION Bewertung: Fördert das Feedback die Reflexion und das Lernen?
+
+SCORING-ANLEITUNG:
+1 = Vollständige Umsetzung (exzellentes Beispiel)
+2 = Gute Umsetzung mit minimalen Lücken
+3 = Grundlegende Umsetzung mit signifikanten Mängeln
+4 = Unzureichende Umsetzung (wichtige Elemente fehlen)
+5 = Keine erkennbare Umsetzung (kriterienwidrig)
+
+WICHTIG: Gib die Ergebnisse AUSSCHLIESSLICH als JSON mit EXAKT folgender Struktur zurück:
+{
+  "scores": {
+    "A1": {"score": 1, "justification": "Begründung hier"},
+    "A2": {"score": 1, "justification": "Begründung hier"},
+    "B1": {"score": 1, "justification": "Begründung hier"},
+    "B2": {"score": 1, "justification": "Begründung hier"},
+    "C1": {"score": 1, "justification": "Begründung hier"},
+    "D1": {"score": 1, "justification": "Begründung hier"},
+    "D2": {"score": 1, "justification": "Begründung hier"},
+    "E1": {"score": 1, "justification": "Begründung hier"},
+    "F1": {"score": 1, "justification": "Begründung hier"}
+  },
+  "total_score": 0,
+  "quality_level": "Beispiel-Qualitätsstufe",
+  "strengths": ["Stärke 1", "Stärke 2"],
+  "weaknesses": ["Schwäche 1", "Schwäche 2"],
+  "improvement_suggestions": ["Vorschlag 1", "Vorschlag 2", "Vorschlag 3"]
+}
+'''
+
+class ScoreItem(BaseModel):  # grade for a single criterion together with the grader's reasoning
+    score: int  # 1-5 (1=excellent, 5=failed), matching the scoring guide in EVAL_PROMPT
+    justification: str  # free-text rationale; printed as "Begründung" in the report
+
+class EvaluationResult(BaseModel):  # validated form of the JSON object that EVAL_PROMPT asks the model to return
+    scores: dict[str, ScoreItem]  # keyed by criterion code; the scripts below read A1, A2, B1, B2, C1, D1, D2, E1, F1
+    total_score: int  # requested from the model but not read by the scripts below, which average `scores` themselves
+    quality_level: str  # label written to the report and the CSV
+    strengths: list[str]  # rendered as a bullet list under "Stärken"
+    weaknesses: list[str]  # rendered as a bullet list under "Schwächen"
+    improvement_suggestions: list[str]  # rendered as a bullet list under "Verbesserungsvorschläge"
+##
+
+
+
+# %% Main
+input_dir = "./cruscloud/Teil3/Transkripte/"
+# Hardcoded output directory - CHANGE THIS PATH AS NEEDED
+output_dir = "./cruscloud/Teil3/Evaluations_moodle3"
+Path(output_dir).mkdir(parents=True, exist_ok=True)
+
+# Create timing log file
+timing_log_path = Path(output_dir) / "evaluation_timing.log"
+with open(timing_log_path, "w", encoding="utf-8") as log:
+    log.write(f"FEEDBACK EVALUATION TIMING LOG - {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
+    log.write("="*80 + "\n\n")
+
+# Create CSV timing file with headers
+csv_timing_path = Path(output_dir) / "evaluation_timings.csv"
+with open(csv_timing_path, "w", encoding="utf-8", newline="") as csv_file:
+    csv_writer = csv.writer(csv_file, delimiter=",")
+    # Write CSV header
+    csv_writer.writerow([
+        "Filename",
+        "Total_Time_sec",
+        "API_Evaluation_Time_sec",
+        "Start_Time",
+        "End_Time",
+        "Status",
+        "Average_Score",
+        "Quality_Level",
+        "A1_Score",
+        "A2_Score",
+        "B1_Score",
+        "B2_Score",
+        "C1_Score",
+        "D1_Score",
+        "D2_Score",
+        "E1_Score",
+        "F1_Score"
+    ])
+
+files = list(Path(input_dir).glob("*.txt"))
+results = {}
+total_start = time.time()
+
+for f in files:
+    file_start = time.time()
+    start_time_str = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+    print(f"\n{'='*50}")
+    print(f"Beginne Bewertung: {f.name}")
+    print(f"Startzeit: {datetime.datetime.now().strftime('%H:%M:%S')}")
+
+    # Read input text
+    text = f.read_text(encoding="utf-8")
+
+    # Get AI evaluation with timing
+    status = "Success"
+    eval_duration = 0.0
+    eval_end = file_start # Initialisierung
+
+    # We'll evaluate all criteria in one call with strict JSON structure
+    try:
+        eval_start_api = time.time()
+        response = client.chat.completions.create(
+            model="GPT-OSS-120B",
+            messages=[
+                {"role": "system", "content": EVAL_PROMPT},
+                {"role": "user", "content": text},
+            ],
+            response_format={"type": "json_object"},
+            temperature=0.1
+        )
+        eval_duration = time.time() - eval_start_api # API-Zeit gemessen
+        
+        print(f"  • Gesamtbewertung (API-Laufzeit): {eval_duration:.2f} sec")
+
+        # Parse the JSON response
+        try:
+            parsed_response = json_module.loads(response.choices[0].message.content)
+            eval_end = time.time() # Zeitpunkt nach JSON-Parsing
+
+            # Validate structure before passing to Pydantic
+            required_keys = ["scores", "total_score", "quality_level",
+                             "strengths", "weaknesses", "improvement_suggestions"]
+
+            # If the response has a different structure, try to fix it
+            if not all(key in parsed_response for key in required_keys):
+                print(f"  ! Warnung: Ungewöhnliche Antwortstruktur erkannt. Versuche Konvertierung...")
+                status = "Partial Structure"
+
+                # Create a properly structured response
+                fixed_response = {
+                    "scores": {
+                        "A1": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "A2": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "B1": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "B2": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "C1": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "D1": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "D2": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "E1": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "F1": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"}
+                    },
+                    "total_score": 0,
+                    "quality_level": "Fehlerhaft",
+                    "strengths": ["Strukturfehler in der Bewertung"],
+                    "weaknesses": ["Antwortstruktur nicht korrekt"],
+                    "improvement_suggestions": ["Überprüfen Sie die Feedback-Struktur"]
+                }
+
+                # Try to populate with available data
+                if "scores" in parsed_response:
+                    for key in ["A1", "A2", "B1", "B2", "C1", "D1", "D2", "E1", "F1"]:
+                        if key in parsed_response["scores"]:
+                            fixed_response["scores"][key] = parsed_response["scores"][key]
+
+                if "total_score" in parsed_response:
+                    fixed_response["total_score"] = parsed_response["total_score"]
+
+                if "quality_level" in parsed_response and parsed_response["quality_level"]:
+                    fixed_response["quality_level"] = parsed_response["quality_level"]
+
+                if "strengths" in parsed_response and isinstance(parsed_response["strengths"], list):
+                    fixed_response["strengths"] = parsed_response["strengths"]
+
+                if "weaknesses" in parsed_response and isinstance(parsed_response["weaknesses"], list):
+                    fixed_response["weaknesses"] = parsed_response["weaknesses"]
+
+                if "improvement_suggestions" in parsed_response and isinstance(parsed_response["improvement_suggestions"], list):
+                    fixed_response["improvement_suggestions"] = parsed_response["improvement_suggestions"]
+
+                parsed_response = fixed_response
+
+            # Create evaluation object
+            evaluation = EvaluationResult(**parsed_response)
+            results[f.name] = evaluation
+
+        except json_module.JSONDecodeError as e:
+            print(f"  ! JSON-Decoding-Fehler: {e}")
+            print(f"  ! Antwortinhalt: {response.choices[0].message.content[:200]}...")
+            status = f"JSON Error: {str(e)}"
+            eval_end = time.time() # Zeitpunkt nach Fehler
+
+            # Create a default evaluation with error messages
+            error_explanation = f"FEHLER: Ungültige JSON-Antwort vom API. Details: {str(e)}"
+            default_scores = {
+                key: ScoreItem(score=5, justification=error_explanation)
+                for key in ["A1", "A2", "B1", "B2", "C1", "D1", "D2", "E1", "F1"]
+            }
+
+            evaluation = EvaluationResult(
+                scores=default_scores,
+                total_score=0,
+                quality_level="Fehlerhaft",
+                strengths=["Bewertung fehlgeschlagen"],
+                weaknesses=["Ungültiges JSON-Format"],
+                improvement_suggestions=["Überprüfen Sie die Feedback-Struktur"]
+            )
+            results[f.name] = evaluation
+
+    except Exception as e:
+        print(f"  ! Unerwarteter Fehler: {str(e)}")
+        status = f"API Error: {str(e)}"
+        
+        # Create a default evaluation with error messages
+        error_explanation = f"FEHLER: Bewertung fehlgeschlagen. Details: {str(e)}"
+        default_scores = {
+            key: ScoreItem(score=5, justification=error_explanation)
+            for key in ["A1", "A2", "B1", "B2", "C1", "D1", "D2", "E1", "F1"]
+        }
+
+        evaluation = EvaluationResult(
+            scores=default_scores,
+            total_score=0,
+            quality_level="Systemfehler",
+            strengths=["Bewertung fehlgeschlagen"],
+            weaknesses=[f"Technischer Fehler: {str(e)}"],
+            improvement_suggestions=["Kontaktieren Sie den Support"]
+        )
+        results[f.name] = evaluation
+        eval_end = time.time() # Zeitpunkt nach API-Fehler
+
+    # Mean of the per-criterion grades (an average, not a sum); 5.0 if nothing is usable
+    all_scores = [item.score for item in evaluation.scores.values()]
+    valid_scores = [grade for grade in all_scores if isinstance(grade, int)]
+    average_score = (sum(valid_scores) / len(valid_scores)) if valid_scores else 5.0
+
+    # Whole-number grade for the report and CSV; round() on a float already returns an int
+    rounded_average_score = round(average_score)
+
+    # --- Timing breakdown ---
+    # Elapsed time for this file up to this point. The report, CSV and log
+    # writes that follow are not part of the measured figure.
+    total_file_duration = time.time() - file_start
+    # Local processing time is the share of the total that was not spent
+    # waiting on the API call (eval_duration).
+    # Clamp at zero so an imprecise API measurement cannot produce a negative value.
+    local_processing_time = max(0.0, total_file_duration - eval_duration)
+    # (eval_duration stays 0.0 when the API call itself raised.)
+    # -------------------------------------
+
+    # Generate detailed text report with timing
+    report = f'''FEEDBACK-EVALUATION BERICHT
+============================
+Eingabedatei: {f.name}
+Erstellungsdatum: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
+Kursreferenz: "Feedback in der Lehre: Basics" (Hochschulmedizin Dresden)
+
+VERARBEITUNGSZEITEN
+----------------------------------------
+Gesamtverarbeitung: {total_file_duration:.2f} Sekunden
+  • API-Bewertungszeit: {eval_duration:.2f} Sekunden
+  • Lokale Verarbeitungszeit (Lesen, JSON, Bericht): {local_processing_time:.2f} Sekunden 
+'''
+
+    # Add evaluation results
+    report += f'''
+
+KRITERIENBEWERTUNG
+----------------------------------------
+A1 PERSPEKTIVE (Ich-Botschaften): {evaluation.scores['A1'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['A1'].justification}
+
+A2 RESPEKT & WERTFREIHEIT: {evaluation.scores['A2'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['A2'].justification}
+
+B1 KONKRETHEIT: {evaluation.scores['B1'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['B1'].justification}
+
+B2 TRENNUNG VON BEOBACHTUNG UND INTERPRETATION: {evaluation.scores['B2'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['B2'].justification}
+
+C1 STRUKTURIERTE LOGIK (WWW/BEB): {evaluation.scores['C1'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['C1'].justification}
+
+D1 ZUKUNGSORIENTIERTE EMPFEHLUNG: {evaluation.scores['D1'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['D1'].justification}
+
+D2 WERTSCHÄTZENDER ABSCHLUSS: {evaluation.scores['D2'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['D2'].justification}
+
+E1 KOMMUNIKATIONSEBENEN: {evaluation.scores['E1'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['E1'].justification}
+
+F1 FÖRDERUNG VON REFLEXION: {evaluation.scores['F1'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['F1'].justification}
+
+GESAMTBEWERTUNG
+----------------------------------------
+Durchschnittliche Bewertung: {rounded_average_score}/5
+(1=exzellent, 5=nicht bestanden)
+
+Qualitätsstufe: {evaluation.quality_level}
+
+Stärken:
+'''
+    for strength in evaluation.strengths:
+        report += f"- {strength}\n"
+
+    report += "\nSchwächen:\n"
+    for weakness in evaluation.weaknesses:
+        report += f"- {weakness}\n"
+
+    report += "\nVerbesserungsvorschläge:\n"
+    for suggestion in evaluation.improvement_suggestions:
+        report += f"- {suggestion}\n"
+
+    # Save report to output directory
+    output_path = Path(output_dir) / f"{f.stem}_evaluation.txt"
+    with open(output_path, "w", encoding="utf-8") as out_file:
+        out_file.write(report)
+
+    # Write timing data to CSV
+    with open(csv_timing_path, "a", encoding="utf-8", newline="") as csv_file:
+        csv_writer = csv.writer(csv_file, delimiter=",")
+        csv_writer.writerow([
+            f.name,
+            f"{total_file_duration:.2f}", 
+            f"{eval_duration:.2f}",
+            start_time_str,
+            datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
+            status,
+            rounded_average_score, 
+            evaluation.quality_level,
+            evaluation.scores['A1'].score,
+            evaluation.scores['A2'].score,
+            evaluation.scores['B1'].score,
+            evaluation.scores['B2'].score,
+            evaluation.scores['C1'].score,
+            evaluation.scores['D1'].score,
+            evaluation.scores['D2'].score,
+            evaluation.scores['E1'].score,
+            evaluation.scores['F1'].score
+        ])
+
+    # Append this file's timing summary to the shared log file
+    with open(timing_log_path, "a", encoding="utf-8") as log:
+        log.write(f"Datei: {f.name}\n")
+        log.write(f"Start: {start_time_str.split()[1]}\n")  # HH:MM:SS captured at loop entry, not the time of logging
+        log.write(f"Dauer: {total_file_duration:.2f} Sekunden\n")
+        log.write("Detailierte Zeiten:\n")
+        log.write(f"  • API-Bewertung: {eval_duration:.2f} Sekunden\n")
+        log.write(f"  • Lokale Verarbeitung: {local_processing_time:.2f} Sekunden\n")
+        log.write("-"*50 + "\n\n")
+
+    print(f"\nBewertungsbericht erstellt: {output_path}")
+    print(f"Gesamtzeit für {f.name}: {total_file_duration:.2f} Sekunden (API: {eval_duration:.2f}, Lokal: {local_processing_time:.2f})")
+    print(f"{'='*50}")
+
+total_duration = time.time() - total_start  # wall-clock time for the whole batch
+print(f"\n{'='*50}")
+print("ALLE BEWERTUNGEN ABGESCHLOSSEN")
+print(f"Gesamtverarbeitungszeit: {total_duration:.2f} Sekunden für {len(files)} Dateien")
+print(f"Durchschnittliche Zeit pro Datei: {(total_duration/len(files) if files else 0.0):.2f} Sekunden")  # guard: no input files must not raise ZeroDivisionError
+print(f"Bewertungsberichte gespeichert in: {output_dir}")
+print(f"Timing-Log aktualisiert: {timing_log_path}")
+print(f"CSV-Timing-Datei erstellt: {csv_timing_path}")
+print(f"{'='*50}")
+
+##
+
+
+
+
+# %% Main
+
+QUALITY_LEVEL_MAP = {  # rounded average grade -> quality label written to the report and CSV
+    1: "Exzellent (1)",
+    2: "Gut (2)",
+    3: "Befriedigend (3)",
+    4: "Ausreichend (4)",
+    5: "Mangelhaft/Ungenügend (5)",
+    0: "Fehlerhaft/Unbekannt"  # same text as the .get() fallback used where the map is looked up
+}
+
+
+input_dir = "./cruscloud/Teil3/Transkripte/"
+# Hardcoded output directory - CHANGE THIS PATH AS NEEDED
+output_dir = "./cruscloud/Teil3/Evaluations_moodle_isabella2"
+Path(output_dir).mkdir(parents=True, exist_ok=True)
+
+# Create timing log file
+timing_log_path = Path(output_dir) / "evaluation_timing.log"
+with open(timing_log_path, "w", encoding="utf-8") as log:
+    log.write(f"FEEDBACK EVALUATION TIMING LOG - {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
+    log.write("="*80 + "\n\n")
+
+# Create CSV timing file with headers
+csv_timing_path = Path(output_dir) / "evaluation_timings.csv"
+with open(csv_timing_path, "w", encoding="utf-8", newline="") as csv_file:
+    csv_writer = csv.writer(csv_file, delimiter=",")
+    # Write CSV header
+    csv_writer.writerow([
+        "Filename",
+        "Total_Time_sec",
+        "API_Evaluation_Time_sec",
+        "Start_Time",
+        "End_Time",
+        "Status",
+        "Average_Score",
+        "Quality_Level",
+        "A1_Score",
+        "A2_Score",
+        "B1_Score",
+        "B2_Score",
+        "C1_Score",
+        "D1_Score",
+        "D2_Score",
+        "E1_Score",
+        "F1_Score"
+    ])
+
+files = list(Path(input_dir).glob("*.txt"))
+results = {}
+total_start = time.time()
+
+for f in files:
+    file_start = time.time()
+    start_time_str = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+    print(f"\n{'='*50}")
+    print(f"Beginne Bewertung: {f.name}")
+    print(f"Startzeit: {datetime.datetime.now().strftime('%H:%M:%S')}")
+
+    # Read input text
+    text = f.read_text(encoding="utf-8")
+
+    # Get AI evaluation with timing
+    status = "Success"
+    eval_duration = 0.0
+
+    # We'll evaluate all criteria in one call with strict JSON structure
+    try:
+        eval_start_api = time.time()
+        response = client.chat.completions.create(
+            model="GPT-OSS-120B",
+            messages=[
+                {"role": "system", "content": EVAL_PROMPT},
+                {"role": "user", "content": text},
+            ],
+            response_format={"type": "json_object"},
+            temperature=0.1
+        )
+        eval_duration = time.time() - eval_start_api # API-Zeit gemessen
+
+        print(f"  • Gesamtbewertung (API-Laufzeit): {eval_duration:.2f} sec")
+
+        # Parse the JSON response
+        try:
+            parsed_response = json_module.loads(response.choices[0].message.content)
+
+            # Validate structure before passing to Pydantic
+            required_keys = ["scores", "total_score", "quality_level",
+                             "strengths", "weaknesses", "improvement_suggestions"]
+
+            # If the response has a different structure, try to fix it
+            if not all(key in parsed_response for key in required_keys):
+                print(f"  ! Warnung: Ungewöhnliche Antwortstruktur erkannt. Versuche Konvertierung...")
+                status = "Partial Structure"
+
+                # Create a properly structured response
+                fixed_response = {
+                    "scores": {
+                        "A1": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "A2": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "B1": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "B2": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "C1": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "D1": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "D2": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "E1": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"},
+                        "F1": {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"}
+                    },
+                    "total_score": 0,
+                    "quality_level": "Fehlerhaft",
+                    "strengths": ["Strukturfehler in der Bewertung"],
+                    "weaknesses": ["Antwortstruktur nicht korrekt"],
+                    "improvement_suggestions": ["Überprüfen Sie die Feedback-Struktur"]
+                }
+
+                # Try to populate with available data
+                if "scores" in parsed_response:
+                    for key in ["A1", "A2", "B1", "B2", "C1", "D1", "D2", "E1", "F1"]:
+                        if key in parsed_response["scores"]:
+                            fixed_response["scores"][key] = parsed_response["scores"][key]
+
+                if "total_score" in parsed_response:
+                    fixed_response["total_score"] = parsed_response["total_score"]
+
+                if "quality_level" in parsed_response and parsed_response["quality_level"]:
+                    fixed_response["quality_level"] = parsed_response["quality_level"]
+
+                if "strengths" in parsed_response and isinstance(parsed_response["strengths"], list):
+                    fixed_response["strengths"] = parsed_response["strengths"]
+
+                if "weaknesses" in parsed_response and isinstance(parsed_response["weaknesses"], list):
+                    fixed_response["weaknesses"] = parsed_response["weaknesses"]
+
+                if "improvement_suggestions" in parsed_response and isinstance(parsed_response["improvement_suggestions"], list):
+                    fixed_response["improvement_suggestions"] = parsed_response["improvement_suggestions"]
+
+                parsed_response = fixed_response
+
+            # Create evaluation object
+            evaluation = EvaluationResult(**parsed_response)
+            results[f.name] = evaluation
+
+        except json_module.JSONDecodeError as e:
+            print(f"  ! JSON-Decoding-Fehler: {e}")
+            print(f"  ! Antwortinhalt: {response.choices[0].message.content[:200]}...")
+            status = f"JSON Error: {str(e)}"
+
+            # Create a default evaluation with error messages
+            error_explanation = f"FEHLER: Ungültige JSON-Antwort vom API. Details: {str(e)}"
+            default_scores = {
+                key: ScoreItem(score=5, justification=error_explanation)
+                for key in ["A1", "A2", "B1", "B2", "C1", "D1", "D2", "E1", "F1"]
+            }
+
+            evaluation = EvaluationResult(
+                scores=default_scores,
+                total_score=0,
+                quality_level="Fehlerhaft",
+                strengths=["Bewertung fehlgeschlagen"],
+                weaknesses=["Ungültiges JSON-Format"],
+                improvement_suggestions=["Überprüfen Sie die Feedback-Struktur"]
+            )
+            results[f.name] = evaluation
+
+    except Exception as e:
+        print(f"  ! Unerwarteter Fehler: {str(e)}")
+        status = f"API Error: {str(e)}"
+
+        # Create a default evaluation with error messages
+        error_explanation = f"FEHLER: Bewertung fehlgeschlagen. Details: {str(e)}"
+        default_scores = {
+            key: ScoreItem(score=5, justification=error_explanation)
+            for key in ["A1", "A2", "B1", "B2", "C1", "D1", "D2", "E1", "F1"]
+        }
+
+        evaluation = EvaluationResult(
+            scores=default_scores,
+            total_score=0,
+            quality_level="Systemfehler",
+            strengths=["Bewertung fehlgeschlagen"],
+            weaknesses=[f"Technischer Fehler: {str(e)}"],
+            improvement_suggestions=["Kontaktieren Sie den Support"]
+        )
+        results[f.name] = evaluation
+
+    # Mean of the per-criterion grades (an average, not a sum); 5.0 if nothing is usable
+    all_scores = [item.score for item in evaluation.scores.values()]
+    valid_scores = [grade for grade in all_scores if isinstance(grade, int)]
+    average_score = (sum(valid_scores) / len(valid_scores)) if valid_scores else 5.0
+
+    # Whole-number grade for the report, CSV and quality-level lookup; round() on a float already returns an int
+    rounded_average_score = round(average_score)
+
+    # --- Assign a concrete quality level ---
+    # status carries the exception text after its prefix ("JSON Error: ..."), so match by prefix, not equality
+    if status.startswith(("JSON Error", "API Error", "Systemfehler", "Partial Structure")):
+        final_quality_level = evaluation.quality_level  # keep the error label set by the fallback paths
+    else:
+        final_quality_level = QUALITY_LEVEL_MAP.get(rounded_average_score, "Fehlerhaft/Unbekannt")  # label derived from the rounded average
+
+    # Overwrite the value on the evaluation object so the report and CSV show the final level
+    evaluation.quality_level = final_quality_level
+    # ----------------------------------------------------------------
+
+    # --- Timing breakdown ---
+    # Elapsed time for this file up to this point; the report, CSV and log
+    # writes that follow are not part of the measured figure.
+    total_file_duration = time.time() - file_start
+    # Local processing time = total minus the pure API wait (eval_duration),
+    # clamped at zero so the value can never be negative.
+    local_processing_time = max(0.0, total_file_duration - eval_duration)
+    # -------------------------------------
+
+    # Generate detailed text report with timing
+    report = f'''FEEDBACK-EVALUATION BERICHT
+============================
+Eingabedatei: {f.name}
+Erstellungsdatum: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
+Kursreferenz: "Feedback in der Lehre: Basics" (Hochschulmedizin Dresden)
+
+VERARBEITUNGSZEITEN
+----------------------------------------
+Gesamtverarbeitung: {total_file_duration:.2f} Sekunden
+  • API-Bewertungszeit: {eval_duration:.2f} Sekunden
+  • Lokale Verarbeitungszeit (Lesen, JSON, Bericht): {local_processing_time:.2f} Sekunden
+'''
+
+    # Add evaluation results
+    report += f'''
+
+KRITERIENBEWERTUNG
+----------------------------------------
+A1 PERSPEKTIVE (Ich-Botschaften): {evaluation.scores['A1'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['A1'].justification}
+
+A2 RESPEKT & WERTFREIHEIT: {evaluation.scores['A2'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['A2'].justification}
+
+B1 KONKRETHEIT: {evaluation.scores['B1'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['B1'].justification}
+
+B2 TRENNUNG VON BEOBACHTUNG UND INTERPRETATION: {evaluation.scores['B2'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['B2'].justification}
+
+C1 STRUKTURIERTE LOGIK (WWW/BEB): {evaluation.scores['C1'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['C1'].justification}
+
+D1 ZUKUNGSORIENTIERTE EMPFEHLUNG: {evaluation.scores['D1'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['D1'].justification}
+
+D2 WERTSCHÄTZENDER ABSCHLUSS: {evaluation.scores['D2'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['D2'].justification}
+
+E1 KOMMUNIKATIONSEBENEN: {evaluation.scores['E1'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['E1'].justification}
+
+F1 FÖRDERUNG VON REFLEXION: {evaluation.scores['F1'].score}/5
+(1=exzellent, 5=nicht bestanden)
+Begründung: {evaluation.scores['F1'].justification}
+
+GESAMTBEWERTUNG
+----------------------------------------
+Durchschnittliche Bewertung: {rounded_average_score}/5
+(1=exzellent, 5=nicht bestanden)
+
+Qualitätsstufe: {evaluation.quality_level}
+
+Stärken:
+'''
+    for strength in evaluation.strengths:
+        report += f"- {strength}\n"
+
+    report += "\nSchwächen:\n"
+    for weakness in evaluation.weaknesses:
+        report += f"- {weakness}\n"
+
+    report += "\nVerbesserungsvorschläge:\n"
+    for suggestion in evaluation.improvement_suggestions:
+        report += f"- {suggestion}\n"
+
+    # Save report to output directory
+    output_path = Path(output_dir) / f"{f.stem}_evaluation.txt"
+    with open(output_path, "w", encoding="utf-8") as out_file:
+        out_file.write(report)
+
+    # Write timing data to CSV
+    with open(csv_timing_path, "a", encoding="utf-8", newline="") as csv_file:
+        csv_writer = csv.writer(csv_file, delimiter=",")
+        csv_writer.writerow([
+            f.name,
+            f"{total_file_duration:.2f}",
+            f"{eval_duration:.2f}",
+            start_time_str,
+            datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
+            status,
+            rounded_average_score,
+            evaluation.quality_level,
+            evaluation.scores['A1'].score,
+            evaluation.scores['A2'].score,
+            evaluation.scores['B1'].score,
+            evaluation.scores['B2'].score,
+            evaluation.scores['C1'].score,
+            evaluation.scores['D1'].score,
+            evaluation.scores['D2'].score,
+            evaluation.scores['E1'].score,
+            evaluation.scores['F1'].score
+        ])
+
+    # Append this file's timing summary to the shared log file
+    with open(timing_log_path, "a", encoding="utf-8") as log:
+        log.write(f"Datei: {f.name}\n")
+        log.write(f"Start: {start_time_str.split()[1]}\n")  # HH:MM:SS captured at loop entry, not the time of logging
+        log.write(f"Dauer: {total_file_duration:.2f} Sekunden\n")
+        log.write("Detailierte Zeiten:\n")
+        log.write(f"  • API-Bewertung: {eval_duration:.2f} Sekunden\n")
+        log.write(f"  • Lokale Verarbeitung: {local_processing_time:.2f} Sekunden\n")
+        log.write("-"*50 + "\n\n")
+
+    print(f"\nBewertungsbericht erstellt: {output_path}")
+    print(f"Gesamtzeit für {f.name}: {total_file_duration:.2f} Sekunden (API: {eval_duration:.2f}, Lokal: {local_processing_time:.2f})")
+    print(f"{'='*50}")
+
+total_duration = time.time() - total_start  # wall-clock time for the whole batch
+print(f"\n{'='*50}")
+print("ALLE BEWERTUNGEN ABGESCHLOSSEN")
+print(f"Gesamtverarbeitungszeit: {total_duration:.2f} Sekunden für {len(files)} Dateien")
+print(f"Durchschnittliche Zeit pro Datei: {(total_duration/len(files) if files else 0.0):.2f} Sekunden")  # guard: no input files must not raise ZeroDivisionError
+print(f"Bewertungsberichte gespeichert in: {output_dir}")
+print(f"Timing-Log aktualisiert: {timing_log_path}")
+print(f"CSV-Timing-Datei erstellt: {csv_timing_path}")
+print(f"{'='*50}")
+##
diff --git a/exp/feedback_bewertung.py b/exp/feedback_bewertung.py
new file mode 100644
index 0000000..81d4982
--- /dev/null
+++ b/exp/feedback_bewertung.py
@@ -0,0 +1,191 @@
+# %% Feedback_Bewertung
+import openai
+import os
+from pathlib import Path
+
+# The API key is read from the environment (set OPENAI_API_KEY) so that no secret is committed with this script.
+client = openai.OpenAI(
+    api_key=os.environ["OPENAI_API_KEY"],
+    base_url=os.environ.get("OPENAI_BASE_URL", "http://pluto/v1"),
+)
+
+
+
+EVAL_PROMPT = """
+Du bist ein strenger, objektiver Bewertender. 
+Bewerte das folgende Feedback anhand der untenstehenden Kriterien.
+
+Für jedes Kriterium musst du GENAU folgendes liefern:
+
+- answer: true oder false  
+- explanation: eine kurze, klare Begründung auf Deutsch, basierend ausschließlich auf dem gegebenen Feedback
+
+KRITERIEN:
+1. Ist das Feedback zeitnah? (<48 Stunden)
+2. Enthält das Feedback narrative Kommentare?
+3. Ist der Ton positiv und/oder unterstützend?
+4. Beschreibt das Feedback beobachtbares Verhalten in klarer, eindeutiger Sprache?
+5. Verstärkt das Feedback, was gut gemacht wurde?
+6. Zeigt das Feedback Bereiche auf, in denen Verbesserungen nötig sind?
+7. Enthält das Feedback konkrete, spezifische Strategien zur Verbesserung?
+8. Bezieht sich das Feedback auf definierte Standards (z. B. EPA, Leistungsniveaus)?
+9. Passt der Anspruch/die Komplexität des Feedbacks zum Lernstand der Lernenden?
+
+Gib die Ergebnisse AUSSCHLIESSLICH als JSON gemäß dem bereitgestellten Schema zurück.
+"""  # System prompt: nine yes/no criteria, each to be answered with `answer` + `explanation` as JSON.
+
+
+from pydantic import BaseModel
+
+class CriterionResult(BaseModel):  # verdict for a single criterion of EVAL_PROMPT
+    answer: bool | None  # the prompt asks for true/false; the type additionally admits None
+    explanation: str  # short justification (the prompt requests German, based only on the feedback)
+
+class FeedbackEvaluation(BaseModel):  # one CriterionResult per criterion; numbering below is inferred from field names/order
+    timely: CriterionResult  # presumably criterion 1: timely (<48 hours)
+    narrative_comments: CriterionResult  # presumably criterion 2: contains narrative comments
+    positive_supportive_tone: CriterionResult  # presumably criterion 3: positive and/or supportive tone
+    clear_language: CriterionResult  # presumably criterion 4: observable behaviour in clear language
+    reinforces_strengths: CriterionResult  # presumably criterion 5: reinforces what was done well
+    identifies_improvements: CriterionResult  # presumably criterion 6: names areas needing improvement
+    includes_strategies: CriterionResult  # presumably criterion 7: concrete improvement strategies
+    relates_to_standards: CriterionResult  # presumably criterion 8: refers to defined standards
+    congruent_with_learner_level: CriterionResult  # presumably criterion 9: fits the learner's level
+
+
+
+##
+
+
+
+
+# %% Main
+
+# Evaluate every *.txt transcript in input_dir; results maps file name -> parsed evaluation.
+input_dir = "./cruscloud/AudioFeedbackProject/Transkripte"
+files = list(Path(input_dir).glob("*.txt"))
+results = {}
+
+for f in files:
+    # The criteria go in as the system message, the transcript text as the user message.
+    text = f.read_text(encoding="utf-8")
+    messages = [
+        {"role": "system", "content": EVAL_PROMPT},
+        {"role": "user", "content": text},
+    ]
+    response = client.responses.parse(
+        model="GPT-OSS-120B",
+        input=messages,
+        text_format=FeedbackEvaluation,
+    )
+    # Presumably a FeedbackEvaluation instance (the text_format requested above) - verify against the SDK.
+    results[f.name] = response.output_parsed
+
+# Save results. ensure_ascii=False keeps German umlauts readable in the UTF-8 file; an entry
+# without a parsed evaluation (None) is written as null instead of crashing on None.dict().
+import json
+with open("feedback_evaluations.json", "w", encoding="utf-8") as out:
+    json.dump({k: (v.dict() if v is not None else None) for k, v in results.items()}, out, indent=4, ensure_ascii=False)
+##
+
+
+
+
+
+# %% Feedback_Bewertung_Moodle
+import openai
+import os
+from pathlib import Path
+# The API key is read from the environment (set OPENAI_API_KEY) so that no secret is committed with this script.
+client = openai.OpenAI(
+    api_key=os.environ["OPENAI_API_KEY"],
+    base_url=os.environ.get("OPENAI_BASE_URL", "http://pluto/v1"),
+)
+
+EVAL_PROMPT = """
+Du bist ein strenger, objektiver Bewertender für medizinische Lehre.
+Bewerte das folgende Feedback anhand der Kursinhalte "Feedback in der Lehre: Basics".
+Für jedes Kriterium musst du GENAU folgendes liefern:
+- score: Zahl von 1 (beste Bewertung) bis 5 (gescheitert)
+- explanation: Ausführliche Begründung auf Deutsch mit:
+  a) Zitat aus dem Kursmaterial das das Kriterium definiert
+  b) Konkrete Analyse des Feedback-Textes mit Zitaten
+  c) Klare Verknüpfung zwischen Kursanforderung und Feedback-Umsetzung
+
+KRITERIEN (basierend auf Kursmaterial):
+1. KONKRETHEIT (Bild 4: Merkmale von Feedback)
+   "Das Feedback sollte so konkret wie möglich sein. Die Wiedergabe beobachteter Beispiele ist hilfreich."
+   Bewertung: Enthält das Feedback beobachtbare Beispiele statt Verallgemeinerungen?
+
+2. ICH-BOTSCHAFTEN (Feedbackregeln)
+   "Ein Feedback … wird in „Ich-Botschaften“ ausgedrückt. … ist nicht (ab)wertend."
+   Bewertung: Wird subjektive Wahrnehmung in Ich-Formulierungen dargestellt?
+
+3. STRUKTUR (WWW/BEB-Prinzip)
+   WWW: "1. Wahrnehmung: Ich habe gesehen ... 2. Wirkung: ... das hat mich nervös gemacht. 3. Wunsch: Ich wünsche mir ..."
+   BEB: "1. Beobachtung: Ich habe gesehen ... 2. Empfehlung: Ich empfehle ... 3. Begründung: Auf diese Weise vermeiden Sie ..."
+   Bewertung: Folgt das Feedback einer klaren Struktur (WWW oder BEB)?
+
+4. WERTSCHÄTZUNG (Bild 4: Merkmale von Feedback)
+   "Feedback ... auf Augenhöhe: Feedbackgeber und Feedbackempfänger gehen respektvoll und wertschätzend miteinander um."
+   Bewertung: Wird respektvoll und auf Augenhöhe kommuniziert?
+
+5. VERBESSERUNGSEMPFEHLUNG (Feedbackregeln)
+   "Ein Feedback … endet mit einer wertschätzenden Anregung für zukünftige Verbesserungen."
+   Bewertung: Gibt es konkrete, wertschätzende Handlungsempfehlungen?
+
+SCORING-ANLEITUNG:
+1 = Vollständige Umsetzung (exzellentes Beispiel)
+2 = Gute Umsetzung mit minimalen Lücken
+3 = Grundlegende Umsetzung mit signifikanten Mängeln
+4 = Unzureichende Umsetzung (wichtige Elemente fehlen)
+5 = Keine erkennbare Umsetzung (kriterienwidrig)
+
+Gib die Ergebnisse AUSSCHLIESSLICH als JSON gemäß dem bereitgestellten Schema zurück.
+"""  # System prompt: five course-based criteria, each scored 1 (best) to 5 with a German explanation, as JSON.
+
+from pydantic import BaseModel
+class CriterionResult(BaseModel):  # score and justification for a single rubric criterion
+    score: int  # 1-5 (1=best, 5=failed); the range is not enforced by this annotation
+    explanation: str  # justification text; the prompt asks for German with quotes from course and feedback
+class FeedbackEvaluation(BaseModel):  # one CriterionResult per criterion; numbering below is inferred from field names/order
+    konkretes_feedback: CriterionResult  # presumably criterion 1: KONKRETHEIT (concreteness)
+    ich_botschaften: CriterionResult  # presumably criterion 2: ICH-BOTSCHAFTEN (I-statements)
+    struktur: CriterionResult  # presumably criterion 3: STRUKTUR (WWW or BEB structure)
+    wertschaetzung: CriterionResult  # presumably criterion 4: WERTSCHÄTZUNG (respect, eye level)
+    verbesserungsempfehlung: CriterionResult  # presumably criterion 5: VERBESSERUNGSEMPFEHLUNG
+
+
+
+##
+
+
+
+
+
+# %% Main
+# The request uses plain JSON mode, which transmits no schema, although the prompt refers to
+# "the provided schema". The JSON schema of FeedbackEvaluation is therefore appended to the
+# system prompt, so the model can produce the field names validated by FeedbackEvaluation(**...).
+import json
+input_dir = "./cruscloud/AudioFeedbackProject/Transkripte"
+files = list(Path(input_dir).glob("*.txt"))
+results = {}
+schema_json = json.dumps(FeedbackEvaluation.schema(), ensure_ascii=False)
+system_prompt = f"{EVAL_PROMPT}\nDas JSON-Objekt muss diesem JSON-Schema entsprechen:\n{schema_json}\n"
+for f in files:
+    text = f.read_text(encoding="utf-8")
+    response = client.chat.completions.create(
+        model="GPT-OSS-120B",
+        messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": text}],
+        response_format={"type": "json_object"},
+        temperature=0.1,
+    )
+    results[f.name] = FeedbackEvaluation(**json.loads(response.choices[0].message.content))  # validate the JSON reply
+
+# Persist all evaluations as one JSON object keyed by transcript file name.
+import json
+with open("feedback_evaluations.json", "w", encoding="utf-8") as out:
+    serialised = {name: evaluation.dict() for name, evaluation in results.items()}
+    json.dump(serialised, out, indent=4, ensure_ascii=False)
+##